added kfuns regexp_compile() and regexp_match()

this is the LPC interface to GNU regexp by Robert Leslie <rob@ccs.neu.edu> and is used by the upcoming dgd-lpmoo port
author: Adam David <adam@FreeBSD.org> 1997-01-03 04:03:04 +0000
committer: Adam David <adam@FreeBSD.org> 1997-01-03 04:03:04 +0000
commit: b4caaca0b137d62eb4988436d737f2e16de81ddd (patch)
tree: 421609ca2ff0123ef2c2a13dc6b098185a7b23df /net/dgd-net/files
parent: Added comment about font files. (diff)
2 files changed, 563 insertions, 12 deletions
diff --git a/net/dgd-net/files/patch-aa b/net/dgd-net/files/patch-aa
index f4f3fb83bb67..7d981b605960 100644
--- a/net/dgd-net/files/patch-aa
+++ b/net/dgd-net/files/patch-aa
@@ -1,7 +1,7 @@
-*** Makefile.orig	Sun Dec 10 19:21:36 1995
---- Makefile	Wed Feb  7 22:53:33 1996
+*** Makefile.orig	Thu Jan  2 23:38:50 1997
+--- Makefile	Thu Jan  2 23:41:13 1997
 ***************
-*** 3,14 ****
+*** 3,24 ****
   #
   HOST=	NETBSD
   DEFINES=-D$(HOST) #-DDUMP_FUNCS
@@ -14,7 +14,17 @@
   LD=	$(CC)
   DMAKE=	make
   BIN=	../bin
---- 3,15 ----
+  
+  OBJ=	alloc.o error.o hash.o swap.o str.o array.o object.o data.o path.o \
+! 	editor.o comm.net.o call_out.o interpret.o config.o dgd.o
+  EDOBJ=	alloc.o error.o
+  LEXOBJ=	alloc.o hash.o
+  COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \
+! 	interpret.o config.o
+  
+  a.out:	$(OBJ) always
+  	cd comp; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd
+--- 3,27 ----
   #
   HOST=	NETBSD
   DEFINES=-D$(HOST) #-DDUMP_FUNCS
@@ -23,22 +33,23 @@
   CCFLAGS=$(DEFINES) $(DEBUG)
   CFLAGS=	-I. -Icomp -Ilex -Ied -Ikfun $(CCFLAGS)
 ! LDFLAGS=-s
-! LIBS=-lcrypt
+! LIBS=-lcrypt -lgnuregex
 ! CC=	cc
   LD=	$(CC)
   DMAKE=	make
   BIN=	../bin
-***************
-*** 20,25 ****
---- 21,28 ----
+  
+  OBJ=	alloc.o error.o hash.o swap.o str.o array.o object.o data.o path.o \
+! 	editor.o comm.net.o call_out.o interpret.o config.o dgd.o rgx.o
+  EDOBJ=	alloc.o error.o
+  LEXOBJ=	alloc.o hash.o
   COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \
-  	interpret.o config.o
+! 	interpret.o config.o rgx.o
+! 
+! all: a.out comp/a.out
   
-+ all: a.out comp/a.out
-+ 
   a.out:	$(OBJ) always
   	cd comp; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd
-  	cd lex; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd
 ***************
 *** 38,44 ****
   	-mv $(BIN)/driver $(BIN)/driver.old
@@ -77,3 +88,10 @@
   
   clean:
   	rm -f a.out $(OBJ) comp.sub lex.sub ed.sub
+***************
+*** 92,94 ****
+--- 99,102 ----
+  call_out.o config.o dgd.o: call_out.h
+  error.o comm.o call_out.o config.o dgd.o: comm.h
+  config.o: version.h
++ rgx.o: str.h array.h rgx.h interpret.h
diff --git a/net/dgd-net/files/patch-ad b/net/dgd-net/files/patch-ad
new file mode 100644
index 000000000000..451e8b2ec5c4
--- /dev/null
+++ b/net/dgd-net/files/patch-ad
@@ -0,0 +1,533 @@
+*** src.rgx/config.c	Thu Jan  2 23:34:31 1997
+--- config.c	Thu Jan  2 23:51:21 1997
+***************
+*** 19,24 ****
+--- 19,25 ----
+  # include "compile.h"
+  # include "csupport.h"
+  # include "table.h"
++ # include "rgx.h"
+  
+  typedef struct {
+      char *name;		/* name of the option */
+***************
+*** 810,815 ****
+--- 811,819 ----
+  
+      /* initialize interpreter */
+      i_init(conf[CREATE].u.str);
++ 
++     /* initialize regular expressions */
++     rgx_init();
+  
+      /* initialize compiler */
+      c_init(conf[AUTO_OBJECT].u.str,
+*** src.rgx/kfun/extra.c	Tue Sep 27 09:28:26 1994
+--- kfun/extra.c	Thu Feb  2 22:25:18 1995
+***************
+*** 560,562 ****
+--- 560,640 ----
+      error("Not yet implemented");
+  }
+  # endif
++ 
++ 
++ # ifdef FUNCDEF
++ FUNCDEF("regexp_compile", kf_regexp_compile, p_regexp_compile)
++ # else
++ char p_regexp_compile[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
++ 			    T_STRING | (1 << REFSHIFT), 2, T_STRING, T_INT };
++ 
++ /*
++  * NAME:	kfun->regexp_compile()
++  * DESCRIPTION:	replace the pattern string on the stack by the array that
++  *		rgx_new() builds for it; a nonzero second argument asks
++  *		for case-insensitive matching
++  */
++ int kf_regexp_compile(nargs)
++ int nargs;
++ {
++   array *rgx;
++   int sensitive;
++ 
++   if (nargs < 1)
++     return -1;
++   sensitive = 1;		/* default when the flag is omitted */
++   if (nargs == 2)
++     sensitive = !(sp++)->u.number;	/* step past the optional flag */
++   rgx = rgx_new(sp->u.string, sensitive);
++   str_del(sp->u.string);
++   sp->type = T_ARRAY;
++   arr_ref(sp->u.array = rgx);
++   return 0;
++ }
++ # endif
++ 
++ 
++ # ifdef FUNCDEF
++ FUNCDEF("regexp_match", kf_regexp_match, p_regexp_match)
++ # else
++ char p_regexp_match[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
++ 			  T_INT | (1 << REFSHIFT), 3,
++ 			  T_STRING | (1 << REFSHIFT), T_STRING, T_INT };
++ 
++ /*
++  * NAME:	kfun->regexp_match()
++  * DESCRIPTION:	match the subject string against a compiled pattern (a
++  *		3-element array); both arguments are replaced on the stack
++  *		by the array from rgx_match(), or by integer 0 if that
++  *		returned no array
++  */
++ int kf_regexp_match(nargs)
++ int nargs;
++ {
++   array *rgx, *found;
++   int backwards;
++ 
++   if (nargs < 2)
++     return -1;
++ 
++   backwards = 0;
++   if (nargs == 3)
++     backwards = (sp++)->u.number;	/* step past the optional flag */
++   /* sp[0] is now the subject, sp[1] the compiled pattern */
++   rgx = sp[1].u.array;
++   if (rgx->size != 3)
++     return 1;
++   found = rgx_match(d_get_elts(rgx), sp->u.string, backwards);
++ 
++   str_del(sp->u.string);
++   sp++;
++   arr_del(sp->u.array);
++   if (found != (array *) 0)
++     arr_ref(sp->u.array = found);	/* type field is left untouched */
++   else {
++     sp->type = T_INT;
++     sp->u.number = 0;
++   }
++   return 0;
++ }
++ # endif
+*** src.rgx/kfun/kfun.h	Sun May  8 08:15:01 1994
+--- kfun/kfun.h	Thu Feb  2 22:25:18 1995
+***************
+*** 5,7 ****
+--- 5,8 ----
+  # include "xfloat.h"
+  # include "interpret.h"
+  # include "data.h"
++ # include "rgx.h"
+*** src.rgx/rgx.c	Thu Jan  2 21:41:55 1997
+--- rgx.c	Thu Jan  2 21:17:46 1997
+***************
+*** 0 ****
+--- 1,213 ----
++ # include "dgd.h"
++ # include "str.h"
++ # include "array.h"
++ # include "interpret.h"
++ # include <gnuregex.h>
++ # include "rgx.h"
++ # include <memory.h>
++ 
++ static char trans_table[256];
++ 
++ /*
++  * NAME:	regexp->init()
++  * DESCRIPTION:	fill trans_table with a case-folding map: 'a'..'z' map to
++  *		'A'..'Z', every other byte value maps to itself
++  */
++ void rgx_init()
++ {
++   register int ch;
++ 
++   /* single pass over all 256 byte values */
++   for (ch = 255; ch >= 0; --ch)
++     trans_table[ch] = (ch >= 'a' && ch <= 'z') ? ch - 'a' + 'A' : ch;
++ }
++ 
++ /*
++  * NAME:	regexp->setend()
++  * DESCRIPTION:	given the index i of a '[' in text, return the index of the
++  *		']' closing that charset, or len - 1 if it is unterminated;
++  *		a ']' directly after "[" or "[^" counts as a set member
++  */
++ static long rgx_setend(text, i, len)
++ register char *text;
++ register long i, len;
++ {
++   if (++i < len && text[i] == '^')
++     ++i;
++   for (++i; i < len; ++i)
++     if (text[i] == ']')
++       return i;
++   return len - 1;
++ }
++ 
++ /*
++  * NAME:	regexp->new()
++  * DESCRIPTION:	create a new regexp buffer.  The pattern uses '%' as its
++  *		escape character; it is rewritten for GNU regex, which uses
++  *		'\': "%x" becomes "\x", a plain '\' becomes "\\" and
++  *		charsets are copied untouched.  The result is an array of
++  *		3 strings: the raw re_pattern_buffer, the compiled code and
++  *		the fastmap.  A pattern that fails to compile is an error.
++  */
++ array *rgx_new(pattern, case_matters)
++ string *pattern;
++ int case_matters;
++ {
++   struct re_pattern_buffer patbuf;
++   char fastmap[256];
++   const char *compile_error;
++   array *result;
++   register value *v;
++   register char *src, *dst;
++   register long i, j, end;
++   long len, n;
++   string *s;
++ 
++   src = pattern->text;
++   len = pattern->len;
++   /* pass 1: count the plain backslashes, each of which grows by one */
++   n = 0;
++   for (i = 0; i < len; ++i)
++     {
++       if (src[i] == '[')
++ 	i = rgx_setend(src, i, len);
++       else if (src[i] == '%')
++ 	++i;			/* the escaped char is never counted */
++       else if (src[i] == '\\')
++ 	++n;
++     }
++ 
++   /* pass 2: translate into a fresh string, so that the caller's pattern
++      stays unmodified and s is valid even when there is nothing to expand */
++   str_ref(s = str_new(NULL, len + n));	/* NOTE(review): assumes ref starts 0 */
++   dst = s->text;
++   for (i = j = 0; i < len; )
++     {
++       switch (src[i])
++ 	{
++ 	case '[':
++ 	  for (end = rgx_setend(src, i, len); i <= end; )
++ 	    dst[j++] = src[i++];
++ 	  break;
++ 	case '%':
++ 	  dst[j++] = '\\';
++ 	  if (++i < len)
++ 	    dst[j++] = src[i++];
++ 	  break;
++ 	case '\\':
++ 	  dst[j++] = '\\';
++ 	  dst[j++] = '\\';
++ 	  ++i;
++ 	  break;
++ 	default:
++ 	  dst[j++] = src[i++];
++ 	}
++     }
++ 
++   /* zero first: the whole struct is copied into an LPC string below */
++   memset((char *) &patbuf, 0, sizeof(patbuf));
++   patbuf.fastmap   = fastmap;
++   patbuf.translate = (case_matters ? (char *) 0 : trans_table);
++ 
++   compile_error = re_compile_pattern(s->text, s->len, &patbuf);
++   str_del(s);
++   if (compile_error != (char *) 0)
++     {
++       /* fastmap and translate are not malloc()ed: hide them from regfree() */
++       patbuf.fastmap   = 0;
++       patbuf.translate = 0;
++       regfree(&patbuf);
++       error(compile_error);
++     }
++ 
++   re_compile_fastmap(&patbuf);
++ 
++   result = arr_new(3L);
++   v = result->elts;
++   v->type = T_STRING;
++   str_ref(v->u.string = str_new((char *) &patbuf, (long) sizeof(patbuf)));
++   ++v;
++   v->type = T_STRING;
++   str_ref(v->u.string = str_new((char *) patbuf.buffer,
++ 				(long) patbuf.allocated));
++   ++v;
++   v->type = T_STRING;
++   str_ref(v->u.string = str_new(fastmap, 256L));
++ 
++   /* don't let regfree() try to free these */
++   patbuf.fastmap   = 0;
++   patbuf.translate = 0;
++   regfree(&patbuf);
++   return result;
++ }
++ 
++ /*
++  * NAME:	regexp->match()
++  * DESCRIPTION:	match subject against the 3 values made by rgx_new() (buffer
++  *		image, compiled code, fastmap).  Returns 0 for no match, else
++  *		2 * RGX_NREGS ints: inclusive start/end of the whole match and
++  *		of each subexpression (0 and -1 for one that took no part).
++  */
++ array *rgx_match(pattern, subject, reverse)
++ value *pattern;
++ string *subject;
++ int reverse;
++ {
++   long sub_len;
++   int pos;
++   struct re_pattern_buffer patbuf;
++   struct re_registers regs;
++   regoff_t starts[RGX_NREGS + 1], ends[RGX_NREGS + 1];
++   array *result;
++   register value *v;
++   register int i;
++ 
++   /* the elements come from LPC code: check types before using u.string */
++   for (i = 0; i < 3; ++i)
++     if (pattern[i].type != T_STRING)
++       error("Invalid compiled pattern");
++   if (pattern[0].u.string->len != sizeof(struct re_pattern_buffer))
++     error("Invalid compiled pattern");
++ 
++   memcpy((char *) &patbuf, pattern[0].u.string->text,
++ 	 sizeof(struct re_pattern_buffer));
++   if (patbuf.allocated != (unsigned long) pattern[1].u.string->len ||
++       pattern[2].u.string->len != 256)
++     error("Invalid compiled pattern");
++ 
++   /* the image holds stale pointers; aim them at the live data */
++   patbuf.buffer  = (unsigned char *) pattern[1].u.string->text;
++   patbuf.fastmap = pattern[2].u.string->text;
++   if (patbuf.translate != 0)
++     patbuf.translate = trans_table;
++ 
++   regs.num_regs = RGX_NREGS;
++   regs.start = starts;
++   regs.end   = ends;
++   patbuf.regs_allocated = REGS_FIXED;
++   sub_len = subject->len;
++   pos = re_search(&patbuf, subject->text, sub_len, reverse ? sub_len : 0,
++ 		  reverse ? -(sub_len + 1) : sub_len + 1, &regs);
++   if (pos == -1)
++     return (array *) 0;
++   if (pos < 0)
++     error("Regexp matching failed");	/* -2: internal matcher error */
++ 
++   result = arr_new((long) RGX_NREGS * 2);
++   v = result->elts;
++   for (i = 0; i < RGX_NREGS; ++i, v += 2)
++     {
++       v[0].type = T_INT;
++       v[1].type = T_INT;
++       if (i != 0 && starts[i] == -1)
++ 	{
++ 	  v[0].u.number = 0;
++ 	  v[1].u.number = -1;
++ 	}
++       else
++ 	{
++ 	  v[0].u.number = starts[i];
++ 	  v[1].u.number = ends[i] - 1;
++ 	}
++     }
++   return result;
++ }
+*** src.rgx/rgx.h	Thu Jan  2 21:42:05 1997
+--- rgx.h	Fri Feb  3 03:09:54 1995
+***************
+*** 0 ****
+--- 1,5 ----
++ # define RGX_NREGS  10
++ 
++ extern void		rgx_init	P((void));
++ extern array	       *rgx_new		P((string*, int));
++ extern array	       *rgx_match	P((value*, string*, int));
+*** doc.rgx/example.c	Thu Jan  1 00:00:00 1970
+--- ../doc/rgx_example.c	Fri Feb  3 03:30:01 1995
+***************
+*** 0 ****
+--- 1,49 ----
++ /*
++  * This file shows how an interface can be built to cache regexp patterns
++  * and ultimately provide a more streamlined interface to the regexp kfuns.
++  *
++  * Note that since regexp_match() severely depends on the return result from
++  * regexp_compile() being unaltered, it is a good idea to provide an
++  * interface like this, and also to mask the regexp_match() kfun from the
++  * auto object.
++  */
++ 
++ # define CACHE_SIZE  10
++ 
++ private	mapping	cache;
++ private	string *list;
++ private	string	last_pattern;
++ 
++ /* start with an empty pattern cache and an empty usage list */
++ static void create(void)
++ {
++   list = ({ });
++   cache = ([ ]);
++ }
++ 
++ /* Match subject against pattern, compiling it only when it is not cached;
++    list holds the cached patterns, least recently used first. */
++ int *match(string subject, string pattern)
++ {
++   string *buffer;
++ 
++   buffer = cache[pattern];
++   if (buffer == 0)
++     {
++       buffer = regexp_compile(pattern);
++       if (sizeof(list) >= CACHE_SIZE)
++ 	{
++ 	  cache[list[0]] = 0;	/* evict the least recently used entry */
++ 	  list = list[1 ..];
++ 	}
++       list += ({ pattern });
++       cache[pattern] = buffer;
++     }
++   else if (pattern != last_pattern)
++     list = list - ({ pattern }) + ({ pattern });	/* move to the end */
++ 
++   /* record the newest entry on a miss too; otherwise a later hit on the
++      old last_pattern would skip the move and it would be evicted early */
++   last_pattern = pattern;
++   return regexp_match(buffer, subject);
++ }
+diff -crN doc.rgx/kfun/regexp_compile doc/kfun/regexp_compile
+*** doc.rgx/kfun/regexp_compile	Thu Jan  1 00:00:00 1970
+--- ../doc/kfun/regexp_compile	Tue Jul 26 00:02:34 1994
+***************
+*** 0 ****
+--- 1,27 ----
++ NAME
++ 	regexp_compile - compile a regular expression
++ 
++ SYNOPSIS
++ 	varargs string *regexp_compile(string pattern, int case_insensitive)
++ 
++ DESCRIPTION
++ 	The argument pattern is compiled as a regular expression. If the
++ 	argument case_insensitive is nonzero, the pattern is compiled in
++ 	such a way that subsequent matching will be done without case
++ 	sensitivity. The default is to be case-sensitive.
++ 
++ 	An array of strings is returned; these strings contain binary
++ 	data and must not be altered in any way before being passed to
++ 	regexp_match().
++ 
++ 	The compiled regexp can be saved and used any number of times with
++ 	regexp_match().
++ 
++ ERRORS
++ 	If the argument pattern contains a syntactically malformed regular
++ 	expression, an error will result. An error can also occur if the
++ 	pattern is too complicated, or if there is not enough memory to
++ 	compile the pattern.
++ 
++ SEE ALSO
++ 	kfun/regexp_match
+*** doc.rgx/kfun/regexp_match	Thu Jan  1 00:00:00 1970
+--- ../doc/kfun/regexp_match	Mon Jul 25 22:19:42 1994
+***************
+*** 0 ****
+--- 1,34 ----
++ NAME
++ 	regexp_match - perform regular expression matching
++ 
++ SYNOPSIS
++ 	varargs int *regexp_match(string *pattern, string subject, int reverse)
++ 
++ DESCRIPTION
++ 	The argument subject is matched against the compiled regular
++ 	expression pattern. If the argument reverse is nonzero, matching
++ 	is performed from right-to-left; otherwise, matching is performed
++ 	left-to-right.
++ 
++ 	The pattern argument must be an array of strings exactly as it
++ 	was received from regexp_compile(); otherwise, the result of
++ 	calling this function is undefined.
++ 
++ 	If the argument subject could not be matched with the regular
++ 	expression, 0 is returned. Otherwise, an array of 20 integers
++ 	is returned with this format:
++ 
++ 	    ({ start0, end0, start1, end1, ..., start9, end9 })
++ 
++ 	Each element is a character index into the subject string. The
++ 	first two elements, start0 and end0, indicate the part of the subject
++ 	that was matched by the regular expression as a whole. The following
++ 	elements indicate the starting and ending indices of each
++ 	subexpression (denoted by "%(" and "%)" pairs in the original
++ 	pattern) that was matched.
++ 
++ 	If any subexpression was not matched, the corresponding start and
++ 	end elements will be 0 and -1, respectively.
++ 
++ SEE ALSO
++ 	kfun/regexp_compile
+*** doc.rgx/regexps	Thu Jan  1 00:00:00 1970
+--- ../doc/regexps	Mon Jul 25 22:58:57 1994
+***************
+*** 0 ****
+--- 1,32 ----
++ 
++ Regular expressions are composed of the following operators:
++ 
++ .		Match any single character
++ XY		Match X immediately followed by Y
++ X*		Match zero-or-more of X
++ X+		Match one-or-more of X
++ X?		Match zero-or-one of X
++ X%|Y		Match either X or Y
++ [charset]	Match any single character in `charset'
++ [^charset]	Match any single character not in `charset'
++ %(X%)		Match X, but also remember the match as a subexpression
++ %digit		Match the numbered previous subexpression
++ ^X		Match X anchored at the beginning of a line
++ X$		Match X anchored at the end of a line
++ %b		Match the empty string at the beginning or end of a word
++ %B		Match the empty string only within the middle of a word
++ %<		Match the beginning of a word
++ %>		Match the end of a word
++ %w		Match any word-constituent character
++ %W		Match any character that is not word-constituent
++ 
++ Any other character in a regular expression is matched literally with itself.
++ To match any of the special operator characters .*+?%[^$ literally, precede
++ the character with `%'.
++ 
++ A `charset' is formed by listing all desired characters within brackets. To
++ include a literal `^' in a charset, do not list it in the first position. To
++ include a literal `]', list it immediately after the opening `[' or `[^'. All
++ characters are non-special (and should not be escaped) within a charset,
++ except `-', which denotes a character range. To include a literal `-', list it
++ either first or last.
+*** README.rgx.old	Fri Jan  3 03:17:21 1997
+--- ../README.rgx	Fri Jan  3 03:14:29 1997
+***************
+*** 0 ****
+--- 1,18 ----
++ dgd-rgx was written by Robert Leslie <rob@ccs.neu.edu> as an LPC interface to
++ GNU regex, adding two kfuns to DGD for regular expression matching:
++ 
++ 	regexp_compile()
++ 	regexp_match()
++ 
++ For a description of the regular expression language accepted by these kfuns,
++ please read doc/regexps.
++ 
++ Complete details for the two kfuns can be found in the doc/kfun directory.
++ 
++ Adapted by Adam David <adam@veda.is> for DGD 1.0.97 and to use the unmodified
++ GNU regexp library.
++ 
++ This software is a modification of DGD, and is therefore protected by the
++ DGD Copyright.
++ 
++ There is no warranty for this software.
author	Adam David <adam@FreeBSD.org>	1997-01-03 04:03:04 +0000
committer	Adam David <adam@FreeBSD.org>	1997-01-03 04:03:04 +0000
commit	b4caaca0b137d62eb4988436d737f2e16de81ddd (patch)
tree	421609ca2ff0123ef2c2a13dc6b098185a7b23df /net/dgd-net/files
parent	Added comment about font files. (diff)