svn commit: r223439 - user/gabor/tre-integration/lib/libc/regex/grot

Wed Jun 22 20:20:50 UTC 2011

Author: gabor
Date: Wed Jun 22 20:20:49 2011
New Revision: 223439
URL: http://svn.freebsd.org/changeset/base/223439

Log:
  - Re-add the regression tests from the previous regex code; they will
    probably still be useful

Added:
  user/gabor/tre-integration/lib/libc/regex/grot/
  user/gabor/tre-integration/lib/libc/regex/grot/Makefile   (contents, props changed)
  user/gabor/tre-integration/lib/libc/regex/grot/debug.c   (contents, props changed)
  user/gabor/tre-integration/lib/libc/regex/grot/main.c   (contents, props changed)
  user/gabor/tre-integration/lib/libc/regex/grot/mkh   (contents, props changed)
  user/gabor/tre-integration/lib/libc/regex/grot/split.c   (contents, props changed)
  user/gabor/tre-integration/lib/libc/regex/grot/tests

Added: user/gabor/tre-integration/lib/libc/regex/grot/Makefile
==============================================================================

--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/Makefile	Wed Jun 22 20:20:49 2011	(r223439)
@@ -0,0 +1,98 @@
+# $FreeBSD$
+# Build settings for the regex regression-test driver `re'.
+# -DREDEBUG enables a lot of internal assertion checking; take it out of
+# CFLAGS (and put something like -O in) only *after* testing.  -Dconst= is
+# not set below; add it only for a pre-ANSI compiler.  Do not take
+# -DPOSIX_MISTAKE out.  REGCFLAGS is an optional hook for extra flags.
+
+PATHS= ${.CURDIR}/.. ${.CURDIR}/../../locale ${.CURDIR}/../../../../include
+.PATH: ${PATHS}
+
+CFLAGS+= -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
+.for incpath in ${PATHS}
+CFLAGS+= -I${incpath}
+.endfor
+
+# Flags for mkh: -o comments out prototype argument lists for a pre-ANSI
+# compiler, -b wraps them in the Berkeley __P macro.  Neither is set here.
+MKHFLAGS =
+
+LDFLAGS =
+
+# Slots for replacement headers/sources on pre-ANSI systems.  HMISSING is
+# already empty; only split.c (used by main.c) remains in the other two.
+HMISSING =
+SRCMISSING = split.c
+OBJMISSING = split.o
+H = cname.h regex2.h utils.h $(HMISSING)
+REGSRC = regcomp.c regerror.c regexec.c regfree.c engine.c
+SRC = $(REGSRC) debug.c main.c $(SRCMISSING)
+
+# Object lists derived from the above; should not need changing.
+OBJPRODN = regcomp.o regexec.o regerror.o regfree.o
+OBJS = $(OBJPRODN) debug.o main.o $(OBJMISSING)
+
+# Stuff that matters only if you're trying to lint the package.
+LINTFLAGS = -I. -Dstatic= -Dconst= -DREDEBUG
+LINTC = regcomp.c regexec.c regerror.c regfree.c debug.c main.c $(SRCMISSING)
+JUNKLINT =possible pointer alignment|null effect
+
+# Generate a private-declarations header (.ih) from the lines of a C source
+# file that mkh -p selects.
+.SUFFIXES:	.ih .h
+.c.ih:
+	sh mkh $(MKHFLAGS) -p $< >$@
+
+# Default: build the test driver and run the regression tests.
+default:	r
+
+# The regression-test driver.
+re:	$(OBJS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $@
+
+# Just the production objects.
+o:	$(OBJPRODN)
+
+# Regenerate regex.h, replacing the old copy only when the contents differ.
+REGEXHSRC = ../regex2.h ../reg*.c
+h:	$(REGEXHSRC)
+	sh mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
+	cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
+	rm -f regex.tmp
+
+regex.h: h
+
+regcomp.o regexec.o regfree.o debug.o:	utils.h regex.h regex2.h
+regcomp.o:	cname.h regcomp.ih
+regexec.o:	engine.c engine.ih
+regerror.o:	regerror.ih
+regerror.o:	utils.h
+debug.o:	debug.ih
+main.o:	main.ih
+
+# Run the tests three times: plainly, with exec option l (force long
+# representation) and with exec option r (force backref use).
+r:	re tests
+	./re <tests
+	./re -el <tests
+	./re -er <tests
+
+# The targets below name the driver `re', as its build rule does; a
+# prerequisite spelled `./re' matches no rule in a make that does not strip
+# the leading `./'.
+
+# As r, but carry on after a failing run.
+ra:	re tests
+	-./re <tests
+	-./re -el <tests
+	-./re -er <tests
+
+# As r, with the driver's -x debugging flag.
+rx:	re tests
+	./re -x <tests
+	./re -x -el <tests
+	./re -x -er <tests
+
+# Time the test runs.
+t:	re tests
+	-time ./re <tests
+	-time ./re -cs <tests
+	-time ./re -el <tests
+	-time ./re -cs -el <tests
+
+# Lint the sources, filtering out the complaints listed in JUNKLINT.
+l:	$(LINTC)
+	lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
+
+clean:	tidy
+	rm -f *.o *.s *.ih re
+
+tidy:
+	rm -f junk* core regex.tmp lint
+
+spotless:	clean
+	rm -f regex.h

Added: user/gabor/tre-integration/lib/libc/regex/grot/debug.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/debug.c	Wed Jun 22 20:20:49 2011	(r223439)
@@ -0,0 +1,212 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "utils.h"
+#include "regex2.h"
+#include "debug.ih"
+
+/*
+ - regprint - print a regexp for debugging
+ == void regprint(regex_t *r, FILE *d);
+ *
+ * Writes a one-line summary of the compiled pattern's internal fields
+ * (state counts, flags, subexpression count, ...) to d, then dumps the
+ * strip with s_print().
+ */
+void
+regprint(r, d)
+regex_t *r;
+FILE *d;
+{
+	struct re_guts *g = r->re_g;
+
+	fprintf(d, "%ld states", (long)g->nstates);
+	fprintf(d, ", first %ld last %ld", (long)g->firststate,
+						(long)g->laststate);
+	if (g->iflags&USEBOL)
+		fprintf(d, ", USEBOL");
+	if (g->iflags&USEEOL)
+		fprintf(d, ", USEEOL");
+	if (g->iflags&BAD)
+		fprintf(d, ", BAD");
+	if (g->nsub > 0)
+		fprintf(d, ", nsub=%ld", (long)g->nsub);
+	if (g->must != NULL)
+		fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
+								g->must);
+	if (g->backrefs)
+		fprintf(d, ", backrefs");
+	if (g->nplus > 0)
+		fprintf(d, ", nplus %ld", (long)g->nplus);
+	fprintf(d, "\n");
+	s_print(g, d);
+}
+
+/*
+ - s_print - print the strip for debugging
+ == static void s_print(struct re_guts *g, FILE *d);
+ *
+ * Walks g->strip from its second entry up to the closing OEND and writes a
+ * textual form of each operator to d.  Operands are shown as <n> where the
+ * operator they point at is not the expected partner.
+ */
+static void
+s_print(g, d)
+struct re_guts *g;
+FILE *d;
+{
+	sop *s;
+	cset *cs;		/* cs, i and last are used only by the */
+	int i;			/* disabled set-dumping code below */
+	int done = 0;
+	sop opnd;
+	int col = 0;
+	int last;
+	sopno offset = 2;
+	/* GAP: after every fifth operator emit a space, or a line break once
+	 * the line has grown past 40 columns. */
+#	define	GAP()	{	if (offset % 5 == 0) { \
+					if (col > 40) { \
+						fprintf(d, "\n\t"); \
+						col = 0; \
+					} else { \
+						fprintf(d, " "); \
+						col++; \
+					} \
+				} else \
+					col++; \
+				offset++; \
+			}
+
+	if (OP(g->strip[0]) != OEND)
+		fprintf(d, "missing initial OEND!\n");
+	for (s = &g->strip[1]; !done; s++) {
+		opnd = OPND(*s);
+		switch (OP(*s)) {
+		case OEND:
+			fprintf(d, "\n");
+			done = 1;
+			break;
+		case OCHAR:
+			/*
+			 * strchr() also matches the terminating NUL of the
+			 * list, so exclude a NUL operand explicitly and let
+			 * regchar() print it as an octal escape.
+			 */
+			if ((char)opnd != '\0' &&
+			    strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
+				fprintf(d, "\\%c", (char)opnd);
+			else
+				fprintf(d, "%s", regchar((char)opnd));
+			break;
+		case OBOL:
+			fprintf(d, "^");
+			break;
+		case OEOL:
+			fprintf(d, "$");
+			break;
+		case OBOW:
+			fprintf(d, "\\{");
+			break;
+		case OEOW:
+			fprintf(d, "\\}");
+			break;
+		case OANY:
+			fprintf(d, ".");
+			break;
+		case OANYOF:
+			fprintf(d, "[(%ld)", (long)opnd);
+#if 0
+			cs = &g->sets[opnd];
+			last = -1;
+			for (i = 0; i < g->csetsize+1; i++)	/* +1 flushes */
+				if (CHIN(cs, i) && i < g->csetsize) {
+					if (last < 0) {
+						fprintf(d, "%s", regchar(i));
+						last = i;
+					}
+				} else {
+					if (last >= 0) {
+						if (last != i-1)
+							fprintf(d, "-%s",
+								regchar(i-1));
+						last = -1;
+					}
+				}
+#endif
+			fprintf(d, "]");
+			break;
+		case OBACK_:
+			fprintf(d, "(\\<%ld>", (long)opnd);
+			break;
+		case O_BACK:
+			fprintf(d, "<%ld>\\)", (long)opnd);
+			break;
+		case OPLUS_:
+			fprintf(d, "(+");
+			if (OP(*(s+opnd)) != O_PLUS)
+				fprintf(d, "<%ld>", (long)opnd);
+			break;
+		case O_PLUS:
+			if (OP(*(s-opnd)) != OPLUS_)
+				fprintf(d, "<%ld>", (long)opnd);
+			fprintf(d, "+)");
+			break;
+		case OQUEST_:
+			fprintf(d, "(?");
+			if (OP(*(s+opnd)) != O_QUEST)
+				fprintf(d, "<%ld>", (long)opnd);
+			break;
+		case O_QUEST:
+			if (OP(*(s-opnd)) != OQUEST_)
+				fprintf(d, "<%ld>", (long)opnd);
+			fprintf(d, "?)");
+			break;
+		case OLPAREN:
+			fprintf(d, "((<%ld>", (long)opnd);
+			break;
+		case ORPAREN:
+			fprintf(d, "<%ld>))", (long)opnd);
+			break;
+		case OCH_:
+			fprintf(d, "<");
+			if (OP(*(s+opnd)) != OOR2)
+				fprintf(d, "<%ld>", (long)opnd);
+			break;
+		case OOR1:
+			if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
+				fprintf(d, "<%ld>", (long)opnd);
+			fprintf(d, "|");
+			break;
+		case OOR2:
+			fprintf(d, "|");
+			if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
+				fprintf(d, "<%ld>", (long)opnd);
+			break;
+		case O_CH:
+			if (OP(*(s-opnd)) != OOR1)
+				fprintf(d, "<%ld>", (long)opnd);
+			fprintf(d, ">");
+			break;
+		default:
+			/* cast like every other case: sop need not be an int */
+			fprintf(d, "!%ld(%ld)!", (long)OP(*s), (long)opnd);
+			break;
+		}
+		if (!done)
+			GAP();
+	}
+}
+
+/*
+ - regchar - make a character printable
+ == static char *regchar(int ch);
+ *
+ * Returns a pointer to a static buffer, overwritten by the next call, that
+ * holds the character itself if it is printable and otherwise a backslash
+ * followed by its octal value.
+ */
+static char *			/* -> representation */
+regchar(ch)
+int ch;
+{
+	static char buf[10];
+	/*
+	 * s_print() passes (char)opnd, which is negative for bytes >= 0x80
+	 * where char is signed.  A negative value is undefined for isprint()
+	 * and, printed with %o, is long enough (11 digits for a 32-bit int)
+	 * to overrun buf, so work with the byte value instead.
+	 */
+	unsigned int byte = (unsigned char)ch;
+
+	if (isprint((int)byte) || byte == ' ')
+		snprintf(buf, sizeof(buf), "%c", (int)byte);
+	else
+		snprintf(buf, sizeof(buf), "\\%o", byte);
+	return(buf);
+}

Added: user/gabor/tre-integration/lib/libc/regex/grot/main.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/main.c	Wed Jun 22 20:20:49 2011	(r223439)
@@ -0,0 +1,513 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <assert.h>
+
+#include "main.ih"
+
+char *progname;			/* argv[0], used in the usage message */
+int debug = 0;			/* -x count; makes regress() echo line numbers */
+int line = 0;			/* number of the input line regress() is on */
+int status = 0;			/* exit status; set to 1 when a test fails */
+
+int copts = REG_EXTENDED;	/* base regcomp() flags, adjusted by -c */
+int eopts = 0;			/* base regexec() flags, adjusted by -e */
+regoff_t startoff = 0;		/* -S: REG_STARTEND start offset in main() */
+regoff_t endoff = 0;		/* -E: REG_STARTEND end, counted back from the end */
+
+
+extern int split();		/* field splitter; see its use in regress() */
+extern void regprint();		/* debugging dump of a compiled RE */
+
+/*
+ - main - do the simple case, hand off to regress() for regression
+ *
+ * With no operands, runs the regression tests read from standard input.
+ * With one operand, compiles it as an RE and prints the compiled form.
+ * With two, also matches the RE against the second operand and prints the
+ * match and any subexpression matches.  Exits with 2 on a usage error and
+ * with the global status otherwise.
+ */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	regex_t re;
+#	define	NS	10
+	regmatch_t subs[NS];
+	char erbuf[100];
+	int err;
+	size_t len;
+	int mlen;		/* match length; %.*s needs an int */
+	int c;
+	int errflg = 0;
+	int i;
+	extern int optind;
+	extern char *optarg;
+
+	progname = argv[0];
+
+	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
+		switch (c) {
+		case 'c':	/* compile options */
+			copts = options('c', optarg);
+			break;
+		case 'e':	/* execute options */
+			eopts = options('e', optarg);
+			break;
+		case 'S':	/* start offset */
+			startoff = (regoff_t)atoi(optarg);
+			break;
+		case 'E':	/* end offset */
+			endoff = (regoff_t)atoi(optarg);
+			break;
+		case 'x':	/* Debugging. */
+			debug++;
+			break;
+		case '?':
+		default:
+			errflg++;
+			break;
+		}
+	if (errflg) {
+		/* keep this in step with the getopt() string above */
+		fprintf(stderr, "usage: %s [-c copts] [-e eopts] ", progname);
+		fprintf(stderr, "[-S startoff] [-E endoff] [-x] [re [string]]\n");
+		exit(2);
+	}
+
+	if (optind >= argc) {
+		regress(stdin);
+		exit(status);
+	}
+
+	err = regcomp(&re, argv[optind++], copts);
+	if (err) {
+		len = regerror(err, &re, erbuf, sizeof(erbuf));
+		fprintf(stderr, "error %s, %lu/%lu `%s'\n",
+			eprint(err), (unsigned long)len,
+			(unsigned long)sizeof(erbuf), erbuf);
+		exit(status);
+	}
+	regprint(&re, stdout);
+
+	if (optind >= argc) {
+		regfree(&re);
+		exit(status);
+	}
+
+	if (eopts&REG_STARTEND) {
+		subs[0].rm_so = startoff;
+		subs[0].rm_eo = strlen(argv[optind]) - endoff;
+	}
+	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
+	if (err) {
+		len = regerror(err, &re, erbuf, sizeof(erbuf));
+		fprintf(stderr, "error %s, %lu/%lu `%s'\n",
+			eprint(err), (unsigned long)len,
+			(unsigned long)sizeof(erbuf), erbuf);
+		exit(status);
+	}
+	if (!(copts&REG_NOSUB)) {
+		if (subs[0].rm_so != -1) {
+			mlen = (int)(subs[0].rm_eo - subs[0].rm_so);
+			if (mlen != 0)
+				printf("match `%.*s'\n", mlen,
+					argv[optind] + subs[0].rm_so);
+			else
+				/* empty match: show the character it sits before */
+				printf("match `'@%.1s\n",
+					argv[optind] + subs[0].rm_so);
+		}
+		for (i = 1; i < NS; i++)
+			if (subs[i].rm_so != -1)
+				printf("(%d) `%.*s'\n", i,
+					(int)(subs[i].rm_eo - subs[i].rm_so),
+					argv[optind] + subs[i].rm_so);
+	}
+	exit(status);
+}
+
+/*
+ - regress - main loop of regression test
+ == void regress(FILE *in);
+ *
+ * Reads test lines from in, skipping blank lines and lines starting with
+ * '#', splits each into tab-separated fields and hands them to try(); a
+ * line whose option field contains '&' is tried a second time without
+ * REG_EXTENDED.  Afterwards runs a few fixed checks of regerror().
+ * Failures are reported on stderr and recorded in the global status; a
+ * line with fewer than three fields ends the program with exit status 1.
+ */
+void
+regress(in)
+FILE *in;
+{
+	char inbuf[1000];
+#	define	MAXF	10
+	char *f[MAXF];
+	int nf;
+	int i;
+	char erbuf[100];
+	size_t ne;
+	char *badpat = "invalid regular expression";
+#	define	SHORT	10
+	char *bpname = "REG_BADPAT";
+	regex_t re;
+
+	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
+		line++;
+		if (inbuf[0] == '#' || inbuf[0] == '\n')
+			continue;			/* NOTE CONTINUE */
+		/*
+		 * Drop the newline if fgets() stored one; an over-long line
+		 * or an unterminated last line has none, and must not lose
+		 * its final character.
+		 */
+		inbuf[strcspn(inbuf, "\n")] = '\0';
+		if (debug)
+			fprintf(stdout, "%d:\n", line);
+		nf = split(inbuf, f, MAXF, "\t\t");
+		if (nf < 3) {
+			fprintf(stderr, "bad input, line %d\n", line);
+			exit(1);
+		}
+		/* split() also counts the fields it had no room to store */
+		if (nf > MAXF)
+			nf = MAXF;
+		for (i = 0; i < nf; i++)
+			if (strcmp(f[i], "\"\"") == 0)
+				f[i] = "";
+		if (nf <= 3)
+			f[3] = NULL;
+		if (nf <= 4)
+			f[4] = NULL;
+		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
+		if (opt('&', f[1]))	/* try with either type of RE */
+			try(f[0], f[1], f[2], f[3], f[4],
+					options('c', f[1]) &~ REG_EXTENDED);
+	}
+
+	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
+		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
+							erbuf, badpat);
+		status = 1;
+	}
+	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
+	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
+						ne != strlen(badpat)+1) {
+		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
+						erbuf, SHORT-1, badpat);
+		status = 1;
+	}
+	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
+		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
+						erbuf, bpname);
+		status = 1;
+	}
+	re.re_endp = bpname;
+	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
+	if (atoi(erbuf) != (int)REG_BADPAT) {
+		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
+						erbuf, (long)REG_BADPAT);
+		status = 1;
+	} else if (ne != strlen(erbuf)+1) {
+		/* report the length regerror() returned, which is what failed */
+		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
+						erbuf, (long)ne);
+		status = 1;
+	}
+}
+
+/*
+ - try - try it, and report on problems
+ == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
+ *
+ * f0 is the RE, f1 the option letters, f2 the string to match (or, when f1
+ * contains 'C', the name of the error regcomp() should give), f3 the
+ * expected match (NULL if none is expected) and f4 the comma-separated
+ * expected subexpression matches (NULL to skip that check).  Problems are
+ * reported on stderr with the current line number and set status to 1.
+ */
+void
+try(f0, f1, f2, f3, f4, opts)
+char *f0;
+char *f1;
+char *f2;
+char *f3;
+char *f4;
+int opts;			/* may not match f1 */
+{
+	regex_t re;
+#	define	NSUBS	10
+	regmatch_t subs[NSUBS];
+#	define	NSHOULD	15
+	char *should[NSHOULD];
+	int nshould;
+	char erbuf[100];
+	int err;
+	int len;
+	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
+	int i;
+	char *grump;
+	char *lparen;
+	char *rparen;
+	/* the copies match the size of the line buffer in regress() */
+	char f0copy[1000];
+	char f2copy[1000];
+
+	strcpy(f0copy, f0);
+	/* take the end pointer before fixstr() can plant NULs in the copy */
+	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
+	fixstr(f0copy);
+	err = regcomp(&re, f0copy, opts);
+	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
+		/* unexpected error or wrong error */
+		len = regerror(err, &re, erbuf, sizeof(erbuf));
+		fprintf(stderr, "%d: %s error %s, %d/%lu `%s'\n",
+					line, type, eprint(err), len,
+					(unsigned long)sizeof(erbuf), erbuf);
+		status = 1;
+	} else if (err == 0 && opt('C', f1)) {
+		/* unexpected success */
+		fprintf(stderr, "%d: %s should have given REG_%s\n",
+						line, type, f2);
+		status = 1;
+		err = 1;	/* so we won't try regexec */
+	}
+
+	if (err != 0) {
+		regfree(&re);
+		return;
+	}
+
+	strcpy(f2copy, f2);
+	fixstr(f2copy);
+
+	if (options('e', f1)&REG_STARTEND) {
+		/* the region to search is marked with ( and ) in f2 */
+		lparen = strchr(f2, '(');
+		rparen = strchr(f2, ')');
+		if (lparen == NULL || rparen == NULL) {
+			/* no offsets can be computed; give up on this test */
+			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
+			status = 1;
+			regfree(&re);
+			return;
+		}
+		subs[0].rm_so = lparen - f2 + 1;
+		subs[0].rm_eo = rparen - f2;
+	}
+	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
+
+	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
+		/* unexpected error or wrong error */
+		len = regerror(err, &re, erbuf, sizeof(erbuf));
+		fprintf(stderr, "%d: %s exec error %s, %d/%lu `%s'\n",
+					line, type, eprint(err), len,
+					(unsigned long)sizeof(erbuf), erbuf);
+		status = 1;
+	} else if (err != 0) {
+		/* nothing more to check */
+	} else if (f3 == NULL) {
+		/* unexpected success */
+		fprintf(stderr, "%d: %s exec should have failed\n",
+						line, type);
+		status = 1;
+		err = 1;		/* just on principle */
+	} else if (opts&REG_NOSUB) {
+		/* nothing more to check */
+	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
+		fprintf(stderr, "%d: %s %s\n", line, type, grump);
+		status = 1;
+		err = 1;
+	}
+
+	if (err != 0 || f4 == NULL) {
+		regfree(&re);
+		return;
+	}
+
+	/* should[1..] line up with subs[1..]; entries left NULL mean "no match" */
+	for (i = 1; i < NSHOULD; i++)
+		should[i] = NULL;
+	nshould = split(f4, should+1, NSHOULD-1, ",");
+	if (nshould == 0) {
+		nshould = 1;
+		should[1] = "";
+	}
+	for (i = 1; i < NSUBS; i++) {
+		grump = check(f2, subs[i], should[i]);
+		if (grump != NULL) {
+			fprintf(stderr, "%d: %s $%d %s\n", line,
+							type, i, grump);
+			status = 1;
+			err = 1;
+		}
+	}
+
+	regfree(&re);
+}
+
+/*
+ - options - pick options out of a regression-test string
+ == int options(int type, char *s);
+ *
+ * Starts from the global defaults (copts for type 'c', eopts otherwise)
+ * and applies every letter of s that is legal for that type; all other
+ * characters are ignored.  Returns the resulting flag word.
+ */
+int
+options(type, s)
+int type;			/* 'c' compile, 'e' exec */
+char *s;
+{
+	char *p;
+	int o = (type == 'c') ? copts : eopts;
+	/*
+	 * 'r' has to be listed among the exec letters, otherwise its case
+	 * below can never be reached and an "-er" run changes nothing.
+	 */
+	char *legal = (type == 'c') ? "bisnmp" : "^$#tlr";
+
+	for (p = s; *p != '\0'; p++)
+		if (strchr(legal, *p) != NULL)
+			switch (*p) {
+			case 'b':
+				o &= ~REG_EXTENDED;
+				break;
+			case 'i':
+				o |= REG_ICASE;
+				break;
+			case 's':
+				o |= REG_NOSUB;
+				break;
+			case 'n':
+				o |= REG_NEWLINE;
+				break;
+			case 'm':
+				o &= ~REG_EXTENDED;
+				o |= REG_NOSPEC;
+				break;
+			case 'p':
+				o |= REG_PEND;
+				break;
+			case '^':
+				o |= REG_NOTBOL;
+				break;
+			case '$':
+				o |= REG_NOTEOL;
+				break;
+			case '#':
+				o |= REG_STARTEND;
+				break;
+			case 't':	/* trace */
+				o |= REG_TRACE;
+				break;
+			case 'l':	/* force long representation */
+				o |= REG_LARGE;
+				break;
+			case 'r':	/* force backref use */
+				o |= REG_BACKR;
+				break;
+			}
+	return(o);
+}
+
+/*
+ - opt - is a particular option in a regression string?
+ == int opt(int c, char *s);
+ *
+ * Returns 1 if the option letter c occurs anywhere in s, else 0.
+ */
+int				/* predicate */
+opt(c, s)
+int c;
+char *s;
+{
+	if (strchr(s, c) == NULL)
+		return(0);
+	return(1);
+}
+
+/*
+ - fixstr - transform magic characters in strings
+ == void fixstr(char *p);
+ *
+ * Rewrites the string in place: N becomes a newline, T a tab, S a space
+ * and Z a NUL byte.  A NULL p is ignored.  The scan stops at the first NUL
+ * that was in the string to begin with, not at one planted for a Z.
+ */
+void
+fixstr(p)
+char *p;
+{
+	char *cp;
+
+	if (p == NULL)
+		return;
+
+	for (cp = p; *cp != '\0'; cp++) {
+		switch (*cp) {
+		case 'N':
+			*cp = '\n';
+			break;
+		case 'T':
+			*cp = '\t';
+			break;
+		case 'S':
+			*cp = ' ';
+			break;
+		case 'Z':
+			*cp = '\0';
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ - check - check a substring match
+ == char *check(char *str, regmatch_t sub, char *should);
+ *
+ * Compares the match recorded in sub against what was expected.  should is
+ * the expected text; NULL or "-" means no match is expected, and "@text"
+ * means an empty match located just before text (a bare "@" means at the
+ * end of the string).  Returns NULL if all is well, else a complaint that
+ * may live in a static buffer overwritten by the next call.
+ */
+char *				/* NULL or complaint */
+check(str, sub, should)
+char *str;
+regmatch_t sub;
+char *should;
+{
+	int len;
+	int shlen;
+	char *p;
+	static char grump[500];
+	char *at = NULL;
+
+	if (should != NULL && strcmp(should, "-") == 0)
+		should = NULL;
+	if (should != NULL && should[0] == '@') {
+		at = should + 1;
+		should = "";
+	}
+
+	/* check rm_so and rm_eo for consistency */
+	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
+				(sub.rm_so != -1 && sub.rm_eo == -1) ||
+				(sub.rm_so != -1 && sub.rm_so < 0) ||
+				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
+		snprintf(grump, sizeof(grump), "start %ld end %ld",
+					(long)sub.rm_so, (long)sub.rm_eo);
+		return(grump);
+	}
+
+	/* check for no match */
+	if (sub.rm_so == -1 && should == NULL)
+		return(NULL);
+	if (sub.rm_so == -1)
+		return("did not match");
+
+	/* check for in range (rm_eo is known to be non-negative here) */
+	if ((size_t)sub.rm_eo > strlen(str)) {
+		snprintf(grump, sizeof(grump),
+					"start %ld end %ld, past end of string",
+					(long)sub.rm_so, (long)sub.rm_eo);
+		return(grump);
+	}
+
+	len = (int)(sub.rm_eo - sub.rm_so);
+	p = str + sub.rm_so;
+
+	/*
+	 * check for not supposed to match; the matched text can be longer
+	 * than grump, so every message is bounded by snprintf()
+	 */
+	if (should == NULL) {
+		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
+		return(grump);
+	}
+
+	/* only now is should known to be non-NULL and safe to measure */
+	shlen = (int)strlen(should);
+
+	/* check for wrong match */
+	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
+		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
+								len, p);
+		return(grump);
+	}
+	if (shlen > 0)
+		return(NULL);
+
+	/* check null match in right place */
+	if (at == NULL)
+		return(NULL);
+	shlen = strlen(at);
+	if (shlen == 0)
+		shlen = 1;	/* force check for end-of-string */
+	if (strncmp(p, at, shlen) != 0) {
+		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
+		return(grump);
+	}
+	return(NULL);
+}
+
+/*
+ - eprint - convert error number to name
+ == static char *eprint(int err);
+ *
+ * Asks regerror() for the name of err via REG_ITOA.  The result is held
+ * in a static buffer that the next call overwrites.
+ */
+static char *
+eprint(err)
+int err;
+{
+	static char epbuf[100];
+	size_t needed = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf,
+							sizeof(epbuf));
+
+	/* the size regerror() reports must fit the buffer */
+	assert(needed <= sizeof(epbuf));
+	return(epbuf);
+}
+
+/*
+ - efind - convert error name to number
+ == static int efind(char *name);
+ *
+ * name is the error name without its REG_ prefix.  The full name is handed
+ * to regerror() through re_endp with REG_ATOI, and the text regerror()
+ * writes back is converted with atoi().
+ */
+static int
+efind(name)
+char *name;
+{
+	static char efbuf[100];
+	int n;
+	regex_t re;
+
+	/* bound the write; checking the length afterwards is too late */
+	n = snprintf(efbuf, sizeof(efbuf), "REG_%s", name);
+	assert(n >= 0 && (size_t)n < sizeof(efbuf));
+	re.re_endp = efbuf;
+	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
+	return(atoi(efbuf));
+}

Added: user/gabor/tre-integration/lib/libc/regex/grot/mkh
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/mkh	Wed Jun 22 20:20:49 2011	(r223439)
@@ -0,0 +1,77 @@
+#! /bin/sh
+# mkh - pull headers out of C source
+# $FreeBSD: head/lib/libc/regex/grot/mkh 92971 2002-03-22 19:45:43Z obrien $
+#
+# usage: mkh [-o] [-b] [-s] [-p] [-i name] file ...
+# Copies the marked declaration lines of each file to standard output,
+# wrapped in extern "C" guards and, with -i, in an #ifndef guard.
+PATH=/bin:/usr/bin ; export PATH
+
+# egrep pattern to pick out marked lines
+egrep='^ =([ 	]|$)'
+
+# Name for the #ifndef wrapper.  Cleared here so that a variable of the
+# same name inherited from the environment cannot switch the wrapper on.
+ifndef=
+
+# Sed program to process marked lines into lines for the header file.
+# The markers have already been removed.  Two things are done here:  removal
+# of backslashed newlines, and some fudging of comments.  The first is done
+# because -o needs to have prototypes on one line to strip them down.
+# Getting comments into the output is tricky; we turn C++-style // comments
+# into /* */ comments, after altering any existing */'s to avoid trouble.
+peel='	/\\$/N
+	/\\\n[ 	]*/s///g
+	/\/\//s;\*/;* /;g
+	/\/\//s;//\(.*\);/*\1 */;'
+
+# Look at $1 directly rather than walking a copy of the argument list, so
+# that options following "-i name" are still recognized.
+while test $# -gt 0
+do
+	case "$1" in
+	-o)	# old (pre-function-prototype) compiler
+		# add code to comment out argument lists
+		peel="$peel
+			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
+		shift
+		;;
+	-b)	# funny Berkeley __P macro
+		peel="$peel
+			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
+		shift
+		;;
+	-s)	# compiler doesn't like `static foo();'
+		# add code to get rid of the `static'
+		peel="$peel
+			"'/^static[ 	][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
+		shift
+		;;
+	-p)	# private declarations
+		egrep='^ ==([ 	]|$)'
+		shift
+		;;
+	-i)	# wrap in #ifndef, argument is name
+		ifndef="$2"
+		shift ; shift
+		;;
+	*)	break
+		;;
+	esac
+done
+
+if test " $ifndef" != " "
+then
+	echo "#ifndef $ifndef"
+	echo "#define	$ifndef	/* never again */"
+fi
+echo "/* ========= begin header generated by $0 ========= */"
+echo '#ifdef __cplusplus'
+echo 'extern "C" {'
+echo '#endif'
+for f
+do
+	echo
+	echo "/* === $f === */"
+	# quoted so that a file name containing blanks stays one argument
+	egrep "$egrep" "$f" | sed 's/^ ==*[ 	]//;s/^ ==*$//' | sed "$peel"
+	echo
+done
+echo '#ifdef __cplusplus'
+echo '}'
+echo '#endif'
+echo "/* ========= end header generated by $0 ========= */"
+if test " $ifndef" != " "
+then
+	echo "#endif"
+fi
+exit 0

Added: user/gabor/tre-integration/lib/libc/regex/grot/split.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/split.c	Wed Jun 22 20:20:49 2011	(r223439)
@@ -0,0 +1,319 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ - split - divide a string into fields, like awk split()
+ = int split(char *string, char *fields[], int nfields, char *sep);
+ */
+int				/* number of fields, including overflow */
+split(string, fields, nfields, sep)
+char *string;
+char *fields[];			/* list is not NULL-terminated */
+int nfields;			/* number of entries available in fields[] */
+char *sep;			/* "" white, "c" single char, "ab" [ab]+ */
+{
+	char *p = string;
+	char c;			/* latest character */
+	char sepc = sep[0];
+	char sepc2;
+	int fn;
+	char **fp = fields;
+	char *sepp;
+	int trimtrail;
+
+	/* white space */
+	if (sepc == '\0') {
+		while ((c = *p++) == ' ' || c == '\t')
+			continue;
+		p--;
+		trimtrail = 1;
+		sep = " \t";	/* note, code below knows this is 2 long */
+		sepc = ' ';
+	} else
+		trimtrail = 0;
+	sepc2 = sep[1];		/* now we can safely pick this up */
+
+	/* catch empties */
+	if (*p == '\0')
+		return(0);
+
+	/* single separator */
+	if (sepc2 == '\0') {
+		fn = nfields;
+		for (;;) {
+			*fp++ = p;
+			fn--;
+			if (fn == 0)
+				break;
+			while ((c = *p++) != sepc)
+				if (c == '\0')
+					return(nfields - fn);
+			*(p-1) = '\0';
+		}
+		/* we have overflowed the fields vector -- just count them */
+		fn = nfields;
+		for (;;) {
+			while ((c = *p++) != sepc)
+				if (c == '\0')
+					return(fn);
+			fn++;
+		}
+		/* not reached */
+	}
+
+	/* two separators */
+	if (sep[2] == '\0') {
+		fn = nfields;
+		for (;;) {
+			*fp++ = p;
+			fn--;
+			while ((c = *p++) != sepc && c != sepc2)
+				if (c == '\0') {
+					if (trimtrail && **(fp-1) == '\0')
+						fn++;
+					return(nfields - fn);
+				}
+			if (fn == 0)
+				break;
+			*(p-1) = '\0';
+			while ((c = *p++) == sepc || c == sepc2)
+				continue;
+			p--;
+		}

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***