svn commit: r223439 - user/gabor/tre-integration/lib/libc/regex/grot
Gabor Kovesdan
gabor at FreeBSD.org
Wed Jun 22 20:20:50 UTC 2011
Author: gabor
Date: Wed Jun 22 20:20:49 2011
New Revision: 223439
URL: http://svn.freebsd.org/changeset/base/223439
Log:
- Readd regression tests from previous regex code; probably it will be
still useful
Added:
user/gabor/tre-integration/lib/libc/regex/grot/
user/gabor/tre-integration/lib/libc/regex/grot/Makefile (contents, props changed)
user/gabor/tre-integration/lib/libc/regex/grot/debug.c (contents, props changed)
user/gabor/tre-integration/lib/libc/regex/grot/main.c (contents, props changed)
user/gabor/tre-integration/lib/libc/regex/grot/mkh (contents, props changed)
user/gabor/tre-integration/lib/libc/regex/grot/split.c (contents, props changed)
user/gabor/tre-integration/lib/libc/regex/grot/tests
Added: user/gabor/tre-integration/lib/libc/regex/grot/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/Makefile Wed Jun 22 20:20:49 2011 (r223439)
@@ -0,0 +1,98 @@
+# $FreeBSD$
+# You probably want to take -DREDEBUG out of CFLAGS, and put something like
+# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
+# internal assertion checking). Take -Dconst= out for an ANSI compiler.
+# Do not take -DPOSIX_MISTAKE out. REGCFLAGS isn't important to you (it's
+# for my use in some special contexts).
+
+PATHS= ${.CURDIR}/.. ${.CURDIR}/../../locale ${.CURDIR}/../../../../include
+.PATH: ${PATHS}
+
+CFLAGS+= -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
+.for incpath in ${PATHS}
+CFLAGS+= -I${incpath}
+.endfor
+
+# If you have an ANSI compiler, take -o out of MKHFLAGS. If you want
+# the Berkeley __P macro, put -b in.
+MKHFLAGS =
+
+LDFLAGS =
+
+# If you have an ANSI environment, take limits.h and stdlib.h out of
+# HMISSING and take memmove out of SRCMISSING and OBJMISSING.
+HMISSING =
+SRCMISSING = split.c
+OBJMISSING = split.o
+H = cname.h regex2.h utils.h $(HMISSING)
+REGSRC = regcomp.c regerror.c regexec.c regfree.c engine.c
+SRC = $(REGSRC) debug.c main.c $(SRCMISSING)
+
+# Internal stuff, should not need changing.
+OBJPRODN = regcomp.o regexec.o regerror.o regfree.o
+OBJS = $(OBJPRODN) debug.o main.o $(OBJMISSING)
+
+# Stuff that matters only if you're trying to lint the package.
+LINTFLAGS = -I. -Dstatic= -Dconst= -DREDEBUG
+LINTC = regcomp.c regexec.c regerror.c regfree.c debug.c main.c $(SRCMISSING)
+JUNKLINT =possible pointer alignment|null effect
+
+.SUFFIXES: .ih .h
+.c.ih:
+ sh mkh $(MKHFLAGS) -p $< >$@
+
+default: r
+
+re: $(OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $@
+
+o: $(OBJPRODN)
+
+REGEXHSRC = ../regex2.h ../reg*.c
+h: $(REGEXHSRC)
+ sh mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
+ cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
+ rm -f regex.tmp
+
+regex.h: h
+
+regcomp.o regexec.o regfree.o debug.o: utils.h regex.h regex2.h
+regcomp.o: cname.h regcomp.ih
+regexec.o: engine.c engine.ih
+regerror.o: regerror.ih
+regerror.o: utils.h
+debug.o: debug.ih
+main.o: main.ih
+
+r: re tests
+ ./re <tests
+ ./re -el <tests
+ ./re -er <tests
+
+ra: ./re tests
+ -./re <tests
+ -./re -el <tests
+ -./re -er <tests
+
+rx: ./re tests
+ ./re -x <tests
+ ./re -x -el <tests
+ ./re -x -er <tests
+
+t: ./re tests
+ -time ./re <tests
+ -time ./re -cs <tests
+ -time ./re -el <tests
+ -time ./re -cs -el <tests
+
+l: $(LINTC)
+ lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
+
+clean: tidy
+ rm -f *.o *.s *.ih re
+
+tidy:
+ rm -f junk* core regex.tmp lint
+
+spotless: clean
+ rm -f regex.h
Added: user/gabor/tre-integration/lib/libc/regex/grot/debug.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/debug.c Wed Jun 22 20:20:49 2011 (r223439)
@@ -0,0 +1,212 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "utils.h"
+#include "regex2.h"
+#include "debug.ih"
+
+/*
+ - regprint - print a regexp for debugging
+ == void regprint(regex_t *r, FILE *d);
+ */
+void
+regprint(r, d)
+regex_t *r;
+FILE *d;
+{
+ struct re_guts *g = r->re_g;
+ int i;
+ int c;
+ int last;
+
+ fprintf(d, "%ld states", (long)g->nstates);
+ fprintf(d, ", first %ld last %ld", (long)g->firststate,
+ (long)g->laststate);
+ if (g->iflags&USEBOL)
+ fprintf(d, ", USEBOL");
+ if (g->iflags&USEEOL)
+ fprintf(d, ", USEEOL");
+ if (g->iflags&BAD)
+ fprintf(d, ", BAD");
+ if (g->nsub > 0)
+ fprintf(d, ", nsub=%ld", (long)g->nsub);
+ if (g->must != NULL)
+ fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
+ g->must);
+ if (g->backrefs)
+ fprintf(d, ", backrefs");
+ if (g->nplus > 0)
+ fprintf(d, ", nplus %ld", (long)g->nplus);
+ fprintf(d, "\n");
+ s_print(g, d);
+}
+
+/*
+ - s_print - print the strip for debugging
+ == static void s_print(struct re_guts *g, FILE *d);
+ */
+static void
+s_print(g, d)
+struct re_guts *g;
+FILE *d;
+{
+ sop *s;
+ cset *cs;
+ int i;
+ int done = 0;
+ sop opnd;
+ int col = 0;
+ int last;
+ sopno offset = 2;
+# define GAP() { if (offset % 5 == 0) { \
+ if (col > 40) { \
+ fprintf(d, "\n\t"); \
+ col = 0; \
+ } else { \
+ fprintf(d, " "); \
+ col++; \
+ } \
+ } else \
+ col++; \
+ offset++; \
+ }
+
+ if (OP(g->strip[0]) != OEND)
+ fprintf(d, "missing initial OEND!\n");
+ for (s = &g->strip[1]; !done; s++) {
+ opnd = OPND(*s);
+ switch (OP(*s)) {
+ case OEND:
+ fprintf(d, "\n");
+ done = 1;
+ break;
+ case OCHAR:
+ if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
+ fprintf(d, "\\%c", (char)opnd);
+ else
+ fprintf(d, "%s", regchar((char)opnd));
+ break;
+ case OBOL:
+ fprintf(d, "^");
+ break;
+ case OEOL:
+ fprintf(d, "$");
+ break;
+ case OBOW:
+ fprintf(d, "\\{");
+ break;
+ case OEOW:
+ fprintf(d, "\\}");
+ break;
+ case OANY:
+ fprintf(d, ".");
+ break;
+ case OANYOF:
+ fprintf(d, "[(%ld)", (long)opnd);
+#if 0
+ cs = &g->sets[opnd];
+ last = -1;
+ for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
+ if (CHIN(cs, i) && i < g->csetsize) {
+ if (last < 0) {
+ fprintf(d, "%s", regchar(i));
+ last = i;
+ }
+ } else {
+ if (last >= 0) {
+ if (last != i-1)
+ fprintf(d, "-%s",
+ regchar(i-1));
+ last = -1;
+ }
+ }
+#endif
+ fprintf(d, "]");
+ break;
+ case OBACK_:
+ fprintf(d, "(\\<%ld>", (long)opnd);
+ break;
+ case O_BACK:
+ fprintf(d, "<%ld>\\)", (long)opnd);
+ break;
+ case OPLUS_:
+ fprintf(d, "(+");
+ if (OP(*(s+opnd)) != O_PLUS)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_PLUS:
+ if (OP(*(s-opnd)) != OPLUS_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "+)");
+ break;
+ case OQUEST_:
+ fprintf(d, "(?");
+ if (OP(*(s+opnd)) != O_QUEST)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_QUEST:
+ if (OP(*(s-opnd)) != OQUEST_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "?)");
+ break;
+ case OLPAREN:
+ fprintf(d, "((<%ld>", (long)opnd);
+ break;
+ case ORPAREN:
+ fprintf(d, "<%ld>))", (long)opnd);
+ break;
+ case OCH_:
+ fprintf(d, "<");
+ if (OP(*(s+opnd)) != OOR2)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case OOR1:
+ if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "|");
+ break;
+ case OOR2:
+ fprintf(d, "|");
+ if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_CH:
+ if (OP(*(s-opnd)) != OOR1)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, ">");
+ break;
+ default:
+ fprintf(d, "!%d(%d)!", OP(*s), opnd);
+ break;
+ }
+ if (!done)
+ GAP();
+ }
+}
+
+/*
+ - regchar - make a character printable
+ == static char *regchar(int ch);
+ */
+static char * /* -> representation */
+regchar(ch)
+int ch;
+{
+ static char buf[10];
+
+ if (isprint(ch) || ch == ' ')
+ sprintf(buf, "%c", ch);
+ else
+ sprintf(buf, "\\%o", ch);
+ return(buf);
+}
Added: user/gabor/tre-integration/lib/libc/regex/grot/main.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/main.c Wed Jun 22 20:20:49 2011 (r223439)
@@ -0,0 +1,513 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <assert.h>
+
+#include "main.ih"
+
+char *progname;
+int debug = 0;
+int line = 0;
+int status = 0;
+
+int copts = REG_EXTENDED;
+int eopts = 0;
+regoff_t startoff = 0;
+regoff_t endoff = 0;
+
+
+extern int split();
+extern void regprint();
+
+/*
+ - main - do the simple case, hand off to regress() for regression
+ */
+main(argc, argv)
+int argc;
+char *argv[];
+{
+ regex_t re;
+# define NS 10
+ regmatch_t subs[NS];
+ char erbuf[100];
+ int err;
+ size_t len;
+ int c;
+ int errflg = 0;
+ int i;
+ extern int optind;
+ extern char *optarg;
+
+ progname = argv[0];
+
+ while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
+ switch (c) {
+ case 'c': /* compile options */
+ copts = options('c', optarg);
+ break;
+ case 'e': /* execute options */
+ eopts = options('e', optarg);
+ break;
+ case 'S': /* start offset */
+ startoff = (regoff_t)atoi(optarg);
+ break;
+ case 'E': /* end offset */
+ endoff = (regoff_t)atoi(optarg);
+ break;
+ case 'x': /* Debugging. */
+ debug++;
+ break;
+ case '?':
+ default:
+ errflg++;
+ break;
+ }
+ if (errflg) {
+ fprintf(stderr, "usage: %s ", progname);
+ fprintf(stderr, "[-c copt][-C][-d] [re]\n");
+ exit(2);
+ }
+
+ if (optind >= argc) {
+ regress(stdin);
+ exit(status);
+ }
+
+ err = regcomp(&re, argv[optind++], copts);
+ if (err) {
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "error %s, %d/%d `%s'\n",
+ eprint(err), len, sizeof(erbuf), erbuf);
+ exit(status);
+ }
+ regprint(&re, stdout);
+
+ if (optind >= argc) {
+ regfree(&re);
+ exit(status);
+ }
+
+ if (eopts®_STARTEND) {
+ subs[0].rm_so = startoff;
+ subs[0].rm_eo = strlen(argv[optind]) - endoff;
+ }
+ err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
+ if (err) {
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "error %s, %d/%d `%s'\n",
+ eprint(err), len, sizeof(erbuf), erbuf);
+ exit(status);
+ }
+ if (!(copts®_NOSUB)) {
+ len = (int)(subs[0].rm_eo - subs[0].rm_so);
+ if (subs[0].rm_so != -1) {
+ if (len != 0)
+ printf("match `%.*s'\n", len,
+ argv[optind] + subs[0].rm_so);
+ else
+ printf("match `'@%.1s\n",
+ argv[optind] + subs[0].rm_so);
+ }
+ for (i = 1; i < NS; i++)
+ if (subs[i].rm_so != -1)
+ printf("(%d) `%.*s'\n", i,
+ (int)(subs[i].rm_eo - subs[i].rm_so),
+ argv[optind] + subs[i].rm_so);
+ }
+ exit(status);
+}
+
+/*
+ - regress - main loop of regression test
+ == void regress(FILE *in);
+ */
+void
+regress(in)
+FILE *in;
+{
+ char inbuf[1000];
+# define MAXF 10
+ char *f[MAXF];
+ int nf;
+ int i;
+ char erbuf[100];
+ size_t ne;
+ char *badpat = "invalid regular expression";
+# define SHORT 10
+ char *bpname = "REG_BADPAT";
+ regex_t re;
+
+ while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
+ line++;
+ if (inbuf[0] == '#' || inbuf[0] == '\n')
+ continue; /* NOTE CONTINUE */
+ inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
+ if (debug)
+ fprintf(stdout, "%d:\n", line);
+ nf = split(inbuf, f, MAXF, "\t\t");
+ if (nf < 3) {
+ fprintf(stderr, "bad input, line %d\n", line);
+ exit(1);
+ }
+ for (i = 0; i < nf; i++)
+ if (strcmp(f[i], "\"\"") == 0)
+ f[i] = "";
+ if (nf <= 3)
+ f[3] = NULL;
+ if (nf <= 4)
+ f[4] = NULL;
+ try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
+ if (opt('&', f[1])) /* try with either type of RE */
+ try(f[0], f[1], f[2], f[3], f[4],
+ options('c', f[1]) &~ REG_EXTENDED);
+ }
+
+ ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+ if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
+ fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
+ erbuf, badpat);
+ status = 1;
+ }
+ ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
+ if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
+ ne != strlen(badpat)+1) {
+ fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
+ erbuf, SHORT-1, badpat);
+ status = 1;
+ }
+ ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+ if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
+ fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
+ erbuf, bpname);
+ status = 1;
+ }
+ re.re_endp = bpname;
+ ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
+ if (atoi(erbuf) != (int)REG_BADPAT) {
+ fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
+ erbuf, (long)REG_BADPAT);
+ status = 1;
+ } else if (ne != strlen(erbuf)+1) {
+ fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
+ erbuf, (long)REG_BADPAT);
+ status = 1;
+ }
+}
+
+/*
+ - try - try it, and report on problems
+ == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
+ */
+void
+try(f0, f1, f2, f3, f4, opts)
+char *f0;
+char *f1;
+char *f2;
+char *f3;
+char *f4;
+int opts; /* may not match f1 */
+{
+ regex_t re;
+# define NSUBS 10
+ regmatch_t subs[NSUBS];
+# define NSHOULD 15
+ char *should[NSHOULD];
+ int nshould;
+ char erbuf[100];
+ int err;
+ int len;
+ char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
+ int i;
+ char *grump;
+ char f0copy[1000];
+ char f2copy[1000];
+
+ strcpy(f0copy, f0);
+ re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
+ fixstr(f0copy);
+ err = regcomp(&re, f0copy, opts);
+ if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
+ /* unexpected error or wrong error */
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
+ line, type, eprint(err), len,
+ sizeof(erbuf), erbuf);
+ status = 1;
+ } else if (err == 0 && opt('C', f1)) {
+ /* unexpected success */
+ fprintf(stderr, "%d: %s should have given REG_%s\n",
+ line, type, f2);
+ status = 1;
+ err = 1; /* so we won't try regexec */
+ }
+
+ if (err != 0) {
+ regfree(&re);
+ return;
+ }
+
+ strcpy(f2copy, f2);
+ fixstr(f2copy);
+
+ if (options('e', f1)®_STARTEND) {
+ if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
+ fprintf(stderr, "%d: bad STARTEND syntax\n", line);
+ subs[0].rm_so = strchr(f2, '(') - f2 + 1;
+ subs[0].rm_eo = strchr(f2, ')') - f2;
+ }
+ err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
+
+ if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
+ /* unexpected error or wrong error */
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
+ line, type, eprint(err), len,
+ sizeof(erbuf), erbuf);
+ status = 1;
+ } else if (err != 0) {
+ /* nothing more to check */
+ } else if (f3 == NULL) {
+ /* unexpected success */
+ fprintf(stderr, "%d: %s exec should have failed\n",
+ line, type);
+ status = 1;
+ err = 1; /* just on principle */
+ } else if (opts®_NOSUB) {
+ /* nothing more to check */
+ } else if ((grump = check(f2, subs[0], f3)) != NULL) {
+ fprintf(stderr, "%d: %s %s\n", line, type, grump);
+ status = 1;
+ err = 1;
+ }
+
+ if (err != 0 || f4 == NULL) {
+ regfree(&re);
+ return;
+ }
+
+ for (i = 1; i < NSHOULD; i++)
+ should[i] = NULL;
+ nshould = split(f4, should+1, NSHOULD-1, ",");
+ if (nshould == 0) {
+ nshould = 1;
+ should[1] = "";
+ }
+ for (i = 1; i < NSUBS; i++) {
+ grump = check(f2, subs[i], should[i]);
+ if (grump != NULL) {
+ fprintf(stderr, "%d: %s $%d %s\n", line,
+ type, i, grump);
+ status = 1;
+ err = 1;
+ }
+ }
+
+ regfree(&re);
+}
+
+/*
+ - options - pick options out of a regression-test string
+ == int options(int type, char *s);
+ */
+int
+options(type, s)
+int type; /* 'c' compile, 'e' exec */
+char *s;
+{
+ char *p;
+ int o = (type == 'c') ? copts : eopts;
+ char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
+
+ for (p = s; *p != '\0'; p++)
+ if (strchr(legal, *p) != NULL)
+ switch (*p) {
+ case 'b':
+ o &= ~REG_EXTENDED;
+ break;
+ case 'i':
+ o |= REG_ICASE;
+ break;
+ case 's':
+ o |= REG_NOSUB;
+ break;
+ case 'n':
+ o |= REG_NEWLINE;
+ break;
+ case 'm':
+ o &= ~REG_EXTENDED;
+ o |= REG_NOSPEC;
+ break;
+ case 'p':
+ o |= REG_PEND;
+ break;
+ case '^':
+ o |= REG_NOTBOL;
+ break;
+ case '$':
+ o |= REG_NOTEOL;
+ break;
+ case '#':
+ o |= REG_STARTEND;
+ break;
+ case 't': /* trace */
+ o |= REG_TRACE;
+ break;
+ case 'l': /* force long representation */
+ o |= REG_LARGE;
+ break;
+ case 'r': /* force backref use */
+ o |= REG_BACKR;
+ break;
+ }
+ return(o);
+}
+
+/*
+ - opt - is a particular option in a regression string?
+ == int opt(int c, char *s);
+ */
+int /* predicate */
+opt(c, s)
+int c;
+char *s;
+{
+ return(strchr(s, c) != NULL);
+}
+
+/*
+ - fixstr - transform magic characters in strings
+ == void fixstr(char *p);
+ */
+void
+fixstr(p)
+char *p;
+{
+ if (p == NULL)
+ return;
+
+ for (; *p != '\0'; p++)
+ if (*p == 'N')
+ *p = '\n';
+ else if (*p == 'T')
+ *p = '\t';
+ else if (*p == 'S')
+ *p = ' ';
+ else if (*p == 'Z')
+ *p = '\0';
+}
+
+/*
+ - check - check a substring match
+ == char *check(char *str, regmatch_t sub, char *should);
+ */
+char * /* NULL or complaint */
+check(str, sub, should)
+char *str;
+regmatch_t sub;
+char *should;
+{
+ int len;
+ int shlen;
+ char *p;
+ static char grump[500];
+ char *at = NULL;
+
+ if (should != NULL && strcmp(should, "-") == 0)
+ should = NULL;
+ if (should != NULL && should[0] == '@') {
+ at = should + 1;
+ should = "";
+ }
+
+ /* check rm_so and rm_eo for consistency */
+ if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
+ (sub.rm_so != -1 && sub.rm_eo == -1) ||
+ (sub.rm_so != -1 && sub.rm_so < 0) ||
+ (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
+ sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
+ (long)sub.rm_eo);
+ return(grump);
+ }
+
+ /* check for no match */
+ if (sub.rm_so == -1 && should == NULL)
+ return(NULL);
+ if (sub.rm_so == -1)
+ return("did not match");
+
+ /* check for in range */
+ if (sub.rm_eo > strlen(str)) {
+ sprintf(grump, "start %ld end %ld, past end of string",
+ (long)sub.rm_so, (long)sub.rm_eo);
+ return(grump);
+ }
+
+ len = (int)(sub.rm_eo - sub.rm_so);
+ shlen = (int)strlen(should);
+ p = str + sub.rm_so;
+
+ /* check for not supposed to match */
+ if (should == NULL) {
+ sprintf(grump, "matched `%.*s'", len, p);
+ return(grump);
+ }
+
+ /* check for wrong match */
+ if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
+ sprintf(grump, "matched `%.*s' instead", len, p);
+ return(grump);
+ }
+ if (shlen > 0)
+ return(NULL);
+
+ /* check null match in right place */
+ if (at == NULL)
+ return(NULL);
+ shlen = strlen(at);
+ if (shlen == 0)
+ shlen = 1; /* force check for end-of-string */
+ if (strncmp(p, at, shlen) != 0) {
+ sprintf(grump, "matched null at `%.20s'", p);
+ return(grump);
+ }
+ return(NULL);
+}
+
+/*
+ - eprint - convert error number to name
+ == static char *eprint(int err);
+ */
+static char *
+eprint(err)
+int err;
+{
+ static char epbuf[100];
+ size_t len;
+
+ len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
+ assert(len <= sizeof(epbuf));
+ return(epbuf);
+}
+
+/*
+ - efind - convert error name to number
+ == static int efind(char *name);
+ */
+static int
+efind(name)
+char *name;
+{
+ static char efbuf[100];
+ size_t n;
+ regex_t re;
+
+ sprintf(efbuf, "REG_%s", name);
+ assert(strlen(efbuf) < sizeof(efbuf));
+ re.re_endp = efbuf;
+ (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
+ return(atoi(efbuf));
+}
Added: user/gabor/tre-integration/lib/libc/regex/grot/mkh
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/mkh Wed Jun 22 20:20:49 2011 (r223439)
@@ -0,0 +1,77 @@
+#! /bin/sh
+# mkh - pull headers out of C source
+# $FreeBSD: head/lib/libc/regex/grot/mkh 92971 2002-03-22 19:45:43Z obrien $
+PATH=/bin:/usr/bin ; export PATH
+
+# egrep pattern to pick out marked lines
+egrep='^ =([ ]|$)'
+
+# Sed program to process marked lines into lines for the header file.
+# The markers have already been removed. Two things are done here: removal
+# of backslashed newlines, and some fudging of comments. The first is done
+# because -o needs to have prototypes on one line to strip them down.
+# Getting comments into the output is tricky; we turn C++-style // comments
+# into /* */ comments, after altering any existing */'s to avoid trouble.
+peel=' /\\$/N
+ /\\\n[ ]*/s///g
+ /\/\//s;\*/;* /;g
+ /\/\//s;//\(.*\);/*\1 */;'
+
+for a
+do
+ case "$a" in
+ -o) # old (pre-function-prototype) compiler
+ # add code to comment out argument lists
+ peel="$peel
+ "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
+ shift
+ ;;
+ -b) # funny Berkeley __P macro
+ peel="$peel
+ "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
+ shift
+ ;;
+ -s) # compiler doesn't like `static foo();'
+ # add code to get rid of the `static'
+ peel="$peel
+ "'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
+ shift
+ ;;
+ -p) # private declarations
+ egrep='^ ==([ ]|$)'
+ shift
+ ;;
+ -i) # wrap in #ifndef, argument is name
+ ifndef="$2"
+ shift ; shift
+ ;;
+ *) break
+ ;;
+ esac
+done
+
+if test " $ifndef" != " "
+then
+ echo "#ifndef $ifndef"
+ echo "#define $ifndef /* never again */"
+fi
+echo "/* ========= begin header generated by $0 ========= */"
+echo '#ifdef __cplusplus'
+echo 'extern "C" {'
+echo '#endif'
+for f
+do
+ echo
+ echo "/* === $f === */"
+ egrep "$egrep" $f | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel"
+ echo
+done
+echo '#ifdef __cplusplus'
+echo '}'
+echo '#endif'
+echo "/* ========= end header generated by $0 ========= */"
+if test " $ifndef" != " "
+then
+ echo "#endif"
+fi
+exit 0
Added: user/gabor/tre-integration/lib/libc/regex/grot/split.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/gabor/tre-integration/lib/libc/regex/grot/split.c Wed Jun 22 20:20:49 2011 (r223439)
@@ -0,0 +1,319 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ - split - divide a string into fields, like awk split()
+ = int split(char *string, char *fields[], int nfields, char *sep);
+ */
+int /* number of fields, including overflow */
+split(string, fields, nfields, sep)
+char *string;
+char *fields[]; /* list is not NULL-terminated */
+int nfields; /* number of entries available in fields[] */
+char *sep; /* "" white, "c" single char, "ab" [ab]+ */
+{
+ char *p = string;
+ char c; /* latest character */
+ char sepc = sep[0];
+ char sepc2;
+ int fn;
+ char **fp = fields;
+ char *sepp;
+ int trimtrail;
+
+ /* white space */
+ if (sepc == '\0') {
+ while ((c = *p++) == ' ' || c == '\t')
+ continue;
+ p--;
+ trimtrail = 1;
+ sep = " \t"; /* note, code below knows this is 2 long */
+ sepc = ' ';
+ } else
+ trimtrail = 0;
+ sepc2 = sep[1]; /* now we can safely pick this up */
+
+ /* catch empties */
+ if (*p == '\0')
+ return(0);
+
+ /* single separator */
+ if (sepc2 == '\0') {
+ fn = nfields;
+ for (;;) {
+ *fp++ = p;
+ fn--;
+ if (fn == 0)
+ break;
+ while ((c = *p++) != sepc)
+ if (c == '\0')
+ return(nfields - fn);
+ *(p-1) = '\0';
+ }
+ /* we have overflowed the fields vector -- just count them */
+ fn = nfields;
+ for (;;) {
+ while ((c = *p++) != sepc)
+ if (c == '\0')
+ return(fn);
+ fn++;
+ }
+ /* not reached */
+ }
+
+ /* two separators */
+ if (sep[2] == '\0') {
+ fn = nfields;
+ for (;;) {
+ *fp++ = p;
+ fn--;
+ while ((c = *p++) != sepc && c != sepc2)
+ if (c == '\0') {
+ if (trimtrail && **(fp-1) == '\0')
+ fn++;
+ return(nfields - fn);
+ }
+ if (fn == 0)
+ break;
+ *(p-1) = '\0';
+ while ((c = *p++) == sepc || c == sepc2)
+ continue;
+ p--;
+ }
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list