svn commit: r226035 - in head/usr.bin/grep: . regex

Gabor Kovesdan gabor at FreeBSD.org
Wed Oct 5 09:56:43 UTC 2011


Author: gabor
Date: Wed Oct  5 09:56:43 2011
New Revision: 226035
URL: http://svn.freebsd.org/changeset/base/226035

Log:
  Update BSD grep to the latest development version.  It has some code
  backported that was written for the TRE integration project in Google
  Summer of Code 2011.  This is a temporary solution until the whole
  regex library is replaced, so that BSD grep development can continue
  and the backported code gets some review and testing.  This change only
  improves scalability slightly; there is no big performance boost yet,
  but several minor bugs have been found and fixed.
  
  Approved by:	delphij (mentor)
  Sponsored by:	Google Summer of Code 2011
  MFC after:	1 week

Added:
  head/usr.bin/grep/regex/
  head/usr.bin/grep/regex/fastmatch.c   (contents, props changed)
  head/usr.bin/grep/regex/fastmatch.h   (contents, props changed)
  head/usr.bin/grep/regex/glue.h   (contents, props changed)
  head/usr.bin/grep/regex/hashtable.c   (contents, props changed)
  head/usr.bin/grep/regex/hashtable.h   (contents, props changed)
  head/usr.bin/grep/regex/tre-compile.c   (contents, props changed)
  head/usr.bin/grep/regex/tre-fastmatch.c   (contents, props changed)
  head/usr.bin/grep/regex/tre-fastmatch.h   (contents, props changed)
  head/usr.bin/grep/regex/xmalloc.c   (contents, props changed)
  head/usr.bin/grep/regex/xmalloc.h   (contents, props changed)
Deleted:
  head/usr.bin/grep/fastgrep.c
Modified:
  head/usr.bin/grep/Makefile
  head/usr.bin/grep/file.c
  head/usr.bin/grep/grep.c
  head/usr.bin/grep/grep.h
  head/usr.bin/grep/util.c

Modified: head/usr.bin/grep/Makefile
==============================================================================
--- head/usr.bin/grep/Makefile	Wed Oct  5 08:33:50 2011	(r226034)
+++ head/usr.bin/grep/Makefile	Wed Oct  5 09:56:43 2011	(r226035)
@@ -8,28 +8,52 @@
 PROG=	grep
 .else
 PROG=	bsdgrep
+CLEANFILES+= bsdgrep.1
+
+bsdgrep.1: grep.1
+	cp ${.ALLSRC} ${.TARGET}
 .endif
-SRCS=	fastgrep.c file.c grep.c queue.c util.c
+SRCS=	file.c grep.c queue.c util.c
+
+# Extra files backported from some regex improvements
+.PATH: ${.CURDIR}/regex
+SRCS+=	fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c xmalloc.c
+CFLAGS+=-I${.CURDIR}/regex
 
 .if ${MK_BSD_GREP} == "yes"
 LINKS=	${BINDIR}/grep ${BINDIR}/egrep \
 	${BINDIR}/grep ${BINDIR}/fgrep \
 	${BINDIR}/grep ${BINDIR}/zgrep \
 	${BINDIR}/grep ${BINDIR}/zegrep \
-	${BINDIR}/grep ${BINDIR}/zfgrep
+	${BINDIR}/grep ${BINDIR}/zfgrep \
+	${BINDIR}/grep ${BINDIR}/bzgrep \
+	${BINDIR}/grep ${BINDIR}/bzegrep \
+	${BINDIR}/grep ${BINDIR}/bzfgrep \
+	${BINDIR}/grep ${BINDIR}/xzgrep \
+	${BINDIR}/grep ${BINDIR}/xzegrep \
+	${BINDIR}/grep ${BINDIR}/xzfgrep \
+	${BINDIR}/grep ${BINDIR}/lzgrep \
+	${BINDIR}/grep ${BINDIR}/lzegrep \
+	${BINDIR}/grep ${BINDIR}/lzfgrep
 
 MLINKS= grep.1 egrep.1 \
 	grep.1 fgrep.1 \
 	grep.1 zgrep.1 \
 	grep.1 zegrep.1 \
-	grep.1 zfgrep.1
+	grep.1 zfgrep.1 \
+	grep.1 bzgrep.1 \
+	grep.1 bzegrep.1 \
+	grep.1 bzfgrep.1 \
+	grep.1 xzgrep.1 \
+	grep.1 xzegrep.1 \
+	grep.1 xzfgrep.1 \
+	grep.1 lzgrep.1 \
+	grep.1 lzegrep.1 \
+	grep.1 lzfgrep.1
 .endif
 
-bsdgrep.1: grep.1
-	cp ${.ALLSRC} ${.TARGET}
-
-LDADD=	-lz -lbz2
-DPADD=	${LIBZ} ${LIBBZ2}
+LDADD=	-lz -lbz2 -llzma
+DPADD=	${LIBZ} ${LIBBZ2} ${LIBLZMA}
 
 .if !defined(WITHOUT_GNU_COMPAT)
 CFLAGS+= -I/usr/include/gnu

Modified: head/usr.bin/grep/file.c
==============================================================================
--- head/usr.bin/grep/file.c	Wed Oct  5 08:33:50 2011	(r226034)
+++ head/usr.bin/grep/file.c	Wed Oct  5 09:56:43 2011	(r226035)
@@ -34,13 +34,15 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
-#include <sys/types.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 
 #include <bzlib.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <lzma.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
@@ -56,10 +58,12 @@ __FBSDID("$FreeBSD$");
 
 static gzFile gzbufdesc;
 static BZFILE* bzbufdesc;
+static lzma_stream lstrm = LZMA_STREAM_INIT;
 
-static unsigned char buffer[MAXBUFSIZ];
+static unsigned char *buffer;
 static unsigned char *bufpos;
 static size_t bufrem;
+static size_t fsiz;
 
 static unsigned char *lnbuf;
 static size_t lnbuflen;
@@ -70,6 +74,9 @@ grep_refill(struct file *f)
 	ssize_t nr;
 	int bzerr;
 
+	if (filebehave == FILE_MMAP)
+		return (0);
+
 	bufpos = buffer;
 	bufrem = 0;
 
@@ -101,6 +108,36 @@ grep_refill(struct file *f)
 			/* Make sure we exit with an error */
 			nr = -1;
 		}
+	} else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
+		lzma_action action = LZMA_RUN;
+		uint8_t in_buf[MAXBUFSIZ];
+		lzma_ret ret;
+
+		ret = (filebehave == FILE_XZ) ?
+		    lzma_stream_decoder(&lstrm, UINT64_MAX,
+		    LZMA_CONCATENATED) :
+		    lzma_alone_decoder(&lstrm, UINT64_MAX);
+
+		if (ret != LZMA_OK)
+			return (-1);
+
+		lstrm.next_out = buffer;
+		lstrm.avail_out = MAXBUFSIZ;
+		lstrm.next_in = in_buf;
+		nr = read(f->fd, in_buf, MAXBUFSIZ);
+
+		if (nr < 0)
+			return (-1);
+		else if (nr == 0)
+			action = LZMA_FINISH;
+
+		lstrm.avail_in = nr;
+		ret = lzma_code(&lstrm, action);
+
+		if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+			return (-1);
+		bufrem = MAXBUFSIZ - lstrm.avail_out;
+		return (0);
 	} else
 		nr = read(f->fd, buffer, MAXBUFSIZ);
 
@@ -186,56 +223,76 @@ error:
 	return (NULL);
 }
 
-static inline struct file *
-grep_file_init(struct file *f)
+/*
+ * Opens a file for processing.
+ */
+struct file *
+grep_open(const char *path)
 {
+	struct file *f;
+
+	f = grep_malloc(sizeof *f);
+	memset(f, 0, sizeof *f);
+	if (path == NULL) {
+		/* Processing stdin implies --line-buffered. */
+		lbflag = true;
+		f->fd = STDIN_FILENO;
+	} else if ((f->fd = open(path, O_RDONLY)) == -1)
+		goto error1;
+
+	if (filebehave == FILE_MMAP) {
+		struct stat st;
+
+		if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) ||
+		    (!S_ISREG(st.st_mode)))
+			filebehave = FILE_STDIO;
+		else {
+			int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
+#ifdef MAP_PREFAULT_READ
+			flags |= MAP_PREFAULT_READ;
+#endif
+			fsiz = st.st_size;
+			buffer = mmap(NULL, fsiz, PROT_READ, flags,
+			     f->fd, (off_t)0);
+			if (buffer == MAP_FAILED)
+				filebehave = FILE_STDIO;
+			else {
+				bufrem = st.st_size;
+				bufpos = buffer;
+				madvise(buffer, st.st_size, MADV_SEQUENTIAL);
+			}
+		}
+	}
+
+	if ((buffer == NULL) || (buffer == MAP_FAILED))
+		buffer = grep_malloc(MAXBUFSIZ);
 
 	if (filebehave == FILE_GZIP &&
 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
-		goto error;
+		goto error2;
 
 	if (filebehave == FILE_BZIP &&
 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
-		goto error;
+		goto error2;
 
 	/* Fill read buffer, also catches errors early */
-	if (grep_refill(f) != 0)
-		goto error;
+	if (bufrem == 0 && grep_refill(f) != 0)
+		goto error2;
 
 	/* Check for binary stuff, if necessary */
 	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
-		f->binary = true;
+	f->binary = true;
 
 	return (f);
-error:
+
+error2:
 	close(f->fd);
+error1:
 	free(f);
 	return (NULL);
 }
 
 /*
- * Opens a file for processing.
- */
-struct file *
-grep_open(const char *path)
-{
-	struct file *f;
-
-	f = grep_malloc(sizeof *f);
-	memset(f, 0, sizeof *f);
-	if (path == NULL) {
-		/* Processing stdin implies --line-buffered. */
-		lbflag = true;
-		f->fd = STDIN_FILENO;
-	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
-		free(f);
-		return (NULL);
-	}
-
-	return (grep_file_init(f));
-}
-
-/*
  * Closes a file.
  */
 void
@@ -245,6 +302,10 @@ grep_close(struct file *f)
 	close(f->fd);
 
 	/* Reset read buffer and line buffer */
+	if (filebehave == FILE_MMAP) {
+		munmap(buffer, fsiz);
+		buffer = NULL;
+	}
 	bufpos = buffer;
 	bufrem = 0;
 

Modified: head/usr.bin/grep/grep.c
==============================================================================
--- head/usr.bin/grep/grep.c	Wed Oct  5 08:33:50 2011	(r226034)
+++ head/usr.bin/grep/grep.c	Wed Oct  5 09:56:43 2011	(r226035)
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <getopt.h>
 #include <limits.h>
 #include <libgen.h>
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <string.h>
 #include <unistd.h>
 
+#include "fastmatch.h"
 #include "grep.h"
 
 #ifndef WITHOUT_NLS
@@ -81,9 +83,9 @@ bool		 matchall;
 
 /* Searching patterns */
 unsigned int	 patterns, pattern_sz;
-char		**pattern;
+struct pat	*pattern;
 regex_t		*r_pattern;
-fastgrep_t	*fg_pattern;
+fastmatch_t	*fg_pattern;
 
 /* Filename exclusion/inclusion patterns */
 unsigned int	 fpatterns, fpattern_sz;
@@ -104,7 +106,7 @@ bool	 hflag;		/* -h: don't print filenam
 bool	 iflag;		/* -i: ignore case */
 bool	 lflag;		/* -l: only show names of files with matches */
 bool	 mflag;		/* -m x: stop reading the files after x matches */
-unsigned long long mcount;	/* count for -m */
+long long mcount;	/* count for -m */
 bool	 nflag;		/* -n: show line numbers in front of matching lines */
 bool	 oflag;		/* -o: print only matching part */
 bool	 qflag;		/* -q: quiet mode (don't output anything) */
@@ -164,7 +166,7 @@ usage(void)
 	exit(2);
 }
 
-static const char	*optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
+static const char	*optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
 
 struct option long_options[] =
 {
@@ -200,6 +202,7 @@ struct option long_options[] =
 	{"files-with-matches",	no_argument,		NULL, 'l'},
 	{"files-without-match", no_argument,            NULL, 'L'},
 	{"max-count",		required_argument,	NULL, 'm'},
+	{"lzma",		no_argument,		NULL, 'M'},
 	{"line-number",		no_argument,		NULL, 'n'},
 	{"only-matching",	no_argument,		NULL, 'o'},
 	{"quiet",		no_argument,		NULL, 'q'},
@@ -212,6 +215,7 @@ struct option long_options[] =
 	{"version",		no_argument,		NULL, 'V'},
 	{"word-regexp",		no_argument,		NULL, 'w'},
 	{"line-regexp",		no_argument,		NULL, 'x'},
+	{"xz",			no_argument,		NULL, 'X'},
 	{"decompress",          no_argument,            NULL, 'Z'},
 	{NULL,			no_argument,		NULL, 0}
 };
@@ -223,23 +227,35 @@ static void
 add_pattern(char *pat, size_t len)
 {
 
+	/* Do not add further patterns if we already match everything */
+	if (matchall)
+	  return;
+
 	/* Check if we can do a shortcut */
-	if (len == 0 || matchall) {
+	if (len == 0) {
 		matchall = true;
+		for (unsigned int i = 0; i < patterns; i++) {
+			free(pattern[i].pat);
+		}
+		pattern = grep_realloc(pattern, sizeof(struct pat));
+		pattern[0].pat = NULL;
+		pattern[0].len = 0;
+		patterns = 1;
 		return;
 	}
 	/* Increase size if necessary */
 	if (patterns == pattern_sz) {
 		pattern_sz *= 2;
 		pattern = grep_realloc(pattern, ++pattern_sz *
-		    sizeof(*pattern));
+		    sizeof(struct pat));
 	}
 	if (len > 0 && pat[len - 1] == '\n')
 		--len;
 	/* pat may not be NUL-terminated */
-	pattern[patterns] = grep_malloc(len + 1);
-	memcpy(pattern[patterns], pat, len);
-	pattern[patterns][len] = '\0';
+	pattern[patterns].pat = grep_malloc(len + 1);
+	memcpy(pattern[patterns].pat, pat, len);
+	pattern[patterns].len = len;
+	pattern[patterns].pat[len] = '\0';
 	++patterns;
 }
 
@@ -285,14 +301,19 @@ add_dpattern(const char *pat, int mode)
 static void
 read_patterns(const char *fn)
 {
+	struct stat st;
 	FILE *f;
 	char *line;
 	size_t len;
 
 	if ((f = fopen(fn, "r")) == NULL)
 		err(2, "%s", fn);
-	while ((line = fgetln(f, &len)) != NULL)
-		add_pattern(line, *line == '\n' ? 0 : len);
+	if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
+		fclose(f);
+		return;
+	}
+        while ((line = fgetln(f, &len)) != NULL)
+		add_pattern(line, line[0] == '\n' ? 0 : len);
 	if (ferror(f))
 		err(2, "%s", fn);
 	fclose(f);
@@ -311,7 +332,7 @@ int
 main(int argc, char *argv[])
 {
 	char **aargv, **eargv, *eopts;
-	char *ep;
+	char *pn, *ep;
 	unsigned long long l;
 	unsigned int aargc, eargc, i;
 	int c, lastc, needpattern, newarg, prevoptind;
@@ -325,30 +346,27 @@ main(int argc, char *argv[])
 	/* Check what is the program name of the binary.  In this
 	   way we can have all the funcionalities in one binary
 	   without the need of scripting and using ugly hacks. */
-	switch (__progname[0]) {
+	pn = __progname;
+	if (pn[0] == 'b' && pn[1] == 'z') {
+		filebehave = FILE_BZIP;
+		pn += 2;
+	} else if (pn[0] == 'x' && pn[1] == 'z') {
+		filebehave = FILE_XZ;
+		pn += 2;
+	} else if (pn[0] == 'l' && pn[1] == 'z') {
+		filebehave = FILE_LZMA;
+		pn += 2;
+	} else if (pn[0] == 'z') {
+		filebehave = FILE_GZIP;
+		pn += 1;
+	}
+	switch (pn[0]) {
 	case 'e':
 		grepbehave = GREP_EXTENDED;
 		break;
 	case 'f':
 		grepbehave = GREP_FIXED;
 		break;
-	case 'g':
-		grepbehave = GREP_BASIC;
-		break;
-	case 'z':
-		filebehave = FILE_GZIP;
-		switch(__progname[1]) {
-		case 'e':
-			grepbehave = GREP_EXTENDED;
-			break;
-		case 'f':
-			grepbehave = GREP_FIXED;
-			break;
-		case 'g':
-			grepbehave = GREP_BASIC;
-			break;
-		}
-		break;
 	}
 
 	lastc = '\0';
@@ -503,8 +521,8 @@ main(int argc, char *argv[])
 		case 'm':
 			mflag = true;
 			errno = 0;
-			mcount = strtoull(optarg, &ep, 10);
-			if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
+			mcount = strtoll(optarg, &ep, 10);
+			if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
 			    ((errno == EINVAL) && (mcount == 0)))
 				err(2, NULL);
 			else if (ep[0] != '\0') {
@@ -512,6 +530,9 @@ main(int argc, char *argv[])
 				err(2, NULL);
 			}
 			break;
+		case 'M':
+			filebehave = FILE_LZMA;
+			break;
 		case 'n':
 			nflag = true;
 			break;
@@ -544,7 +565,7 @@ main(int argc, char *argv[])
 			break;
 		case 'u':
 		case MMAP_OPT:
-			/* noop, compatibility */
+			filebehave = FILE_MMAP;
 			break;
 		case 'V':
 			printf(getstr(9), __progname, VERSION);
@@ -560,6 +581,9 @@ main(int argc, char *argv[])
 			xflag = true;
 			cflags &= ~REG_NOSUB;
 			break;
+		case 'X':
+			filebehave = FILE_XZ;
+			break;
 		case 'Z':
 			filebehave = FILE_GZIP;
 			break;
@@ -630,6 +654,10 @@ main(int argc, char *argv[])
 	aargc -= optind;
 	aargv += optind;
 
+	/* Empty pattern file matches nothing */
+	if (!needpattern && (patterns == 0))
+		exit(1);
+
 	/* Fail if we don't have any pattern */
 	if (aargc == 0 && needpattern)
 		usage();
@@ -642,9 +670,12 @@ main(int argc, char *argv[])
 	}
 
 	switch (grepbehave) {
-	case GREP_FIXED:
 	case GREP_BASIC:
 		break;
+	case GREP_FIXED:
+		/* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
+		cflags |= 0020;
+		break;
 	case GREP_EXTENDED:
 		cflags |= REG_EXTENDED;
 		break;
@@ -655,24 +686,17 @@ main(int argc, char *argv[])
 
 	fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
 	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
-/*
- * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
- * Optimizations should be done there.
- */
-		/* Check if cheating is allowed (always is for fgrep). */
-	if (grepbehave == GREP_FIXED) {
-		for (i = 0; i < patterns; ++i)
-			fgrepcomp(&fg_pattern[i], pattern[i]);
-	} else {
-		for (i = 0; i < patterns; ++i) {
-			if (fastcomp(&fg_pattern[i], pattern[i])) {
-				/* Fall back to full regex library */
-				c = regcomp(&r_pattern[i], pattern[i], cflags);
-				if (c != 0) {
-					regerror(c, &r_pattern[i], re_error,
-					    RE_ERROR_BUF);
-					errx(2, "%s", re_error);
-				}
+
+	/* Check if cheating is allowed (always is for fgrep). */
+	for (i = 0; i < patterns; ++i) {
+		if (fastncomp(&fg_pattern[i], pattern[i].pat,
+		    pattern[i].len, cflags) != 0) {
+			/* Fall back to full regex library */
+			c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+			if (c != 0) {
+				regerror(c, &r_pattern[i], re_error,
+				    RE_ERROR_BUF);
+				errx(2, "%s", re_error);
 			}
 		}
 	}

Modified: head/usr.bin/grep/grep.h
==============================================================================
--- head/usr.bin/grep/grep.h	Wed Oct  5 08:33:50 2011	(r226034)
+++ head/usr.bin/grep/grep.h	Wed Oct  5 09:56:43 2011	(r226035)
@@ -36,6 +36,8 @@
 #include <stdio.h>
 #include <zlib.h>
 
+#include "fastmatch.h"
+
 #ifdef WITHOUT_NLS
 #define getstr(n)	 errstr[n]
 #else
@@ -58,8 +60,11 @@ extern const char		*errstr[];
 #define BINFILE_TEXT	2
 
 #define FILE_STDIO	0
-#define FILE_GZIP	1
-#define FILE_BZIP	2
+#define FILE_MMAP	1
+#define FILE_GZIP	2
+#define FILE_BZIP	3
+#define FILE_XZ		4
+#define FILE_LZMA	5
 
 #define DIR_READ	0
 #define DIR_SKIP	1
@@ -90,22 +95,16 @@ struct str {
 	int		 line_no;
 };
 
+struct pat {
+	char		*pat;
+	int		 len;
+};
+
 struct epat {
 	char		*pat;
 	int		 mode;
 };
 
-typedef struct {
-	size_t		 len;
-	unsigned char	*pattern;
-	int		 qsBc[UCHAR_MAX + 1];
-	/* flags */
-	bool		 bol;
-	bool		 eol;
-	bool		 reversed;
-	bool		 word;
-} fastgrep_t;
-
 /* Flags passed to regcomp() and regexec() */
 extern int	 cflags, eflags;
 
@@ -114,7 +113,8 @@ extern bool	 Eflag, Fflag, Gflag, Hflag,
 		 bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag,
 		 qflag, sflag, vflag, wflag, xflag;
 extern bool	 dexclude, dinclude, fexclude, finclude, lbflag, nullflag;
-extern unsigned long long Aflag, Bflag, mcount;
+extern unsigned long long Aflag, Bflag;
+extern long long mcount;
 extern char	*label;
 extern const char *color;
 extern int	 binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
@@ -122,10 +122,10 @@ extern int	 binbehave, devbehave, dirbeh
 extern bool	 first, matchall, notfound, prev;
 extern int	 tail;
 extern unsigned int dpatterns, fpatterns, patterns;
-extern char    **pattern;
+extern struct pat *pattern;
 extern struct epat *dpattern, *fpattern;
 extern regex_t	*er_pattern, *r_pattern;
-extern fastgrep_t *fg_pattern;
+extern fastmatch_t *fg_pattern;
 
 /* For regex errors  */
 #define RE_ERROR_BUF	512
@@ -150,8 +150,3 @@ void	 clearqueue(void);
 void		 grep_close(struct file *f);
 struct file	*grep_open(const char *path);
 char		*grep_fgetln(struct file *f, size_t *len);
-
-/* fastgrep.c */
-int		 fastcomp(fastgrep_t *, const char *);
-void		 fgrepcomp(fastgrep_t *, const char *);
-int		 grep_search(fastgrep_t *, const unsigned char *, size_t, regmatch_t *);

Added: head/usr.bin/grep/regex/fastmatch.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/grep/regex/fastmatch.c	Wed Oct  5 09:56:43 2011	(r226035)
@@ -0,0 +1,169 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <gabor at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "glue.h"
+
+#include <errno.h>
+#include <fastmatch.h>
+#include <regex.h>
+#include <string.h>
+
+#include "tre-fastmatch.h"
+#include "xmalloc.h"
+
+int
+tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
+{
+  int ret;
+  tre_char_t *wregex;
+  size_t wlen;
+
+  if (n != 0)
+    {
+      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      if (ret != REG_OK)
+	return ret;
+      else 
+	ret = tre_compile_literal(preg, wregex, wlen, cflags);
+      tre_free_pattern(wregex);
+      return ret;
+    }
+  else
+    return tre_compile_literal(preg, NULL, 0, cflags);
+}
+
+int
+tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
+{
+  int ret;
+  tre_char_t *wregex;
+  size_t wlen;
+
+  if (n != 0)
+    {
+      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      if (ret != REG_OK)
+	return ret;
+      else
+	ret = (cflags & REG_LITERAL)
+	      ? tre_compile_literal(preg, wregex, wlen, cflags)
+	      : tre_compile_fast(preg, wregex, wlen, cflags);
+      tre_free_pattern(wregex);
+      return ret;
+    }
+  else
+    return tre_compile_literal(preg, NULL, 0, cflags);
+}
+
+
+int
+tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags)
+{
+  return tre_fixncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
+}
+
+int
+tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags)
+{
+  return tre_fastncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
+}
+
+int
+tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  return tre_compile_literal(preg, regex, n, cflags);
+}
+
+int
+tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  return (cflags & REG_LITERAL) ?
+    tre_compile_literal(preg, regex, n, cflags) :
+    tre_compile_fast(preg, regex, n, cflags);
+}
+
+int
+tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
+{
+  return tre_fixwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
+}
+
+int
+tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
+{
+  return tre_fastwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
+}
+
+void
+tre_fastfree(fastmatch_t *preg)
+{
+  tre_free_fast(preg);
+}
+
+int
+tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
+
+  if (eflags & REG_STARTEND)
+    CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
+		     type, nmatch, pmatch, eflags));
+  else
+    return tre_match_fast(preg, string, len, type, nmatch,
+      pmatch, eflags);
+}
+
+int
+tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
+	     regmatch_t pmatch[], int eflags)
+{
+  return tre_fastnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
+}
+
+int
+tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
+          size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_str_type_t type = STR_WIDE;
+
+  if (eflags & REG_STARTEND)
+    CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
+		     type, nmatch, pmatch, eflags));
+  else
+    return tre_match_fast(preg, string, len, type, nmatch,
+      pmatch, eflags);
+}
+
+int
+tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  return tre_fastwnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
+}
+

Added: head/usr.bin/grep/regex/fastmatch.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/grep/regex/fastmatch.h	Wed Oct  5 09:56:43 2011	(r226035)
@@ -0,0 +1,108 @@
+/* $FreeBSD$ */
+
+#ifndef FASTMATCH_H
+#define FASTMATCH_H 1
+
+#include <limits.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <wchar.h>
+
+typedef struct {
+  size_t	 wlen;
+  size_t	 len;
+  wchar_t	*wpattern;
+  bool		*wescmap;
+  unsigned int	 qsBc[UCHAR_MAX + 1];
+  unsigned int	*bmGs;
+  char		*pattern;
+  bool		*escmap;
+  unsigned int	 defBc;
+  void		*qsBc_table;
+  unsigned int	*sbmGs;
+  const char	*re_endp;
+
+  /* flags */
+  bool		 hasdot;
+  bool		 bol;
+  bool		 eol;
+  bool		 word;
+  bool		 icase;
+  bool		 newline;
+  bool		 nosub;
+  bool		 matchall;
+  bool		 reversed;
+} fastmatch_t;
+
+extern int
+tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags);
+
+extern int
+tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags);
+
+extern int
+tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
+  regmatch_t pmatch[], int eflags);
+
+extern void
+tre_fastfree(fastmatch_t *preg);
+
+extern int
+tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
+         size_t nmatch, regmatch_t pmatch[], int eflags);
+
+/* Versions with a maximum length argument and therefore the capability to
+   handle null characters in the middle of the strings. */
+extern int
+tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
+  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+extern int
+tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
+
+extern int
+tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
+
+extern int
+tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
+  size_t nmatch, regmatch_t pmatch[], int eflags);
+
+#define fixncomp	tre_fixncomp
+#define fastncomp	tre_fastncomp
+#define fixcomp		tre_fixcomp
+#define fastcomp	tre_fastcomp
+#define fixwncomp	tre_fixwncomp
+#define fastwncomp	tre_fastwncomp
+#define fixwcomp	tre_fixwcomp
+#define fastwcomp	tre_fastwcomp
+#define fastfree	tre_fastfree
+#define fastnexec	tre_fastnexec
+#define fastexec	tre_fastexec
+#define fastwnexec	tre_fastwnexec
+#define fastwexec	tre_fastwexec
+#define fixcomp		tre_fixcomp
+#define fastcomp	tre_fastcomp
+#define fastexec	tre_fastexec
+#define fastfree	tre_fastfree
+#define fixwcomp	tre_fixwcomp
+#define fastwcomp	tre_fastwcomp
+#define fastwexec	tre_fastwexec
+#define fixncomp	tre_fixncomp
+#define fastncomp	tre_fastncomp
+#define fastnexec	tre_fastnexec
+#define fixwncomp	tre_fixwncomp
+#define fastwncomp	tre_fastwncomp
+#define fastwnexec	tre_fastwnexec
+#endif		/* FASTMATCH_H */

Added: head/usr.bin/grep/regex/glue.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/grep/regex/glue.h	Wed Oct  5 09:56:43 2011	(r226035)
@@ -0,0 +1,67 @@
+/* $FreeBSD$ */
+
+#ifndef GLUE_H
+#define GLUE_H
+
+#include <limits.h>
+#undef RE_DUP_MAX
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define TRE_WCHAR			1
+#define TRE_MULTIBYTE			1
+#define HAVE_MBSTATE_T			1
+
+#define TRE_CHAR(n) L##n
+#define CHF "%lc"
+
+#define tre_char_t			wchar_t
+#define tre_mbrtowc(pwc, s, n, ps)	(mbrtowc((pwc), (s), (n), (ps)))
+#define tre_strlen			wcslen
+#define tre_isspace			iswspace
+#define tre_isalnum			iswalnum
+
+#define REG_OK				0
+#define REG_LITERAL			0020
+#define REG_WORD			0100
+#define REG_GNU				0400
+
+#define TRE_MB_CUR_MAX			MB_CUR_MAX
+
+#ifndef _GREP_DEBUG
+#define DPRINT(msg)
+#else			
+#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
+#endif
+
+#define MIN(a,b)			((a > b) ? (b) : (a))
+#define MAX(a,b)			((a > b) ? (a) : (b))
+
+typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
+
+#define CALL_WITH_OFFSET(fn)						\
+  do									\
+    {									\
+      size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);	\
+      size_t offset = pmatch[0].rm_so;					\
+      int ret;								\
+									\
+      if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0)		\
+	return REG_NOMATCH;						\
+      ret = fn;								\
+      for (unsigned i = 0; (!(eflags & REG_NOSUB) && (i < nmatch)); i++)\
+	{								\
+	  pmatch[i].rm_so += offset;					\
+	  pmatch[i].rm_eo += offset;					\
+	}								\
+      return ret;							\
+    } while (0 /*CONSTCOND*/)
+
+int
+tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
+    size_t *wn);
+
+void
+tre_free_pattern(tre_char_t *wregex);
+#endif

Added: head/usr.bin/grep/regex/hashtable.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/usr.bin/grep/regex/hashtable.c	Wed Oct  5 09:56:43 2011	(r226035)
@@ -0,0 +1,268 @@
+/*      $FreeBSD$       */
+
+/*-
+ * Copyright (C) 2011 Gabor Kovesdan <gabor at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list