PERFORCE change 147530 for review
Gabor Kovesdan
gabor at FreeBSD.org
Sat Aug 16 12:44:23 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=147530
Change 147530 by gabor at gabor_server on 2008/08/16 12:43:58
IFC
Affected files ...
... //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/Makefile#3 integrate
... //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/fastgrep.c#1 branch
... //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/file.c#5 integrate
... //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.c#3 integrate
... //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.h#3 integrate
... //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/util.c#3 integrate
Differences ...
==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/Makefile#3 (text+ko) ====
@@ -2,7 +2,7 @@
# $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
PROG= grep
-SRCS= file.c grep.c queue.c util.c
+SRCS= fastgrep.c file.c grep.c queue.c util.c
LINKS= ${BINDIR}/grep ${BINDIR}/egrep \
${BINDIR}/grep ${BINDIR}/fgrep \
${BINDIR}/grep ${BINDIR}/zgrep \
==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/file.c#5 (text+ko) ====
@@ -119,36 +119,22 @@
{
int i = 0;
char ch;
- size_t size;
- wchar_t wbinbuf[BUFSIZ];
- const char *src = binbuf;
- mbstate_t mbs;
/* Fill in the buffer if it is empty. */
if (binbufptr == NULL) {
/* Only pre-read to the buffer if we need the binary check. */
if (binbehave != BINFILE_TEXT) {
- for (; i < sizeof(wbinbuf) && !grep_feof(f); i++) {
+ for (; i < sizeof(binbuf) && !grep_feof(f); i++) {
ch = grep_fgetc(f);
- binbuf[i] = ch;
+ if (ch != EOF)
+ binbuf[i] = ch;
+ else
+ break;
}
- binbufsiz = i;
- binbufptr = binbuf;
-
- /* Convert at most (BUFSIZ * sizeof(wint_t)) characters or
- (BUFSIZ - 1) bytes to wide character string. */
- size = mbsnrtowcs(wbinbuf, &src, sizeof(wbinbuf), BUFSIZ - 1, &mbs);
- f->binary = 0;
- for (; size > 0; size--)
- if (iswbinary(wbinbuf[size])) {
- f->binary = 1;
- break;
- }
-
- } else {
- binbufsiz = i;
- binbufptr = binbuf;
+ f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? '\0' : '\200', i - 1) != 0;
}
+ binbufsiz = i;
+ binbufptr = binbuf;
}
/* Read a line whether from the buffer or from the file itself. */
==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.c#3 (text+ko) ====
@@ -95,6 +95,7 @@
int patterns, pattern_sz;
char **pattern;
regex_t *r_pattern;
+fastgrep_t *fg_pattern;
#ifdef WITH_PCRE
pcre **perl_pattern;
#endif
@@ -590,14 +591,26 @@
usage();
}
if (grepbehave != GREP_PERL) {
- /* Compile regexes with regcomp() */
+ fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
+/*
+ * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
+ * Optimizations should be done there.
+ */
for (i = 0; i < patterns; ++i) {
- c = regcomp(&r_pattern[i], pattern[i], cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
+ /* Check if cheating is allowed (always is for fgrep). */
+ if (grepbehave == GREP_FIXED)
+ fgrepcomp(&fg_pattern[i], pattern[i]);
+ else {
+ if (fastcomp(&fg_pattern[i], pattern[i])) {
+ /* Fall back to full regex library */
+ c = regcomp(&r_pattern[i], pattern[i], cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
+ }
+ }
}
}
} else {
==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.h#3 (text+ko) ====
@@ -27,6 +27,7 @@
*/
#include <bzlib.h>
+#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <zlib.h>
@@ -90,6 +91,16 @@
char *dat;
};
+typedef struct {
+ unsigned char *pattern;
+ int len;
+ int qsBc[UCHAR_MAX + 1];
+ /* flags */
+ int bol;
+ int eol;
+ int reversed;
+} fastgrep_t;
+
/* Flags passed to regcomp() and regexec() */
extern int cflags, eflags;
@@ -105,6 +116,7 @@
extern int first, prev, matchall, patterns, epatterns, tail, notfound;
extern char **pattern, **epattern;
extern regex_t *r_pattern, *er_pattern;
+extern fastgrep_t *fg_pattern;
#ifdef WITH_PCRE
extern pcre **perl_pattern;
@@ -138,3 +150,8 @@
int grep_feof(struct file *f);
int grep_fgetc(struct file *f);
char *grep_fgetln(struct file *f, size_t *len);
+
+/* fastgrep.c */
+int fastcomp(fastgrep_t *, const char *);
+void fgrepcomp(fastgrep_t *, const char *);
+int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *);
==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/util.c#3 (text+ko) ====
@@ -243,14 +243,12 @@
regmatch_t pmatch;
regmatch_t matches[MAX_LINE_MATCHES];
regoff_t st = 0;
- int c = 0, i, r = 0, m = 0, t;
+ int c = 0, i, r = 0, m = 0;
#ifdef WITH_PCRE
int ovector[3];
#endif
if (!matchall) {
- t = vflag ? REG_NOMATCH : 0;
-
/* Loop to process the whole line */
while (st <= l->len) {
pmatch.rm_so = st;
@@ -258,8 +256,18 @@
/* Loop to compare with all the patterns */
for (i = 0; i < patterns; i++) {
- if (grepbehave != GREP_PERL) {
+/*
+ * XXX: grep_search() is a workaround for speed up and should be
+ * removed in the future. See fastgrep.c.
+ */
+ if (fg_pattern[i].pattern) {
+ r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
+ l->len, &pmatch);
+ r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
+ st = pmatch.rm_eo;
+ } else if (grepbehave != GREP_PERL) {
r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
+ r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
st = pmatch.rm_eo;
} else {
#ifdef WITH_PCRE
@@ -271,7 +279,7 @@
;
#endif
}
- if (r == REG_NOMATCH && t == 0)
+ if (r == REG_NOMATCH)
continue;
/* Check for full match */
if (r == 0 && xflag)
@@ -290,7 +298,7 @@
r = REG_NOMATCH;
free(wbegin);
}
- if (r == t) {
+ if (r == 0) {
if (m == 0)
c++;
if (m < MAX_LINE_MATCHES)
@@ -313,7 +321,7 @@
return (c); /* Binary file */
/* Dealing with the context */
- if ((tail || (c && !vflag)) && !cflag && !qflag) {
+ if ((tail || c) && !cflag && !qflag) {
if (c) {
if (!first && !prev && !tail && Aflag)
printf("--\n");
More information about the p4-projects
mailing list