PERFORCE change 147060 for review
Gabor Kovesdan
gabor at FreeBSD.org
Sun Aug 10 08:12:35 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=147060
Change 147060 by gabor at gabor_server on 2008/08/10 08:11:47
- Add back fixed string code. It is faster than the regex library. :(
Affected files ...
.. //depot/projects/soc2008/gabor_textproc/grep/Makefile#13 edit
.. //depot/projects/soc2008/gabor_textproc/grep/fastgrep.c#1 add
.. //depot/projects/soc2008/gabor_textproc/grep/grep.c#72 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.h#40 edit
.. //depot/projects/soc2008/gabor_textproc/grep/util.c#71 edit
Differences ...
==== //depot/projects/soc2008/gabor_textproc/grep/Makefile#13 (text+ko) ====
@@ -2,7 +2,7 @@
# $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
PROG= grep
-SRCS= file.c grep.c queue.c util.c
+SRCS= fastgrep.c file.c grep.c queue.c util.c
LINKS= ${BINDIR}/grep ${BINDIR}/egrep \
${BINDIR}/grep ${BINDIR}/fgrep \
${BINDIR}/grep ${BINDIR}/zgrep \
==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#72 (text+ko) ====
@@ -95,6 +95,7 @@
int patterns, pattern_sz;
char **pattern;
regex_t *r_pattern;
+fastgrep_t *fg_pattern;
#ifdef WITH_PCRE
pcre **perl_pattern;
#endif
@@ -590,14 +591,26 @@
usage();
}
if (grepbehave != GREP_PERL) {
- /* Compile regexes with regcomp() */
+ fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
+/*
+ * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
+ * Optimizations should be done there.
+ */
for (i = 0; i < patterns; ++i) {
- c = regcomp(&r_pattern[i], pattern[i], cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
+ /* Check if cheating is allowed (always is for fgrep). */
+ if (grepbehave == GREP_FIXED)
+ fgrepcomp(&fg_pattern[i], pattern[i]);
+ else {
+ if (fastcomp(&fg_pattern[i], pattern[i])) {
+ /* Fall back to full regex library */
+ c = regcomp(&r_pattern[i], pattern[i], cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
+ }
+ }
}
}
} else {
==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#40 (text+ko) ====
@@ -27,6 +27,7 @@
*/
#include <bzlib.h>
+#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <zlib.h>
@@ -90,6 +91,17 @@
char *dat;
};
+typedef struct {
+ unsigned char *pattern;
+ int patternLen;
+ int qsBc[UCHAR_MAX + 1];
+ /* flags */
+ int bol;
+ int eol;
+ int wmatch;
+ int reversedSearch;
+} fastgrep_t;
+
/* Flags passed to regcomp() and regexec() */
extern int cflags, eflags;
@@ -105,6 +117,7 @@
extern int first, prev, matchall, patterns, epatterns, tail, notfound;
extern char **pattern, **epattern;
extern regex_t *r_pattern, *er_pattern;
+extern fastgrep_t *fg_pattern;
#ifdef WITH_PCRE
extern pcre **perl_pattern;
@@ -138,3 +151,8 @@
int grep_feof(struct file *f);
int grep_fgetc(struct file *f);
char *grep_fgetln(struct file *f, size_t *len);
+
+/* fastgrep.c */
+int fastcomp(fastgrep_t *, const char *);
+void fgrepcomp(fastgrep_t *, const char *);
+int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *);
==== //depot/projects/soc2008/gabor_textproc/grep/util.c#71 (text+ko) ====
@@ -256,7 +256,14 @@
/* Loop to compare with all the patterns */
for (i = 0; i < patterns; i++) {
- if (grepbehave != GREP_PERL) {
+/*
+ * XXX: grep_search() is a speed-up workaround and should be
+ * removed in the future. See fastgrep.c.
+ */
+ if (fg_pattern[i].pattern) {
+ r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
+ l->len, &pmatch);
+ } else if (grepbehave != GREP_PERL) {
r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
st = pmatch.rm_eo;
More information about the p4-projects
mailing list