PERFORCE change 144086 for review
Gabor Kovesdan
gabor at FreeBSD.org
Wed Jun 25 12:05:28 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=144086
Change 144086 by gabor at gabor_server on 2008/06/25 12:05:02
- Remove the fastgrep code. The regex library can auto-detect fixed
strings and operate on them at the same speed. If -F is specified, also
set REG_NOSPEC explicitly.
- As the whole fastgrep code is removed, there is no need to fix
the wide char handling there; grep should be localized now.
Obtained from: NetBSD Project
Affected files ...
... //depot/projects/soc2008/gabor_textproc/grep/grep.c#46 edit
... //depot/projects/soc2008/gabor_textproc/grep/grep.h#26 edit
... //depot/projects/soc2008/gabor_textproc/grep/util.c#40 edit
Differences ...
==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#46 (text+ko) ====
@@ -84,7 +84,6 @@
int patterns, pattern_sz;
char **pattern;
regex_t *r_pattern;
-struct fastgrep *fg_pattern;
/* For regex errors */
char re_error[RE_ERROR_BUF + 1];
@@ -616,24 +615,17 @@
++argv;
}
- if (Eflag)
+ if (Fflag)
+ cflags |= REG_NOSPEC;
+ else if (Eflag)
cflags |= REG_EXTENDED;
- fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
for (i = 0; i < patterns; ++i) {
- /* Check if cheating is allowed (always is for fgrep). */
- if (Fflag) {
- fgrepcomp(&fg_pattern[i], pattern[i]);
- } else {
- if (fastcomp(&fg_pattern[i], pattern[i])) {
- /* Fall back to full regex library */
- c = regcomp(&r_pattern[i], pattern[i], cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
- }
- }
+ c = regcomp(&r_pattern[i], pattern[i], cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
}
}
==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#26 (text+ko) ====
@@ -98,7 +98,6 @@
extern int first, prev, matchall, patterns, tail, notfound;
extern char **pattern;
-extern struct fastgrep *fg_pattern;
extern regex_t *r_pattern;
/* For regex errors */
@@ -112,8 +111,6 @@
void *grep_calloc(size_t nmemb, size_t size);
void *grep_realloc(void *ptr, size_t size);
void printline(struct str *line, int sep);
-int fastcomp(struct fastgrep *, const char *);
-void fgrepcomp(struct fastgrep *, const char *);
/* queue.c */
void initqueue(void);
==== //depot/projects/soc2008/gabor_textproc/grep/util.c#40 (text+ko) ====
@@ -56,9 +56,6 @@
static int linesqueued;
static int procline(struct str *l, int);
-static int grep_search(struct fastgrep *, unsigned char *, size_t, regmatch_t *pmatch);
-static int grep_cmp(const unsigned char *, const unsigned char *, size_t);
-static void grep_revstr(unsigned char *, int);
int
grep_tree(char **argv)
@@ -204,13 +201,6 @@
return (c);
}
-
-/*
- * Process an individual line in a file. Return non-zero if it matches.
- */
-
-#define isword(x) (isalnum(x) || (x) == '_')
-
static int
procline(struct str *l, int nottext)
{
@@ -219,14 +209,9 @@
if (!matchall) {
for (c = i = 0; i < patterns; i++) {
- if (fg_pattern[i].pattern) {
- r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
- l->len, &pmatch);
- } else {
- pmatch.rm_so = 0;
- pmatch.rm_eo = l->len;
- r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
- }
+ pmatch.rm_so = 0;
+ pmatch.rm_eo = l->len;
+ r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
if (r == 0 && xflag) {
if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
r = REG_NOMATCH;
@@ -306,283 +291,6 @@
return (c);
}
-void
-fgrepcomp(struct fastgrep *fg, const char *pattern)
-{
- int i;
-
- /* Initialize. */
- fg->patternLen = strlen(pattern);
- fg->bol = 0;
- fg->eol = 0;
- fg->wmatch = wflag;
- fg->reversedSearch = 0;
-
- /*
- * Make a copy and upper case it for later if in -i mode,
- * else just copy the pointer.
- */
- if (iflag) {
- fg->pattern = grep_malloc(fg->patternLen + 1);
- for (i = 0; i < fg->patternLen; i++)
- fg->pattern[i] = toupper(pattern[i]);
- fg->pattern[fg->patternLen] = '\0';
- } else
- fg->pattern = (unsigned char *)pattern; /* really const */
-
- /* Preprocess pattern. */
- for (i = 0; i <= UCHAR_MAX; i++)
- fg->qsBc[i] = fg->patternLen;
- for (i = 1; i < fg->patternLen; i++) {
- fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
- /*
- * If case is ignored, make the jump apply to both upper and
- * lower cased characters. As the pattern is stored in upper
- * case, apply the same to the lower case equivalents.
- */
- if (iflag)
- fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
- }
-}
-
-/*
- * Returns: -1 on failure, 0 on success
- */
-int
-fastcomp(struct fastgrep *fg, const char *pattern)
-{
- int i;
- int bol = 0;
- int eol = 0;
- int shiftPatternLen;
- int hasDot = 0;
- int firstHalfDot = -1;
- int firstLastHalfDot = -1;
- int lastHalfDot = 0;
-
- /* Initialize. */
- fg->patternLen = strlen(pattern);
- fg->bol = 0;
- fg->eol = 0;
- fg->wmatch = 0;
- fg->reversedSearch = 0;
-
- /* Remove end-of-line character ('$'). */
- if (pattern[fg->patternLen - 1] == '$') {
- eol++;
- fg->eol = 1;
- fg->patternLen--;
- }
-
- /* Remove beginning-of-line character ('^'). */
- if (pattern[0] == '^') {
- bol++;
- fg->bol = 1;
- fg->patternLen--;
- }
-
- /* Remove enclosing [[:<:]] and [[:>:]] (word match). */
- if (wflag) {
- /* basic re's use \( \), extended re's ( ) */
- int extra = Eflag ? 1 : 2;
- fg->patternLen -= 14 + 2 * extra;
- fg->wmatch = 7 + extra;
- } else if (fg->patternLen >= 14 &&
- strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
- strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
- fg->patternLen -= 14;
- fg->wmatch = 7;
- }
-
- /*
- * Copy pattern minus '^' and '$' characters as well as word
- * match character classes at the beginning and ending of the
- * string respectively.
- */
- fg->pattern = grep_malloc(fg->patternLen + 1);
- memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
- fg->pattern[fg->patternLen] = '\0';
-
- /* Look for ways to cheat...er...avoid the full regex engine. */
- for (i = 0; i < fg->patternLen; i++)
- {
- /* Can still cheat? */
- if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) ||
- (fg->pattern[i] == '_') || (fg->pattern[i] == ',') ||
- (fg->pattern[i] == '=') || (fg->pattern[i] == '-') ||
- (fg->pattern[i] == ':') || (fg->pattern[i] == '/')) {
- /* As long as it is good, upper case it for later. */
- if (iflag)
- fg->pattern[i] = toupper(fg->pattern[i]);
- } else if (fg->pattern[i] == '.') {
- hasDot = i;
- if (i < fg->patternLen / 2) {
- if (firstHalfDot < 0)
- /* Closest dot to the beginning */
- firstHalfDot = i;
- } else {
- /* Closest dot to the end of the pattern. */
- lastHalfDot = i;
- if (firstLastHalfDot < 0)
- firstLastHalfDot = i;
- }
- } else {
- /* Free memory and let others know this is empty. */
- free(fg->pattern);
- fg->pattern = NULL;
- return (-1);
- }
- }
-
- /*
- * Determine if a reverse search would be faster based on the placement
- * of the dots.
- */
- if ((!(lflag || cflag)) && ((!(bol || eol)) &&
- ((lastHalfDot) && ((firstHalfDot < 0) ||
- ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
- fg->reversedSearch = 1;
- hasDot = fg->patternLen - (firstHalfDot < 0 ?
- firstLastHalfDot : firstHalfDot) - 1;
- grep_revstr(fg->pattern, fg->patternLen);
- }
-
- /*
- * Normal Quick Search would require a shift based on the position the
- * next character after the comparison is within the pattern. With
- * wildcards, the position of the last dot effects the maximum shift
- * distance.
- * The closer to the end the wild card is the slower the search. A
- * reverse version of this algorithm would be useful for wildcards near
- * the end of the string.
- *
- * Examples:
- * Pattern Max shift
- * ------- ---------
- * this 5
- * .his 4
- * t.is 3
- * th.s 2
- * thi. 1
- */
-
- /* Adjust the shift based on location of the last dot ('.'). */
- shiftPatternLen = fg->patternLen - hasDot;
-
- /* Preprocess pattern. */
- for (i = 0; i <= UCHAR_MAX; i++)
- fg->qsBc[i] = shiftPatternLen;
- for (i = hasDot + 1; i < fg->patternLen; i++) {
- fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
- /*
- * If case is ignored, make the jump apply to both upper and
- * lower cased characters. As the pattern is stored in upper
- * case, apply the same to the lower case equivalents.
- */
- if (iflag)
- fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
- }
-
- /*
- * Put pattern back to normal after pre-processing to allow for easy
- * comparisons later.
- */
- if (fg->reversedSearch)
- grep_revstr(fg->pattern, fg->patternLen);
-
- return (0);
-}
-
-/*
- * Word boundaries using regular expressions are defined as the point
- * of transition from a non-word char to a word char, or vice versa.
- * This means that grep -w +a and grep -w a+ never match anything,
- * because they lack a starting or ending transition, but grep -w a+b
- * does match a line containing a+b.
- */
-#define wmatch(d, l, s, e) \
- ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
- e > s && isword(d[s]) && isword(d[e-1]))
-
-static int
-grep_search(struct fastgrep *fg, unsigned char *data, size_t dataLen, regmatch_t *pmatch)
-{
- int j;
- int rtrnVal = REG_NOMATCH;
-
- pmatch->rm_so = -1;
- pmatch->rm_eo = -1;
-
- /* No point in going farther if we do not have enough data. */
- if (dataLen < fg->patternLen)
- return (rtrnVal);
-
- /* Only try once at the beginning or ending of the line. */
- if (fg->bol || fg->eol) {
- /* Simple text comparison. */
- /* Verify data is >= pattern length before searching on it. */
- if (dataLen >= fg->patternLen) {
- /* Determine where in data to start search at. */
- if (fg->eol)
- j = dataLen - fg->patternLen;
- else
- j = 0;
- if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
- if (grep_cmp(fg->pattern, data + j,
- fg->patternLen) == -1) {
- pmatch->rm_so = j;
- pmatch->rm_eo = j + fg->patternLen;
- if (!fg->wmatch || wmatch(data, dataLen,
- pmatch->rm_so, pmatch->rm_eo))
- rtrnVal = 0;
- }
- }
- } else if (fg->reversedSearch) {
- /* Quick Search algorithm. */
- j = dataLen;
- do {
- if (grep_cmp(fg->pattern, data + j - fg->patternLen,
- fg->patternLen) == -1) {
- pmatch->rm_so = j - fg->patternLen;
- pmatch->rm_eo = j;
- if (!fg->wmatch || wmatch(data, dataLen,
- pmatch->rm_so, pmatch->rm_eo)) {
- rtrnVal = 0;
- break;
- }
- }
- /* Shift if within bounds, otherwise, we are done. */
- if (j == fg->patternLen)
- break;
- j -= fg->qsBc[data[j - fg->patternLen - 1]];
- } while (j >= fg->patternLen);
- } else {
- /* Quick Search algorithm. */
- j = 0;
- do {
- if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) {
- pmatch->rm_so = j;
- pmatch->rm_eo = j + fg->patternLen;
- if (fg->patternLen == 0 || !fg->wmatch ||
- wmatch(data, dataLen, pmatch->rm_so,
- pmatch->rm_eo)) {
- rtrnVal = 0;
- break;
- }
- }
-
- /* Shift if within bounds, otherwise, we are done. */
- if (j + fg->patternLen == dataLen)
- break;
- else
- j += fg->qsBc[data[j + fg->patternLen]];
- } while (j <= (dataLen - fg->patternLen));
- }
-
- return (rtrnVal);
-}
-
-
void *
grep_malloc(size_t size)
{
@@ -612,38 +320,6 @@
return (ptr);
}
-/*
- * Returns: i >= 0 on failure (position that it failed)
- * -1 on success
- */
-static int
-grep_cmp(const unsigned char *pattern, const unsigned char *data, size_t len)
-{
- int i;
-
- for (i = 0; i < len; i++) {
- if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
- || (iflag && pattern[i] == toupper(data[i])))
- continue;
- return (i);
- }
-
- return (-1);
-}
-
-static void
-grep_revstr(unsigned char *str, int len)
-{
- int i;
- char c;
-
- for (i = 0; i < len / 2; i++) {
- c = str[i];
- str[i] = str[len - i - 1];
- str[len - i - 1] = c;
- }
-}
-
void
printline(struct str *line, int sep)
{
More information about the p4-projects
mailing list