svn commit: r226177 - user/gabor/tre-integration/contrib/tre/lib
Gabor Kovesdan
gabor at FreeBSD.org
Sun Oct 9 21:39:14 UTC 2011
Author: gabor
Date: Sun Oct 9 21:39:14 2011
New Revision: 226177
URL: http://svn.freebsd.org/changeset/base/226177
Log:
- Rework heuristics to try to use a better maximum shift if possible
Modified:
user/gabor/tre-integration/contrib/tre/lib/regexec.c
user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c Sun Oct 9 21:36:14 2011 (r226176)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Sun Oct 9 21:39:14 2011 (r226177)
@@ -187,7 +187,7 @@ tre_match(const tre_tnfa_t *tnfa, const
if ((heur != NULL) && (type != STR_USER))
{
int ret;
- size_t st = 0, n;
+ size_t st = 0, i = 1, n;
const char *data_byte = string;
const tre_char_t *data_wide = string;
@@ -198,52 +198,50 @@ tre_match(const tre_tnfa_t *tnfa, const
{
SEEK_TO(st);
- /* Look for the beginning of possibly matching text. */
- ret = tre_match_fast(heur->start, string, len - st, type, nmatch,
+ /* Prefix heuristic */
+ ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch,
pmatch, eflags);
if (ret != REG_OK)
return ret;
st += pmatch[0].rm_so;
n = pmatch[0].rm_eo;
- /*
- * When having a fixed-length pattern there is only
- * one heuristic.
- */
- if (heur->end == NULL)
+ /* Intermediate heuristics */
+ while (!((heur->heurs[i] == NULL) ||
+ (heur->prefix && heur->heurs[i + 1] == NULL)))
{
- SEEK_TO(st);
-
- DPRINT(("tre_match: calling NFA with offsets [%u/%u]\n",
- st, heur->prefix ? len : n + st));
+ SEEK_TO(st + n);
+ ret = tre_match_fast(heur->heurs[i], string, len - st - n, type,
+ nmatch, pmatch, eflags);
+ if (ret != REG_OK)
+ return ret;
+ n += pmatch[0].rm_eo;
+ i++;
+ }
- ret = tre_match(tnfa, string,
- heur->prefix ? (len - st) :
- n, type, nmatch,
- pmatch, eflags, NULL, NULL);
+ /* Suffix heuristic available */
+ if (heur->prefix && heur->heurs[i] != NULL)
+ {
+ SEEK_TO(st + n);
+ ret = tre_match_fast(heur->heurs[i], string, len - st - n, type,
+ nmatch, pmatch, eflags);
+ if (ret != REG_OK)
+ return ret;
+ n += pmatch[0].rm_eo;
+ SEEK_TO(st);
+ ret = tre_match(tnfa, string, n, type, nmatch, pmatch,
+ eflags, NULL, NULL);
+ FIX_OFFSETS;
+ }
+ /* Suffix heuristic not available */
+ else
+ {
+ SEEK_TO(st);
+ ret = tre_match(tnfa, string, len - st, type, nmatch, pmatch,
+ eflags, NULL, NULL);
FIX_OFFSETS;
}
-
- SEEK_TO(st + n);
-
- /* Look for the end of possibly matching text. */
- ret = tre_match_fast(heur->end, string, len - st - n, type,
- nmatch, pmatch, eflags);
- n += pmatch[0].rm_eo;
-
- if (ret != REG_OK)
- return ret;
-
- SEEK_TO(st);
-
- DPRINT(("tre_match: calling NFA with offsets [%u/%u]\n",
- st, st + n));
-
- ret = tre_match(tnfa, string, n,
- type, nmatch, pmatch, eflags, NULL, NULL);
-
- FIX_OFFSETS;
}
}
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Sun Oct 9 21:36:14 2011 (r226176)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Sun Oct 9 21:39:14 2011 (r226177)
@@ -54,6 +54,8 @@
* performance impact.
*/
+#define MAX_FRAGMENTS 32
+
/*
* Parses bracket expression seeking to the end of the enclosed text.
* The parameters are the opening (oe) and closing elements (ce).
@@ -122,18 +124,15 @@
int
tre_compile_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags)
{
- tre_char_t *heur;
- int st = 0, pos = 0;
+ tre_char_t *arr[MAX_FRAGMENTS], *heur;
+ size_t length[MAX_FRAGMENTS];
+ int errcode, j = 0, pos = 0, st = 0;
bool escaped = false;
- int errcode, ret;
- /* Temporary space, len will be enough. */
- heur = xmalloc(len);
+ heur = xmalloc(len * sizeof(tre_char_t));
if (!heur)
return REG_ESPACE;
- memset(h, 0, sizeof(*h));
-
while (true)
{
@@ -274,7 +273,7 @@ tre_compile_heur(heur_t *h, const tre_ch
if ((cflags & REG_EXTENDED) && !escaped)
{
errcode = REG_BADPAT;
- goto badpat2;
+ goto err;
}
else if (!(cflags & REG_EXTENDED) && escaped)
END_SEGMENT;
@@ -309,87 +308,102 @@ tre_compile_heur(heur_t *h, const tre_ch
end_segment:
- /* If it is not initialized yet, then we just got the first segment. */
- if (h->start == NULL)
+ if (st == len && pos == 0)
{
-
- /*
- * An empty or a one-char prefix segment is useless,
- * better to just fail.
- */
- if (pos <= 1)
- {
- errcode = REG_BADPAT;
- DPRINT("tre_compile_heur: pattern does not have a "
- " fixed-length prefix that is long enough\n");
- goto badpat1;
- }
-
- h->start = xmalloc(sizeof(fastmatch_t));
- if (!h->start)
- {
- errcode = REG_ESPACE;
- goto space1;
- }
-
- ret = tre_compile_fast(h->start, heur, pos, 0);
- if (ret != REG_OK)
+ if (j == 0)
{
errcode = REG_BADPAT;
- goto badpat2;
+ goto err;
}
- DPRINT(("tre_compile_heur: fixed-length prefix is %s\n",
- h->start->pattern));
+ h->prefix = true;
+ goto ok;
}
- /*
- * If true, this is the last segment. We do not care about the
- * middle ones.
- */
- else if (st == len)
+ if (j == MAX_FRAGMENTS)
{
-
- /* If empty, we only have a prefix heuristic. */
- if (pos == 0)
- {
- h->prefix = true;
- errcode = REG_OK;
- DPRINT("tre-compile_heur: using only a fixed-length prefix; "
- "no fixed-length suffix is available\n");
- goto ok;
- }
-
- h->end = xmalloc(sizeof(fastmatch_t));
- if (!h->end)
- {
- errcode = REG_ESPACE;
- goto space2;
- }
-
- ret = tre_compile_fast(h->end, heur, pos, 0);
- if (ret != REG_OK)
- {
- xfree(h->end);
- h->prefix = true;
- }
- errcode = REG_OK;
- DPRINT(("tre_compile_heur: fixed-length suffix is %s\n",
- h->end->pattern));
- goto ok;
+ errcode = REG_BADPAT;
+ goto err;
}
- /* Just drop middle segments by overwriting the temporary space. */
+ arr[j] = xmalloc((pos + 1) * sizeof(tre_char_t));
+ if (!arr[j])
+ {
+ errcode = REG_ESPACE;
+ goto err;
+ }
+ memcpy(&arr[j], &heur, pos);
+ arr[j][pos] = TRE_CHAR('\0');
+ length[j] = pos;
+ j++;
pos = 0;
}
-badpat2:
-space2:
- if (h->start != NULL)
- xfree(h->start);
-badpat1:
-space1:
- DPRINT("tre_compile_heur: compiling a heuristic failed\n");
ok:
+
+ {
+ size_t m = 1;
+ int ret;
+
+ for(int i = 1; i < j; i++)
+ m = (length[i] > length[m]) ? i : m;
+
+ if (!h->heurs)
+ {
+ errcode = REG_ESPACE;
+ goto err;
+ }
+
+ for (int i = 0; i < MIN(3, j + 1); i++)
+ {
+ h->heurs[i] = xmalloc(sizeof(fastmatch_t));
+ if (!h->heurs[i])
+ {
+ errcode = REG_ESPACE;
+ goto err;
+ }
+ }
+
+#define CHECK_ERR
+ if (ret != REG_OK)
+ {
+ errcode = REG_BADPAT;
+ goto err2;
+ }
+
+ ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0);
+ CHECK_ERR
+ if (j == 1)
+ {
+ xfree(h->heurs[1]);
+ h->heurs[1] = NULL;
+ goto finish;
+ }
+ else
+ ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0);
+ CHECK_ERR
+ if (h->prefix)
+ {
+ xfree(h->heurs[2]);
+ h->heurs[2] = NULL;
+ goto finish;
+ }
+ else
+ ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0);
+ CHECK_ERR
+ h->heurs[3] = NULL;
+
+ errcode = REG_OK;
+ goto finish;
+ }
+
+err2:
+ for (int i = 0; h->heurs[i] != NULL; i++)
+ tre_free_fast(h->heurs[i]);
+ xfree(h->heurs);
+err:
+finish:
+ for (int i = 0; i < j; i++)
+ xfree(arr[i]);
xfree(heur);
return errcode;
}
@@ -400,10 +414,8 @@ ok:
void
tre_free_heur(heur_t *h)
{
- if (h->start != NULL)
- xfree(h->start);
- if (h->end != NULL)
- xfree(h->end);
+ for (int i = 0; h->heurs[i] != NULL; i++)
+ tre_free_fast(h->heurs[i]);
DPRINT("tre_free_heur: resources are freed\n");
}
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Sun Oct 9 21:36:14 2011 (r226176)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Sun Oct 9 21:39:14 2011 (r226177)
@@ -8,12 +8,11 @@
#include "tre-internal.h"
typedef struct {
- fastmatch_t *start;
- fastmatch_t *end;
+ fastmatch_t *heurs[4];
bool prefix;
+ bool newline;
} heur_t;
-
extern int tre_compile_heur(heur_t *h, const tre_char_t *regex,
size_t len, int cflags);
extern void tre_free_heur(heur_t *h);
More information about the svn-src-user
mailing list