svn commit: r368357 - in head/lib: libc/regex libregex/tests
Kyle Evans
kevans at FreeBSD.org
Sat Dec 5 03:13:48 UTC 2020
Author: kevans
Date: Sat Dec 5 03:13:47 2020
New Revision: 368357
URL: https://svnweb.freebsd.org/changeset/base/368357
Log:
libregex: implement \` and \' (begin-of-subj, end-of-subj)
These are GNU extensions, generally equivalent to ^ and $ except that the
new syntax will not match at the beginning of any line after the first, or
at the end of any line before the absolute last, in a multi-line
expression.
Modified:
head/lib/libc/regex/engine.c
head/lib/libc/regex/regcomp.c
head/lib/libc/regex/regex2.h
head/lib/libregex/tests/gnuext.in
Modified: head/lib/libc/regex/engine.c
==============================================================================
--- head/lib/libc/regex/engine.c Sat Dec 5 02:23:11 2020 (r368356)
+++ head/lib/libc/regex/engine.c Sat Dec 5 03:13:47 2020 (r368357)
@@ -109,7 +109,7 @@ static int matcher(struct re_guts *g, const char *stri
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
#define MAX_RECURSION 100
#define BOL (OUT-1)
#define EOL (BOL-1)
@@ -119,6 +119,10 @@ static states step(struct re_guts *g, sopno start, sop
#define EOW (BOL-5)
#define BADCHAR (BOL-6)
#define NONCHAR(c) ((c) <= OUT)
+/* sflags */
+#define SBOS 0x0001
+#define SEOS 0x0002
+
#ifdef REDEBUG
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
#endif
@@ -457,6 +461,8 @@ dissect(struct match *m,
case OEOL:
case OBOW:
case OEOW:
+ case OBOS:
+ case OEOS:
break;
case OANY:
case OANYOF:
@@ -657,6 +663,18 @@ backref(struct match *m,
if (wc == BADCHAR || !CHIN(cs, wc))
return(NULL);
break;
+ case OBOS:
+ if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOS:
+ if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
case OBOL:
if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
(sp > m->offp && sp < m->endp &&
@@ -819,15 +837,16 @@ walk(struct match *m, const char *start, const char *s
wint_t c;
wint_t lastc; /* previous c */
wint_t flagch;
- int i;
+ int i, sflags;
const char *matchp; /* last p at which a match ended */
size_t clen;
+ sflags = 0;
AT("slow", start, stop, startst, stopst);
CLEAR(st);
SET1(st, startst);
SP("sstart", st, *p);
- st = step(m->g, startst, stopst, st, NOTHING, st);
+ st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
if (fast)
ASSIGN(fresh, st);
matchp = NULL;
@@ -844,6 +863,7 @@ walk(struct match *m, const char *start, const char *s
for (;;) {
/* next character */
lastc = c;
+ sflags = 0;
if (p == m->endp) {
c = OUT;
clen = 0;
@@ -866,9 +886,20 @@ walk(struct match *m, const char *start, const char *s
flagch = (flagch == BOL) ? BOLEOL : EOL;
i += m->g->neol;
}
+ if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
+ sflags |= SBOS;
+ /* Step one more for BOS. */
+ i++;
+ }
+ if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
+ sflags |= SEOS;
+ /* Step one more for EOS. */
+ i++;
+ }
if (i != 0) {
for (; i > 0; i--)
- st = step(m->g, startst, stopst, st, flagch, st);
+ st = step(m->g, startst, stopst, st, flagch, st,
+ sflags);
SP("sboleol", st, c);
}
@@ -882,7 +913,7 @@ walk(struct match *m, const char *start, const char *s
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
- st = step(m->g, startst, stopst, st, flagch, st);
+ st = step(m->g, startst, stopst, st, flagch, st, sflags);
SP("sboweow", st, c);
}
@@ -903,9 +934,10 @@ walk(struct match *m, const char *start, const char *s
else
ASSIGN(st, empty);
assert(c != OUT);
- st = step(m->g, startst, stopst, tmp, c, st);
+ st = step(m->g, startst, stopst, tmp, c, st, sflags);
SP("saft", st, c);
- assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
+ st));
p += clen;
}
@@ -939,7 +971,8 @@ step(struct re_guts *g,
sopno stop, /* state after stop state within strip */
states bef, /* states reachable before */
wint_t ch, /* character or NONCHAR code */
- states aft) /* states already known reachable after */
+ states aft, /* states already known reachable after */
+ int sflags) /* state flags */
{
cset *cs;
sop s;
@@ -958,6 +991,14 @@ step(struct re_guts *g,
/* only characters can match */
assert(!NONCHAR(ch) || ch != OPND(s));
if (ch == OPND(s))
+ FWD(aft, bef, 1);
+ break;
+ case OBOS:
+ if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
+ FWD(aft, bef, 1);
+ break;
+ case OEOS:
+ if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
FWD(aft, bef, 1);
break;
case OBOL:
Modified: head/lib/libc/regex/regcomp.c
==============================================================================
--- head/lib/libc/regex/regcomp.c Sat Dec 5 02:23:11 2020 (r368356)
+++ head/lib/libc/regex/regcomp.c Sat Dec 5 03:13:47 2020 (r368357)
@@ -480,6 +480,12 @@ p_ere_exp(struct parse *p, struct branchc *bc)
if (p->gnuext) {
handled = 1;
switch (wc) {
+ case '`':
+ EMIT(OBOS, 0);
+ break;
+ case '\'':
+ EMIT(OEOS, 0);
+ break;
case 'W':
case 'w':
case 'S':
@@ -833,6 +839,12 @@ p_simp_re(struct parse *p, struct branchc *bc)
if (p->gnuext) {
handled = true;
switch (c) {
+ case BACKSL|'`':
+ EMIT(OBOS, 0);
+ break;
+ case BACKSL|'\'':
+ EMIT(OEOS, 0);
+ break;
case BACKSL|'W':
case BACKSL|'w':
case BACKSL|'S':
@@ -1878,6 +1890,8 @@ findmust(struct parse *p, struct re_guts *g)
case OEOW:
case OBOL:
case OEOL:
+ case OBOS:
+ case OEOS:
case O_QUEST:
case O_CH:
case OEND:
Modified: head/lib/libc/regex/regex2.h
==============================================================================
--- head/lib/libc/regex/regex2.h Sat Dec 5 02:23:11 2020 (r368356)
+++ head/lib/libc/regex/regex2.h Sat Dec 5 03:13:47 2020 (r368357)
@@ -104,6 +104,8 @@ typedef unsigned long sopno;
#define O_CH (18L<<OPSHIFT) /* end choice back to OOR1 */
#define OBOW (19L<<OPSHIFT) /* begin word - */
#define OEOW (20L<<OPSHIFT) /* end word - */
+#define OBOS (21L<<OPSHIFT) /* begin subj. - */
+#define OEOS (22L<<OPSHIFT) /* end subj. - */
/*
* Structures for [] character-set representation.
Modified: head/lib/libregex/tests/gnuext.in
==============================================================================
--- head/lib/libregex/tests/gnuext.in Sat Dec 5 02:23:11 2020 (r368356)
+++ head/lib/libregex/tests/gnuext.in Sat Dec 5 03:13:47 2020 (r368357)
@@ -25,8 +25,12 @@ a\|b\|c b abc a
#\B[abc]\B & <abc> b
#\B[abc]+ - <abc> bc
#\B[abc]\+ b <abc> bc
-#\`abc\' & abc abc
-#\`.+\' - abNc abNc
-#\`.\+\' b abNc abNc
-#(\`a) - Na
-#(a\') - aN
+\`abc & abc abc
+abc\' & abc abc
+\`abc\' & abc abc
+\`.+\' - abNc abNc
+\`.\+\' b abNc abNc
+(\`a) - Na
+(a\`) - aN
+(a\') - aN
+(\'a) - Na
More information about the svn-src-head
mailing list