svn commit: r368357 - in head/lib: libc/regex libregex/tests

Kyle Evans kevans at FreeBSD.org
Sat Dec 5 03:13:48 UTC 2020


Author: kevans
Date: Sat Dec  5 03:13:47 2020
New Revision: 368357
URL: https://svnweb.freebsd.org/changeset/base/368357

Log:
  libregex: implement \` and \' (begin-of-subj, end-of-subj)
  
  These are GNU extensions, generally equivalent to ^ and $ except that the
  new syntax will not match at the beginning of any line after the first in a
  multi-line subject, nor at the end of any line before the absolute last in
  a multi-line subject.

Modified:
  head/lib/libc/regex/engine.c
  head/lib/libc/regex/regcomp.c
  head/lib/libc/regex/regex2.h
  head/lib/libregex/tests/gnuext.in

Modified: head/lib/libc/regex/engine.c
==============================================================================
--- head/lib/libc/regex/engine.c	Sat Dec  5 02:23:11 2020	(r368356)
+++ head/lib/libc/regex/engine.c	Sat Dec  5 03:13:47 2020	(r368357)
@@ -109,7 +109,7 @@ static int matcher(struct re_guts *g, const char *stri
 static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
 static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
 static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
 #define MAX_RECURSION	100
 #define	BOL	(OUT-1)
 #define	EOL	(BOL-1)
@@ -119,6 +119,10 @@ static states step(struct re_guts *g, sopno start, sop
 #define	EOW	(BOL-5)
 #define	BADCHAR	(BOL-6)
 #define	NONCHAR(c)	((c) <= OUT)
+/* sflags */
+#define	SBOS	0x0001
+#define	SEOS	0x0002
+
 #ifdef REDEBUG
 static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
 #endif
@@ -457,6 +461,8 @@ dissect(struct match *m,
 		case OEOL:
 		case OBOW:
 		case OEOW:
+		case OBOS:
+		case OEOS:
 			break;
 		case OANY:
 		case OANYOF:
@@ -657,6 +663,18 @@ backref(struct match *m,
 			if (wc == BADCHAR || !CHIN(cs, wc))
 				return(NULL);
 			break;
+		case OBOS:
+			if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case OEOS:
+			if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
 		case OBOL:
 			if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
 			    (sp > m->offp && sp < m->endp &&
@@ -819,15 +837,16 @@ walk(struct match *m, const char *start, const char *s
 	wint_t c;
 	wint_t lastc;		/* previous c */
 	wint_t flagch;
-	int i;
+	int i, sflags;
 	const char *matchp;	/* last p at which a match ended */
 	size_t clen;
 
+	sflags = 0;
 	AT("slow", start, stop, startst, stopst);
 	CLEAR(st);
 	SET1(st, startst);
 	SP("sstart", st, *p);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
+	st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
 	if (fast)
 		ASSIGN(fresh, st);
 	matchp = NULL;
@@ -844,6 +863,7 @@ walk(struct match *m, const char *start, const char *s
 	for (;;) {
 		/* next character */
 		lastc = c;
+		sflags = 0;
 		if (p == m->endp) {
 			c = OUT;
 			clen = 0;
@@ -866,9 +886,20 @@ walk(struct match *m, const char *start, const char *s
 			flagch = (flagch == BOL) ? BOLEOL : EOL;
 			i += m->g->neol;
 		}
+		if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
+			sflags |= SBOS;
+			/* Step one more for BOS. */
+			i++;
+		}
+		if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
+			sflags |= SEOS;
+			/* Step one more for EOS. */
+			i++;
+		}
 		if (i != 0) {
 			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
+				st = step(m->g, startst, stopst, st, flagch, st,
+				    sflags);
 			SP("sboleol", st, c);
 		}
 
@@ -882,7 +913,7 @@ walk(struct match *m, const char *start, const char *s
 			flagch = EOW;
 		}
 		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
+			st = step(m->g, startst, stopst, st, flagch, st, sflags);
 			SP("sboweow", st, c);
 		}
 
@@ -903,9 +934,10 @@ walk(struct match *m, const char *start, const char *s
 		else
 			ASSIGN(st, empty);
 		assert(c != OUT);
-		st = step(m->g, startst, stopst, tmp, c, st);
+		st = step(m->g, startst, stopst, tmp, c, st, sflags);
 		SP("saft", st, c);
-		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
+		    st));
 		p += clen;
 	}
 
@@ -939,7 +971,8 @@ step(struct re_guts *g,
 	sopno stop,		/* state after stop state within strip */
 	states bef,		/* states reachable before */
 	wint_t ch,		/* character or NONCHAR code */
-	states aft)		/* states already known reachable after */
+	states aft,		/* states already known reachable after */
+	int sflags)		/* state flags */
 {
 	cset *cs;
 	sop s;
@@ -958,6 +991,14 @@ step(struct re_guts *g,
 			/* only characters can match */
 			assert(!NONCHAR(ch) || ch != OPND(s));
 			if (ch == OPND(s))
+				FWD(aft, bef, 1);
+			break;
+		case OBOS:
+			if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
+				FWD(aft, bef, 1);
+			break;
+		case OEOS:
+			if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
 				FWD(aft, bef, 1);
 			break;
 		case OBOL:

Modified: head/lib/libc/regex/regcomp.c
==============================================================================
--- head/lib/libc/regex/regcomp.c	Sat Dec  5 02:23:11 2020	(r368356)
+++ head/lib/libc/regex/regcomp.c	Sat Dec  5 03:13:47 2020	(r368357)
@@ -480,6 +480,12 @@ p_ere_exp(struct parse *p, struct branchc *bc)
 		if (p->gnuext) {
 			handled = 1;
 			switch (wc) {
+			case '`':
+				EMIT(OBOS, 0);
+				break;
+			case '\'':
+				EMIT(OEOS, 0);
+				break;
 			case 'W':
 			case 'w':
 			case 'S':
@@ -833,6 +839,12 @@ p_simp_re(struct parse *p, struct branchc *bc)
 		if (p->gnuext) {
 			handled = true;
 			switch (c) {
+			case BACKSL|'`':
+				EMIT(OBOS, 0);
+				break;
+			case BACKSL|'\'':
+				EMIT(OEOS, 0);
+				break;
 			case BACKSL|'W':
 			case BACKSL|'w':
 			case BACKSL|'S':
@@ -1878,6 +1890,8 @@ findmust(struct parse *p, struct re_guts *g)
 		case OEOW:
 		case OBOL:
 		case OEOL:
+		case OBOS:
+		case OEOS:
 		case O_QUEST:
 		case O_CH:
 		case OEND:

Modified: head/lib/libc/regex/regex2.h
==============================================================================
--- head/lib/libc/regex/regex2.h	Sat Dec  5 02:23:11 2020	(r368356)
+++ head/lib/libc/regex/regex2.h	Sat Dec  5 03:13:47 2020	(r368357)
@@ -104,6 +104,8 @@ typedef unsigned long sopno;
 #define	O_CH	(18L<<OPSHIFT)	/* end choice	back to OOR1		*/
 #define	OBOW	(19L<<OPSHIFT)	/* begin word	-			*/
 #define	OEOW	(20L<<OPSHIFT)	/* end word	-			*/
+#define	OBOS	(21L<<OPSHIFT)	/* begin subj.  -			*/
+#define	OEOS	(22L<<OPSHIFT)	/* end subj.	-			*/
 
 /*
  * Structures for [] character-set representation.

Modified: head/lib/libregex/tests/gnuext.in
==============================================================================
--- head/lib/libregex/tests/gnuext.in	Sat Dec  5 02:23:11 2020	(r368356)
+++ head/lib/libregex/tests/gnuext.in	Sat Dec  5 03:13:47 2020	(r368357)
@@ -25,8 +25,12 @@ a\|b\|c	b	abc	a
 #\B[abc]\B	&	<abc>	b
 #\B[abc]+	-	<abc>	bc
 #\B[abc]\+	b	<abc>	bc
-#\`abc\'	&	abc	abc
-#\`.+\'	-	abNc	abNc
-#\`.\+\'	b	abNc	abNc
-#(\`a)	-	Na
-#(a\')	-	aN
+\`abc	&	abc	abc
+abc\'	&	abc	abc
+\`abc\'	&	abc	abc
+\`.+\'	-	abNc	abNc
+\`.\+\'	b	abNc	abNc
+(\`a)	-	Na
+(a\`)	-	aN
+(a\')	-	aN
+(\'a)	-	Na


More information about the svn-src-head mailing list