ports/184632: textproc/sgmlformat: broken regexp -> regex conversion
Christian Weisgerber
naddy at FreeBSD.org
Mon Dec 9 15:00:01 UTC 2013
>Number: 184632
>Category: ports
>Synopsis: textproc/sgmlformat: broken regexp -> regex conversion
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-ports-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Mon Dec 09 15:00:00 UTC 2013
>Closed-Date:
>Last-Modified:
>Originator: Christian Weisgerber
>Release: FreeBSD 9.2-STABLE amd64
>Organization:
>Environment:
System: FreeBSD lorvorc.mips.inka.de 9.2-STABLE FreeBSD 9.2-STABLE #0 r259069: Sat Dec 7 15:52:28 CET 2013 naddy at lorvorc.mips.inka.de:/usr/obj/usr/src/sys/GENERIC amd64
>Description:
I don't know how to test this functionality, but the conversion
from UNIX V8 <regexp.h> to POSIX <regex.h> in patch-regex.txt cannot
possibly be correct.
You may want to compare the regex(3) and old regexp(3) man pages.
http://svnweb.freebsd.org/base/stable/8/lib/libcompat/regexp/regexp.3?revision=196045&view=markup
The most obvious problem is in translate.c, where changes like this
- if (!regexec(t->attpair[a].rex, atval)) match = 0;
+ if (!regexec(&t->attpair[a].rex, atval, 0, NULL, 0)) match = 0;
ignore that V8 regexec() and POSIX regexec() have inverted result
codes. V8 regexec() returns 0 for failure, POSIX regexec() returns
0 for success.
The problems in traninit.c are more subtle:
- if (!(T.var_RE_value=regcomp(buf))) {
+ if (regcomp(&T.var_RE_value, buf, 0) != 0) {
What happens in the error case? When POSIX regcomp() returns an
error, the value of var_RE_value will be undefined. However, in
translate.c, regexec() is blindly called with this value.
Also, V8 regcomp() uses extended regular expressions, so the POSIX regcomp() call needs REG_EXTENDED instead of a cflags value of 0.
I have attached a replacement patch that reimplements the conversion
from regexp to regex. It uses two simple wrapper functions that
map the required V8 regcomp/regexec functionality onto POSIX
regcomp/regexec. In particular, this allows us to still use
(regex_t *)NULL to indicate an invalid/nonexistent regular expression.
Again, I can't actually test this.
>How-To-Repeat:
>Fix:
--- instant/tables.c.orig 1996-09-08 03:55:10.000000000 +0200
+++ instant/tables.c 2013-11-30 23:51:25.000000000 +0100
@@ -84,7 +84,7 @@
#include <sys/types.h>
#include <errno.h>
-#include <regexp.h>
+#include <regex.h>
#include "general.h"
#include "translate.h"
--- instant/traninit.c.orig 1997-07-16 18:44:12.000000000 +0200
+++ instant/traninit.c 2013-11-30 23:51:25.000000000 +0100
@@ -69,7 +69,7 @@
#include <memory.h>
#include <sys/types.h>
#include <errno.h>
-#include <regexp.h>
+#include <regex.h>
#include "general.h"
#include "translate.h"
@@ -100,6 +100,23 @@
void AddSDATA(const char *from, const char *to);
/* ______________________________________________________________________ */
+/* Minimal compatibility wrapper for UNIX V8 regcomp(), match only: returns a
+ * malloc'ed regex_t compiled as REG_EXTENDED|REG_NOSUB, or NULL on any failure. */
+
+static regex_t *v8_regcomp(const char *pattern)
+{
+ regex_t *re;
+ if ((re = malloc(sizeof(regex_t))) != NULL) {
+ if (regcomp(re, pattern, REG_EXTENDED|REG_NOSUB)) {
+ free(re);
+ return NULL;
+ }
+ }
+ return re;
+}
+#define regcomp v8_regcomp
+
+/* ______________________________________________________________________ */
/* Read the translation specs from the input file, storing in memory.
* Arguments:
* Name of translation spec file.
--- instant/translate.c.orig 1996-09-08 03:55:10.000000000 +0200
+++ instant/translate.c 2013-11-30 23:51:25.000000000 +0100
@@ -69,7 +69,7 @@
#include <memory.h>
#include <sys/types.h>
#include <errno.h>
-#include <regexp.h>
+#include <regex.h>
#include "general.h"
#define STORAGE
@@ -82,6 +82,18 @@
static void WasProcessed(Element_t *);
/* ______________________________________________________________________ */
+/* Minimal compatibility wrapper for UNIX V8 regexec(), match only: returns
+ * nonzero on a match, 0 on no match or when 're' is NULL (no valid regex). */
+
+static int v8_regexec(const regex_t *re, const char *string)
+{
+ if (re == NULL)
+ return 0;
+ return !regexec(re, string, 0, NULL, 0);
+}
+#define regexec v8_regexec
+
+/* ______________________________________________________________________ */
/* Translate the subtree starting at 'e'. Output goes to 'fp'.
* This is the entry point for translating an instance.
* Arguments:
--- instant/translate.h.orig 1996-09-08 03:55:10.000000000 +0200
+++ instant/translate.h 2013-11-30 23:51:25.000000000 +0100
@@ -75,7 +75,7 @@
typedef struct {
char *name; /* attribute name string */
char *val; /* attribute value string */
- regexp *rex; /* attribute value reg expr (compiled) */
+ regex_t *rex; /* attribute value reg expr (compiled) */
} AttPair_t;
typedef struct _Trans {
@@ -83,19 +83,19 @@
char *gi; /* element name of tag under consideration */
char **gilist; /* list of element names (multiple gi's) */
char *context; /* context in tree - looking depth levels up */
- regexp *context_re; /* tree heirarchy looking depth levels up */
+ regex_t *context_re; /* tree heirarchy looking depth levels up */
int depth; /* number of levels to look up the tree */
AttPair_t *attpair; /* attr name-value pairs */
int nattpairs; /* number of name-value pairs */
char *parent; /* GI has this element as parent */
int nth_child; /* GI is Nth child of this of parent element */
char *content; /* element has this string in content */
- regexp *content_re; /* content reg expr (compiled) */
+ regex_t *content_re; /* content reg expr (compiled) */
char *pattrset; /* is this attr set (any value) in parent? */
char *var_name; /* variable name */
char *var_value; /* variable value */
char *var_RE_name; /* variable name (for VarREValue) */
- regexp *var_RE_value; /* variable value (compiled, for VarREValue) */
+ regex_t *var_RE_value; /* variable value (compiled, for VarREValue) */
Map_t *relations; /* various relations to check */
/* actions */
@@ -150,4 +150,3 @@
void OSFtable(Element_t *, FILE *, char **, int);
/* ______________________________________________________________________ */
-
--- instant/tranvar.c.orig 1997-02-07 03:40:45.000000000 +0100
+++ instant/tranvar.c 2013-11-30 23:51:25.000000000 +0100
@@ -66,7 +66,7 @@
#include <sys/types.h>
#include <errno.h>
-#include <regexp.h>
+#include <regex.h>
#include "general.h"
#include "translate.h"
--- instant/util.c.orig 1996-09-08 03:55:10.000000000 +0200
+++ instant/util.c 2013-11-30 23:51:25.000000000 +0100
@@ -85,7 +85,7 @@
#include <sys/stat.h>
#include <sys/file.h>
#include <errno.h>
-#include <regexp.h>
+#include <regex.h>
/* CSS don't have it and I don't see where it's used
#include <values.h>
*/
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-ports-bugs
mailing list