git: 343b776fd028 - main - unifdef: Reapply our 61287be18189.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Mon, 21 Aug 2023 17:53:20 UTC
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=343b776fd0280f4dc95a1a61fd9ac40c78ce684e

commit 343b776fd0280f4dc95a1a61fd9ac40c78ce684e
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2023-08-21 17:43:21 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2023-08-21 17:52:06 +0000

    unifdef: Reapply our 61287be18189.
---
 contrib/unifdef/unifdef.c | 139 +++++++++++++++++++++++++++-------------------
 contrib/unifdef/unifdef.h |   3 +
 2 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/contrib/unifdef/unifdef.c b/contrib/unifdef/unifdef.c
index dc145a24232f..82f6acd66038 100644
--- a/contrib/unifdef/unifdef.c
+++ b/contrib/unifdef/unifdef.c
@@ -138,7 +138,6 @@ static char const * const linestate_name[] = {
  */
 #define	MAXDEPTH        64			/* maximum #if nesting */
 #define	MAXLINE         4096			/* maximum length of line */
-#define	MAXSYMS         16384			/* maximum number of symbols */
 
 /*
  * Sometimes when editing a keyword the replacement text is longer, so
@@ -146,6 +145,26 @@ static char const * const linestate_name[] = {
  */
 #define	EDITSLOP        10
 
+/*
+ * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
+ * parameters.
+ */
+struct macro {
+	RB_ENTRY(macro)	entry;
+	const char	*name;
+	const char	*value;
+	bool		ignore;		/* -iDsym or -iUsym */
+};
+
+static int
+macro_cmp(struct macro *a, struct macro *b)
+{
+	return (strcmp(a->name, b->name));
+}
+
+static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(&macro_tree);
+RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
+
 /*
  * Globals.
  */
@@ -163,11 +182,6 @@ static bool             symlist;		/* -s: output symbol list */
 static bool             symdepth;		/* -S: output symbol depth */
 static bool             text;			/* -t: this is a text file */
 
-static const char      *symname[MAXSYMS];	/* symbol name */
-static const char      *value[MAXSYMS];		/* -Dsym=value */
-static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
-static int              nsyms;			/* number of symbols */
-
 static FILE            *input;			/* input file pointer */
 static const char      *filename;		/* input file name */
 static int              linenum;		/* current line number */
@@ -216,12 +230,12 @@ static char            *astrcat(const char *, const char *);
 static void             cleantemp(void);
 static void             closeio(void);
 static void             debug(const char *, ...);
-static void             debugsym(const char *, int);
+static void             debugsym(const char *, const struct macro *);
 static bool             defundef(void);
 static void             defundefile(const char *);
 static void             done(void);
 static void             error(const char *);
-static int              findsym(const char **);
+static struct macro    *findsym(const char **);
 static void             flushline(bool);
 static void             hashline(void);
 static void             help(void);
@@ -781,7 +795,7 @@ static Linetype
 parseline(void)
 {
 	const char *cp;
-	int cursym;
+	struct macro *cursym;
 	Linetype retval;
 	Comment_state wascomment;
 
@@ -803,15 +817,15 @@ parseline(void)
 	if ((cp = matchsym("ifdef", keyword)) != NULL ||
 	    (cp = matchsym("ifndef", keyword)) != NULL) {
 		cp = skipcomment(cp);
-		if ((cursym = findsym(&cp)) < 0)
+		if ((cursym = findsym(&cp)) == NULL)
 			retval = LT_IF;
 		else {
 			retval = (keyword[2] == 'n')
 			    ? LT_FALSE : LT_TRUE;
-			if (value[cursym] == NULL)
+			if (cursym->value == NULL)
 				retval = (retval == LT_TRUE)
 				    ? LT_FALSE : LT_TRUE;
-			if (ignore[cursym])
+			if (cursym->ignore)
 				retval = (retval == LT_TRUE)
 				    ? LT_TRUEI : LT_FALSEI;
 		}
@@ -1013,7 +1027,7 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp)
 {
 	const char *cp;
 	char *ep;
-	int sym;
+	struct macro *sym;
 	bool defparen;
 	Linetype lt;
 
@@ -1078,27 +1092,27 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp)
 			debug("eval%d defined missing ')'", prec(ops));
 			return (LT_ERROR);
 		}
-		if (sym < 0) {
+		if (sym == NULL) {
 			debug("eval%d defined unknown", prec(ops));
 			lt = LT_IF;
 		} else {
-			debug("eval%d defined %s", prec(ops), symname[sym]);
-			*valp = (value[sym] != NULL);
+			debug("eval%d defined %s", prec(ops), sym->name);
+			*valp = (sym->value != NULL);
 			lt = *valp ? LT_TRUE : LT_FALSE;
 		}
 		constexpr = false;
 	} else if (!endsym(*cp)) {
 		debug("eval%d symbol", prec(ops));
 		sym = findsym(&cp);
-		if (sym < 0) {
+		if (sym == NULL) {
 			lt = LT_IF;
 			cp = skipargs(cp);
-		} else if (value[sym] == NULL) {
+		} else if (sym->value == NULL) {
 			*valp = 0;
 			lt = LT_FALSE;
 		} else {
-			*valp = strtol(value[sym], &ep, 0);
-			if (*ep != '\0' || ep == value[sym])
+			*valp = strtol(sym->value, &ep, 0);
+			if (*ep != '\0' || ep == sym->value)
 				return (LT_ERROR);
 			lt = *valp ? LT_TRUE : LT_FALSE;
 			cp = skipargs(cp);
@@ -1426,17 +1440,18 @@ matchsym(const char *s, const char *t)
  * Look for the symbol in the symbol table. If it is found, we return
  * the symbol table index, else we return -1.
  */
-static int
+static struct macro *
 findsym(const char **strp)
 {
 	const char *str;
-	int symind;
+	char *strkey;
+	struct macro key, *res;
 
 	str = *strp;
 	*strp = skipsym(str);
 	if (symlist) {
 		if (*strp == str)
-			return (-1);
+			return (NULL);
 		if (symdepth && firstsym)
 			printf("%s%3d", zerosyms ? "" : "\n", depth);
 		firstsym = zerosyms = false;
@@ -1445,15 +1460,26 @@ findsym(const char **strp)
 		       (int)(*strp-str), str,
 		       symdepth ? "" : "\n");
 		/* we don't care about the value of the symbol */
-		return (0);
+		return (NULL);
 	}
-	for (symind = 0; symind < nsyms; ++symind) {
-		if (matchsym(symname[symind], str) != NULL) {
-			debugsym("findsym", symind);
-			return (symind);
-		}
-	}
-	return (-1);
+
+	/*
+	 * 'str' just points into the current mid-parse input and is not
+	 * nul-terminated.  We know the length of the symbol, *strp - str, but
+	 * need to provide a nul-terminated lookup key for RB_FIND's comparison
+	 * function.  Create one here.
+	 */
+	strkey = malloc(*strp - str + 1);
+	memcpy(strkey, str, *strp - str);
+	strkey[*strp - str] = 0;
+
+	key.name = strkey;
+	res = RB_FIND(MACROMAP, &macro_tree, &key);
+	if (res != NULL)
+		debugsym("findsym", res);
+
+	free(strkey);
+	return (res);
 }
 
 /*
@@ -1463,22 +1489,23 @@ static void
 indirectsym(void)
 {
 	const char *cp;
-	int changed, sym, ind;
+	int changed;
+	struct macro *sym, *ind;
 
 	do {
 		changed = 0;
-		for (sym = 0; sym < nsyms; ++sym) {
-			if (value[sym] == NULL)
+		RB_FOREACH(sym, MACROMAP, &macro_tree) {
+			if (sym->value == NULL)
 				continue;
-			cp = value[sym];
+			cp = sym->value;
 			ind = findsym(&cp);
-			if (ind == -1 || ind == sym ||
+			if (ind == NULL || ind == sym ||
 			    *cp != '\0' ||
-			    value[ind] == NULL ||
-			    value[ind] == value[sym])
+			    ind->value == NULL ||
+			    ind->value == sym->value)
 				continue;
 			debugsym("indir...", sym);
-			value[sym] = value[ind];
+			sym->value = ind->value;
 			debugsym("...ectsym", sym);
 			changed++;
 		}
@@ -1510,29 +1537,29 @@ addsym1(bool ignorethis, bool definethis, char *symval)
  * Add a symbol to the symbol table.
  */
 static void
-addsym2(bool ignorethis, const char *sym, const char *val)
+addsym2(bool ignorethis, const char *symname, const char *val)
 {
-	const char *cp = sym;
-	int symind;
-
-	symind = findsym(&cp);
-	if (symind < 0) {
-		if (nsyms >= MAXSYMS)
-			errx(2, "too many symbols");
-		symind = nsyms++;
+	const char *cp = symname;
+	struct macro *sym, *r;
+
+	sym = findsym(&cp);
+	if (sym == NULL) {
+		sym = calloc(1, sizeof(*sym));
+		sym->ignore = ignorethis;
+		sym->name = symname;
+		sym->value = val;
+		r = RB_INSERT(MACROMAP, &macro_tree, sym);
+		assert(r == NULL);
 	}
-	ignore[symind] = ignorethis;
-	symname[symind] = sym;
-	value[symind] = val;
-	debugsym("addsym", symind);
+	debugsym("addsym", sym);
 }
 
 static void
-debugsym(const char *why, int symind)
+debugsym(const char *why, const struct macro *sym)
 {
-	debug("%s %s%c%s", why, symname[symind],
-	    value[symind] ? '=' : ' ',
-	    value[symind] ? value[symind] : "undef");
+	debug("%s %s%c%s", why, sym->name,
+	    sym->value ? '=' : ' ',
+	    sym->value ? sym->value : "undef");
 }
 
 /*
diff --git a/contrib/unifdef/unifdef.h b/contrib/unifdef/unifdef.h
index 2cd706d986ab..b785129406c9 100644
--- a/contrib/unifdef/unifdef.h
+++ b/contrib/unifdef/unifdef.h
@@ -23,8 +23,11 @@
  * SUCH DAMAGE.
  */
 
+#include <sys/param.h>
 #include <sys/stat.h>
+#include <sys/tree.h>
 
+#include <assert.h>
 #include <ctype.h>
 #include <err.h>
 #include <stdarg.h>