git: 587458b7d67a - main - rpcgen: Perform shell-style word expansion on RPCGEN_CPP

From: Jessica Clarke <jrtc27_at_FreeBSD.org>
Date: Fri, 14 Jul 2023 01:37:23 UTC
The branch main has been updated by jrtc27:

URL: https://cgit.FreeBSD.org/src/commit/?id=587458b7d67a697c53fc2e31fb3407d1084fe773

commit 587458b7d67a697c53fc2e31fb3407d1084fe773
Author:     Jessica Clarke <jrtc27@FreeBSD.org>
AuthorDate: 2023-07-14 01:35:25 +0000
Commit:     Jessica Clarke <jrtc27@FreeBSD.org>
CommitDate: 2023-07-14 01:35:25 +0000

    rpcgen: Perform shell-style word expansion on RPCGEN_CPP
    
    Up until recently, CPP has been a list of space-separated words, with no
    quotes, backslashes or other characters with special meaning to a shell.
    However, as of 8fad2cda93c7, (escaped) quotes appear in CPP, and the
    rudimentary parser in rpcgen is insufficient, since it will leave the
    escaped quotes as escaped rather than performing one level of expansion
    as would be done by a shell (whether in a script or a Makefile).
    
    Rather than hack around this in all the places RPCGEN_CPP gets set,
    implement proper expansion inside rpcgen. Note that this only deals with
    a subset of shell syntax, since we don't handle any of:
    
      | & ; < > ( ) $ ` * ? [ # ~ = %
    
    having special meaning (with the exception of how a backslash behaves
    inside double quotes, where \$ means a literal $ inside double quotes
    but \a means a literal \a), instead using their literal value, but those
    are all reasonable restrictions, and can be worked around by avoiding
    their use; what's important is that we get the quoting and splitting
    right.
    
    This fixes -Winvalid-pp-token spew during build${libcompat}.
    
    Reviewed by:    brooks
    Fixes:          8fad2cda93c7 ("bsd.compat.mk: Provide new CPP and sub-make variables")
    Differential Revision:  https://reviews.freebsd.org/D41013
---
 usr.bin/rpcgen/rpc_main.c | 111 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 103 insertions(+), 8 deletions(-)

diff --git a/usr.bin/rpcgen/rpc_main.c b/usr.bin/rpcgen/rpc_main.c
index ba9eda676b39..6576d6cfb919 100644
--- a/usr.bin/rpcgen/rpc_main.c
+++ b/usr.bin/rpcgen/rpc_main.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 
 #include <err.h>
 #include <ctype.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
@@ -270,20 +271,114 @@ add_warning(void)
 static void
 prepend_cpp(void)
 {
-	int idx = 0;
-	const char *var;
-	char *dupvar, *s, *t;
+	int idx = 0, quoted;
+	const char *var, *s;
+	char *dupvar, *t, *word;
 
 	if (CPP != NULL)
 		insarg(idx++, CPP);
 	else if ((var = getenv("RPCGEN_CPP")) == NULL)
 		insarg(idx++, "/usr/bin/cpp");
 	else {
-		/* Parse command line in a rudimentary way */
-		dupvar = xstrdup(var);
-		for (s = dupvar; (t = strsep(&s, " \t")) != NULL; ) {
-			if (t[0])
-				insarg(idx++, t);
+		/*
+		 * Parse command line like a shell (but only handle whitespace,
+		 * quotes and backslash).
+		 */
+		dupvar = malloc(strlen(var) + 1);
+		quoted = 0;
+		word = NULL;
+		for (s = var, t = dupvar; *s; ++s) {
+			switch (quoted) {
+			/* Unquoted */
+			case 0:
+				switch (*s) {
+				case ' ':
+				case '\t':
+				case '\n':
+					if (word != NULL) {
+						*t++ = '\0';
+						insarg(idx++, word);
+						word = NULL;
+					}
+					break;
+				case '\'':
+					if (word == NULL)
+						word = t;
+					quoted = 1;
+					break;
+				case '"':
+					if (word == NULL)
+						word = t;
+					quoted = 2;
+					break;
+				case '\\':
+					switch (*(s + 1)) {
+					case '\0':
+						break;
+					case '\n':
+						++s;
+						continue;
+					default:
+						++s;
+						break;
+					}
+					/* FALLTHROUGH */
+				default:
+					if (word == NULL)
+						word = t;
+					*t++ = *s;
+					break;
+				}
+				break;
+
+			/* Single-quoted */
+			case 1:
+				switch (*s) {
+				case '\'':
+					quoted = 0;
+					break;
+				default:
+					*t++ = *s;
+					break;
+				}
+				break;
+
+			/* Double-quoted */
+			case 2:
+				switch (*s) {
+				case '"':
+					quoted = 0;
+					break;
+				case '\\':
+					switch (*(s + 1)) {
+					case '\0':
+						break;
+					case '$':
+					case '`':
+					case '"':
+					case '\\':
+						++s;
+						break;
+					case '\n':
+						++s;
+						continue;
+					default:
+						break;
+					}
+					/* FALLTHROUGH */
+				default:
+					*t++ = *s;
+					break;
+				}
+				break;
+			}
+		}
+		if (quoted)
+			errx(1, "RPCGEN_CPP: unterminated %c",
+			    quoted == 1 ? '\'' : '"');
+		if (word != NULL) {
+			*t++ = '\0';
+			insarg(idx++, word);
 		}
 		free(dupvar);
 	}