git: 83e8c2317846 - main - bintrans: replace the quoted printable encoder/decoder

From: Baptiste Daroussin <bapt_at_FreeBSD.org>
Date: Thu, 25 Aug 2022 07:31:49 UTC
The branch main has been updated by bapt:

URL: https://cgit.FreeBSD.org/src/commit/?id=83e8c2317846dd0c889fb784a11f94f98b982062

commit 83e8c2317846dd0c889fb784a11f94f98b982062
Author:     Baptiste Daroussin <bapt@FreeBSD.org>
AuthorDate: 2022-08-23 15:45:11 +0000
Commit:     Baptiste Daroussin <bapt@FreeBSD.org>
CommitDate: 2022-08-25 07:29:18 +0000

    bintrans: replace the quoted printable encoder/decoder
    
    Replace the quoted-printable code with an implementation that complies
    with RFC 2045.  Add tests for the decoder and the encoder, using
    examples from the Wikipedia page.
    
    Reviewed by:    pstef
    Differential Revision: https://reviews.freebsd.org/D36314
---
 usr.bin/bintrans/Makefile               |   2 +-
 usr.bin/bintrans/qp.c                   | 198 ++++++++++++++++++++++++
 usr.bin/bintrans/quoted-printable.c     | 265 --------------------------------
 usr.bin/bintrans/tests/Makefile         |   2 +
 usr.bin/bintrans/tests/bintrans_test.sh |  35 +++++
 usr.bin/bintrans/tests/textqpdec        |   2 +
 usr.bin/bintrans/tests/textqpenc        |   6 +
 7 files changed, 244 insertions(+), 266 deletions(-)

diff --git a/usr.bin/bintrans/Makefile b/usr.bin/bintrans/Makefile
index a83c820bd660..dc8cada566c7 100644
--- a/usr.bin/bintrans/Makefile
+++ b/usr.bin/bintrans/Makefile
@@ -4,7 +4,7 @@
 .include <src.opts.mk>
 
 PROG=	bintrans
-SRCS=	bintrans.c uuencode.c uudecode.c quoted-printable.c
+SRCS=	bintrans.c uuencode.c uudecode.c qp.c
 MAN=	bintrans.1 uuencode.format.5
 LINKS+=	${BINDIR}/bintrans ${BINDIR}/uuencode
 LINKS+=	${BINDIR}/bintrans ${BINDIR}/b64encode
diff --git a/usr.bin/bintrans/qp.c b/usr.bin/bintrans/qp.c
new file mode 100644
index 000000000000..61367e096b9f
--- /dev/null
+++ b/usr.bin/bintrans/qp.c
@@ -0,0 +1,198 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Baptiste Daroussin <bapt@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+extern int main_quotedprintable(int, char *[]);
+
+static int
+hexval(int c)
+{
+	if ('0' <= c && c <= '9')
+		return c - '0';
+	return (10 + c - 'A');
+}
+
+
+static int
+decode_char(const char *s)
+{
+	return (16 * hexval(toupper(s[1])) + hexval(toupper(s[2])));
+}
+
+
+static void
+decode_quoted_printable(const char *body, FILE *fpo)
+{
+	while (*body != '\0') {
+		switch (*body) {
+		case '=':
+			if (strlen(body) < 2) {
+				fputc(*body, fpo);
+				break;
+			}
+
+			if (body[1] == '\r' && body[2] == '\n') {
+				body += 2;
+				break;
+			}
+			if (body[1] == '\n') {
+				body++;
+				break;
+			}
+			if (strchr("0123456789ABCDEFabcdef", body[1]) == NULL) {
+				fputc(*body, fpo);
+				break;
+			}
+			if (strchr("0123456789ABCDEFabcdef", body[2]) == NULL) {
+				fputc(*body, fpo);
+				break;
+			}
+			fputc(decode_char(body), fpo);
+			body += 2;
+			break;
+		default:
+			fputc(*body, fpo);
+			break;
+		}
+		body++;
+	}
+}
+
+static void
+encode_quoted_printable(const char *body, FILE *fpo)
+{
+	char prev;
+	const char *end = body + strlen(body);
+	size_t linelen = 0;
+
+	while (*body != '\0') {
+		if (linelen == 75) {
+			fputs("=\r\n", fpo);
+			linelen = 0;
+		}
+		if (!isascii(*body) ||
+		    *body == '=' ||
+		    (*body == '.' && body + 1 < end &&
+		      (body[1] == '\n' || body[1] == '\r'))) {
+			fprintf(fpo, "=%02X", (unsigned char)*body);
+			linelen += 2;
+			prev = *body;
+		} else if (*body < 33 && *body != '\n') {
+			if ((*body == ' ' || *body == '\t') &&
+			    body + 1 < end &&
+			    (body[1] != '\n' && body[1] != '\r')) {
+				fputc(*body, fpo);
+				prev = *body;
+			} else {
+				fprintf(fpo, "=%02X", (unsigned char)*body);
+				linelen += 2;
+				prev = '_';
+			}
+		} else if (*body == '\n') {
+			if (prev == ' ' || prev == '\t') {
+				fputc('=', fpo);
+			}
+			fputc('\n', fpo);
+			linelen = 0;
+			prev = 0;
+		} else {
+			fputc(*body, fpo);
+			prev = *body;
+		}
+		body++;
+		linelen++;
+	}
+}
+
+static void
+qp(FILE *fp, FILE *fpo, bool encode)
+{
+	char *line = NULL;
+	size_t linecap = 0;
+	ssize_t linelen;
+	void (*codec)(const char *line, FILE *f);
+
+	codec = encode ? encode_quoted_printable : decode_quoted_printable ;
+
+	while ((linelen = getline(&line, &linecap, fp)) > 0)
+		codec(line, fpo);
+	free(line);
+}
+
+static void
+usage(void)
+{
+	fprintf(stderr,
+	   "usage: bintrans qp [-u] [-o outputfile] [file name]\n");
+}
+
+int
+main_quotedprintable(int argc, char *argv[])
+{
+	int i;
+	bool encode = true;
+	FILE *fp = stdin;
+	FILE *fpo = stdout;
+
+	for (i = 1; i < argc; ++i) {
+		if (argv[i][0] == '-') {
+			switch (argv[i][1]) {
+			case 'o':
+				if (++i >= argc) {
+					fprintf(stderr, "qp: -o requires a file name.\n");
+					exit(EXIT_FAILURE);
+				}
+				fpo = fopen(argv[i], "w");
+				if (fpo == NULL) {
+					perror(argv[i]);
+					exit(EXIT_FAILURE);
+				}
+				break;
+			case 'u':
+				encode = false;
+				break;
+			default:
+				usage();
+				exit(EXIT_FAILURE);
+			}
+		} else {
+			fp = fopen(argv[i], "r");
+			if (fp == NULL) {
+				perror(argv[i]);
+				exit(EXIT_FAILURE);
+			}
+		}
+	}
+	qp(fp, fpo, encode);
+
+	return (EXIT_SUCCESS);
+}
diff --git a/usr.bin/bintrans/quoted-printable.c b/usr.bin/bintrans/quoted-printable.c
deleted file mode 100644
index b0e35509a2d9..000000000000
--- a/usr.bin/bintrans/quoted-printable.c
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
-Copyright (c) 1991 Bell Communications Research, Inc. (Bellcore)
-
-Permission to use, copy, modify, and distribute this material
-for any purpose and without fee is hereby granted, provided
-that the above copyright notice and this permission notice
-appear in all copies, and that the name of Bellcore not be
-used in advertising or publicity pertaining to this
-material without the specific, prior written permission
-of an authorized representative of Bellcore.  BELLCORE
-MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
-OF THIS MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS",
-WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
-*/
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-extern int	main_quotedprintable(int, char *[]);
-
-static int
-PendingBoundary(char *s, char **Boundaries, int *BoundaryCt)
-{
-	int i;
-	size_t len;
-
-	if (s[0] != '-' || s[1] != '-')
-		return (0);
-
-	for (i = 0; i < *BoundaryCt; ++i) {
-		len = strlen(Boundaries[i]);
-		if (strncmp(s, Boundaries[i], len) == 0) {
-			if (s[len] == '-' && s[len + 1] == '-')
-				*BoundaryCt = i;
-			return (1);
-		}
-	}
-	return (0);
-}
-
-#define basis_hex "0123456789ABCDEF"
-static const char index_hex[128] = {
-	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	 0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
-	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
-};
-
-/* The following version generated complaints on Solaris. */
-/* #define hexchar(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])  */
-/*  Since we're no longer ever calling it with anything signed, this should work: */
-#define hexchar(c)  (((c) > 127) ? -1 : index_hex[(c)])
-
-static void
-toqp(FILE *infile, FILE *outfile)
-{
-	int c, ct = 0, prevc = 255;
-
-	while ((c = getc(infile)) != EOF) {
-		if ((c < 32 && (c != '\n' && c != '\t'))
-			 || (c == '=')
-			 || (c >= 127)
-			 /* Following line is to avoid single periods alone on lines,
-			   which messes up some dumb smtp implementations, sigh... */
-			 || (ct == 0 && c == '.')) {
-			putc('=', outfile);
-			putc(basis_hex[c >> 4], outfile);
-			putc(basis_hex[c & 0xF], outfile);
-			ct += 3;
-			prevc = 'A'; /* close enough */
-		} else if (c == '\n') {
-			if (prevc == ' ' || prevc == '\t') {
-				putc('=', outfile); /* soft & hard lines */
-				putc(c, outfile);
-			}
-			putc(c, outfile);
-			ct = 0;
-			prevc = c;
-		} else {
-			if (c == 'F' && prevc == '\n') {
-				/* HORRIBLE but clever hack suggested by MTR for sendmail-avoidance */
-				c = getc(infile);
-				if (c == 'r') {
-					c = getc(infile);
-					if (c == 'o') {
-						c = getc(infile);
-						if (c == 'm') {
-							c = getc(infile);
-							if (c == ' ') {
-								/* This is the case we are looking for */
-								fputs("=46rom", outfile);
-								ct += 6;
-							} else {
-								fputs("From", outfile);
-								ct += 4;
-							}
-						} else {
-							fputs("Fro", outfile);
-							ct += 3;
-						}
-					} else {
-						fputs("Fr", outfile);
-						ct += 2;
-					}
-				} else {
-					putc('F', outfile);
-					++ct;
-				}
-				ungetc(c, infile);
-				prevc = 'x'; /* close enough -- printable */
-			} else { /* END horrible hack */
-				putc(c, outfile);
-				++ct;
-				prevc = c;
-			}
-		}
-		if (ct > 72) {
-			putc('=', outfile);
-			putc('\n', outfile);
-			ct = 0;
-			prevc = '\n';
-		}
-	}
-	if (ct) {
-		putc('=', outfile);
-		putc('\n', outfile);
-	}
-}
-
-static void
-fromqp(FILE *infile, FILE *outfile, char **boundaries, int *boundaryct)
-{
-	int c1, c2;
-	bool sawnewline = true, neednewline = false;
-	/* The neednewline hack is necessary because the newline leading into
-	  a multipart boundary is part of the boundary, not the data */
-
-	while ((c1 = getc(infile)) != EOF) {
-		if (sawnewline && boundaries && c1 == '-') {
-			char Buf[200];
-			unsigned char *s;
-
-			ungetc(c1, infile);
-			fgets(Buf, sizeof(Buf), infile);
-			if (boundaries
-				 && Buf[0] == '-'
-				 && Buf[1] == '-'
-				 && PendingBoundary(Buf, boundaries, boundaryct)) {
-				return;
-			}
-			/* Not a boundary, now we must treat THIS line as q-p, sigh */
-			if (neednewline) {
-				putc('\n', outfile);
-				neednewline = false;
-			}
-			for (s = (unsigned char *)Buf; *s; ++s) {
-				if (*s == '=') {
-					if (*++s == 0)
-						break;
-					if (*s == '\n') {
-						/* ignore it */
-						sawnewline = true;
-					} else {
-						c1 = hexchar(*s);
-						if (*++s == 0)
-							break;
-						c2 = hexchar(*s);
-						putc(c1 << 4 | c2, outfile);
-					}
-				} else {
-					putc(*s, outfile);
-				}
-			}
-		} else {
-			if (neednewline) {
-				putc('\n', outfile);
-				neednewline = false;
-			}
-			if (c1 == '=') {
-				sawnewline = false;
-				c1 = getc(infile);
-				if (c1 == '\n') {
-					/* ignore it */
-					sawnewline = true;
-				} else {
-					c2 = getc(infile);
-					c1 = hexchar(c1);
-					c2 = hexchar(c2);
-					putc(c1 << 4 | c2, outfile);
-					if (c2 == '\n')
-						sawnewline = true;
-				}
-			} else {
-				if (c1 == '\n') {
-					sawnewline = true;
-					neednewline = true;
-				} else {
-					sawnewline = false;
-					putc(c1, outfile);
-				}
-			}
-		}
-	}
-	if (neednewline) {
-		putc('\n', outfile);
-		neednewline = false;
-	}
-}
-
-static void
-usage(void)
-{
-	fprintf(stderr,
-	   "usage: bintrans qp [-u] [-o outputfile] [file name]\n");
-}
-
-int
-main_quotedprintable(int argc, char *argv[])
-{
-	int i;
-	bool encode = true;
-	FILE *fp = stdin;
-	FILE *fpo = stdout;
-
-	for (i = 1; i < argc; ++i) {
-		if (argv[i][0] == '-') {
-			switch (argv[i][1]) {
-			case 'o':
-				if (++i >= argc) {
-					fprintf(stderr, "qp: -o requires a file name.\n");
-					exit(EXIT_FAILURE);
-				}
-				fpo = fopen(argv[i], "w");
-				if (fpo == NULL) {
-					perror(argv[i]);
-					exit(EXIT_FAILURE);
-				}
-				break;
-			case 'u':
-				encode = false;
-				break;
-			default:
-				usage();
-				exit(EXIT_FAILURE);
-			}
-		} else {
-			fp = fopen(argv[i], "r");
-			if (fp == NULL) {
-				perror(argv[i]);
-				exit(EXIT_FAILURE);
-			}
-		}
-	}
-	if (encode)
-		toqp(fp, fpo);
-	else
-		fromqp(fp, fpo, NULL, 0);
-	return (0);
-}
diff --git a/usr.bin/bintrans/tests/Makefile b/usr.bin/bintrans/tests/Makefile
index d89babfaa16a..13770d8cefb7 100644
--- a/usr.bin/bintrans/tests/Makefile
+++ b/usr.bin/bintrans/tests/Makefile
@@ -2,8 +2,10 @@
 
 PACKAGE=	tests
 
+ATF_TESTS_SH+=	bintrans_test
 TAP_TESTS_SH=	legacy_test
 
+${PACKAGE}FILES+=		textqpenc textqpdec
 ${PACKAGE}FILES+=		regress.base64.in regress.base64.out
 ${PACKAGE}FILES+=		regress.in regress.out
 ${PACKAGE}FILES+=		regress.sh
diff --git a/usr.bin/bintrans/tests/bintrans_test.sh b/usr.bin/bintrans/tests/bintrans_test.sh
new file mode 100644
index 000000000000..b1f9d8f6d1d6
--- /dev/null
+++ b/usr.bin/bintrans/tests/bintrans_test.sh
@@ -0,0 +1,35 @@
+atf_test_case encode_qp
+encode_qp_body()
+{
+	atf_check -e empty -o file:"$(atf_get_srcdir)/textqpenc" bintrans qp $(atf_get_srcdir)/textqpdec
+}
+
+atf_test_case decode_qp
+decode_qp_body()
+{
+	printf "=" > test
+	atf_check -e empty -o inline:"=" bintrans qp -u test
+	printf "=\ra" > test
+	atf_check -e empty -o inline:"=\ra" bintrans qp -u test
+	printf "=\r\na" > test
+	atf_check -e empty -o inline:"a" bintrans qp -u test
+	printf "This is a line" > test
+	atf_check -e empty -o inline:"This is a line" bintrans qp -u test
+	printf "This= is a line" > test
+	atf_check -e empty -o inline:"This= is a line" bintrans qp -u test
+	printf "This=2 is a line" > test
+	atf_check -e empty -o inline:"This=2 is a line" bintrans qp -u test
+	printf "This=23 is a line" > test
+	atf_check -e empty -o inline:"This# is a line" bintrans qp -u test
+	printf "This=3D is a line" > test
+	atf_check -e empty -o inline:"This= is a line" bintrans qp -u test
+	printf "This_ is a line" > test
+	atf_check -e empty -o inline:"This_ is a line" bintrans qp -u test
+	atf_check -e empty -o file:"$(atf_get_srcdir)/textqpdec" bintrans qp -u $(atf_get_srcdir)/textqpenc
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case decode_qp
+	atf_add_test_case encode_qp
+}
diff --git a/usr.bin/bintrans/tests/textqpdec b/usr.bin/bintrans/tests/textqpdec
new file mode 100644
index 000000000000..f48aacc191f2
--- /dev/null
+++ b/usr.bin/bintrans/tests/textqpdec
@@ -0,0 +1,2 @@
+J'interdis aux marchands de vanter trop leurs marchandises. Car ils se font vite pédagogues et t'enseignent comme but ce qui n'est par essence qu'un moyen, et te trompant ainsi sur la route à suivre les voilà bientôt qui te dégradent, car si leur musique est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.
+    — Antoine de Saint-Exupéry, Citadelle (1948)
diff --git a/usr.bin/bintrans/tests/textqpenc b/usr.bin/bintrans/tests/textqpenc
new file mode 100644
index 000000000000..c51772d033ab
--- /dev/null
+++ b/usr.bin/bintrans/tests/textqpenc
@@ -0,0 +1,6 @@
+J'interdis aux marchands de vanter trop leurs marchandises. Car ils se font=
+ vite p=C3=A9dagogues et t'enseignent comme but ce qui n'est par essence qu=
+'un moyen, et te trompant ainsi sur la route =C3=A0 suivre les voil=C3=A0 b=
+ient=C3=B4t qui te d=C3=A9gradent, car si leur musique est vulgaire ils te =
+fabriquent pour te la vendre une =C3=A2me vulgaire=2E
+    =E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, Citadelle (1948)