git: dad64f0e7cad - main - wc: Clean up and modernize.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 14 Feb 2023 22:26:59 UTC
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=dad64f0e7cadb5913d6463fc014f59f8706d316c

commit dad64f0e7cadb5913d6463fc014f59f8706d316c
Author: Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2023-02-14 22:26:34 +0000
Commit: Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2023-02-14 22:26:47 +0000

wc: Clean up and modernize.

* Drop <err.h>, which is unnecessary since we use libxo.
* As per POSIX, report an error if output fails.
* Fix some type mismatches.
* Use bool instead of int where appropriate.
* Avoid repeatedly checking for a null filename.
* Miscellaneous other tidying.
* Add tests (partly derived from work performed by SHENG-YI HONG <i19780219111@kimo.com>).

Sponsored by: Klara, Inc.
Reviewed by: kevans
Differential Revision: https://reviews.freebsd.org/D38496
---
 etc/mtree/BSD.tests.dist | 2 +
 usr.bin/wc/Makefile | 3 +
 usr.bin/wc/tests/Makefile | 8 ++
 usr.bin/wc/tests/wc_test.sh | 197 ++++++++++++++++++++++++++++++++++++++++++++
 usr.bin/wc/wc.c | 142 ++++++++++++++++---------------
 5 files changed, 279 insertions(+), 73 deletions(-)

diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist
index 910db16f0d15..859b5c5ace7f 100644
--- a/etc/mtree/BSD.tests.dist
+++ b/etc/mtree/BSD.tests.dist
@@ -1120,6 +1120,8 @@ .. vmstat .. + wc + .. xargs .. 
xinstall diff --git a/usr.bin/wc/Makefile b/usr.bin/wc/Makefile index 550b718e1478..060c67193e00 100644 --- a/usr.bin/wc/Makefile +++ b/usr.bin/wc/Makefile @@ -12,4 +12,7 @@ LIBADD+= cap_fileargs CFLAGS+=-DWITH_CASPER .endif +HAS_TESTS= +SUBDIR.${MK_TESTS}= tests + .include <bsd.prog.mk> diff --git a/usr.bin/wc/tests/Makefile b/usr.bin/wc/tests/Makefile new file mode 100644 index 000000000000..81e6c4246544 --- /dev/null +++ b/usr.bin/wc/tests/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ + +PACKAGE= tests + +ATF_TESTS_SH= wc_test +BINDIR= ${TESTSDIR} + +.include <bsd.test.mk> diff --git a/usr.bin/wc/tests/wc_test.sh b/usr.bin/wc/tests/wc_test.sh new file mode 100755 index 000000000000..574f2daacaca --- /dev/null +++ b/usr.bin/wc/tests/wc_test.sh @@ -0,0 +1,197 @@ +# +# Copyright (c) 2023 Klara, Inc. +# +# SPDX-License-Identifier: BSD-2-Clause +# + +# +# These tests need to run in a multibyte locale with non-localized +# error messages. +# +export LC_CTYPE=C.UTF-8 +export LC_MESSAGES=C + +# +# Sample text containing multibyte characters +# +tv="Der bode en underlig gråsprængt en +på den yderste nøgne ø; – +han gjorde visst intet menneske mén +hverken på land eller sjø; +dog stundom gnistred hans øjne stygt, – +helst mod uroligt vejr, – +og da mente folk, at han var forrykt, +og da var der få, som uden frykt +kom Terje Vigen nær. +" +tvl=10 +tvw=55 +tvc=300 +tvm=283 +tvcL=42 +tvmL=39 + +# +# Run a series of tests using the same input file. The first argument +# is the name of the file. The next three are the expected line, +# word, and byte counts. The optional fifth is the expected character +# count; if not provided, it is expected to be identical to the byte +# count. 
+# +atf_check_wc() { + local file="$1" + local l="$2" + local w="$3" + local c="$4" + local m="${5-$4}" + + atf_check -o match:"^ +${l} +${w} +${c}\$" wc <"${file}" + atf_check -o match:"^ +${l}\$" wc -l <"${file}" + atf_check -o match:"^ +${w}\$" wc -w <"${file}" + atf_check -o match:"^ +${c}\$" wc -c <"${file}" + atf_check -o match:"^ +${m}\$" wc -m <"${file}" + atf_check -o match:"^ +${l} +${w} +${c} ${file}\$" wc "$file" + atf_check -o match:"^ +${l} ${file}\$" wc -l "$file" + atf_check -o match:"^ +${w} ${file}\$" wc -w "$file" + atf_check -o match:"^ +${c} ${file}\$" wc -c "$file" + atf_check -o match:"^ +${m} ${file}\$" wc -m "$file" +} + +atf_test_case basic +basic_head() +{ + atf_set "descr" "Basic test case" +} +basic_body() +{ + printf "a b\n" >foo + atf_check_wc foo 1 2 4 +} + +atf_test_case blank +blank_head() +{ + atf_set "descr" "Input containing only blank lines" +} +blank_body() +{ + printf "\n\n\n" >foo + atf_check_wc foo 3 0 3 +} + +atf_test_case empty +empty_head() +{ + atf_set "descr" "Empty input" +} +empty_body() +{ + printf "" >foo + atf_check_wc foo 0 0 0 +} + +atf_test_case invalid +invalid_head() +{ + atf_set "descr" "Invalid multibyte input" +} +invalid_body() +{ + printf "a\377b\n" >foo + atf_check \ + -e match:"Illegal byte sequence" \ + -o match:"^ +4 foo$" \ + wc -m foo +} + +atf_test_case multiline +multiline_head() +{ + atf_set "descr" "Multiline, multibyte input" +} +multiline_body() +{ + printf "%s\n" "$tv" >foo + atf_check_wc foo $tvl $tvw $tvc $tvm + # longest line in bytes + atf_check -o match:"^ +$tvc +$tvcL foo" wc -cL foo + atf_check -o match:"^ +$tvc +$tvcL" wc -cL <foo + # longest line in characters + atf_check -o match:"^ +$tvm +$tvmL foo" wc -mL foo + atf_check -o match:"^ +$tvm +$tvmL" wc -mL <foo +} + +atf_test_case multiline_repeated +multiline_repeated_head() +{ + atf_set "descr" "Multiline input exceeding the input buffer size" +} +multiline_repeated_body() +{ + local c=0 + while [ $c -lt 1000 ] ; do + printf 
"%1\$s\n%1\$s\n%1\$s\n%1\$s\n%1\$s\n" "$tv" + c=$((c+5)) + done >foo + atf_check_wc foo $((tvl*c)) $((tvw*c)) $((tvc*c)) $((tvm*c)) +} + +atf_test_case total +total_head() +{ + atf_set "descr" "Multiple inputs" +} +total_body() +{ + printf "%s\n" "$tv" >foo + printf "%s\n" "$tv" >bar + atf_check \ + -o match:"^ +$((tvl*2)) +$((tvw*2)) +$((tvc*2)) total$" \ + wc foo bar +} + +atf_test_case unterminated +unterminated_head() +{ + atf_set "descr" "Input not ending in newline" +} +unterminated_body() +{ + printf "a b" >foo + atf_check_wc foo 0 2 3 +} + +atf_test_case usage +usage_head() +{ + atf_set "descr" "Trigger usage message" +} +usage_body() +{ + atf_check -s exit:1 -e match:"usage: wc" wc -\? +} + +atf_test_case whitespace +whitespace_head() +{ + atf_set "descr" "Input containing only whitespace and newlines" +} +whitespace_body() +{ + printf "\n \n\t\n" >foo + atf_check_wc foo 3 0 5 +} + +atf_init_test_cases() +{ + atf_add_test_case basic + atf_add_test_case blank + atf_add_test_case empty + atf_add_test_case invalid + atf_add_test_case multiline + atf_add_test_case multiline_repeated + atf_add_test_case total + atf_add_test_case unterminated + atf_add_test_case usage + atf_add_test_case whitespace +} diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c index a12c13a3e36d..9c129917dd04 100644 --- a/usr.bin/wc/wc.c +++ b/usr.bin/wc/wc.c @@ -50,10 +50,10 @@ __FBSDID("$FreeBSD$"); #include <capsicum_helpers.h> #include <ctype.h> -#include <err.h> #include <errno.h> #include <fcntl.h> #include <locale.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -66,9 +66,11 @@ __FBSDID("$FreeBSD$"); #include <libcasper.h> #include <casper/cap_fileargs.h> +static const char *stdin_filename = "stdin"; + static fileargs_t *fa; static uintmax_t tlinect, twordct, tcharct, tlongline; -static int doline, doword, dochar, domulti, dolongline; +static bool doline, doword, dochar, domulti, dolongline; static volatile sig_atomic_t siginfo; static xo_handle_t 
*stderr_handle; @@ -102,26 +104,26 @@ main(int argc, char *argv[]) argc = xo_parse_args(argc, argv); if (argc < 0) - return (argc); + exit(EXIT_FAILURE); while ((ch = getopt(argc, argv, "clmwL")) != -1) switch((char)ch) { case 'l': - doline = 1; + doline = true; break; case 'w': - doword = 1; + doword = true; break; case 'c': - dochar = 1; - domulti = 0; + dochar = true; + domulti = false; break; case 'L': - dolongline = 1; + dolongline = true; break; case 'm': - domulti = 1; - dochar = 0; + domulti = true; + dochar = false; break; case '?': default: @@ -134,27 +136,17 @@ main(int argc, char *argv[]) fa = fileargs_init(argc, argv, O_RDONLY, 0, cap_rights_init(&rights, CAP_READ, CAP_FSTAT), FA_OPEN); - if (fa == NULL) { - xo_warn("Unable to init casper"); - exit(1); - } - + if (fa == NULL) + xo_err(EXIT_FAILURE, "Unable to initialize casper"); caph_cache_catpages(); - if (caph_limit_stdio() < 0) { - xo_warn("Unable to limit stdio"); - fileargs_free(fa); - exit(1); - } - - if (caph_enter_casper() < 0) { - xo_warn("Unable to enter capability mode"); - fileargs_free(fa); - exit(1); - } + if (caph_limit_stdio() < 0) + xo_err(EXIT_FAILURE, "Unable to limit stdio"); + if (caph_enter_casper() < 0) + xo_err(EXIT_FAILURE, "Unable to enter capability mode"); /* Wc's flags are on by default. 
*/ - if (doline + doword + dochar + domulti + dolongline == 0) - doline = doword = dochar = 1; + if (!(doline || doword || dochar || domulti || dolongline)) + doline = doword = dochar = true; stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0); xo_open_container("wc"); @@ -162,19 +154,19 @@ main(int argc, char *argv[]) errors = 0; total = 0; - if (!*argv) { - xo_open_instance("file"); - if (cnt((char *)NULL) != 0) + if (argc == 0) { + xo_open_instance("file"); + if (cnt(NULL) != 0) ++errors; - xo_close_instance("file"); + xo_close_instance("file"); } else { - do { - xo_open_instance("file"); - if (cnt(*argv) != 0) + while (argc--) { + xo_open_instance("file"); + if (cnt(*argv++) != 0) ++errors; - xo_close_instance("file"); + xo_close_instance("file"); ++total; - } while(*++argv); + } } xo_close_list("file"); @@ -187,8 +179,9 @@ main(int argc, char *argv[]) fileargs_free(fa); xo_close_container("wc"); - xo_finish(); - exit(errors == 0 ? 0 : 1); + if (xo_finish() < 0) + xo_err(EXIT_FAILURE, "stdout"); + exit(errors == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } static void @@ -212,7 +205,7 @@ show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, xo_emit_h(xop, " {:characters/%7ju/%ju}", charct); if (dolongline) xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct); - if (file != NULL) + if (file != stdin_filename) xo_emit_h(xop, " {:filename/%s}\n", file); else xo_emit_h(xop, "\n"); @@ -221,20 +214,21 @@ show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, static int cnt(const char *file) { + char buf[MAXBSIZE], *p; struct stat sb; + mbstate_t mbs; uintmax_t linect, wordct, charct, llct, tmpll; - int fd, len, warned; + ssize_t len; size_t clen; - short gotsp; - u_char *p; - u_char buf[MAXBSIZE]; + int fd; wchar_t wch; - mbstate_t mbs; + bool gotsp, warned; linect = wordct = charct = llct = tmpll = 0; - if (file == NULL) + if (file == NULL) { fd = STDIN_FILENO; - else if ((fd = fileargs_open(fa, file)) < 0) { + file = stdin_filename; + } else if ((fd = fileargs_open(fa, file)) < 0) { xo_warn("%s: open", file); return (1); } @@ -246,7 +240,7 @@ cnt(const char *file) */ if (doline == 0 && dolongline == 0) { if (fstat(fd, &sb)) { - xo_warn("%s: fstat", file != NULL ? file : "stdin"); + xo_warn("%s: fstat", file); (void)close(fd); return (1); } @@ -265,9 +259,9 @@ cnt(const char *file) * lines than to get words, since the word count requires locale * handling. */ - while ((len = read(fd, buf, MAXBSIZE))) { - if (len == -1) { - xo_warn("%s: read", file != NULL ? 
file : "stdin"); + while ((len = read(fd, buf, sizeof(buf)))) { + if (len < 0) { + xo_warn("%s: read", file); (void)close(fd); return (1); } @@ -275,14 +269,16 @@ cnt(const char *file) show_cnt(file, linect, wordct, charct, llct); charct += len; if (doline || dolongline) { - for (p = buf; len--; ++p) + for (p = buf; len > 0; --len, ++p) { if (*p == '\n') { if (tmpll > llct) llct = tmpll; tmpll = 0; ++linect; - } else + } else { tmpll++; + } + } } } reset_siginfo(); @@ -297,12 +293,12 @@ cnt(const char *file) return (0); /* Do it the hard way... */ -word: gotsp = 1; - warned = 0; +word: gotsp = true; + warned = false; memset(&mbs, 0, sizeof(mbs)); - while ((len = read(fd, buf, MAXBSIZE)) != 0) { - if (len == -1) { - xo_warn("%s: read", file != NULL ? file : "stdin"); + while ((len = read(fd, buf, sizeof(buf))) != 0) { + if (len < 0) { + xo_warn("%s: read", file); (void)close(fd); return (1); } @@ -313,21 +309,20 @@ word: gotsp = 1; if (!domulti || MB_CUR_MAX == 1) { clen = 1; wch = (unsigned char)*p; - } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == - (size_t)-1) { + } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 0) { + clen = 1; + } else if (clen == (size_t)-1) { if (!warned) { errno = EILSEQ; - xo_warn("%s", - file != NULL ? file : "stdin"); - warned = 1; + xo_warn("%s", file); + warned = true; } memset(&mbs, 0, sizeof(mbs)); clen = 1; wch = (unsigned char)*p; - } else if (clen == (size_t)-2) + } else if (clen == (size_t)-2) { break; - else if (clen == 0) - clen = 1; + } charct++; if (wch != L'\n') tmpll++; @@ -339,18 +334,19 @@ word: gotsp = 1; tmpll = 0; ++linect; } - if (iswspace(wch)) - gotsp = 1; - else if (gotsp) { - gotsp = 0; + if (iswspace(wch)) { + gotsp = true; + } else if (gotsp) { + gotsp = false; ++wordct; } } } reset_siginfo(); - if (domulti && MB_CUR_MAX > 1) + if (domulti && MB_CUR_MAX > 1) { if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) - xo_warn("%s", file != NULL ? 
file : "stdin"); + xo_warn("%s", file); + } if (doline) tlinect += linect; if (doword) @@ -368,5 +364,5 @@ static void usage(void) { xo_error("usage: wc [-Lclmw] [file ...]\n"); - exit(1); + exit(EXIT_FAILURE); }