Re: git: 05c9a0158f68 - main - libc: Add strverscmp(3) and versionsort(3)
Date: Thu, 25 Aug 2022 01:07:12 UTC
On 25 Aug 2022, at 01:29, Konstantin Belousov <kib@FreeBSD.org> wrote: > > The branch main has been updated by kib: > > URL: https://cgit.FreeBSD.org/src/commit/?id=05c9a0158f6837bb3a3781e4ed75f66115f6415a > > commit 05c9a0158f6837bb3a3781e4ed75f66115f6415a > Author: Aymeric Wibo <obiwac@gmail.com> > AuthorDate: 2022-08-24 23:20:13 +0000 > Commit: Konstantin Belousov <kib@FreeBSD.org> > CommitDate: 2022-08-25 00:29:03 +0000 > > libc: Add strverscmp(3) and versionsort(3) > > Add a strverscmp(3) function to libc, a GNU extension I implemented by > reading its glibc manual page. It orders strings following a much more > natural ordering (e.g. "ent1 < ent2 < ent10" as opposed to > "ent1 < ent10 < ent2" with strcmp(3)'s lexicographic ordering). > > Also add versionsort(3) for use as scandir(3)'s compar argument. > > Update manual page for scandir(3) and add one for strverscmp(3). > > Reviewed by: pstef, gbe, kib > MFC after: 1 week > Differential Revision: https://reviews.freebsd.org/D35807 > --- > include/dirent.h | 1 + > include/string.h | 1 + > lib/libc/gen/Makefile.inc | 3 +- > lib/libc/gen/Symbol.map | 1 + > lib/libc/gen/scandir.3 | 22 +++++++- > lib/libc/gen/scandir.c | 7 +++ > lib/libc/string/Makefile.inc | 4 +- > lib/libc/string/Symbol.map | 1 + > lib/libc/string/strverscmp.3 | 56 ++++++++++++++++++++ > lib/libc/string/strverscmp.c | 91 ++++++++++++++++++++++++++++++++ > lib/libc/tests/string/Makefile | 5 +- > lib/libc/tests/string/strverscmp_test.c | 93 +++++++++++++++++++++++++++++++++ > 12 files changed, 278 insertions(+), 7 deletions(-) > > diff --git a/include/dirent.h b/include/dirent.h > index 047206258471..751a016838c0 100644 > --- a/include/dirent.h > +++ b/include/dirent.h > @@ -108,6 +108,7 @@ int alphasort(const struct dirent **, const struct dirent **); > int dirfd(DIR *); > #endif > #if __BSD_VISIBLE > +int versionsort(const struct dirent **, const struct dirent **); > DIR *__opendir2(const char *, int); > int fdclosedir(DIR *); > ssize_t getdents(int, char *, size_t); > diff --git a/include/string.h b/include/string.h > index 15d4dc7e9701..dc5756830208 100644 > --- a/include/string.h > +++ b/include/string.h > @@ -81,6 +81,7 @@ char *strcat(char * __restrict, const char * __restrict); > char *strchr(const char *, int) __pure; > #if __BSD_VISIBLE > char *strchrnul(const char*, int) __pure; > +int strverscmp(const char *, const char *) __pure; > #endif > int strcmp(const char *, const char *) __pure; > int strcoll(const char *, const char *); > diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc > index 6d0e542a1f7b..45977b6b9005 100644 > --- a/lib/libc/gen/Makefile.inc > +++ b/lib/libc/gen/Makefile.inc > @@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \ > MLINKS+=recv.2 recvmmsg.2 > MLINKS+=scandir.3 alphasort.3 \ > scandir.3 scandirat.3 \ > - scandir.3 scandir_b.3 > + scandir.3 scandir_b.3 \ > + scandir.3 versionsort.3 > MLINKS+=sem_open.3 sem_close.3 \ > sem_open.3 sem_unlink.3 > MLINKS+=sem_wait.3 sem_trywait.3 > diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map > index 74676ffe2270..0a20a41d0e20 100644 > --- a/lib/libc/gen/Symbol.map > +++ b/lib/libc/gen/Symbol.map > @@ -443,6 +443,7 @@ FBSD_1.7 { > sched_getaffinity; > sched_setaffinity; > sched_getcpu; > + versionsort; > __cpuset_alloc; > __cpuset_free; > }; > diff --git a/lib/libc/gen/scandir.3 b/lib/libc/gen/scandir.3 > index b533b33ce7a7..07d8074ae592 100644 > --- a/lib/libc/gen/scandir.3 > +++ b/lib/libc/gen/scandir.3 > @@ -35,7 +35,8 @@ > .Nm scandir , > .Nm scandirat , > .Nm scandir_b , > -.Nm alphasort > +.Nm alphasort , > +.Nm versionsort > .Nd scan a directory > .Sh LIBRARY > .Lb libc > @@ -65,6 +66,8 @@ > .Fc > .Ft int > .Fn alphasort "const struct dirent **d1" "const struct dirent **d2" > +.Ft int > +.Fn versionsort "const struct dirent **d1" "const struct dirent **d2" > .Sh DESCRIPTION > The > .Fn scandir > @@ -106,6 +109,13 @@ is a routine which can be used for the > argument to sort the array alphabetically using > .Xr strcoll 3 . > .Pp > +The > +.Fn versionsort > +function is a routine which can be used for the > +.Fa compar > +argument to sort the array naturally using > +.Xr strverscmp 3 . > +.Pp > The memory allocated for the array can be deallocated with > .Xr free 3 , > by freeing each pointer in the array and then the array itself. > @@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data structures. > .Xr malloc 3 , > .Xr qsort 3 , > .Xr strcoll 3 , > +.Xr strverscmp 3 , > .Xr dir 5 > +.Sh STANDARDS > +The > +.Fn versionsort > +function is a GNU extension and conforms to no standard. > .Sh HISTORY > The > .Fn scandir > @@ -171,5 +186,8 @@ functions appeared in > .Bx 4.2 . > The > .Fn scandirat > -function was added in > +and > +.Fn > +versionsort > +functions were added in > .Fx 14.0 . > diff --git a/lib/libc/gen/scandir.c b/lib/libc/gen/scandir.c > index 3a891b0ad3f2..8a260adcd2f3 100644 > --- a/lib/libc/gen/scandir.c > +++ b/lib/libc/gen/scandir.c > @@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent **d2) > return (strcoll((*d1)->d_name, (*d2)->d_name)); > } > > +int > +versionsort(const struct dirent **d1, const struct dirent **d2) > +{ > + > + return (strverscmp((*d1)->d_name, (*d2)->d_name)); > +} > + > static int > alphasort_thunk(void *thunk, const void *p1, const void *p2) > { > diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc > index 1df3d40e329f..afc113eeb867 100644 > --- a/lib/libc/string/Makefile.inc > +++ b/lib/libc/string/Makefile.inc > @@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \ > strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \ > strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \ > strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \ > - strxfrm.c swab.c \ > + strverscmp.c strxfrm.c swab.c \ > timingsafe_bcmp.c \ > timingsafe_memcmp.c \ > wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \ > @@ -46,7 +46,7 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \ > memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \ > strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \ > string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \ > - strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \ > + strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \ > timingsafe_bcmp.3 \ > wcscoll.3 wcstok.3 \ > wcswidth.3 wcsxfrm.3 wmemchr.3 > diff --git a/lib/libc/string/Symbol.map b/lib/libc/string/Symbol.map > index ec45d7fd7ddb..4fcd194bafd8 100644 > --- a/lib/libc/string/Symbol.map > +++ b/lib/libc/string/Symbol.map > @@ -116,6 +116,7 @@ FBSD_1.6 { > > FBSD_1.7 { > mempcpy; > + strverscmp; > wmempcpy; > }; > > diff --git a/lib/libc/string/strverscmp.3 b/lib/libc/string/strverscmp.3 > new file mode 100644 > index 000000000000..e4413fb96e36 > --- /dev/null > +++ b/lib/libc/string/strverscmp.3 > @@ -0,0 +1,56 @@ > +.\" SPDX-License-Identifier: BSD-2-Clause > +.\" Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com> > +.Dd July 11, 2022 > +.Dt STRVERSCMP 3 > +.Os > +.Sh NAME > +.Nm strverscmp > +.Nd compare strings according to natural order > +.Sh LIBRARY > +.Lb libc > +.Sh SYNOPSIS > +.In string.h > +.Ft int > +.Fn strverscmp "const char *s1" "const char *s2" > +.Sh DESCRIPTION > +The > +.Fn strverscmp > +function > +compares the null-terminated strings > +.Fa s1 > +and > +.Fa s2 > +according to their natural order > +and returns an integer greater than, equal to, or less than 0, > +depending on whether > +.Fa s1 > +is greater than, equal to, or less than > +.Fa s2 . > +.Pp > +More specifically, this natural order is found by iterating over both > +strings until a difference is found. > +If the difference is between non-decimal characters, > +.Fn strverscmp > +acts like > +.Xr strcmp 3 > +(thus, the ordering would be "a", "b", "train"). > +If a decimal digit is found, the whole number is read and compared > +(thus, the ordering would be "9", "10", "420" which is different to lexicographic order, > +what > +.Xr strcmp 3 > +would have done). > +Numbers with leading zeroes are interpreted as fractional parts (even without a decimal point), > +and numbers with more leading zeroes are placed before numbers with fewer leading zeroes > +(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10"). > +.Sh SEE ALSO > +.Xr strcmp 3 , > +.Xr versionsort 3 > +.Sh STANDARDS > +The > +.Fn strverscmp > +function is a GNU extension and conforms to no standard. > +.Sh HISTORY > +The > +.Fn strverscmp > +function was added in > +.Fx 14.0 . > diff --git a/lib/libc/string/strverscmp.c b/lib/libc/string/strverscmp.c > new file mode 100644 > index 000000000000..6051adb35499 > --- /dev/null > +++ b/lib/libc/string/strverscmp.c > @@ -0,0 +1,91 @@ > +/*- > +* SPDX-License-Identifier: BSD-2-Clause > +* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com> > +*/ > + > +#include <ctype.h> > +#include <stddef.h> > + > +int > +strverscmp(const char *s1, const char *s2) > +{ > + size_t digit_count_1, digit_count_2; > + size_t zeros_count_1, zeros_count_2; > + const unsigned char *num_1, *num_2; > + const unsigned char *u1 = __DECONST(const unsigned char *, s1); > + const unsigned char *u2 = __DECONST(const unsigned char *, s2); Why is __DECONST needed? Casting from const char * to const unsigned char * should never warn, surely? Jess > + /* > + * If pointers are the same, no need to go through to process of > + * comparing them. > + */ > + if (s1 == s2) > + return (0); > + > + while (*u1 != '\0' && *u2 != '\0') { > + /* If either character is not a digit, act like strcmp(3). */ > + > + if (!isdigit(*u1) || !isdigit(*u2)) { > + if (*u1 != *u2) > + return (*u1 - *u2); > + u1++; > + u2++; > + continue; > + } > + if (*u1 == '0' || *u2 == '0') { > + /* > + * Treat leading zeros as if they were the fractional > + * part of a number, i.e. as if they had a decimal point > + * in front. First, count the leading zeros (more zeros > + * == smaller number). > + */ > + zeros_count_1 = 0; > + zeros_count_2 = 0; > + for (; *u1 == '0'; u1++) > + zeros_count_1++; > + for (; *u2 == '0'; u2++) > + zeros_count_2++; > + if (zeros_count_1 != zeros_count_2) > + return (zeros_count_2 - zeros_count_1); > + > + /* Handle the case where 0 < 09. */ > + if (!isdigit(*u1) && isdigit(*u2)) > + return (1); > + if (!isdigit(*u2) && isdigit(*u1)) > + return (-1); > + } else { > + /* > + * No leading zeros; we're simply comparing two numbers. > + * It is necessary to first count how many digits there > + * are before going back to compare each digit, so that > + * e.g. 7 is not considered larger than 60. > + */ > + num_1 = u1; > + num_2 = u2; > + > + /* Count digits (more digits == larger number). */ > + for (; isdigit(*u1); u1++) > + ; > + for (; isdigit(*u2); u2++) > + ; > + digit_count_1 = u1 - num_1; > + digit_count_2 = u2 - num_2; > + if (digit_count_1 != digit_count_2) > + return (digit_count_1 - digit_count_2); > + > + /* > + * If there are the same number of digits, go back to > + * the start of the number. > + */ > + u1 = num_1; > + u2 = num_2; > + } > + > + /* Compare each digit until there are none left. */ > + for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) { > + if (*u1 != *u2) > + return (*u1 - *u2); > + } > + } > + return (*u1 - *u2); > +} > diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile > index c6a98572564d..eacf7e15c27c 100644 > --- a/lib/libc/tests/string/Makefile > +++ b/lib/libc/tests/string/Makefile > @@ -4,10 +4,11 @@ ATF_TESTS_C+= memcmp_test > ATF_TESTS_C+= memset_s_test > ATF_TESTS_C+= stpncpy_test > ATF_TESTS_C+= strerror2_test > -ATF_TESTS_C+= wcscasecmp_test > -ATF_TESTS_C+= wcsnlen_test > +ATF_TESTS_C+= strverscmp_test > ATF_TESTS_C+= strxfrm_test > +ATF_TESTS_C+= wcscasecmp_test > ATF_TESTS_C+= wcscoll_test > +ATF_TESTS_C+= wcsnlen_test > > # TODO: popcount, stresep > > diff --git a/lib/libc/tests/string/strverscmp_test.c b/lib/libc/tests/string/strverscmp_test.c > new file mode 100644 > index 000000000000..fd6a2620cb48 > --- /dev/null > +++ b/lib/libc/tests/string/strverscmp_test.c > @@ -0,0 +1,93 @@ > +/*- > +* SPDX-License-Identifier: BSD-2-Clause > +* Copyright (c) 2022 Aymeric Wibo <obiwac@gmail.com> > +*/ > + > +#include <atf-c.h> > +#include <string.h> > + > +static void > +check_all(size_t len, const char *ordered[len]) > +{ > + const char *a, *b; > + > + for (size_t i = 0; i < len; i++) { > + for (size_t j = 0; j < len; j++) { > + a = ordered[i]; > + b = ordered[j]; > + > + if (i == j) > + ATF_CHECK_MSG( > + strverscmp(a, b) == 0, > + "strverscmp(\"%s\", \"%s\") == 0", > + a, b > + ); > + else if (i < j) > + ATF_CHECK_MSG( > + strverscmp(a, b) < 0, > + "strverscmp(\"%s\", \"%s\") < 0", > + a, b > + ); > + else if (i > j) > + ATF_CHECK_MSG( > + strverscmp(a, b) > 0, > + "strverscmp(\"%s\", \"%s\") > 0", > + a, b > + ); > + } > + } > +} > + > +#define CHECK_ALL(...) do { \ > + const char *ordered[] = { __VA_ARGS__ }; \ > + check_all(sizeof(ordered) / sizeof(*ordered), ordered); \ > +} while (0) > + > +ATF_TC_WITHOUT_HEAD(strcmp_functionality); > +ATF_TC_BODY(strcmp_functionality, tc) > +{ > + CHECK_ALL("", "a", "b"); > +} > + > +/* from Linux man page strverscmp(3) */ > + > +ATF_TC_WITHOUT_HEAD(vers_ordering); > +ATF_TC_BODY(vers_ordering, tc) > +{ > + CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10"); > +} > + > +ATF_TC_WITHOUT_HEAD(natural_ordering); > +ATF_TC_BODY(natural_ordering, tc) > +{ > + CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20"); > +} > + > +/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */ > + > +ATF_TC_WITHOUT_HEAD(glibc_bug_9913); > +ATF_TC_BODY(glibc_bug_9913, tc) > +{ > + CHECK_ALL( > + "B0075022800016.gbp.corp.com", > + "B007502280067.gbp.corp.com", > + "B007502357019.GBP.CORP.COM" > + ); > +} > + > +ATF_TC_WITHOUT_HEAD(semver_ordering); > +ATF_TC_BODY(semver_ordering, tc) > +{ > + CHECK_ALL("2.6.20", "2.6.21"); > +} > + > +ATF_TP_ADD_TCS(tp) > +{ > + ATF_TP_ADD_TC(tp, strcmp_functionality); > + ATF_TP_ADD_TC(tp, vers_ordering); > + ATF_TP_ADD_TC(tp, natural_ordering); > + ATF_TP_ADD_TC(tp, glibc_bug_9913); > + ATF_TP_ADD_TC(tp, semver_ordering); > + > + return (atf_no_error()); > +}