git: 8dcf3a82c54c - main - libc: Implement bsort(3) a bitonic type of sorting algorithm.
- Reply: Yuri : "Re: git: 8dcf3a82c54c - main - libc: Implement bsort(3) a bitonic type of sorting algorithm."
- Reply: Brooks Davis : "Re: git: 8dcf3a82c54c - main - libc: Implement bsort(3) a bitonic type of sorting algorithm."
- Reply: Charlie Li : "Re: git: 8dcf3a82c54c - main - libc: Implement bsort(3) a bitonic type of sorting algorithm."
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 19 Apr 2023 12:06:26 UTC
The branch main has been updated by hselasky: URL: https://cgit.FreeBSD.org/src/commit/?id=8dcf3a82c54cb216df3213a013047907636a01da commit 8dcf3a82c54cb216df3213a013047907636a01da Author: Hans Petter Selasky <hselasky@FreeBSD.org> AuthorDate: 2022-09-08 10:16:43 +0000 Commit: Hans Petter Selasky <hselasky@FreeBSD.org> CommitDate: 2023-04-19 12:04:22 +0000 libc: Implement bsort(3) a bitonic type of sorting algorithm. The bsort(3) algorithm works by swapping objects, similarly to qsort(3), and does not require any significant amount of additional memory. The bsort(3) algorithm doesn't suffer from the processing time issues known the plague the qsort(3) family of algorithms, and is bounded by a complexity of O(log2(N) * log2(N) * N), where N is the number of elements in the sorting array. The additional complexity compared to mergesort(3) is a fair tradeoff in situations where no memory may be allocated. The bsort(3) APIs are identical to those of qsort(3), allowing for easy drop-in and testing. The design of the bsort(3) algorithm allows for future parallell CPU execution when sorting arrays. The current version of the bsort(3) algorithm is single threaded. This is possible because fixed areas of the sorting data is compared at a time, and can easily be divided among different CPU's to sort large arrays faster. Reviewed by: gbe@, delphij@, pauamma_gundo.com (manpages) Sponsored by: NVIDIA Networking Differential Revision: https://reviews.freebsd.org/D36493 --- include/stdlib.h | 13 +++ lib/libc/stdlib/Makefile.inc | 4 +- lib/libc/stdlib/Symbol.map | 4 + lib/libc/stdlib/bsort.3 | 203 ++++++++++++++++++++++++++++++++ lib/libc/stdlib/bsort.c | 267 +++++++++++++++++++++++++++++++++++++++++++ lib/libc/stdlib/bsort_r.c | 25 ++++ lib/libc/stdlib/bsort_s.c | 5 + 7 files changed, 520 insertions(+), 1 deletion(-) diff --git a/include/stdlib.h b/include/stdlib.h index 730223e7fd77..857092b9053e 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -107,6 +107,10 @@ void *malloc(size_t) __malloc_like __result_use_check __alloc_size(1); int mblen(const char *, size_t); size_t mbstowcs(wchar_t * __restrict , const char * __restrict, size_t); int mbtowc(wchar_t * __restrict, const char * __restrict, size_t); +#if __BSD_VISIBLE +void bsort(void *, size_t, size_t, + int (* _Nonnull)(const void *, const void *)); +#endif void qsort(void *, size_t, size_t, int (* _Nonnull)(const void *, const void *)); int rand(void); @@ -300,6 +304,8 @@ int heapsort(void *, size_t, size_t, #ifdef __BLOCKS__ int heapsort_b(void *, size_t, size_t, int (^ _Nonnull)(const void *, const void *)); +void bsort_b(void *, size_t, size_t, + int (^ _Nonnull)(const void *, const void *)); void qsort_b(void *, size_t, size_t, int (^ _Nonnull)(const void *, const void *)); #endif @@ -313,6 +319,8 @@ int mkostemps(char *, int, int); int mkostempsat(int, char *, int, int); void qsort_r(void *, size_t, size_t, int (*)(const void *, const void *, void *), void *); +void bsort_r(void *, size_t, size_t, + int (*)(const void *, const void *, void *), void *); int radixsort(const unsigned char **, int, const unsigned char *, unsigned); void *reallocarray(void *, size_t, size_t) __result_use_check @@ -397,6 +405,11 @@ errno_t qsort_s(void *, rsize_t, rsize_t, int (*)(const void *, const void *, void *), void *); #endif /* __EXT1_VISIBLE */ +#if __BSD_VISIBLE +errno_t bsort_s(void *, rsize_t, rsize_t, + int (*)(const void *, const void *, void *), void *); +#endif /* __BSD_VISIBLE */ + __END_DECLS __NULLABILITY_PRAGMA_POP diff --git a/lib/libc/stdlib/Makefile.inc b/lib/libc/stdlib/Makefile.inc index 964e7ce30594..7503a82a6045 100644 --- a/lib/libc/stdlib/Makefile.inc +++ b/lib/libc/stdlib/Makefile.inc @@ -11,6 +11,7 @@ MISRCS+=C99_Exit.c a64l.c abort.c abs.c atexit.c atof.c atoi.c atol.c atoll.c \ getsubopt.c hcreate.c hcreate_r.c hdestroy_r.c heapsort.c heapsort_b.c \ hsearch_r.c imaxabs.c imaxdiv.c \ insque.c l64a.c labs.c ldiv.c llabs.c lldiv.c lsearch.c \ + bsort.c bsort_r.c bsort_s.c \ merge.c mergesort_b.c ptsname.c qsort.c qsort_r.c qsort_r_compat.c \ qsort_s.c quick_exit.c radixsort.c rand.c \ random.c reallocarray.c reallocf.c realpath.c remque.c \ @@ -36,7 +37,7 @@ MAN+= a64l.3 abort.3 abs.3 alloca.3 atexit.3 atof.3 \ atoi.3 atol.3 at_quick_exit.3 bsearch.3 \ div.3 exit.3 getenv.3 getopt.3 getopt_long.3 getsubopt.3 \ hcreate.3 imaxabs.3 imaxdiv.3 insque.3 labs.3 ldiv.3 llabs.3 lldiv.3 \ - lsearch.3 memory.3 ptsname.3 qsort.3 \ + lsearch.3 memory.3 ptsname.3 bsort.3 qsort.3 \ quick_exit.3 \ radixsort.3 rand.3 random.3 reallocarray.3 reallocf.3 realpath.3 \ set_constraint_handler_s.3 \ @@ -54,6 +55,7 @@ MLINKS+=hcreate.3 hcreate_r.3 hcreate.3 hdestroy_r.3 hcreate.3 hsearch_r.3 MLINKS+=insque.3 remque.3 MLINKS+=lsearch.3 lfind.3 MLINKS+=ptsname.3 grantpt.3 ptsname.3 ptsname_r.3 ptsname.3 unlockpt.3 +MLINKS+=bsort.3 bsort_r.3 bsort.3 bsort_b.3 bsort.3 bsort_s.3 MLINKS+=qsort.3 heapsort.3 qsort.3 mergesort.3 qsort.3 qsort_r.3 \ qsort.3 qsort_s.3 MLINKS+=rand.3 rand_r.3 rand.3 srand.3 diff --git a/lib/libc/stdlib/Symbol.map b/lib/libc/stdlib/Symbol.map index a105f781734d..50583a30ad3d 100644 --- a/lib/libc/stdlib/Symbol.map +++ b/lib/libc/stdlib/Symbol.map @@ -126,6 +126,10 @@ FBSD_1.6 { }; FBSD_1.7 { + bsort; + bsort_b; + bsort_r; + bsort_s; clearenv; qsort_r; secure_getenv; diff --git a/lib/libc/stdlib/bsort.3 b/lib/libc/stdlib/bsort.3 new file mode 100644 index 000000000000..9cc2edc04dd2 --- /dev/null +++ b/lib/libc/stdlib/bsort.3 @@ -0,0 +1,203 @@ +.\" +.\" Copyright (c) 2022 Hans Petter Selasky +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd April 19, 2023 +.Dt BSORT 3 +.Os +.Sh NAME +.Nm bsort , +.Nm bsort_b , +.Nm bsort_r +and +.Nm bsort_s +.Nd sort functions +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In stdlib.h +.Ft void +.Fo bsort +.Fa "void *base" +.Fa "size_t nmemb" +.Fa "size_t size" +.Fa "int \*[lp]*compar\*[rp]\*[lp]const void *, const void *\*[rp]" +.Fc +.Ft void +.Fo bsort_b +.Fa "void *base" +.Fa "size_t nmemb" +.Fa "size_t size" +.Fa "bsort_block compar" +.Fc +.Ft void +.Fo bsort_r +.Fa "void *base" +.Fa "size_t nmemb" +.Fa "size_t size" +.Fa "int \*[lp]*compar\*[rp]\*[lp]const void *, const void *, void *\*[rp]" +.Fa "void *thunk" +.Fc +.Fd #define __STDC_WANT_LIB_EXT1__ 1 +.Ft errno_t +.Fo bsort_s +.Fa "void *base" +.Fa "size_t nmemb" +.Fa "size_t size" +.Fa "int \*[lp]*compar\*[rp]\*[lp]void *, const void *, const void *\*[rp]" +.Fa "void *thunk" +.Fc +.Sh DESCRIPTION +The +.Fn bsort +function is based on so-called in-place bitonic sorting. +Bitonic sorting is suitable for parallel execution, +because the elements in the array to sort are compared in a predefined +sequence not depending on the data. +The complexity of the +.Fn bsort +algorithm is bounded by O(log2(N) * log2(N) * N), where N is the +number of elements in the sorting array. +The +.Fn bsort +function provides a reliable in-place sorting algorithm, +with little memory usage and without the excess processing usage +caveats of other algorithms like +.Xr qsort 3 . +.Pp +The comparison function must return an integer less than, equal to, or +greater than zero if the first argument is considered to be respectively +less than, equal to, or greater than the second. +.Pp +The +.Fn bsort_r +function behaves identically to +.Fn bsort , +except it takes an additional argument, +.Fa thunk , +which is passed unchanged as the last argument to the function +.Fa compar +points to. +This allows the comparison function to access additional +data without using global variables, and thus +.Fn bsort_r +is suitable for use in functions which must be reentrant. +The +.Fn bsort_b +function behaves identically to +.Fn bsort , +except it takes a block, rather than a function pointer. +.Pp +The algorithm implemented by +.Fn bsort , +.Fn bsort_b , +.Fn bsort_r +and +.Fn bsort_s +is +.Em not +stable, that is, if two members compare as equal, their order in +the sorted array is undefined. +.Pp +The +.Fn bsort_s +function behaves the same as +.Fn bsort_r +except if +.Fa size +is greater than +.Dv RSIZE_MAX , +or +.Fa nmemb +is not zero and +.Fa compar +is +.Dv NULL +or +.Fa size +is zero, then the runtime-constraint handler is called, and +.Fn bsort_s +returns an error. +Note the handler is called before +.Fn bsort_s +returns the error, and the handler function might not return. +.Sh RETURN VALUES +The +.Fn bsort , +.Fn bsort_b +and +.Fn bsort_r +functions +return no value. +The +.Fn bsort_s +function returns zero on success, non-zero on error. +.Sh EXAMPLES +A sample program sorting an array of +.Vt int +values in place using +.Fn bsort , +and then prints the sorted array to standard output is: +.Bd -literal +#include <stdio.h> +#include <stdlib.h> + +/* + * Custom comparison function comparing 'int' values through pointers + * passed by bsort(3). + */ +static int +int_compare(const void *p1, const void *p2) +{ + int left = *(const int *)p1; + int right = *(const int *)p2; + + return ((left > right) - (left < right)); +} + +/* + * Sort an array of 'int' values and print it to standard output. + */ +int +main(void) +{ + int int_array[] = { 4, 5, 9, 3, 0, 1, 7, 2, 8, 6 }; + size_t array_size = sizeof(int_array) / sizeof(int_array[0]); + size_t k; + + bsort(&int_array, array_size, sizeof(int_array[0]), int_compare); + for (k = 0; k < array_size; k++) + printf(" %d", int_array[k]); + puts(""); + return (EXIT_SUCCESS); +} +.Ed +.Sh SEE ALSO +.Xr sort 1 , +.Xr qsort 3 +and +.Xr radixsort 3 +.Sh HISTORY +This implementation was created by Hans Petter Selasky. diff --git a/lib/libc/stdlib/bsort.c b/lib/libc/stdlib/bsort.c new file mode 100644 index 000000000000..0c470654cfe7 --- /dev/null +++ b/lib/libc/stdlib/bsort.c @@ -0,0 +1,267 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2016-2023 Hans Petter Selasky + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/param.h> + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "libc_private.h" + +/* + * A variant of bitonic sorting + * + * Worst case sorting complexity: log2(N) * log2(N) * N + * Additional memory and stack usage: none + */ + +#if defined(I_AM_BSORT_R) +typedef int cmp_t (const void *, const void *, void *); +#define ARG_PROTO void *arg, +#define ARG_NAME arg , +#define CMP(fn,arg,a,b) (fn)(a, b, arg) +#elif defined(I_AM_BSORT_S) +typedef int cmp_t (const void *, const void *, void *); +#define ARG_PROTO void *arg, +#define ARG_NAME arg , +#define CMP(fn,arg,a,b) (fn)(a, b, arg) +#else +typedef int cmp_t (const void *, const void *); +#define ARG_PROTO +#define ARG_NAME +#define CMP(fn,arg,a,b) (fn)(a, b) +#endif + +static inline void +bsort_swap(char *pa, char *pb, const size_t es) +{ + if (__builtin_constant_p(es) && es == 8) { + uint64_t tmp; + + /* swap */ + tmp = *(uint64_t *)pa; + *(uint64_t *)pa = *(uint64_t *)pb; + *(uint64_t *)pb = tmp; + } else if (__builtin_constant_p(es) && es == 4) { + uint32_t tmp; + + /* swap */ + tmp = *(uint32_t *)pa; + *(uint32_t *)pa = *(uint32_t *)pb; + *(uint32_t *)pb = tmp; + } else if (__builtin_constant_p(es) && es == 2) { + uint16_t tmp; + + /* swap */ + tmp = *(uint16_t *)pa; + *(uint16_t *)pa = *(uint16_t *)pb; + *(uint16_t *)pb = tmp; + } else if (__builtin_constant_p(es) && es == 1) { + uint8_t tmp; + + /* swap */ + tmp = *(uint8_t *)pa; + *(uint8_t *)pa = *(uint8_t *)pb; + *(uint8_t *)pb = tmp; + } else if (es <= 256) { + char tmp[es] __aligned(8); + + /* swap using fast memcpy() */ + memcpy(tmp, pa, es); + memcpy(pa, pb, es); + memcpy(pb, tmp, es); + } else { + /* swap byte-by-byte to avoid huge stack usage */ + for (size_t x = 0; x != es; x++) { + char tmp; + + /* swap */ + tmp = pa[x]; + pa[x] = pb[x]; + pb[x] = tmp; + } + } +} + +/* The following function returns true when the array is completely sorted. */ +static inline bool +bsort_complete(void *ptr, const size_t lim, const size_t es, ARG_PROTO cmp_t *fn) +{ + for (size_t x = 1; x != lim; x++) { + if (CMP(fn, arg, ptr, (char *)ptr + es) > 0) + return (false); + ptr = (char *)ptr + es; + } + return (true); +} + +static inline void +bsort_xform(char *ptr, const size_t n, size_t lim, const size_t es, ARG_PROTO cmp_t *fn) +{ +#define BSORT_TABLE_MAX (1UL << 4) + size_t x, y, z; + unsigned t, u, v; + size_t p[BSORT_TABLE_MAX]; + char *q[BSORT_TABLE_MAX]; + + lim *= es; + x = n; + for (;;) { + /* optimise */ + if (x >= BSORT_TABLE_MAX) + v = BSORT_TABLE_MAX; + else if (x >= 2) + v = x; + else + break; + + /* divide down */ + x /= v; + + /* generate ramp table */ + for (t = 0; t != v; t += 2) { + p[t] = (t * x); + p[t + 1] = (t + 2) * x - 1; + } + + /* bitonic sort */ + for (y = 0; y != n; y += (v * x)) { + for (z = 0; z != x; z++) { + const size_t w = y + z; + + /* p[0] is always zero and is omitted */ + q[0] = ptr + w * es; + + /* insertion sort */ + for (t = 1; t != v; t++) { + q[t] = ptr + (w ^ p[t]) * es; + + /* check for array lengths not power of two */ + if ((size_t)(q[t] - ptr) >= lim) + break; + for (u = t; u != 0 && CMP(fn, arg, q[u - 1], q[u]) > 0; u--) + bsort_swap(q[u - 1], q[u], es); + } + } + } + } +} + +static void +local_bsort(void *ptr, const size_t n, const size_t es, ARG_PROTO cmp_t *fn) +{ + size_t max; + + /* if there are less than 2 elements, then sorting is not needed */ + if (__predict_false(n < 2)) + return; + + /* compute power of two, into max */ + if (sizeof(size_t) <= sizeof(unsigned long)) + max = 1UL << (8 * sizeof(unsigned long) - __builtin_clzl(n) - 1); + else + max = 1ULL << (8 * sizeof(unsigned long long) - __builtin_clzll(n) - 1); + + /* round up power of two, if needed */ + if (!powerof2(n)) + max <<= 1; + + /* optimize common sort scenarios */ + switch (es) { + case 8: + while (!bsort_complete(ptr, n, 8, ARG_NAME fn)) + bsort_xform(ptr, max, n, 8, ARG_NAME fn); + break; + case 4: + while (!bsort_complete(ptr, n, 4, ARG_NAME fn)) + bsort_xform(ptr, max, n, 4, ARG_NAME fn); + break; + case 2: + while (!bsort_complete(ptr, n, 2, ARG_NAME fn)) + bsort_xform(ptr, max, n, 2, ARG_NAME fn); + break; + case 1: + while (!bsort_complete(ptr, n, 1, ARG_NAME fn)) + bsort_xform(ptr, max, n, 1, ARG_NAME fn); + break; + case 0: + /* undefined behaviour */ + break; + default: + while (!bsort_complete(ptr, n, es, ARG_NAME fn)) + bsort_xform(ptr, max, n, es, ARG_NAME fn); + break; + } +} + +#if defined(I_AM_BSORT_R) +void +bsort_r(void *a, size_t n, size_t es, cmp_t *cmp, void *arg) +{ + local_bsort(a, n, es, cmp, arg); +} +#elif defined(I_AM_BSORT_S) +errno_t +bsort_s(void *a, rsize_t n, rsize_t es, cmp_t *cmp, void *arg) +{ + if (n > RSIZE_MAX) { + __throw_constraint_handler_s("bsort_s : n > RSIZE_MAX", EINVAL); + return (EINVAL); + } else if (es > RSIZE_MAX) { + __throw_constraint_handler_s("bsort_s : es > RSIZE_MAX", + EINVAL); + return (EINVAL); + } else if (n != 0) { + if (a == NULL) { + __throw_constraint_handler_s("bsort_s : a == NULL", + EINVAL); + return (EINVAL); + } else if (cmp == NULL) { + __throw_constraint_handler_s("bsort_s : cmp == NULL", + EINVAL); + return (EINVAL); + } else if (es <= 0) { + __throw_constraint_handler_s("bsort_s : es <= 0", + EINVAL); + return (EINVAL); + } + } + + local_bsort(a, n, es, cmp, arg); + return (0); +} +#else +void +bsort(void *a, size_t n, size_t es, cmp_t *cmp) +{ + local_bsort(a, n, es, cmp); +} +#endif diff --git a/lib/libc/stdlib/bsort_r.c b/lib/libc/stdlib/bsort_r.c new file mode 100644 index 000000000000..762f3c5aa1ec --- /dev/null +++ b/lib/libc/stdlib/bsort_r.c @@ -0,0 +1,25 @@ +/* + * This file is in the public domain. + */ +#include "block_abi.h" +#define I_AM_BSORT_R +#include "bsort.c" + +typedef DECLARE_BLOCK(int, bsort_block, const void *, const void *); + +static int +bsort_b_compare(const void *pa, const void *pb, void *arg) +{ + bsort_block compar; + int (*cmp)(void *, const void *, const void *); + + compar = arg; + cmp = (void *)compar->invoke; + return (cmp(compar, pa, pb)); +} + +void +bsort_b(void *base, size_t nel, size_t width, bsort_block compar) +{ + bsort_r(base, nel, width, &bsort_b_compare, compar); +} diff --git a/lib/libc/stdlib/bsort_s.c b/lib/libc/stdlib/bsort_s.c new file mode 100644 index 000000000000..57abde76a257 --- /dev/null +++ b/lib/libc/stdlib/bsort_s.c @@ -0,0 +1,5 @@ +/* + * This file is in the public domain. + */ +#define I_AM_BSORT_S +#include "bsort.c"