git: 127709d30a2b - main - lib/libc/gen: use Lemire's algorithm for arc4random_uniform().

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Mon, 02 Dec 2024 10:51:05 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=127709d30a2b8a38be995dc5053390947895a723

commit 127709d30a2b8a38be995dc5053390947895a723
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2024-11-18 12:30:55 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2024-12-02 10:41:11 +0000

    lib/libc/gen: use Lemire's algorithm for arc4random_uniform().
    
    Daniel Lemire has published a more efficient range reduction algorithm
    for finding a random number in a given range without bias, reducing the
    number of divisions to none in the common case and one when the initial
    sample may have to be rejected.
    
    This improves performance by 22% on amd64, 15% on i386, and 70% on armv7.
    
    os: FreeBSD
    arch: amd64
    cpu: Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
                       │ benchmark.out │
                       │    sec/op     │
    Arc4random_uniform     56.53n ± 0%
    Fast_uniform           44.00n ± 0%
    geomean                49.87n
    
    Reviewed by:    cem
    Approved by:    emaste
    Differential Revision:  https://reviews.freebsd.org/D47659
---
 lib/libc/gen/arc4random.3         | 13 ++++++-
 lib/libc/gen/arc4random_uniform.c | 71 +++++++++++++++++----------------------
 2 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/lib/libc/gen/arc4random.3 b/lib/libc/gen/arc4random.3
index 1d5b3b9f43cb..1b042f15f000 100644
--- a/lib/libc/gen/arc4random.3
+++ b/lib/libc/gen/arc4random.3
@@ -30,7 +30,7 @@
 .\"
 .\" Manual page, using -mandoc macros
 .\"
-.Dd April 13, 2020
+.Dd November 18, 2024
 .Dt ARC4RANDOM 3
 .Os
 .Sh NAME
@@ -129,6 +129,17 @@ functions using
 .%O Document ID: 4027b5256e17b9796842e6d0f68b0b5e
 .%U http://cr.yp.to/papers.html#chacha
 .Re
+.Rs
+.%A Daniel Lemire
+.%T Fast Random Integer Generation in an Interval
+.%D January 2019
+.%J ACM Trans. Model. Comput. Simul.
+.%I Association for Computing Machinery
+.%C New York, NY, USA
+.%V vol. 29
+.%N no. 1
+.%P pp. 1\(en12
+.Re
 .Sh HISTORY
 These functions first appeared in
 .Ox 2.1 .
diff --git a/lib/libc/gen/arc4random_uniform.c b/lib/libc/gen/arc4random_uniform.c
index 06cd29c6dbe4..23455e545899 100644
--- a/lib/libc/gen/arc4random_uniform.c
+++ b/lib/libc/gen/arc4random_uniform.c
@@ -1,56 +1,47 @@
-/*	$OpenBSD: arc4random_uniform.c,v 1.3 2019/01/20 02:59:07 bcook Exp $	*/
-
-/*
- * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+/*-
+ * SPDX-License-Identifier: 0BSD
  *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * Copyright (c) Robert Clausecker <fuz@FreeBSD.org>
+ * Based on a publication by Daniel Lemire.
+ * Public domain where applicable.
  *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * Daniel Lemire, "Fast Random Integer Generation in an Interval",
+ * Association for Computing Machinery, ACM Trans. Model. Comput. Simul.,
+ * no. 1, vol. 29, pp. 1--12, New York, NY, USA, January 2019.
  */
 
 #include <stdint.h>
 #include <stdlib.h>
 
-/*
- * Calculate a uniformly distributed random number less than upper_bound
- * avoiding "modulo bias".
- *
- * Uniformity is achieved by generating new random numbers until the one
- * returned is outside the range [0, 2**32 % upper_bound).  This
- * guarantees the selected random number will be inside
- * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
- * after reduction modulo upper_bound.
- */
 uint32_t
 arc4random_uniform(uint32_t upper_bound)
 {
-	uint32_t r, min;
-
-	if (upper_bound < 2)
-		return 0;
-
-	/* 2**32 % x == (2**32 - x) % x */
-	min = -upper_bound % upper_bound;
+	uint64_t product;
 
 	/*
-	 * This could theoretically loop forever but each retry has
-	 * p > 0.5 (worst case, usually far better) of selecting a
-	 * number inside the range we need, so it should rarely need
-	 * to re-roll.
+	 * The paper uses these variable names:
+	 *
+	 * L -- log2(UINT32_MAX+1)
+	 * s -- upper_bound
+	 * x -- arc4random() return value
+	 * m -- product
+	 * l -- (uint32_t)product
+	 * t -- threshold
 	 */
-	for (;;) {
-		r = arc4random();
-		if (r >= min)
-			break;
+
+	if (upper_bound <= 1)
+		return (0);
+
+	product = upper_bound * (uint64_t)arc4random();
+
+	if ((uint32_t)product < upper_bound) {
+		uint32_t threshold;
+
+		/* threshold = (2**32 - upper_bound) % upper_bound */
+		threshold = -upper_bound % upper_bound;
+		while ((uint32_t)product < threshold)
+			product = upper_bound * (uint64_t)arc4random();
 	}
 
-	return r % upper_bound;
+	return (product >> 32);
 }