git: 3f224333af16 - main - lib/libc/aarch64/string: add timingsafe_memcmp() assembly implementation

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Fri, 10 Jan 2025 15:04:05 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=3f224333af163d5fcd7547a20993dcf18f19076c

commit 3f224333af163d5fcd7547a20993dcf18f19076c
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2024-12-09 09:50:00 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-01-10 15:02:41 +0000

    lib/libc/aarch64/string: add timingsafe_memcmp() assembly implementation
    
    A port of the amd64 implementation with some slight changes due to
    differences in instructions provided by aarch64.
    
    No ASIMD for the same reason as the amd64 code: it's just not particularly
    suitable for this application.
    
    Event:          EuroBSDcon 2024
    Approved by:    security (cperciva)
    Reviewed by:    getz, cperciva
    Differential Revision:  https://reviews.freebsd.org/D46758
---
 lib/libc/aarch64/string/Makefile.inc        |   1 +
 lib/libc/aarch64/string/timingsafe_memcmp.S | 117 ++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 8019ab4adafc..9574aad95933 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -32,6 +32,7 @@ MDSRCS+= \
 	strlcat.c \
 	strlen.S \
 	timingsafe_bcmp.S \
+	timingsafe_memcmp.S \
 	bcopy.c \
 	bzero.c
 
diff --git a/lib/libc/aarch64/string/timingsafe_memcmp.S b/lib/libc/aarch64/string/timingsafe_memcmp.S
new file mode 100644
index 000000000000..28fdd911a387
--- /dev/null
+++ b/lib/libc/aarch64/string/timingsafe_memcmp.S
@@ -0,0 +1,117 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+ENTRY(timingsafe_memcmp)
+	cmp	x2, #16			// x2 = length: at least 17 bytes?
+	bhi	.Lgt16
+
+	cmp	x2, #8			// at least 9 bytes to process?
+	bhi	.L0916
+
+	cmp	x2, #4			// at least 5 bytes to process?
+	bhi	.L0508
+
+	cmp	x2, #2			// at least 3 bytes to process?
+	bhi	.L0304
+
+	cbnz	x2, .L0102		// buffer empty?
+
+	mov	w0, #0			// empty buffer always matches
+	ret
+
+.L0102:	ldrb	w3, [x0]		// load first bytes
+	ldrb	w4, [x1]
+	sub	x2, x2, #1		// offset of last byte (0 or 1)
+	ldrb	w5, [x0, x2]		// load last bytes
+	ldrb	w6, [x1, x2]
+	bfi	w5, w3, #8, #8		// join bytes in big endian
+	bfi	w6, w4, #8, #8
+	sub	w0, w5, w6		// 16 bit operands: sign is the result
+	ret
+
+
+.L0304:	ldrh	w3, [x0]		// load first halfwords
+	ldrh	w4, [x1]
+	sub	x2, x2, #2		// offset of last halfword (1 or 2)
+	ldrh	w5, [x0, x2]		// load last halfwords
+	ldrh	w6, [x1, x2]
+	bfi	w3, w5, #16, #16	// join halfwords in little endian
+	bfi	w4, w6, #16, #16
+	rev	w3, w3			// swap byte order: first byte on top
+	rev	w4, w4
+	cmp	w3, w4
+	csetm	w0, lo			// w0 = w3 >= w4 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = w3 <=> w4 ? 1 : 0 : -1
+	ret
+
+.L0508:	ldr	w3, [x0]		// load first words
+	ldr	w4, [x1]
+	sub	x2, x2, #4		// offset of last word (1 to 4)
+	ldr	w5, [x0, x2]		// load last words
+	ldr	w6, [x1, x2]
+	bfi	x3, x5, #32, #32	// join words in little endian
+	bfi	x4, x6, #32, #32
+	rev	x3, x3			// swap byte order: first byte on top
+	rev	x4, x4
+	cmp	x3, x4
+	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x4 ? 1 : 0 : -1
+	ret
+
+.L0916:	ldr	x3, [x0]		// load first doublewords
+	ldr	x4, [x1]
+	sub	x2, x2, #8		// offset of last doubleword (1 to 8)
+	ldr	x5, [x0, x2]		// load last doublewords
+	ldr	x6, [x1, x2]
+	cmp	x3, x4			// mismatch in first pair?
+	csel	x3, x3, x5, ne		// use second pair if first pair equal
+	csel	x4, x4, x6, ne
+	rev	x3, x3			// swap byte order: first byte on top
+	rev	x4, x4
+	cmp	x3, x4
+	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x4 ? 1 : 0 : -1
+	ret
+
+	/* more than 16 bytes: process buffer in a loop */
+.Lgt16:	ldp	x3, x4, [x0], #16
+	ldp	x5, x6, [x1], #16
+	cmp	x3, x5			// mismatch in first pair?
+	csel	x3, x3, x4, ne		// use second pair if first pair equal
+	csel	x5, x5, x6, ne
+	subs	x2, x2, #32		// 16 bytes done, 16 left for the tail
+	bls	.Ltail			// at most 32 bytes: skip the loop
+
+0:	ldp	x4, x7, [x0], #16
+	ldp	x6, x8, [x1], #16
+	cmp	x4, x6			// mismatch in first pair?
+	csel	x4, x4, x7, ne		// if not, try second pair
+	csel	x6, x6, x8, ne
+	cmp	x3, x5			// was there a mismatch previously?
+	csel	x3, x3, x4, ne		// apply new pair if there was not
+	csel	x5, x5, x6, ne
+	subs	x2, x2, #16
+	bhi	0b			// loop while bytes remain before tail
+
+.Ltail:	add	x0, x0, x2		// x2 in [-15, 0]: back up so that the
+	add	x1, x1, x2		// loads cover the last 16 bytes
+	ldp	x4, x7, [x0]
+	ldp	x6, x8, [x1]
+	cmp	x4, x6			// mismatch in first pair?
+	csel	x4, x4, x7, ne		// if not, try second pair
+	csel	x6, x6, x8, ne
+	cmp	x3, x5			// was there a mismatch previously?
+	csel	x3, x3, x4, ne		// apply new pair if there was not
+	csel	x5, x5, x6, ne
+	rev	x3, x3			// swap byte order: first byte on top
+	rev	x5, x5
+	cmp	x3, x5
+	csetm	w0, lo			// w0 = x3 >= x5 ? 0 : -1
+	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x5 ? 1 : 0 : -1
+	ret
+END(timingsafe_memcmp)