git: f2c98669fc1b - main - lib/libc/aarch64/string: add ASIMD-enhanced timingsafe_bcmp implementation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 10 Jan 2025 15:04:04 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=f2c98669fc1b3fd2dbc7a7e3eedd098970a10dec

commit f2c98669fc1b3fd2dbc7a7e3eedd098970a10dec
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2024-12-09 09:49:49 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-01-10 15:02:41 +0000

    lib/libc/aarch64/string: add ASIMD-enhanced timingsafe_bcmp implementation

    A straightforward port of the amd64 implementation.

    Approved by:    security (cperciva)
    Reviewed by:    getz, cperciva
    Event:          EuroBSDcon 2024
    Differential Revision:  https://reviews.freebsd.org/D46757
---
 lib/libc/aarch64/string/Makefile.inc      |   1 +
 lib/libc/aarch64/string/timingsafe_bcmp.S | 113 ++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index 752cc6d9900b..8019ab4adafc 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -31,6 +31,7 @@ MDSRCS+= \
 	strncat.c \
 	strlcat.c \
 	strlen.S \
+	timingsafe_bcmp.S \
 	bcopy.c \
 	bzero.c
 
diff --git a/lib/libc/aarch64/string/timingsafe_bcmp.S b/lib/libc/aarch64/string/timingsafe_bcmp.S
new file mode 100644
index 000000000000..baa5c6f0940c
--- /dev/null
+++ b/lib/libc/aarch64/string/timingsafe_bcmp.S
@@ -0,0 +1,113 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker
+ */
+
+#include <machine/asm.h>
+
+ENTRY(timingsafe_bcmp)		// in: x0, x1 = buffers, x2 = length; out: w0 = 0 iff all bytes match
+	cmp	x2, #32		// at least 33 bytes to process?
+	bhi	.Lgt32		// dispatch on the length only (unsigned compares)
+
+	cmp	x2, #16		// at least 17 bytes to process?
+	bhi	.L1732
+
+	cmp	x2, #8		// at least 9 bytes to process?
+	bhi	.L0916
+
+	cmp	x2, #4		// at least 5 bytes to process?
+	bhi	.L0508
+
+	cmp	x2, #2		// at least 3 bytes to process?
+	bhi	.L0304
+
+	cbnz	x2, .L0102	// 1 or 2 bytes to process? (a length of 0 falls through)
+
+	mov	w0, #0		// empty buffer always matches
+	ret
+
+.L0102:	ldrb	w3, [x0]	// load first bytes
+	ldrb	w4, [x1]
+	sub	x2, x2, #1	// offset of the last byte (0 if the length is 1)
+	ldrb	w5, [x0, x2]	// load last bytes
+	ldrb	w6, [x1, x2]
+	eor	w3, w3, w4	// nonzero iff the first bytes differ
+	eor	w5, w5, w6	// nonzero iff the last bytes differ
+	orr	w0, w3, w5	// result is nonzero iff either pair differs
+	ret
+
+.L0304:	ldrh	w3, [x0]	// load first halfwords
+	ldrh	w4, [x1]
+	sub	x2, x2, #2	// offset of the last halfword (overlaps the first if length is 3)
+	ldrh	w5, [x0, x2]	// load last halfwords
+	ldrh	w6, [x1, x2]
+	eor	w3, w3, w4	// nonzero iff the first halfwords differ
+	eor	w5, w5, w6	// nonzero iff the last halfwords differ
+	orr	w0, w3, w5	// result is nonzero iff either pair differs
+	ret
+
+.L0508:	ldr	w3, [x0]	// load first words
+	ldr	w4, [x1]
+	sub	x2, x2, #4	// offset of the last word (overlaps the first if length < 8)
+	ldr	w5, [x0, x2]	// load last words
+	ldr	w6, [x1, x2]
+	eor	w3, w3, w4	// nonzero iff the first words differ
+	eor	w5, w5, w6	// nonzero iff the last words differ
+	orr	w0, w3, w5	// result is nonzero iff either pair differs
+	ret
+
+.L0916:	ldr	x3, [x0]	// load first doublewords
+	ldr	x4, [x1]
+	sub	x2, x2, #8	// offset of the last doubleword (overlaps the first if length < 16)
+	ldr	x5, [x0, x2]	// load last doublewords
+	ldr	x6, [x1, x2]
+	eor	x3, x3, x4	// nonzero iff the first doublewords differ
+	eor	x5, x5, x6	// nonzero iff the last doublewords differ
+	orr	x0, x3, x5	// 64-bit mismatch mask
+	orr	x0, x0, x0, lsr #32	// ensure low 32 bits are nonzero iff mismatch
+	ret
+
+.L1732:	ldr	q0, [x0]	// load first 16 bytes
+	ldr	q1, [x1]
+	sub	x2, x2, #16	// offset of the last 16 bytes (overlaps the first if length < 32)
+	ldr	q2, [x0, x2]	// load last 16 bytes
+	ldr	q3, [x1, x2]
+	eor	v0.16b, v0.16b, v1.16b	// nonzero bits where the first 16 bytes differ
+	eor	v2.16b, v2.16b, v3.16b	// nonzero bits where the last 16 bytes differ
+	orr	v0.16b, v0.16b, v2.16b	// combined 128-bit mismatch mask
+	umaxv	s0, v0.4s	// get a nonzero word if any
+	mov	w0, v0.s[0]	// return that word
+	ret
+
+	/* more than 32 bytes: process buffer in a loop; every branch below depends only on the length */
+.Lgt32:	ldp	q0, q1, [x0], #32	// load first 32 bytes of each buffer, advance pointers
+	ldp	q2, q3, [x1], #32
+	eor	v0.16b, v0.16b, v2.16b
+	eor	v1.16b, v1.16b, v3.16b
+	orr	v4.16b, v0.16b, v1.16b	// v4 accumulates the mismatch bits of all chunks
+	subs	x2, x2, #64	// enough left for another iteration?
+	bls	.Ltail		// no: length <= 64, x2 = length - 64 is zero or negative
+
+0:	ldp	q0, q1, [x0], #32	// load next 32 bytes of each buffer, advance pointers
+	ldp	q2, q3, [x1], #32
+	eor	v0.16b, v0.16b, v2.16b
+	eor	v1.16b, v1.16b, v3.16b
+	orr	v0.16b, v0.16b, v1.16b
+	orr	v4.16b, v4.16b, v0.16b	// fold this chunk into the accumulator
+	subs	x2, x2, #32	// x2 = bytes not yet loaded - 32
+	bhi	0b		// loop while more than 32 bytes were still unread
+
+	/* process last 32 bytes */
+.Ltail:	add	x0, x0, x2	// point to the last 32 bytes in the buffer
+	add	x1, x1, x2	// (x2 <= 0 here, so this may step back over bytes already compared)
+	ldp	q0, q1, [x0]
+	ldp	q2, q3, [x1]
+	eor	v0.16b, v0.16b, v2.16b
+	eor	v1.16b, v1.16b, v3.16b
+	orr	v0.16b, v0.16b, v1.16b
+	orr	v4.16b, v4.16b, v0.16b	// fold the final chunk into the accumulator
+	umaxv	s0, v4.4s	// get a nonzero word if any
+	mov	w0, v0.s[0]	// return that word
+	ret
+END(timingsafe_bcmp)