git: 3f224333af16 - main - lib/libc/aarch64/string: add timingsafe_memcmp() assembly implementation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 10 Jan 2025 15:04:05 UTC
/*
 * The branch main has been updated by fuz:
 *
 * URL: https://cgit.FreeBSD.org/src/commit/?id=3f224333af163d5fcd7547a20993dcf18f19076c
 *
 * commit 3f224333af163d5fcd7547a20993dcf18f19076c
 * Author:     Robert Clausecker <fuz@FreeBSD.org>
 * AuthorDate: 2024-12-09 09:50:00 +0000
 * Commit:     Robert Clausecker <fuz@FreeBSD.org>
 * CommitDate: 2025-01-10 15:02:41 +0000
 *
 *     lib/libc/aarch64/string: add timingsafe_memcmp() assembly implementation
 *
 *     A port of the amd64 implementation with some slight changes due to
 *     differences in instructions provided by aarch64.
 *
 *     No ASIMD for the same reason as the amd64 code: it's just not
 *     particularly suitable for this application.
 *
 *     Event:                 EuroBSDcon 2024
 *     Approved by:           security (cperciva)
 *     Reviewed by:           getz, cperciva
 *     Differential Revision: https://reviews.freebsd.org/D46758
 *
 * Files changed (2 files, 118 insertions):
 *     lib/libc/aarch64/string/Makefile.inc        |   1 +
 *     lib/libc/aarch64/string/timingsafe_memcmp.S | 117 +
 *
 * Makefile.inc (index 8019ab4adafc..9574aad95933) gains the entry
 * "timingsafe_memcmp.S" in MDSRCS, directly after "timingsafe_bcmp.S".
 * timingsafe_memcmp.S is a new file (index 000000000000..28fdd911a387);
 * its contents follow.
 *
 * Review note: the diff as posted closed the function with
 * END(timingsafe_bcmp).  That does not match ENTRY(timingsafe_memcmp),
 * so it is corrected to END(timingsafe_memcmp) below.  No instruction
 * was changed; the other edits are to comments only.
 */

/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */

#include <machine/asm.h>

/*
 * timingsafe_memcmp -- order two equally long buffers bytewise.
 *
 * Presumably called as
 *	int timingsafe_memcmp(const void *b1, const void *b2, size_t len);
 * (prototype not visible here -- confirm against <string.h>).
 *
 * In:	x0 = first buffer, x1 = second buffer, x2 = length in bytes
 * Out:	w0 < 0, == 0 or > 0 if the first buffer sorts before, equal to
 *	or after the second one when bytes are compared as unsigned
 *	values.  The 1-2 byte path returns the difference of two 16 bit
 *	keys; every other path returns exactly -1, 0 or 1.
 * Clobbers: x0-x8 and the condition flags.  Leaf routine, no stack use.
 *
 * Every conditional branch below tests only the length in x2, and every
 * load address is formed from the pointers and the length alone.
 * Decisions that depend on buffer contents are made with
 * csel/csetm/csinc instead of branches.
 *
 * Method: the bytes that decide the result are collected into one
 * integer per buffer, with the lowest-addressed byte made the most
 * significant one (the rev instructions do this; the code relies on
 * little-endian loads, as the "little endian" comments below state).
 * A single unsigned comparison of the two integers then yields the
 * ordering.  Buffers that are not a multiple of the access size are
 * covered by a second, overlapping load that ends at the last byte.
 */
ENTRY(timingsafe_memcmp)
	cmp	x2, #16			// at least 17 bytes to process?
	bhi	.Lgt16

	cmp	x2, #8			// at least 9 bytes to process?
	bhi	.L0916

	cmp	x2, #4			// at least 5 bytes to process?
	bhi	.L0508

	cmp	x2, #2			// at least 3 bytes to process?
	bhi	.L0304

	cbnz	x2, .L0102		// buffer not empty?

	mov	w0, #0			// empty buffer always matches
	ret

	/* 1--2 bytes: first and last byte (the same byte if len == 1) */
.L0102:	ldrb	w3, [x0]		// load first bytes
	ldrb	w4, [x1]
	sub	x2, x2, #1
	ldrb	w5, [x0, x2]		// load last bytes
	ldrb	w6, [x1, x2]
	bfi	w5, w3, #8, #8		// join bytes in big endian
	bfi	w6, w4, #8, #8
	sub	w0, w5, w6		// both keys are < 2^16: no overflow
	ret

	/* 3--4 bytes: first and last halfword (overlapping if len == 3) */
.L0304:	ldrh	w3, [x0]		// load first halfwords
	ldrh	w4, [x1]
	sub	x2, x2, #2
	ldrh	w5, [x0, x2]		// load last halfwords
	ldrh	w6, [x1, x2]
	bfi	w3, w5, #16, #16	// join halfwords in little endian
	bfi	w4, w6, #16, #16
	rev	w3, w3			// reverse bytes: first byte on top
	rev	w4, w4
	cmp	w3, w4
	csetm	w0, lo			// w0 = w3 >= w4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = w3 <=> w4 ? 1 : 0 : -1
	ret

	/* 5--8 bytes: first and last word (overlapping if len < 8) */
.L0508:	ldr	w3, [x0]		// load first words
	ldr	w4, [x1]
	sub	x2, x2, #4
	ldr	w5, [x0, x2]		// load last words
	ldr	w6, [x1, x2]
	bfi	x3, x5, #32, #32	// join words in little endian
	bfi	x4, x6, #32, #32
	rev	x3, x3			// reverse bytes: first byte on top
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x4 ? 1 : 0 : -1
	ret

	/* 9--16 bytes: first and last doubleword (overlapping if len < 16) */
.L0916:	ldr	x3, [x0]		// load first doublewords
	ldr	x4, [x1]
	sub	x2, x2, #8
	ldr	x5, [x0, x2]		// load last doublewords
	ldr	x6, [x1, x2]
	cmp	x3, x4			// mismatch in first pair?
	csel	x3, x3, x5, ne		// use second pair if first pair equal
	csel	x4, x4, x6, ne
	rev	x3, x3			// reverse bytes: first byte on top
	rev	x4, x4
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 >= x4 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x4 ? 1 : 0 : -1
	ret

	/*
	 * More than 16 bytes: process buffer in a loop.
	 *
	 * x3/x5 carry the deciding doubleword pair: the earliest pair
	 * found to differ so far, or else the most recent pair looked
	 * at.  Once they differ, later chunks no longer replace them.
	 */
.Lgt16:	ldp	x3, x4, [x0], #16	// first 16 bytes, advance pointers
	ldp	x5, x6, [x1], #16
	cmp	x3, x5			// mismatch in first pair?
	csel	x3, x3, x4, ne		// use second pair if first pair equal
	csel	x5, x5, x6, ne
	subs	x2, x2, #32		// x2 = bytes left beyond the 16 byte tail
	bls	.Ltail			// len <= 32: the tail covers the rest

0:	ldp	x4, x7, [x0], #16	// next 16 bytes, advance pointers
	ldp	x6, x8, [x1], #16
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	subs	x2, x2, #16
	bhi	0b			// more than 16 bytes still unread?

	/*
	 * Here x2 is in -15..0.  Adding it steps the pointers back so
	 * that the final 16 byte load ends exactly at the end of the
	 * buffers, possibly overlapping bytes already looked at.
	 */
.Ltail:	add	x0, x0, x2
	add	x1, x1, x2
	ldp	x4, x7, [x0]
	ldp	x6, x8, [x1]
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	rev	x3, x3			// reverse bytes: first byte on top
	rev	x5, x5
	cmp	x3, x5
	csetm	w0, lo			// w0 = x3 >= x5 ? 0 : -1
	csinc	w0, w0, wzr, ls		// w0 = x3 <=> x5 ? 1 : 0 : -1
	ret
END(timingsafe_memcmp)