git: b91003acffe7 - main - lib/libc/aarch64/string: add strspn optimized implementation
Date: Fri, 10 Jan 2025 15:03:49 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=b91003acffe7b50dd6506be15116c6b42fc512c6

commit b91003acffe7b50dd6506be15116c6b42fc512c6
Author:     Getz Mikalsen <getz@FreeBSD.org>
AuthorDate: 2024-08-26 18:13:54 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-01-10 15:02:39 +0000

    lib/libc/aarch64/string: add strspn optimized implementation

    This is a port of the scalar-optimized variant of strspn for amd64 to
    aarch64. It utilizes a LUT to speed up the function; a SIMD variant is
    still under development. See the differential revision for benchmark
    results.

    Tested by:      fuz (exprun)
    Reviewed by:    fuz, emaste
    Sponsored by:   Google LLC (GSoC 2024)
    PR:             281175
    Differential Revision:  https://reviews.freebsd.org/D46396
---
 lib/libc/aarch64/string/Makefile.inc |   4 +-
 lib/libc/aarch64/string/strspn.S     | 111 +++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/lib/libc/aarch64/string/Makefile.inc b/lib/libc/aarch64/string/Makefile.inc
index ba0947511872..09bfaef963eb 100644
--- a/lib/libc/aarch64/string/Makefile.inc
+++ b/lib/libc/aarch64/string/Makefile.inc
@@ -21,7 +21,9 @@ AARCH64_STRING_FUNCS= \
 
 # SIMD-enhanced routines not derived from Arm's code
 MDSRCS+= \
-	strcmp.S
+	strcmp.S \
+	strspn.S
+
 #
 # Add the above functions. Generate an asm file that includes the needed
 # Arm Optimized Routines file defining the function name to the libc name.
diff --git a/lib/libc/aarch64/string/strspn.S b/lib/libc/aarch64/string/strspn.S
new file mode 100644
index 000000000000..0ef42c2b737e
--- /dev/null
+++ b/lib/libc/aarch64/string/strspn.S
@@ -0,0 +1,111 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
+*/
+
+#include <machine/asm.h>
+
+	.weak	strspn
+	.set	strspn, __strspn
+	.text
+
+ENTRY(__strspn)
+
+	/* check for special cases */
+	ldrb	w4, [x1]		// first character in set
+	cbz	w4, .Lzero		// empty set always returns 0
+
+	mov	x15, #1			// preload register with 1 for stores
+
+	// set is only one character
+	ldrb	w5, [x1, #1]		// second character in the set
+	cbz	w5, .Lsingle
+
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	sub	sp, sp, #256		// allocate 256 bytes on the stack
+
+	/* no special case matches -- prepare lookup table */
+	mov	w3, #28
+0:	add	x9, sp, x3, lsl #3
+	stp	xzr, xzr, [x9]
+	stp	xzr, xzr, [x9, #16]
+	subs	w3, w3, #4
+	b.cs	0b
+
+	strb	w15, [sp, x4]		// register first character in set
+	add	x1, x1, #2
+
+	/* process remaining chars in set */
+	.p2align 4
+
+0:	ldrb	w4, [x1]		// next char in set
+	strb	w15, [sp, x5]		// register previous char
+	cbz	w4, 1f			// NUL encountered?
+
+	ldrb	w5, [x1, #1]
+	add	x1, x1, #2
+	strb	w15, [sp, x4]
+	cbnz	w5, 0b
+
+1:	mov	x5, x0			// stash a copy of src
+
+	/* find mismatch */
+	.p2align 4
+0:	ldrb	w8, [x0]
+	ldrb	w9, [sp, x8]
+	cbz	w9, 2f
+
+	ldrb	w8, [x0, #1]
+	ldrb	w9, [sp, x8]
+	cbz	w9, 3f
+
+	ldrb	w8, [x0, #2]
+	ldrb	w9, [sp, x8]
+	cbz	w9, 4f
+
+	ldrb	w8, [x0, #3]
+	add	x0, x0, #4
+	ldrb	w9, [sp, x8]
+	cbnz	w9, 0b
+
+	sub	x0, x0, #3
+4:	sub	x5, x5, #1
+3:	add	x0, x0, #1
+2:	sub	x0, x0, x5
+	mov	sp, x29
+	ldp	x29, x30, [sp], #16
+	ret
+
+.Lzero:
+	mov	x0, #0
+	ret
+
+.Lsingle:
+	ldrb	w8, [x0, x5]
+	cmp	w4, w8
+	b.ne	1f
+
+	add	x5, x5, #1
+	ldrb	w8, [x0, x5]
+	cmp	w4, w8
+	b.ne	1f
+
+	add	x5, x5, #1
+	ldrb	w8, [x0, x5]
+	cmp	w4, w8
+	b.ne	1f
+
+	add	x5, x5, #1
+	ldrb	w8, [x0, x5]
+	add	x5, x5, #1
+	cmp	w4, w8
+	b.eq	.Lsingle
+
+	sub	x5, x5, #1
+1:	mov	x0, x5
+	ret
+
+END(__strspn)
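For readers unfamiliar with the technique, here is a minimal C sketch of
the same lookup-table approach (the function name strspn_lut and the code
are illustrative only and not part of the commit): the set is expanded
once into a 256-byte table indexed by character value, so scanning the
source string costs one table load per byte instead of a pass over the
set for every byte.

    #include <stddef.h>

    /*
     * Illustrative sketch, not the committed code: build a 256-entry
     * table marking every byte that occurs in the set, then count how
     * many leading bytes of s are marked.  NUL is never marked, so the
     * scan loop stops at the end of the string automatically.
     */
    static size_t
    strspn_lut(const char *s, const char *set)
    {
            unsigned char table[256] = { 0 };
            size_t i;

            for (; *set != '\0'; set++)
                    table[(unsigned char)*set] = 1;

            for (i = 0; table[(unsigned char)s[i]] != 0; i++)
                    ;

            return (i);
    }

The assembly above follows the same structure but additionally
special-cases the empty set (.Lzero) and single-character sets
(.Lsingle), keeps the table in 256 bytes of stack space cleared 32 bytes
per iteration with paired xzr stores, and unrolls both loops four ways;
the fix-up labels 4:, 3:, and 2: at the end of the scan loop adjust the
returned count by the offset of the mismatching byte within its
four-byte group.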