git: c16fc9eae3ad - stable/13 - amd64: import asm strlen into libc
Mateusz Guzik
mjg at FreeBSD.org
Sat Apr 10 13:58:34 UTC 2021
The branch stable/13 has been updated by mjg:
URL: https://cgit.FreeBSD.org/src/commit/?id=c16fc9eae3adca98f6d12ec4f54e043db1f8902b
commit c16fc9eae3adca98f6d12ec4f54e043db1f8902b
Author: Mateusz Guzik <mjg at FreeBSD.org>
AuthorDate: 2021-02-21 21:20:04 +0000
Commit: Mateusz Guzik <mjg at FreeBSD.org>
CommitDate: 2021-04-10 13:54:12 +0000
amd64: import asm strlen into libc
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D28845
(cherry picked from commit 7f06b217c53c3f5e4ac81eb11125adfb71359ac6)
---
lib/libc/amd64/string/Makefile.inc | 1 +
lib/libc/amd64/string/strlen.S | 81 ++++++++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+)
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
index db88ac723539..cb370bc6be1c 100644
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -8,4 +8,5 @@ MDSRCS+= \
memset.S \
strcat.S \
strcmp.S \
+ strlen.S \
stpcpy.S
diff --git a/lib/libc/amd64/string/strlen.S b/lib/libc/amd64/string/strlen.S
new file mode 100644
index 000000000000..1d2428e3420e
--- /dev/null
+++ b/lib/libc/amd64/string/strlen.S
@@ -0,0 +1,81 @@
+/*
+ * Written by Mateusz Guzik <mjg at freebsd.org>
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Note: this routine was written with kernel use in mind (read: no simd),
+ * it is only present in userspace as a temporary measure until something
+ * better gets imported.
+ */
+
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+/*
+ * strlen(string)
+ * %rdi
+ *
+ * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
+ *
+ * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
+ * with leaq.
+ *
+ * For a description see either:
+ * - "Hacker's Delight" by Henry S. Warren, Jr.
+ * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
+ * by Agner Fog
+ *
+ * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
+ */
+ENTRY(strlen) /* in: %rdi = string; out: %rax = byte count before the first NUL */
+ movabsq $0xfefefefefefefeff,%r8 /* %r8 = 0x0 - 0x01....01, the addend used with leaq */
+ movabsq $0x8080808080808080,%r9 /* %r9 = 0x80....80, the high bit of every byte */
+
+ movq %rdi,%r10 /* %r10 = original pointer, subtracted at the end */
+ movq %rdi,%rcx /* %rcx = pointer copy; its low 3 bits are the misalignment */
+ testb $7,%dil /* already 8-byte aligned? */
+ jz 2f /* yes: enter the main loop at its load */
+
+ /*
+ * Handle misaligned reads: round the pointer down to 8 and set the
+ * spurious bytes (those before the string) to 0xff so they never match.
+ */
+ andq $~7,%rdi /* %rdi = pointer rounded down to an 8-byte boundary */
+ movq (%rdi),%r11 /* %r11 = aligned word that contains the string start */
+ shlq $3,%rcx /* pointer * 8; the low 6 bits (all %cl shifts use) = misalignment in bits */
+ movq $-1,%rdx /* %rdx = all ones */
+ shlq %cl,%rdx /* zero the low bits, i.e. the bytes before the string */
+ notq %rdx /* %rdx = ones only in the bytes before the string */
+ orq %rdx,%r11 /* those bytes become 0xff (non-zero) */
+
+ leaq (%r11,%r8),%rcx /* %rcx = x - 0x01....01 */
+ notq %r11 /* %r11 = ~x */
+ andq %r11,%rcx /* %rcx = (x - 0x01....01) & ~x */
+ andq %r9,%rcx /* ... & 0x80....80; non-zero iff x has a zero byte */
+ jnz 3f /* terminator is inside the first word */
+
+ /*
+ * Main loop: test one aligned 8-byte word per iteration.
+ */
+ ALIGN_TEXT /* the misaligned path falls through this padding into 1: */
+1:
+ leaq 8(%rdi),%rdi /* advance to the next 8-byte word */
+2:
+ movq (%rdi),%r11 /* %r11 = x, the current word */
+ leaq (%r11,%r8),%rcx /* %rcx = x - 0x01....01 */
+ notq %r11 /* %r11 = ~x */
+ andq %r11,%rcx /* %rcx = (x - 0x01....01) & ~x */
+ andq %r9,%rcx /* ... & 0x80....80; sets ZF when no zero byte was found */
+ jz 1b /* no zero byte in this word: keep scanning */
+3:
+ bsfq %rcx,%rcx /* %rcx = index of the lowest set marker bit */
+ shrq $3,%rcx /* bit index / 8 = byte offset of that marker in the word */
+ leaq (%rcx,%rdi),%rax /* %rax = address of the terminating byte */
+ subq %r10,%rax /* length = terminator address - original pointer */
+ ret
+END(strlen)
+
+ .section .note.GNU-stack,"",%progbits
More information about the dev-commits-src-branches
mailing list