svn commit: r338963 - in head/sys: amd64/amd64 conf
Mateusz Guzik
mjg at FreeBSD.org
Thu Sep 27 14:05:47 UTC 2018
Author: mjg
Date: Thu Sep 27 14:05:44 2018
New Revision: 338963
URL: https://svnweb.freebsd.org/changeset/base/338963
Log:
amd64: implement memcmp in assembly
Both the in-kernel C variant and libc asm variant have very poor performance.
The former compiles to a single byte comparison loop, which breaks down even
for small sizes. The latter uses rep cmpsq/b which turn out to have very poor
throughput and are slower than a hand-coded 32-byte comparison loop.
Depending on size this is about 3-4 times faster than the current routines.
Reviewed by: kib
Approved by: re (gjb)
Differential Revision: https://reviews.freebsd.org/D17328
Modified:
head/sys/amd64/amd64/support.S
head/sys/conf/files
head/sys/conf/files.arm
head/sys/conf/files.arm64
head/sys/conf/files.i386
head/sys/conf/files.mips
head/sys/conf/files.powerpc
head/sys/conf/files.riscv
head/sys/conf/files.sparc64
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/amd64/amd64/support.S Thu Sep 27 14:05:44 2018 (r338963)
@@ -101,6 +101,100 @@ ENTRY(sse2_pagezero)
END(sse2_pagezero)
/*
+ * memcmp(b1, b2, len)
+ * rdi,rsi,rdx
+ *
+ * Compares len bytes at b1 and b2. Returns 0 in %eax when no byte differs,
+ * otherwise the first mismatching byte of b1 minus the corresponding byte
+ * of b2 (both zero-extended before the subtraction).
+ *
+ * Lengths of 32 and above are checked in 32-byte chunks, lengths from 16
+ * to 31 in 8-byte chunks. The chunked loops only detect that a chunk
+ * differs; they then jump to the byte loop at 1:, which rescans from the
+ * start of that chunk to locate the mismatching byte. The byte loop also
+ * handles lengths below 16 and whatever tail the chunked loops leave.
+ *
+ * Modifies rax, rcx, rdx, rdi, rsi, r8, r9 and the flags.
+ */
+ENTRY(memcmp)
+ PUSH_FRAME_POINTER
+ cmpq $16,%rdx
+ jae 5f /* 16 bytes or more: use a chunked loop */
+1: /* byte loop entry: compare %rdx bytes at (%rdi) and (%rsi) */
+ testq %rdx,%rdx
+ je 3f /* nothing (left) to compare: equal */
+ xorl %ecx,%ecx /* %rcx = index of the byte being compared */
+2: /* byte loop, unrolled four times */
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f /* mismatch: %eax and %r8d hold the two bytes */
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jz 3f /* index reached the length: equal */
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jz 3f
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jz 3f
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jne 2b /* bytes remain: next unrolled round */
+3: /* equal: return 0 */
+ xorl %eax,%eax
+ POP_FRAME_POINTER
+ ret
+4: /* mismatch: return b1 byte minus b2 byte */
+ subl %r8d,%eax
+ POP_FRAME_POINTER
+ ret
+5: /* length is at least 16 */
+ cmpq $32,%rdx
+ jae 7f /* 32 or more: 32-byte loop; otherwise fall into the 8-byte loop */
+6:
+ /*
+ * 8 bytes
+ *
+ * Compare one 8-byte word per iteration, advancing both pointers
+ * and reducing %rdx by 8 after each matching word.
+ */
+ movq (%rdi),%r8
+ movq (%rsi),%r9
+ cmpq %r8,%r9
+ jne 1b /* words differ: byte loop finds the mismatching byte */
+ leaq 8(%rdi),%rdi
+ leaq 8(%rsi),%rsi
+ subq $8,%rdx
+ cmpq $8,%rdx
+ jae 6b /* at least one more full word */
+ jl 1b /* NOTE(review): %rdx is below 8 (unsigned) here, so this signed branch looks always taken; 1: tests for zero itself */
+ jmp 3b /* NOTE(review): appears unreachable given the two branches above */
+7:
+ /*
+ * 32 bytes
+ *
+ * Compare 32 bytes per iteration as two 16-byte halves. Each half
+ * subtracts the b1 words from the b2 words and ORs the results,
+ * which is nonzero if either word of the half differs.
+ */
+ movq (%rsi),%r8
+ movq 8(%rsi),%r9
+ subq (%rdi),%r8
+ subq 8(%rdi),%r9
+ or %r8,%r9
+ jnz 1b /* first half differs: byte loop rescans this chunk */
+
+ movq 16(%rsi),%r8
+ movq 24(%rsi),%r9
+ subq 16(%rdi),%r8
+ subq 24(%rdi),%r9
+ or %r8,%r9
+ jnz 1b /* second half differs: pointers not advanced yet, rescan from chunk start */
+
+ leaq 32(%rdi),%rdi
+ leaq 32(%rsi),%rsi
+ subq $32,%rdx
+ cmpq $32,%rdx
+ jae 7b /* at least one more full 32-byte chunk */
+ jnz 1b /* tail of fewer than 32 bytes goes to the byte loop; NOTE(review): looks always taken since %rdx is below 32 here */
+ jmp 3b /* NOTE(review): appears unreachable given the two branches above */
+END(memcmp)
+
+/*
* memmove(dst, src, cnt)
* rdi, rsi, rdx
* Adapted from bcopy written by:
Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files Thu Sep 27 14:05:44 2018 (r338963)
@@ -4041,7 +4041,6 @@ libkern/murmur3_32.c standard
libkern/mcount.c optional profiling-routine
libkern/memcchr.c standard
libkern/memchr.c standard
-libkern/memcmp.c standard
libkern/memmem.c optional gdb
libkern/qsort.c standard
libkern/qsort_r.c standard
Modified: head/sys/conf/files.arm
==============================================================================
--- head/sys/conf/files.arm Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.arm Thu Sep 27 14:05:44 2018 (r338963)
@@ -163,6 +163,7 @@ libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
libkern/lshrdi3.c standard
+libkern/memcmp.c standard
libkern/moddi3.c standard
libkern/qdivrem.c standard
libkern/ucmpdi2.c standard
Modified: head/sys/conf/files.arm64
==============================================================================
--- head/sys/conf/files.arm64 Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.arm64 Thu Sep 27 14:05:44 2018 (r338963)
@@ -244,6 +244,7 @@ libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
libkern/memset.c standard
libkern/arm64/crc32c_armv8.S standard
cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}"
Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386 Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.i386 Thu Sep 27 14:05:44 2018 (r338963)
@@ -548,6 +548,7 @@ kern/subr_sfbuf.c standard
libkern/divdi3.c standard
libkern/ffsll.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
libkern/memset.c standard
libkern/moddi3.c standard
libkern/qdivrem.c standard
Modified: head/sys/conf/files.mips
==============================================================================
--- head/sys/conf/files.mips Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.mips Thu Sep 27 14:05:44 2018 (r338963)
@@ -65,6 +65,7 @@ libkern/cmpdi2.c optional mips | mipshf | mipsel | m
libkern/ucmpdi2.c optional mips | mipshf | mipsel | mipselhf
libkern/ashldi3.c standard
libkern/ashrdi3.c standard
+libkern/memcmp.c standard
# cfe support
dev/cfe/cfe_api.c optional cfe
Modified: head/sys/conf/files.powerpc
==============================================================================
--- head/sys/conf/files.powerpc Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.powerpc Thu Sep 27 14:05:44 2018 (r338963)
@@ -98,6 +98,7 @@ libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
libkern/lshrdi3.c optional powerpc | powerpcspe
+libkern/memcmp.c standard
libkern/memset.c standard
libkern/moddi3.c optional powerpc | powerpcspe
libkern/qdivrem.c optional powerpc | powerpcspe
Modified: head/sys/conf/files.riscv
==============================================================================
--- head/sys/conf/files.riscv Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.riscv Thu Sep 27 14:05:44 2018 (r338963)
@@ -22,6 +22,7 @@ libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
libkern/memset.c standard
riscv/riscv/autoconf.c standard
riscv/riscv/bus_machdep.c standard
Modified: head/sys/conf/files.sparc64
==============================================================================
--- head/sys/conf/files.sparc64 Thu Sep 27 13:54:09 2018 (r338962)
+++ head/sys/conf/files.sparc64 Thu Sep 27 14:05:44 2018 (r338963)
@@ -71,6 +71,7 @@ libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
sparc64/central/central.c optional central
sparc64/ebus/ebus.c optional ebus
sparc64/ebus/epic.c optional epic ebus
More information about the svn-src-all
mailing list