git: a3503647f776 - stable/12 - amd64: move memcmp checks upfront
Mateusz Guzik
mjg at FreeBSD.org
Sat Jun 26 16:28:25 UTC 2021
The branch stable/12 has been updated by mjg:
URL: https://cgit.FreeBSD.org/src/commit/?id=a3503647f776a96ae8e65b6225cc4f29ad573bf9
commit a3503647f776a96ae8e65b6225cc4f29ad573bf9
Author: Mateusz Guzik <mjg at FreeBSD.org>
AuthorDate: 2021-01-31 15:46:18 +0000
Commit: Mateusz Guzik <mjg at FreeBSD.org>
CommitDate: 2021-06-26 15:54:58 +0000
amd64: move memcmp checks upfront
This is a tradeoff: it saves jumps for the smaller sizes while making
the 8-16 byte range slower (bringing it roughly in line with the other
cases).
Tested with the glibc test suite.
For example, size 3 (the most common size in the vfs namecache), in
ops/s:
before: 407086026
after: 461391995
The regressed 8-16 range (using 8 as an example):
before: 540850489
after: 461671032
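In relative terms (computed from the figures above) that is roughly a
13% gain for size 3 and a 15% regression for size 8.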
(cherry picked from commit f1be262ec11c1c35e6485f432415b5b52adb505d)
---
lib/libc/amd64/string/memcmp.S | 50 ++++++++++++++++++++++------------------
sys/amd64/amd64/support.S | 52 +++++++++++++++++++++++-------------------
2 files changed, 57 insertions(+), 45 deletions(-)
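For readers who do not want to trace the AT&T-syntax assembly, here is
a C sketch of the new dispatch order for sizes 0-16 (an illustration
only; the committed code is the hand-written assembly below, and the
names memcmp_le16 and bytediff are invented for the sketch). The larger
ranges are tested first, so the smallest sizes fall straight through
the comparison chain, and each range handler performs two overlapping
loads just as the movq/movl/movzwl pairs in the diff do. Where the real
code computes the return value by narrowing the differing window (the
80: and 1: tails), the sketch falls back to a byte loop to stay short:

    #include <stdint.h>
    #include <string.h>

    /*
     * Ordering result via the first differing byte; the real code
     * instead narrows the differing window and subtracts the bytes.
     */
    static int
    bytediff(const unsigned char *a, const unsigned char *b, size_t n)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (a[i] != b[i])
                            return ((int)a[i] - (int)b[i]);
            return (0);
    }

    static int
    memcmp_le16(const void *s1, const void *s2, size_t n)
    {
            const unsigned char *a = s1, *b = s2;
            uint64_t x8, y8;
            uint32_t x4, y4;
            uint16_t x2, y2;

            if (n > 8) {            /* 9..16: overlapping 8-byte loads */
                    memcpy(&x8, a, 8); memcpy(&y8, b, 8);
                    if (x8 != y8)
                            return (bytediff(a, b, 8));
                    memcpy(&x8, a + n - 8, 8); memcpy(&y8, b + n - 8, 8);
                    return (x8 == y8 ? 0 :
                        bytediff(a + n - 8, b + n - 8, 8));
            }
            if (n > 4) {            /* 5..8: overlapping 4-byte loads */
                    memcpy(&x4, a, 4); memcpy(&y4, b, 4);
                    if (x4 != y4)
                            return (bytediff(a, b, 4));
                    memcpy(&x4, a + n - 4, 4); memcpy(&y4, b + n - 4, 4);
                    return (x4 == y4 ? 0 :
                        bytediff(a + n - 4, b + n - 4, 4));
            }
            if (n >= 2) {           /* 2..4: overlapping 2-byte loads */
                    memcpy(&x2, a, 2); memcpy(&y2, b, 2);
                    if (x2 != y2)
                            return (bytediff(a, b, 2));
                    memcpy(&x2, a + n - 2, 2); memcpy(&y2, b + n - 2, 2);
                    return (x2 == y2 ? 0 :
                        bytediff(a + n - 2, b + n - 2, 2));
            }
            if (n < 1)              /* 0: nothing to compare */
                    return (0);
            /* 1: single byte, reached without a taken jump */
            return ((int)a[0] - (int)b[0]);
    }

With this ordering a 0- or 1-byte call retires only not-taken compares,
while the 9-16 range now pays one taken jump to its out-of-line,
ALIGN_TEXT-aligned handler, which is the tradeoff described in the
message.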
diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
index 231ab2175804..04c32bebe439 100644
--- a/lib/libc/amd64/string/memcmp.S
+++ b/lib/libc/amd64/string/memcmp.S
@@ -39,9 +39,25 @@ ENTRY(memcmp)
cmpq $16,%rdx
ja 101632f
-100816:
cmpb $8,%dl
- jl 100408f
+ jg 100816f
+
+ cmpb $4,%dl
+ jg 100408f
+
+ cmpb $2,%dl
+ jge 100204f
+
+ cmpb $1,%dl
+ jl 100000f
+ movzbl (%rdi),%eax
+ movzbl (%rsi),%r8d
+ subl %r8d,%eax
+100000:
+ ret
+
+ ALIGN_TEXT
+100816:
movq (%rdi),%r8
movq (%rsi),%r9
cmpq %r8,%r9
@@ -51,9 +67,8 @@ ENTRY(memcmp)
cmpq %r8,%r9
jne 10081608f
ret
+ ALIGN_TEXT
100408:
- cmpb $4,%dl
- jl 100204f
movl (%rdi),%r8d
movl (%rsi),%r9d
cmpl %r8d,%r9d
@@ -63,9 +78,8 @@ ENTRY(memcmp)
cmpl %r8d,%r9d
jne 10040804f
ret
+ ALIGN_TEXT
100204:
- cmpb $2,%dl
- jl 100001f
movzwl (%rdi),%r8d
movzwl (%rsi),%r9d
cmpl %r8d,%r9d
@@ -75,15 +89,7 @@ ENTRY(memcmp)
cmpl %r8d,%r9d
jne 1f
ret
-100001:
- cmpb $1,%dl
- jl 100000f
- movzbl (%rdi),%eax
- movzbl (%rsi),%r8d
- subl %r8d,%eax
-100000:
- ret
-ALIGN_TEXT
+ ALIGN_TEXT
101632:
cmpq $32,%rdx
ja 103200f
@@ -104,7 +110,7 @@ ALIGN_TEXT
cmpq %r8,%r9
jne 10163224f
ret
-ALIGN_TEXT
+ ALIGN_TEXT
103200:
movq (%rdi),%r8
movq 8(%rdi),%r9
@@ -134,7 +140,7 @@ ALIGN_TEXT
*
* Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
*/
-ALIGN_TEXT
+ ALIGN_TEXT
10320016:
leaq 16(%rdi),%rdi
leaq 16(%rsi),%rsi
@@ -146,29 +152,29 @@ ALIGN_TEXT
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10081608:
10163224:
leaq -8(%rdi,%rdx),%rdi
leaq -8(%rsi,%rdx),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10163216:
leaq -16(%rdi,%rdx),%rdi
leaq -16(%rsi,%rdx),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10163208:
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10040804:
leaq -4(%rdi,%rdx),%rdi
leaq -4(%rsi,%rdx),%rsi
jmp 1f
-ALIGN_TEXT
+ ALIGN_TEXT
80:
movl (%rdi),%r8d
movl (%rsi),%r9d
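(The second hunk below makes the same change to the kernel's private
copy of memcmp in sys/amd64/amd64/support.S; the code differs only in
the POP_FRAME_POINTER epilogue executed before each ret.)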
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 93d2d17150cc..1a08315c2c46 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -117,9 +117,26 @@ ENTRY(memcmp)
cmpq $16,%rdx
ja 101632f
-100816:
cmpb $8,%dl
- jl 100408f
+ jg 100816f
+
+ cmpb $4,%dl
+ jg 100408f
+
+ cmpb $2,%dl
+ jge 100204f
+
+ cmpb $1,%dl
+ jl 100000f
+ movzbl (%rdi),%eax
+ movzbl (%rsi),%r8d
+ subl %r8d,%eax
+100000:
+ POP_FRAME_POINTER
+ ret
+
+ ALIGN_TEXT
+100816:
movq (%rdi),%r8
movq (%rsi),%r9
cmpq %r8,%r9
@@ -130,9 +147,8 @@ ENTRY(memcmp)
jne 10081608f
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
100408:
- cmpb $4,%dl
- jl 100204f
movl (%rdi),%r8d
movl (%rsi),%r9d
cmpl %r8d,%r9d
@@ -143,9 +159,8 @@ ENTRY(memcmp)
jne 10040804f
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
100204:
- cmpb $2,%dl
- jl 100001f
movzwl (%rdi),%r8d
movzwl (%rsi),%r9d
cmpl %r8d,%r9d
@@ -156,16 +171,7 @@ ENTRY(memcmp)
jne 1f
POP_FRAME_POINTER
ret
-100001:
- cmpb $1,%dl
- jl 100000f
- movzbl (%rdi),%eax
- movzbl (%rsi),%r8d
- subl %r8d,%eax
-100000:
- POP_FRAME_POINTER
- ret
-ALIGN_TEXT
+ ALIGN_TEXT
101632:
cmpq $32,%rdx
ja 103200f
@@ -187,7 +193,7 @@ ALIGN_TEXT
jne 10163224f
POP_FRAME_POINTER
ret
-ALIGN_TEXT
+ ALIGN_TEXT
103200:
movq (%rdi),%r8
movq 8(%rdi),%r9
@@ -218,7 +224,7 @@ ALIGN_TEXT
*
* Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
*/
-ALIGN_TEXT
+ ALIGN_TEXT
10320016:
leaq 16(%rdi),%rdi
leaq 16(%rsi),%rsi
@@ -230,29 +236,29 @@ ALIGN_TEXT
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10081608:
10163224:
leaq -8(%rdi,%rdx),%rdi
leaq -8(%rsi,%rdx),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10163216:
leaq -16(%rdi,%rdx),%rdi
leaq -16(%rsi,%rdx),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10163208:
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jmp 80f
-ALIGN_TEXT
+ ALIGN_TEXT
10040804:
leaq -4(%rdi,%rdx),%rdi
leaq -4(%rsi,%rdx),%rsi
jmp 1f
-ALIGN_TEXT
+ ALIGN_TEXT
80:
movl (%rdi),%r8d
movl (%rsi),%r9d
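The ops/s figures in the commit message come from a microbenchmark that
is not part of the commit. A minimal harness along the following lines
(an assumption about the methodology, not the author's actual tool)
measures a fixed-size memcmp; the volatile function pointer keeps the
compiler from inlining or constant-folding the call:

    #include <stdio.h>
    #include <string.h>
    #include <time.h>

    int
    main(void)
    {
            /* Fixed inputs; size 3 matches the namecache example. */
            static const char a[16] = "abc", b[16] = "abc";
            const size_t n = 3;
            const long iters = 100000000L;
            /* volatile target defeats inlining/constant folding */
            int (*volatile cmp)(const void *, const void *, size_t) =
                memcmp;
            struct timespec t0, t1;
            long long sink = 0;
            double secs;
            long i;

            clock_gettime(CLOCK_MONOTONIC, &t0);
            for (i = 0; i < iters; i++)
                    sink += cmp(a, b, n);
            clock_gettime(CLOCK_MONOTONIC, &t1);

            secs = (t1.tv_sec - t0.tv_sec) +
                (t1.tv_nsec - t0.tv_nsec) / 1e9;
            printf("%.0f ops/s (sink=%lld)\n", iters / secs, sink);
            return (0);
    }

Built with cc -O2 and run before and after the change, this should
reproduce the direction of the numbers above; absolute figures will of
course vary with the CPU.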