svn commit: r338968 - head/sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Thu Sep 27 15:27:54 UTC 2018
Author: mjg
Date: Thu Sep 27 15:27:53 2018
New Revision: 338968
URL: https://svnweb.freebsd.org/changeset/base/338968
Log:
amd64: mostly depessimize copystr
- remove a forward branch in the common case
- replace xchg + lodsb/stosb loop with simple movs
A simple test on an Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz copying
/foo/bar/baz in a loop goes from 295715863 ops/s to 465807408 ops/s.
Further changes are pending.
Reviewed by: kib
Approved by: re (gjb)
Differential Revision: https://reviews.freebsd.org/D17281
Modified:
head/sys/amd64/amd64/support.S
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Thu Sep 27 15:24:16 2018 (r338967)
+++ head/sys/amd64/amd64/support.S Thu Sep 27 15:27:53 2018 (r338968)
@@ -1122,34 +1122,33 @@ ENTRY(copystr)
PUSH_FRAME_POINTER
movq %rdx,%r8 /* %r8 = maxlen */
- xchgq %rdi,%rsi
- incq %rdx
+ incq %rdx
1:
decq %rdx
jz 4f
- lodsb
- stosb
- orb %al,%al
+ movb (%rdi),%al
+ movb %al,(%rsi)
+ incq %rsi
+ incq %rdi
+ testb %al,%al
jnz 1b
/* Success -- 0 byte reached */
decq %rdx
xorl %eax,%eax
- jmp 6f
-4:
- /* rdx is zero -- return ENAMETOOLONG */
- movq $ENAMETOOLONG,%rax
-
-6:
-
+2:
testq %rcx,%rcx
- jz 7f
+ jz 3f
/* set *lencopied and return %rax */
subq %rdx,%r8
movq %r8,(%rcx)
-7:
+3:
POP_FRAME_POINTER
ret
+4:
+ /* rdx is zero -- return ENAMETOOLONG */
+ movl $ENAMETOOLONG,%eax
+ jmp 2b
END(copystr)
/*
More information about the svn-src-all
mailing list