svn commit: r334537 - head/sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Sat Jun 2 20:14:45 UTC 2018
Author: mjg
Date: Sat Jun 2 20:14:43 2018
New Revision: 334537
URL: https://svnweb.freebsd.org/changeset/base/334537
Log:
amd64: add a mild depessimization to rep mov/stos users
Currently all the primitives are waiting for a rewrite; tidy them up in the meantime.
The vast majority of callers pass sizes which are a multiple of 8, which means the trailing rep stosb/movsb has nothing to do. It turns out that testing first whether there is anything left to do is a big win across the board (CPUs with and without ERMS, Intel and AMD), while not pessimizing the case where there is work to do. (A standalone sketch of the idea follows the log.)
Sample results for zeroing 64 bytes (ops/second):
Ryzen Threadripper 1950X:              91433212 -> 147265741
Intel(R) Xeon(R) CPU X5675 @ 3.07GHz:  90714044 -> 121992888
bzero and bcopy are on their way out and were not modified. Nothing in the
tree uses them.
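For illustration only, a minimal standalone sketch of the pattern the diff applies to memcpy: copy the bulk with rep movsq, then skip the trailing rep movsb entirely when the length is already a multiple of 8. The label sketch_copy is hypothetical; the real kernel routines additionally maintain the frame pointer and return the original destination. The SysV AMD64 calling convention is assumed (%rdi = dst, %rsi = src, %rdx = len).

	.text
	.globl	sketch_copy
	.type	sketch_copy, @function
sketch_copy:
	movq	%rdx,%rcx
	shrq	$3,%rcx		/* number of full quadwords */
	rep
	movsq			/* bulk copy, 8 bytes at a time */
	movq	%rdx,%rcx
	andq	$7,%rcx		/* any bytes left? */
	jne	1f
	ret			/* common case: len was a multiple of 8 */
1:
	rep
	movsb			/* copy the 1..7 trailing bytes */
	ret
	.size	sketch_copy, . - sketch_copy

Without the jne, a length that is a multiple of 8 still pays for setting up a zero-iteration rep movsb, which is what the commit measures as the loss.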
Modified:
head/sys/amd64/amd64/support.S
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Sat Jun 2 20:11:28 2018 (r334536)
+++ head/sys/amd64/amd64/support.S Sat Jun 2 20:14:43 2018 (r334537)
@@ -205,6 +205,11 @@ ENTRY(memmove)
movsq
movq %rdx,%rcx
andq $7,%rcx /* any bytes left? */
+ jne 2f
+ movq %r9,%rax
+ POP_FRAME_POINTER
+ ret
+2:
rep
movsb
movq %r9,%rax
@@ -248,6 +253,10 @@ ENTRY(memcpy)
movsq
movq %rdx,%rcx
andq $7,%rcx /* any bytes left? */
+ jne 1f
+ POP_FRAME_POINTER
+ ret
+1:
rep
movsb
POP_FRAME_POINTER
@@ -269,6 +278,11 @@ ENTRY(memset)
stosq
movq %rdx,%rcx
andq $7,%rcx
+ jne 1f
+ movq %r9,%rax
+ POP_FRAME_POINTER
+ ret
+1:
rep
stosb
movq %r9,%rax
@@ -358,6 +372,7 @@ ENTRY(copyout)
movsq
movb %dl,%cl
andb $7,%cl
+ je done_copyout
rep
movsb
@@ -406,6 +421,7 @@ ENTRY(copyin)
movsq
movb %al,%cl
andb $7,%cl /* copy remaining bytes */
+ je done_copyin
rep
movsb