svn commit: r341351 - in head: lib/libc/amd64/string sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Fri Nov 30 20:58:10 UTC 2018
Author: mjg
Date: Fri Nov 30 20:58:08 2018
New Revision: 341351
URL: https://svnweb.freebsd.org/changeset/base/341351
Log:
amd64: handle small memmove buffers with overlapping stores
Handling of sizes > 32 that must be copied backwards will be updated later.
Reviewed by: kib (kernel part)
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D18387
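
For illustration only, here is a minimal C sketch of the overlapping-stores
trick the assembly below uses for the 8..16 byte bucket. The helper name and
the use of memcpy for the unaligned accesses are this editor's assumptions;
the committed code is hand-written assembly, not this.

#include <stdint.h>
#include <string.h>

/*
 * Copy len bytes (8 <= len <= 16): load the first and the last 8 bytes
 * before storing anything, then write both chunks.  The two stores
 * overlap whenever len < 16.  Because every load happens before any
 * store, the result is correct for overlapping src/dst in either
 * direction, which is why no backwards path is needed for these sizes.
 */
static void
copy_8_to_16(unsigned char *dst, const unsigned char *src, size_t len)
{
	uint64_t head, tail;

	memcpy(&head, src, 8);			/* bytes [0, 8) */
	memcpy(&tail, src + len - 8, 8);	/* bytes [len - 8, len) */
	memcpy(dst, &head, 8);
	memcpy(dst + len - 8, &tail, 8);
}

int
main(void)
{
	char buf[] = "0123456789abcdef";

	/* Overlapping move of 10 bytes, like memmove(buf + 2, buf, 10). */
	copy_8_to_16((unsigned char *)buf + 2, (unsigned char *)buf, 10);
	/* buf now holds "010123456789cdef". */
	return (0);
}
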
Modified:
head/lib/libc/amd64/string/memmove.S
head/sys/amd64/amd64/support.S
Modified: head/lib/libc/amd64/string/memmove.S
==============================================================================
--- head/lib/libc/amd64/string/memmove.S Fri Nov 30 19:59:28 2018 (r341350)
+++ head/lib/libc/amd64/string/memmove.S Fri Nov 30 20:58:08 2018 (r341351)
@@ -42,11 +42,19 @@ __FBSDID("$FreeBSD$");
* rsi - source
* rdx - count
*
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10.
+ * It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
+
+ /*
+ * For sizes 0..32 all data is read before it is written, so there
+ * is no correctness issue with direction of copying.
+ */
+ cmpq $32,%rcx
+ jbe 101632f
+
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -54,13 +62,10 @@ __FBSDID("$FreeBSD$");
jb 2f
.endif
- cmpq $32,%rcx
- jb 1016f
-
cmpq $256,%rcx
ja 1256f
-1032:
+103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -73,56 +78,62 @@ __FBSDID("$FreeBSD$");
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
+ jae 103200b
cmpb $0,%cl
- jne 1016f
+ jne 101632f
\end
ret
ALIGN_TEXT
-1016:
+101632:
cmpb $16,%cl
- jl 1008f
+ jl 100816f
movq (%rsi),%rdx
+ movq 8(%rsi),%r8
+ movq -16(%rsi,%rcx),%r9
+ movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
- movq 8(%rsi),%rdx
- movq %rdx,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rsi),%rsi
- leaq 16(%rdi),%rdi
-1008:
+ movq %r8,8(%rdi)
+ movq %r9,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
+ jl 100408f
movq (%rsi),%rdx
+ movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
-1004:
+ movq %r8,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
+ jl 100204f
movl (%rsi),%edx
+ movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rsi),%rsi
- leaq 4(%rdi),%rdi
-1002:
+ movl %r8d,-4(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw (%rsi),%dx
+ jl 100001f
+ movzwl (%rsi),%edx
+ movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rsi),%rsi
- leaq 2(%rdi),%rdi
-1001:
+ movw %r8w,-2(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100001:
cmpb $1,%cl
- jl 1000f
+ jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
-1000:
+100000:
\end
ret
@@ -136,8 +147,8 @@ __FBSDID("$FreeBSD$");
rep
movsq
movq %rdx,%rcx
- andb $7,%cl /* any bytes left? */
- jne 1004b
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
.endif
\end
ret
@@ -246,6 +257,7 @@ __FBSDID("$FreeBSD$");
ret
.endif
.endm
+
.macro MEMMOVE_BEGIN
movq %rdi,%rax
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Fri Nov 30 19:59:28 2018 (r341350)
+++ head/sys/amd64/amd64/support.S Fri Nov 30 20:58:08 2018 (r341351)
@@ -205,11 +205,19 @@ END(memcmp)
* rsi - source
* rdx - count
*
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10.
+ * It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
+
+ /*
+ * For sizes 0..32 all data is read before it is written, so there
+ * is no correctness issue with direction of copying.
+ */
+ cmpq $32,%rcx
+ jbe 101632f
+
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -217,13 +225,10 @@ END(memcmp)
jb 2f
.endif
- cmpq $32,%rcx
- jb 1016f
-
cmpq $256,%rcx
ja 1256f
-1032:
+103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -236,56 +241,62 @@ END(memcmp)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
+ jae 103200b
cmpb $0,%cl
- jne 1016f
+ jne 101632f
\end
ret
ALIGN_TEXT
-1016:
+101632:
cmpb $16,%cl
- jl 1008f
+ jl 100816f
movq (%rsi),%rdx
+ movq 8(%rsi),%r8
+ movq -16(%rsi,%rcx),%r9
+ movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
- movq 8(%rsi),%rdx
- movq %rdx,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rsi),%rsi
- leaq 16(%rdi),%rdi
-1008:
+ movq %r8,8(%rdi)
+ movq %r9,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
+ jl 100408f
movq (%rsi),%rdx
+ movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
-1004:
+ movq %r8,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
+ jl 100204f
movl (%rsi),%edx
+ movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rsi),%rsi
- leaq 4(%rdi),%rdi
-1002:
+ movl %r8d,-4(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw (%rsi),%dx
+ jl 100001f
+ movzwl (%rsi),%edx
+ movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rsi),%rsi
- leaq 2(%rdi),%rdi
-1001:
+ movw %r8w,-2(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100001:
cmpb $1,%cl
- jl 1000f
+ jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
-1000:
+100000:
\end
ret
@@ -299,8 +310,8 @@ END(memcmp)
rep
movsq
movq %rdx,%rcx
- andb $7,%cl /* any bytes left? */
- jne 1004b
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
.endif
\end
ret
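
To summarize the control flow after this change, the following rough C model
mirrors the new branch structure: bucket by size, do all loads before all
stores, and check overlap direction only on the larger-than-32 path. The
function name is illustrative, not a symbol from memmove.S, and the fallback
simply defers to the real memmove.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

void *
memmove_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (len <= 32) {
		/* 0..32 bytes: direction-agnostic overlapping stores. */
		if (len >= 16) {
			uint64_t a, b, c, e;
			memcpy(&a, s, 8);
			memcpy(&b, s + 8, 8);
			memcpy(&c, s + len - 16, 8);
			memcpy(&e, s + len - 8, 8);
			memcpy(d, &a, 8);
			memcpy(d + 8, &b, 8);
			memcpy(d + len - 16, &c, 8);
			memcpy(d + len - 8, &e, 8);
		} else if (len >= 8) {
			uint64_t a, b;
			memcpy(&a, s, 8);
			memcpy(&b, s + len - 8, 8);
			memcpy(d, &a, 8);
			memcpy(d + len - 8, &b, 8);
		} else if (len >= 4) {
			uint32_t a, b;
			memcpy(&a, s, 4);
			memcpy(&b, s + len - 4, 4);
			memcpy(d, &a, 4);
			memcpy(d + len - 4, &b, 4);
		} else if (len >= 2) {
			uint16_t a, b;
			memcpy(&a, s, 2);
			memcpy(&b, s + len - 2, 2);
			memcpy(d, &a, 2);
			memcpy(d + len - 2, &b, 2);
		} else if (len == 1) {
			*d = *s;
		}
		return (dst);
	}

	/* > 32 bytes: copy direction still matters; use the real routine. */
	return (memmove(dst, src, len));
}
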