svn commit: r340688 - in releng/12.0: . lib/libc/amd64/string sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Tue Nov 20 18:14:35 UTC 2018
Author: mjg
Date: Tue Nov 20 18:14:30 2018
New Revision: 340688
URL: https://svnweb.freebsd.org/changeset/base/340688
Log:
MFC r339531,r339579,r340252,r340463,r340464,r340472,r340587
amd64: tidy up memset to have rax set earlier for small sizes
amd64: finish the tail in memset with an overlapping store
amd64: align memset buffers to 16 bytes before using rep stos
amd64: convert libc bzero to a C func to avoid future bloat
amd64: sync up libc memset with the kernel version
amd64: handle small memset buffers with overlapping stores
Fix -DNO_CLEAN amd64 build after r340463
Approved by: re (gjb)
Added:
releng/12.0/lib/libc/amd64/string/bzero.c
- copied unchanged from r340684, stable/12/lib/libc/amd64/string/bzero.c
Deleted:
releng/12.0/lib/libc/amd64/string/bzero.S
Modified:
releng/12.0/Makefile.inc1
releng/12.0/lib/libc/amd64/string/Makefile.inc
releng/12.0/lib/libc/amd64/string/memset.S
releng/12.0/sys/amd64/amd64/support.S
Directory Properties:
releng/12.0/ (props changed)
Modified: releng/12.0/Makefile.inc1
==============================================================================
--- releng/12.0/Makefile.inc1 Tue Nov 20 18:13:18 2018 (r340687)
+++ releng/12.0/Makefile.inc1 Tue Nov 20 18:14:30 2018 (r340688)
@@ -948,6 +948,13 @@ _cleanobj_fast_depend_hack: .PHONY
${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \
fi
.endfor
+# 20181115 r340463 bzero reimplemented as .c
+ @if [ -e "${OBJTOP}/lib/libc/.depend.bzero.o" ] && \
+ egrep -qw 'bzero\.[sS]' ${OBJTOP}/lib/libc/.depend.bzero.o; then \
+ echo "Removing stale dependencies for bzero"; \
+ rm -f ${OBJTOP}/lib/libc/.depend.bzero.* \
+ ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.bzero.*}; \
+ fi
# 20181009 track migration from ntp's embedded libevent to updated one
@if [ -e "${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o" ] && \
egrep -q 'contrib/ntp/sntp/libevent/bufferevent_openssl.c' \
Modified: releng/12.0/lib/libc/amd64/string/Makefile.inc
==============================================================================
--- releng/12.0/lib/libc/amd64/string/Makefile.inc Tue Nov 20 18:13:18 2018 (r340687)
+++ releng/12.0/lib/libc/amd64/string/Makefile.inc Tue Nov 20 18:14:30 2018 (r340688)
@@ -2,7 +2,6 @@
MDSRCS+= \
bcmp.S \
- bzero.S \
memcmp.S \
memcpy.S \
memmove.S \
Copied: releng/12.0/lib/libc/amd64/string/bzero.c (from r340684, stable/12/lib/libc/amd64/string/bzero.c)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ releng/12.0/lib/libc/amd64/string/bzero.c Tue Nov 20 18:14:30 2018 (r340688, copy of r340684, stable/12/lib/libc/amd64/string/bzero.c)
@@ -0,0 +1,15 @@
+/*-
+ * Public domain.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <string.h>
+
+void
+bzero(void *b, size_t len)
+{
+
+ memset(b, 0, len);
+}
Modified: releng/12.0/lib/libc/amd64/string/memset.S
==============================================================================
--- releng/12.0/lib/libc/amd64/string/memset.S Tue Nov 20 18:13:18 2018 (r340687)
+++ releng/12.0/lib/libc/amd64/string/memset.S Tue Nov 20 18:14:30 2018 (r340688)
@@ -31,101 +31,112 @@
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
-.macro MEMSET bzero erms
-.if \bzero == 1
- movq %rsi,%rcx
- movq %rsi,%rdx
- xorl %eax,%eax
-.else
- movq %rdi,%r9
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+.macro MEMSET erms
+ movq %rdi,%rax
movq %rdx,%rcx
movzbq %sil,%r8
- movabs $0x0101010101010101,%rax
- imulq %r8,%rax
-.endif
+ movabs $0x0101010101010101,%r10
+ imulq %r8,%r10
cmpq $32,%rcx
- jb 1016f
+ jbe 101632f
cmpq $256,%rcx
ja 1256f
-1032:
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
- movq %rax,16(%rdi)
- movq %rax,24(%rdi)
+103200:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r10,24(%rdi)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
- cmpb $0,%cl
- je 1000f
-1016:
+ ja 103200b
cmpb $16,%cl
- jl 1008f
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rdi),%rdi
-1008:
+ ja 201632f
+ movq %r10,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ ret
+ ALIGN_TEXT
+101632:
+ cmpb $16,%cl
+ jl 100816f
+201632:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %r10,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
- movq %rax,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rdi),%rdi
-1004:
+ jl 100408f
+ movq %r10,(%rdi)
+ movq %r10,-8(%rdi,%rcx)
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
- movl %eax,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rdi),%rdi
-1002:
+ jl 100204f
+ movl %r10d,(%rdi)
+ movl %r10d,-4(%rdi,%rcx)
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw %ax,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rdi),%rdi
-1001:
- cmpb $1,%cl
- jl 1000f
- movb %al,(%rdi)
-1000:
-.if \bzero == 0
- movq %r9,%rax
-.endif
+ jl 100001f
+ movw %r10w,(%rdi)
+ movw %r10w,-2(%rdi,%rcx)
ret
-
+ ALIGN_TEXT
+100001:
+ cmpb $0,%cl
+ je 100000f
+ movb %r10b,(%rdi)
+100000:
+ ret
+ ALIGN_TEXT
1256:
+ movq %rdi,%r9
+ movq %r10,%rax
+ testl $15,%edi
+ jnz 3f
+1:
.if \erms == 1
rep
stosb
+ movq %r9,%rax
.else
+ movq %rcx,%rdx
shrq $3,%rcx
rep
stosq
- movq %rdx,%rcx
- andb $7,%cl
- jne 1004b
-.endif
-.if \bzero == 0
movq %r9,%rax
+ andl $7,%edx
+ jnz 2f
+ ret
+2:
+ movq %r10,-8(%rdi,%rdx)
.endif
ret
+ ALIGN_TEXT
+3:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %rdi,%r8
+ andq $15,%r8
+ leaq -16(%rcx,%r8),%rcx
+ neg %r8
+ leaq 16(%rdi,%r8),%rdi
+ jmp 1b
.endm
-#ifndef BZERO
+
ENTRY(memset)
- MEMSET bzero=0 erms=0
+ MEMSET erms=0
END(memset)
-#else
-ENTRY(bzero)
- MEMSET bzero=1 erms=0
-END(bzero)
-#endif
.section .note.GNU-stack,"",%progbits
Modified: releng/12.0/sys/amd64/amd64/support.S
==============================================================================
--- releng/12.0/sys/amd64/amd64/support.S Tue Nov 20 18:13:18 2018 (r340687)
+++ releng/12.0/sys/amd64/amd64/support.S Tue Nov 20 18:14:30 2018 (r340688)
@@ -452,82 +452,112 @@ END(memcpy_erms)
*/
.macro MEMSET erms
PUSH_FRAME_POINTER
- movq %rdi,%r9
+ movq %rdi,%rax
movq %rdx,%rcx
movzbq %sil,%r8
- movabs $0x0101010101010101,%rax
- imulq %r8,%rax
+ movabs $0x0101010101010101,%r10
+ imulq %r8,%r10
cmpq $32,%rcx
- jb 1016f
+ jbe 101632f
cmpq $256,%rcx
ja 1256f
-1032:
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
- movq %rax,16(%rdi)
- movq %rax,24(%rdi)
+103200:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r10,24(%rdi)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
- cmpb $0,%cl
- je 1000f
-1016:
+ ja 103200b
cmpb $16,%cl
- jl 1008f
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rdi),%rdi
-1008:
+ ja 201632f
+ movq %r10,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+101632:
+ cmpb $16,%cl
+ jl 100816f
+201632:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %r10,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
- movq %rax,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rdi),%rdi
-1004:
+ jl 100408f
+ movq %r10,(%rdi)
+ movq %r10,-8(%rdi,%rcx)
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
- movl %eax,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rdi),%rdi
-1002:
+ jl 100204f
+ movl %r10d,(%rdi)
+ movl %r10d,-4(%rdi,%rcx)
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw %ax,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rdi),%rdi
-1001:
- cmpb $1,%cl
- jl 1000f
- movb %al,(%rdi)
-1000:
- movq %r9,%rax
+ jl 100001f
+ movw %r10w,(%rdi)
+ movw %r10w,-2(%rdi,%rcx)
POP_FRAME_POINTER
ret
ALIGN_TEXT
+100001:
+ cmpb $0,%cl
+ je 100000f
+ movb %r10b,(%rdi)
+100000:
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
1256:
+ movq %rdi,%r9
+ movq %r10,%rax
+ testl $15,%edi
+ jnz 3f
+1:
.if \erms == 1
rep
stosb
+ movq %r9,%rax
.else
+ movq %rcx,%rdx
shrq $3,%rcx
rep
stosq
- movq %rdx,%rcx
- andb $7,%cl
- jne 1004b
-.endif
movq %r9,%rax
+ andl $7,%edx
+ jnz 2f
POP_FRAME_POINTER
ret
+2:
+ movq %r10,-8(%rdi,%rdx)
+.endif
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+3:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %rdi,%r8
+ andq $15,%r8
+ leaq -16(%rcx,%r8),%rcx
+ neg %r8
+ leaq 16(%rdi,%r8),%rdi
+ jmp 1b
.endm
ENTRY(memset_std)
More information about the svn-src-releng
mailing list