svn commit: r290066 - in projects/openssl-1.0.2/secure/lib/libcrypto: . amd64
Jung-uk Kim
jkim at FreeBSD.org
Tue Oct 27 21:17:39 UTC 2015
Author: jkim
Date: Tue Oct 27 21:17:37 2015
New Revision: 290066
URL: https://svnweb.freebsd.org/changeset/base/290066
Log:
Regen assembly files for amd64.
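  (A minimal sketch of the regeneration workflow, for context; the exact
  commands are an assumption based on Makefile.asm below and are not part
  of this log:

      # regenerate the amd64 .S files from OpenSSL's perlasm scripts
      cd secure/lib/libcrypto
      make -f Makefile.asm all      # runs perl on each *.pl listed in SRCS
      mv *.S amd64/                 # move the generated files into the arch subdir

  The regenerated files are then committed, as listed below.)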
Added:
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/rsaz-avx2.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/rsaz-x86_64.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S (contents, props changed)
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S (contents, props changed)
Deleted:
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/modexp512-x86_64.S
Modified:
projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm
projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/bsaes-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/cmll-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/ghash-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/md5-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/rc4-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha1-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha256-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha512-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/vpaes-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/wp-x86_64.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64-gf2m.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64-mont.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64-mont5.S
projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64cpuid.S
Modified: projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm
==============================================================================
--- projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm Tue Oct 27 21:16:29 2015 (r290065)
+++ projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm Tue Oct 27 21:17:37 2015 (r290066)
@@ -12,34 +12,39 @@
${LCRYPTO_SRC}/crypto/aes/asm \
${LCRYPTO_SRC}/crypto/bn/asm \
${LCRYPTO_SRC}/crypto/camellia/asm \
+ ${LCRYPTO_SRC}/crypto/ec/asm \
${LCRYPTO_SRC}/crypto/md5/asm \
${LCRYPTO_SRC}/crypto/modes/asm \
${LCRYPTO_SRC}/crypto/rc4/asm \
- ${LCRYPTO_SRC}/crypto/rc5/asm \
${LCRYPTO_SRC}/crypto/sha/asm \
${LCRYPTO_SRC}/crypto/whrlpool/asm
# aes
-SRCS= aes-x86_64.pl aesni-sha1-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \
+SRCS= aes-x86_64.pl aesni-mb-x86_64.pl aesni-sha1-x86_64.pl \
+ aesni-sha256-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \
vpaes-x86_64.pl
# bn
-SRCS+= modexp512-x86_64.pl x86_64-gf2m.pl x86_64-mont.pl x86_64-mont5.pl
+SRCS+= rsaz-avx2.pl rsaz-x86_64.pl x86_64-gf2m.pl x86_64-mont.pl \
+ x86_64-mont5.pl
# camellia
SRCS+= cmll-x86_64.pl
+# ec
+SRCS+= ecp_nistz256-x86_64.pl
+
# md5
SRCS+= md5-x86_64.pl
# modes
-SRCS+= ghash-x86_64.pl
+SRCS+= aesni-gcm-x86_64.pl ghash-x86_64.pl
# rc4
SRCS+= rc4-md5-x86_64.pl rc4-x86_64.pl
# sha
-SRCS+= sha1-x86_64.pl sha512-x86_64.pl
+SRCS+= sha1-mb-x86_64.pl sha1-x86_64.pl sha256-mb-x86_64.pl sha512-x86_64.pl
# whrlpool
SRCS+= wp-x86_64.pl
Modified: projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc
==============================================================================
--- projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc Tue Oct 27 21:16:29 2015 (r290065)
+++ projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc Tue Oct 27 21:17:37 2015 (r290066)
@@ -16,6 +16,7 @@ CFLAGS+= -DOPENSSL_THREADS -DDSO_DLFCN -
.if ${MACHINE_CPUARCH} == "amd64"
CFLAGS+=-DL_ENDIAN -DOPENSSL_IA32_SSE2
CFLAGS+=-DAES_ASM -DBSAES_ASM -DVPAES_ASM
+CFLAGS+=-DECP_NISTZ256_ASM
CFLAGS+=-DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_MONT5 -DOPENSSL_BN_ASM_GF2m
CFLAGS+=-DMD5_ASM
CFLAGS+=-DGHASH_ASM
Modified: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S
==============================================================================
--- projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S Tue Oct 27 21:16:29 2015 (r290065)
+++ projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S Tue Oct 27 21:17:37 2015 (r290066)
@@ -151,7 +151,7 @@ _x86_64_AES_encrypt:
xorl %r11d,%ebx
xorl %r12d,%ecx
xorl %r8d,%edx
-.byte 0xf3,0xc3
+.byte 0xf3,0xc3
.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
.type _x86_64_AES_encrypt_compact,@function
.align 16
@@ -176,80 +176,78 @@ _x86_64_AES_encrypt_compact:
movzbl %al,%r10d
movzbl %bl,%r11d
movzbl %cl,%r12d
- movzbl (%r14,%r10,1),%r10d
- movzbl (%r14,%r11,1),%r11d
- movzbl (%r14,%r12,1),%r12d
-
movzbl %dl,%r8d
movzbl %bh,%esi
movzbl %ch,%edi
+ shrl $16,%ecx
+ movzbl %dh,%ebp
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
movzbl (%r14,%r8,1),%r8d
- movzbl (%r14,%rsi,1),%r9d
- movzbl (%r14,%rdi,1),%r13d
- movzbl %dh,%ebp
+ movzbl (%r14,%rsi,1),%r9d
movzbl %ah,%esi
- shrl $16,%ecx
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %cl,%edi
movzbl (%r14,%rbp,1),%ebp
movzbl (%r14,%rsi,1),%esi
- shrl $16,%edx
- movzbl %cl,%edi
shll $8,%r9d
+ shrl $16,%edx
shll $8,%r13d
- movzbl (%r14,%rdi,1),%edi
xorl %r9d,%r10d
- xorl %r13d,%r11d
-
- movzbl %dl,%r9d
shrl $16,%eax
+ movzbl %dl,%r9d
shrl $16,%ebx
- movzbl %al,%r13d
+ xorl %r13d,%r11d
shll $8,%ebp
- shll $8,%esi
- movzbl (%r14,%r9,1),%r9d
- movzbl (%r14,%r13,1),%r13d
+ movzbl %al,%r13d
+ movzbl (%r14,%rdi,1),%edi
xorl %ebp,%r12d
- xorl %esi,%r8d
+ shll $8,%esi
movzbl %bl,%ebp
- movzbl %dh,%esi
shll $16,%edi
- movzbl (%r14,%rbp,1),%ebp
- movzbl (%r14,%rsi,1),%esi
+ xorl %esi,%r8d
+ movzbl (%r14,%r9,1),%r9d
+ movzbl %dh,%esi
+ movzbl (%r14,%r13,1),%r13d
xorl %edi,%r10d
- movzbl %ah,%edi
shrl $8,%ecx
+ movzbl %ah,%edi
+ shll $16,%r9d
shrl $8,%ebx
+ shll $16,%r13d
+ xorl %r9d,%r11d
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
movzbl (%r14,%rdi,1),%edi
movzbl (%r14,%rcx,1),%edx
movzbl (%r14,%rbx,1),%ecx
- shll $16,%r9d
- shll $16,%r13d
+
shll $16,%ebp
- xorl %r9d,%r11d
xorl %r13d,%r12d
- xorl %ebp,%r8d
-
shll $24,%esi
+ xorl %ebp,%r8d
shll $24,%edi
- shll $24,%edx
xorl %esi,%r10d
- shll $24,%ecx
+ shll $24,%edx
xorl %edi,%r11d
+ shll $24,%ecx
movl %r10d,%eax
movl %r11d,%ebx
xorl %r12d,%ecx
xorl %r8d,%edx
cmpq 16(%rsp),%r15
je .Lenc_compact_done
- movl %eax,%esi
- movl %ebx,%edi
- andl $2155905152,%esi
- andl $2155905152,%edi
- movl %esi,%r10d
- movl %edi,%r11d
+ movl $2155905152,%r10d
+ movl $2155905152,%r11d
+ andl %eax,%r10d
+ andl %ebx,%r11d
+ movl %r10d,%esi
+ movl %r11d,%edi
shrl $7,%r10d
leal (%rax,%rax,1),%r8d
shrl $7,%r11d
@@ -267,25 +265,25 @@ _x86_64_AES_encrypt_compact:
xorl %r8d,%eax
xorl %r9d,%ebx
- movl %ecx,%esi
- movl %edx,%edi
+ movl $2155905152,%r12d
roll $24,%eax
+ movl $2155905152,%ebp
roll $24,%ebx
- andl $2155905152,%esi
- andl $2155905152,%edi
+ andl %ecx,%r12d
+ andl %edx,%ebp
xorl %r8d,%eax
xorl %r9d,%ebx
- movl %esi,%r12d
- movl %edi,%ebp
+ movl %r12d,%esi
rorl $16,%r10d
+ movl %ebp,%edi
rorl $16,%r11d
- shrl $7,%r12d
leal (%rcx,%rcx,1),%r8d
+ shrl $7,%r12d
xorl %r10d,%eax
- xorl %r11d,%ebx
shrl $7,%ebp
- leal (%rdx,%rdx,1),%r9d
+ xorl %r11d,%ebx
rorl $8,%r10d
+ leal (%rdx,%rdx,1),%r9d
rorl $8,%r11d
subl %r12d,%esi
subl %ebp,%edi
@@ -301,23 +299,23 @@ _x86_64_AES_encrypt_compact:
xorl %esi,%r8d
xorl %edi,%r9d
+ rorl $16,%r12d
xorl %r8d,%ecx
+ rorl $16,%ebp
xorl %r9d,%edx
roll $24,%ecx
+ movl 0(%r14),%esi
roll $24,%edx
xorl %r8d,%ecx
- xorl %r9d,%edx
- movl 0(%r14),%esi
- rorl $16,%r12d
- rorl $16,%ebp
movl 64(%r14),%edi
- xorl %r12d,%ecx
- xorl %ebp,%edx
+ xorl %r9d,%edx
movl 128(%r14),%r8d
+ xorl %r12d,%ecx
rorl $8,%r12d
+ xorl %ebp,%edx
rorl $8,%ebp
- movl 192(%r14),%r9d
xorl %r12d,%ecx
+ movl 192(%r14),%r9d
xorl %ebp,%edx
jmp .Lenc_loop_compact
.align 16
@@ -326,7 +324,7 @@ _x86_64_AES_encrypt_compact:
xorl 4(%r15),%ebx
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
-.byte 0xf3,0xc3
+.byte 0xf3,0xc3
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
.globl AES_encrypt
.type AES_encrypt,@function
@@ -548,7 +546,7 @@ _x86_64_AES_decrypt:
xorl %r11d,%ebx
xorl %r12d,%ecx
xorl %r8d,%edx
-.byte 0xf3,0xc3
+.byte 0xf3,0xc3
.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
.type _x86_64_AES_decrypt_compact,@function
.align 16
@@ -574,70 +572,69 @@ _x86_64_AES_decrypt_compact:
movzbl %al,%r10d
movzbl %bl,%r11d
movzbl %cl,%r12d
- movzbl (%r14,%r10,1),%r10d
- movzbl (%r14,%r11,1),%r11d
- movzbl (%r14,%r12,1),%r12d
-
movzbl %dl,%r8d
movzbl %dh,%esi
movzbl %ah,%edi
+ shrl $16,%edx
+ movzbl %bh,%ebp
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
movzbl (%r14,%r8,1),%r8d
- movzbl (%r14,%rsi,1),%r9d
- movzbl (%r14,%rdi,1),%r13d
- movzbl %bh,%ebp
+ movzbl (%r14,%rsi,1),%r9d
movzbl %ch,%esi
- shrl $16,%ecx
+ movzbl (%r14,%rdi,1),%r13d
movzbl (%r14,%rbp,1),%ebp
movzbl (%r14,%rsi,1),%esi
- shrl $16,%edx
- movzbl %cl,%edi
- shll $8,%r9d
+ shrl $16,%ecx
shll $8,%r13d
- movzbl (%r14,%rdi,1),%edi
- xorl %r9d,%r10d
- xorl %r13d,%r11d
-
- movzbl %dl,%r9d
+ shll $8,%r9d
+ movzbl %cl,%edi
shrl $16,%eax
+ xorl %r9d,%r10d
shrl $16,%ebx
- movzbl %al,%r13d
+ movzbl %dl,%r9d
+
shll $8,%ebp
+ xorl %r13d,%r11d
shll $8,%esi
- movzbl (%r14,%r9,1),%r9d
- movzbl (%r14,%r13,1),%r13d
+ movzbl %al,%r13d
+ movzbl (%r14,%rdi,1),%edi
xorl %ebp,%r12d
- xorl %esi,%r8d
-
movzbl %bl,%ebp
- movzbl %bh,%esi
+
shll $16,%edi
+ xorl %esi,%r8d
+ movzbl (%r14,%r9,1),%r9d
+ movzbl %bh,%esi
movzbl (%r14,%rbp,1),%ebp
- movzbl (%r14,%rsi,1),%esi
xorl %edi,%r10d
-
+ movzbl (%r14,%r13,1),%r13d
movzbl %ch,%edi
+
+ shll $16,%ebp
shll $16,%r9d
shll $16,%r13d
- movzbl (%r14,%rdi,1),%ebx
+ xorl %ebp,%r8d
+ movzbl %dh,%ebp
xorl %r9d,%r11d
+ shrl $8,%eax
xorl %r13d,%r12d
- movzbl %dh,%edi
- shrl $8,%eax
- shll $16,%ebp
- movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%ebx
+ movzbl (%r14,%rbp,1),%ecx
movzbl (%r14,%rax,1),%edx
- xorl %ebp,%r8d
+ movl %r10d,%eax
shll $24,%esi
shll $24,%ebx
shll $24,%ecx
- xorl %esi,%r10d
+ xorl %esi,%eax
shll $24,%edx
xorl %r11d,%ebx
- movl %r10d,%eax
xorl %r12d,%ecx
xorl %r8d,%edx
cmpq 16(%rsp),%r15
@@ -650,12 +647,12 @@ _x86_64_AES_decrypt_compact:
orq %rbx,%rax
orq %rdx,%rcx
movq 256+16(%r14),%rbp
- movq %rax,%rbx
- movq %rcx,%rdx
- andq %rsi,%rbx
- andq %rsi,%rdx
- movq %rbx,%r9
- movq %rdx,%r12
+ movq %rsi,%r9
+ movq %rsi,%r12
+ andq %rax,%r9
+ andq %rcx,%r12
+ movq %r9,%rbx
+ movq %r12,%rdx
shrq $7,%r9
leaq (%rax,%rax,1),%r8
shrq $7,%r12
@@ -666,15 +663,15 @@ _x86_64_AES_decrypt_compact:
andq %rdi,%r11
andq %rbp,%rbx
andq %rbp,%rdx
- xorq %r8,%rbx
- xorq %r11,%rdx
- movq %rbx,%r8
- movq %rdx,%r11
-
- andq %rsi,%rbx
- andq %rsi,%rdx
- movq %rbx,%r10
- movq %rdx,%r13
+ xorq %rbx,%r8
+ xorq %rdx,%r11
+ movq %rsi,%r10
+ movq %rsi,%r13
+
+ andq %r8,%r10
+ andq %r11,%r13
+ movq %r10,%rbx
+ movq %r13,%rdx
shrq $7,%r10
leaq (%r8,%r8,1),%r9
shrq $7,%r13
@@ -685,15 +682,15 @@ _x86_64_AES_decrypt_compact:
andq %rdi,%r12
andq %rbp,%rbx
andq %rbp,%rdx
- xorq %r9,%rbx
- xorq %r12,%rdx
- movq %rbx,%r9
- movq %rdx,%r12
-
- andq %rsi,%rbx
- andq %rsi,%rdx
- movq %rbx,%r10
- movq %rdx,%r13
+ xorq %rbx,%r9
+ xorq %rdx,%r12
+ movq %rsi,%r10
+ movq %rsi,%r13
+
+ andq %r9,%r10
+ andq %r12,%r13
+ movq %r10,%rbx
+ movq %r13,%rdx
shrq $7,%r10
xorq %rax,%r8
shrq $7,%r13
@@ -718,51 +715,51 @@ _x86_64_AES_decrypt_compact:
movq %rax,%rbx
movq %rcx,%rdx
xorq %r10,%r9
- xorq %r13,%r12
shrq $32,%rbx
+ xorq %r13,%r12
shrq $32,%rdx
xorq %r8,%r10
- xorq %r11,%r13
roll $8,%eax
+ xorq %r11,%r13
roll $8,%ecx
xorq %r9,%r10
+ roll $8,%ebx
xorq %r12,%r13
- roll $8,%ebx
roll $8,%edx
xorl %r10d,%eax
- xorl %r13d,%ecx
shrq $32,%r10
+ xorl %r13d,%ecx
shrq $32,%r13
xorl %r10d,%ebx
xorl %r13d,%edx
movq %r8,%r10
- movq %r11,%r13
- shrq $32,%r10
- shrq $32,%r13
roll $24,%r8d
+ movq %r11,%r13
roll $24,%r11d
- roll $24,%r10d
- roll $24,%r13d
+ shrq $32,%r10
xorl %r8d,%eax
+ shrq $32,%r13
xorl %r11d,%ecx
+ roll $24,%r10d
movq %r9,%r8
+ roll $24,%r13d
movq %r12,%r11
+ shrq $32,%r8
xorl %r10d,%ebx
+ shrq $32,%r11
xorl %r13d,%edx
movq 0(%r14),%rsi
- shrq $32,%r8
- shrq $32,%r11
- movq 64(%r14),%rdi
roll $16,%r9d
+ movq 64(%r14),%rdi
roll $16,%r12d
movq 128(%r14),%rbp
roll $16,%r8d
- roll $16,%r11d
movq 192(%r14),%r10
xorl %r9d,%eax
+ roll $16,%r11d
xorl %r12d,%ecx
movq 256(%r14),%r13
xorl %r8d,%ebx
@@ -774,7 +771,7 @@ _x86_64_AES_decrypt_compact:
xorl 4(%r15),%ebx
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
-.byte 0xf3,0xc3
+.byte 0xf3,0xc3
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
.globl AES_decrypt
.type AES_decrypt,@function
@@ -860,10 +857,6 @@ private_AES_set_encrypt_key:
call _x86_64_AES_set_encrypt_key
- movq 8(%rsp),%r15
- movq 16(%rsp),%r14
- movq 24(%rsp),%r13
- movq 32(%rsp),%r12
movq 40(%rsp),%rbp
movq 48(%rsp),%rbx
addq $56,%rsp
@@ -1108,7 +1101,7 @@ _x86_64_AES_set_encrypt_key:
.Lbadpointer:
movq $-1,%rax
.Lexit:
-.byte 0xf3,0xc3
+.byte 0xf3,0xc3
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
.globl private_AES_set_decrypt_key
.type private_AES_set_decrypt_key,@function
@@ -1161,12 +1154,12 @@ private_AES_set_decrypt_key:
leaq 16(%r15),%r15
movq 0(%r15),%rax
movq 8(%r15),%rcx
- movq %rax,%rbx
- movq %rcx,%rdx
- andq %rsi,%rbx
- andq %rsi,%rdx
- movq %rbx,%r9
- movq %rdx,%r12
+ movq %rsi,%r9
+ movq %rsi,%r12
+ andq %rax,%r9
+ andq %rcx,%r12
+ movq %r9,%rbx
+ movq %r12,%rdx
shrq $7,%r9
leaq (%rax,%rax,1),%r8
shrq $7,%r12
@@ -1177,15 +1170,15 @@ private_AES_set_decrypt_key:
andq %rdi,%r11
andq %rbp,%rbx
andq %rbp,%rdx
- xorq %r8,%rbx
- xorq %r11,%rdx
- movq %rbx,%r8
- movq %rdx,%r11
-
- andq %rsi,%rbx
- andq %rsi,%rdx
- movq %rbx,%r10
- movq %rdx,%r13
+ xorq %rbx,%r8
+ xorq %rdx,%r11
+ movq %rsi,%r10
+ movq %rsi,%r13
+
+ andq %r8,%r10
+ andq %r11,%r13
+ movq %r10,%rbx
+ movq %r13,%rdx
shrq $7,%r10
leaq (%r8,%r8,1),%r9
shrq $7,%r13
@@ -1196,15 +1189,15 @@ private_AES_set_decrypt_key:
andq %rdi,%r12
andq %rbp,%rbx
andq %rbp,%rdx
- xorq %r9,%rbx
- xorq %r12,%rdx
- movq %rbx,%r9
- movq %rdx,%r12
-
- andq %rsi,%rbx
- andq %rsi,%rdx
- movq %rbx,%r10
- movq %rdx,%r13
+ xorq %rbx,%r9
+ xorq %rdx,%r12
+ movq %rsi,%r10
+ movq %rsi,%r13
+
+ andq %r9,%r10
+ andq %r12,%r13
+ movq %r10,%rbx
+ movq %r13,%rdx
shrq $7,%r10
xorq %rax,%r8
shrq $7,%r13
@@ -1229,51 +1222,51 @@ private_AES_set_decrypt_key:
movq %rax,%rbx
movq %rcx,%rdx
xorq %r10,%r9
- xorq %r13,%r12
shrq $32,%rbx
+ xorq %r13,%r12
shrq $32,%rdx
xorq %r8,%r10
- xorq %r11,%r13
roll $8,%eax
+ xorq %r11,%r13
roll $8,%ecx
xorq %r9,%r10
+ roll $8,%ebx
xorq %r12,%r13
- roll $8,%ebx
roll $8,%edx
xorl %r10d,%eax
- xorl %r13d,%ecx
shrq $32,%r10
+ xorl %r13d,%ecx
shrq $32,%r13
xorl %r10d,%ebx
xorl %r13d,%edx
movq %r8,%r10
- movq %r11,%r13
- shrq $32,%r10
- shrq $32,%r13
roll $24,%r8d
+ movq %r11,%r13
roll $24,%r11d
- roll $24,%r10d
- roll $24,%r13d
+ shrq $32,%r10
xorl %r8d,%eax
+ shrq $32,%r13
xorl %r11d,%ecx
+ roll $24,%r10d
movq %r9,%r8
+ roll $24,%r13d
movq %r12,%r11
+ shrq $32,%r8
xorl %r10d,%ebx
+ shrq $32,%r11
xorl %r13d,%edx
- shrq $32,%r8
- shrq $32,%r11
-
roll $16,%r9d
+
roll $16,%r12d
roll $16,%r8d
- roll $16,%r11d
xorl %r9d,%eax
+ roll $16,%r11d
xorl %r12d,%ecx
xorl %r8d,%ebx
@@ -1389,7 +1382,7 @@ AES_cbc_encrypt:
leaq 80(%rsp),%rdi
leaq 80(%rsp),%r15
movl $30,%ecx
-.long 0x90A548F3
+.long 0x90A548F3
movl %eax,(%rdi)
.Lcbc_skip_ecopy:
movq %r15,0(%rsp)
@@ -1551,7 +1544,7 @@ AES_cbc_encrypt:
je .Lcbc_exit
movl $30,%ecx
xorq %rax,%rax
-.long 0x90AB48F3
+.long 0x90AB48F3
jmp .Lcbc_exit
@@ -1606,7 +1599,7 @@ AES_cbc_encrypt:
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
- jz .Lcbc_slow_enc_tail
+ jz .Lcbc_slow_enc_tail
.align 4
.Lcbc_slow_enc_loop:
@@ -1651,16 +1644,16 @@ AES_cbc_encrypt:
movq %r10,%rcx
movq %r8,%rsi
movq %r9,%rdi
-.long 0x9066A4F3
+.long 0x9066A4F3
movq $16,%rcx
subq %r10,%rcx
xorq %rax,%rax
-.long 0x9066AAF3
+.long 0x9066AAF3
movq %r9,%r8
movq $16,%r10
movq %r11,%rax
movq %r12,%rcx
- jmp .Lcbc_slow_enc_loop
+ jmp .Lcbc_slow_enc_loop
.align 16
.LSLOW_DECRYPT:
@@ -1736,7 +1729,7 @@ AES_cbc_encrypt:
movq %r9,%rdi
leaq 64(%rsp),%rsi
leaq 16(%r10),%rcx
-.long 0x9066A4F3
+.long 0x9066A4F3
jmp .Lcbc_exit
.align 16
Added: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S Tue Oct 27 21:17:37 2015 (r290066)
@@ -0,0 +1,16 @@
+ # $FreeBSD$
+.text
+
+.globl aesni_gcm_encrypt
+.type aesni_gcm_encrypt,@function
+aesni_gcm_encrypt:
+ xorl %eax,%eax
+ .byte 0xf3,0xc3
+.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
+
+.globl aesni_gcm_decrypt
+.type aesni_gcm_decrypt,@function
+aesni_gcm_decrypt:
+ xorl %eax,%eax
+ .byte 0xf3,0xc3
+.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
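  (Note: aesni_gcm_encrypt and aesni_gcm_decrypt above are stubs that clear
  %eax and return, i.e. they report zero bytes processed, so callers that use
  the return value as a byte count fall back to the generic GCM path.  A
  hedged way to confirm this against the built object (the object file name
  is an assumption):

      # disassemble the built object and show the stub body (xor eax,eax; ret)
      objdump -d aesni-gcm-x86_64.o | grep -A2 '<aesni_gcm_encrypt>:'
  )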
Added: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S Tue Oct 27 21:17:37 2015 (r290066)
@@ -0,0 +1,507 @@
+ # $FreeBSD$
+.text
+
+
+
+.globl aesni_multi_cbc_encrypt
+.type aesni_multi_cbc_encrypt,@function
+.align 32
+aesni_multi_cbc_encrypt:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+
+
+
+
+ subq $48,%rsp
+ andq $-64,%rsp
+ movq %rax,16(%rsp)
+
+.Lenc4x_body:
+ movdqu (%rsi),%xmm12
+ leaq 120(%rsi),%rsi
+ leaq 80(%rdi),%rdi
+
+.Lenc4x_loop_grande:
+ movl %edx,24(%rsp)
+ xorl %edx,%edx
+ movl -64(%rdi),%ecx
+ movq -80(%rdi),%r8
+ cmpl %edx,%ecx
+ movq -72(%rdi),%r12
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu -56(%rdi),%xmm2
+ movl %ecx,32(%rsp)
+ cmovleq %rsp,%r8
+ movl -24(%rdi),%ecx
+ movq -40(%rdi),%r9
+ cmpl %edx,%ecx
+ movq -32(%rdi),%r13
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu -16(%rdi),%xmm3
+ movl %ecx,36(%rsp)
+ cmovleq %rsp,%r9
+ movl 16(%rdi),%ecx
+ movq 0(%rdi),%r10
+ cmpl %edx,%ecx
+ movq 8(%rdi),%r14
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu 24(%rdi),%xmm4
+ movl %ecx,40(%rsp)
+ cmovleq %rsp,%r10
+ movl 56(%rdi),%ecx
+ movq 40(%rdi),%r11
+ cmpl %edx,%ecx
+ movq 48(%rdi),%r15
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu 64(%rdi),%xmm5
+ movl %ecx,44(%rsp)
+ cmovleq %rsp,%r11
+ testl %edx,%edx
+ jz .Lenc4x_done
+
+ movups 16-120(%rsi),%xmm1
+ pxor %xmm12,%xmm2
+ movups 32-120(%rsi),%xmm0
+ pxor %xmm12,%xmm3
+ movl 240-120(%rsi),%eax
+ pxor %xmm12,%xmm4
+ movdqu (%r8),%xmm6
+ pxor %xmm12,%xmm5
+ movdqu (%r9),%xmm7
+ pxor %xmm6,%xmm2
+ movdqu (%r10),%xmm8
+ pxor %xmm7,%xmm3
+ movdqu (%r11),%xmm9
+ pxor %xmm8,%xmm4
+ pxor %xmm9,%xmm5
+ movdqa 32(%rsp),%xmm10
+ xorq %rbx,%rbx
+ jmp .Loop_enc4x
+
+.align 32
+.Loop_enc4x:
+ addq $16,%rbx
+ leaq 16(%rsp),%rbp
+ movl $1,%ecx
+ subq %rbx,%rbp
+
+.byte 102,15,56,220,209
+ prefetcht0 31(%r8,%rbx,1)
+ prefetcht0 31(%r9,%rbx,1)
+.byte 102,15,56,220,217
+ prefetcht0 31(%r10,%rbx,1)
+ prefetcht0 31(%r10,%rbx,1)
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 48-120(%rsi),%xmm1
+ cmpl 32(%rsp),%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+ cmovgeq %rbp,%r8
+ cmovgq %rbp,%r12
+.byte 102,15,56,220,232
+ movups -56(%rsi),%xmm0
+ cmpl 36(%rsp),%ecx
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+ cmovgeq %rbp,%r9
+ cmovgq %rbp,%r13
+.byte 102,15,56,220,233
+ movups -40(%rsi),%xmm1
+ cmpl 40(%rsp),%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+ cmovgeq %rbp,%r10
+ cmovgq %rbp,%r14
+.byte 102,15,56,220,232
+ movups -24(%rsi),%xmm0
+ cmpl 44(%rsp),%ecx
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+ cmovgeq %rbp,%r11
+ cmovgq %rbp,%r15
+.byte 102,15,56,220,233
+ movups -8(%rsi),%xmm1
+ movdqa %xmm10,%xmm11
+.byte 102,15,56,220,208
+ prefetcht0 15(%r12,%rbx,1)
+ prefetcht0 15(%r13,%rbx,1)
+.byte 102,15,56,220,216
+ prefetcht0 15(%r14,%rbx,1)
+ prefetcht0 15(%r15,%rbx,1)
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 128-120(%rsi),%xmm0
+ pxor %xmm12,%xmm12
+
+.byte 102,15,56,220,209
+ pcmpgtd %xmm12,%xmm11
+ movdqu -120(%rsi),%xmm12
+.byte 102,15,56,220,217
+ paddd %xmm11,%xmm10
+ movdqa %xmm10,32(%rsp)
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 144-120(%rsi),%xmm1
+
+ cmpl $11,%eax
+
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 160-120(%rsi),%xmm0
+
+ jb .Lenc4x_tail
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 176-120(%rsi),%xmm1
+
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 192-120(%rsi),%xmm0
+
+ je .Lenc4x_tail
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 208-120(%rsi),%xmm1
+
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 224-120(%rsi),%xmm0
+ jmp .Lenc4x_tail
+
+.align 32
+.Lenc4x_tail:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movdqu (%r8,%rbx,1),%xmm6
+ movdqu 16-120(%rsi),%xmm1
+
+.byte 102,15,56,221,208
+ movdqu (%r9,%rbx,1),%xmm7
+ pxor %xmm12,%xmm6
+.byte 102,15,56,221,216
+ movdqu (%r10,%rbx,1),%xmm8
+ pxor %xmm12,%xmm7
+.byte 102,15,56,221,224
+ movdqu (%r11,%rbx,1),%xmm9
+ pxor %xmm12,%xmm8
+.byte 102,15,56,221,232
+ movdqu 32-120(%rsi),%xmm0
+ pxor %xmm12,%xmm9
+
+ movups %xmm2,-16(%r12,%rbx,1)
+ pxor %xmm6,%xmm2
+ movups %xmm3,-16(%r13,%rbx,1)
+ pxor %xmm7,%xmm3
+ movups %xmm4,-16(%r14,%rbx,1)
+ pxor %xmm8,%xmm4
+ movups %xmm5,-16(%r15,%rbx,1)
+ pxor %xmm9,%xmm5
+
+ decl %edx
+ jnz .Loop_enc4x
+
+ movq 16(%rsp),%rax
+ movl 24(%rsp),%edx
+
+
+
+
+
+
+
+
+
+
+ leaq 160(%rdi),%rdi
+ decl %edx
+ jnz .Lenc4x_loop_grande
+
+.Lenc4x_done:
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***