svn commit: r338842 - in projects/openssl111/secure/lib/libcrypto: . i386
Jung-uk Kim <jkim@FreeBSD.org>
Thu Sep 20 21:34:07 UTC 2018
Author: jkim
Date: Thu Sep 20 21:34:05 2018
New Revision: 338842
URL: https://svnweb.freebsd.org/changeset/base/338842
Log:
Regen assembly files for i386.
Added:
projects/openssl111/secure/lib/libcrypto/i386/cast-586.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/i386/chacha-x86.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/i386/e_padlock-x86.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/i386/ecp_nistz256-x86.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/i386/poly1305-x86.S (contents, props changed)
Deleted:
projects/openssl111/secure/lib/libcrypto/i386/bf-686.S
Modified:
projects/openssl111/secure/lib/libcrypto/Makefile.asm
projects/openssl111/secure/lib/libcrypto/i386/aes-586.S
projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S
projects/openssl111/secure/lib/libcrypto/i386/bf-586.S
projects/openssl111/secure/lib/libcrypto/i386/bn-586.S
projects/openssl111/secure/lib/libcrypto/i386/cmll-x86.S
projects/openssl111/secure/lib/libcrypto/i386/co-586.S
projects/openssl111/secure/lib/libcrypto/i386/crypt586.S
projects/openssl111/secure/lib/libcrypto/i386/des-586.S
projects/openssl111/secure/lib/libcrypto/i386/ghash-x86.S
projects/openssl111/secure/lib/libcrypto/i386/md5-586.S
projects/openssl111/secure/lib/libcrypto/i386/rc4-586.S
projects/openssl111/secure/lib/libcrypto/i386/rc5-586.S
projects/openssl111/secure/lib/libcrypto/i386/rmd-586.S
projects/openssl111/secure/lib/libcrypto/i386/sha1-586.S
projects/openssl111/secure/lib/libcrypto/i386/sha256-586.S
projects/openssl111/secure/lib/libcrypto/i386/sha512-586.S
projects/openssl111/secure/lib/libcrypto/i386/vpaes-x86.S
projects/openssl111/secure/lib/libcrypto/i386/wp-mmx.S
projects/openssl111/secure/lib/libcrypto/i386/x86-gf2m.S
projects/openssl111/secure/lib/libcrypto/i386/x86-mont.S
projects/openssl111/secure/lib/libcrypto/i386/x86cpuid.S
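
The .S files listed above are regenerated from the upstream perlasm scripts by the Makefile.asm rules changed below. As a minimal sketch of that workflow (the source-tree path is illustrative, not part of this commit; Makefile.asm's default `all' target builds ${ASM}, and `clean' comes from bsd.prog.mk):

    # Illustrative regeneration run; adjust the path to your src tree.
    cd /usr/src/secure/lib/libcrypto
    make -f Makefile.asm          # default target "all" builds ${ASM}
    make -f Makefile.asm clean    # CLEANFILES now also removes the intermediate .s files
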
Modified: projects/openssl111/secure/lib/libcrypto/Makefile.asm
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/Makefile.asm Thu Sep 20 20:32:08 2018 (r338841)
+++ projects/openssl111/secure/lib/libcrypto/Makefile.asm Thu Sep 20 21:34:05 2018 (r338842)
@@ -59,6 +59,9 @@ sha256-armv8.S: sha512-armv8.pl
${LCRYPTO_SRC}/crypto/whrlpool/asm \
${LCRYPTO_SRC}/engines/asm
+# cpuid
+SRCS+= x86_64cpuid.pl
+
# aes
SRCS= aes-x86_64.pl aesni-mb-x86_64.pl aesni-sha1-x86_64.pl \
aesni-sha256-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \
@@ -77,9 +80,6 @@ SRCS+= chacha-x86_64.pl
# ec
SRCS+= ecp_nistz256-x86_64.pl x25519-x86_64.pl
-# engines
-SRCS+= e_padlock-x86_64.pl
-
# md5
SRCS+= md5-x86_64.pl
@@ -99,8 +99,8 @@ SRCS+= keccak1600-x86_64.pl sha1-mb-x86_64.pl sha1-x86
# whrlpool
SRCS+= wp-x86_64.pl
-# cpuid
-SRCS+= x86_64cpuid.pl
+# engines
+SRCS+= e_padlock-x86_64.pl
SHA_ASM= sha256-x86_64 sha512-x86_64
SHA_SRC= sha512-x86_64.pl
@@ -175,22 +175,30 @@ aes-armv4.S: aes-armv4.pl
${LCRYPTO_SRC}/crypto/bf/asm \
${LCRYPTO_SRC}/crypto/bn/asm \
${LCRYPTO_SRC}/crypto/camellia/asm \
+ ${LCRYPTO_SRC}/crypto/cast/asm \
+ ${LCRYPTO_SRC}/crypto/chacha/asm \
${LCRYPTO_SRC}/crypto/des/asm \
+ ${LCRYPTO_SRC}/crypto/ec/asm \
${LCRYPTO_SRC}/crypto/md5/asm \
${LCRYPTO_SRC}/crypto/modes/asm \
+ ${LCRYPTO_SRC}/crypto/poly1305/asm \
${LCRYPTO_SRC}/crypto/rc4/asm \
${LCRYPTO_SRC}/crypto/rc5/asm \
${LCRYPTO_SRC}/crypto/ripemd/asm \
${LCRYPTO_SRC}/crypto/sha/asm \
- ${LCRYPTO_SRC}/crypto/whrlpool/asm
+ ${LCRYPTO_SRC}/crypto/whrlpool/asm \
+ ${LCRYPTO_SRC}/engines/asm
-PERLPATH= -I${LCRYPTO_SRC}/crypto/des/asm -I${LCRYPTO_SRC}/crypto/perlasm
+#PERLPATH= -I${LCRYPTO_SRC}/crypto/des/asm -I${LCRYPTO_SRC}/crypto/perlasm
+# cpuid
+SRCS= x86cpuid.pl
+
# aes
-SRCS= aes-586.pl aesni-x86.pl vpaes-x86.pl
+SRCS+= aes-586.pl aesni-x86.pl vpaes-x86.pl
# blowfish
-SRCS+= bf-586.pl bf-686.pl
+SRCS+= bf-586.pl
# bn
SRCS+= bn-586.pl co-586.pl x86-gf2m.pl x86-mont.pl
@@ -198,15 +206,27 @@ SRCS+= bn-586.pl co-586.pl x86-gf2m.pl x86-mont.pl
# camellia
SRCS+= cmll-x86.pl
+# cast
+SRCS+= cast-586.pl
+
+# chacha
+SRCS+= chacha-x86.pl
+
# des
SRCS+= crypt586.pl des-586.pl
+# ec
+SRCS+= ecp_nistz256-x86.pl
+
# md5
SRCS+= md5-586.pl
# modes
SRCS+= ghash-x86.pl
+# poly1305
+SRCS+= poly1305-x86.pl
+
# rc4
SRCS+= rc4-586.pl
@@ -222,25 +242,26 @@ SRCS+= sha1-586.pl sha256-586.pl sha512-586.pl
# whrlpool
SRCS+= wp-mmx.pl
-# cpuid
-SRCS+= x86cpuid.pl
+# engines
+SRCS+= e_padlock-x86.pl
ASM= ${SRCS:R:S/$/.S/}
all: ${ASM}
-CLEANFILES= ${ASM}
+CLEANFILES= ${ASM} ${SRCS:R:S/$/.s/}
.SUFFIXES: .pl
.pl.S:
( echo '/* $$'FreeBSD'$$ */' ;\
echo '/* Do not modify. This file is auto-generated from ${.IMPSRC:T}. */' ;\
echo '#ifdef PIC' ;\
- env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} -fpic -DPIC ;\
+ env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} -fpic -DPIC ${.IMPSRC:R:S/$/.s/} ;\
+ cat ${.IMPSRC:R:S/$/.s/} ;\
echo '#else' ;\
- env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ;\
- echo '#endif') |\
- sed -E 's|(\.file[[:blank:]]+)".*"|\1"${.TARGET}"|' > ${.TARGET}
+ env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ${.IMPSRC:R:S/$/.s/} ;\
+ cat ${.IMPSRC:R:S/$/.s/} ;\
+ echo '#endif' ) > ${.TARGET}
.endif
.include <bsd.prog.mk>
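
The reworked .pl.S rule reflects a perlasm change in OpenSSL 1.1.1: the x86 scripts now write their output to a file named as the last argument rather than to stdout, so the rule passes ${.IMPSRC:R:S/$/.s/} and cats the intermediate .s file back into the pipeline (and adds those .s files to CLEANFILES). Roughly what the rule runs for one script, with chacha-x86.pl picked as an example and make variables left symbolic:

    # Sketch of the PIC half of the new .pl.S rule for chacha-x86.S.
    env CC=cc perl ${PERLPATH} chacha-x86.pl elf ${CFLAGS} -fpic -DPIC chacha-x86.s
    cat chacha-x86.s    # generated assembly, emitted into the #ifdef PIC branch
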
Modified: projects/openssl111/secure/lib/libcrypto/i386/aes-586.S
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/i386/aes-586.S Thu Sep 20 20:32:08 2018 (r338841)
+++ projects/openssl111/secure/lib/libcrypto/i386/aes-586.S Thu Sep 20 21:34:05 2018 (r338842)
@@ -1,7 +1,6 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aes-586.pl. */
#ifdef PIC
-.file "aes-586.S"
.text
.type _x86_AES_encrypt_compact,@function
.align 16
@@ -2999,19 +2998,19 @@ _x86_AES_set_encrypt_key:
popl %ebp
ret
.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
-.globl private_AES_set_encrypt_key
-.type private_AES_set_encrypt_key,@function
+.globl AES_set_encrypt_key
+.type AES_set_encrypt_key,@function
.align 16
-private_AES_set_encrypt_key:
-.L_private_AES_set_encrypt_key_begin:
+AES_set_encrypt_key:
+.L_AES_set_encrypt_key_begin:
call _x86_AES_set_encrypt_key
ret
-.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin
-.globl private_AES_set_decrypt_key
-.type private_AES_set_decrypt_key,@function
+.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin
+.globl AES_set_decrypt_key
+.type AES_set_decrypt_key,@function
.align 16
-private_AES_set_decrypt_key:
-.L_private_AES_set_decrypt_key_begin:
+AES_set_decrypt_key:
+.L_AES_set_decrypt_key_begin:
call _x86_AES_set_encrypt_key
cmpl $0,%eax
je .L054proceed
@@ -3240,13 +3239,12 @@ private_AES_set_decrypt_key:
popl %ebx
popl %ebp
ret
-.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin
+.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
#else
-.file "aes-586.S"
.text
.type _x86_AES_encrypt_compact,@function
.align 16
@@ -6244,19 +6242,19 @@ _x86_AES_set_encrypt_key:
popl %ebp
ret
.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
-.globl private_AES_set_encrypt_key
-.type private_AES_set_encrypt_key,@function
+.globl AES_set_encrypt_key
+.type AES_set_encrypt_key,@function
.align 16
-private_AES_set_encrypt_key:
-.L_private_AES_set_encrypt_key_begin:
+AES_set_encrypt_key:
+.L_AES_set_encrypt_key_begin:
call _x86_AES_set_encrypt_key
ret
-.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin
-.globl private_AES_set_decrypt_key
-.type private_AES_set_decrypt_key,@function
+.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin
+.globl AES_set_decrypt_key
+.type AES_set_decrypt_key,@function
.align 16
-private_AES_set_decrypt_key:
-.L_private_AES_set_decrypt_key_begin:
+AES_set_decrypt_key:
+.L_AES_set_decrypt_key_begin:
call _x86_AES_set_encrypt_key
cmpl $0,%eax
je .L054proceed
@@ -6485,7 +6483,7 @@ private_AES_set_decrypt_key:
popl %ebx
popl %ebp
ret
-.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin
+.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
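
The symbol rename visible in this file follows upstream: OpenSSL 1.1 dropped the private_ prefix, so the assembly now exports AES_set_encrypt_key and AES_set_decrypt_key directly. A quick, hypothetical post-build sanity check (the library path is illustrative):

    # Confirm the renamed entry points in the rebuilt library.
    nm -D /usr/lib/libcrypto.so | grep -E 'AES_set_(en|de)crypt_key'
    # Expect AES_set_encrypt_key / AES_set_decrypt_key; no private_ variants.
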
Modified: projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S Thu Sep 20 20:32:08 2018 (r338841)
+++ projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S Thu Sep 20 21:34:05 2018 (r338842)
@@ -1,7 +1,6 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-x86.pl. */
#ifdef PIC
-.file "aesni-x86.S"
.text
.globl aesni_encrypt
.type aesni_encrypt,@function
@@ -1793,6 +1792,796 @@ aesni_xts_decrypt:
popl %ebp
ret
.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
+.globl aesni_ocb_encrypt
+.type aesni_ocb_encrypt,@function
+.align 16
+aesni_ocb_encrypt:
+.L_aesni_ocb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movdqu (%ecx),%xmm0
+ movl 36(%esp),%ebp
+ movdqu (%ebx),%xmm1
+ movl 44(%esp),%ebx
+ movl %esp,%ecx
+ subl $132,%esp
+ andl $-16,%esp
+ subl %esi,%edi
+ shll $4,%eax
+ leal -96(%esi,%eax,1),%eax
+ movl %edi,120(%esp)
+ movl %eax,124(%esp)
+ movl %ecx,128(%esp)
+ movl 240(%edx),%ecx
+ testl $1,%ebp
+ jnz .L074odd
+ bsfl %ebp,%eax
+ addl $1,%ebp
+ shll $4,%eax
+ movdqu (%ebx,%eax,1),%xmm7
+ movl %edx,%eax
+ movdqu (%esi),%xmm2
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L075enc1_loop_15:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L075enc1_loop_15
+.byte 102,15,56,221,209
+ xorps %xmm7,%xmm2
+ movdqa %xmm7,%xmm0
+ movdqa %xmm6,%xmm1
+ movups %xmm2,-16(%edi,%esi,1)
+ movl 240(%eax),%ecx
+ movl %eax,%edx
+ movl 124(%esp),%eax
+.L074odd:
+ shll $4,%ecx
+ movl $16,%edi
+ subl %ecx,%edi
+ movl %edx,112(%esp)
+ leal 32(%edx,%ecx,1),%edx
+ movl %edi,116(%esp)
+ cmpl %eax,%esi
+ ja .L076short
+ jmp .L077grandloop
+.align 32
+.L077grandloop:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ leal 5(%ebp),%edi
+ addl $6,%ebp
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ bsfl %edi,%edi
+ shll $4,%ecx
+ shll $4,%eax
+ shll $4,%edi
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ movdqu (%ebx,%edi,1),%xmm7
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm6,%xmm1
+ pxor %xmm0,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm1,96(%esp)
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor 80(%esp),%xmm7
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movl 120(%esp),%edi
+ movl 124(%esp),%eax
+ call .L_aesni_encrypt6_enter
+ movdqa 80(%esp),%xmm0
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor %xmm0,%xmm7
+ movdqa 96(%esp),%xmm1
+ movdqu %xmm2,-96(%edi,%esi,1)
+ movdqu %xmm3,-80(%edi,%esi,1)
+ movdqu %xmm4,-64(%edi,%esi,1)
+ movdqu %xmm5,-48(%edi,%esi,1)
+ movdqu %xmm6,-32(%edi,%esi,1)
+ movdqu %xmm7,-16(%edi,%esi,1)
+ cmpl %eax,%esi
+ jb .L077grandloop
+.L076short:
+ addl $96,%eax
+ subl %esi,%eax
+ jz .L078done
+ cmpl $32,%eax
+ jb .L079one
+ je .L080two
+ cmpl $64,%eax
+ jb .L081three
+ je .L082four
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ pxor %xmm7,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm6,%xmm1
+ pxor %xmm0,%xmm6
+ movdqa %xmm1,96(%esp)
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movl 120(%esp),%edi
+ call .L_aesni_encrypt6_enter
+ movdqa 64(%esp),%xmm0
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor %xmm0,%xmm6
+ movdqa 96(%esp),%xmm1
+ movdqu %xmm2,(%edi,%esi,1)
+ movdqu %xmm3,16(%edi,%esi,1)
+ movdqu %xmm4,32(%edi,%esi,1)
+ movdqu %xmm5,48(%edi,%esi,1)
+ movdqu %xmm6,64(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L079one:
+ movdqu (%ebx),%xmm7
+ movl 112(%esp),%edx
+ movdqu (%esi),%xmm2
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movl 120(%esp),%edi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L083enc1_loop_16:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L083enc1_loop_16
+.byte 102,15,56,221,209
+ xorps %xmm7,%xmm2
+ movdqa %xmm7,%xmm0
+ movdqa %xmm6,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L080two:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm6
+ movdqu (%ebx,%ecx,1),%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm7,%xmm3
+ movdqa %xmm1,%xmm5
+ movl 120(%esp),%edi
+ call _aesni_encrypt2
+ xorps %xmm6,%xmm2
+ xorps %xmm7,%xmm3
+ movdqa %xmm7,%xmm0
+ movdqa %xmm5,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ movups %xmm3,16(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L081three:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm5
+ movdqu (%ebx,%ecx,1),%xmm6
+ movdqa %xmm5,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm5,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm6,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm7,%xmm4
+ movdqa %xmm1,96(%esp)
+ movl 120(%esp),%edi
+ call _aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movdqa %xmm7,%xmm0
+ movdqa 96(%esp),%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ movups %xmm3,16(%edi,%esi,1)
+ movups %xmm4,32(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L082four:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ movl 112(%esp),%edx
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm4
+ movdqu (%ebx,%ecx,1),%xmm5
+ movdqa %xmm4,%xmm6
+ movdqu (%ebx,%eax,1),%xmm7
+ pxor %xmm0,%xmm4
+ movdqu (%esi),%xmm2
+ pxor %xmm4,%xmm5
+ movdqu 16(%esi),%xmm3
+ pxor %xmm5,%xmm6
+ movdqa %xmm4,(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm5,16(%esp)
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movl 240(%edx),%ecx
+ pxor %xmm2,%xmm1
+ pxor (%esp),%xmm2
+ pxor %xmm3,%xmm1
+ pxor 16(%esp),%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm7,%xmm5
+ movdqa %xmm1,96(%esp)
+ movl 120(%esp),%edi
+ call _aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm6,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ xorps %xmm7,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ movdqa %xmm7,%xmm0
+ movups %xmm4,32(%edi,%esi,1)
+ movdqa 96(%esp),%xmm1
+ movups %xmm5,48(%edi,%esi,1)
+.L078done:
+ movl 128(%esp),%edx
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm2,16(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm2,32(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm2,48(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm2,64(%esp)
+ movdqa %xmm2,80(%esp)
+ movdqa %xmm2,96(%esp)
+ leal (%edx),%esp
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movdqu %xmm0,(%ecx)
+ pxor %xmm0,%xmm0
+ movdqu %xmm1,(%ebx)
+ pxor %xmm1,%xmm1
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
+.globl aesni_ocb_decrypt
+.type aesni_ocb_decrypt,@function
+.align 16
+aesni_ocb_decrypt:
+.L_aesni_ocb_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movdqu (%ecx),%xmm0
+ movl 36(%esp),%ebp
+ movdqu (%ebx),%xmm1
+ movl 44(%esp),%ebx
+ movl %esp,%ecx
+ subl $132,%esp
+ andl $-16,%esp
+ subl %esi,%edi
+ shll $4,%eax
+ leal -96(%esi,%eax,1),%eax
+ movl %edi,120(%esp)
+ movl %eax,124(%esp)
+ movl %ecx,128(%esp)
+ movl 240(%edx),%ecx
+ testl $1,%ebp
+ jnz .L084odd
+ bsfl %ebp,%eax
+ addl $1,%ebp
+ shll $4,%eax
+ movdqu (%ebx,%eax,1),%xmm7
+ movl %edx,%eax
+ movdqu (%esi),%xmm2
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm7
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L085dec1_loop_17:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L085dec1_loop_17
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm1
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm1
+ movups %xmm2,-16(%edi,%esi,1)
+ movl 240(%eax),%ecx
+ movl %eax,%edx
+ movl 124(%esp),%eax
+.L084odd:
+ shll $4,%ecx
+ movl $16,%edi
+ subl %ecx,%edi
+ movl %edx,112(%esp)
+ leal 32(%edx,%ecx,1),%edx
+ movl %edi,116(%esp)
+ cmpl %eax,%esi
+ ja .L086short
+ jmp .L087grandloop
+.align 32
+.L087grandloop:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ leal 5(%ebp),%edi
+ addl $6,%ebp
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ bsfl %edi,%edi
+ shll $4,%ecx
+ shll $4,%eax
+ shll $4,%edi
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ movdqu (%ebx,%edi,1),%xmm7
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor 80(%esp),%xmm7
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movl 120(%esp),%edi
+ movl 124(%esp),%eax
+ call .L_aesni_decrypt6_enter
+ movdqa 80(%esp),%xmm0
+ pxor (%esp),%xmm2
+ movdqa 96(%esp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ movdqu %xmm2,-96(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqu %xmm3,-80(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movdqu %xmm4,-64(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+ movdqu %xmm5,-48(%edi,%esi,1)
+ pxor %xmm6,%xmm1
+ movdqu %xmm6,-32(%edi,%esi,1)
+ pxor %xmm7,%xmm1
+ movdqu %xmm7,-16(%edi,%esi,1)
+ cmpl %eax,%esi
+ jb .L087grandloop
+.L086short:
+ addl $96,%eax
+ subl %esi,%eax
+ jz .L088done
+ cmpl $32,%eax
+ jb .L089one
+ je .L090two
+ cmpl $64,%eax
+ jb .L091three
+ je .L092four
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ pxor %xmm7,%xmm7
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movl 120(%esp),%edi
+ call .L_aesni_decrypt6_enter
+ movdqa 64(%esp),%xmm0
+ pxor (%esp),%xmm2
+ movdqa 96(%esp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm2,%xmm1
+ movdqu %xmm2,(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqu %xmm3,16(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movdqu %xmm4,32(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+ movdqu %xmm5,48(%edi,%esi,1)
+ pxor %xmm6,%xmm1
+ movdqu %xmm6,64(%edi,%esi,1)
+ jmp .L088done
+.align 16
+.L089one:
+ movdqu (%ebx),%xmm7
+ movl 112(%esp),%edx
+ movdqu (%esi),%xmm2
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm7
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movl 120(%esp),%edi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L093dec1_loop_18:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L093dec1_loop_18
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm1
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ jmp .L088done
+.align 16
+.L090two:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm6
+ movdqu (%ebx,%ecx,1),%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movl 240(%edx),%ecx
+ movdqa %xmm1,%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm6,%xmm2
+ pxor %xmm7,%xmm3
+ movl 120(%esp),%edi
+ call _aesni_decrypt2
+ xorps %xmm6,%xmm2
+ xorps %xmm7,%xmm3
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm5
+ movups %xmm2,(%edi,%esi,1)
+ xorps %xmm3,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ movaps %xmm5,%xmm1
+ jmp .L088done
+.align 16
+.L091three:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm5
+ movdqu (%ebx,%ecx,1),%xmm6
+ movdqa %xmm5,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movl 240(%edx),%ecx
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm5,%xmm2
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm4
+ movl 120(%esp),%edi
+ call _aesni_decrypt3
+ movdqa 96(%esp),%xmm1
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ pxor %xmm2,%xmm1
+ movdqa %xmm7,%xmm0
+ movups %xmm3,16(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movups %xmm4,32(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ jmp .L088done
+.align 16
+.L092four:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ movl 112(%esp),%edx
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm4
+ movdqu (%ebx,%ecx,1),%xmm5
+ movdqa %xmm4,%xmm6
+ movdqu (%ebx,%eax,1),%xmm7
+ pxor %xmm0,%xmm4
+ movdqu (%esi),%xmm2
+ pxor %xmm4,%xmm5
+ movdqu 16(%esi),%xmm3
+ pxor %xmm5,%xmm6
+ movdqa %xmm4,(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm5,16(%esp)
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movl 240(%edx),%ecx
+ movdqa %xmm1,96(%esp)
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm7,%xmm5
+ movl 120(%esp),%edi
+ call _aesni_decrypt4
+ movdqa 96(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm6,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ pxor %xmm2,%xmm1
+ xorps %xmm7,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqa %xmm7,%xmm0
+ movups %xmm4,32(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movups %xmm5,48(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+.L088done:
+ movl 128(%esp),%edx
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***