svn commit: r338877 - in projects/openssl111/secure/lib/libcrypto: . arm
Jung-uk Kim
jkim at FreeBSD.org
Sat Sep 22 02:42:53 UTC 2018
Author: jkim
Date: Sat Sep 22 02:42:51 2018
New Revision: 338877
URL: https://svnweb.freebsd.org/changeset/base/338877
Log:
Regen assembly files for arm.
Added:
projects/openssl111/secure/lib/libcrypto/arm/chacha-armv4.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/arm/ecp_nistz256-armv4.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/arm/keccak1600-armv4.S (contents, props changed)
projects/openssl111/secure/lib/libcrypto/arm/poly1305-armv4.S (contents, props changed)
Modified:
projects/openssl111/secure/lib/libcrypto/Makefile.asm
projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S
projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S
projects/openssl111/secure/lib/libcrypto/arm/armv4-gf2m.S
projects/openssl111/secure/lib/libcrypto/arm/armv4-mont.S
projects/openssl111/secure/lib/libcrypto/arm/bsaes-armv7.S
projects/openssl111/secure/lib/libcrypto/arm/ghash-armv4.S
projects/openssl111/secure/lib/libcrypto/arm/ghashv8-armx.S
projects/openssl111/secure/lib/libcrypto/arm/sha1-armv4-large.S
projects/openssl111/secure/lib/libcrypto/arm/sha256-armv4.S
projects/openssl111/secure/lib/libcrypto/arm/sha512-armv4.S
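For reference, each .S file above is generated from the corresponding OpenSSL
perlasm script reachable through Makefile.asm. A minimal sketch of the
generation step, assuming the usual bmake suffix-rule form and the upstream
"linux32" flavour argument (the actual rule and flavour used by Makefile.asm
may differ):

	.SUFFIXES: .pl .S
	.pl.S:
		perl ${.IMPSRC} linux32 ${.TARGET}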
Modified: projects/openssl111/secure/lib/libcrypto/Makefile.asm
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/Makefile.asm Sat Sep 22 02:23:42 2018 (r338876)
+++ projects/openssl111/secure/lib/libcrypto/Makefile.asm Sat Sep 22 02:42:51 2018 (r338877)
@@ -149,22 +149,34 @@ ${s}.S: ${s}.s
.PATH: ${LCRYPTO_SRC}/crypto \
${LCRYPTO_SRC}/crypto/aes/asm \
${LCRYPTO_SRC}/crypto/bn/asm \
+ ${LCRYPTO_SRC}/crypto/chacha/asm \
+ ${LCRYPTO_SRC}/crypto/ec/asm \
${LCRYPTO_SRC}/crypto/modes/asm \
+ ${LCRYPTO_SRC}/crypto/poly1305/asm \
${LCRYPTO_SRC}/crypto/sha/asm
PERLPATH= -I${LCRYPTO_SRC}/crypto/perlasm
# aes
-SRCS= aesv8-armx.pl bsaes-armv7.pl
+SRCS= aes-armv4.pl aesv8-armx.pl bsaes-armv7.pl
# bn
SRCS+= armv4-mont.pl armv4-gf2m.pl
+# chacha
+SRCS+= chacha-armv4.pl
+
+# ec
+SRCS+= ecp_nistz256-armv4.pl
+
# modes
SRCS+= ghash-armv4.pl ghashv8-armx.pl
+# poly1305
+SRCS+= poly1305-armv4.pl
+
# sha
-SRCS+= sha1-armv4-large.pl sha256-armv4.pl sha512-armv4.pl
+SRCS+= keccak1600-armv4.pl sha1-armv4-large.pl sha256-armv4.pl sha512-armv4.pl
ASM= aes-armv4.S ${SRCS:R:S/$/.S/}
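The unchanged ASM line above derives the .S names from the .pl sources with
standard bmake variable modifiers. A worked expansion for one of the newly
added entries:

	SRCS= chacha-armv4.pl
	# ${SRCS:R}         -> chacha-armv4     (:R strips the suffix)
	# ${SRCS:R:S/$/.S/} -> chacha-armv4.S   (:S/$/.S/ appends .S at end of string)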
Modified: projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S Sat Sep 22 02:23:42 2018 (r338876)
+++ projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S Sat Sep 22 02:42:51 2018 (r338877)
@@ -1,6 +1,13 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aes-armv4.pl. */
+@ Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
+@
+@ Licensed under the OpenSSL license (the "License"). You may not use
+@ this file except in compliance with the License. You can obtain a copy
+@ in the file LICENSE in the source distribution or at
+@ https://www.openssl.org/source/license.html
+
@ ====================================================================
@ Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@@ -40,15 +47,12 @@
#endif
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
-# ifdef __thumb2__
.thumb
-# else
+#else
.code 32
-# endif
+#undef __thumb2__
#endif
.type AES_Te,%object
@@ -159,19 +163,23 @@ AES_Te:
@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
-.global AES_encrypt
-.type AES_encrypt,%function
+.globl AES_encrypt
+.type AES_encrypt,%function
.align 5
AES_encrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
sub r3,pc,#8 @ AES_encrypt
#else
adr r3,.
#endif
- stmdb sp!,{r1,r4-r12,lr}
+ stmdb sp!,{r1,r4-r12,lr}
+#if defined(__thumb2__) || defined(__APPLE__)
+ adr r10,AES_Te
+#else
+ sub r10,r3,#AES_encrypt-AES_Te @ Te
+#endif
mov r12,r0 @ inp
mov r11,r2
- sub r10,r3,#AES_encrypt-AES_Te @ Te
#if __ARM_ARCH__<7
ldrb r0,[r12,#3] @ load input data in endian-neutral
ldrb r4,[r12,#2] @ manner...
@@ -258,20 +266,20 @@ AES_encrypt:
strb r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
- ldmia sp!,{r4-r12,lr}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size AES_encrypt,.-AES_encrypt
-.type _armv4_AES_encrypt,%function
+.type _armv4_AES_encrypt,%function
.align 2
_armv4_AES_encrypt:
str lr,[sp,#-4]! @ push lr
- ldmia r11!,{r4-r7}
+ ldmia r11!,{r4,r5,r6,r7}
eor r0,r0,r4
ldr r12,[r11,#240-16]
eor r1,r1,r5
@@ -404,24 +412,24 @@ _armv4_AES_encrypt:
ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
-.global private_AES_set_encrypt_key
-.type private_AES_set_encrypt_key,%function
+.globl AES_set_encrypt_key
+.type AES_set_encrypt_key,%function
.align 5
-private_AES_set_encrypt_key:
+AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
sub r3,pc,#8 @ AES_set_encrypt_key
#else
adr r3,.
#endif
teq r0,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
moveq r0,#-1
beq .Labrt
teq r2,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
moveq r0,#-1
@@ -432,19 +440,23 @@ _armv4_AES_set_encrypt_key:
teq r1,#192
beq .Lok
teq r1,#256
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt ne @ Thumb2 thing, sanity check in ARM
#endif
movne r0,#-1
bne .Labrt
-.Lok: stmdb sp!,{r4-r12,lr}
- sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
-
+.Lok: stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
mov r12,r0 @ inp
mov lr,r1 @ bits
mov r11,r2 @ key
+#if defined(__thumb2__) || defined(__APPLE__)
+ adr r10,AES_Te+1024 @ Te4
+#else
+ sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
+#endif
+
#if __ARM_ARCH__<7
ldrb r0,[r12,#3] @ load input data in endian-neutral
ldrb r4,[r12,#2] @ manner...
@@ -589,7 +601,7 @@ _armv4_AES_set_encrypt_key:
str r2,[r11,#-16]
subs r12,r12,#1
str r3,[r11,#-12]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
subeq r2,r11,#216
@@ -661,7 +673,7 @@ _armv4_AES_set_encrypt_key:
str r2,[r11,#-24]
subs r12,r12,#1
str r3,[r11,#-20]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
subeq r2,r11,#256
@@ -695,21 +707,21 @@ _armv4_AES_set_encrypt_key:
.align 2
.Ldone: mov r0,#0
- ldmia sp!,{r4-r12,lr}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ .word 0xe12fff1e
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
-.global private_AES_set_decrypt_key
-.type private_AES_set_decrypt_key,%function
+.globl AES_set_decrypt_key
+.type AES_set_decrypt_key,%function
.align 5
-private_AES_set_decrypt_key:
+AES_set_decrypt_key:
str lr,[sp,#-4]! @ push lr
bl _armv4_AES_set_encrypt_key
teq r0,#0
@@ -719,20 +731,20 @@ private_AES_set_decrypt_key:
mov r0,r2 @ AES_set_encrypt_key preserves r2,
mov r1,r2 @ which is AES_KEY *key
b _armv4_AES_set_enc2dec_key
-.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
-.global AES_set_enc2dec_key
+.globl AES_set_enc2dec_key
.type AES_set_enc2dec_key,%function
.align 5
AES_set_enc2dec_key:
_armv4_AES_set_enc2dec_key:
- stmdb sp!,{r4-r12,lr}
+ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
ldr r12,[r0,#240]
mov r7,r0 @ input
add r8,r0,r12,lsl#4
- mov r11,r1 @ ouput
+ mov r11,r1 @ output
add r10,r1,r12,lsl#4
str r12,[r1,#240]
@@ -809,12 +821,12 @@ _armv4_AES_set_enc2dec_key:
mov r0,#0
#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
- ldmia sp!,{r4-r12,lr}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size AES_set_enc2dec_key,.-AES_set_enc2dec_key
@@ -922,19 +934,23 @@ AES_Td:
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
-.global AES_decrypt
-.type AES_decrypt,%function
+.globl AES_decrypt
+.type AES_decrypt,%function
.align 5
AES_decrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
sub r3,pc,#8 @ AES_decrypt
#else
adr r3,.
#endif
- stmdb sp!,{r1,r4-r12,lr}
+ stmdb sp!,{r1,r4-r12,lr}
+#if defined(__thumb2__) || defined(__APPLE__)
+ adr r10,AES_Td
+#else
+ sub r10,r3,#AES_decrypt-AES_Td @ Td
+#endif
mov r12,r0 @ inp
mov r11,r2
- sub r10,r3,#AES_decrypt-AES_Td @ Td
#if __ARM_ARCH__<7
ldrb r0,[r12,#3] @ load input data in endian-neutral
ldrb r4,[r12,#2] @ manner...
@@ -1021,20 +1037,20 @@ AES_decrypt:
strb r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
- ldmia sp!,{r4-r12,lr}
+ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size AES_decrypt,.-AES_decrypt
-.type _armv4_AES_decrypt,%function
+.type _armv4_AES_decrypt,%function
.align 2
_armv4_AES_decrypt:
str lr,[sp,#-4]! @ push lr
- ldmia r11!,{r4-r7}
+ ldmia r11!,{r4,r5,r6,r7}
eor r0,r0,r4
ldr r12,[r11,#240-16]
eor r1,r1,r5
@@ -1175,5 +1191,6 @@ _armv4_AES_decrypt:
sub r10,r10,#1024
ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
-.asciz "AES for ARMv4, CRYPTOGAMS by <appro at openssl.org>"
+.byte 65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
.align 2
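Two recurring changes in the regenerated aes-armv4.S: the lookup-table
addresses are now computed with Thumb-2-aware position-independent idioms,
and the private_AES_set_encrypt_key/private_AES_set_decrypt_key entry points
are renamed to plain AES_set_encrypt_key/AES_set_decrypt_key. The expanded
register lists ({r4,r5,...,r12,lr} instead of {r4-r12,lr}) encode the same
register masks and only change the emitted text. A minimal sketch of the
addressing idiom, with hypothetical labels (table/func do not appear in the
generated file):

	.code	32
	table:	.long	0
	func:
	#ifndef __thumb2__
		sub	r3,pc,#8		@ ARM mode: reading pc yields .+8,
						@ so r3 = address of func
		sub	r10,r3,#func-table	@ step back from func to the table
	#else
		adr	r3,.			@ Thumb-2: pc arithmetic differs,
		adr	r10,table		@ so let the assembler compute it
	#endif
		bx	lr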
Modified: projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S Sat Sep 22 02:23:42 2018 (r338876)
+++ projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S Sat Sep 22 02:42:51 2018 (r338877)
@@ -4,11 +4,12 @@
#if __ARM_MAX_ARCH__>=7
.text
-.arch armv7-a
+.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
.fpu neon
.code 32
+#undef __thumb2__
.align 5
-rcon:
+.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
@@ -31,7 +32,7 @@ aes_v8_set_encrypt_key:
tst r1,#0x3f
bne .Lenc_key_abort
- adr r3,rcon
+ adr r3,.Lrcon
cmp r1,#192
veor q0,q0,q0
@@ -49,14 +50,14 @@ aes_v8_set_encrypt_key:
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
- .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
- veor q10,q10,q1
+ veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
@@ -68,13 +69,13 @@ aes_v8_set_encrypt_key:
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
- .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
- veor q10,q10,q1
+ veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
@@ -83,13 +84,13 @@ aes_v8_set_encrypt_key:
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
- .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
- veor q10,q10,q1
+ veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
@@ -110,7 +111,7 @@ aes_v8_set_encrypt_key:
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
- .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
@@ -121,7 +122,7 @@ aes_v8_set_encrypt_key:
vdup.32 q9,d7[1]
veor q9,q9,q8
- veor q10,q10,q1
+ veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
@@ -146,14 +147,14 @@ aes_v8_set_encrypt_key:
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
- .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
- veor q10,q10,q1
+ veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
@@ -162,7 +163,7 @@ aes_v8_set_encrypt_key:
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
- .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
+.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
@@ -179,7 +180,7 @@ aes_v8_set_encrypt_key:
.Lenc_key_abort:
mov r0,r3 @ return value
-
+
bx lr
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
@@ -205,15 +206,15 @@ aes_v8_set_decrypt_key:
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi .Loop_imc
vld1.32 {q0},[r2]
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
@@ -231,19 +232,19 @@ aes_v8_encrypt:
vld1.32 {q1},[r2]!
.Loop_enc:
- .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
- .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
+.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
+.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
- .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
- .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
+.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
+.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_enc
- .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
- .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
+.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
+.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
- .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
+.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
@@ -260,19 +261,19 @@ aes_v8_decrypt:
vld1.32 {q1},[r2]!
.Loop_dec:
- .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
- .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
+.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
+.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
- .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
- .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
+.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
+.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_dec
- .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
- .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
+.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
+.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
- .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
+.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
@@ -283,9 +284,9 @@ aes_v8_decrypt:
.align 5
aes_v8_cbc_encrypt:
mov ip,sp
- stmdb sp!,{r4-r8,lr}
- vstmdb sp!,{d8-d15} @ ABI specification says so
- ldmia ip,{r4-r5} @ load remaining args
+ stmdb sp!,{r4,r5,r6,r7,r8,lr}
+ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
+ ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo .Lcbc_abort
@@ -297,13 +298,13 @@ aes_v8_cbc_encrypt:
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
- vld1.32 {q8-q9},[r3] @ load key schedule...
+ vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
- vld1.32 {q10-q11},[r7]!
- vld1.32 {q12-q13},[r7]!
- vld1.32 {q14-q15},[r7]!
+ vld1.32 {q10,q11},[r7]!
+ vld1.32 {q12,q13},[r7]!
+ vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
@@ -315,62 +316,62 @@ aes_v8_cbc_encrypt:
veor q5,q8,q7
beq .Lcbc_enc128
- vld1.32 {q2-q3},[r7]
+ vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
- .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
- .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- vst1.8 {q6},[r1]!
+.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vst1.8 {q6},[r1]!
.Lenter_cbc_enc:
- .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
- .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192
- .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
- .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
.Lcbc_enc192:
- .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- subs r2,r2,#16
- .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- moveq r8,#0
- .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- vld1.8 {q8},[r0],r8
- .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- veor q8,q8,q5
- .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
- .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
+.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ subs r2,r2,#16
+.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ moveq r8,#0
+.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.8 {q8},[r0],r8
+.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ veor q8,q8,q5
+.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
+.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc
@@ -379,36 +380,36 @@ aes_v8_cbc_encrypt:
.align 5
.Lcbc_enc128:
- vld1.32 {q2-q3},[r7]
- .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.32 {q2,q3},[r7]
+.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
- .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- vst1.8 {q6},[r1]!
+.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
- .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- subs r2,r2,#16
- .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- moveq r8,#0
- .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- vld1.8 {q8},[r0],r8
- .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
- .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
- veor q8,q8,q5
- .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
+.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ subs r2,r2,#16
+.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ moveq r8,#0
+.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ vld1.8 {q8},[r0],r8
+.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
+.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
+ veor q8,q8,q5
+.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc128
@@ -431,81 +432,81 @@ aes_v8_cbc_encrypt:
vorr q11,q10,q10
.Loop3x_cbc_dec:
- .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
- .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec
- .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- veor q4,q6,q7
- subs r2,r2,#0x30
- veor q5,q2,q7
- movlo r6,r2 @ r6, r6, is zero at this point
- .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- veor q9,q3,q7
- add r0,r0,r6 @ r0 is adjusted in such way that
+.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ veor q4,q6,q7
+ subs r2,r2,#0x30
+ veor q5,q2,q7
+ movlo r6,r2 @ r6, r6, is zero at this point
+.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ veor q9,q3,q7
+ add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
- vorr q6,q11,q11
- mov r7,r3
- .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- vld1.8 {q2},[r0]!
- .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- vld1.8 {q3},[r0]!
- .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
- .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
- .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- vld1.8 {q11},[r0]!
- .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
- .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
- .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
- vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
- add r6,r5,#2
+ vorr q6,q11,q11
+ mov r7,r3
+.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ vld1.8 {q2},[r0]!
+.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ vld1.8 {q3},[r0]!
+.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
+.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
+.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ vld1.8 {q11},[r0]!
+.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
+.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
+.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
+ vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
+ add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
- vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
+ vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
- vorr q0,q2,q2
+ vorr q0,q2,q2
vst1.8 {q5},[r1]!
- vorr q1,q3,q3
+ vorr q1,q3,q3
vst1.8 {q10},[r1]!
- vorr q10,q11,q11
+ vorr q10,q11,q11
bhs .Loop3x_cbc_dec
cmn r2,#0x30
@@ -513,244 +514,244 @@ aes_v8_cbc_encrypt:
nop
.Lcbc_dec_tail:
- .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
- .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail
- .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- cmn r2,#0x20
- .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- veor q5,q6,q7
- .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
- .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
- .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
- .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
- veor q9,q3,q7
- .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
- .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
+.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ cmn r2,#0x20
+.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ veor q5,q6,q7
+.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
+.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
+.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
+.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
+ veor q9,q3,q7
+.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
+.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
- vorr q6,q11,q11
+ vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done
.Lcbc_dec_one:
veor q5,q5,q10
- vorr q6,q11,q11
+ vorr q6,q11,q11
vst1.8 {q5},[r1]!
.Lcbc_done:
vst1.8 {q6},[r4]
.Lcbc_abort:
- vldmia sp!,{d8-d15}
- ldmia sp!,{r4-r8,pc}
+ vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
+ ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
.globl aes_v8_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
- mov ip,sp
- stmdb sp!,{r4-r10,lr}
- vstmdb sp!,{d8-d15} @ ABI specification says so
- ldr r4, [ip] @ load remaining arg
- ldr r5,[r3,#240]
+ mov ip,sp
+ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
+ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
+ ldr r4, [ip] @ load remaining arg
+ ldr r5,[r3,#240]
- ldr r8, [r4, #12]
- vld1.32 {q0},[r4]
+ ldr r8, [r4, #12]
+ vld1.32 {q0},[r4]
- vld1.32 {q8-q9},[r3] @ load key schedule...
- sub r5,r5,#4
- mov r12,#16
- cmp r2,#2
- add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
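The aesv8-armx.S hunks above are almost entirely the same mechanical
re-emission: hand-encoded crypto instructions move from indented .byte lines
to column 0, and register-pair lists are spelled out ({q8,q9} for {q8-q9}).
The .byte idiom itself predates this commit; it emits the ARMv8 AES
instruction encodings directly so the file still assembles with binutils
that accept .arch armv7-a but know nothing of the crypto extension. A sketch
using an encoding quoted from the diff (a crypto-aware assembler would
accept the commented mnemonic instead):

	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0 (ARM-mode NEON crypto encoding)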