git: 74d73bb743c7 - main - libcrypto: Generate new files added in OpenSSL 3.0.
Date: Tue, 22 Aug 2023 04:04:23 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=74d73bb743c759e6d4d67435d383d501585c4680

commit 74d73bb743c759e6d4d67435d383d501585c4680
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2023-08-22 04:02:29 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-08-22 04:02:29 +0000

    libcrypto: Generate new files added in OpenSSL 3.0.

    Reviewed by:    gallatin, ngie, emaste
    Differential Revision:  https://reviews.freebsd.org/D41538
---
 secure/lib/libcrypto/arch/amd64/aes-x86_64.S   | 2680 ++++++++++
 secure/lib/libcrypto/arch/amd64/bsaes-x86_64.S | 2619 ++++++++++
 secure/lib/libcrypto/arch/i386/aes-586.S       | 6644 ++++++++++++++++++++++++
 3 files changed, 11943 insertions(+)

diff --git a/secure/lib/libcrypto/arch/amd64/aes-x86_64.S b/secure/lib/libcrypto/arch/amd64/aes-x86_64.S
new file mode 100644
index 000000000000..fc375184a20a
--- /dev/null
+++ b/secure/lib/libcrypto/arch/amd64/aes-x86_64.S
@@ -0,0 +1,2680 @@
+/* Do not modify. This file is auto-generated from aes-x86_64.pl. */
+.text +.type _x86_64_AES_encrypt,@function +.align 16 +_x86_64_AES_encrypt: +.cfi_startproc + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.align 16 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $0x0000ff00,%edi + andl $0x0000ff00,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $0x0000ff00,%esi + andl $0x0000ff00,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $0x00ff0000,%esi + andl $0x00ff0000,%edi + andl $0x00ff0000,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $0x00ff0000,%esi + andl $0xff000000,%edi + andl $0xff000000,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 
2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $0xff000000,%esi + andl $0xff000000,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt +.type _x86_64_AES_encrypt_compact,@function +.align 16 +_x86_64_AES_encrypt_compact: +.cfi_startproc + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.align 16 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ah,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shll $8,%r9d + shrl $16,%edx + shll $8,%r13d + xorl %r9d,%r10d + shrl $16,%eax + movzbl %dl,%r9d + shrl $16,%ebx + xorl %r13d,%r11d + shll $8,%ebp + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + + shll $8,%esi + movzbl %bl,%ebp + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d + xorl %edi,%r10d + + shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d + shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + + shll $16,%ebp + xorl %r13d,%r12d + shll $24,%esi + xorl %ebp,%r8d + shll $24,%edi + xorl %esi,%r10d + shll $24,%edx + xorl %edi,%r11d + shll $24,%ecx + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl $0x80808080,%r10d + movl $0x80808080,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $0xfefefefe,%r8d + andl $0xfefefefe,%r9d + andl $0x1b1b1b1b,%esi + andl $0x1b1b1b1b,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl $0x80808080,%r12d + roll $24,%eax + movl $0x80808080,%ebp + roll $24,%ebx + andl %ecx,%r12d + andl %edx,%ebp + xorl %r8d,%eax + xorl %r9d,%ebx + movl %r12d,%esi + rorl $16,%r10d + movl %ebp,%edi + rorl $16,%r11d + leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d + xorl %r10d,%eax + shrl $7,%ebp + xorl %r11d,%ebx + rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $0xfefefefe,%r8d + andl $0xfefefefe,%r9d + andl $0x1b1b1b1b,%esi + andl $0x1b1b1b1b,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + rorl $16,%r12d + xorl %r8d,%ecx + rorl $16,%ebp + xorl %r9d,%edx + roll $24,%ecx + movl 0(%r14),%esi + roll $24,%edx + xorl %r8d,%ecx + movl 64(%r14),%edi + xorl %r9d,%edx + movl 128(%r14),%r8d + xorl %r12d,%ecx + rorl $8,%r12d + xorl %ebp,%edx + rorl $8,%ebp + xorl %r12d,%ecx + movl 192(%r14),%r9d + xorl %ebp,%edx + jmp .Lenc_loop_compact +.align 16 +.Lenc_compact_done: + xorl 
0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: +AES_encrypt: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $0x3c0,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %rax,24(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08 +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $0x300,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lenc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_encrypt,.-AES_encrypt +.type _x86_64_AES_decrypt,@function +.align 16 +_x86_64_AES_decrypt: +.cfi_startproc + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.align 16 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + 
xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt +.type _x86_64_AES_decrypt_compact,@function +.align 16 +_x86_64_AES_decrypt_compact: +.cfi_startproc + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.align 16 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ch,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shrl $16,%ecx + shll $8,%r13d + shll $8,%r9d + movzbl %cl,%edi + shrl $16,%eax + xorl %r9d,%r10d + shrl $16,%ebx + movzbl %dl,%r9d + + shll $8,%ebp + xorl %r13d,%r11d + shll $8,%esi + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + movzbl %bl,%ebp + + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi + movzbl (%r14,%rbp,1),%ebp + xorl %edi,%r10d + movzbl (%r14,%r13,1),%r13d + movzbl %ch,%edi + + shll $16,%ebp + shll $16,%r9d + shll $16,%r13d + xorl %ebp,%r8d + movzbl %dh,%ebp + xorl %r9d,%r11d + shrl $8,%eax + xorl %r13d,%r12d + + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx + movzbl (%r14,%rax,1),%edx + + movl %r10d,%eax + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%eax + shll $24,%edx + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq 
%r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + movq 0(%r14),%rsi + roll $16,%r9d + movq 64(%r14),%rdi + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + movq 192(%r14),%r10 + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.align 16 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: +AES_decrypt: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $0x3c0,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %rax,24(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08 +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $0x300,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Ldec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_decrypt,.-AES_decrypt +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function +.align 16 +AES_set_encrypt_key: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 
+ subq $8,%rsp +.cfi_adjust_cfa_offset 8 +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 40(%rsp),%rbp +.cfi_restore %rbp + movq 48(%rsp),%rbx +.cfi_restore %rbx + addq $56,%rsp +.cfi_adjust_cfa_offset -56 +.Lenc_key_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +.type _x86_64_AES_set_encrypt_key,@function +.align 16 +_x86_64_AES_set_encrypt_key: +.cfi_startproc + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq .LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx *** 10995 LINES SKIPPED ***
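For context on what the generated files above provide: the symbols defined in aes-x86_64.S (AES_set_encrypt_key, AES_encrypt, AES_decrypt) back libcrypto's low-level single-block AES API declared in <openssl/aes.h>. Below is a minimal usage sketch, not part of this commit; the file name, key, and plaintext are illustrative only, and it assumes a libcrypto linked with -lcrypto (OpenSSL 3.0 marks this API deprecated, so expect a deprecation warning).

/* aes_demo.c: round-trip one 16-byte block through the low-level AES API
 * that the generated aes-x86_64.S implements on amd64. */
#include <openssl/aes.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* Illustrative 128-bit key and one AES block of plaintext. */
	unsigned char key_bytes[16] = "0123456789abcde";
	unsigned char in[AES_BLOCK_SIZE] = "exactly 16 byte";
	unsigned char out[AES_BLOCK_SIZE], back[AES_BLOCK_SIZE];
	AES_KEY enc, dec;

	/* Expand the key schedules; both return 0 on success. */
	if (AES_set_encrypt_key(key_bytes, 128, &enc) != 0 ||
	    AES_set_decrypt_key(key_bytes, 128, &dec) != 0)
		return (1);

	AES_encrypt(in, out, &enc);	/* single-block encrypt */
	AES_decrypt(out, back, &dec);	/* single-block decrypt */

	printf("round trip %s\n",
	    memcmp(in, back, AES_BLOCK_SIZE) == 0 ? "ok" : "failed");
	return (0);
}

Build with something like `cc aes_demo.c -lcrypto`; per the "Do not modify" header in the diff, the assembly itself is regenerated from OpenSSL's perlasm script aes-x86_64.pl rather than edited by hand.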