git: 74d73bb743c7 - main - libcrypto: Generate new files added in OpenSSL 3.0.

From: John Baldwin <jhb@FreeBSD.org>
Date: Tue, 22 Aug 2023 04:04:23 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=74d73bb743c759e6d4d67435d383d501585c4680

commit 74d73bb743c759e6d4d67435d383d501585c4680
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2023-08-22 04:02:29 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-08-22 04:02:29 +0000

    libcrypto: Generate new files added in OpenSSL 3.0.
    
    Reviewed by:    gallatin, ngie, emaste
    Differential Revision:  https://reviews.freebsd.org/D41538
---
 secure/lib/libcrypto/arch/amd64/aes-x86_64.S   | 2680 ++++++++++
 secure/lib/libcrypto/arch/amd64/bsaes-x86_64.S | 2619 ++++++++++
 secure/lib/libcrypto/arch/i386/aes-586.S       | 6644 ++++++++++++++++++++++++
 3 files changed, 11943 insertions(+)
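
For context (not part of the commit itself): the generated aes-x86_64.S supplies the AES_encrypt, AES_decrypt and AES_set_encrypt_key symbols behind the legacy <openssl/aes.h> block-cipher interface, which OpenSSL 3.0 still exports even though it is deprecated in favour of EVP. A minimal C sketch of how those entry points are exercised, assuming a program linked against this libcrypto; error handling and the deprecation warnings are glossed over:

#include <openssl/aes.h>
#include <stdio.h>

int
main(void)
{
	/* 128-bit test key; a real caller loads key material securely. */
	static const unsigned char key[16] = {
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	};
	unsigned char in[AES_BLOCK_SIZE] = { 0 };	/* one 16-byte block */
	unsigned char out[AES_BLOCK_SIZE];
	AES_KEY enc;

	/*
	 * Expand the key into the round-key schedule; the "rounds" field
	 * of AES_KEY is what the assembly reads at 240(%r15).
	 */
	if (AES_set_encrypt_key(key, 128, &enc) != 0)
		return (1);

	/*
	 * Encrypt a single block; higher-level code layers a mode of
	 * operation (CBC, GCM via EVP, ...) on top of this primitive.
	 */
	AES_encrypt(in, out, &enc);

	for (size_t i = 0; i < sizeof(out); i++)
		printf("%02x", out[i]);
	printf("\n");
	return (0);
}

In-tree consumers normally go through EVP_EncryptInit_ex() and friends instead; the raw AES_* calls are shown only because they are exactly the entry points this assembly implements.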

diff --git a/secure/lib/libcrypto/arch/amd64/aes-x86_64.S b/secure/lib/libcrypto/arch/amd64/aes-x86_64.S
new file mode 100644
index 000000000000..fc375184a20a
--- /dev/null
+++ b/secure/lib/libcrypto/arch/amd64/aes-x86_64.S
@@ -0,0 +1,2680 @@
+/* Do not modify. This file is auto-generated from aes-x86_64.pl. */
+.text	
+.type	_x86_64_AES_encrypt,@function
+.align	16
+_x86_64_AES_encrypt:
+.cfi_startproc	
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+
+	movl	240(%r15),%r13d
+	subl	$1,%r13d
+	jmp	.Lenc_loop
+.align	16
+.Lenc_loop:
+
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movl	0(%r14,%rsi,8),%r10d
+	movl	0(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r12d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movzbl	%dl,%ebp
+	xorl	3(%r14,%rsi,8),%r10d
+	xorl	3(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r8d
+
+	movzbl	%dh,%esi
+	shrl	$16,%ecx
+	movzbl	%ah,%ebp
+	xorl	3(%r14,%rsi,8),%r12d
+	shrl	$16,%edx
+	xorl	3(%r14,%rbp,8),%r8d
+
+	shrl	$16,%ebx
+	leaq	16(%r15),%r15
+	shrl	$16,%eax
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	xorl	2(%r14,%rsi,8),%r10d
+	xorl	2(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movzbl	%bl,%ebp
+	xorl	1(%r14,%rsi,8),%r10d
+	xorl	1(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r8d
+
+	movl	12(%r15),%edx
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movl	0(%r15),%eax
+	xorl	1(%r14,%rdi,8),%r12d
+	xorl	1(%r14,%rbp,8),%r8d
+
+	movl	4(%r15),%ebx
+	movl	8(%r15),%ecx
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	subl	$1,%r13d
+	jnz	.Lenc_loop
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movzbl	2(%r14,%rsi,8),%r10d
+	movzbl	2(%r14,%rdi,8),%r11d
+	movzbl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%dl,%esi
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movzbl	2(%r14,%rsi,8),%r8d
+	movl	0(%r14,%rdi,8),%edi
+	movl	0(%r14,%rbp,8),%ebp
+
+	andl	$0x0000ff00,%edi
+	andl	$0x0000ff00,%ebp
+
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+	shrl	$16,%ecx
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	shrl	$16,%edx
+	movl	0(%r14,%rsi,8),%esi
+	movl	0(%r14,%rdi,8),%edi
+
+	andl	$0x0000ff00,%esi
+	andl	$0x0000ff00,%edi
+	shrl	$16,%ebx
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+	shrl	$16,%eax
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	movl	0(%r14,%rsi,8),%esi
+	movl	0(%r14,%rdi,8),%edi
+	movl	0(%r14,%rbp,8),%ebp
+
+	andl	$0x00ff0000,%esi
+	andl	$0x00ff0000,%edi
+	andl	$0x00ff0000,%ebp
+
+	xorl	%esi,%r10d
+	xorl	%edi,%r11d
+	xorl	%ebp,%r12d
+
+	movzbl	%bl,%esi
+	movzbl	%dh,%edi
+	movzbl	%ah,%ebp
+	movl	0(%r14,%rsi,8),%esi
+	movl	2(%r14,%rdi,8),%edi
+	movl	2(%r14,%rbp,8),%ebp
+
+	andl	$0x00ff0000,%esi
+	andl	$0xff000000,%edi
+	andl	$0xff000000,%ebp
+
+	xorl	%esi,%r8d
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movl	16+12(%r15),%edx
+	movl	2(%r14,%rsi,8),%esi
+	movl	2(%r14,%rdi,8),%edi
+	movl	16+0(%r15),%eax
+
+	andl	$0xff000000,%esi
+	andl	$0xff000000,%edi
+
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+
+	movl	16+4(%r15),%ebx
+	movl	16+8(%r15),%ecx
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+.byte	0xf3,0xc3
+.cfi_endproc	
+.size	_x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+.type	_x86_64_AES_encrypt_compact,@function
+.align	16
+_x86_64_AES_encrypt_compact:
+.cfi_startproc	
+	leaq	128(%r14),%r8
+	movl	0-128(%r8),%edi
+	movl	32-128(%r8),%ebp
+	movl	64-128(%r8),%r10d
+	movl	96-128(%r8),%r11d
+	movl	128-128(%r8),%edi
+	movl	160-128(%r8),%ebp
+	movl	192-128(%r8),%r10d
+	movl	224-128(%r8),%r11d
+	jmp	.Lenc_loop_compact
+.align	16
+.Lenc_loop_compact:
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+	leaq	16(%r15),%r15
+	movzbl	%al,%r10d
+	movzbl	%bl,%r11d
+	movzbl	%cl,%r12d
+	movzbl	%dl,%r8d
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	shrl	$16,%ecx
+	movzbl	%dh,%ebp
+	movzbl	(%r14,%r10,1),%r10d
+	movzbl	(%r14,%r11,1),%r11d
+	movzbl	(%r14,%r12,1),%r12d
+	movzbl	(%r14,%r8,1),%r8d
+
+	movzbl	(%r14,%rsi,1),%r9d
+	movzbl	%ah,%esi
+	movzbl	(%r14,%rdi,1),%r13d
+	movzbl	%cl,%edi
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+
+	shll	$8,%r9d
+	shrl	$16,%edx
+	shll	$8,%r13d
+	xorl	%r9d,%r10d
+	shrl	$16,%eax
+	movzbl	%dl,%r9d
+	shrl	$16,%ebx
+	xorl	%r13d,%r11d
+	shll	$8,%ebp
+	movzbl	%al,%r13d
+	movzbl	(%r14,%rdi,1),%edi
+	xorl	%ebp,%r12d
+
+	shll	$8,%esi
+	movzbl	%bl,%ebp
+	shll	$16,%edi
+	xorl	%esi,%r8d
+	movzbl	(%r14,%r9,1),%r9d
+	movzbl	%dh,%esi
+	movzbl	(%r14,%r13,1),%r13d
+	xorl	%edi,%r10d
+
+	shrl	$8,%ecx
+	movzbl	%ah,%edi
+	shll	$16,%r9d
+	shrl	$8,%ebx
+	shll	$16,%r13d
+	xorl	%r9d,%r11d
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rcx,1),%edx
+	movzbl	(%r14,%rbx,1),%ecx
+
+	shll	$16,%ebp
+	xorl	%r13d,%r12d
+	shll	$24,%esi
+	xorl	%ebp,%r8d
+	shll	$24,%edi
+	xorl	%esi,%r10d
+	shll	$24,%edx
+	xorl	%edi,%r11d
+	shll	$24,%ecx
+	movl	%r10d,%eax
+	movl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	cmpq	16(%rsp),%r15
+	je	.Lenc_compact_done
+	movl	$0x80808080,%r10d
+	movl	$0x80808080,%r11d
+	andl	%eax,%r10d
+	andl	%ebx,%r11d
+	movl	%r10d,%esi
+	movl	%r11d,%edi
+	shrl	$7,%r10d
+	leal	(%rax,%rax,1),%r8d
+	shrl	$7,%r11d
+	leal	(%rbx,%rbx,1),%r9d
+	subl	%r10d,%esi
+	subl	%r11d,%edi
+	andl	$0xfefefefe,%r8d
+	andl	$0xfefefefe,%r9d
+	andl	$0x1b1b1b1b,%esi
+	andl	$0x1b1b1b1b,%edi
+	movl	%eax,%r10d
+	movl	%ebx,%r11d
+	xorl	%esi,%r8d
+	xorl	%edi,%r9d
+
+	xorl	%r8d,%eax
+	xorl	%r9d,%ebx
+	movl	$0x80808080,%r12d
+	roll	$24,%eax
+	movl	$0x80808080,%ebp
+	roll	$24,%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%ebp
+	xorl	%r8d,%eax
+	xorl	%r9d,%ebx
+	movl	%r12d,%esi
+	rorl	$16,%r10d
+	movl	%ebp,%edi
+	rorl	$16,%r11d
+	leal	(%rcx,%rcx,1),%r8d
+	shrl	$7,%r12d
+	xorl	%r10d,%eax
+	shrl	$7,%ebp
+	xorl	%r11d,%ebx
+	rorl	$8,%r10d
+	leal	(%rdx,%rdx,1),%r9d
+	rorl	$8,%r11d
+	subl	%r12d,%esi
+	subl	%ebp,%edi
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+
+	andl	$0xfefefefe,%r8d
+	andl	$0xfefefefe,%r9d
+	andl	$0x1b1b1b1b,%esi
+	andl	$0x1b1b1b1b,%edi
+	movl	%ecx,%r12d
+	movl	%edx,%ebp
+	xorl	%esi,%r8d
+	xorl	%edi,%r9d
+
+	rorl	$16,%r12d
+	xorl	%r8d,%ecx
+	rorl	$16,%ebp
+	xorl	%r9d,%edx
+	roll	$24,%ecx
+	movl	0(%r14),%esi
+	roll	$24,%edx
+	xorl	%r8d,%ecx
+	movl	64(%r14),%edi
+	xorl	%r9d,%edx
+	movl	128(%r14),%r8d
+	xorl	%r12d,%ecx
+	rorl	$8,%r12d
+	xorl	%ebp,%edx
+	rorl	$8,%ebp
+	xorl	%r12d,%ecx
+	movl	192(%r14),%r9d
+	xorl	%ebp,%edx
+	jmp	.Lenc_loop_compact
+.align	16
+.Lenc_compact_done:
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+.byte	0xf3,0xc3
+.cfi_endproc	
+.size	_x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+.globl	AES_encrypt
+.type	AES_encrypt,@function
+.align	16
+.globl	asm_AES_encrypt
+.hidden	asm_AES_encrypt
+asm_AES_encrypt:
+AES_encrypt:
+.cfi_startproc	
+.byte	243,15,30,250
+	movq	%rsp,%rax
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+
+
+	leaq	-63(%rdx),%rcx
+	andq	$-64,%rsp
+	subq	%rsp,%rcx
+	negq	%rcx
+	andq	$0x3c0,%rcx
+	subq	%rcx,%rsp
+	subq	$32,%rsp
+
+	movq	%rsi,16(%rsp)
+	movq	%rax,24(%rsp)
+.cfi_escape	0x0f,0x05,0x77,0x18,0x06,0x23,0x08
+.Lenc_prologue:
+
+	movq	%rdx,%r15
+	movl	240(%r15),%r13d
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+
+	shll	$4,%r13d
+	leaq	(%r15,%r13,1),%rbp
+	movq	%r15,(%rsp)
+	movq	%rbp,8(%rsp)
+
+
+	leaq	.LAES_Te+2048(%rip),%r14
+	leaq	768(%rsp),%rbp
+	subq	%r14,%rbp
+	andq	$0x300,%rbp
+	leaq	(%r14,%rbp,1),%r14
+
+	call	_x86_64_AES_encrypt_compact
+
+	movq	16(%rsp),%r9
+	movq	24(%rsp),%rsi
+.cfi_def_cfa	%rsi,8
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lenc_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	AES_encrypt,.-AES_encrypt
+.type	_x86_64_AES_decrypt,@function
+.align	16
+_x86_64_AES_decrypt:
+.cfi_startproc	
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+
+	movl	240(%r15),%r13d
+	subl	$1,%r13d
+	jmp	.Ldec_loop
+.align	16
+.Ldec_loop:
+
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movl	0(%r14,%rsi,8),%r10d
+	movl	0(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r12d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movzbl	%dl,%ebp
+	xorl	3(%r14,%rsi,8),%r10d
+	xorl	3(%r14,%rdi,8),%r11d
+	movl	0(%r14,%rbp,8),%r8d
+
+	movzbl	%bh,%esi
+	shrl	$16,%eax
+	movzbl	%ch,%ebp
+	xorl	3(%r14,%rsi,8),%r12d
+	shrl	$16,%edx
+	xorl	3(%r14,%rbp,8),%r8d
+
+	shrl	$16,%ebx
+	leaq	16(%r15),%r15
+	shrl	$16,%ecx
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	xorl	2(%r14,%rsi,8),%r10d
+	xorl	2(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r12d
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	movzbl	%bl,%ebp
+	xorl	1(%r14,%rsi,8),%r10d
+	xorl	1(%r14,%rdi,8),%r11d
+	xorl	2(%r14,%rbp,8),%r8d
+
+	movzbl	%dh,%esi
+	movl	12(%r15),%edx
+	movzbl	%ah,%ebp
+	xorl	1(%r14,%rsi,8),%r12d
+	movl	0(%r15),%eax
+	xorl	1(%r14,%rbp,8),%r8d
+
+	xorl	%r10d,%eax
+	movl	4(%r15),%ebx
+	movl	8(%r15),%ecx
+	xorl	%r12d,%ecx
+	xorl	%r11d,%ebx
+	xorl	%r8d,%edx
+	subl	$1,%r13d
+	jnz	.Ldec_loop
+	leaq	2048(%r14),%r14
+	movzbl	%al,%esi
+	movzbl	%bl,%edi
+	movzbl	%cl,%ebp
+	movzbl	(%r14,%rsi,1),%r10d
+	movzbl	(%r14,%rdi,1),%r11d
+	movzbl	(%r14,%rbp,1),%r12d
+
+	movzbl	%dl,%esi
+	movzbl	%dh,%edi
+	movzbl	%ah,%ebp
+	movzbl	(%r14,%rsi,1),%r8d
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$8,%edi
+	shll	$8,%ebp
+
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+	shrl	$16,%edx
+
+	movzbl	%bh,%esi
+	movzbl	%ch,%edi
+	shrl	$16,%eax
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+
+	shll	$8,%esi
+	shll	$8,%edi
+	shrl	$16,%ebx
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+	shrl	$16,%ecx
+
+	movzbl	%cl,%esi
+	movzbl	%dl,%edi
+	movzbl	%al,%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$16,%esi
+	shll	$16,%edi
+	shll	$16,%ebp
+
+	xorl	%esi,%r10d
+	xorl	%edi,%r11d
+	xorl	%ebp,%r12d
+
+	movzbl	%bl,%esi
+	movzbl	%bh,%edi
+	movzbl	%ch,%ebp
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movzbl	(%r14,%rbp,1),%ebp
+
+	shll	$16,%esi
+	shll	$24,%edi
+	shll	$24,%ebp
+
+	xorl	%esi,%r8d
+	xorl	%edi,%r10d
+	xorl	%ebp,%r11d
+
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	movl	16+12(%r15),%edx
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%edi
+	movl	16+0(%r15),%eax
+
+	shll	$24,%esi
+	shll	$24,%edi
+
+	xorl	%esi,%r12d
+	xorl	%edi,%r8d
+
+	movl	16+4(%r15),%ebx
+	movl	16+8(%r15),%ecx
+	leaq	-2048(%r14),%r14
+	xorl	%r10d,%eax
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+.byte	0xf3,0xc3
+.cfi_endproc	
+.size	_x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+.type	_x86_64_AES_decrypt_compact,@function
+.align	16
+_x86_64_AES_decrypt_compact:
+.cfi_startproc	
+	leaq	128(%r14),%r8
+	movl	0-128(%r8),%edi
+	movl	32-128(%r8),%ebp
+	movl	64-128(%r8),%r10d
+	movl	96-128(%r8),%r11d
+	movl	128-128(%r8),%edi
+	movl	160-128(%r8),%ebp
+	movl	192-128(%r8),%r10d
+	movl	224-128(%r8),%r11d
+	jmp	.Ldec_loop_compact
+
+.align	16
+.Ldec_loop_compact:
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+	leaq	16(%r15),%r15
+	movzbl	%al,%r10d
+	movzbl	%bl,%r11d
+	movzbl	%cl,%r12d
+	movzbl	%dl,%r8d
+	movzbl	%dh,%esi
+	movzbl	%ah,%edi
+	shrl	$16,%edx
+	movzbl	%bh,%ebp
+	movzbl	(%r14,%r10,1),%r10d
+	movzbl	(%r14,%r11,1),%r11d
+	movzbl	(%r14,%r12,1),%r12d
+	movzbl	(%r14,%r8,1),%r8d
+
+	movzbl	(%r14,%rsi,1),%r9d
+	movzbl	%ch,%esi
+	movzbl	(%r14,%rdi,1),%r13d
+	movzbl	(%r14,%rbp,1),%ebp
+	movzbl	(%r14,%rsi,1),%esi
+
+	shrl	$16,%ecx
+	shll	$8,%r13d
+	shll	$8,%r9d
+	movzbl	%cl,%edi
+	shrl	$16,%eax
+	xorl	%r9d,%r10d
+	shrl	$16,%ebx
+	movzbl	%dl,%r9d
+
+	shll	$8,%ebp
+	xorl	%r13d,%r11d
+	shll	$8,%esi
+	movzbl	%al,%r13d
+	movzbl	(%r14,%rdi,1),%edi
+	xorl	%ebp,%r12d
+	movzbl	%bl,%ebp
+
+	shll	$16,%edi
+	xorl	%esi,%r8d
+	movzbl	(%r14,%r9,1),%r9d
+	movzbl	%bh,%esi
+	movzbl	(%r14,%rbp,1),%ebp
+	xorl	%edi,%r10d
+	movzbl	(%r14,%r13,1),%r13d
+	movzbl	%ch,%edi
+
+	shll	$16,%ebp
+	shll	$16,%r9d
+	shll	$16,%r13d
+	xorl	%ebp,%r8d
+	movzbl	%dh,%ebp
+	xorl	%r9d,%r11d
+	shrl	$8,%eax
+	xorl	%r13d,%r12d
+
+	movzbl	(%r14,%rsi,1),%esi
+	movzbl	(%r14,%rdi,1),%ebx
+	movzbl	(%r14,%rbp,1),%ecx
+	movzbl	(%r14,%rax,1),%edx
+
+	movl	%r10d,%eax
+	shll	$24,%esi
+	shll	$24,%ebx
+	shll	$24,%ecx
+	xorl	%esi,%eax
+	shll	$24,%edx
+	xorl	%r11d,%ebx
+	xorl	%r12d,%ecx
+	xorl	%r8d,%edx
+	cmpq	16(%rsp),%r15
+	je	.Ldec_compact_done
+
+	movq	256+0(%r14),%rsi
+	shlq	$32,%rbx
+	shlq	$32,%rdx
+	movq	256+8(%r14),%rdi
+	orq	%rbx,%rax
+	orq	%rdx,%rcx
+	movq	256+16(%r14),%rbp
+	movq	%rsi,%r9
+	movq	%rsi,%r12
+	andq	%rax,%r9
+	andq	%rcx,%r12
+	movq	%r9,%rbx
+	movq	%r12,%rdx
+	shrq	$7,%r9
+	leaq	(%rax,%rax,1),%r8
+	shrq	$7,%r12
+	leaq	(%rcx,%rcx,1),%r11
+	subq	%r9,%rbx
+	subq	%r12,%rdx
+	andq	%rdi,%r8
+	andq	%rdi,%r11
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r8
+	xorq	%rdx,%r11
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+
+	andq	%r8,%r10
+	andq	%r11,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	leaq	(%r8,%r8,1),%r9
+	shrq	$7,%r13
+	leaq	(%r11,%r11,1),%r12
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	andq	%rdi,%r9
+	andq	%rdi,%r12
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r9
+	xorq	%rdx,%r12
+	movq	%rsi,%r10
+	movq	%rsi,%r13
+
+	andq	%r9,%r10
+	andq	%r12,%r13
+	movq	%r10,%rbx
+	movq	%r13,%rdx
+	shrq	$7,%r10
+	xorq	%rax,%r8
+	shrq	$7,%r13
+	xorq	%rcx,%r11
+	subq	%r10,%rbx
+	subq	%r13,%rdx
+	leaq	(%r9,%r9,1),%r10
+	leaq	(%r12,%r12,1),%r13
+	xorq	%rax,%r9
+	xorq	%rcx,%r12
+	andq	%rdi,%r10
+	andq	%rdi,%r13
+	andq	%rbp,%rbx
+	andq	%rbp,%rdx
+	xorq	%rbx,%r10
+	xorq	%rdx,%r13
+
+	xorq	%r10,%rax
+	xorq	%r13,%rcx
+	xorq	%r10,%r8
+	xorq	%r13,%r11
+	movq	%rax,%rbx
+	movq	%rcx,%rdx
+	xorq	%r10,%r9
+	shrq	$32,%rbx
+	xorq	%r13,%r12
+	shrq	$32,%rdx
+	xorq	%r8,%r10
+	roll	$8,%eax
+	xorq	%r11,%r13
+	roll	$8,%ecx
+	xorq	%r9,%r10
+	roll	$8,%ebx
+	xorq	%r12,%r13
+
+	roll	$8,%edx
+	xorl	%r10d,%eax
+	shrq	$32,%r10
+	xorl	%r13d,%ecx
+	shrq	$32,%r13
+	xorl	%r10d,%ebx
+	xorl	%r13d,%edx
+
+	movq	%r8,%r10
+	roll	$24,%r8d
+	movq	%r11,%r13
+	roll	$24,%r11d
+	shrq	$32,%r10
+	xorl	%r8d,%eax
+	shrq	$32,%r13
+	xorl	%r11d,%ecx
+	roll	$24,%r10d
+	movq	%r9,%r8
+	roll	$24,%r13d
+	movq	%r12,%r11
+	shrq	$32,%r8
+	xorl	%r10d,%ebx
+	shrq	$32,%r11
+	xorl	%r13d,%edx
+
+	movq	0(%r14),%rsi
+	roll	$16,%r9d
+	movq	64(%r14),%rdi
+	roll	$16,%r12d
+	movq	128(%r14),%rbp
+	roll	$16,%r8d
+	movq	192(%r14),%r10
+	xorl	%r9d,%eax
+	roll	$16,%r11d
+	xorl	%r12d,%ecx
+	movq	256(%r14),%r13
+	xorl	%r8d,%ebx
+	xorl	%r11d,%edx
+	jmp	.Ldec_loop_compact
+.align	16
+.Ldec_compact_done:
+	xorl	0(%r15),%eax
+	xorl	4(%r15),%ebx
+	xorl	8(%r15),%ecx
+	xorl	12(%r15),%edx
+.byte	0xf3,0xc3
+.cfi_endproc	
+.size	_x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+.globl	AES_decrypt
+.type	AES_decrypt,@function
+.align	16
+.globl	asm_AES_decrypt
+.hidden	asm_AES_decrypt
+asm_AES_decrypt:
+AES_decrypt:
+.cfi_startproc	
+.byte	243,15,30,250
+	movq	%rsp,%rax
+.cfi_def_cfa_register	%rax
+	pushq	%rbx
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_offset	%r15,-56
+
+
+	leaq	-63(%rdx),%rcx
+	andq	$-64,%rsp
+	subq	%rsp,%rcx
+	negq	%rcx
+	andq	$0x3c0,%rcx
+	subq	%rcx,%rsp
+	subq	$32,%rsp
+
+	movq	%rsi,16(%rsp)
+	movq	%rax,24(%rsp)
+.cfi_escape	0x0f,0x05,0x77,0x18,0x06,0x23,0x08
+.Ldec_prologue:
+
+	movq	%rdx,%r15
+	movl	240(%r15),%r13d
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+
+	shll	$4,%r13d
+	leaq	(%r15,%r13,1),%rbp
+	movq	%r15,(%rsp)
+	movq	%rbp,8(%rsp)
+
+
+	leaq	.LAES_Td+2048(%rip),%r14
+	leaq	768(%rsp),%rbp
+	subq	%r14,%rbp
+	andq	$0x300,%rbp
+	leaq	(%r14,%rbp,1),%r14
+	shrq	$3,%rbp
+	addq	%rbp,%r14
+
+	call	_x86_64_AES_decrypt_compact
+
+	movq	16(%rsp),%r9
+	movq	24(%rsp),%rsi
+.cfi_def_cfa	%rsi,8
+	movl	%eax,0(%r9)
+	movl	%ebx,4(%r9)
+	movl	%ecx,8(%r9)
+	movl	%edx,12(%r9)
+
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Ldec_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	AES_decrypt,.-AES_decrypt
+.globl	AES_set_encrypt_key
+.type	AES_set_encrypt_key,@function
+.align	16
+AES_set_encrypt_key:
+.cfi_startproc	
+.byte	243,15,30,250
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r15,-56
+	subq	$8,%rsp
+.cfi_adjust_cfa_offset	8
+.Lenc_key_prologue:
+
+	call	_x86_64_AES_set_encrypt_key
+
+	movq	40(%rsp),%rbp
+.cfi_restore	%rbp
+	movq	48(%rsp),%rbx
+.cfi_restore	%rbx
+	addq	$56,%rsp
+.cfi_adjust_cfa_offset	-56
+.Lenc_key_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.type	_x86_64_AES_set_encrypt_key,@function
+.align	16
+_x86_64_AES_set_encrypt_key:
+.cfi_startproc	
+	movl	%esi,%ecx
+	movq	%rdi,%rsi
+	movq	%rdx,%rdi
+
+	testq	$-1,%rsi
+	jz	.Lbadpointer
+	testq	$-1,%rdi
+	jz	.Lbadpointer
+
+	leaq	.LAES_Te(%rip),%rbp
+	leaq	2048+128(%rbp),%rbp
+
+
+	movl	0-128(%rbp),%eax
+	movl	32-128(%rbp),%ebx
+	movl	64-128(%rbp),%r8d
+	movl	96-128(%rbp),%edx
+	movl	128-128(%rbp),%eax
+	movl	160-128(%rbp),%ebx
+	movl	192-128(%rbp),%r8d
+	movl	224-128(%rbp),%edx
+
+	cmpl	$128,%ecx
+	je	.L10rounds
+	cmpl	$192,%ecx
*** 10995 LINES SKIPPED ***