git: 0410e97d88e6 - 2022Q4 - security/putty: fix ARM NEON AES-GCM code

From: Matthias Andree <mandree_at_FreeBSD.org>
Date: Wed, 12 Oct 2022 20:16:39 UTC
The branch 2022Q4 has been updated by mandree:

URL: https://cgit.FreeBSD.org/ports/commit/?id=0410e97d88e6ec99529638448811a9fa516e054d

commit 0410e97d88e6ec99529638448811a9fa516e054d
Author:     Matthias Andree <mandree@FreeBSD.org>
AuthorDate: 2022-10-12 20:11:48 +0000
Commit:     Matthias Andree <mandree@FreeBSD.org>
CommitDate: 2022-10-12 20:16:04 +0000

    security/putty: fix ARM NEON AES-GCM code
    
    Cherry-pick a test patch from the upstream maintainer:
    
    - this uses fallback code for compilers that do not provide vaddq_p128
      (e.g. clang-10 on FreeBSD 12.3 AMD64/aarch64)
    - and uses vaddq_p128 on systems that provide it, for instance,
      FreeBSD 13-STABLE AMD64/aarch64 with clang 14.0.5
    
    Obtained from:  Simon Tatham
    MFH:            2022Q4
    
    (cherry picked from commit 1632d93d92d9ba58401834a571ba89860963a171)
---
 security/putty/Makefile                          |  2 +-
 security/putty/files/patch-crypto_CMakeLists.txt | 15 ------
 security/putty/files/patch-vaddq_p128            | 65 ++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/security/putty/Makefile b/security/putty/Makefile
index fe19c50a6f96..2819f4bccfc3 100644
--- a/security/putty/Makefile
+++ b/security/putty/Makefile
@@ -1,7 +1,7 @@
 PORTNAME=	putty
 PORTVERSION=	0.78~pre20220922
 DISTVERSIONSUFFIX=	.9fcfd67
-PORTREVISION=	0
+PORTREVISION=	1
 CATEGORIES=	security
 #MASTER_SITES=	http://the.earth.li/~sgtatham/putty/${PORTVERSION}/ \
 #		ftp://ftp.chiark.greenend.org.uk/users/sgtatham/putty-latest/
diff --git a/security/putty/files/patch-crypto_CMakeLists.txt b/security/putty/files/patch-crypto_CMakeLists.txt
deleted file mode 100644
index 82639b778fd0..000000000000
--- a/security/putty/files/patch-crypto_CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-This is a crude hack to avoid aesgcm-neon.c on FreeBSD 12.3,
-which does - by way of clang 10.0.1 - not offer vaddq_p128()
-through arm-neon.h. clang 13 as of FreeBSD 13.1 does.
-
---- crypto/CMakeLists.txt.orig	2022-09-21 23:42:30 UTC
-+++ crypto/CMakeLists.txt
-@@ -192,7 +192,7 @@ if(neon)
-       volatile poly128_t r;
-       volatile poly64_t a, b;
-       volatile poly64x2_t u, v;
--      int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }"
-+      int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); r = vaddq_p128(r, r); }"
-     ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c)
- 
-   # The 'sha3' architecture extension, despite the name, includes
diff --git a/security/putty/files/patch-vaddq_p128 b/security/putty/files/patch-vaddq_p128
new file mode 100644
index 000000000000..ed5e0ca0cca4
--- /dev/null
+++ b/security/putty/files/patch-vaddq_p128
@@ -0,0 +1,65 @@
+commit 2222cd104dc5bd424fe025b98c133c91195cf9f3
+Author: Simon Tatham <anakin@pobox.com>
+Date:   Wed Oct 12 12:54:36 2022 +0100
+
+    AES-GCM NEON: cope with missing vaddq_p128.
+    
+    In some compilers (I'm told clang 10, in particular), the NEON
+    intrinsic vaddq_p128 is missing, even though its input type poly128_t
+    is provided.
+    
+    vaddq_p128 is just an XOR of two vector registers, so that's easy to
+    work around by casting to a more mundane type and back. Added a
+    configure-time test for that intrinsic, and a workaround to be used in
+    its absence.
+
+diff --git a/cmake/cmake.h.in b/cmake/cmake.h.in
+index 91d52d78..5ad32515 100644
+--- ./cmake/cmake.h.in
++++ b/cmake/cmake.h.in
+@@ -54,6 +54,7 @@
+ #cmakedefine01 HAVE_CLMUL
+ #cmakedefine01 HAVE_NEON_CRYPTO
+ #cmakedefine01 HAVE_NEON_PMULL
++#cmakedefine01 HAVE_NEON_VADDQ_P128
+ #cmakedefine01 HAVE_NEON_SHA512
+ #cmakedefine01 HAVE_NEON_SHA512_INTRINSICS
+ #cmakedefine01 USE_ARM64_NEON_H
+diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
+index ff04efb5..4b0aa907 100644
+--- ./crypto/CMakeLists.txt
++++ b/crypto/CMakeLists.txt
+@@ -195,6 +195,14 @@ if(neon)
+       int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }"
+     ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c)
+ 
++  test_compile_with_flags(HAVE_NEON_VADDQ_P128
++    GNU_FLAGS -march=armv8-a+crypto
++    MSVC_FLAGS -D_ARM_USE_NEW_NEON_INTRINSICS
++    TEST_SOURCE "
++      #include <${neon_header}>
++      volatile poly128_t r;
++      int main(void) { r = vaddq_p128(r, r); }")
++
+   # The 'sha3' architecture extension, despite the name, includes
+   # support for SHA-512 (from the SHA-2 standard) as well as SHA-3
+   # proper.
+diff --git a/crypto/aesgcm-neon.c b/crypto/aesgcm-neon.c
+index dd7b83cc..64bc8349 100644
+--- ./crypto/aesgcm-neon.c
++++ b/crypto/aesgcm-neon.c
+@@ -87,6 +87,14 @@ static inline void store_p128_be(void *p, poly128_t v)
+     vst1q_u8(p, vrev64q_u8(vreinterpretq_u8_p128(swapped)));
+ }
+ 
++#if !HAVE_NEON_VADDQ_P128
++static inline poly128_t vaddq_p128(poly128_t a, poly128_t b)
++{
++    return vreinterpretq_p128_u32(veorq_u32(
++        vreinterpretq_u32_p128(a), vreinterpretq_u32_p128(b)));
++}
++#endif
++
+ /*
+  * Key setup is just like in aesgcm-ref-poly.c. There's no point using
+  * vector registers to accelerate this, because it happens rarely.