git: 980463894015 - main - math/sfft: port to armv7/aarch64, touch up

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Mon, 21 Oct 2024 09:37:11 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/ports/commit/?id=9804638940156bf9ec989aa6c4f3a19f164f4b3d

commit 9804638940156bf9ec989aa6c4f3a19f164f4b3d
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2024-10-15 16:25:07 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2024-10-21 09:36:01 +0000

    math/sfft: port to armv7/aarch64, touch up
    
     - replace complex.h hack with less crude hack
     - use sse2neon to build on armv7/aarch64
     - armv7 should work, but fails due to an unrelated issue
     - touch up CFLAGS slightly
     - rework do-test
---
 math/sfft/Makefile                                 | 57 +++++++++-------------
 .../files/patch-src__computefourier-1.0-2.0.cc     | 10 ++--
 math/sfft/files/patch-src__computefourier-3.0.cc   | 10 ++--
 math/sfft/files/patch-src_fft.h                    | 15 ++++++
 math/sfft/files/patch-src_intrinsics.h             | 13 +++++
 math/sfft/files/patch-src_simulation.cc            | 11 +++++
 math/sfft/files/patch-src_timing__many.cc          | 11 +++++
 7 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/math/sfft/Makefile b/math/sfft/Makefile
index 2a1f158197f6..18373880bb4c 100644
--- a/math/sfft/Makefile
+++ b/math/sfft/Makefile
@@ -1,6 +1,6 @@
 PORTNAME=	sfft
-PORTVERSION=	0.1.0
-PORTREVISION=	12
+DISTVERSION=	0.1.0
+PORTREVISION=	13
 CATEGORIES=	math
 MASTER_SITES=	http://spiral.net/software/sfft/ LOCAL/bf
 DISTFILES=	${DISTNAME}${EXTRACT_SUFX}
@@ -12,12 +12,18 @@ WWW=		https://spiral.net/software/sfft.html
 
 LICENSE=	GPLv2
 
-ONLY_FOR_ARCHS=		amd64 i386
+ONLY_FOR_ARCHS=		aarch64 amd64 armv7 i386
 ONLY_FOR_ARCHS_REASON=	requires SSE instructions, which are x86-specific
 
+BROKEN_armv7=	/usr/local/bin/ld: error: unsupported option: -z relro
+
+BUILD_DEPENDS_aarch64=	${LOCALBASE}/include/sse2neon.h:devel/sse2neon
+BUILD_DEPENDS_armv7=	${BUILD_DEPENDS_aarch64}
+BUILD_DEPENDS+=	${BUILD_DEPENDS_${ARCH}}
+
 LIB_DEPENDS=	libfftw3.so:math/fftw3
 
-USES=		uidfix zip
+USES=		localbase:ldflags uidfix zip
 USE_GCC=	yes
 USE_LDCONFIG=	yes
 
@@ -26,8 +32,10 @@ OPTIONS_DEFAULT=	OPTIMIZED_CFLAGS
 
 BUILD_WRKSRC=	${WRKSRC}/src
 INSTALL_WRKSRC=	${BUILD_WRKSRC}
-CFLAGS+=	-fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include
-LDFLAGS+=	-L${LOCALBASE}/lib
+CFLAGS_armv7=	-mfpu=neon
+CFLAGS_i386=	-msse2
+CFLAGS+=	-fopenmp -Iflopcount
+CXXFLAGS+=	${CFLAGS_${ARCH}} -fopenmp -Iflopcount
 HEADERS=	sfft.h
 HDIR=		include/sfft
 MAKE_ENV=	LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \
@@ -43,11 +51,6 @@ SRCS=	common.cc computefourier-1.0-2.0.cc \
 
 .include <bsd.port.options.mk>
 
-.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2}
-IGNORE=	this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\
-appropriately
-.endif
-
 LIBM=	-lm
 
 .if ${PORT_OPTIONS:MDOCS} || make(makesum)
@@ -73,36 +76,20 @@ MAKE_ENV+=	WITHOUT_PROFILE=yes
 .endif
 
 post-extract:
-	@${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h
 	@${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \
 		${BUILD_WRKSRC}/Makefile
 
-post-patch:
-	@${REINPLACE_CMD} -e 's/string\.h/cstring/' \
-		${WRKSRC}/src/utils.cc
-	@${REINPLACE_CMD} -E -e '/<complex\.h>/ \
-	{s/<complex\.h>/ "sfftcomplex.h"/; x ; \
-	s|^.*$$|#endif|; G; x; \
-	s|^.*$$|extern "C" {|; G; x; \
-	s|^.*$$|#ifdef __cplusplus|; G; x; \
-	s|^.*$$|#ifdef __cplusplus|; H; \
-	s|^.*$$|}|; H; \
-	s|^.*$$|#endif|; H; x;}' \
-		${WRKSRC}/src/computefourier-1.0-2.0.h \
-		${WRKSRC}/src/computefourier-3.0.h \
-		${WRKSRC}/src/fft.h
-
 CORELIMIT?=	/usr/bin/limits -Sc 0
 
 do-test:
-	@cd ${BUILD_WRKSRC}; \
-	${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
-	${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \
-	for _v in 1 2 3 ; do \
-	for _k in 5 10 50; do \
-	echo "Checking sfft version $${_v} with $${_k} frequency components:"; \
-	${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \
-	done ; done
+	cd ${BUILD_WRKSRC} && ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
+		${LDFLAGS} libsfft.a -lfftw3 ${LIBM}
+.for v in 1 2 3
+. for k in 5 10 50
+	@${ECHO_CMD} "Checking sfft version $v with $k frequency components:"
+	cd ${BUILD_WRKSRC} && ${CORELIMIT} ./sfft-verification -k $k -r 3 -v $v || ${TRUE}
+. endfor
+.endfor
 
 post-install:
 	@${MKDIR} ${STAGEDIR}${PREFIX}/${HDIR}
diff --git a/math/sfft/files/patch-src__computefourier-1.0-2.0.cc b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc
index 09b1e51d5b29..d7a691a639bb 100644
--- a/math/sfft/files/patch-src__computefourier-1.0-2.0.cc
+++ b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc
@@ -1,6 +1,6 @@
---- src/computefourier-1.0-2.0.cc.orig	2013-06-13 08:12:25.000000000 -0400
-+++ src/computefourier-1.0-2.0.cc	2013-08-09 00:26:54.000000000 -0400
-@@ -248,8 +248,13 @@
+--- src/computefourier-1.0-2.0.cc.orig	2013-06-13 12:12:25 UTC
++++ src/computefourier-1.0-2.0.cc
+@@ -248,8 +248,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o
            __m128d ad_bc = _mm_mul_pd(ab, dc);
            __m128d ac_mbd = _mm_mul_pd(ac_bd, signs);
  
@@ -15,7 +15,7 @@
            unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset;
            __m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset);
            __m128d st = _mm_add_pd(xy, ab_times_cd);
-@@ -283,7 +288,13 @@
+@@ -283,7 +288,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o
        __m128d ab_square = _mm_mul_pd(ab, ab);
        __m128d cd_square = _mm_mul_pd(cd, cd);
  
@@ -29,7 +29,7 @@
  
        _mm_store_pd(samples + j, r);
      }
-@@ -390,11 +401,23 @@
+@@ -390,11 +401,23 @@ estimate_values(sfft_v1v2_data * data, const int *hits
            __m128d ad_bc = _mm_mul_pd(ab, dc);
            __m128d mad_bc = _mm_mul_pd(ad_bc, signs);
  
diff --git a/math/sfft/files/patch-src__computefourier-3.0.cc b/math/sfft/files/patch-src__computefourier-3.0.cc
index aba106fdd70a..e1653f6f1de0 100644
--- a/math/sfft/files/patch-src__computefourier-3.0.cc
+++ b/math/sfft/files/patch-src__computefourier-3.0.cc
@@ -1,6 +1,6 @@
---- src/computefourier-3.0.cc.orig	2013-06-13 08:12:26.000000000 -0400
-+++ src/computefourier-3.0.cc	2013-08-10 17:02:52.000000000 -0400
-@@ -416,27 +416,64 @@
+--- src/computefourier-3.0.cc.orig	2013-06-13 12:12:26 UTC
++++ src/computefourier-3.0.cc
+@@ -416,27 +416,64 @@ update_gaussian_loops2(int key, complex_t value, compl
  
    __m128d t1r = _mm_mul_pd(v1r, ab31);
    __m128d t1i = _mm_mul_pd(v1i, ba31);
@@ -65,7 +65,7 @@
  
    FLOPCOUNT_INCREMENT(6 * (4 + 2));
  
-@@ -524,11 +561,28 @@
+@@ -524,11 +561,28 @@ estimate_freq_gauss_loops2(sfft_v3_data * data, int WH
        __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
        FLOPCOUNT_INCREMENT(8);
  
@@ -94,7 +94,7 @@
        FLOPCOUNT_INCREMENT(1);
  
        _mm_store_pd(zero_buck_check, zbc);
-@@ -681,13 +735,35 @@
+@@ -681,13 +735,35 @@ estimate_freq_mansour_loops2(sfft_v3_data * data, int 
        __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
        FLOPCOUNT_INCREMENT(8);
  
diff --git a/math/sfft/files/patch-src_fft.h b/math/sfft/files/patch-src_fft.h
new file mode 100644
index 000000000000..0c3f9d3483a9
--- /dev/null
+++ b/math/sfft/files/patch-src_fft.h
@@ -0,0 +1,15 @@
+--- src/fft.h.orig	2024-10-15 16:15:00 UTC
++++ src/fft.h
+@@ -32,6 +32,12 @@
+ //#define USE_FLOAT
+ #define USE_DOUBLE
+ 
++#ifdef __cplusplus
++#define complex __complex__
++#undef I
++#define I ((float __complex__)1.0j)
++#endif
++
+ #ifdef USE_FLOAT
+ typedef float complex complex_t;
+ typedef float real_t;
diff --git a/math/sfft/files/patch-src_intrinsics.h b/math/sfft/files/patch-src_intrinsics.h
new file mode 100644
index 000000000000..e721bb6706aa
--- /dev/null
+++ b/math/sfft/files/patch-src_intrinsics.h
@@ -0,0 +1,13 @@
+--- src/intrinsics.h.orig	2024-10-15 15:55:56 UTC
++++ src/intrinsics.h
+@@ -20,7 +20,9 @@
+  */
+ 
+ 
+-#if defined(__ICC)
++#if defined(__arm__) || defined(__aarch64__)
++#include <sse2neon.h>
++#elif defined(__ICC)
+ #include <xmmintrin.h>
+ #elif defined(__GNUC__)
+ #include <x86intrin.h>
diff --git a/math/sfft/files/patch-src_simulation.cc b/math/sfft/files/patch-src_simulation.cc
new file mode 100644
index 000000000000..c97106ae1a38
--- /dev/null
+++ b/math/sfft/files/patch-src_simulation.cc
@@ -0,0 +1,11 @@
+--- src/simulation.cc.orig	2024-10-15 16:05:30 UTC
++++ src/simulation.cc
+@@ -50,7 +50,7 @@ void simulation::setup(int argc, char **argv)
+   int version = 1;
+   int fftw_opt = FFTW_ESTIMATE;
+ 
+-  char ch;
++  int ch;
+   while ((ch = getopt(argc, argv, "hton:k:r:v:")) != EOF)
+     {
+       switch (ch)
diff --git a/math/sfft/files/patch-src_timing__many.cc b/math/sfft/files/patch-src_timing__many.cc
new file mode 100644
index 000000000000..15b475a5eab6
--- /dev/null
+++ b/math/sfft/files/patch-src_timing__many.cc
@@ -0,0 +1,11 @@
+--- src/timing_many.cc.orig	2024-10-15 16:05:53 UTC
++++ src/timing_many.cc
+@@ -51,7 +51,7 @@ parse_arguments(int argc, char **argv, int *n, int *k,
+ parse_arguments(int argc, char **argv, int *n, int *k, int *num_inputs,
+                 int *version, int *fftw_opt, bool * simple_parallelism)
+ {
+-  char ch;
++  int ch;
+   while ((ch = getopt(argc, argv, "htosi:n:k:v:")) != EOF)
+     {
+       switch (ch)