git: 980463894015 - main - math/sfft: port to armv7/aarch64, touch up
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 21 Oct 2024 09:37:11 UTC
The branch main has been updated by fuz: URL: https://cgit.FreeBSD.org/ports/commit/?id=9804638940156bf9ec989aa6c4f3a19f164f4b3d commit 9804638940156bf9ec989aa6c4f3a19f164f4b3d Author: Robert Clausecker <fuz@FreeBSD.org> AuthorDate: 2024-10-15 16:25:07 +0000 Commit: Robert Clausecker <fuz@FreeBSD.org> CommitDate: 2024-10-21 09:36:01 +0000 math/sfft: port to armv7/aarch64, touch up - replace complex.h hack with less crude hack - use sse2neon to build on armv7/aarch64 - armv7 should work, but falls to an unrelated issue - touch up CFLAGS slightly - rework do-test --- math/sfft/Makefile | 57 +++++++++------------- .../files/patch-src__computefourier-1.0-2.0.cc | 10 ++-- math/sfft/files/patch-src__computefourier-3.0.cc | 10 ++-- math/sfft/files/patch-src_fft.h | 15 ++++++ math/sfft/files/patch-src_intrinsics.h | 13 +++++ math/sfft/files/patch-src_simulation.cc | 11 +++++ math/sfft/files/patch-src_timing__many.cc | 11 +++++ 7 files changed, 82 insertions(+), 45 deletions(-) diff --git a/math/sfft/Makefile b/math/sfft/Makefile index 2a1f158197f6..18373880bb4c 100644 --- a/math/sfft/Makefile +++ b/math/sfft/Makefile @@ -1,6 +1,6 @@ PORTNAME= sfft -PORTVERSION= 0.1.0 -PORTREVISION= 12 +DISTVERSION= 0.1.0 +PORTREVISION= 13 CATEGORIES= math MASTER_SITES= http://spiral.net/software/sfft/ LOCAL/bf DISTFILES= ${DISTNAME}${EXTRACT_SUFX} @@ -12,12 +12,18 @@ WWW= https://spiral.net/software/sfft.html LICENSE= GPLv2 -ONLY_FOR_ARCHS= amd64 i386 +ONLY_FOR_ARCHS= aarch64 amd64 armv7 i386 ONLY_FOR_ARCHS_REASON= requires SSE instructions, which are x86-specific +BROKEN_armv7= /usr/local/bin/ld: error: unsupported option: -z relro + +BUILD_DEPENDS_aarch64= ${LOCALBASE}/include/sse2neon.h:devel/sse2neon +BUILD_DEPENDS_armv7= ${BUILD_DEPENDS_aarch64} +BUILD_DEPENDS+= ${BUILD_DEPENDS_${ARCH}} + LIB_DEPENDS= libfftw3.so:math/fftw3 -USES= uidfix zip +USES= localbase:ldflags uidfix zip USE_GCC= yes USE_LDCONFIG= yes @@ -26,8 +32,10 @@ OPTIONS_DEFAULT= OPTIMIZED_CFLAGS BUILD_WRKSRC= ${WRKSRC}/src INSTALL_WRKSRC= ${BUILD_WRKSRC} -CFLAGS+= -fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include -LDFLAGS+= -L${LOCALBASE}/lib +CFLAGS_armv7= -mfpu=neon +CFLAGS_i386= -msse2 +CFLAGS+= -fopenmp -Iflopcount +CXXFLAGS+= ${CFLAGS_${ARCH}} -fopenmp -Iflopcount HEADERS= sfft.h HDIR= include/sfft MAKE_ENV= LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \ @@ -43,11 +51,6 @@ SRCS= common.cc computefourier-1.0-2.0.cc \ .include <bsd.port.options.mk> -.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2} -IGNORE= this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\ -appropriately -.endif - LIBM= -lm .if ${PORT_OPTIONS:MDOCS} || make(makesum) @@ -73,36 +76,20 @@ MAKE_ENV+= WITHOUT_PROFILE=yes .endif post-extract: - @${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h @${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \ ${BUILD_WRKSRC}/Makefile -post-patch: - @${REINPLACE_CMD} -e 's/string\.h/cstring/' \ - ${WRKSRC}/src/utils.cc - @${REINPLACE_CMD} -E -e '/<complex\.h>/ \ - {s/<complex\.h>/ "sfftcomplex.h"/; x ; \ - s|^.*$$|#endif|; G; x; \ - s|^.*$$|extern "C" {|; G; x; \ - s|^.*$$|#ifdef __cplusplus|; G; x; \ - s|^.*$$|#ifdef __cplusplus|; H; \ - s|^.*$$|}|; H; \ - s|^.*$$|#endif|; H; x;}' \ - ${WRKSRC}/src/computefourier-1.0-2.0.h \ - ${WRKSRC}/src/computefourier-3.0.h \ - ${WRKSRC}/src/fft.h - CORELIMIT?= /usr/bin/limits -Sc 0 do-test: - @cd ${BUILD_WRKSRC}; \ - ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \ - ${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \ - for _v in 1 2 3 ; do \ - for _k in 5 10 50; do \ - echo "Checking sfft version $${_v} with $${_k} frequency components:"; \ - ${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \ - done ; done + cd ${BUILD_WRKSRC} && ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \ + ${LDFLAGS} libsfft.a -lfftw3 ${LIBM} +.for v in 1 2 3 +. for k in 5 10 50 + @${ECHO_CMD} "Checking sfft version $v with $k frequency components:" + cd ${BUILD_WRKSRC} && ${CORELIMIT} ./sfft-verification -k $k -r 3 -v $v || ${TRUE} +. endfor +.endfor post-install: @${MKDIR} ${STAGEDIR}${PREFIX}/${HDIR} diff --git a/math/sfft/files/patch-src__computefourier-1.0-2.0.cc b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc index 09b1e51d5b29..d7a691a639bb 100644 --- a/math/sfft/files/patch-src__computefourier-1.0-2.0.cc +++ b/math/sfft/files/patch-src__computefourier-1.0-2.0.cc @@ -1,6 +1,6 @@ ---- src/computefourier-1.0-2.0.cc.orig 2013-06-13 08:12:25.000000000 -0400 -+++ src/computefourier-1.0-2.0.cc 2013-08-09 00:26:54.000000000 -0400 -@@ -248,8 +248,13 @@ +--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 12:12:25 UTC ++++ src/computefourier-1.0-2.0.cc +@@ -248,8 +248,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o __m128d ad_bc = _mm_mul_pd(ab, dc); __m128d ac_mbd = _mm_mul_pd(ac_bd, signs); @@ -15,7 +15,7 @@ unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset; __m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset); __m128d st = _mm_add_pd(xy, ab_times_cd); -@@ -283,7 +288,13 @@ +@@ -283,7 +288,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o __m128d ab_square = _mm_mul_pd(ab, ab); __m128d cd_square = _mm_mul_pd(cd, cd); @@ -29,7 +29,7 @@ _mm_store_pd(samples + j, r); } -@@ -390,11 +401,23 @@ +@@ -390,11 +401,23 @@ estimate_values(sfft_v1v2_data * data, const int *hits __m128d ad_bc = _mm_mul_pd(ab, dc); __m128d mad_bc = _mm_mul_pd(ad_bc, signs); diff --git a/math/sfft/files/patch-src__computefourier-3.0.cc b/math/sfft/files/patch-src__computefourier-3.0.cc index aba106fdd70a..e1653f6f1de0 100644 --- a/math/sfft/files/patch-src__computefourier-3.0.cc +++ b/math/sfft/files/patch-src__computefourier-3.0.cc @@ -1,6 +1,6 @@ ---- src/computefourier-3.0.cc.orig 2013-06-13 08:12:26.000000000 -0400 -+++ src/computefourier-3.0.cc 2013-08-10 17:02:52.000000000 -0400 -@@ -416,27 +416,64 @@ +--- src/computefourier-3.0.cc.orig 2013-06-13 12:12:26 UTC ++++ src/computefourier-3.0.cc +@@ -416,27 +416,64 @@ update_gaussian_loops2(int key, complex_t value, compl __m128d t1r = _mm_mul_pd(v1r, ab31); __m128d t1i = _mm_mul_pd(v1i, ba31); @@ -65,7 +65,7 @@ FLOPCOUNT_INCREMENT(6 * (4 + 2)); -@@ -524,11 +561,28 @@ +@@ -524,11 +561,28 @@ estimate_freq_gauss_loops2(sfft_v3_data * data, int WH __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3); FLOPCOUNT_INCREMENT(8); @@ -94,7 +94,7 @@ FLOPCOUNT_INCREMENT(1); _mm_store_pd(zero_buck_check, zbc); -@@ -681,13 +735,35 @@ +@@ -681,13 +735,35 @@ estimate_freq_mansour_loops2(sfft_v3_data * data, int __m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3); FLOPCOUNT_INCREMENT(8); diff --git a/math/sfft/files/patch-src_fft.h b/math/sfft/files/patch-src_fft.h new file mode 100644 index 000000000000..0c3f9d3483a9 --- /dev/null +++ b/math/sfft/files/patch-src_fft.h @@ -0,0 +1,15 @@ +--- src/fft.h.orig 2024-10-15 16:15:00 UTC ++++ src/fft.h +@@ -32,6 +32,12 @@ + //#define USE_FLOAT + #define USE_DOUBLE + ++#ifdef __cplusplus ++#define complex __complex__ ++#undef I ++#define I ((float __complex__)1.0j) ++#endif ++ + #ifdef USE_FLOAT + typedef float complex complex_t; + typedef float real_t; diff --git a/math/sfft/files/patch-src_intrinsics.h b/math/sfft/files/patch-src_intrinsics.h new file mode 100644 index 000000000000..e721bb6706aa --- /dev/null +++ b/math/sfft/files/patch-src_intrinsics.h @@ -0,0 +1,13 @@ +--- src/intrinsics.h.orig 2024-10-15 15:55:56 UTC ++++ src/intrinsics.h +@@ -20,7 +20,9 @@ + */ + + +-#if defined(__ICC) ++#if defined(__arm__) || defined(__aarch64__) ++#include <sse2neon.h> ++#elif defined(__ICC) + #include <xmmintrin.h> + #elif defined(__GNUC__) + #include <x86intrin.h> diff --git a/math/sfft/files/patch-src_simulation.cc b/math/sfft/files/patch-src_simulation.cc new file mode 100644 index 000000000000..c97106ae1a38 --- /dev/null +++ b/math/sfft/files/patch-src_simulation.cc @@ -0,0 +1,11 @@ +--- src/simulation.cc.orig 2024-10-15 16:05:30 UTC ++++ src/simulation.cc +@@ -50,7 +50,7 @@ void simulation::setup(int argc, char **argv) + int version = 1; + int fftw_opt = FFTW_ESTIMATE; + +- char ch; ++ int ch; + while ((ch = getopt(argc, argv, "hton:k:r:v:")) != EOF) + { + switch (ch) diff --git a/math/sfft/files/patch-src_timing__many.cc b/math/sfft/files/patch-src_timing__many.cc new file mode 100644 index 000000000000..15b475a5eab6 --- /dev/null +++ b/math/sfft/files/patch-src_timing__many.cc @@ -0,0 +1,11 @@ +--- src/timing_many.cc.orig 2024-10-15 16:05:53 UTC ++++ src/timing_many.cc +@@ -51,7 +51,7 @@ parse_arguments(int argc, char **argv, int *n, int *k, + parse_arguments(int argc, char **argv, int *n, int *k, int *num_inputs, + int *version, int *fftw_opt, bool * simple_parallelism) + { +- char ch; ++ int ch; + while ((ch = getopt(argc, argv, "htosi:n:k:v:")) != EOF) + { + switch (ch)