git: b8bf1cfd41e7 - main - biology/mmseqs2: Enable at least SSE2 by default

Jason W. Bacon jwb at FreeBSD.org
Sat Jun 26 16:31:26 UTC 2021


The branch main has been updated by jwb:

URL: https://cgit.FreeBSD.org/ports/commit/?id=b8bf1cfd41e7503311351d0246b328c7abcea8b1

commit b8bf1cfd41e7503311351d0246b328c7abcea8b1
Author:     Jason W. Bacon <jwb at FreeBSD.org>
AuthorDate: 2021-06-26 16:29:25 +0000
Commit:     Jason W. Bacon <jwb at FreeBSD.org>
CommitDate: 2021-06-26 16:29:25 +0000

    biology/mmseqs2: Enable at least SSE2 by default
    
    MMseqs2 hangs when built with clang and only baseline optimizations
    (SSE or -march=x86-64), so build with GCC temporarily.  Upstream is
    investigating.
---
 biology/mmseqs2/Makefile                           | 26 +++++++++++++++++++---
 biology/mmseqs2/distinfo                           |  2 +-
 .../files/patch-data_workflow_createtaxdb.sh       | 21 +++++++++++++++++
 .../mmseqs2/files/patch-data_workflow_databases.sh | 21 +++++++++++++++++
 .../mmseqs2/files/patch-src_commons_DBReader.cpp   | 11 +++++++++
 biology/mmseqs2/pkg-message                        | 13 +++++++++++
 6 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/biology/mmseqs2/Makefile b/biology/mmseqs2/Makefile
index 6ec07e428c72..755ed6f7aaa0 100644
--- a/biology/mmseqs2/Makefile
+++ b/biology/mmseqs2/Makefile
@@ -1,5 +1,6 @@
 PORTNAME=	MMseqs2
 DISTVERSION=	13-45111
+PORTREVISION=	1
 CATEGORIES=	biology
 
 MAINTAINER=	jwb at FreeBSD.org
@@ -8,15 +9,34 @@ COMMENT=	Ultra fast and sensitive sequence search and clustering suite
 LICENSE=	GPLv3
 LICENSE_FILE=	${WRKSRC}/LICENSE.md
 
-USES=		cmake perl5 shebangfix
+BROKEN_i386=	https://github.com/soedinglab/MMseqs2/issues/418
 
-SHEBANG_GLOB=	*.sh
+LIB_DEPENDS=	libzstd.so:archivers/zstd
 
+USES=		cmake perl5 shebangfix
 USE_GITHUB=	yes
+USE_PERL5=	build
+
+SHEBANG_GLOB=	*.sh
 GH_ACCOUNT=	soedinglab
 
+CMAKE_ARGS+=	-DUSE_SYSTEM_ZSTD:BOOL=ON
+LDFLAGS+=	-lpthread
+
 PLIST_FILES=	bin/mmseqs ${DATADIR}/bash-completion.sh
 
+.include <bsd.port.pre.mk>
+
+# Hangs when built with clang and only SSE or x86-64 optimizations.  Works
+# fine with -march=native.  Upstream is investigating.  To build a fully
+# optimized, non-portable binary with clang: env CFLAGS='-O2 -march=native' make
+.if ${ARCH} == "amd64" && empty(CFLAGS:M*march=native*)
+USE_GCC=	yes
+# Need at least SSE2 for decent performance.  -march=x86-64 groups SSE2 with
+# other features common to low-end AMD64 CPUs.
+CFLAGS+=	-march=x86-64
+.endif
+
 post-patch:
 	@${REINPLACE_CMD} -e 's|MMSEQS_HOME/util|${DATADIR}|g' \
 		${WRKSRC}/src/commons/Application.cpp
@@ -24,4 +44,4 @@ post-patch:
 post-stage:
 	${MV} ${STAGEDIR}${PREFIX}/util ${STAGEDIR}${DATADIR}
 
-.include <bsd.port.mk>
+.include <bsd.port.post.mk>
diff --git a/biology/mmseqs2/distinfo b/biology/mmseqs2/distinfo
index ec7fbc69cf89..e29d9fdcbf95 100644
--- a/biology/mmseqs2/distinfo
+++ b/biology/mmseqs2/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1624543564
+TIMESTAMP = 1624667730
 SHA256 (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 6444bb682ebf5ced54b2eda7a301fa3e933c2a28b7661f96ef5bdab1d53695a2
 SIZE (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 10196433
diff --git a/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh b/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
new file mode 100644
index 000000000000..762b7a1addc2
--- /dev/null
+++ b/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
@@ -0,0 +1,21 @@
+--- data/workflow/createtaxdb.sh.orig	2021-06-25 01:33:07 UTC
++++ data/workflow/createtaxdb.sh
+@@ -27,6 +27,8 @@ STRATEGY=""
+ if hasCommand aria2c; then STRATEGY="$STRATEGY ARIA"; fi
+ if hasCommand curl;   then STRATEGY="$STRATEGY CURL"; fi
+ if hasCommand wget;   then STRATEGY="$STRATEGY WGET"; fi
++# Part of FreeBSD base, need not be installed separately
++if hasCommand fetch;  then STRATEGY="$STRATEGY FETCH"; fi
+ if [ "$STRATEGY" = "" ]; then
+     fail "No download tool found in PATH. Please install aria2c, curl or wget."
+ fi
+@@ -47,6 +49,9 @@ downloadFile() {
+             ;;
+         WGET)
+             wget -O "$OUTPUT" "$URL" && return 0
++            ;;
++        FETCH)
++            fetch -o "$OUTPUT" "$URL" && return 0
+             ;;
+         esac
+     done
diff --git a/biology/mmseqs2/files/patch-data_workflow_databases.sh b/biology/mmseqs2/files/patch-data_workflow_databases.sh
new file mode 100644
index 000000000000..2b0a30427958
--- /dev/null
+++ b/biology/mmseqs2/files/patch-data_workflow_databases.sh
@@ -0,0 +1,21 @@
+--- data/workflow/databases.sh.orig	2021-06-25 01:34:08 UTC
++++ data/workflow/databases.sh
+@@ -27,6 +27,8 @@ STRATEGY=""
+ if hasCommand aria2c; then STRATEGY="$STRATEGY ARIA"; fi
+ if hasCommand curl;   then STRATEGY="$STRATEGY CURL"; fi
+ if hasCommand wget;   then STRATEGY="$STRATEGY WGET"; fi
++# Part of FreeBSD base, need not be installed separately
++if hasCommand fetch;  then STRATEGY="$STRATEGY FETCH"; fi
+ if [ "$STRATEGY" = "" ]; then
+     fail "No download tool found in PATH. Please install aria2c, curl or wget."
+ fi
+@@ -47,6 +49,9 @@ downloadFile() {
+             ;;
+         WGET)
+             wget -O "$OUTPUT" "$URL" && return 0
++            ;;
++        FETCH)
++            fetch -o "$OUTPUT" "$URL" && return 0
+             ;;
+         esac
+     done
diff --git a/biology/mmseqs2/files/patch-src_commons_DBReader.cpp b/biology/mmseqs2/files/patch-src_commons_DBReader.cpp
new file mode 100644
index 000000000000..212b3e2ecf59
--- /dev/null
+++ b/biology/mmseqs2/files/patch-src_commons_DBReader.cpp
@@ -0,0 +1,11 @@
+--- src/commons/DBReader.cpp.orig	2021-06-25 22:40:36 UTC
++++ src/commons/DBReader.cpp
+@@ -1004,7 +1004,7 @@ void DBReader<T>::setSequentialAdvice() {
+ #ifdef HAVE_POSIX_MADVISE
+     for(size_t i = 0; i < dataFileCnt; i++){
+         size_t dataSize = dataSizeOffset[i+1] - dataSizeOffset[i];
+-        if (posix_madvise (dataFiles[i], dataSize, POSIX_MADV_SEQUENTIAL) != 0){
++        if (dataSize > 0 && posix_madvise (dataFiles[i], dataSize, POSIX_MADV_SEQUENTIAL) != 0){
+             Debug(Debug::ERROR) << "posix_madvise returned an error " << dataFileName << "\n";
+         }
+     }
diff --git a/biology/mmseqs2/pkg-message b/biology/mmseqs2/pkg-message
new file mode 100644
index 000000000000..d60942661331
--- /dev/null
+++ b/biology/mmseqs2/pkg-message
@@ -0,0 +1,13 @@
+[
+{ type: install
+  message: <<EOM
+
+MMseqs2 can benefit greatly from advanced CPU features such as AVX.
+Consider setting additional optimizations such as -march=native (e.g.
+in make.conf) and reinstalling from source via
+
+cd ${PORTSDIR}/biology/mmseqs2 && make install
+
+EOM
+}
+]


More information about the dev-commits-ports-all mailing list