git: b8bf1cfd41e7 - main - biology/mmseqs2: Enable at least SSE2 by default
Jason W. Bacon
jwb at FreeBSD.org
Sat Jun 26 16:31:26 UTC 2021
The branch main has been updated by jwb:
URL: https://cgit.FreeBSD.org/ports/commit/?id=b8bf1cfd41e7503311351d0246b328c7abcea8b1
commit b8bf1cfd41e7503311351d0246b328c7abcea8b1
Author: Jason W. Bacon <jwb at FreeBSD.org>
AuthorDate: 2021-06-26 16:29:25 +0000
Commit: Jason W. Bacon <jwb at FreeBSD.org>
CommitDate: 2021-06-26 16:29:25 +0000
biology/mmseqs2: Enable at least SSE2 by default
MMseqs2 hangs when built with clang and only baseline (SSE2 / x86-64)
optimizations, so build with GCC temporarily. Upstream is investigating.
---
biology/mmseqs2/Makefile | 26 +++++++++++++++++++---
biology/mmseqs2/distinfo | 2 +-
.../files/patch-data_workflow_createtaxdb.sh | 21 +++++++++++++++++
.../mmseqs2/files/patch-data_workflow_databases.sh | 21 +++++++++++++++++
.../mmseqs2/files/patch-src_commons_DBReader.cpp | 11 +++++++++
biology/mmseqs2/pkg-message | 13 +++++++++++
6 files changed, 90 insertions(+), 4 deletions(-)
diff --git a/biology/mmseqs2/Makefile b/biology/mmseqs2/Makefile
index 6ec07e428c72..755ed6f7aaa0 100644
--- a/biology/mmseqs2/Makefile
+++ b/biology/mmseqs2/Makefile
@@ -1,5 +1,6 @@
PORTNAME= MMseqs2
DISTVERSION= 13-45111
+PORTREVISION= 1
CATEGORIES= biology
MAINTAINER= jwb at FreeBSD.org
@@ -8,15 +9,34 @@ COMMENT= Ultra fast and sensitive sequence search and clustering suite
LICENSE= GPLv3
LICENSE_FILE= ${WRKSRC}/LICENSE.md
-USES= cmake perl5 shebangfix
+BROKEN_i386= https://github.com/soedinglab/MMseqs2/issues/418
-SHEBANG_GLOB= *.sh
+LIB_DEPENDS= libzstd.so:archivers/zstd
+USES= cmake perl5 shebangfix
USE_GITHUB= yes
+USE_PERL5= build
+
+SHEBANG_GLOB= *.sh
GH_ACCOUNT= soedinglab
+CMAKE_ARGS+= -DUSE_SYSTEM_ZSTD:BOOL=ON
+LDFLAGS+= -lpthread
+
PLIST_FILES= bin/mmseqs ${DATADIR}/bash-completion.sh
+.include <bsd.port.pre.mk>
+
+# Hangs when built with clang and SSE or x86-64 only. Works fine with
+# -march=native. Upstream is investigating. To build fully optimized,
+# non-portable binary with clang: env CFLAGS='-O2 -march=native' make
+.if ${ARCH} == "amd64" && empty(CFLAGS:M*march=native*)
+USE_GCC= yes
+# Need at least SSE2 for decent performance. -march=x86-64 enables SSE2 along
+# with the other baseline features common to all AMD64 CPUs.
+CFLAGS+= -march=x86-64
+.endif
+
post-patch:
@${REINPLACE_CMD} -e 's|MMSEQS_HOME/util|${DATADIR}|g' \
${WRKSRC}/src/commons/Application.cpp
@@ -24,4 +44,4 @@ post-patch:
post-stage:
${MV} ${STAGEDIR}${PREFIX}/util ${STAGEDIR}${DATADIR}
-.include <bsd.port.mk>
+.include <bsd.port.post.mk>
diff --git a/biology/mmseqs2/distinfo b/biology/mmseqs2/distinfo
index ec7fbc69cf89..e29d9fdcbf95 100644
--- a/biology/mmseqs2/distinfo
+++ b/biology/mmseqs2/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1624543564
+TIMESTAMP = 1624667730
SHA256 (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 6444bb682ebf5ced54b2eda7a301fa3e933c2a28b7661f96ef5bdab1d53695a2
SIZE (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 10196433
diff --git a/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh b/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
new file mode 100644
index 000000000000..762b7a1addc2
--- /dev/null
+++ b/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
@@ -0,0 +1,21 @@
+--- data/workflow/createtaxdb.sh.orig 2021-06-25 01:33:07 UTC
++++ data/workflow/createtaxdb.sh
+@@ -27,6 +27,8 @@ STRATEGY=""
+ if hasCommand aria2c; then STRATEGY="$STRATEGY ARIA"; fi
+ if hasCommand curl; then STRATEGY="$STRATEGY CURL"; fi
+ if hasCommand wget; then STRATEGY="$STRATEGY WGET"; fi
++# Part of FreeBSD base, need not be installed separately
++if hasCommand fetch; then STRATEGY="$STRATEGY FETCH"; fi
+ if [ "$STRATEGY" = "" ]; then
+ fail "No download tool found in PATH. Please install aria2c, curl or wget."
+ fi
+@@ -47,6 +49,9 @@ downloadFile() {
+ ;;
+ WGET)
+ wget -O "$OUTPUT" "$URL" && return 0
++ ;;
++ FETCH)
++ fetch -o "$OUTPUT" "$URL" && return 0
+ ;;
+ esac
+ done
diff --git a/biology/mmseqs2/files/patch-data_workflow_databases.sh b/biology/mmseqs2/files/patch-data_workflow_databases.sh
new file mode 100644
index 000000000000..2b0a30427958
--- /dev/null
+++ b/biology/mmseqs2/files/patch-data_workflow_databases.sh
@@ -0,0 +1,21 @@
+--- data/workflow/databases.sh.orig 2021-06-25 01:34:08 UTC
++++ data/workflow/databases.sh
+@@ -27,6 +27,8 @@ STRATEGY=""
+ if hasCommand aria2c; then STRATEGY="$STRATEGY ARIA"; fi
+ if hasCommand curl; then STRATEGY="$STRATEGY CURL"; fi
+ if hasCommand wget; then STRATEGY="$STRATEGY WGET"; fi
++# Part of FreeBSD base, need not be installed separately
++if hasCommand fetch; then STRATEGY="$STRATEGY FETCH"; fi
+ if [ "$STRATEGY" = "" ]; then
+ fail "No download tool found in PATH. Please install aria2c, curl or wget."
+ fi
+@@ -47,6 +49,9 @@ downloadFile() {
+ ;;
+ WGET)
+ wget -O "$OUTPUT" "$URL" && return 0
++ ;;
++ FETCH)
++ fetch -o "$OUTPUT" "$URL" && return 0
+ ;;
+ esac
+ done
diff --git a/biology/mmseqs2/files/patch-src_commons_DBReader.cpp b/biology/mmseqs2/files/patch-src_commons_DBReader.cpp
new file mode 100644
index 000000000000..212b3e2ecf59
--- /dev/null
+++ b/biology/mmseqs2/files/patch-src_commons_DBReader.cpp
@@ -0,0 +1,11 @@
+--- src/commons/DBReader.cpp.orig 2021-06-25 22:40:36 UTC
++++ src/commons/DBReader.cpp
+@@ -1004,7 +1004,7 @@ void DBReader<T>::setSequentialAdvice() {
+ #ifdef HAVE_POSIX_MADVISE
+ for(size_t i = 0; i < dataFileCnt; i++){
+ size_t dataSize = dataSizeOffset[i+1] - dataSizeOffset[i];
+- if (posix_madvise (dataFiles[i], dataSize, POSIX_MADV_SEQUENTIAL) != 0){
++ if (dataSize > 0 && posix_madvise (dataFiles[i], dataSize, POSIX_MADV_SEQUENTIAL) != 0){
+ Debug(Debug::ERROR) << "posix_madvise returned an error " << dataFileName << "\n";
+ }
+ }
diff --git a/biology/mmseqs2/pkg-message b/biology/mmseqs2/pkg-message
new file mode 100644
index 000000000000..d60942661331
--- /dev/null
+++ b/biology/mmseqs2/pkg-message
@@ -0,0 +1,13 @@
+[
+{ type: install
+ message: <<EOM
+
+MMseqs2 can benefit greatly from advanced CPU features such as AVX.
+Consider setting additional optimizations such as -march=native (e.g.
+in make.conf) and reinstalling from source via
+
+cd ${PORTSDIR}/biology/mmseqs2 && make install
+
+EOM
+}
+]
More information about the dev-commits-ports-all
mailing list