git: cf14bbb325bb - main - biology/mmseqs2: Ultra fast and sensitive sequence search and clustering suite

Jason W. Bacon jwb at FreeBSD.org
Thu Jun 24 17:32:15 UTC 2021


The branch main has been updated by jwb:

URL: https://cgit.FreeBSD.org/ports/commit/?id=cf14bbb325bb8696c275ebbabff3da95a75815eb

commit cf14bbb325bb8696c275ebbabff3da95a75815eb
Author:     Jason W. Bacon <jwb at FreeBSD.org>
AuthorDate: 2021-06-24 17:29:51 +0000
Commit:     Jason W. Bacon <jwb at FreeBSD.org>
CommitDate: 2021-06-24 17:31:42 +0000

    biology/mmseqs2: Ultra fast and sensitive sequence search and clustering suite
    
    MMseqs2 (Many-against-Many sequence searching) is a software suite to search
    and cluster huge protein and nucleotide sequence sets. MMseqs2 is open source
    GPL-licensed software implemented in C++ for FreeBSD, Linux, MacOS, and (via
    cygwin) Windows. The software is designed to run on multiple cores and
    servers and exhibits very good scalability. MMseqs2 can run 10000 times
    faster than BLAST. At 100 times its speed it achieves almost the same
    sensitivity. It can perform profile searches with the same sensitivity as
    PSI-BLAST at over 400 times its speed.
---
 biology/Makefile                           |  1 +
 biology/mmseqs2/Makefile                   | 27 +++++++++++++++
 biology/mmseqs2/distinfo                   |  3 ++
 biology/mmseqs2/files/patch-CMakeLists.txt | 53 ++++++++++++++++++++++++++++++
 biology/mmseqs2/pkg-descr                  | 10 ++++++
 5 files changed, 94 insertions(+)

diff --git a/biology/Makefile b/biology/Makefile
index 158eb70ede1a..55ed6b8c2735 100644
--- a/biology/Makefile
+++ b/biology/Makefile
@@ -77,6 +77,7 @@
     SUBDIR += mapm3
     SUBDIR += migrate
     SUBDIR += minimap2
+    SUBDIR += mmseqs2
     SUBDIR += molden
     SUBDIR += mopac
     SUBDIR += mothur
diff --git a/biology/mmseqs2/Makefile b/biology/mmseqs2/Makefile
new file mode 100644
index 000000000000..6ec07e428c72
--- /dev/null
+++ b/biology/mmseqs2/Makefile
@@ -0,0 +1,27 @@
+PORTNAME=	MMseqs2
+DISTVERSION=	13-45111
+CATEGORIES=	biology
+
+MAINTAINER=	jwb at FreeBSD.org
+COMMENT=	Ultra fast and sensitive sequence search and clustering suite
+
+LICENSE=	GPLv3
+LICENSE_FILE=	${WRKSRC}/LICENSE.md
+
+USES=		cmake perl5 shebangfix
+
+SHEBANG_GLOB=	*.sh
+
+USE_GITHUB=	yes
+GH_ACCOUNT=	soedinglab
+
+PLIST_FILES=	bin/mmseqs ${DATADIR}/bash-completion.sh
+
+post-patch:
+	@${REINPLACE_CMD} -e 's|MMSEQS_HOME/util|${DATADIR}|g' \
+		${WRKSRC}/src/commons/Application.cpp
+
+post-stage:
+	${MV} ${STAGEDIR}${PREFIX}/util ${STAGEDIR}${DATADIR}
+
+.include <bsd.port.mk>
diff --git a/biology/mmseqs2/distinfo b/biology/mmseqs2/distinfo
new file mode 100644
index 000000000000..ec7fbc69cf89
--- /dev/null
+++ b/biology/mmseqs2/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1624543564
+SHA256 (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 6444bb682ebf5ced54b2eda7a301fa3e933c2a28b7661f96ef5bdab1d53695a2
+SIZE (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 10196433
diff --git a/biology/mmseqs2/files/patch-CMakeLists.txt b/biology/mmseqs2/files/patch-CMakeLists.txt
new file mode 100644
index 000000000000..908984972649
--- /dev/null
+++ b/biology/mmseqs2/files/patch-CMakeLists.txt
@@ -0,0 +1,53 @@
+--- CMakeLists.txt.orig	2021-06-24 14:37:49 UTC
++++ CMakeLists.txt
+@@ -50,30 +50,19 @@ set(MMSEQS_CXX_FLAGS "-fsigned-char")
+ # SIMD instruction sets support
+ set(MMSEQS_ARCH "")
+ if (HAVE_AVX2)
+-    if (CMAKE_COMPILER_IS_CLANG)
+-        set(MMSEQS_ARCH "${MMSEQS_ARCH} -mavx2 -mcx16")
+-    else ()
+-        set(MMSEQS_ARCH "${MMSEQS_ARCH} -mavx2 -mcx16 -Wa,-q")
+-    endif ()
+     set(X64 1)
+ elseif (HAVE_SSE4_1)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -msse4.1 -mcx16")
+     set(X64 1)
+ elseif (HAVE_SSE2)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -msse2")
+     set(DISABLE_IPS4O 1)
+     set(X64 1)
+ elseif (HAVE_POWER9)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -mcpu=power9 -mvsx")
+     set(PPC64 1)
+ elseif (HAVE_POWER8)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -mcpu=power8 -mvsx")
+     set(PPC64 1)
+ elseif (HAVE_ARM8)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -march=armv8-a+simd")
+     set(ARM 1)
+ elseif (HAVE_S390X)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -mzarch -march=z14")
+     set(ZARCH 1)
+ endif ()
+ 
+@@ -105,19 +94,6 @@ if (NATIVE_ARCH AND (MMSEQS_ARCH STREQUAL ""))
+         endif ()
+         if (PPC64)
+             set(MMSEQS_ARCH "-mcpu=native")
+-        else ()
+-            # clang has a problem with march=native on travis
+-            if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.0.0")
+-                set(MMSEQS_ARCH "${SSE_FLAGS}")
+-            else()
+-                set(MMSEQS_ARCH "-march=native")
+-            endif()
+-        endif ()
+-    else ()
+-        if (PPC64)
+-            set(MMSEQS_ARCH "-mcpu=native")
+-        else ()
+-            set(MMSEQS_ARCH "-march=native")
+         endif ()
+     endif ()
+ endif ()
diff --git a/biology/mmseqs2/pkg-descr b/biology/mmseqs2/pkg-descr
new file mode 100644
index 000000000000..128056f15f39
--- /dev/null
+++ b/biology/mmseqs2/pkg-descr
@@ -0,0 +1,10 @@
+MMseqs2 (Many-against-Many sequence searching) is a software suite to search
+and cluster huge protein and nucleotide sequence sets. MMseqs2 is open source
+GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta
+version, via cygwin) Windows. The software is designed to run on multiple cores
+and servers and exhibits very good scalability. MMseqs2 can run 10000 times
+faster than BLAST. At 100 times its speed it achieves almost the same
+sensitivity. It can perform profile searches with the same sensitivity as
+PSI-BLAST at over 400 times its speed.
+
+WWW: https://github.com/soedinglab/MMseqs2


More information about the dev-commits-ports-all mailing list