git: 4fd08b5074ce - main - biology/mmseqs2: Update to 16.747.c6

From: Jason W. Bacon <jwb_at_FreeBSD.org>
Date: Wed, 15 Jan 2025 15:07:23 UTC
The branch main has been updated by jwb:

URL: https://cgit.FreeBSD.org/ports/commit/?id=4fd08b5074ce3a5d4997f2837d86b406bcfb4cea

commit 4fd08b5074ce3a5d4997f2837d86b406bcfb4cea
Author:     Jason W. Bacon <jwb@FreeBSD.org>
AuthorDate: 2025-01-15 15:06:03 +0000
Commit:     Jason W. Bacon <jwb@FreeBSD.org>
CommitDate: 2025-01-15 15:06:03 +0000

    biology/mmseqs2: Update to 16.747.c6
    
    Numerous fixes and improvements since v13
    Changes: https://github.com/soedinglab/MMseqs2/releases
    
    PR:             283251
    Reported by:    alster@vinterdalen.se
---
 biology/mmseqs2/Makefile                           | 11 ++---
 biology/mmseqs2/distinfo                           |  6 +--
 biology/mmseqs2/files/patch-CMakeLists.txt         | 51 +++++++++++-----------
 .../files/patch-data_workflow_createtaxdb.sh       | 12 ++---
 .../mmseqs2/files/patch-data_workflow_databases.sh | 12 ++---
 .../mmseqs2/files/patch-src_commons_DBReader.cpp   | 11 -----
 biology/mmseqs2/pkg-descr                          | 12 ++---
 7 files changed, 50 insertions(+), 65 deletions(-)

diff --git a/biology/mmseqs2/Makefile b/biology/mmseqs2/Makefile
index 0595f54412d7..c2c410252fd0 100644
--- a/biology/mmseqs2/Makefile
+++ b/biology/mmseqs2/Makefile
@@ -1,11 +1,10 @@
 PORTNAME=	MMseqs2
-DISTVERSION=	13-45111
-PORTREVISION=	2
+DISTVERSION=	16-747c6
 CATEGORIES=	biology
 
 MAINTAINER=	jwb@FreeBSD.org
 COMMENT=	Ultra fast and sensitive sequence search and clustering suite
-WWW=		https://github.com/soedinglab/MMseqs2
+WWW=		https://github.com/soedinglab/MMseqs2/
 
 LICENSE=	GPLv3
 LICENSE_FILE=	${WRKSRC}/LICENSE.md
@@ -18,10 +17,12 @@ USES=		cmake perl5 shebangfix
 USE_GITHUB=	yes
 USE_PERL5=	build
 
-SHEBANG_GLOB=	*.sh
 GH_ACCOUNT=	soedinglab
+SHEBANG_GLOB=	*.sh
+
+CMAKE_ARGS+=	-DVERSION_OVERRIDE=${DISTVERSION}
+CMAKE_ON=	DISABLE_IPS4O USE_SYSTEM_ZSTD
 
-CMAKE_ARGS+=	-DUSE_SYSTEM_ZSTD:BOOL=ON -DDISABLE_IPS4O:BOOL=ON
 LDFLAGS+=	-lpthread
 
 PLIST_FILES=	bin/mmseqs ${DATADIR}/bash-completion.sh
diff --git a/biology/mmseqs2/distinfo b/biology/mmseqs2/distinfo
index e29d9fdcbf95..2cef8860912b 100644
--- a/biology/mmseqs2/distinfo
+++ b/biology/mmseqs2/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1624667730
-SHA256 (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 6444bb682ebf5ced54b2eda7a301fa3e933c2a28b7661f96ef5bdab1d53695a2
-SIZE (soedinglab-MMseqs2-13-45111_GH0.tar.gz) = 10196433
+TIMESTAMP = 1733910529
+SHA256 (soedinglab-MMseqs2-16-747c6_GH0.tar.gz) = faeb6841feb8e028651c2391de1346c55c2091a96520b625525d27b99d07ef1d
+SIZE (soedinglab-MMseqs2-16-747c6_GH0.tar.gz) = 13359879
diff --git a/biology/mmseqs2/files/patch-CMakeLists.txt b/biology/mmseqs2/files/patch-CMakeLists.txt
index 908984972649..743bc16ff5c4 100644
--- a/biology/mmseqs2/files/patch-CMakeLists.txt
+++ b/biology/mmseqs2/files/patch-CMakeLists.txt
@@ -1,6 +1,6 @@
---- CMakeLists.txt.orig	2021-06-24 14:37:49 UTC
+--- CMakeLists.txt.orig	2024-11-26 05:22:36 UTC
 +++ CMakeLists.txt
-@@ -50,30 +50,19 @@ set(MMSEQS_CXX_FLAGS "-fsigned-char")
+@@ -57,30 +57,19 @@ if (HAVE_AVX2)
  # SIMD instruction sets support
  set(MMSEQS_ARCH "")
  if (HAVE_AVX2)
@@ -9,45 +9,44 @@
 -    else ()
 -        set(MMSEQS_ARCH "${MMSEQS_ARCH} -mavx2 -mcx16 -Wa,-q")
 -    endif ()
-     set(X64 1)
+     set(X64 1 CACHE INTERNAL "")
  elseif (HAVE_SSE4_1)
 -    set(MMSEQS_ARCH "${MMSEQS_ARCH} -msse4.1 -mcx16")
-     set(X64 1)
+     set(X64 1 CACHE INTERNAL "")
  elseif (HAVE_SSE2)
 -    set(MMSEQS_ARCH "${MMSEQS_ARCH} -msse2")
      set(DISABLE_IPS4O 1)
-     set(X64 1)
+     set(X64 1 CACHE INTERNAL "")
  elseif (HAVE_POWER9)
 -    set(MMSEQS_ARCH "${MMSEQS_ARCH} -mcpu=power9 -mvsx")
-     set(PPC64 1)
+     set(PPC64 1 CACHE INTERNAL "")
  elseif (HAVE_POWER8)
 -    set(MMSEQS_ARCH "${MMSEQS_ARCH} -mcpu=power8 -mvsx")
-     set(PPC64 1)
+     set(PPC64 1 CACHE INTERNAL "")
  elseif (HAVE_ARM8)
 -    set(MMSEQS_ARCH "${MMSEQS_ARCH} -march=armv8-a+simd")
-     set(ARM 1)
+     set(ARM 1 CACHE INTERNAL "")
  elseif (HAVE_S390X)
--    set(MMSEQS_ARCH "${MMSEQS_ARCH} -mzarch -march=z14")
-     set(ZARCH 1)
+-    set(MMSEQS_ARCH "${MMSEQS_ARCH} -march=z14 -mzarch -mzvector")
+     set(ZARCH 1 CACHE INTERNAL "")
  endif ()
  
-@@ -105,19 +94,6 @@ if (NATIVE_ARCH AND (MMSEQS_ARCH STREQUAL ""))
+@@ -110,18 +99,6 @@ if (NATIVE_ARCH AND (MMSEQS_ARCH STREQUAL ""))
+             endif ()
+             message(WARNING "At least SSE4.1 is needed for best performance")
          endif ()
-         if (PPC64)
-             set(MMSEQS_ARCH "-mcpu=native")
--        else ()
--            # clang has a problem with march=native on travis
--            if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.0.0")
--                set(MMSEQS_ARCH "${SSE_FLAGS}")
--            else()
--                set(MMSEQS_ARCH "-march=native")
--            endif()
--        endif ()
--    else ()
--        if (PPC64)
--            set(MMSEQS_ARCH "-mcpu=native")
--        else ()
+-        # clang has a problem with march=native on travis
+-        if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.0.0")
+-            set(MMSEQS_ARCH "${SSE_FLAGS}")
+-        else()
 -            set(MMSEQS_ARCH "-march=native")
-         endif ()
+-        endif()
+-    elseif (PPC64 OR ARM)
+-        set(MMSEQS_ARCH "-mcpu=native")
+-    elseif (ZARCH)
+-        set(MMSEQS_ARCH "-mcpu=native -mzvector")
+-    else ()
+-        set(MMSEQS_ARCH "-march=native")
      endif ()
  endif ()
+ set(MMSEQS_ARCH ${MMSEQS_ARCH} CACHE INTERNAL "")
diff --git a/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh b/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
index 762b7a1addc2..efa7ec4374ba 100644
--- a/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
+++ b/biology/mmseqs2/files/patch-data_workflow_createtaxdb.sh
@@ -1,6 +1,6 @@
---- data/workflow/createtaxdb.sh.orig	2021-06-25 01:33:07 UTC
+--- data/workflow/createtaxdb.sh.orig	2024-12-11 10:27:07 UTC
 +++ data/workflow/createtaxdb.sh
-@@ -27,6 +27,8 @@ STRATEGY=""
+@@ -27,6 +27,8 @@ if hasCommand wget;   then STRATEGY="$STRATEGY WGET"; 
  if hasCommand aria2c; then STRATEGY="$STRATEGY ARIA"; fi
  if hasCommand curl;   then STRATEGY="$STRATEGY CURL"; fi
  if hasCommand wget;   then STRATEGY="$STRATEGY WGET"; fi
@@ -9,10 +9,10 @@
  if [ "$STRATEGY" = "" ]; then
      fail "No download tool found in PATH. Please install aria2c, curl or wget."
  fi
-@@ -47,6 +49,9 @@ downloadFile() {
-             ;;
-         WGET)
-             wget -O "$OUTPUT" "$URL" && return 0
+@@ -56,6 +58,9 @@ downloadFile() {
+                 mv -f -- "${OUTPUT}.wget" "${OUTPUT}"
+                 return 0
+             fi
 +            ;;
 +        FETCH)
 +            fetch -o "$OUTPUT" "$URL" && return 0
diff --git a/biology/mmseqs2/files/patch-data_workflow_databases.sh b/biology/mmseqs2/files/patch-data_workflow_databases.sh
index 2b0a30427958..2a8168b4dd5e 100644
--- a/biology/mmseqs2/files/patch-data_workflow_databases.sh
+++ b/biology/mmseqs2/files/patch-data_workflow_databases.sh
@@ -1,6 +1,6 @@
---- data/workflow/databases.sh.orig	2021-06-25 01:34:08 UTC
+--- data/workflow/databases.sh.orig	2024-12-11 10:31:20 UTC
 +++ data/workflow/databases.sh
-@@ -27,6 +27,8 @@ STRATEGY=""
+@@ -27,6 +27,8 @@ if hasCommand wget;   then STRATEGY="$STRATEGY WGET"; 
  if hasCommand aria2c; then STRATEGY="$STRATEGY ARIA"; fi
  if hasCommand curl;   then STRATEGY="$STRATEGY CURL"; fi
  if hasCommand wget;   then STRATEGY="$STRATEGY WGET"; fi
@@ -9,10 +9,10 @@
  if [ "$STRATEGY" = "" ]; then
      fail "No download tool found in PATH. Please install aria2c, curl or wget."
  fi
-@@ -47,6 +49,9 @@ downloadFile() {
-             ;;
-         WGET)
-             wget -O "$OUTPUT" "$URL" && return 0
+@@ -56,6 +58,9 @@ downloadFile() {
+                 mv -f -- "${OUTPUT}.wget" "${OUTPUT}"
+                 return 0
+             fi
 +            ;;
 +        FETCH)
 +            fetch -o "$OUTPUT" "$URL" && return 0
diff --git a/biology/mmseqs2/files/patch-src_commons_DBReader.cpp b/biology/mmseqs2/files/patch-src_commons_DBReader.cpp
deleted file mode 100644
index 212b3e2ecf59..000000000000
--- a/biology/mmseqs2/files/patch-src_commons_DBReader.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
---- src/commons/DBReader.cpp.orig	2021-06-25 22:40:36 UTC
-+++ src/commons/DBReader.cpp
-@@ -1004,7 +1004,7 @@ void DBReader<T>::setSequentialAdvice() {
- #ifdef HAVE_POSIX_MADVISE
-     for(size_t i = 0; i < dataFileCnt; i++){
-         size_t dataSize = dataSizeOffset[i+1] - dataSizeOffset[i];
--        if (posix_madvise (dataFiles[i], dataSize, POSIX_MADV_SEQUENTIAL) != 0){
-+        if (dataSize > 0 && posix_madvise (dataFiles[i], dataSize, POSIX_MADV_SEQUENTIAL) != 0){
-             Debug(Debug::ERROR) << "posix_madvise returned an error " << dataFileName << "\n";
-         }
-     }
diff --git a/biology/mmseqs2/pkg-descr b/biology/mmseqs2/pkg-descr
index 9c359124e272..3665255fbf6e 100644
--- a/biology/mmseqs2/pkg-descr
+++ b/biology/mmseqs2/pkg-descr
@@ -1,8 +1,4 @@
-MMseqs2 (Many-against-Many sequence searching) is a software suite to search
-and cluster huge protein and nucleotide sequence sets. MMseqs2 is open source
-GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta
-version, via cygwin) Windows. The software is designed to run on multiple cores
-and servers and exhibits very good scalability. MMseqs2 can run 10000 times
-faster than BLAST. At 100 times its speed it achieves almost the same
-sensitivity. It can perform profile searches with the same sensitivity as
-PSI-BLAST at over 400 times its speed.
+MMseqs2 (Many-against-Many searching) is a software suite to search
+and cluster huge sequence sets. MMseqs2 is designed to run on multiple
+cores and servers, making it highly scalable. MMseqs2 matches the
+sensitivity of BLAST, but runs orders of magnitude faster.