git: 251245030286 - main - biology/mashmap: New port: Fast approximate aligner for long DNA sequences

From: Yuri Victorovich <yuri_at_FreeBSD.org>
Date: Tue, 11 Jul 2023 01:44:37 UTC
The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=2512450302863b1bcfc8f3b3dc216b1ec64091c1

commit 2512450302863b1bcfc8f3b3dc216b1ec64091c1
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2023-07-11 01:44:10 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2023-07-11 01:44:34 +0000

    biology/mashmap: New port: Fast approximate aligner for long DNA sequences
---
 biology/Makefile          |  1 +
 biology/mashmap/Makefile  | 24 ++++++++++++++++++++++++
 biology/mashmap/distinfo  |  3 +++
 biology/mashmap/pkg-descr | 11 +++++++++++
 4 files changed, 39 insertions(+)

diff --git a/biology/Makefile b/biology/Makefile
index fe34cafefca7..71d625b98e9f 100644
--- a/biology/Makefile
+++ b/biology/Makefile
@@ -97,6 +97,7 @@
     SUBDIR += linux-foldingathome
     SUBDIR += mafft
     SUBDIR += mapm3
+    SUBDIR += mashmap
     SUBDIR += megahit
     SUBDIR += metaeuk
     SUBDIR += migrate
diff --git a/biology/mashmap/Makefile b/biology/mashmap/Makefile
new file mode 100644
index 000000000000..d9dc5f5fafa4
--- /dev/null
+++ b/biology/mashmap/Makefile
@@ -0,0 +1,24 @@
+PORTNAME=	mashmap
+DISTVERSIONPREFIX=	v
+DISTVERSION=	3.0.6
+CATEGORIES=	biology
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Fast approximate aligner for long DNA sequences
+WWW=		https://github.com/marbl/MashMap
+
+LICENSE=	PD
+LICENSE_FILE=	${WRKSRC}/LICENSE.txt
+
+LIB_DEPENDS=	libgsl.so:math/gsl
+
+USES=		cmake compiler:c++17-lang localbase:ldflags
+
+USE_GITHUB=	yes
+GH_ACCOUNT=	marbl
+GH_PROJECT=	MashMap
+
+PLIST_FILES=	bin/mashmap \
+		bin/mashmap-align
+
+.include <bsd.port.mk>
diff --git a/biology/mashmap/distinfo b/biology/mashmap/distinfo
new file mode 100644
index 000000000000..7cbf1e62148e
--- /dev/null
+++ b/biology/mashmap/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1689039441
+SHA256 (marbl-MashMap-v3.0.6_GH0.tar.gz) = 53d1b4efad6650d8efbc28325637d1bdacd108eaad70fcc28e927f40ac5c2112
+SIZE (marbl-MashMap-v3.0.6_GH0.tar.gz) = 234093
diff --git a/biology/mashmap/pkg-descr b/biology/mashmap/pkg-descr
new file mode 100644
index 000000000000..4460733166d2
--- /dev/null
+++ b/biology/mashmap/pkg-descr
@@ -0,0 +1,11 @@
+MashMap implements a fast and approximate algorithm for computing local
+alignment boundaries between long DNA sequences. It can be useful for mapping
+a genome assembly or long reads (PacBio/ONT) to reference genome(s). Given a
+minimum alignment length and an identity threshold for the desired local
+alignments, MashMap computes alignment boundaries and identity estimates using
+k-mers. It does not compute the alignments explicitly, but rather estimates an
+unbiased k-mer based Jaccard similarity using a combination of minmers (a novel
+winnowing scheme) and MinHash. This is then converted to an estimate of sequence
+identity using the Mash distance. An appropriate k-mer sampling rate is
+automatically determined using the given minimum local alignment length and
+identity thresholds.