git: 219e72a7c3cb - main - misc/py-jiwer: New port: Evaluate speech-to-text system with similarity measures

From: Yuri Victorovich <yuri_at_FreeBSD.org>
Date: Tue, 06 Aug 2024 00:13:50 UTC
The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=219e72a7c3cbe294bfbe96dabf8ae13641866b65

commit 219e72a7c3cbe294bfbe96dabf8ae13641866b65
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2024-08-06 00:12:46 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2024-08-06 00:13:44 +0000

    misc/py-jiwer: New port: Evaluate speech-to-text system with similarity measures
---
 misc/Makefile           |  1 +
 misc/py-jiwer/Makefile  | 29 +++++++++++++++++++++++++++++
 misc/py-jiwer/distinfo  |  3 +++
 misc/py-jiwer/pkg-descr | 12 ++++++++++++
 4 files changed, 45 insertions(+)

diff --git a/misc/Makefile b/misc/Makefile
index f99169f04911..0b72d6dd0af7 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -439,6 +439,7 @@
     SUBDIR += py-icoextract
     SUBDIR += py-instructor
     SUBDIR += py-ipyfastscape
+    SUBDIR += py-jiwer
     SUBDIR += py-kartograph
     SUBDIR += py-laspy
     SUBDIR += py-lazrs
diff --git a/misc/py-jiwer/Makefile b/misc/py-jiwer/Makefile
new file mode 100644
index 000000000000..afe34a2dd3f4
--- /dev/null
+++ b/misc/py-jiwer/Makefile
@@ -0,0 +1,29 @@
+PORTNAME=	jiwer
+#DISTVERSIONPREFIX=	v
+DISTVERSION=	3.0.4 # see https://github.com/jitsi/jiwer/issues/91
+CATEGORIES=	misc python # machine-learning
+MASTER_SITES=	PYPI # no tests
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Evaluate speech-to-text system with similarity measures
+WWW=		https://github.com/jitsi/jiwer
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}poetry-core>0:devel/py-poetry-core@${PY_FLAVOR}
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}click>=8.1.3:devel/py-click@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}rapidfuzz>=3:devel/py-rapidfuzz@${PY_FLAVOR}
+
+USES=		python
+USE_PYTHON=	pep517 concurrent autoplist #pytest
+
+#USE_GITHUB=	yes
+#GH_ACCOUNT=	jitsi
+
+TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
+
+NO_ARCH=	yes
+
+.include <bsd.port.mk>
diff --git a/misc/py-jiwer/distinfo b/misc/py-jiwer/distinfo
new file mode 100644
index 000000000000..70736cf03395
--- /dev/null
+++ b/misc/py-jiwer/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1722887112
+SHA256 (jiwer-3.0.4.tar.gz) = 2438acdc7ca22128fcab4be60db595809d2b5e73785b736de36dc3281a2a6ae8
+SIZE (jiwer-3.0.4.tar.gz) = 17515
diff --git a/misc/py-jiwer/pkg-descr b/misc/py-jiwer/pkg-descr
new file mode 100644
index 000000000000..12c8bc7601d5
--- /dev/null
+++ b/misc/py-jiwer/pkg-descr
@@ -0,0 +1,12 @@
+JiWER is a simple and fast python package to evaluate an automatic speech
+recognition system. It supports the following measures:
+* word error rate (WER)
+* match error rate (MER)
+* word information lost (WIL)
+* word information preserved (WIP)
+* character error rate (CER)
+
+These measures are computed with the use of the minimum-edit distance between
+one or more reference and hypothesis sentences. The minimum-edit distance is
+calculated using RapidFuzz, which uses C++ under the hood, and is therefore
+faster than a pure python implementation.