git: c8b4d6d391f8 - main - archivers/py-bitshuffle: Add py-bitshuffle 0.5.1

From: Po-Chuan Hsieh <sunpoet_at_FreeBSD.org>
Date: Sat, 23 Mar 2024 14:57:50 UTC
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=c8b4d6d391f8967509242e2538efc7371def0026

commit c8b4d6d391f8967509242e2538efc7371def0026
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2024-03-23 14:14:53 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2024-03-23 14:30:36 +0000

    archivers/py-bitshuffle: Add py-bitshuffle 0.5.1
    
    Bitshuffle is an algorithm that rearranges typed, binary data for improving
    compression, as well as a Python/C package that implements this algorithm within
    the NumPy framework.
    
    The library can be used alongside HDF5 to compress and decompress datasets and
    is integrated through the dynamically loaded filters framework. Bitshuffle is
    HDF5 filter number 32008.
    
    Algorithmically, Bitshuffle is closely related to HDF5's Shuffle filter except
    it operates at the bit level instead of the byte level. Arranging a typed data
    array into a matrix with the elements as the rows and the bits within the
    elements as the columns, Bitshuffle "transposes" the matrix, such that all the
    least-significant-bits are in a row, etc.
    
    This does not in itself compress data, only rearranges it for more efficient
    compression. To perform the actual compression you will need a compression
    library. Bitshuffle has been designed to be well matched to Marc Lehmann's LZF
    as well as LZ4 and ZSTD. Note that because Bitshuffle modifies the data at the
    bit level, sophisticated entropy-reducing compression libraries such as GZIP and
    BZIP are unlikely to achieve significantly better compression than simpler and
    faster duplicate-string-elimination algorithms such as LZF, LZ4 and ZSTD.
    Bitshuffle thus includes routines (and HDF5 filter options) to apply LZ4 and
    ZSTD compression to each block after shuffling.
---
 archivers/Makefile                                 |  1 +
 archivers/py-bitshuffle/Makefile                   | 28 ++++++++++++++++++++++
 archivers/py-bitshuffle/distinfo                   |  3 +++
 .../py-bitshuffle/files/patch-lzf-lzf_filter.c     | 14 +++++++++++
 archivers/py-bitshuffle/files/patch-pyproject.toml | 11 +++++++++
 archivers/py-bitshuffle/files/patch-setup.py       | 11 +++++++++
 archivers/py-bitshuffle/pkg-descr                  | 23 ++++++++++++++++++
 7 files changed, 91 insertions(+)

diff --git a/archivers/Makefile b/archivers/Makefile
index 35ecf91c7bb8..28342893c70d 100644
--- a/archivers/Makefile
+++ b/archivers/Makefile
@@ -178,6 +178,7 @@
     SUBDIR += ppunpack
     SUBDIR += pxz
     SUBDIR += py-acefile
+    SUBDIR += py-bitshuffle
     SUBDIR += py-blosc2
     SUBDIR += py-borgbackup
     SUBDIR += py-borgbackup11
diff --git a/archivers/py-bitshuffle/Makefile b/archivers/py-bitshuffle/Makefile
new file mode 100644
index 000000000000..5d89c913325e
--- /dev/null
+++ b/archivers/py-bitshuffle/Makefile
@@ -0,0 +1,28 @@
+PORTNAME=	bitshuffle
+PORTVERSION=	0.5.1
+CATEGORIES=	archivers python
+MASTER_SITES=	PYPI
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	sunpoet@FreeBSD.org
+COMMENT=	Bitshuffle filter for improving typed data compression
+WWW=		https://github.com/kiyo-masui/bitshuffle
+
+LICENSE=	MIT
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}h5py>=2.4.0:science/py-h5py@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}numpy>=0,1:math/py-numpy@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}setuptools>=0.7:devel/py-setuptools@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}wheel>=0:devel/py-wheel@${PY_FLAVOR}
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}h5py>=2.4.0:science/py-h5py@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}numpy>=1.6.1,1:math/py-numpy@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}setuptools>=0.7:devel/py-setuptools@${PY_FLAVOR}
+
+USES=		pkgconfig python
+USE_PYTHON=	autoplist concurrent cython pep517
+
+post-install:
+	${FIND} ${STAGEDIR}${PYTHON_SITELIBDIR} -name '*.so' -exec ${STRIP_CMD} {} +
+
+.include <bsd.port.mk>
diff --git a/archivers/py-bitshuffle/distinfo b/archivers/py-bitshuffle/distinfo
new file mode 100644
index 000000000000..c8a2e59bc658
--- /dev/null
+++ b/archivers/py-bitshuffle/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1710712300
+SHA256 (bitshuffle-0.5.1.tar.gz) = 988f224739aa6858475a4c59172968c7b51cc657d2249580c8f96848708fbae3
+SIZE (bitshuffle-0.5.1.tar.gz) = 229441
diff --git a/archivers/py-bitshuffle/files/patch-lzf-lzf_filter.c b/archivers/py-bitshuffle/files/patch-lzf-lzf_filter.c
new file mode 100644
index 000000000000..2b15f0ebac8c
--- /dev/null
+++ b/archivers/py-bitshuffle/files/patch-lzf-lzf_filter.c
@@ -0,0 +1,14 @@
+--- lzf/lzf_filter.c.orig	2022-11-26 00:17:16 UTC
++++ lzf/lzf_filter.c
+@@ -51,11 +51,7 @@
+         macro H5_USE_16_API is set
+ */
+ 
+-#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API)
+ #define H5PY_H5Z_NEWCLS 1
+-#else
+-#define H5PY_H5Z_NEWCLS 0   
+-#endif
+ 
+ size_t lzf_filter(unsigned flags, size_t cd_nelmts,
+ 		    const unsigned cd_values[], size_t nbytes,
diff --git a/archivers/py-bitshuffle/files/patch-pyproject.toml b/archivers/py-bitshuffle/files/patch-pyproject.toml
new file mode 100644
index 000000000000..620e4c320914
--- /dev/null
+++ b/archivers/py-bitshuffle/files/patch-pyproject.toml
@@ -0,0 +1,11 @@
+--- pyproject.toml.orig	2022-11-26 00:17:16 UTC
++++ pyproject.toml
+@@ -3,7 +3,7 @@ requires = [
+ requires = [
+     "setuptools>=0.7",
+     "Cython>=0.19",
+-    "oldest-supported-numpy",
++    "numpy",
+     "h5py>=2.4.0",
+ ]
+ 
diff --git a/archivers/py-bitshuffle/files/patch-setup.py b/archivers/py-bitshuffle/files/patch-setup.py
new file mode 100644
index 000000000000..f96dcdf556d3
--- /dev/null
+++ b/archivers/py-bitshuffle/files/patch-setup.py
@@ -0,0 +1,11 @@
+--- setup.py.orig	2022-11-26 00:17:16 UTC
++++ setup.py
+@@ -371,7 +371,7 @@ class build_ext(build_ext_):
+         else:
+             openmpflag = "-fopenmp"
+             archi = platform.machine()
+-            if archi in ("i386", "x86_64"):
++            if archi in ("amd64", "i386", "x86_64"):
+                 compileflags = COMPILE_FLAGS + ["-march=%s" % self.march]
+             else:
+                 compileflags = COMPILE_FLAGS + ["-mcpu=%s" % self.march]
diff --git a/archivers/py-bitshuffle/pkg-descr b/archivers/py-bitshuffle/pkg-descr
new file mode 100644
index 000000000000..e54c4df001da
--- /dev/null
+++ b/archivers/py-bitshuffle/pkg-descr
@@ -0,0 +1,23 @@
+Bitshuffle is an algorithm that rearranges typed, binary data for improving
+compression, as well as a Python/C package that implements this algorithm within
+the NumPy framework.
+
+The library can be used alongside HDF5 to compress and decompress datasets and
+is integrated through the dynamically loaded filters framework. Bitshuffle is
+HDF5 filter number 32008.
+
+Algorithmically, Bitshuffle is closely related to HDF5's Shuffle filter except
+it operates at the bit level instead of the byte level. Arranging a typed data
+array into a matrix with the elements as the rows and the bits within the
+elements as the columns, Bitshuffle "transposes" the matrix, such that all the
+least-significant-bits are in a row, etc.
+
+This does not in itself compress data, only rearranges it for more efficient
+compression. To perform the actual compression you will need a compression
+library. Bitshuffle has been designed to be well matched to Marc Lehmann's LZF
+as well as LZ4 and ZSTD. Note that because Bitshuffle modifies the data at the
+bit level, sophisticated entropy-reducing compression libraries such as GZIP and
+BZIP are unlikely to achieve significantly better compression than simpler and
+faster duplicate-string-elimination algorithms such as LZF, LZ4 and ZSTD.
+Bitshuffle thus includes routines (and HDF5 filter options) to apply LZ4 and
+ZSTD compression to each block after shuffling.