git: 21a02f42b641 - main - converters/simdutf: Unicode validation and transcoding with SIMD
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 23 Oct 2023 01:53:32 UTC
The branch main has been updated by fuz: URL: https://cgit.FreeBSD.org/ports/commit/?id=21a02f42b64150353438318eae45aefb66d51b4d commit 21a02f42b64150353438318eae45aefb66d51b4d Author: Robert Clausecker <fuz@FreeBSD.org> AuthorDate: 2023-10-20 17:07:54 +0000 Commit: Robert Clausecker <fuz@FreeBSD.org> CommitDate: 2023-10-23 01:51:27 +0000 converters/simdutf: Unicode validation and transcoding with SIMD This library provides fast Unicode functions such as - ASCII, UTF-8, UTF-16LE/BE and UTF-32 validation, with and without error identification, - transcoding between each of Latin1, UTF-8, UTF-16LE/BE, and UTF-32, with and without validation, with and without error identification, - From a UTF-8 string, compute the size of the Latin1/UTF-16/UTF-32 equivalent string, - From a UTF-16LE/BE string, compute the size of the Latin1/UTF-8/UTF-32 equivalent string, - From a UTF-32 string, compute the size of the UTF-8 or UTF-16LE equivalent string, - UTF-8 and UTF-16LE/BE character counting, - UTF-16 endianness change (UTF-16LE/BE to UTF-16BE/LE). The functions are accelerated using SIMD instructions (e.g., ARM NEON, SSE, AVX, AVX-512, etc.). When your strings contain hundreds of characters, we can often transcode them at speeds exceeding a billion characters per second. You should expect high speeds not only with English strings (ASCII) but also Chinese, Japanese, Arabic, and so forth. We handle the full character range (including, for example, emojis). The library compiles down to a small library of a few hundred kilobytes. Our functions are exception-free and non-allocating. We have extensive tests and extensive benchmarks. 
WWW: https://simdutf.github.io/simdutf/ --- converters/Makefile | 1 + converters/simdutf/Makefile | 38 ++++++++++++++++++++++++++++++++++++++ converters/simdutf/distinfo | 3 +++ converters/simdutf/pkg-descr | 26 ++++++++++++++++++++++++++ converters/simdutf/pkg-plist | 18 ++++++++++++++++++ 5 files changed, 86 insertions(+) diff --git a/converters/Makefile b/converters/Makefile index 3782f6bcdac8..7b40eac0f7ae 100644 --- a/converters/Makefile +++ b/converters/Makefile @@ -168,6 +168,7 @@ SUBDIR += rubygem-url_safe_base64 SUBDIR += rubygem-xdr SUBDIR += showkey + SUBDIR += simdutf SUBDIR += ta2as SUBDIR += tnef SUBDIR += trans diff --git a/converters/simdutf/Makefile b/converters/simdutf/Makefile new file mode 100644 index 000000000000..947a871a07f6 --- /dev/null +++ b/converters/simdutf/Makefile @@ -0,0 +1,38 @@ +PORTNAME= simdutf +DISTVERSIONPREFIX= v +DISTVERSION= 4.0.0 +CATEGORIES= converters textproc + +MAINTAINER= fuz@FreeBSD.org +COMMENT= Unicode validation and transcoding with SIMD +WWW= https://simdutf.github.io/simdutf/ + +LICENSE= APACHE20 MIT +LICENSE_COMB= dual +LICENSE_FILE_APACHE20= ${WRKSRC}/LICENSE-APACHE +LICENSE_FILE_MIT= ${WRKSRC}/LICENSE-MIT + +USES= cmake:testing +USE_GITHUB= yes +USE_LDCONFIG= yes + +CMAKE_ON= BUILD_SHARED_LIBS + +OPTIONS_DEFINE= BENCHMARKS ICONV TEST TOOLS +OPTIONS_DEFAULT= ICONV TOOLS +OPTIONS_SUB= yes +BENCHMARKS_DESC= Build benchmarks +TOOLS_DESC= Build tools + +BENCHMARKS_IMPLIES= TEST +BENCHMARKS_LIB_DEPENDS= libicuuc.so:devel/icu +BENCHMARKS_CMAKE_BOOL= SIMDUTF_BENCHMARKS +ICONV_USES= iconv +ICONV_CMAKE_BOOL= SIMDUTF_ICONV +TEST_CMAKE_BOOL= SIMDUTF_TESTS +TOOLS_CMAKE_BOOL= SIMDUTF_TOOLS + +pre-test-TEST-off: + @echo Enable option TEST to run the full test suite + +.include <bsd.port.mk> diff --git a/converters/simdutf/distinfo b/converters/simdutf/distinfo new file mode 100644 index 000000000000..ef9cc66d6552 --- /dev/null +++ b/converters/simdutf/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1697849191 +SHA256 
(simdutf-simdutf-v4.0.0_GH0.tar.gz) = 1a84ea8a24396ea410d1c88d3126f95956a8799d8eaea0e03dc721e7c65ff9b3 +SIZE (simdutf-simdutf-v4.0.0_GH0.tar.gz) = 1864807 diff --git a/converters/simdutf/pkg-descr b/converters/simdutf/pkg-descr new file mode 100644 index 000000000000..d8319ef380ee --- /dev/null +++ b/converters/simdutf/pkg-descr @@ -0,0 +1,26 @@ +This library provide fast Unicode functions such as + + - ASCII, UTF-8, UTF-16LE/BE and UTF-32 validation, with and without + error identification, + - transcoding between each of Latin1, UTF-8, UTF-16LE/BE, and UTF-32, + with and without validation, with and without error identification + - From an UTF-8 string, compute the size of the Latin1/UTF-16/UTF-32 + equivalent string, + - From an UTF-16LE/BE string, compute the size of the + Latin1/UTF-8/UTF-32 equivalent string, + - From an UTF-32 string, compute the size of the UTF-8 or UTF-16LE + equivalent string, + - UTF-8 and UTF-16LE/BE character counting. + - UTF-16 endianness change (UTF16-LE/BE to UTF-16-BE/LE) + +The functions are accelerated using SIMD instructions (e.g., ARM NEON, +SSE, AVX, AVX-512, etc.). When your strings contain hundreds of +characters, we can often transcode them at speeds exceeding a billion +characters per second. You should expect high speeds not only with +English strings (ASCII) but also Chinese, Japanese, Arabic, and so +forth. We handle the full character range (including, for example, +emojis). + +The library compiles down to a small library of a few hundred kilobytes. +Our functions are exception-free and non allocating. We have extensive +tests and extensive benchmarks. 
diff --git a/converters/simdutf/pkg-plist b/converters/simdutf/pkg-plist new file mode 100644 index 000000000000..4c801e507c91 --- /dev/null +++ b/converters/simdutf/pkg-plist @@ -0,0 +1,18 @@ +%%TOOLS%%bin/sutf +include/simdutf.h +include/simdutf/avx512.h +include/simdutf/common_defs.h +include/simdutf/compiler_check.h +include/simdutf/encoding_types.h +include/simdutf/error.h +include/simdutf/implementation.h +include/simdutf/internal/isadetection.h +include/simdutf/portability.h +include/simdutf/simdutf_version.h +lib/cmake/simdutf/simdutf-config-version.cmake +lib/cmake/simdutf/simdutf-config.cmake +lib/cmake/simdutf/simdutfTargets-%%CMAKE_BUILD_TYPE%%.cmake +lib/cmake/simdutf/simdutfTargets.cmake +lib/libsimdutf.so.5.0.0 +lib/libsimdutf.so.5 +lib/libsimdutf.so