git: 22fcd914debb - main - textproc/uni-algo: Add uni-algo 1.2.0

From: Po-Chuan Hsieh <sunpoet_at_FreeBSD.org>
Date: Mon, 03 Feb 2025 15:55:54 UTC
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=22fcd914debb594d1daa5f79355ed5f5d8c0be45

commit 22fcd914debb594d1daa5f79355ed5f5d8c0be45
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2025-02-03 15:12:18 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2025-02-03 15:50:43 +0000

    textproc/uni-algo: Add uni-algo 1.2.0
    
    There are plenty of Unicode libraries for C/C++ out there that implement random
    Unicode algorithms, but many of them don't handle ill-formed UTF sequences at
    all.  In the best-case scenario, you'll get an exception/error; in the
    worst-case, undefined behavior. The biggest problem is that in 99% of cases
    everything will be fine. This is inappropriate for security reasons.  This
    library handles such problems (and not only ill-formed sequences)
    properly and always according to The Unicode Standard.
    
    In C/C++, unlike some other programming languages, there is no safe type for
    UTF-8/UTF-16 that guarantees that the data will be well-formed; this makes the
    problem even worse. The library doesn't introduce such a type either, because
    it doesn't work with types/strings/files/streams; it works with the data
    inside them and makes it safe when it's needed.
    
    Check this article if you want more information about ill-formed sequences:
    https://hsivonen.fi/broken-utf-8
    
    It is a bit outdated because ICU (International Components for Unicode) now uses
    a W3C-conformant implementation too, but the information in the article is very
    useful.
    
    This library uses a W3C-conformant implementation as well.
---
 textproc/Makefile           |  1 +
 textproc/uni-algo/Makefile  | 22 +++++++++++++++++
 textproc/uni-algo/distinfo  |  3 +++
 textproc/uni-algo/pkg-descr | 22 +++++++++++++++++
 textproc/uni-algo/pkg-plist | 60 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 108 insertions(+)

diff --git a/textproc/Makefile b/textproc/Makefile
index ab17f16d4238..156fa1d0de93 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -2140,6 +2140,7 @@
     SUBDIR += uncle
     SUBDIR += uncrustify
     SUBDIR += uni
+    SUBDIR += uni-algo
     SUBDIR += uni2ascii
     SUBDIR += unoconv
     SUBDIR += unroff
diff --git a/textproc/uni-algo/Makefile b/textproc/uni-algo/Makefile
new file mode 100644
index 000000000000..4e480e95b722
--- /dev/null
+++ b/textproc/uni-algo/Makefile
@@ -0,0 +1,22 @@
+PORTNAME=	uni-algo
+PORTVERSION=	1.2.0
+DISTVERSIONPREFIX=	v
+CATEGORIES=	textproc
+
+MAINTAINER=	sunpoet@FreeBSD.org
+COMMENT=	Unicode Algorithms Implementation for C/C++
+WWW=		https://github.com/uni-algo/uni-algo
+
+LICENSE=	MIT PD
+LICENSE_COMB=	dual
+LICENSE_FILE=	${WRKSRC}/LICENSE.md
+
+USES=		cmake compiler:c++17-lang
+
+CMAKE_OFF=	UNI_ALGO_HEADER_ONLY
+CMAKE_ON=	BUILD_SHARED_LIBS \
+		UNI_ALGO_INSTALL
+
+USE_GITHUB=	yes
+
+.include <bsd.port.mk>
diff --git a/textproc/uni-algo/distinfo b/textproc/uni-algo/distinfo
new file mode 100644
index 000000000000..562238d7f518
--- /dev/null
+++ b/textproc/uni-algo/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1738088814
+SHA256 (uni-algo-uni-algo-v1.2.0_GH0.tar.gz) = f2a1539cd8635bc6088d05144a73ecfe7b4d74ee0361fabed6f87f9f19e74ca9
+SIZE (uni-algo-uni-algo-v1.2.0_GH0.tar.gz) = 437340
diff --git a/textproc/uni-algo/pkg-descr b/textproc/uni-algo/pkg-descr
new file mode 100644
index 000000000000..9557eac1e866
--- /dev/null
+++ b/textproc/uni-algo/pkg-descr
@@ -0,0 +1,22 @@
+There are plenty of Unicode libraries for C/C++ out there that implement random
+Unicode algorithms, but many of them don't handle ill-formed UTF sequences at
+all.  In the best-case scenario, you'll get an exception/error; in the
+worst-case, undefined behavior. The biggest problem is that in 99% cases
+everything will be fine. This is inappropriate for security reasons.  This
+library handles such problems (there are not only ill-formed sequences actually)
+properly and always according to The Unicode Standard.
+
+In C/C++, unlike some other programming languages, there is no safe type for
+UTF-8/UTF-16 that guarantees that the data will be well-formed; this makes the
+problem even worse. The library doesn't introduce such a type either because the
+library doesn't work with types/strings/files/streams, it works with the data
+inside them and makes it safe when it's needed.
+
+Check this article if you want more information about ill-formed sequences:
+https://hsivonen.fi/broken-utf-8
+
+It is a bit outdated because ICU (International Components for Unicode) now uses
+W3C conformant implementation too, but the information in the article is very
+useful.
+
+This library does use W3C conformant implementation too.
diff --git a/textproc/uni-algo/pkg-plist b/textproc/uni-algo/pkg-plist
new file mode 100644
index 000000000000..8be5b6eed021
--- /dev/null
+++ b/textproc/uni-algo/pkg-plist
@@ -0,0 +1,60 @@
+include/uni_algo/all.h
+include/uni_algo/case.h
+include/uni_algo/config.h
+include/uni_algo/conv.h
+include/uni_algo/ext/ascii.h
+include/uni_algo/ext/translit/japanese_kana_to_romaji_hepburn.h
+include/uni_algo/ext/translit/macedonian_to_latin_docs.h
+include/uni_algo/impl/data/data_case.h
+include/uni_algo/impl/data/data_norm.h
+include/uni_algo/impl/data/data_prop.h
+include/uni_algo/impl/data/data_script.h
+include/uni_algo/impl/data/data_segment_grapheme.h
+include/uni_algo/impl/data/data_segment_word.h
+include/uni_algo/impl/data/extern_case.h
+include/uni_algo/impl/data/extern_norm.h
+include/uni_algo/impl/data/extern_prop.h
+include/uni_algo/impl/data/extern_script.h
+include/uni_algo/impl/data/extern_segment_grapheme.h
+include/uni_algo/impl/data/extern_segment_word.h
+include/uni_algo/impl/impl_case.h
+include/uni_algo/impl/impl_case_locale.h
+include/uni_algo/impl/impl_conv.h
+include/uni_algo/impl/impl_cpp_lib_version.h
+include/uni_algo/impl/impl_data.h
+include/uni_algo/impl/impl_iter.h
+include/uni_algo/impl/impl_locale.h
+include/uni_algo/impl/impl_norm.h
+include/uni_algo/impl/impl_prop.h
+include/uni_algo/impl/impl_script.h
+include/uni_algo/impl/impl_segment_grapheme.h
+include/uni_algo/impl/impl_segment_word.h
+include/uni_algo/impl/impl_types.h
+include/uni_algo/impl/impl_unicode_version.h
+include/uni_algo/impl/internal_defines.h
+include/uni_algo/impl/internal_stages.h
+include/uni_algo/impl/internal_undefs.h
+include/uni_algo/internal/data_inl.h
+include/uni_algo/internal/error.h
+include/uni_algo/internal/found.h
+include/uni_algo/internal/locale_inl.h
+include/uni_algo/internal/ranges_core.h
+include/uni_algo/internal/ranges_translit.h
+include/uni_algo/internal/safe_layer.h
+include/uni_algo/locale.h
+include/uni_algo/norm.h
+include/uni_algo/prop.h
+include/uni_algo/ranges.h
+include/uni_algo/ranges_conv.h
+include/uni_algo/ranges_grapheme.h
+include/uni_algo/ranges_norm.h
+include/uni_algo/ranges_word.h
+include/uni_algo/script.h
+include/uni_algo/version.h
+lib/libuni-algo.so
+lib/libuni-algo.so.1.2
+lib/libuni-algo.so.1.2.0
+%%DATADIR%%/cmake/uni-algo-config-version.cmake
+%%DATADIR%%/cmake/uni-algo-config.cmake
+%%DATADIR%%/cmake/uni-algo-targets-%%CMAKE_BUILD_TYPE%%.cmake
+%%DATADIR%%/cmake/uni-algo-targets.cmake