git: 0532aa392a24 - main - www/py-htmldate: Add py-htmldate 1.7.0
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 21 Feb 2024 15:17:31 UTC
The branch main has been updated by sunpoet: URL: https://cgit.FreeBSD.org/ports/commit/?id=0532aa392a247337f8afcc9c58025347d9fff64c commit 0532aa392a247337f8afcc9c58025347d9fff64c Author: Po-Chuan Hsieh <sunpoet@FreeBSD.org> AuthorDate: 2024-02-21 14:13:47 +0000 Commit: Po-Chuan Hsieh <sunpoet@FreeBSD.org> CommitDate: 2024-02-21 15:06:10 +0000 www/py-htmldate: Add py-htmldate 1.7.0 htmldate finds original and updated publication dates of any web page. From the command-line or within Python, all the steps needed from web page download to HTML parsing, scraping, and text analysis are included. --- www/Makefile | 1 + www/py-htmldate/Makefile | 26 ++++++++++++++++++++++++++ www/py-htmldate/distinfo | 3 +++ www/py-htmldate/files/patch-setup.py | 11 +++++++++++ www/py-htmldate/pkg-descr | 3 +++ 5 files changed, 44 insertions(+) diff --git a/www/Makefile b/www/Makefile index 44c67c184f35..8d99027b44c3 100644 --- a/www/Makefile +++ b/www/Makefile @@ -1721,6 +1721,7 @@ SUBDIR += py-html3 SUBDIR += py-html5-parser SUBDIR += py-html5lib + SUBDIR += py-htmldate SUBDIR += py-httmock SUBDIR += py-http-parser SUBDIR += py-httpbin diff --git a/www/py-htmldate/Makefile b/www/py-htmldate/Makefile new file mode 100644 index 000000000000..2e44731e6ea0 --- /dev/null +++ b/www/py-htmldate/Makefile @@ -0,0 +1,26 @@ +PORTNAME= htmldate +PORTVERSION= 1.7.0 +CATEGORIES= www python +MASTER_SITES= PYPI +PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} + +MAINTAINER= sunpoet@FreeBSD.org +COMMENT= Fast and robust extraction of publication dates from URLs and web pages +WWW= https://htmldate.readthedocs.io/en/latest/ \ + https://github.com/adbar/htmldate + +LICENSE= GPLv3+ +LICENSE_FILE= ${WRKSRC}/LICENSE + +RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}charset-normalizer>=3.3.2:textproc/py-charset-normalizer@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}dateparser>=1.1.2:devel/py-dateparser@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}lxml>=4.9.3<6:devel/py-lxml@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}dateutil>=2.8.2:devel/py-dateutil@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}urllib3>=1.26,1<3,1:net/py-urllib3@${PY_FLAVOR} + +USES= python +USE_PYTHON= autoplist concurrent distutils + +NO_ARCH= yes + +.include <bsd.port.mk> diff --git a/www/py-htmldate/distinfo b/www/py-htmldate/distinfo new file mode 100644 index 000000000000..010b81ad6da7 --- /dev/null +++ b/www/py-htmldate/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1708448862 +SHA256 (htmldate-1.7.0.tar.gz) = 02a800dd224cbf74bf483b042f64e14f57ba0e40c6b4404b284e98bc6c30b68d +SIZE (htmldate-1.7.0.tar.gz) = 53992 diff --git a/www/py-htmldate/files/patch-setup.py b/www/py-htmldate/files/patch-setup.py new file mode 100644 index 000000000000..1a2e3831b98c --- /dev/null +++ b/www/py-htmldate/files/patch-setup.py @@ -0,0 +1,11 @@ +--- setup.py.orig 2024-01-17 16:57:16 UTC ++++ setup.py +@@ -120,7 +120,7 @@ setup( + "dateparser >= 1.1.2", # 1.1.3+ slower + # see tests on Github Actions + "lxml == 4.9.2; platform_system == 'Darwin' and python_version <= '3.8'", +- "lxml >= 4.9.4, < 6; platform_system != 'Darwin' or python_version > '3.8'", ++ "lxml >= 4.9.3, < 6; platform_system != 'Darwin' or python_version > '3.8'", + "python-dateutil >= 2.8.2", + "urllib3 >= 1.26, < 2; python_version < '3.7'", + "urllib3 >= 1.26, < 3; python_version >= '3.7'", diff --git a/www/py-htmldate/pkg-descr b/www/py-htmldate/pkg-descr new file mode 100644 index 000000000000..0d27dc7f96f5 --- /dev/null +++ b/www/py-htmldate/pkg-descr @@ -0,0 +1,3 @@ +htmldate finds original and updated publication dates of any web page. From the +command-line or within Python, all the steps needed from web page download to +HTML parsing, scraping, and text analysis are included.