git: 27a97f780c56 - 2024Q2 - deskutils/py-paperless-ngx: Fixes and improvements
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 24 Apr 2024 23:11:02 UTC
The branch 2024Q2 has been updated by grembo: URL: https://cgit.FreeBSD.org/ports/commit/?id=27a97f780c56cf51b482aa828ad0e20e24f23fcc commit 27a97f780c56cf51b482aa828ad0e20e24f23fcc Author: Michael Gmelin <grembo@FreeBSD.org> AuthorDate: 2024-04-24 12:39:17 +0000 Commit: Michael Gmelin <grembo@FreeBSD.org> CommitDate: 2024-04-24 23:09:18 +0000 deskutils/py-paperless-ngx: Fixes and improvements - Fix NLTK configuration in paperless.conf.sample - Mention snowball_data in NLTK instructions - Fix sad defect that prevented rc scripts from picking up configuration data from rc.conf - Improve startup script documentation - Document required ACL when using a remote redis instance - Document how to enable JBIG2 and its potential pitfalls PR: 278424, 278425, 278426 Reported by: anonymous bugzilla user, netchild MFH: 2024Q2 (cherry picked from commit d716cbc8e98a9410782bbad8139f7b6457920e5b) --- deskutils/py-paperless-ngx/Makefile | 1 + deskutils/py-paperless-ngx/files/paperless-beat.in | 12 ++++++----- .../py-paperless-ngx/files/paperless-consumer.in | 8 +++++--- .../py-paperless-ngx/files/paperless-flower.in | 12 ++++++----- .../py-paperless-ngx/files/paperless-migrate.in | 12 ++++++----- .../py-paperless-ngx/files/paperless-ngx.7.in | 24 +++++++++++++++++++--- .../py-paperless-ngx/files/paperless-webui.in | 20 +++++++++++------- .../py-paperless-ngx/files/paperless-worker.in | 12 ++++++----- .../files/patch-paperless.conf.example | 10 ++++----- deskutils/py-paperless-ngx/files/pkg-message.in | 21 +++++++++++++++++++ 10 files changed, 93 insertions(+), 39 deletions(-) diff --git a/deskutils/py-paperless-ngx/Makefile b/deskutils/py-paperless-ngx/Makefile index 347baff5bc06..f8885ccace83 100644 --- a/deskutils/py-paperless-ngx/Makefile +++ b/deskutils/py-paperless-ngx/Makefile @@ -1,6 +1,7 @@ PORTNAME= paperless-ngx PORTVERSION= 2.7.2 DISTVERSIONPREFIX= v +PORTREVISION= 1 CATEGORIES= deskutils python MASTER_SITES= 
https://github.com/${PORTNAME}/${PORTNAME}/releases/download/${DISTVERSIONPREFIX}${DISTVERSION}/:webui \ GH:gh diff --git a/deskutils/py-paperless-ngx/files/paperless-beat.in b/deskutils/py-paperless-ngx/files/paperless-beat.in index 76c977d9c2ea..3fa2f2b4d4a6 100755 --- a/deskutils/py-paperless-ngx/files/paperless-beat.in +++ b/deskutils/py-paperless-ngx/files/paperless-beat.in @@ -11,11 +11,13 @@ # paperless_beat_enable (bool): Set to "NO" by default. # Set it to "YES" to enable # paperless_beat -# paperless_beat_log_facility(str): Set to "daemon" by default. +# paperless_beat_user (str): Set to "paperless" by default. +# User to run beat. +# paperless_beat_log_facility (str): Set to "daemon" by default. # Beat logging syslog facility. -# paperless_beat_log_priority(str): Set to "notice" by default. +# paperless_beat_log_priority (str): Set to "notice" by default. # Beat logging syslog priority. -# paperless_beat_log_tag(str): Set to "paperless-beat" by default. +# paperless_beat_log_tag (str): Set to "paperless-beat" by default. # Beat logging syslog tag. . /etc/rc.subr @@ -23,7 +25,8 @@ name="paperless_beat" rcvar="${name}_enable" -# Set defaults +# read configuration and set defaults +load_rc_config "$name" paperless_beat_enable=${paperless_beat_enable:-"NO"} paperless_beat_daemon_user=${paperless_beat_user:-"paperless"} paperless_beat_log_facility=${paperless_beat_log_facility:-"daemon"} @@ -45,5 +48,4 @@ command_args="-cS -u ${paperless_beat_daemon_user} \ %%PREFIX%%/bin/celery \ --app paperless beat --loglevel INFO" -load_rc_config ${name} run_rc_command "$1" diff --git a/deskutils/py-paperless-ngx/files/paperless-consumer.in b/deskutils/py-paperless-ngx/files/paperless-consumer.in index 806003623d2d..987234567de8 100755 --- a/deskutils/py-paperless-ngx/files/paperless-consumer.in +++ b/deskutils/py-paperless-ngx/files/paperless-consumer.in @@ -11,11 +11,13 @@ # paperless_consumer_enable (bool): Set to "NO" by default. 
# Set it to "YES" to enable # paperless_consumer +# paperless_consumer_user (str): Set to "paperless" by default. +# User to run consumer. # paperless_consumer_log_facility(str): Set to "daemon" by default. # Consumer logging syslog facility. # paperless_consumer_log_priority(str): Set to "notice" by default. # Consumer logging syslog priority. -# paperless_consumer_log_tag(str): Set to "paperless-consumer" by default. +# paperless_consumer_log_tag (str): Set to "paperless-consumer" by default. # Consumer logging syslog tag. . /etc/rc.subr @@ -23,7 +25,8 @@ name="paperless_consumer" rcvar="${name}_enable" -# Set defaults +# read configuration and set defaults +load_rc_config "$name" paperless_consumer_enable=${paperless_consumer_enable:-"NO"} paperless_consumer_daemon_user=${paperless_consumer_user:-"paperless"} paperless_consumer_log_facility=${paperless_consumer_log_facility:-"daemon"} @@ -45,5 +48,4 @@ command_args="-cS -u ${paperless_consumer_daemon_user} \ -T ${paperless_consumer_log_tag} \ %%PREFIX%%/bin/paperless document_consumer" -load_rc_config ${name} run_rc_command "$1" diff --git a/deskutils/py-paperless-ngx/files/paperless-flower.in b/deskutils/py-paperless-ngx/files/paperless-flower.in index 87f5942f365a..3690d41da3a8 100755 --- a/deskutils/py-paperless-ngx/files/paperless-flower.in +++ b/deskutils/py-paperless-ngx/files/paperless-flower.in @@ -10,11 +10,13 @@ # paperless_flower_enable (bool): Set to "NO" by default. # Set it to "YES" to enable # paperless_flower -# paperless_flower_log_facility(str): Set to "daemon" by default. +# paperless_flower_user (str): Set to "paperless" by default. +# User to run flower. +# paperless_flower_log_facility (str): Set to "daemon" by default. # Flower logging syslog facility. -# paperless_flower_log_priority(str): Set to "notice" by default. +# paperless_flower_log_priority (str): Set to "notice" by default. # Flower logging syslog priority. -# paperless_flower_log_tag(str): Set to "paperless-flower" by default. 
+# paperless_flower_log_tag (str): Set to "paperless-flower" by default. # Flower logging syslog tag. . /etc/rc.subr @@ -22,7 +24,8 @@ name="paperless_flower" rcvar="${name}_enable" -# Set defaults +# read configuration and set defaults +load_rc_config "$name" paperless_flower_enable=${paperless_flower_enable:-"NO"} paperless_flower_daemon_user=${paperless_flower_user:-"paperless"} paperless_flower_log_facility=${paperless_flower_log_facility:-"daemon"} @@ -44,5 +47,4 @@ command_args="-cS -u ${paperless_flower_daemon_user} \ %%PREFIX%%/bin/celery \ --app paperless flower --url-prefix=flower --loglevel INFO" -load_rc_config ${name} run_rc_command "$1" diff --git a/deskutils/py-paperless-ngx/files/paperless-migrate.in b/deskutils/py-paperless-ngx/files/paperless-migrate.in index 596fa621e8dc..91bd702d2b10 100755 --- a/deskutils/py-paperless-ngx/files/paperless-migrate.in +++ b/deskutils/py-paperless-ngx/files/paperless-migrate.in @@ -11,11 +11,13 @@ # paperless_migrate_enable (bool): Set to "NO" by default. # Set it to "YES" to enable # paperless_migrate -# paperless_migrate_log_facility(str): Set to "daemon" by default. +# paperless_migrate_user (str): Set to "paperless" by default. +# User to run migrate. +# paperless_migrate_log_facility (str): Set to "daemon" by default. # Migrate logging syslog facility. -# paperless_migrate_log_priority(str): Set to "notice" by default. +# paperless_migrate_log_priority (str): Set to "notice" by default. # Migrate logging syslog priority. -# paperless_migrate_log_tag(str): Set to "paperless-migrate" by default. +# paperless_migrate_log_tag (str): Set to "paperless-migrate" by default. # Migrate logging syslog tag. . 
/etc/rc.subr @@ -23,7 +25,8 @@ name="paperless_migrate" rcvar="${name}_enable" -# Set defaults +# read configuration and set defaults +load_rc_config "$name" paperless_migrate_enable=${paperless_migrate_enable:-"NO"} paperless_migrate_user=${paperless_migrate_user:-"paperless"} paperless_migrate_log_facility=${paperless_migrate_log_facility:-"daemon"} @@ -44,5 +47,4 @@ paperless_migrate_run() -c 'sh -c "%%PREFIX%%/bin/paperless migrate --no-color -v 0"' } -load_rc_config ${name} run_rc_command "$1" diff --git a/deskutils/py-paperless-ngx/files/paperless-ngx.7.in b/deskutils/py-paperless-ngx/files/paperless-ngx.7.in index d809cf9eaceb..6f771e4713e3 100644 --- a/deskutils/py-paperless-ngx/files/paperless-ngx.7.in +++ b/deskutils/py-paperless-ngx/files/paperless-ngx.7.in @@ -23,7 +23,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd February 22, 2024 +.Dd April 24, 2024 .Dt PAPERLESS-NGX 7 .Os .Sh NAME @@ -120,6 +120,12 @@ Modify to match the configured credentials (when running on localhost, it is possible to use no special credentials). .Pp +In case redis is not running on localhost, an ACL entry needs to +be added to grant permissions to the user used to access the instance: +.Bd -literal -offset indent +user paperlessusername on +@all -@admin ~* &* +.Ed +.Pp The URL paperless is hosted on needs to be configued by setting .Va PAPERLESS_URL , it is also possible to tune @@ -159,8 +165,8 @@ In order to process scanned documents using machine learning, paperless-ngx requires NLTK (natural language toolkit) data. 
The required files can be downloaded by using these commands: .Bd -literal -offset indent -%%PYTHON_CMD%% -m nltk.downloader \\ - stopwords punkt -d /var/db/paperless/nltkdata +su -l paperless -c '%%PYTHON_CMD%% -m nltk.downloader \\ + stopwords snowball_data punkt -d /var/db/paperless/nltkdata' .Ed .Pp Normally, the document classifier is run automatically by @@ -178,6 +184,18 @@ It can be enabled and started like this: service paperless-flower enable service paperless-flower start .Ed +.Sh JBIG2 ENCODING +In case a binary named `jbig2enc' is found in $PATH, textproc/py-ocrmypdf +will automatically pick it up to encode PDFs with it. +.Pp +A patch to add a port skeleton for jbig2enc for manual building +on a local ports tree can be found here: +.Pa https://people.freebsd.org/~grembo/graphics-jbig2enc.patch +.Pp +There are various considerations to be made when using jbig2enc, +including potential patent claims and regulatory requirements, +see also +.Pa https://en.wikipedia.org/wiki/JBIG2 . .Sh WEB UI SETUP Before using the web ui, make sure to create a super user and assign a password diff --git a/deskutils/py-paperless-ngx/files/paperless-webui.in b/deskutils/py-paperless-ngx/files/paperless-webui.in index 19330fe57996..1e191883f00b 100755 --- a/deskutils/py-paperless-ngx/files/paperless-webui.in +++ b/deskutils/py-paperless-ngx/files/paperless-webui.in @@ -11,16 +11,22 @@ # paperless_webui_enable (bool): Set to "NO" by default. # Set it to "YES" to enable # paperless_webui -# paperless_webui_log_facility(str): Set to "daemon" by default. +# paperless_webui_user (str): Set to "paperless" by default. +# User to run webui. +# paperless_webui_log_facility (str): Set to "daemon" by default. # WebUI logging syslog facility. -# paperless_webui_log_priority(str): Set to "notice" by default. +# paperless_webui_log_priority (str): Set to "notice" by default. # WebUI logging syslog priority. -# paperless_webui_log_tag(str): Set to "paperless-webui" by default. 
+# paperless_webui_log_tag (str): Set to "paperless-webui" by default. # WebUI logging syslog tag. -# paperless_webui_restart_delay(int): Set to "5" by default. +# paperless_webui_listen_address (str): Set to "127.0.0.1" by default. +# IP address to listen on. +# paperless_webui_listen_port (int): Set to "8000" by default. +# Port to listen on. +# paperless_webui_restart_delay (int): Set to "5" by default. # WebUI automatic restart delay in # seconds. -# paperless_webui_secret_key(str): Set to +# paperless_webui_secret_key (str): Set to # "%%PREFIX%%/etc/paperless_secret_key" # by default. Load PAPERLESS_SECRET_KEY # from this file from start, in case it @@ -36,7 +42,8 @@ name="paperless_webui" rcvar="${name}_enable" -# Set defaults +# read configuration and set defaults +load_rc_config "$name" paperless_webui_enable=${paperless_webui_enable:-"NO"} paperless_webui_daemon_user=${paperless_webui_user:-"paperless"} paperless_webui_log_facility=${paperless_webui_log_facility:-"daemon"} @@ -87,5 +94,4 @@ paperless_webui_prestart() fi } -load_rc_config ${name} run_rc_command "$1" diff --git a/deskutils/py-paperless-ngx/files/paperless-worker.in b/deskutils/py-paperless-ngx/files/paperless-worker.in index 51568e68415a..db99288a005e 100755 --- a/deskutils/py-paperless-ngx/files/paperless-worker.in +++ b/deskutils/py-paperless-ngx/files/paperless-worker.in @@ -11,11 +11,13 @@ # paperless_worker_enable (bool): Set to "NO" by default. # Set it to "YES" to enable # paperless_worker -# paperless_worker_log_facility(str): Set to "daemon" by default. +# paperless_worker_user (str): Set to "paperless" by default. +# User to run worker. +# paperless_worker_log_facility (str): Set to "daemon" by default. # Worker logging syslog facility. -# paperless_worker_log_priority(str): Set to "notice" by default. +# paperless_worker_log_priority (str): Set to "notice" by default. # Worker logging syslog priority. -# paperless_worker_log_tag(str): Set to "paperless-worker" by default. 
+# paperless_worker_log_tag (str): Set to "paperless-worker" by default. # Worker logging syslog tag. . /etc/rc.subr @@ -23,7 +25,8 @@ name="paperless_worker" rcvar="${name}_enable" -# Set defaults +# read configuration and set defaults +load_rc_config "$name" paperless_worker_enable=${paperless_worker_enable:-"NO"} paperless_worker_daemon_user=${paperless_worker_user:-"paperless"} paperless_worker_log_facility=${paperless_worker_log_facility:-"daemon"} @@ -45,5 +48,4 @@ command_args="-cS -u ${paperless_worker_daemon_user} \ %%PREFIX%%/bin/celery \ --app paperless worker --loglevel INFO --without-mingle --without-gossip" -load_rc_config ${name} run_rc_command "$1" diff --git a/deskutils/py-paperless-ngx/files/patch-paperless.conf.example b/deskutils/py-paperless-ngx/files/patch-paperless.conf.example index 12cc2465a4b0..e9c565cfdf84 100644 --- a/deskutils/py-paperless-ngx/files/patch-paperless.conf.example +++ b/deskutils/py-paperless-ngx/files/patch-paperless.conf.example @@ -50,13 +50,11 @@ #PAPERLESS_TASK_WORKERS=1 #PAPERLESS_THREADS_PER_WORKER=1 #PAPERLESS_TIME_ZONE=UTC -@@ -92,3 +106,9 @@ +@@ -92,3 +106,7 @@ #PAPERLESS_CONVERT_BINARY=/usr/bin/convert #PAPERLESS_GS_BINARY=/usr/bin/gs + -+# NLTK settings -+NLTK_DIR=/var/db/paperless/nltk_data -+#NLTK_ENABLED=yes -+# below defaults to OCR_LANGUAGE -+#NLTK_LANGUAGE= ++# NLTK settings - see `man 7 paperless-ngx` ++PAPERLESS_NLTK_DIR=/var/db/paperless/nltkdata ++PAPERLESS_ENABLE_NLTK=yes diff --git a/deskutils/py-paperless-ngx/files/pkg-message.in b/deskutils/py-paperless-ngx/files/pkg-message.in index 3a67d4c433b9..78d477691107 100644 --- a/deskutils/py-paperless-ngx/files/pkg-message.in +++ b/deskutils/py-paperless-ngx/files/pkg-message.in @@ -4,4 +4,25 @@ Please see `man paperless-ngx' for details on how to configure paperless. 
EOM } +{ type: upgrade + message: <<EOM +After each upgrade, please check for differences between your config and +the sample configuration installed by the package: + + diff -u %%PREFIX%%/etc/paperless.conf.sample \ + %%PREFIX%%/etc/paperless.conf + +Then merge in changes as required by editing +%%PREFIX%%/etc/paperless.conf. +EOM +} +{ type: upgrade + maximum_version: "2.7.2_1" + message: <<EOM +Configuration of NLTK data has been fixed in port version 2.7.2_1. + +Please modify your %%PREFIX%%/etc/paperless.conf based on +`*NLTK*` settings found in %%PREFIX%%/etc/paperless.conf.sample. +EOM +} ]