git: d716cbc8e98a - main - deskutils/py-paperless-ngx: Fixes and improvements

From: Michael Gmelin <grembo_at_FreeBSD.org>
Date: Wed, 24 Apr 2024 21:59:52 UTC
The branch main has been updated by grembo:

URL: https://cgit.FreeBSD.org/ports/commit/?id=d716cbc8e98a9410782bbad8139f7b6457920e5b

commit d716cbc8e98a9410782bbad8139f7b6457920e5b
Author:     Michael Gmelin <grembo@FreeBSD.org>
AuthorDate: 2024-04-24 12:39:17 +0000
Commit:     Michael Gmelin <grembo@FreeBSD.org>
CommitDate: 2024-04-24 21:56:50 +0000

    deskutils/py-paperless-ngx: Fixes and improvements
    
    - Fix NLTK configuration in paperless.conf.sample
    - Mention snowball_data in NLTK instructions
    - Fix sad defect that prevented rc scripts from picking
      up configuration data from rc.conf
    - Improve startup script documentation
    - Document required ACL when using a remote redis instance
    - Document how to enable JBIG2 and its potential pitfalls
    
    PR:             278424, 278425, 278426
    Reported by:    anonymous bugzilla user, netchild
    MFH:            2024Q2
---
 deskutils/py-paperless-ngx/Makefile                |  1 +
 deskutils/py-paperless-ngx/files/paperless-beat.in | 12 ++++++-----
 .../py-paperless-ngx/files/paperless-consumer.in   |  8 +++++---
 .../py-paperless-ngx/files/paperless-flower.in     | 12 ++++++-----
 .../py-paperless-ngx/files/paperless-migrate.in    | 12 ++++++-----
 .../py-paperless-ngx/files/paperless-ngx.7.in      | 24 +++++++++++++++++++---
 .../py-paperless-ngx/files/paperless-webui.in      | 20 +++++++++++-------
 .../py-paperless-ngx/files/paperless-worker.in     | 12 ++++++-----
 .../files/patch-paperless.conf.example             | 10 ++++-----
 deskutils/py-paperless-ngx/files/pkg-message.in    | 21 +++++++++++++++++++
 10 files changed, 93 insertions(+), 39 deletions(-)

diff --git a/deskutils/py-paperless-ngx/Makefile b/deskutils/py-paperless-ngx/Makefile
index 347baff5bc06..f8885ccace83 100644
--- a/deskutils/py-paperless-ngx/Makefile
+++ b/deskutils/py-paperless-ngx/Makefile
@@ -1,6 +1,7 @@
 PORTNAME=	paperless-ngx
 PORTVERSION=	2.7.2
 DISTVERSIONPREFIX=	v
+PORTREVISION=	1
 CATEGORIES=	deskutils python
 MASTER_SITES=	https://github.com/${PORTNAME}/${PORTNAME}/releases/download/${DISTVERSIONPREFIX}${DISTVERSION}/:webui \
 		GH:gh
diff --git a/deskutils/py-paperless-ngx/files/paperless-beat.in b/deskutils/py-paperless-ngx/files/paperless-beat.in
index 76c977d9c2ea..3fa2f2b4d4a6 100755
--- a/deskutils/py-paperless-ngx/files/paperless-beat.in
+++ b/deskutils/py-paperless-ngx/files/paperless-beat.in
@@ -11,11 +11,13 @@
 # paperless_beat_enable (bool):		Set to "NO" by default.
 #					Set it to "YES" to enable
 #					paperless_beat
-# paperless_beat_log_facility(str):	Set to "daemon" by default.
+# paperless_beat_user (str):		Set to "paperless" by default.
+#					User to run beat.
+# paperless_beat_log_facility (str):	Set to "daemon" by default.
 #					Beat logging syslog facility.
-# paperless_beat_log_priority(str):	Set to "notice" by default.
+# paperless_beat_log_priority (str):	Set to "notice" by default.
 #					Beat logging syslog priority.
-# paperless_beat_log_tag(str):		Set to "paperless-beat" by default.
+# paperless_beat_log_tag (str):		Set to "paperless-beat" by default.
 #					Beat logging syslog tag.
 
 . /etc/rc.subr
@@ -23,7 +25,8 @@
 name="paperless_beat"
 rcvar="${name}_enable"
 
-# Set defaults
+# read configuration and set defaults
+load_rc_config "$name"
 paperless_beat_enable=${paperless_beat_enable:-"NO"}
 paperless_beat_daemon_user=${paperless_beat_user:-"paperless"}
 paperless_beat_log_facility=${paperless_beat_log_facility:-"daemon"}
@@ -45,5 +48,4 @@ command_args="-cS -u ${paperless_beat_daemon_user} \
 	%%PREFIX%%/bin/celery \
 	--app paperless beat --loglevel INFO"
 
-load_rc_config ${name}
 run_rc_command "$1"
diff --git a/deskutils/py-paperless-ngx/files/paperless-consumer.in b/deskutils/py-paperless-ngx/files/paperless-consumer.in
index 806003623d2d..987234567de8 100755
--- a/deskutils/py-paperless-ngx/files/paperless-consumer.in
+++ b/deskutils/py-paperless-ngx/files/paperless-consumer.in
@@ -11,11 +11,13 @@
 # paperless_consumer_enable (bool):	Set to "NO" by default.
 #					Set it to "YES" to enable
 #					paperless_consumer
+# paperless_consumer_user (str):	Set to "paperless" by default.
+#					User to run consumer.
 # paperless_consumer_log_facility(str): Set to "daemon" by default.
 #					Consumer logging syslog facility.
 # paperless_consumer_log_priority(str): Set to "notice" by default.
 #					Consumer logging syslog priority.
-# paperless_consumer_log_tag(str):	Set to "paperless-consumer" by default.
+# paperless_consumer_log_tag (str):	Set to "paperless-consumer" by default.
 #					Consumer logging syslog tag.
 
 . /etc/rc.subr
@@ -23,7 +25,8 @@
 name="paperless_consumer"
 rcvar="${name}_enable"
 
-# Set defaults
+# read configuration and set defaults
+load_rc_config "$name"
 paperless_consumer_enable=${paperless_consumer_enable:-"NO"}
 paperless_consumer_daemon_user=${paperless_consumer_user:-"paperless"}
 paperless_consumer_log_facility=${paperless_consumer_log_facility:-"daemon"}
@@ -45,5 +48,4 @@ command_args="-cS -u ${paperless_consumer_daemon_user} \
 	-T ${paperless_consumer_log_tag} \
 	%%PREFIX%%/bin/paperless document_consumer"
 
-load_rc_config ${name}
 run_rc_command "$1"
diff --git a/deskutils/py-paperless-ngx/files/paperless-flower.in b/deskutils/py-paperless-ngx/files/paperless-flower.in
index 87f5942f365a..3690d41da3a8 100755
--- a/deskutils/py-paperless-ngx/files/paperless-flower.in
+++ b/deskutils/py-paperless-ngx/files/paperless-flower.in
@@ -10,11 +10,13 @@
 # paperless_flower_enable (bool):	Set to "NO" by default.
 #					Set it to "YES" to enable
 #					paperless_flower
-# paperless_flower_log_facility(str):	Set to "daemon" by default.
+# paperless_flower_user (str):		Set to "paperless" by default.
+#					User to run flower.
+# paperless_flower_log_facility (str):	Set to "daemon" by default.
 #					Flower logging syslog facility.
-# paperless_flower_log_priority(str):	Set to "notice" by default.
+# paperless_flower_log_priority (str):	Set to "notice" by default.
 #					Flower logging syslog priority.
-# paperless_flower_log_tag(str):	Set to "paperless-flower" by default.
+# paperless_flower_log_tag (str):	Set to "paperless-flower" by default.
 #					Flower logging syslog tag.
 
 . /etc/rc.subr
@@ -22,7 +24,8 @@
 name="paperless_flower"
 rcvar="${name}_enable"
 
-# Set defaults
+# read configuration and set defaults
+load_rc_config "$name"
 paperless_flower_enable=${paperless_flower_enable:-"NO"}
 paperless_flower_daemon_user=${paperless_flower_user:-"paperless"}
 paperless_flower_log_facility=${paperless_flower_log_facility:-"daemon"}
@@ -44,5 +47,4 @@ command_args="-cS -u ${paperless_flower_daemon_user} \
 	%%PREFIX%%/bin/celery \
 	--app paperless flower --url-prefix=flower --loglevel INFO"
 
-load_rc_config ${name}
 run_rc_command "$1"
diff --git a/deskutils/py-paperless-ngx/files/paperless-migrate.in b/deskutils/py-paperless-ngx/files/paperless-migrate.in
index 596fa621e8dc..91bd702d2b10 100755
--- a/deskutils/py-paperless-ngx/files/paperless-migrate.in
+++ b/deskutils/py-paperless-ngx/files/paperless-migrate.in
@@ -11,11 +11,13 @@
 # paperless_migrate_enable (bool):	Set to "NO" by default.
 #					Set it to "YES" to enable
 #					paperless_migrate
-# paperless_migrate_log_facility(str):	Set to "daemon" by default.
+# paperless_migrate_user (str):		Set to "paperless" by default.
+#					User to run migrate.
+# paperless_migrate_log_facility (str):	Set to "daemon" by default.
 #					Migrate logging syslog facility.
-# paperless_migrate_log_priority(str):	Set to "notice" by default.
+# paperless_migrate_log_priority (str):	Set to "notice" by default.
 #					Migrate logging syslog priority.
-# paperless_migrate_log_tag(str):	Set to "paperless-migrate" by default.
+# paperless_migrate_log_tag (str):	Set to "paperless-migrate" by default.
 #					Migrate logging syslog tag.
 
 . /etc/rc.subr
@@ -23,7 +25,8 @@
 name="paperless_migrate"
 rcvar="${name}_enable"
 
-# Set defaults
+# read configuration and set defaults
+load_rc_config "$name"
 paperless_migrate_enable=${paperless_migrate_enable:-"NO"}
 paperless_migrate_user=${paperless_migrate_user:-"paperless"}
 paperless_migrate_log_facility=${paperless_migrate_log_facility:-"daemon"}
@@ -44,5 +47,4 @@ paperless_migrate_run()
 		-c 'sh -c "%%PREFIX%%/bin/paperless migrate --no-color -v 0"'
 }
 
-load_rc_config ${name}
 run_rc_command "$1"
diff --git a/deskutils/py-paperless-ngx/files/paperless-ngx.7.in b/deskutils/py-paperless-ngx/files/paperless-ngx.7.in
index d809cf9eaceb..6f771e4713e3 100644
--- a/deskutils/py-paperless-ngx/files/paperless-ngx.7.in
+++ b/deskutils/py-paperless-ngx/files/paperless-ngx.7.in
@@ -23,7 +23,7 @@
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 22, 2024
+.Dd April 24, 2024
 .Dt PAPERLESS-NGX 7
 .Os
 .Sh NAME
@@ -120,6 +120,12 @@ Modify
 to match the configured credentials (when running on localhost,
 it is possible to use no special credentials).
 .Pp
+In case redis is not running on localhost, an ACL entry needs to
+be added to grant permissions to the user used to access the instance:
+.Bd -literal -offset indent
+user paperlessusername on +@all -@admin ~* &*
+.Ed
+.Pp
 The URL paperless is hosted on needs to be configued by setting
 .Va PAPERLESS_URL ,
 it is also possible to tune
@@ -159,8 +165,8 @@ In order to process scanned documents using machine learning,
 paperless-ngx requires NLTK (natural language toolkit) data.
 The required files can be downloaded by using these commands:
 .Bd -literal -offset indent
-%%PYTHON_CMD%% -m nltk.downloader \\
-  stopwords punkt -d /var/db/paperless/nltkdata
+su -l paperless -c '%%PYTHON_CMD%% -m nltk.downloader \\
+  stopwords snowball_data punkt -d /var/db/paperless/nltkdata'
 .Ed
 .Pp
 Normally, the document classifier is run automatically by
@@ -178,6 +184,18 @@ It can be enabled and started like this:
 service paperless-flower enable
 service paperless-flower start
 .Ed
+.Sh JBIG2 ENCODING
+In case a binary named `jbig2enc' is found in $PATH, textproc/py-ocrmypdf
+will automatically pick it up to encode PDFs with it.
+.Pp
+A patch to add a port skeleton for jbig2enc for manual building
+on a local ports tree can be found here:
+.Pa https://people.freebsd.org/~grembo/graphics-jbig2enc.patch
+.Pp
+There are various considerations to be made when using jbig2enc,
+including potential patent claims and regulatory requirements,
+see also
+.Pa https://en.wikipedia.org/wiki/JBIG2 .
 .Sh WEB UI SETUP
 Before using the web ui, make sure to create a super user and assign
 a password
diff --git a/deskutils/py-paperless-ngx/files/paperless-webui.in b/deskutils/py-paperless-ngx/files/paperless-webui.in
index 19330fe57996..1e191883f00b 100755
--- a/deskutils/py-paperless-ngx/files/paperless-webui.in
+++ b/deskutils/py-paperless-ngx/files/paperless-webui.in
@@ -11,16 +11,22 @@
 # paperless_webui_enable (bool):	Set to "NO" by default.
 #					Set it to "YES" to enable
 #					paperless_webui
-# paperless_webui_log_facility(str):	Set to "daemon" by default.
+# paperless_webui_user (str):		Set to "paperless" by default.
+#					User to run webui.
+# paperless_webui_log_facility (str):	Set to "daemon" by default.
 #					WebUI logging syslog facility.
-# paperless_webui_log_priority(str):	Set to "notice" by default.
+# paperless_webui_log_priority (str):	Set to "notice" by default.
 #					WebUI logging syslog priority.
-# paperless_webui_log_tag(str):		Set to "paperless-webui" by default.
+# paperless_webui_log_tag (str):	Set to "paperless-webui" by default.
 #					WebUI logging syslog tag.
-# paperless_webui_restart_delay(int):	Set to "5" by default.
+# paperless_webui_listen_address (str):	Set to "127.0.0.1" by default.
+#					IP address to listen on.
+# paperless_webui_listen_port (int):	Set to "8000" by default.
+#					Port to listen on.
+# paperless_webui_restart_delay (int):	Set to "5" by default.
 #					WebUI automatic restart delay in
 #					seconds.
-# paperless_webui_secret_key(str):	Set to
+# paperless_webui_secret_key (str):	Set to
 #					"%%PREFIX%%/etc/paperless_secret_key"
 #					by default. Load PAPERLESS_SECRET_KEY
 #					from this file from start, in case it
@@ -36,7 +42,8 @@
 name="paperless_webui"
 rcvar="${name}_enable"
 
-# Set defaults
+# read configuration and set defaults
+load_rc_config "$name"
 paperless_webui_enable=${paperless_webui_enable:-"NO"}
 paperless_webui_daemon_user=${paperless_webui_user:-"paperless"}
 paperless_webui_log_facility=${paperless_webui_log_facility:-"daemon"}
@@ -87,5 +94,4 @@ paperless_webui_prestart()
 	fi
 }
 
-load_rc_config ${name}
 run_rc_command "$1"
diff --git a/deskutils/py-paperless-ngx/files/paperless-worker.in b/deskutils/py-paperless-ngx/files/paperless-worker.in
index 51568e68415a..db99288a005e 100755
--- a/deskutils/py-paperless-ngx/files/paperless-worker.in
+++ b/deskutils/py-paperless-ngx/files/paperless-worker.in
@@ -11,11 +11,13 @@
 # paperless_worker_enable (bool):	Set to "NO" by default.
 #					Set it to "YES" to enable
 #					paperless_worker
-# paperless_worker_log_facility(str):	Set to "daemon" by default.
+# paperless_worker_user (str):		Set to "paperless" by default.
+#					User to run worker.
+# paperless_worker_log_facility (str):	Set to "daemon" by default.
 #					Worker logging syslog facility.
-# paperless_worker_log_priority(str):	Set to "notice" by default.
+# paperless_worker_log_priority (str):	Set to "notice" by default.
 #					Worker logging syslog priority.
-# paperless_worker_log_tag(str):	Set to "paperless-worker" by default.
+# paperless_worker_log_tag (str):	Set to "paperless-worker" by default.
 #					Worker logging syslog tag.
 
 . /etc/rc.subr
@@ -23,7 +25,8 @@
 name="paperless_worker"
 rcvar="${name}_enable"
 
-# Set defaults
+# read configuration and set defaults
+load_rc_config "$name"
 paperless_worker_enable=${paperless_worker_enable:-"NO"}
 paperless_worker_daemon_user=${paperless_worker_user:-"paperless"}
 paperless_worker_log_facility=${paperless_worker_log_facility:-"daemon"}
@@ -45,5 +48,4 @@ command_args="-cS -u ${paperless_worker_daemon_user} \
 	%%PREFIX%%/bin/celery \
 	--app paperless worker --loglevel INFO --without-mingle --without-gossip"
 
-load_rc_config ${name}
 run_rc_command "$1"
diff --git a/deskutils/py-paperless-ngx/files/patch-paperless.conf.example b/deskutils/py-paperless-ngx/files/patch-paperless.conf.example
index 12cc2465a4b0..e9c565cfdf84 100644
--- a/deskutils/py-paperless-ngx/files/patch-paperless.conf.example
+++ b/deskutils/py-paperless-ngx/files/patch-paperless.conf.example
@@ -50,13 +50,11 @@
  #PAPERLESS_TASK_WORKERS=1
  #PAPERLESS_THREADS_PER_WORKER=1
  #PAPERLESS_TIME_ZONE=UTC
-@@ -92,3 +106,9 @@
+@@ -92,3 +106,7 @@
  
  #PAPERLESS_CONVERT_BINARY=/usr/bin/convert
  #PAPERLESS_GS_BINARY=/usr/bin/gs
 +
-+# NLTK settings
-+NLTK_DIR=/var/db/paperless/nltk_data
-+#NLTK_ENABLED=yes
-+# below defaults to OCR_LANGUAGE
-+#NLTK_LANGUAGE=
++# NLTK settings - see `man 7 paperless-ngx`
++PAPERLESS_NLTK_DIR=/var/db/paperless/nltkdata
++PAPERLESS_ENABLE_NLTK=yes
diff --git a/deskutils/py-paperless-ngx/files/pkg-message.in b/deskutils/py-paperless-ngx/files/pkg-message.in
index 3a67d4c433b9..78d477691107 100644
--- a/deskutils/py-paperless-ngx/files/pkg-message.in
+++ b/deskutils/py-paperless-ngx/files/pkg-message.in
@@ -4,4 +4,25 @@
   Please see `man paperless-ngx' for details on how to configure paperless.
 EOM
 }
+{ type: upgrade
+  message: <<EOM
+After each upgrade, please check for differences between your config and
+the sample configuration installed by the package:
+
+    diff -u %%PREFIX%%/etc/paperless.conf.sample \
+           %%PREFIX%%/etc/paperless.conf
+
+Then merge in changes as required by editing
+%%PREFIX%%/etc/paperless.conf.
+EOM
+}
+{ type: upgrade
+  maximum_version: "2.7.2_1"
+  message: <<EOM
+Configuration of NLTK data has been fixed in port version 2.7.2_1.
+
+Please modify your %%PREFIX%%/etc/paperless.conf based on
+`*NLTK*` settings found in %%PREFIX%%/etc/paperless.conf.sample.
+EOM
+}
 ]