git: 089d73ae02c2 - main - mail/bsfilter: update to recent version for ruby 3.3

From: Zsolt Udvari <uzsolt_at_FreeBSD.org>
Date: Sun, 21 Jul 2024 18:20:39 UTC
The branch main has been updated by uzsolt:

URL: https://cgit.FreeBSD.org/ports/commit/?id=089d73ae02c2650bf4a1f06148eba39eb6c63788

commit 089d73ae02c2650bf4a1f06148eba39eb6c63788
Author:     Yoshihiro Takahashi <nyan@FreeBSD.org>
AuthorDate: 2024-07-21 18:09:08 +0000
Commit:     Zsolt Udvari <uzsolt@FreeBSD.org>
CommitDate: 2024-07-21 18:19:43 +0000

    mail/bsfilter: update to recent version for ruby 3.3
    
    Clean Makefile
    Update WWW.
    Submitter takes maintainership.
    
    PR:             279009
    Obtained from:  https://osdn.net/cvs/view/bsfilter/bsfilter/bsfilter?revision=1.89&view=markup
---
 mail/bsfilter/Makefile                      |   19 +-
 mail/bsfilter/files/patch-bsfilter_bsfilter | 5075 +++++++++++++++++++++++++++
 2 files changed, 5082 insertions(+), 12 deletions(-)

diff --git a/mail/bsfilter/Makefile b/mail/bsfilter/Makefile
index cb927ee5b201..ea29b285cc29 100644
--- a/mail/bsfilter/Makefile
+++ b/mail/bsfilter/Makefile
@@ -1,29 +1,24 @@
 PORTNAME=	bsfilter
 PORTVERSION=	1.0.19
-PORTREVISION=	3
+PORTREVISION=	4
 CATEGORIES=	mail ruby
 MASTER_SITES=	OSDN/bsfilter
 
-MAINTAINER=	ports@FreeBSD.org
+MAINTAINER=	nyan@FreeBSD.org
 COMMENT=	Bayesian spam filter written in Ruby
-WWW=		https://en.sourceforge.jp/projects/bsfilter/
+WWW=		https://osdn.net/projects/bsfilter/
 
 LICENSE=	GPLv2
 
-RUN_DEPENDS=	rubygem-gdbm>=2.0.0,2:databases/rubygem-gdbm
+RUN_DEPENDS=	rubygem-gdbm>=2.0.0,2:databases/rubygem-gdbm \
+		rubygem-sdbm>=1.0.0:databases/rubygem-sdbm
 
-NO_BUILD=	yes
 USES=		ruby shebangfix tar:tgz
 SHEBANG_FILES=	bsfilter/bsfilter
+NO_BUILD=	yes
 
 OPTIONS_DEFINE=	EXAMPLES
 
-.include <bsd.port.pre.mk>
-
-.if ${RUBY_VER:M3*} != ""
-RUN_DEPENDS+=	rubygem-sdbm>=1.0.0:databases/rubygem-sdbm
-.endif
-
 do-install:
 	${INSTALL_SCRIPT} ${WRKSRC}/bsfilter/${PORTNAME} ${STAGEDIR}${PREFIX}/bin/${PORTNAME}
 	@${MKDIR} ${STAGEDIR}${EXAMPLESDIR}
@@ -34,4 +29,4 @@ do-install:
 	cd ${WRKSRC} && ${COPYTREE_SHARE} ${DIR} ${STAGEDIR}${EXAMPLESDIR}
 .endfor
 
-.include <bsd.port.post.mk>
+.include <bsd.port.mk>
diff --git a/mail/bsfilter/files/patch-bsfilter_bsfilter b/mail/bsfilter/files/patch-bsfilter_bsfilter
new file mode 100644
index 000000000000..ff5d3d2ca636
--- /dev/null
+++ b/mail/bsfilter/files/patch-bsfilter_bsfilter
@@ -0,0 +1,5075 @@
+--- bsfilter/bsfilter.orig	2013-11-03 10:22:15 UTC
++++ bsfilter/bsfilter
+@@ -1,6 +1,6 @@
+ #! /usr/bin/env ruby
+-## -*-Ruby-*- $Id: bsfilter,v 1.87 2013/11/03 10:22:15 nabeken Exp $
+-## Copyright (C) 2003, 2004, 2005, 2006 NABEYA Kenichi
++## -*-Ruby-*- $Id: bsfilter,v 1.89 2023/12/26 05:52:39 nabeken Exp $
++## Copyright (C) 2003-2023 NABEYA Kenichi
+ ##
+ ## This program is free software; you can redistribute it and/or modify
+ ## it under the terms of the GNU General Public License as published by
+@@ -16,115 +16,112 @@
+ ## along with this program; if not, write to the Free Software
+ ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ 
++require 'English'
+ require 'getoptlong'
+ require 'nkf'
+ 
+ class Bsfilter
+   def initialize
+-    @threads = Array::new
++    @threads = []
+     @token_dbs = nil
+-    @options = Hash::new
+-    @db_hash = Hash::new
++    @options = {}
++    @db_hash = {}
+     @jtokenizer = nil
+   end
+   attr_accessor :token_dbs
+ 
+-  Release = "$Name: release_1_0_19 $".split[1].sub(/\A[^\d]*/, '').gsub(/_/, '.')
+-  Release.concat("-") if (Release == "")
+-  Revision = "$Revision: 1.87 $".gsub(/[^\.\d]/, '')
+-  Languages = ["C", "ja"]
+-  Default_Language = "C"
++  Release = '$Name:  $'.split[1].sub(/\A[^\d]*/, '').gsub(/_/, '.')
++  Release.concat('-') if (Release == '')
++  Revision = '$Revision: 1.89 $'.gsub(/[^.\d]/, '')
++  Languages = %w[C ja].freeze
++  Default_Language = 'C'.freeze
+ 
+-##  Options = Hash::new           # used like a global variable
+-##  DB = Hash::new
+-  
+-  Default_header_prefix = "Spam"
+-  Default_spam_subject_prefix = "[SPAM] "
+-  Default_refer_header = 
+-    ["Ufrom", "From", "To", "Cc", "Subject", "Reply-to", "Return-path", "Received",
+-     "Content-Transfer-Encoding", "Content-Type", "charset", "Content-Disposition"].join(",")
+-  
+-  Default_jtokenizer = "bigram"
+-  Default_mark_in_token = "|!*'"
+-  Default_homedir = ".bsfilter"
+-  Default_conf_file = "bsfilter.conf"
+-  Default_pid_file = "bsfilter.pid"
+-  
+-  Default_method = "rf"           # Robinson Fisher
+-  Default_db = "sdbm"
+-  Default_max_mail = 10000
++  ##  Options = Hash::new           # used like a global variable
++  ##  DB = Hash::new
++
++  Default_header_prefix = 'Spam'.freeze
++  Default_spam_subject_prefix = '[SPAM] '.freeze
++  Default_refer_header =
++    %w[Ufrom From To Cc Subject Reply-to Return-path Received
++       Content-Transfer-Encoding Content-Type charset Content-Disposition].join(',')
++
++  Default_jtokenizer = 'bigram'.freeze
++  Default_mark_in_token = "|!*'".freeze
++  Default_homedir = '.bsfilter'.freeze
++  Default_conf_file = 'bsfilter.conf'.freeze
++  Default_pid_file = 'bsfilter.pid'.freeze
++
++  Default_method = 'rf'.freeze # Robinson Fisher
++  Default_db = 'sdbm'.freeze
++  Default_max_mail = 10_000
+   Default_min_mail = 8000
+   Default_max_line = 500
+-  
+-  Default_pop_proxy_if = "0.0.0.0"
+-  Default_pop_port = "110"
+-  Default_pop_proxy_port = "10110"
+-  Default_pop_max_size = 50000
+-  
+-  Default_imap_port = "143"
+-  Default_imap_auth = "auto"
+-  Default_imap_auth_preference = ["cram-md5", "login", "loginc"]
+ 
+-  Default_icon_number = 32512
+-  
+-  Clean_ext = ".clean"
+-  Spam_ext = ".spam"
+-  Prob_ext = ".prob"
+-  Lock_ext = ".lock"
+-  
+-  SDBM_ext = ".sdbm"
+-  GDBM_ext = ".gdbm"
+-  BDB1_ext = ".bdb1"
+-  BDB_ext = ".bdb"
+-  QDBM_ext = ".qdbm"
+-  
++  Default_pop_proxy_if = '0.0.0.0'.freeze
++  Default_pop_port = '110'.freeze
++  Default_pop_proxy_port = '10110'.freeze
++  Default_pop_max_size = 50_000
++
++  Default_imap_port = '143'.freeze
++  Default_imap_auth = 'auto'.freeze
++  Default_imap_auth_preference = %w[cram-md5 login loginc].freeze
++
++  Default_icon_number = 32_512
++
++  Clean_ext = '.clean'.freeze
++  Spam_ext = '.spam'.freeze
++  Prob_ext = '.prob'.freeze
++  Lock_ext = '.lock'.freeze
++
++  NDBM_ext = '.ndbm'.freeze
++  SDBM_ext = '.sdbm'.freeze
++  GDBM_ext = '.gdbm'.freeze
++  BDB1_ext = '.bdb1'.freeze
++  BDB_ext = '.bdb'.freeze
++  QDBM_ext = '.qdbm'.freeze
++
+   EXIT_NORMAL = 0
+   CODE_NORMAL = true
+   CODE_SPAM = true
+   CODE_CLEAN = false
+-  
+-  CODESET_EUCJP = "eucJP"
+-  CODESET_LATIN = "ISO8859-1"
+-  CODESET_GB18030 = "GB18030"
+-  CODESET_UTF8 = "UTF-8"
+-  PATTERN_UTF8 = '[\xe0-\xef][\x80-\xbf][\x80-\xbf][\xe0-\xef][\x80-\xbf][\x80-\xbf]'
+-  RE_UTF8 = Regexp.new(PATTERN_UTF8, nil, 'n')
+-  
+-  ALL_TAGS = ["html", "head", "title", "meta", "body", "div", "spam",
+-              "h1", "h2", "h3", "h4", "h5", "h6",
+-              "em", "strong", "font", "basefont", "big", "small",
+-              "b", "i", "s", "u", "tt", "sub", "sub",
+-              "rb", "rp", "rt","ruby",
+-              "blink", "marquee",
+-              "dfn", "cite", "abbr", "acronym",
+-              "blockquote", "q",
+-              "br", "pre", "ins", "del", "center", "style", "hr",
+-              "ul", "ol", "li", "dl", "dt", "dd",
+-              "table", "caption", "thead", "tbody", "tfoot",
+-              "colgroup", "col", "tr", "td", "th",
+-              "a", "link", "base", "img", "address",
+-              "form", "input", "select", "option", "textarea", "label",
+-              "fieldset", "legend", "optgroup",
+-              "frameset", "frame", "nofrmaes", "iframe"].join('|')
+-  
+-  SPACE_TAGS = "br|p|td|tr|table|ul|ol|dl|li|dt|dd"
+-  
+-  RE_ALL_TAGS = Regexp::compile('\A<(' + ALL_TAGS + ')\b', Regexp::IGNORECASE, 'n')
+-  RE_SPACE_TAGS = Regexp::compile('\A<(' + SPACE_TAGS + ')\b', Regexp::IGNORECASE, 'n')
+-  
+-  SOCKET_TIMEOUT = 30             # for single socket operation
+-  
++
++  LOG_CODESET = 'UTF-8'.freeze	# codeset for verbose and debug message. nil => no conversion
++
++  ALL_TAGS = %w[html head title meta body div spam
++                h1 h2 h3 h4 h5 h6
++                em strong font basefont big small
++                b i s u tt sub sub
++                rb rp rt ruby
++                blink marquee
++                dfn cite abbr acronym
++                blockquote q
++                br pre ins del center style hr
++                ul ol li dl dt dd
++                table caption thead tbody tfoot
++                colgroup col tr td th
++                a link base img address
++                form input select option textarea label
++                fieldset legend optgroup
++                frameset frame nofrmaes iframe].join('|')
++
++  SPACE_TAGS = 'br|p|td|tr|table|ul|ol|dl|li|dt|dd'.freeze
++
++  RE_ALL_TAGS = Regexp.compile('\A<(' + ALL_TAGS + ')\b', Regexp::IGNORECASE)
++  RE_SPACE_TAGS = Regexp.compile('\A<(' + SPACE_TAGS + ')\b', Regexp::IGNORECASE)
++
++  SOCKET_TIMEOUT = 30 # for single socket operation
++
+   module Bsutil
+     def insert_header!(buf, header, content)
+       buf[0] =~ /([\r\n]*)\z/
+-      eol = $1
+-      
+-      (0 ... buf.length).each do |i|
+-        if ((i == 0) &&         # unix from line
++      eol = ::Regexp.last_match(1)
++
++      (0...buf.length).each do |i|
++        if (i.zero? && # unix from line
+             (buf[i] =~ /\A>?from\s+(\S+)/))
+           next
+-        elsif (buf[i] =~/\A(.*?:)/)
+-          h = $1
++        elsif (buf[i] =~ /\A(.*?:)/)
++          h = ::Regexp.last_match(1)
+           if (h == header)
+             buf[i] = "#{header} #{content}#{eol}"
+             return
+@@ -134,7 +131,7 @@ class Bsfilter
+         elsif (buf[i] =~ /\A[\r\n]*\z/) # separator between header and body
+           buf[i, 0] = "#{header} #{content}#{eol}"
+           return
+-        else                    # not header. may be body without separator
++        else # not header. may be body without separator
+           buf[i, 0] = "#{header} #{content}#{eol}"
+           return
+         end
+@@ -144,17 +141,17 @@ class Bsfilter
+ 
+     def append_header!(buf, header, prefix)
+       buf[0] =~ /([\r\n]*)\z/
+-      eol = $1
++      eol = ::Regexp.last_match(1)
+       append_done = false
+-      (0 ... buf.length).each do |i|
+-        if (buf[i] =~/\A(.*?:)(\s*)(.*?)([\r\n]*)\z/)
+-          h = $1
+-          org_content = $3
++      (0...buf.length).each do |i|
++        if (buf[i] =~ /\A(.*?:)(\s*)(.*?)([\r\n]*)\z/)
++          h = ::Regexp.last_match(1)
++          org_content = ::Regexp.last_match(3)
+           if (h.downcase == header.downcase)
+             buf[i] = "#{header} #{prefix}#{org_content}#{eol}"
+             append_done = true
+           end
+-        elsif ((! append_done) &&
++        elsif (!append_done &&
+                (((buf[i] =~ /\A\S/) && (buf[i] !~ /\A\S+:/)) || # found body without separator
+                 (buf[i] =~ /\A[\r\n]*\z/))) # separator between header and body
+           buf[i, 0] = "#{header} #{prefix}#{eol}"
+@@ -166,104 +163,96 @@ class Bsfilter
+     end
+ 
+     def x_spam_flag
+-      return sprintf("X-%s-Flag:", @options["header-prefix"])
++      return format('X-%s-Flag:', @options['header-prefix'])
+     end
+-    
++
+     def x_spam_probability
+-      return sprintf("X-%s-Probability:", @options["header-prefix"])
++      return format('X-%s-Probability:', @options['header-prefix'])
+     end
+-    
++
+     def x_spam_revision
+-      return sprintf("X-%s-Revision:", @options["header-prefix"])
++      return format('X-%s-Revision:', @options['header-prefix'])
+     end
+-    
+-    def insert_headers!(buf, spam_flag, probability=nil)
++
++    def insert_headers!(buf, spam_flag, probability = nil)
+       updated = false
+-      if (@options["insert-revision"])
++      if (@options['insert-revision'])
+         insert_header!(buf, x_spam_revision, "bsfilter release #{Release} revision #{Revision}")
+         updated = true
+       end
+-      if (@options["insert-flag"])
++      if (@options['insert-flag'])
+         updated = true
+-        if (spam_flag)
+-          insert_header!(buf, x_spam_flag, "Yes")
++        if spam_flag
++          insert_header!(buf, x_spam_flag, 'Yes')
+         else
+-          insert_header!(buf, x_spam_flag, "No")
++          insert_header!(buf, x_spam_flag, 'No')
+         end
+       end
+-      if (@options["insert-probability"] && probability)
++      if (@options['insert-probability'] && probability)
+         updated = true
+-        insert_header!(buf, x_spam_probability, sprintf("%f", probability))
++        insert_header!(buf, x_spam_probability, format('%f', probability))
+       end
+-      if (@options["mark-spam-subject"])
++      if (@options['mark-spam-subject'])
+         updated = true
+-        if (spam_flag)
+-          append_header!(buf, "Subject:", @options["spam-subject-prefix"])
+-        end
++        append_header!(buf, 'Subject:', @options['spam-subject-prefix']) if spam_flag
+       end
+       return updated
+     end
+-  end                           # end of module
++  end
+ 
+   include Bsutil
+ 
+   class DevNull
+-    def sync=(*args)
+-    end
+-    def print(*args)
+-    end
+-    def printf(*args)
+-    end
++    def sync=(*args); end
++
++    def print(*args); end
++
++    def printf(*args); end
+   end
+ 
+   class DBHash < Hash
+-    def flatten(magic="###", head="", &block)
+-      self.each do |k, v|
+-        if (v.class == DBHash)
+-          if (head == "")
++    def flatten(magic = '###', head = '', &block)
++      each do |k, v|
++        if v.instance_of?(DBHash)
++          if (head == '')
+             v.flatten(magic, k, &block)
+           else
+             v.flatten(magic, head + magic + k, &block)
+           end
++        elsif (head == '')
++          yield k, v
+         else
+-          if (head == "")
+-            yield k, v
+-          else
+-            yield head + magic + k, v
+-          end
++          yield head + magic + k, v
+         end
+       end
+     end
+-    
++
+     def add(hash)
+       hash.each do |k, v|
+         if (self[k])
+-          if ((self[k].class == DBHash) &&
+-              (v.class == DBHash))
++          if (self[k].instance_of?(DBHash) &&
++              v.instance_of?(DBHash))
+             self[k].add(v)
+           else
+             self[k] += v
+           end
+         else
+-          self[k] = v             # should do deep copy ?
++          self[k] = v # should do deep copy ?
+         end
+       end
+     end
++
+     def sub(hash)
+       hash.each do |k, v|
+         if (self[k])
+-          if ((self[k].class == DBHash) &&
+-              (v.class == DBHash))
++          if (self[k].instance_of?(DBHash) &&
++              v.instance_of?(DBHash))
+             self[k].sub(v)
+-            if (self[k].empty?)
+-              self.delete(k)
+-            end
++            delete(k) if self[k].empty?
++          elsif (self[k] > v)
++            self[k] -= v
+           else
+-            if (self[k] > v)
+-              self[k] -= v
+-            else
+-              self.delete(k)
+-            end
++            delete(k)
+           end
+         end
+       end
+@@ -271,38 +260,38 @@ class Bsfilter
+   end
+ 
+   def safe_require(file)
+-    begin
+-      require file
+-      return true
+-    rescue LoadError
+-      return false
+-    end
++    require file
++    return true
++  rescue LoadError
++    return false
+   end
+ 
+   def latin2ascii(str)
+     str.force_encoding('ASCII-8BIT')
+     newstr = str.tr("\x92\x93\x94".force_encoding('ASCII-8BIT'), "'''")
+-    newstr.tr!("\xc0-\xc5\xc8-\xcb\xcc-\xcf\xd2-\xd6\xd9-\xdc".force_encoding('ASCII-8BIT'), "AAAAAAEEEEIIIIOOOOOUUUU")
+-    newstr.tr!("\xe0-\xe5\xe8-\xeb\xec-\xef\xf2-\xf6\xf9-\xfc".force_encoding('ASCII-8BIT'), "aaaaaaeeeeiiiiooooouuuu")
++    newstr.tr!("\xc0-\xc5\xc8-\xcb\xcc-\xcf\xd2-\xd6\xd9-\xdc".force_encoding('ASCII-8BIT'), 'AAAAAAEEEEIIIIOOOOOUUUU')
++    newstr.tr!("\xe0-\xe5\xe8-\xeb\xec-\xef\xf2-\xf6\xf9-\xfc".force_encoding('ASCII-8BIT'), 'aaaaaaeeeeiiiiooooouuuu')
+     return newstr
+   end
+ 
+   def u2eucjp(str)
+-      return NKF::nkf('-e -E -X -Z0', str.encode('EUC-JP', 'UTF-8', :undef => :replace, :invalid => :replace))
++    return NKF.nkf('-e -E -X -Z0', str.encode('EUC-JP', 'UTF-8', undef: :replace, invalid: :replace))
+   end
++
+   def u2latin(str)
+-    return str.encode('US-ASCII', 'UTF-8', :undef => :replace, :invalid => :replace)
++    return str.encode('US-ASCII', 'UTF-8', undef: :replace, invalid: :replace)
+   end
++
+   def gb180302eucjp(str)
+-    return str.encode('EUC-JP', 'BIG5', :undef => :replace, :invalid => :replace)
++    return str.encode('EUC-JP', 'BIG5', undef: :replace, invalid: :replace)
+   end
+-  
++
+   def open_ro(file)
+-    if (file == "-")
+-      fh = STDIN
++    if (file == '-')
++      fh = $stdin
+       yield fh
+-    elsif (file.class == Array)
+-      file.instance_eval <<EOM
++    elsif file.instance_of?(Array)
++      file.instance_eval <<EOM, __FILE__, __LINE__ + 1
+       @eof = false
+       def gets
+         @n = 0 if (! @n)
+@@ -323,66 +312,67 @@ class Bsfilter
+ EOM
+       yield file
+     else
+-      if (! FileTest::file?(file))
+-        raise sprintf("%s is not file", file)
++      if (! FileTest.file?(file))
++        raise format('%s is not file', file)
+       end
+-      fh = open(file, "rb")
++
++      fh = File.open(file, 'rb')
+       yield fh
+       fh.close
+     end
+   end
+-  
++
+   def open_wo(file, &block)
+-    if (file == "-")
+-      fh = STDOUT
++    if (file == '-')
++      fh = $stdout
+     else
+-      fh = open(file, "wb")
++      fh = open(file, 'wb')
+     end
+     if (block)
+       yield fh
+-      if (file != "-")
++      if (file != '-')
+         fh.close
+       end
+     else
+       return fh
+     end
+   end
+-  
++
+   class FLOAT
+-    def initialize(f=0, power=1)
++    def initialize(f = 0, power = 1)
+       @mant = 0
+       @exp = 0
+       set_f(f, power)
+     end
+     attr_accessor :mant, :exp
+-    
++
+     def to_f
+-      return @mant * Math::exp(@exp)
++      return @mant * Math.exp(@exp)
+     end
+-    
++
+     def ln
+-      return Math::log(@mant) + @exp
++      return Math.log(@mant) + @exp
+     end
+-    
+-    def * (a)
+-      if (a.class == FLOAT)
+-        n = FLOAT::new
++
++    def *(a)
++      n = FLOAT.new
++      if a.instance_of?(FLOAT)
+         n.mant = @mant * a.mant
+         n.exp = @exp + a.exp
+       else
+-        n = FLOAT::new
+         n.exp = @exp
+         n.mant = @mant * a
+       end
+       return n
+     end
+-    def set_f (a, power=1)
+-      if (a > 0)
++
++    def set_f(a, power = 1)
++      if a.positive?
+         @mant = 1
+-        @exp = Math::log(a) * power
+-      elsif (a < 0)
++        @exp = Math.log(a) * power
++      elsif a.negative?
+         @mant = -1
+-        @exp = Math::log(-a) * power
++        @exp = Math.log(-a) * power
+       else
+         @mant = 0
+         @exp = 0
+@@ -390,24 +380,24 @@ EOM
+       self
+     end
+   end
+-  
+-  
++
+   module TokenAccess
+     def check_size(max_size, min_size)
+       if ((@file_count <= max_size) || (max_size <= 0) || (min_size <= 0))
+         return false
+       end
++
+       old_count = @file_count
+-      if (@options["verbose"])
+-        @options["message-fh"].printf("reduce token database %s from %d to %d\n", @filename, old_count, min_size)
++      if (@options['verbose'])
++        @options['message-fh'].printf("reduce token database %s from %d to %d\n", @filename, old_count, min_size)
+       end
+-      
++
+       key_cts.each do |(category, token)|
+-        if (category != ".internal")
++        if (category != '.internal')
+           v = value(category, token) || 0
+           sub_scalar(category, token, (v * (old_count - min_size).to_f / old_count.to_f).ceil)
+-          if (@options["debug"] && ! value(category, token))
+-            @options["message-fh"].printf("deleted %s %s\n", category, token)
++          if (@options['debug'] && ! value(category, token))
++            @options['message-fh'].printf("deleted %s %s\n", category, token.to_utf8)
+           end
+         end
+       end
+@@ -415,41 +405,47 @@ EOM
+       @dirty = true
+       return true
+     end
+-    
++
+     def value_with_degene(category, token)
+-      if (value(category, token))
++      if value(category, token)
+         return value(category, token)
+-      elsif (! @options["degeneration"])           # no degeneration
++      elsif (!@options['degeneration']) # no degeneration
+         return nil
+       else
+-        if (v = value(category, token[0 .. -2])) # cut last char
+-          return v 
++        if (v = value(category, token[0..-2])) # cut last char
++          return v
+         end
+-        token = token.gsub(Regexp::compile("[#{@options['mark-in-token']}]"), '')
++
++        token = token.gsub(Regexp.compile("[#{@options['mark-in-token']}]"), '')
+         if (v = value(category, token))
+-          return v 
++          return v
+         end
++
+         token = token.downcase
+         if (v = value(category, token))
+-          return v 
++          return v
+         end
++
+         token = token.upcase
+         if (v = value(category, token))
+-          return v 
++          return v
+         end
++
+         token = token.capitalize
+         if (v = value(category, token))
+-          return v 
++          return v
+         end
++
+         return nil
+       end
+     end
++
+     def set_scalar(category, token, val)
+       @dirty = true
+       @file_count += 1
+       set(category, token, val)
+     end
+-    
++
+     def add_scalar(category, token, val)
+       @dirty = true
+       @file_count += 1
+@@ -459,58 +455,58 @@ EOM
+         set(category, token, val)
+       end
+     end
+-    
++
+     def show_new_token(db)
+       db.each_ct do |category, token|
+-        if (! value(category, token) || (value(category, token) == 0))
+-          @options["message-fh"].printf("new %s %s\n", category, token)
++        if (!value(category, token) || value(category, token).zero?)
++          @options['message-fh'].printf("new %s %s\n", category, token.to_utf8)
+         end
+       end
+     end
+-    
++
+     def values
+-      array = Array::new
++      array = []
+       each_ct do |c, t|
+         array.push(value(c, t))
+       end
+       return array
+     end
+-    
++
+     def key_cts
+-      array = Array::new
++      array = []
+       each_ct do |c, t|
+         array.push([c, t])
+       end
+       return array
+     end
+-    
++
+     def export(fh)
+       each_ct do |category, token|
+-        fh.printf("%s %s %s %g\n", @language, category, token, value(category, token)) if (value(category, token))
++        fh.printf("%s %s %s %g\n", @language, category, token, value(category, token)) if value(category, token)
+       end
+     end
+   end
+-  
++
+   class TokenDB
+     include TokenAccess
+-    
+-    def initialize(language=nil)
+-      @hash = DBHash::new
++
++    def initialize(language = nil)
++      @hash = DBHash.new
+       @file_count = 0
+       @language = language
+-      @message_id = "-"
++      @message_id = '-'
+       @probability = nil
+       @spam_flag = nil
+       @dirty = false
+       @time = nil
+-      @filename = "-"
++      @filename = '-'
+     end
+     attr_accessor :hash, :file_count, :probability, :language, :spam_flag, :message_id, :time, :filename
+-    
++
+     def size
+       @hash.size
+     end
+-    
++
+     def each_ct
+       @hash.each_key do |category|
+         @hash[category].each_key do |token|
+@@ -518,9 +514,9 @@ EOM
+         end
+       end
+     end
+-    
++
+     def value(category, token)
+-      if (! @hash[category])
++      if (!@hash[category])
+         return nil
+       elsif (v = @hash[category][token])
+         return v
+@@ -528,14 +524,14 @@ EOM
+         return nil
+       end
+     end
+-    
++
+     def set(category, token, v)
+       @dirty = true
+-      @hash[category] = DBHash::new if (! @hash[category])
++      @hash[category] = DBHash.new if (! @hash[category])
+       @hash[category][token] = v
+     end
+-    
+-    def print_keys_to_str(hash, separator, fh=STDOUT)
++
++    def print_keys_to_str(hash, separator, fh = $stdout)
+       hash.keys.sort.each do |k|
+         v = hash[k]
+         v = v.to_i
+@@ -543,57 +539,49 @@ EOM
+         fh.print(([k] * v).join(separator))
+       end
+     end
+-    
++
+     def clear
+       @dirty = true
+       @file_count = 0
+-      @hash = DBHash::new
++      @hash = DBHash.new
+     end
+-    
++
+     def add_db(db)
+       @dirty = true
+       @file_count += db.file_count
+-      if (! @language && db.language)
+-        @language = db.language
+-      end
++      @language = db.language if (!@language && db.language)
+       @hash.add(db.hash)
+     end
+-    
++
+     def add_hash(hash)
+       @dirty = true
+       @file_count += 1
+       @hash.add(hash)
+     end
+-    
++
+     def sub_scalar(category, token, val)
+-      if (@file_count > 0)
+-        @file_count -= 1
+-      end
+-      @hash.sub({category => {token => val}})
++      @file_count -= 1 if @file_count.positive?
++      @hash.sub({ category => { token => val } })
+     end
+-    
++
+     def sub_hash(hash)
+       @dirty = true
+-      if (@file_count > 0)
+-        @file_count -= 1
+-      end
++      @file_count -= 1 if @file_count.positive?
+       @hash.sub(hash)
+     end
+-    
++
+     def sub_db(db)
+       @dirty = true
+       @file_count -= db.file_count
+-      if (@file_count < 1)
+-        @file_count = 1
+-      end
++      @file_count = 1 if (@file_count < 1)
+       @hash.sub(db.hash)
+     end
+   end
+-  
++
+   class TokenDBM
+     include TokenAccess
+-    MAGIC = "###"
+-    def initialize(options, language, ext)
++    MAGIC = '###'.freeze
++    def initialize(options, language, _ext)
+       @options = options
+       @dbm = nil                  # SDBM not Hash
+       @dirty = nil                # not used. for TokenAccess
+@@ -602,13 +590,13 @@ EOM
+       @language = language
+     end
+     attr_accessor :file_count
+-    
++
+     def size
+       @dbm.size
+     end
+-    
++
+     def to_db
+-      token_db = TokenDB::new(@language)
++      token_db = TokenDB.new(@language)
+       @dbm.each do |ct, v|
+         (category, token) = ct.split(Regexp.new(MAGIC), 2)
+         token_db.set(category, token, v)
+@@ -616,25 +604,25 @@ EOM
+       end
+       return token_db
+     end
+-    
++
+     def clear
+       @dbm.clear
+       @file_count = 0
+-      set(".internal", "file_count", 0)
++      set('.internal', 'file_count', 0)
+     end
+-    
++
+     def each_ct
+       @dbm.each_key do |ct|
+         (category, token) = ct.force_encoding('ASCII-8BIT').split(Regexp.new(MAGIC), 2)
+         yield(category, token) if (category && token)
+       end
+     end
+-    
++
+     def add_db(token_db)
+       add_hash(token_db.hash)
+       @file_count += + token_db.file_count
+     end
+-    
++
+     def add_hash(hash)
+       @dirty = true
+       hash.flatten(MAGIC) do |k, v|
+@@ -645,15 +633,16 @@ EOM
+         end
+       end
+     end
+-    
++
+     def sub_db(token_db)
+       sub_hash(token_db.hash)
+       if (@file_count > token_db.file_count)
+         @file_count -= token_db.file_count
+       else
+-        @file_count= 0
++        @file_count = 0
+       end
+     end
++
+     def sub_hash(hash)
+       @dirty = true
+       hash.flatten(MAGIC) do |k, v|
+@@ -666,24 +655,27 @@ EOM
+         end
+       end
+     end
+-    
++
+     def value(category, token)
+       v = @dbm[category + MAGIC + token]
+-      if (v)
+-        return v.to_f
+-      else
+-        return nil
+-      end
++      return v.to_f if v
++
++      return nil
+     end
+-    
++
+     def set(category, token, v)
+       @dirty = true
+-      @dbm[category + MAGIC + token] = v.to_s
++      begin
++        @dbm[category + MAGIC + token] = v.to_s
++      rescue
++        @options['message-fh'].puts($ERROR_INFO.inspect, category + MAGIC + token, v.to_s) if (@options['verbose'])
++        @options['message-fh'].puts($ERROR_POSITION) if (@options['debug'])
++      end
+     end
+-    
++
+     def sub_scalar(category, token, v)
+       @dirty = true
+-      if (@file_count > 0)
++      if (@file_count.positive?)
+         @file_count -= 1
+       end
*** 4164 LINES SKIPPED ***