git: 62894c093bc7 - main - www/py-beautifulsoup449: Add py-beautifulsoup449 4.9.3 (copied from py-beautifulsoup448)
Date: Mon, 18 Apr 2022 00:00:20 UTC
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=62894c093bc7f6fe7dcde5c97f901a92e87b09bb

commit 62894c093bc7f6fe7dcde5c97f901a92e87b09bb
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2022-04-17 23:39:17 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2022-04-17 23:39:17 +0000

    www/py-beautifulsoup449: Add py-beautifulsoup449 4.9.3 (copied from py-beautifulsoup448)

    - Add PORTSCOUT
---
 www/Makefile                             |   1 +
 www/py-beautifulsoup449/Makefile         |  25 +
 www/py-beautifulsoup449/distinfo         |   3 +
 www/py-beautifulsoup449/files/patch-2to3 | 901 +++++++++++++++++++++++++++++++
 www/py-beautifulsoup449/pkg-descr        |  12 +
 5 files changed, 942 insertions(+)

diff --git a/www/Makefile b/www/Makefile
index b82a03a159ce..1f363aaca031 100644
--- a/www/Makefile
+++ b/www/Makefile
@@ -1429,6 +1429,7 @@
     SUBDIR += py-azure-storage
     SUBDIR += py-beaker
     SUBDIR += py-beautifulsoup
+    SUBDIR += py-beautifulsoup449
     SUBDIR += py-betamax
     SUBDIR += py-biscuits
     SUBDIR += py-bjoern
diff --git a/www/py-beautifulsoup449/Makefile b/www/py-beautifulsoup449/Makefile
new file mode 100644
index 000000000000..0cc1d0e4d413
--- /dev/null
+++ b/www/py-beautifulsoup449/Makefile
@@ -0,0 +1,25 @@
+# Created by: Mike Meyer
+
+PORTNAME=	beautifulsoup4
+PORTVERSION=	4.9.3
+CATEGORIES=	www python
+MASTER_SITES=	CHEESESHOP
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+PKGNAMESUFFIX=	49
+
+MAINTAINER=	sunpoet@FreeBSD.org
+COMMENT=	HTML/XML Parser for Python
+
+LICENSE=	MIT
+LICENSE_FILE=	${WRKSRC}/COPYING.txt
+
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}soupsieve>=1.2:www/py-soupsieve@${PY_FLAVOR}
+
+USES=		python:3.7+
+USE_PYTHON=	autoplist concurrent distutils
+
+NO_ARCH=	yes
+
+PORTSCOUT=	limit:^4\.9\.
+
+.include <bsd.port.mk>
diff --git a/www/py-beautifulsoup449/distinfo b/www/py-beautifulsoup449/distinfo
new file mode 100644
index 000000000000..3a02c621b6cd
--- /dev/null
+++ b/www/py-beautifulsoup449/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1650228654
+SHA256 (beautifulsoup4-4.9.3.tar.gz) = 84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25
+SIZE (beautifulsoup4-4.9.3.tar.gz) = 376031
diff --git a/www/py-beautifulsoup449/files/patch-2to3 b/www/py-beautifulsoup449/files/patch-2to3
new file mode 100644
index 000000000000..a3249a90c527
--- /dev/null
+++ b/www/py-beautifulsoup449/files/patch-2to3
@@ -0,0 +1,901 @@
+--- bs4/__init__.py.orig	2020-10-03 15:30:53 UTC
++++ bs4/__init__.py
+@@ -51,7 +51,7 @@ from .element import (
+ 
+ # The very first thing we do is give a useful error if someone is
+ # running this code under Python 3 without converting it.
+-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
++'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+ 
+ # Define some custom warnings.
+ class GuessedAtParserWarning(UserWarning):
+@@ -100,7 +100,7 @@ class BeautifulSoup(Tag):
+     # Since BeautifulSoup subclasses Tag, it's possible to treat it as
+     # a Tag with a .name. This name makes it clear the BeautifulSoup
+     # object isn't a real markup tag.
+-    ROOT_TAG_NAME = u'[document]'
++    ROOT_TAG_NAME = '[document]'
+ 
+     # If the end-user gives no indication which tree builder they
+     # want, look for one with these features.
+@@ -217,7 +217,7 @@ class BeautifulSoup(Tag):
+         from_encoding = from_encoding or deprecated_argument(
+             "fromEncoding", "from_encoding")
+ 
+-        if from_encoding and isinstance(markup, unicode):
++        if from_encoding and isinstance(markup, str):
+             warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
+             from_encoding = None
+ 
+@@ -234,7 +234,7 @@ class BeautifulSoup(Tag):
+             builder_class = builder
+             builder = None
+         elif builder is None:
+-            if isinstance(features, basestring):
++            if isinstance(features, str):
+                 features = [features]
+             if features is None or len(features) == 0:
+                 features = self.DEFAULT_BUILDER_FEATURES
+@@ -309,13 +309,13 @@ class BeautifulSoup(Tag):
+             markup = markup.read()
+         elif len(markup) <= 256 and (
+                 (isinstance(markup, bytes) and not b'<' in markup)
+-                or (isinstance(markup, unicode) and not u'<' in markup)
++                or (isinstance(markup, str) and not '<' in markup)
+         ):
+             # Print out warnings for a couple beginner problems
+             # involving passing non-markup to Beautiful Soup.
+             # Beautiful Soup will still parse the input as markup,
+             # just in case that's what the user really wants.
+-            if (isinstance(markup, unicode)
++            if (isinstance(markup, str)
+                 and not os.path.supports_unicode_filenames):
+                 possible_filename = markup.encode("utf8")
+             else:
+@@ -323,7 +323,7 @@ class BeautifulSoup(Tag):
+             is_file = False
+             try:
+                 is_file = os.path.exists(possible_filename)
+-            except Exception, e:
++            except Exception as e:
+                 # This is almost certainly a problem involving
+                 # characters not valid in filenames on this
+                 # system. Just let it go.
+@@ -353,9 +353,9 @@ class BeautifulSoup(Tag):
+                 pass
+ 
+         if not success:
+-            other_exceptions = [unicode(e) for e in rejections]
++            other_exceptions = [str(e) for e in rejections]
+             raise ParserRejectedMarkup(
+-                u"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
++                "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+             )
+ 
+         # Clear out the markup and remove the builder's circular
+@@ -406,9 +406,9 @@ class BeautifulSoup(Tag):
+         if isinstance(markup, bytes):
+             space = b' '
+             cant_start_with = (b"http:", b"https:")
+-        elif isinstance(markup, unicode):
+-            space = u' '
+-            cant_start_with = (u"http:", u"https:")
++        elif isinstance(markup, str):
++            space = ' '
++            cant_start_with = ("http:", "https:")
+         else:
+             return
+ 
+@@ -545,7 +545,7 @@ class BeautifulSoup(Tag):
+             containerClass = self.string_container(containerClass)
+ 
+         if self.current_data:
+-            current_data = u''.join(self.current_data)
++            current_data = ''.join(self.current_data)
+             # If whitespace is not preserved, and this string contains
+             # nothing but ASCII spaces, replace it with a single space
+             # or newline.
+@@ -748,9 +748,9 @@ class BeautifulSoup(Tag):
+             eventual_encoding = None
+         if eventual_encoding != None:
+             encoding_part = ' encoding="%s"' % eventual_encoding
+-            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
++            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
+         else:
+-            prefix = u''
++            prefix = ''
+         if not pretty_print:
+             indent_level = None
+         else:
+@@ -788,4 +788,4 @@ class FeatureNotFound(ValueError):
+ if __name__ == '__main__':
+     import sys
+     soup = BeautifulSoup(sys.stdin)
+-    print(soup.prettify())
++    print((soup.prettify()))
+--- bs4/builder/_html5lib.py.orig	2020-09-26 14:36:10 UTC
++++ bs4/builder/_html5lib.py
+@@ -33,7 +33,7 @@ try:
+     # Pre-0.99999999
+     from html5lib.treebuilders import _base as treebuilder_base
+     new_html5lib = False
+-except ImportError, e:
++except ImportError as e:
+     # 0.99999999 and up
+     from html5lib.treebuilders import base as treebuilder_base
+     new_html5lib = True
+@@ -79,7 +79,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
+         parser = html5lib.HTMLParser(tree=self.create_treebuilder)
+         self.underlying_builder.parser = parser
+         extra_kwargs = dict()
+-        if not isinstance(markup, unicode):
++        if not isinstance(markup, str):
+             if new_html5lib:
+                 extra_kwargs['override_encoding'] = self.user_specified_encoding
+             else:
+@@ -87,13 +87,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
+         doc = parser.parse(markup, **extra_kwargs)
+ 
+         # Set the character encoding detected by the tokenizer.
+-        if isinstance(markup, unicode):
++        if isinstance(markup, str):
+             # We need to special-case this because html5lib sets
+             # charEncoding to UTF-8 if it gets Unicode input.
+             doc.original_encoding = None
+         else:
+             original_encoding = parser.tokenizer.stream.charEncoding[0]
+-            if not isinstance(original_encoding, basestring):
++            if not isinstance(original_encoding, str):
+                 # In 0.99999999 and up, the encoding is an html5lib
+                 # Encoding object. We want to use a string for compatibility
+                 # with other tree builders.
+@@ -110,7 +110,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
+ 
+     def test_fragment_to_document(self, fragment):
+         """See `TreeBuilder`."""
+-        return u'<html><head></head><body>%s</body></html>' % fragment
++        return '<html><head></head><body>%s</body></html>' % fragment
+ 
+ 
+ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
+@@ -217,7 +217,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuil
+             rv.append("|%s<%s>" % (' ' * indent, name))
+             if element.attrs:
+                 attributes = []
+-                for name, value in element.attrs.items():
++                for name, value in list(element.attrs.items()):
+                     if isinstance(name, NamespacedAttribute):
+                         name = "%s %s" % (prefixes[name.namespace], name.name)
+                     if isinstance(value, list):
+@@ -272,7 +272,7 @@ class Element(treebuilder_base.Node):
+ 
+     def appendChild(self, node):
+         string_child = child = None
+-        if isinstance(node, basestring):
++        if isinstance(node, str):
+             # Some other piece of code decided to pass in a string
+             # instead of creating a TextElement object to contain the
+             # string.
+@@ -289,7 +289,7 @@ class Element(treebuilder_base.Node):
+             child = node.element
+             node.parent = self
+ 
+-        if not isinstance(child, basestring) and child.parent is not None:
++        if not isinstance(child, str) and child.parent is not None:
+             node.element.extract()
+ 
+         if (string_child is not None and self.element.contents
+@@ -302,7 +302,7 @@ class Element(treebuilder_base.Node):
+             old_element.replace_with(new_element)
+             self.soup._most_recent_element = new_element
+         else:
+-            if isinstance(node, basestring):
++            if isinstance(node, str):
+                 # Create a brand new NavigableString from this string.
+                 child = self.soup.new_string(node)
+ 
+@@ -340,7 +340,7 @@ class Element(treebuilder_base.Node):
+ 
+         self.soup.builder._replace_cdata_list_attribute_values(
+             self.name, attributes)
+-        for name, value in attributes.items():
++        for name, value in list(attributes.items()):
+             self.element[name] = value
+ 
+         # The attributes may contain variables that need substitution.
+--- bs4/builder/_htmlparser.py.orig	2020-09-26 14:36:05 UTC
++++ bs4/builder/_htmlparser.py
+@@ -8,11 +8,11 @@ __all__ = [
+     'HTMLParserTreeBuilder',
+     ]
+ 
+-from HTMLParser import HTMLParser
++from html.parser import HTMLParser
+ 
+ try:
+-    from HTMLParser import HTMLParseError
+-except ImportError, e:
++    from html.parser import HTMLParseError
++except ImportError as e:
+     # HTMLParseError is removed in Python 3.5. Since it can never be
+     # thrown in 3.5, we can just define our own class as a placeholder.
+     class HTMLParseError(Exception):
+@@ -219,14 +219,14 @@ class BeautifulSoupHTMLParser(HTMLParser):
+                 continue
+             try:
+                 data = bytearray([real_name]).decode(encoding)
+-            except UnicodeDecodeError, e:
++            except UnicodeDecodeError as e:
+                 pass
+         if not data:
+             try:
+-                data = unichr(real_name)
+-            except (ValueError, OverflowError), e:
++                data = chr(real_name)
++            except (ValueError, OverflowError) as e:
+                 pass
+-        data = data or u"\N{REPLACEMENT CHARACTER}"
++        data = data or "\N{REPLACEMENT CHARACTER}"
+         self.handle_data(data)
+ 
+     def handle_entityref(self, name):
+@@ -353,7 +353,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
+         document to Unicode and parsing it. Each strategy will be tried
+         in turn.
+         """
+-        if isinstance(markup, unicode):
++        if isinstance(markup, str):
+             # Parse Unicode as-is.
+             yield (markup, None, None, False)
+             return
+@@ -376,7 +376,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
+         try:
+             parser.feed(markup)
+             parser.close()
+-        except HTMLParseError, e:
++        except HTMLParseError as e:
+             warnings.warn(RuntimeWarning(
+                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
+             raise e
+--- bs4/builder/_lxml.py.orig	2020-09-07 11:13:41 UTC
++++ bs4/builder/_lxml.py
+@@ -8,11 +8,11 @@ __all__ = [
+ 
+ try:
+     from collections.abc import Callable # Python 3.6
+-except ImportError , e:
++except ImportError as e:
+     from collections import Callable
+ 
+ from io import BytesIO
+-from StringIO import StringIO
++from io import StringIO
+ from lxml import etree
+ from bs4.element import (
+     Comment,
+@@ -35,7 +35,7 @@ LXML = 'lxml'
+ 
+ def _invert(d):
+     "Invert a dictionary."
+-    return dict((v,k) for k, v in d.items())
++    return dict((v,k) for k, v in list(d.items()))
+ 
+ class LXMLTreeBuilderForXML(TreeBuilder):
+     DEFAULT_PARSER_CLASS = etree.XMLParser
+@@ -81,7 +81,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+ 
+         :param mapping: A dictionary mapping namespace prefixes to URIs.
+         """
+-        for key, value in mapping.items():
++        for key, value in list(mapping.items()):
+             if key and key not in self.soup._namespaces:
+                 # Let the BeautifulSoup object know about a new namespace.
+                 # If there are multiple namespaces defined with the same
+@@ -169,12 +169,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+         else:
+             self.processing_instruction_class = XMLProcessingInstruction
+ 
+-        if isinstance(markup, unicode):
++        if isinstance(markup, str):
+             # We were given Unicode. Maybe lxml can parse Unicode on
+             # this system?
+             yield markup, None, document_declared_encoding, False
+ 
+-        if isinstance(markup, unicode):
++        if isinstance(markup, str):
+             # No, apparently not. Convert the Unicode to UTF-8 and
+             # tell lxml to parse it as UTF-8.
+             yield (markup.encode("utf8"), "utf8",
+@@ -189,7 +189,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+     def feed(self, markup):
+         if isinstance(markup, bytes):
+             markup = BytesIO(markup)
+-        elif isinstance(markup, unicode):
++        elif isinstance(markup, str):
+             markup = StringIO(markup)
+ 
+         # Call feed() at least once, even if the markup is empty,
+@@ -204,7 +204,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+             if len(data) != 0:
+                 self.parser.feed(data)
+             self.parser.close()
+-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
++        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
+             raise ParserRejectedMarkup(e)
+ 
+     def close(self):
+@@ -233,7 +233,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+         # Also treat the namespace mapping as a set of attributes on the
+         # tag, so we can recreate it later.
+         attrs = attrs.copy()
+-        for prefix, namespace in nsmap.items():
++        for prefix, namespace in list(nsmap.items()):
+             attribute = NamespacedAttribute(
+                 "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
+             attrs[attribute] = namespace
+@@ -242,7 +242,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+         # from lxml with namespaces attached to their names, and
+         # turn then into NamespacedAttribute objects.
+         new_attrs = {}
+-        for attr, value in attrs.items():
++        for attr, value in list(attrs.items()):
+             namespace, attr = self._getNsTag(attr)
+             if namespace is None:
+                 new_attrs[attr] = value
+@@ -302,7 +302,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
+ 
+     def test_fragment_to_document(self, fragment):
+         """See `TreeBuilder`."""
+-        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
++        return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+ 
+ 
+ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
+@@ -323,10 +323,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilder
+             self.parser = self.parser_for(encoding)
+             self.parser.feed(markup)
+             self.parser.close()
+-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
++        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
+             raise ParserRejectedMarkup(e)
+ 
+ 
+     def test_fragment_to_document(self, fragment):
+         """See `TreeBuilder`."""
+-        return u'<html><body>%s</body></html>' % fragment
++        return '<html><body>%s</body></html>' % fragment
+--- bs4/diagnose.py.orig	2020-05-17 17:55:43 UTC
++++ bs4/diagnose.py
+@@ -4,8 +4,8 @@
+ __license__ = "MIT"
+ 
+ import cProfile
+-from StringIO import StringIO
+-from HTMLParser import HTMLParser
++from io import StringIO
++from html.parser import HTMLParser
+ import bs4
+ from bs4 import BeautifulSoup, __version__
+ from bs4.builder import builder_registry
+@@ -25,8 +25,8 @@ def diagnose(data):
+     :param data: A string containing markup that needs to be explained.
+     :return: None; diagnostics are printed to standard output.
+     """
+-    print("Diagnostic running on Beautiful Soup %s" % __version__)
+-    print("Python version %s" % sys.version)
++    print(("Diagnostic running on Beautiful Soup %s" % __version__))
++    print(("Python version %s" % sys.version))
+ 
+     basic_parsers = ["html.parser", "html5lib", "lxml"]
+     for name in basic_parsers:
+@@ -35,16 +35,16 @@ def diagnose(data):
+                 break
+         else:
+             basic_parsers.remove(name)
+-            print(
++            print((
+                 "I noticed that %s is not installed. Installing it may help." %
+-                name)
++                name))
+ 
+     if 'lxml' in basic_parsers:
+         basic_parsers.append("lxml-xml")
+         try:
+             from lxml import etree
+-            print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
+-        except ImportError, e:
++            print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))))
++        except ImportError as e:
+             print(
+                 "lxml is not installed or couldn't be imported.")
+ 
+@@ -52,21 +52,21 @@ def diagnose(data):
+     if 'html5lib' in basic_parsers:
+         try:
+             import html5lib
+-            print("Found html5lib version %s" % html5lib.__version__)
+-        except ImportError, e:
++            print(("Found html5lib version %s" % html5lib.__version__))
++        except ImportError as e:
+             print(
+                 "html5lib is not installed or couldn't be imported.")
+ 
+     if hasattr(data, 'read'):
+         data = data.read()
+     elif data.startswith("http:") or data.startswith("https:"):
+-        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
++        print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
+         print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
+         return
+     else:
+         try:
+             if os.path.exists(data):
+-                print('"%s" looks like a filename. Reading data from the file.' % data)
++                print(('"%s" looks like a filename. Reading data from the file.' % data))
+                 with open(data) as fp:
+                     data = fp.read()
+         except ValueError:
+@@ -76,19 +76,19 @@ def diagnose(data):
+     print("")
+ 
+     for parser in basic_parsers:
+-        print("Trying to parse your markup with %s" % parser)
++        print(("Trying to parse your markup with %s" % parser))
+         success = False
+         try:
+             soup = BeautifulSoup(data, features=parser)
+             success = True
+-        except Exception, e:
+-            print("%s could not parse the markup." % parser)
++        except Exception as e:
++            print(("%s could not parse the markup." % parser))
+             traceback.print_exc()
+         if success:
+-            print("Here's what %s did with the markup:" % parser)
+-            print(soup.prettify())
++            print(("Here's what %s did with the markup:" % parser))
++            print((soup.prettify()))
+ 
+-        print("-" * 80)
++        print(("-" * 80))
+ 
+ def lxml_trace(data, html=True, **kwargs):
+     """Print out the lxml events that occur during parsing.
+@@ -104,7 +104,7 @@ def lxml_trace(data, html=True, **kwargs):
+     """
+     from lxml import etree
+     for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
+-        print("%s, %4s, %s" % (event, element.tag, element.text))
++        print(("%s, %4s, %s" % (event, element.tag, element.text)))
+ 
+ class AnnouncingParser(HTMLParser):
+     """Subclass of HTMLParser that announces parse events, without doing
+@@ -193,9 +193,9 @@ def rdoc(num_elements=1000):
+ 
+ def benchmark_parsers(num_elements=100000):
+     """Very basic head-to-head performance benchmark."""
+-    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
++    print(("Comparative parser benchmark on Beautiful Soup %s" % __version__))
+     data = rdoc(num_elements)
+-    print("Generated a large invalid HTML document (%d bytes)." % len(data))
++    print(("Generated a large invalid HTML document (%d bytes)." % len(data)))
+ 
+     for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
+         success = False
+@@ -204,24 +204,24 @@ def benchmark_parsers(num_elements=100000):
+             soup = BeautifulSoup(data, parser)
+             b = time.time()
+             success = True
+-        except Exception, e:
+-            print("%s could not parse the markup." % parser)
++        except Exception as e:
++            print(("%s could not parse the markup." % parser))
+             traceback.print_exc()
+         if success:
+-            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
++            print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a)))
+ 
+     from lxml import etree
+     a = time.time()
+     etree.HTML(data)
+     b = time.time()
+-    print("Raw lxml parsed the markup in %.2fs." % (b-a))
++    print(("Raw lxml parsed the markup in %.2fs." % (b-a)))
+ 
+     import html5lib
+     parser = html5lib.HTMLParser()
+     a = time.time()
+     parser.parse(data)
+     b = time.time()
+-    print("Raw html5lib parsed the markup in %.2fs." % (b-a))
++    print(("Raw html5lib parsed the markup in %.2fs." % (b-a)))
+ 
+ def profile(num_elements=100000, parser="lxml"):
+     """Use Python's profiler on a randomly generated document."""
+--- bs4/element.py.orig	2020-10-02 22:19:12 UTC
++++ bs4/element.py
+@@ -3,14 +3,14 @@ __license__ = "MIT"
+ 
+ try:
+     from collections.abc import Callable # Python 3.6
+-except ImportError , e:
++except ImportError as e:
+     from collections import Callable
+ import re
+ import sys
+ import warnings
+ try:
+     import soupsieve
+-except ImportError, e:
++except ImportError as e:
+     soupsieve = None
+     warnings.warn(
+         'The soupsieve package is not installed. CSS selectors cannot be used.'
+@@ -57,22 +57,22 @@ def _alias(attr):
+ # Source:
+ # https://docs.python.org/3/library/codecs.html#python-specific-encodings
+ PYTHON_SPECIFIC_ENCODINGS = set([
+-    u"idna",
+-    u"mbcs",
+-    u"oem",
+-    u"palmos",
+-    u"punycode",
+-    u"raw_unicode_escape",
+-    u"undefined",
+-    u"unicode_escape",
+-    u"raw-unicode-escape",
+-    u"unicode-escape",
+-    u"string-escape",
+-    u"string_escape",
++    "idna",
++    "mbcs",
++    "oem",
++    "palmos",
++    "punycode",
++    "raw_unicode_escape",
++    "undefined",
++    "unicode_escape",
++    "raw-unicode-escape",
++    "unicode-escape",
++    "string-escape",
++    "string_escape",
+ ])
+ 
+ 
+-class NamespacedAttribute(unicode):
++class NamespacedAttribute(str):
+     """A namespaced string (e.g. 'xml:lang') that remembers the namespace
+     ('xml') and the name ('lang') that were used to create it.
+     """
+@@ -84,18 +84,18 @@ class NamespacedAttribute(unicode):
+             name = None
+ 
+         if name is None:
+-            obj = unicode.__new__(cls, prefix)
++            obj = str.__new__(cls, prefix)
+         elif prefix is None:
+             # Not really namespaced.
+-            obj = unicode.__new__(cls, name)
++            obj = str.__new__(cls, name)
+         else:
+-            obj = unicode.__new__(cls, prefix + ":" + name)
++            obj = str.__new__(cls, prefix + ":" + name)
+         obj.prefix = prefix
+         obj.name = name
+         obj.namespace = namespace
+         return obj
+ 
+-class AttributeValueWithCharsetSubstitution(unicode):
++class AttributeValueWithCharsetSubstitution(str):
+     """A stand-in object for a character encoding specified in HTML."""
+ 
+ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+@@ -106,7 +106,7 @@ class CharsetMetaAttributeValue(AttributeValueWithChar
+     """
+ 
+     def __new__(cls, original_value):
+-        obj = unicode.__new__(cls, original_value)
++        obj = str.__new__(cls, original_value)
+         obj.original_value = original_value
+         return obj
+ 
+@@ -134,9 +134,9 @@ class ContentMetaAttributeValue(AttributeValueWithChar
+         match = cls.CHARSET_RE.search(original_value)
+         if match is None:
+             # No substitution necessary.
+-            return unicode.__new__(unicode, original_value)
++            return str.__new__(str, original_value)
+ 
+-        obj = unicode.__new__(cls, original_value)
++        obj = str.__new__(cls, original_value)
+         obj.original_value = original_value
+         return obj
+ 
+@@ -376,7 +376,7 @@ class PageElement(object):
+             raise ValueError("Cannot insert None into a tag.")
+         if new_child is self:
+             raise ValueError("Cannot insert a tag into itself.")
+-        if (isinstance(new_child, basestring)
++        if (isinstance(new_child, str)
+             and not isinstance(new_child, NavigableString)):
+             new_child = NavigableString(new_child)
+ 
+@@ -753,7 +753,7 @@ class PageElement(object):
+             result = (element for element in generator
+                       if isinstance(element, Tag))
+             return ResultSet(strainer, result)
+-        elif isinstance(name, basestring):
++        elif isinstance(name, str):
+             # Optimization to find all tags with a given name.
+             if name.count(':') == 1:
+                 # This is a name with a prefix. If this is a namespace-aware document,
+@@ -872,7 +872,7 @@ class PageElement(object):
+         return self.parents
+ 
+ 
+-class NavigableString(unicode, PageElement):
++class NavigableString(str, PageElement):
+     """A Python Unicode string that is part of a parse tree.
+ 
+     When Beautiful Soup parses the markup <b>penguin</b>, it will
+@@ -895,10 +895,10 @@ class NavigableString(unicode, PageElement):
+         passed in to the superclass's __new__ or the superclass won't know
+         how to handle non-ASCII characters.
+ """ +- if isinstance(value, unicode): +- u = unicode.__new__(cls, value) ++ if isinstance(value, str): ++ u = str.__new__(cls, value) + else: +- u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) ++ u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + u.setup() + return u + +@@ -909,7 +909,7 @@ class NavigableString(unicode, PageElement): + return type(self)(self) + + def __getnewargs__(self): +- return (unicode(self),) ++ return (str(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards +@@ -975,30 +975,30 @@ class PreformattedString(NavigableString): + + class CData(PreformattedString): + """A CDATA block.""" +- PREFIX = u'<![CDATA[' +- SUFFIX = u']]>' ++ PREFIX = '<![CDATA[' ++ SUFFIX = ']]>' + + class ProcessingInstruction(PreformattedString): + """A SGML processing instruction.""" + +- PREFIX = u'<?' +- SUFFIX = u'>' ++ PREFIX = '<?' ++ SUFFIX = '>' + + class XMLProcessingInstruction(ProcessingInstruction): + """An XML processing instruction.""" +- PREFIX = u'<?' +- SUFFIX = u'?>' ++ PREFIX = '<?' ++ SUFFIX = '?>' + + class Comment(PreformattedString): + """An HTML or XML comment.""" +- PREFIX = u'<!--' +- SUFFIX = u'-->' ++ PREFIX = '<!--' ++ SUFFIX = '-->' + + + class Declaration(PreformattedString): + """An XML declaration.""" +- PREFIX = u'<?' +- SUFFIX = u'?>' ++ PREFIX = '<?' ++ SUFFIX = '?>' + + + class Doctype(PreformattedString): +@@ -1026,8 +1026,8 @@ class Doctype(PreformattedString): + + return Doctype(value) + +- PREFIX = u'<!DOCTYPE ' +- SUFFIX = u'>\n' ++ PREFIX = '<!DOCTYPE ' ++ SUFFIX = '>\n' + + + class Stylesheet(NavigableString): +@@ -1263,7 +1263,7 @@ class Tag(PageElement): + for string in self._all_strings(True): + yield string + +- def get_text(self, separator=u"", strip=False, ++ def get_text(self, separator="", strip=False, + types=(NavigableString, CData)): + """Get all child strings, concatenated using the given separator. + +@@ -1416,7 +1416,7 @@ class Tag(PageElement): + def __contains__(self, x): + return x in self.contents + +- def __nonzero__(self): ++ def __bool__(self): + "A tag is non-None even if it has no contents." + return True + +@@ -1565,8 +1565,8 @@ class Tag(PageElement): + else: + if isinstance(val, list) or isinstance(val, tuple): + val = ' '.join(val) +- elif not isinstance(val, basestring): +- val = unicode(val) ++ elif not isinstance(val, str): ++ val = str(val) + elif ( + isinstance(val, AttributeValueWithCharsetSubstitution) + and eventual_encoding is not None +@@ -1575,7 +1575,7 @@ class Tag(PageElement): + + text = formatter.attribute_value(val) + decoded = ( +- unicode(key) + '=' ++ str(key) + '=' + + formatter.quoted_attribute_value(text)) + attrs.append(decoded) + close = '' +@@ -1934,7 +1934,7 @@ class SoupStrainer(object): + else: + attrs = kwargs + normalized_attrs = {} +- for key, value in attrs.items(): ++ for key, value in list(attrs.items()): + normalized_attrs[key] = self._normalize_search_value(value) + + self.attrs = normalized_attrs +@@ -1943,7 +1943,7 @@ class SoupStrainer(object): + def _normalize_search_value(self, value): + # Leave it alone if it's a Unicode string, a callable, a + # regular expression, a boolean, or None. 
+-        if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
++        if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match')
+             or isinstance(value, bool) or value is None):
+             return value
+ 
+@@ -1956,7 +1956,7 @@ class SoupStrainer(object):
+             new_value = []
+             for v in value:
+                 if (hasattr(v, '__iter__') and not isinstance(v, bytes)
+-                    and not isinstance(v, unicode)):
++                    and not isinstance(v, str)):
+                     # This is almost certainly the user's mistake. In the
+                     # interests of avoiding infinite loops, we'll let
+                     # it through as-is rather than doing a recursive call.
+@@ -1968,7 +1968,7 @@ class SoupStrainer(object):
+         # Otherwise, convert it into a Unicode string.
+         # The unicode(str()) thing is so this will do the same thing on Python 2
+         # and Python 3.
+-        return unicode(str(value))
++        return str(str(value))
+ 
+     def __str__(self):
+         """A human-readable representation of this SoupStrainer."""
+@@ -1996,7 +1996,7 @@ class SoupStrainer(object):
+             markup = markup_name
+             markup_attrs = markup
+ 
+-        if isinstance(self.name, basestring):
++        if isinstance(self.name, str):
+             # Optimization for a very common case where the user is
+             # searching for a tag with one specific name, and we're
+             # looking at a tag with a different name.
+@@ -2052,7 +2052,7 @@ class SoupStrainer(object):
+         found = None
+         # If given a list of items, scan it for a text element that
+         # matches.
+-        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
++        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
+             for element in markup:
+                 if isinstance(element, NavigableString) \
+                        and self.search(element):
+@@ -2065,7 +2065,7 @@ class SoupStrainer(object):
+             found = self.search_tag(markup)
+         # If it's text, make sure the text matches.
+         elif isinstance(markup, NavigableString) or \
+-                 isinstance(markup, basestring):
++                 isinstance(markup, str):
+             if not self.name and not self.attrs and self._matches(markup, self.text):
+                 found = markup
+         else:
+@@ -2110,7 +2110,7 @@ class SoupStrainer(object):
+             return not match_against
+ 
+         if (hasattr(match_against, '__iter__')
+-            and not isinstance(match_against, basestring)):
++            and not isinstance(match_against, str)):
+             # We're asked to match against an iterable of items.
+             # The markup must be match at least one item in the
+             # iterable. We'll try each one in turn.
+@@ -2137,7 +2137,7 @@ class SoupStrainer(object):
+         # the tag's name and once against its prefixed name.
+         match = False
+ 
+-        if not match and isinstance(match_against, unicode):
++        if not match and isinstance(match_against, str):
+             # Exact string match
+             match = markup == match_against
+ 
+--- bs4/tests/test_html5lib.py.orig	2020-04-05 19:54:12 UTC
++++ bs4/tests/test_html5lib.py
+@@ -5,7 +5,7 @@ import warnings
+ try:
+     from bs4.builder import HTML5TreeBuilder
+     HTML5LIB_PRESENT = True
+-except ImportError, e:
++except ImportError as e:
+     HTML5LIB_PRESENT = False
+ from bs4.element import SoupStrainer
+ from bs4.testing import (
+@@ -74,14 +74,14 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuil
+     def test_reparented_markup(self):
+         markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
+         soup = self.soup(markup)
+-        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
++        self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
+         self.assertEqual(2, len(soup.find_all('p')))
+ 
+ 
+     def test_reparented_markup_ends_with_whitespace(self):
+         markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
+         soup = self.soup(markup)
+-        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
++        self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
+         self.assertEqual(2, len(soup.find_all('p')))
+ 
+     def test_reparented_markup_containing_identical_whitespace_nodes(self):
+@@ -127,7 +127,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuil
+     def test_foster_parenting(self):
+         markup = b"""<table><td></tbody>A"""
+         soup = self.soup(markup)
+-        self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
++        self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
+ 
+     def test_extraction(self):
+         """
+--- bs4/tests/test_lxml.py.orig	2020-04-05 19:54:12 UTC
++++ bs4/tests/test_lxml.py
+@@ -7,7 +7,7 @@ try:
+     import lxml.etree
+     LXML_PRESENT = True
+     LXML_VERSION = lxml.etree.LXML_VERSION
+-except ImportError, e:
++except ImportError as e:
+     LXML_PRESENT = False
+     LXML_VERSION = (0,)
+ 
+@@ -68,7 +68,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuild
+         # if one is installed.
+         with warnings.catch_warnings(record=True) as w:
+             soup = BeautifulStoneSoup("<b />")
+-        self.assertEqual(u"<b/>", unicode(soup.b))
++        self.assertEqual("<b/>", str(soup.b))
+         self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
+ 
+     def test_tracking_line_numbers(self):
+--- setup.py.orig	2020-10-03 15:31:00 UTC
++++ setup.py
+@@ -30,7 +30,6 @@ setup(
+     'lxml' : [ 'lxml'],
+     'html5lib' : ['html5lib'],
+     },
+-    use_2to3 = True,
+     classifiers=["Development Status :: 5 - Production/Stable",
+                  "Intended Audience :: Developers",
+                  "License :: OSI Approved :: MIT License",
diff --git a/www/py-beautifulsoup449/pkg-descr b/www/py-beautifulsoup449/pkg-descr
new file mode 100644
index 000000000000..8d41275bc801
--- /dev/null
+++ b/www/py-beautifulsoup449/pkg-descr
@@ -0,0 +1,12 @@
+Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance

*** 11 LINES SKIPPED ***