rename HTMLParser to html.parser and htmlentitydefs to html.entities;

includes merge of trunk revision 63432
2008-05-17 22:02:32 +00:00 · 2008-05-17 22:02:32 +00:00 · 3c50ea4303
parent 9b020c784c
commit 3c50ea4303
9 changed files with 25 additions and 23 deletions
--- a/Doc/library/htmllib.rst
+++ b/Doc/library/htmllib.rst
@@ -75,12 +75,12 @@ The module defines a parser class and an exception:
      Interface definition for transforming an abstract flow of formatting events into
      specific output events on writer objects.

-   Module :mod:`HTMLParser`
+   Module :mod:`html.parser`
      Alternate HTML parser that offers a slightly lower-level view of the input, but
      is designed to work with XHTML, and does not implement some of the SGML syntax
      not used in "HTML as deployed" and which isn't legal for XHTML.

-   Module :mod:`htmlentitydefs`
+   Module :mod:`html.entities`
      Definition of replacement text for XHTML 1.0  entities.

   Module :mod:`sgmllib`
@@ -147,10 +147,10 @@ additional methods and instance variables for use within tag methods.
   :meth:`save_bgn` will raise a :exc:`TypeError` exception.


-:mod:`htmlentitydefs` --- Definitions of HTML general entities
-==============================================================
+:mod:`html.entities` --- Definitions of HTML general entities
+=============================================================

-.. module:: htmlentitydefs
+.. module:: html.entities
   :synopsis: Definitions of HTML general entities.
 .. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>

--- a/Doc/library/htmlparser.rst
+++ b/Doc/library/htmlparser.rst
@@ -1,8 +1,8 @@

-:mod:`HTMLParser` --- Simple HTML and XHTML parser
-==================================================
+:mod:`html.parser` --- Simple HTML and XHTML parser
+===================================================

-.. module:: HTMLParser
+.. module:: html.parser
   :synopsis: A simple parser that can handle HTML and XHTML.


@@ -18,7 +18,7 @@ in :mod:`sgmllib`.

   The :class:`HTMLParser` class is instantiated without arguments.

-   An HTMLParser instance is fed HTML data and calls handler functions when tags
+   An :class:`HTMLParser` instance is fed HTML data and calls handler functions when tags
   begin and end.  The :class:`HTMLParser` class is meant to be overridden by the
   user to provide a desired behavior.

@@ -87,8 +87,8 @@ An exception is defined as well:
   HREF="http://www.cwi.nl/">``, this method would be called as
   ``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.

-   All entity references from htmlentitydefs are replaced in the attribute
-   values.
+   All entity references from :mod:`html.entities` are replaced in the
+   attribute values.


 .. method:: HTMLParser.handle_startendtag(tag, attrs)
@@ -166,7 +166,7 @@ Example HTML Parser Application
 As a basic example, below is a very basic HTML parser that uses the
 :class:`HTMLParser` class to print out tags as they are encountered::

-   from HTMLParser import HTMLParser
+   from html.parser import HTMLParser

   class MyHTMLParser(HTMLParser):

--- a/Lib/html/__init__.py
+++ b/Lib/html/__init__.py
@@ -0,0 +1 @@
+# This directory is a Python package.
--- a/Lib/htmlentitydefs.py
+++ b/Lib/htmlentitydefs.py
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -372,16 +372,17 @@ class HTMLParser(_markupbase.ParserBase):
                    c = int(s)
                return chr(c)
            else:
-                # Cannot use name2codepoint directly, because HTMLParser supports apos,
-                # which is not part of HTML 4
-                import htmlentitydefs
+                # Cannot use name2codepoint directly, because HTMLParser
+                # supports apos, which is not part of HTML 4
+                import html.entities
                if HTMLParser.entitydefs is None:
                    entitydefs = HTMLParser.entitydefs = {'apos':"'"}
-                    for k, v in htmlentitydefs.name2codepoint.items():
-                        entitydefs[k] = chr(v)
+                    for k, v in html.entities.name2codepoint.items():
+                        entitydefs[k] = unichr(v)
                try:
                    return self.entitydefs[s]
                except KeyError:
                    return '&'+s+';'

-        return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
+        return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
+                      replaceEntities, s)
--- a/Lib/htmllib.py
+++ b/Lib/htmllib.py
@@ -24,7 +24,7 @@ class HTMLParser(sgmllib.SGMLParser):

    """

-    from htmlentitydefs import entitydefs
+    from html.entities import entitydefs

    def __init__(self, formatter, verbose=0):
        """Creates an instance of the HTMLParser class.
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1,5 +1,5 @@
 import test.test_support, unittest
-import sys, codecs, htmlentitydefs, unicodedata
+import sys, codecs, html.entities, unicodedata

 class PosReturn:
    # this can be used for configurable callbacks
@@ -86,7 +86,7 @@ class CodecCallbackTest(unittest.TestCase):
            l = []
            for c in exc.object[exc.start:exc.end]:
                try:
-                    l.append("&%s;" % htmlentitydefs.codepoint2name[ord(c)])
+                    l.append("&%s;" % html.entities.codepoint2name[ord(c)])
                except KeyError:
                    l.append("&#%d;" % ord(c))
            return ("".join(l), exc.end)
--- a/Lib/test/test_multibytecodec_support.py
+++ b/Lib/test/test_multibytecodec_support.py
@@ -74,7 +74,7 @@ class TestBase:
        if self.has_iso10646:
            return

-        from htmlentitydefs import codepoint2name
+        from html.entities import codepoint2name

        def xmlcharnamereplace(exc):
            if not isinstance(exc, UnicodeEncodeError):
--- a/Lib/test/test_sundry.py
+++ b/Lib/test/test_sundry.py
@@ -48,7 +48,7 @@ class TestUntestedModules(unittest.TestCase):
            import encodings
            import formatter
            import getpass
-            import htmlentitydefs
+            import html.entities
            import imghdr
            import keyword
            import linecache