update references and documentation for modules in the new html package
(http://bugs.python.org/issue2882)
This commit is contained in:
parent
91ae250273
commit
cb51d84214
|
@ -77,12 +77,12 @@ The module defines a parser class and an exception:
|
||||||
Interface definition for transforming an abstract flow of formatting events into
|
Interface definition for transforming an abstract flow of formatting events into
|
||||||
specific output events on writer objects.
|
specific output events on writer objects.
|
||||||
|
|
||||||
Module :mod:`HTMLParser`
|
Module :mod:`html.parser`
|
||||||
Alternate HTML parser that offers a slightly lower-level view of the input, but
|
Alternate HTML parser that offers a slightly lower-level view of the input, but
|
||||||
is designed to work with XHTML, and does not implement some of the SGML syntax
|
is designed to work with XHTML, and does not implement some of the SGML syntax
|
||||||
not used in "HTML as deployed" and which isn't legal for XHTML.
|
not used in "HTML as deployed" and which isn't legal for XHTML.
|
||||||
|
|
||||||
Module :mod:`htmlentitydefs`
|
Module :mod:`html.entities`
|
||||||
Definition of replacement text for XHTML 1.0 entities.
|
Definition of replacement text for XHTML 1.0 entities.
|
||||||
|
|
||||||
Module :mod:`sgmllib`
|
Module :mod:`sgmllib`
|
||||||
|
@ -149,10 +149,10 @@ additional methods and instance variables for use within tag methods.
|
||||||
:meth:`save_bgn` will raise a :exc:`TypeError` exception.
|
:meth:`save_bgn` will raise a :exc:`TypeError` exception.
|
||||||
|
|
||||||
|
|
||||||
:mod:`htmlentitydefs` --- Definitions of HTML general entities
|
:mod:`html.entities` --- Definitions of HTML general entities
|
||||||
==============================================================
|
=============================================================
|
||||||
|
|
||||||
.. module:: htmlentitydefs
|
.. module:: html.entities
|
||||||
:synopsis: Definitions of HTML general entities.
|
:synopsis: Definitions of HTML general entities.
|
||||||
.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
|
.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
|
|
||||||
:mod:`HTMLParser` --- Simple HTML and XHTML parser
|
:mod:`html.parser` --- Simple HTML and XHTML parser
|
||||||
==================================================
|
===================================================
|
||||||
|
|
||||||
.. module:: HTMLParser
|
.. module:: html.parser
|
||||||
:synopsis: A simple parser that can handle HTML and XHTML.
|
:synopsis: A simple parser that can handle HTML and XHTML.
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ in :mod:`sgmllib`.
|
||||||
|
|
||||||
The :class:`HTMLParser` class is instantiated without arguments.
|
The :class:`HTMLParser` class is instantiated without arguments.
|
||||||
|
|
||||||
An HTMLParser instance is fed HTML data and calls handler functions when tags
|
An :class:`HTMLParser` instance is fed HTML data and calls handler functions when tags
|
||||||
begin and end. The :class:`HTMLParser` class is meant to be overridden by the
|
begin and end. The :class:`HTMLParser` class is meant to be overridden by the
|
||||||
user to provide a desired behavior.
|
user to provide a desired behavior.
|
||||||
|
|
||||||
|
@ -92,8 +92,8 @@ An exception is defined as well:
|
||||||
``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
|
``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
|
||||||
|
|
||||||
.. versionchanged:: 2.6
|
.. versionchanged:: 2.6
|
||||||
All entity references from htmlentitydefs are now replaced in the attribute
|
All entity references from :mod:`html.entities` are now replaced in the
|
||||||
values.
|
attribute values.
|
||||||
|
|
||||||
|
|
||||||
.. method:: HTMLParser.handle_startendtag(tag, attrs)
|
.. method:: HTMLParser.handle_startendtag(tag, attrs)
|
||||||
|
@ -171,7 +171,7 @@ Example HTML Parser Application
|
||||||
As a basic example, below is a very basic HTML parser that uses the
|
As a basic example, below is a very basic HTML parser that uses the
|
||||||
:class:`HTMLParser` class to print out tags as they are encountered::
|
:class:`HTMLParser` class to print out tags as they are encountered::
|
||||||
|
|
||||||
from HTMLParser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
class MyHTMLParser(HTMLParser):
|
class MyHTMLParser(HTMLParser):
|
||||||
|
|
||||||
|
|
|
@ -372,16 +372,17 @@ class HTMLParser(markupbase.ParserBase):
|
||||||
c = int(s)
|
c = int(s)
|
||||||
return unichr(c)
|
return unichr(c)
|
||||||
else:
|
else:
|
||||||
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
# Cannot use name2codepoint directly, because HTMLParser
|
||||||
# which is not part of HTML 4
|
# supports apos, which is not part of HTML 4
|
||||||
import htmlentitydefs
|
import html.entities
|
||||||
if HTMLParser.entitydefs is None:
|
if HTMLParser.entitydefs is None:
|
||||||
entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
|
entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
|
||||||
for k, v in htmlentitydefs.name2codepoint.iteritems():
|
for k, v in html.entities.name2codepoint.iteritems():
|
||||||
entitydefs[k] = unichr(v)
|
entitydefs[k] = unichr(v)
|
||||||
try:
|
try:
|
||||||
return self.entitydefs[s]
|
return self.entitydefs[s]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return '&'+s+';'
|
return '&'+s+';'
|
||||||
|
|
||||||
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
|
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
|
||||||
|
replaceEntities, s)
|
||||||
|
|
|
@ -24,7 +24,7 @@ class HTMLParser(sgmllib.SGMLParser):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from htmlentitydefs import entitydefs
|
from html.entities import entitydefs
|
||||||
|
|
||||||
def __init__(self, formatter, verbose=0):
|
def __init__(self, formatter, verbose=0):
|
||||||
"""Creates an instance of the HTMLParser class.
|
"""Creates an instance of the HTMLParser class.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import test.test_support, unittest
|
import test.test_support, unittest
|
||||||
import sys, codecs, htmlentitydefs, unicodedata
|
import sys, codecs, html.entities, unicodedata
|
||||||
|
|
||||||
class PosReturn:
|
class PosReturn:
|
||||||
# this can be used for configurable callbacks
|
# this can be used for configurable callbacks
|
||||||
|
@ -86,7 +86,7 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
l = []
|
l = []
|
||||||
for c in exc.object[exc.start:exc.end]:
|
for c in exc.object[exc.start:exc.end]:
|
||||||
try:
|
try:
|
||||||
l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
|
l.append(u"&%s;" % html.entities.codepoint2name[ord(c)])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
l.append(u"&#%d;" % ord(c))
|
l.append(u"&#%d;" % ord(c))
|
||||||
return (u"".join(l), exc.end)
|
return (u"".join(l), exc.end)
|
||||||
|
|
|
@ -64,7 +64,7 @@ class TestBase:
|
||||||
if self.has_iso10646:
|
if self.has_iso10646:
|
||||||
return
|
return
|
||||||
|
|
||||||
from htmlentitydefs import codepoint2name
|
from html.entities import codepoint2name
|
||||||
|
|
||||||
def xmlcharnamereplace(exc):
|
def xmlcharnamereplace(exc):
|
||||||
if not isinstance(exc, UnicodeEncodeError):
|
if not isinstance(exc, UnicodeEncodeError):
|
||||||
|
|
|
@ -50,7 +50,7 @@ class TestUntestedModules(unittest.TestCase):
|
||||||
import encodings
|
import encodings
|
||||||
import formatter
|
import formatter
|
||||||
import getpass
|
import getpass
|
||||||
import htmlentitydefs
|
import html.entities
|
||||||
import ihooks
|
import ihooks
|
||||||
import imghdr
|
import imghdr
|
||||||
import imputil
|
import imputil
|
||||||
|
|
Loading…
Reference in New Issue