From f99e4b5dbef57e13dd603dcc0edd9b7318f08c28 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 28 Oct 2011 14:34:56 +0300 Subject: [PATCH] Improve HTMLParser example in the doc and fix a couple minor things. --- Doc/library/html.parser.rst | 40 +++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst index 06a3b1a3e8f..0dc81a5b0b4 100644 --- a/Doc/library/html.parser.rst +++ b/Doc/library/html.parser.rst @@ -101,9 +101,9 @@ An exception is defined as well: .. method:: HTMLParser.handle_startendtag(tag, attrs) Similar to :meth:`handle_starttag`, but called when the parser encounters an - XHTML-style empty tag (``<a .../>``). This method may be overridden by + XHTML-style empty tag (``<img ... />``). This method may be overridden by subclasses which require this particular lexical information; the default - implementation simple calls :meth:`handle_starttag` and :meth:`handle_endtag`. + implementation simply calls :meth:`handle_starttag` and :meth:`handle_endtag`. .. method:: HTMLParser.handle_endtag(tag) @@ -178,27 +178,23 @@ An exception is defined as well: Example HTML Parser Application ------------------------------- -As a basic example, below is a very basic HTML parser that uses the -:class:`HTMLParser` class to print out tags as they are encountered:: +As a basic example, below is a simple HTML parser that uses the +:class:`HTMLParser` class to print out start tags, end tags, and data +as they are encountered:: - >>> from html.parser import HTMLParser - >>> - >>> class MyHTMLParser(HTMLParser): - ... def handle_starttag(self, tag, attrs): - ... print("Encountered a {} start tag".format(tag)) - ... def handle_endtag(self, tag): - ... print("Encountered a {} end tag".format(tag)) - ... - >>> page = """

<html><h1>Title</h1><p>I'm a paragraph!</p></html>""" - >>> - >>> myparser = MyHTMLParser() - >>> myparser.feed(page) - Encountered a html start tag - Encountered a h1 start tag - Encountered a h1 end tag - Encountered a p start tag - Encountered a p end tag - Encountered a html end tag + from html.parser import HTMLParser + + class MyHTMLParser(HTMLParser): + def handle_starttag(self, tag, attrs): + print("Encountered a start tag:", tag) + def handle_endtag(self, tag): + print("Encountered an end tag:", tag) + def handle_data(self, data): + print("Encountered some data:", data) + + parser = MyHTMLParser() + parser.feed('<html><head><title>Test</title></head>' + '<body><h1>Parse me!</h1></body></html>') .. rubric:: Footnotes