Overhaul of Demo/xml.

This commit is contained in:
Georg Brandl 2009-10-11 15:06:44 +00:00
parent b3f9d66ca3
commit 32855b6dcd
3 changed files with 51 additions and 38 deletions

View File

@ -1,4 +1,10 @@
"""
A simple demo that reads in an XML document and displays the number of
elements and attributes as well as a tally of elements and attributes by name.
"""
import sys import sys
from collections import defaultdict
from xml.sax import make_parser, handler from xml.sax import make_parser, handler
@ -7,16 +13,16 @@ class FancyCounter(handler.ContentHandler):
def __init__(self): def __init__(self):
self._elems = 0 self._elems = 0
self._attrs = 0 self._attrs = 0
self._elem_types = {} self._elem_types = defaultdict(int)
self._attr_types = {} self._attr_types = defaultdict(int)
def startElement(self, name, attrs): def startElement(self, name, attrs):
self._elems = self._elems + 1 self._elems += 1
self._attrs = self._attrs + len(attrs) self._attrs += len(attrs)
self._elem_types[name] = self._elem_types.get(name, 0) + 1 self._elem_types[name] += 1
for name in attrs.keys(): for name in attrs.keys():
self._attr_types[name] = self._attr_types.get(name, 0) + 1 self._attr_types[name] += 1
def endDocument(self): def endDocument(self):
print "There were", self._elems, "elements." print "There were", self._elems, "elements."
@ -30,7 +36,7 @@ class FancyCounter(handler.ContentHandler):
for pair in self._attr_types.items(): for pair in self._attr_types.items():
print "%20s %d" % pair print "%20s %d" % pair
if __name__ == '__main__':
parser = make_parser() parser = make_parser()
parser.setContentHandler(FancyCounter()) parser.setContentHandler(FancyCounter())
parser.parse(sys.argv[1]) parser.parse(sys.argv[1])

View File

@ -3,7 +3,7 @@ A simple demo that reads in an XML document and spits out an equivalent,
but not necessarily identical, document. but not necessarily identical, document.
""" """
import sys, string import sys
from xml.sax import saxutils, handler, make_parser from xml.sax import saxutils, handler, make_parser
@ -11,7 +11,7 @@ from xml.sax import saxutils, handler, make_parser
class ContentGenerator(handler.ContentHandler): class ContentGenerator(handler.ContentHandler):
def __init__(self, out = sys.stdout): def __init__(self, out=sys.stdout):
handler.ContentHandler.__init__(self) handler.ContentHandler.__init__(self)
self._out = out self._out = out
@ -40,6 +40,7 @@ class ContentGenerator(handler.ContentHandler):
# --- The main program # --- The main program
parser = make_parser() if __name__ == '__main__':
parser.setContentHandler(ContentGenerator()) parser = make_parser()
parser.parse(sys.argv[1]) parser.setContentHandler(ContentGenerator())
parser.parse(sys.argv[1])

View File

@ -1,45 +1,50 @@
"""
A demo that reads in an RSS XML document and emits an HTML file containing
a list of the individual items in the feed.
"""
import sys import sys
import codecs
from xml.sax import make_parser, handler from xml.sax import make_parser, handler
# --- Templates # --- Templates
top = \ top = """\
"""
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML> <html>
<HEAD> <head>
<TITLE>%s</TITLE> <title>%s</title>
</HEAD> <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<BODY> <body>
<H1>%s</H1> <h1>%s</h1>
""" """
bottom = \ bottom = """
"""
</ul> </ul>
<HR> <hr>
<ADDRESS> <address>
Converted to HTML by sax_rss2html.py. Converted to HTML by rss2html.py.
</ADDRESS> </address>
</BODY> </body>
</HTML> </html>
""" """
# --- The ContentHandler # --- The ContentHandler
class RSSHandler(handler.ContentHandler): class RSSHandler(handler.ContentHandler):
def __init__(self, out = sys.stdout): def __init__(self, out=sys.stdout):
handler.ContentHandler.__init__(self) handler.ContentHandler.__init__(self)
self._out = out self._out = codecs.getwriter('utf-8')(out)
self._text = "" self._text = ""
self._parent = None self._parent = None
self._list_started = 0 self._list_started = False
self._title = None self._title = None
self._link = None self._link = None
self._descr = "" self._descr = ""
@ -69,7 +74,7 @@ class RSSHandler(handler.ContentHandler):
elif name == "item": elif name == "item":
if not self._list_started: if not self._list_started:
self._out.write("<ul>\n") self._out.write("<ul>\n")
self._list_started = 1 self._list_started = True
self._out.write(' <li><a href="%s">%s</a> %s\n' % self._out.write(' <li><a href="%s">%s</a> %s\n' %
(self._link, self._title, self._descr)) (self._link, self._title, self._descr))
@ -86,6 +91,7 @@ class RSSHandler(handler.ContentHandler):
# --- Main program # --- Main program
parser = make_parser() if __name__ == '__main__':
parser.setContentHandler(RSSHandler()) parser = make_parser()
parser.parse(sys.argv[1]) parser.setContentHandler(RSSHandler())
parser.parse(sys.argv[1])