Overhaul of Demo/xml.

This commit is contained in:
Georg Brandl 2009-10-11 15:06:44 +00:00
parent b3f9d66ca3
commit 32855b6dcd
3 changed files with 51 additions and 38 deletions

View File

@ -1,4 +1,10 @@
"""
A simple demo that reads in an XML document and displays the number of
elements and attributes as well as a tally of elements and attributes by name.
"""
import sys
from collections import defaultdict
from xml.sax import make_parser, handler
@ -7,16 +13,16 @@ class FancyCounter(handler.ContentHandler):
def __init__(self):
self._elems = 0
self._attrs = 0
self._elem_types = {}
self._attr_types = {}
self._elem_types = defaultdict(int)
self._attr_types = defaultdict(int)
def startElement(self, name, attrs):
self._elems = self._elems + 1
self._attrs = self._attrs + len(attrs)
self._elem_types[name] = self._elem_types.get(name, 0) + 1
self._elems += 1
self._attrs += len(attrs)
self._elem_types[name] += 1
for name in attrs.keys():
self._attr_types[name] = self._attr_types.get(name, 0) + 1
self._attr_types[name] += 1
def endDocument(self):
print "There were", self._elems, "elements."
@ -30,7 +36,7 @@ class FancyCounter(handler.ContentHandler):
for pair in self._attr_types.items():
print "%20s %d" % pair
parser = make_parser()
parser.setContentHandler(FancyCounter())
parser.parse(sys.argv[1])
if __name__ == '__main__':
parser = make_parser()
parser.setContentHandler(FancyCounter())
parser.parse(sys.argv[1])

View File

@ -3,7 +3,7 @@ A simple demo that reads in an XML document and spits out an equivalent,
but not necessarily identical, document.
"""
import sys, string
import sys
from xml.sax import saxutils, handler, make_parser
@ -11,7 +11,7 @@ from xml.sax import saxutils, handler, make_parser
class ContentGenerator(handler.ContentHandler):
def __init__(self, out = sys.stdout):
def __init__(self, out=sys.stdout):
handler.ContentHandler.__init__(self)
self._out = out
@ -40,6 +40,7 @@ class ContentGenerator(handler.ContentHandler):
# --- The main program
parser = make_parser()
parser.setContentHandler(ContentGenerator())
parser.parse(sys.argv[1])
if __name__ == '__main__':
parser = make_parser()
parser.setContentHandler(ContentGenerator())
parser.parse(sys.argv[1])

View File

@ -1,45 +1,50 @@
"""
A demo that reads in an RSS XML document and emits an HTML file containing
a list of the individual items in the feed.
"""
import sys
import codecs
from xml.sax import make_parser, handler
# --- Templates
top = \
"""
top = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<TITLE>%s</TITLE>
</HEAD>
<html>
<head>
<title>%s</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<BODY>
<H1>%s</H1>
<body>
<h1>%s</h1>
"""
bottom = \
"""
bottom = """
</ul>
<HR>
<ADDRESS>
Converted to HTML by sax_rss2html.py.
</ADDRESS>
<hr>
<address>
Converted to HTML by rss2html.py.
</address>
</BODY>
</HTML>
</body>
</html>
"""
# --- The ContentHandler
class RSSHandler(handler.ContentHandler):
def __init__(self, out = sys.stdout):
def __init__(self, out=sys.stdout):
handler.ContentHandler.__init__(self)
self._out = out
self._out = codecs.getwriter('utf-8')(out)
self._text = ""
self._parent = None
self._list_started = 0
self._list_started = False
self._title = None
self._link = None
self._descr = ""
@ -69,7 +74,7 @@ class RSSHandler(handler.ContentHandler):
elif name == "item":
if not self._list_started:
self._out.write("<ul>\n")
self._list_started = 1
self._list_started = True
self._out.write(' <li><a href="%s">%s</a> %s\n' %
(self._link, self._title, self._descr))
@ -86,6 +91,7 @@ class RSSHandler(handler.ContentHandler):
# --- Main program
parser = make_parser()
parser.setContentHandler(RSSHandler())
parser.parse(sys.argv[1])
if __name__ == '__main__':
parser = make_parser()
parser.setContentHandler(RSSHandler())
parser.parse(sys.argv[1])