added html parser and supporting cast

1995-02-27 13:16:55 +00:00 · 1995-02-27 13:16:55 +00:00 · 7c750e1e09
parent eb9e9d2b2a
commit 7c750e1e09
6 changed files with 3014 additions and 0 deletions
--- a/Lib/Para.py
+++ b/Lib/Para.py
@ -0,0 +1,408 @@
+# Text formatting abstractions
+
+
+# Oft-used type object
+Int = type(0)
+
+
+# Represent a paragraph.  This is a list of words with associated
+# font and size information, plus indents and justification for the
+# entire paragraph.
+# Once the words have been added to a paragraph, it can be laid out
+# for different line widths.  Once laid out, it can be rendered at
+# different screen locations.  Once rendered, it can be queried
+# for mouse hits, and parts of the text can be highlighted
+class Para:
+	#
+	def __init__(self):
+		self.words = [] # The words
+		self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c'
+		self.indent_left = self.indent_right = self.indent_hang = 0
+		# Final lay-out parameters, may change
+		self.left = self.top = self.right = self.bottom = \
+			self.width = self.height = self.lines = None
+	#
+	# Add a word, computing size information for it.
+	# Words may also be added manually by appending to self.words
+	# Each word should be a 7-tuple:
+	# (font, text, width, space, stretch, ascent, descent)
+	def addword(self, d, font, text, space, stretch):
+		if font <> None:
+			d.setfont(font)
+		width = d.textwidth(text)
+		ascent = d.baseline()
+		descent = d.lineheight() - ascent
+		spw = d.textwidth(' ')
+		space = space * spw
+		stretch = stretch * spw
+		tuple = (font, text, width, space, stretch, ascent, descent)
+		self.words.append(tuple)
+	#
+	# Hooks to begin and end anchors -- insert numbers in the word list!
+	def bgn_anchor(self, id):
+		# The id is stored as a bare number between the word tuples.
+		# layout() and the rendering and hit checking callbacks treat
+		# a number greater than zero as the start of an anchor.
+		self.words.append(id)
+	#
+	def end_anchor(self, id):
+		# The id argument is not used: the end of any anchor is
+		# recorded as the number 0.
+		self.words.append(0)
+	#
+	# Return the total length (width) of the text added so far, in pixels
+	def getlength(self):
+		total = 0
+		for word in self.words:
+			if type(word) <> Int:
+				total = total + word[2] + word[3]
+		return total
+	#
+	# Tab to a given position (relative to the current left indent):
+	# remove all stretch, add fixed space up to the new indent.
+	# If the current position is already beyond the tab stop,
+	# don't add any new space (but still remove the stretch)
+	def tabto(self, tab):
+		total = 0
+		as, de = 1, 0
+		for i in range(len(self.words)):
+			word = self.words[i]
+			if type(word) == Int: continue
+			fo, te, wi, sp, st, as, de = word
+			self.words[i] = fo, te, wi, sp, 0, as, de
+			total = total + wi + sp
+		if total < tab:
+			self.words.append(None, '', 0, tab-total, 0, as, de)
+	#
+	# Make a hanging tag: tab to hang, increment indent_left by hang,
+	# and reset indent_hang to -hang
+	def makehangingtag(self, hang):
+		# The text added so far is padded up to hang by tabto().
+		# visit() adds indent_hang to the start of the first line
+		# only, so making it -hang cancels the enlarged indent_left
+		# for that line.
+		self.tabto(hang)
+		self.indent_left = self.indent_left + hang
+		self.indent_hang = -hang
+	#
+	# Decide where the line breaks will be given some screen width
+	def layout(self, linewidth):
+		self.width = linewidth
+		height = 0
+		self.lines = lines = []
+		avail1 = self.width - self.indent_left - self.indent_right
+		avail = avail1 - self.indent_hang
+		words = self.words
+		i = 0
+		n = len(words)
+		lastfont = None
+		while i < n:
+			firstfont = lastfont
+			charcount = 0
+			width = 0
+			stretch = 0
+			ascent = 0
+			descent = 0
+			lsp = 0
+			j = i
+			while i < n:
+				word = words[i]
+				if type(word) == Int:
+					if word > 0 and width >= avail:
+						break
+					i = i+1
+					continue
+				fo, te, wi, sp, st, as, de = word
+				if width + wi > avail and width > 0 and wi > 0:
+					break
+				if fo <> None:
+					lastfont = fo
+					if width == 0:
+						firstfont = fo
+				charcount = charcount + len(te) + (sp > 0)
+				width = width + wi + sp
+				lsp = sp
+				stretch = stretch + st
+				lst = st
+				ascent = max(ascent, as)
+				descent = max(descent, de)
+				i = i+1
+			while i > j and type(words[i-1]) == Int and \
+				words[i-1] > 0: i = i-1
+			width = width - lsp
+			if i < n:
+				stretch = stretch - lst
+			else:
+				stretch = 0
+			tuple = i-j, firstfont, charcount, width, stretch, \
+				ascent, descent
+			lines.append(tuple)
+			height = height + ascent + descent
+			avail = avail1
+		self.height = height
+	#
+	# Call a function for all words in a line
+	def visit(self, wordfunc, anchorfunc):
+		# Walk the lines computed by layout() from top to bottom,
+		# using self.left and self.top as the origin.  For each
+		# word tuple call
+		#   wordfunc(self, line, word, h, v, h2, v2, isfirst, islast)
+		# and for each integer anchor marker call
+		#   anchorfunc(self, line, marker, h, v)
+		# where line is the entry of self.lines the item is on,
+		# (h, v) and (h2, v2) are the top left and bottom right of
+		# the word (trailing space included), and isfirst and islast
+		# tell whether the item is the first or last one of its line.
+		# The first result that is not None ends the walk and is
+		# returned; otherwise None is returned.
+		avail1 = self.width - self.indent_left - self.indent_right
+		avail = avail1 - self.indent_hang
+		v = self.top
+		# Index in self.words of the first item of the current line
+		i = 0
+		for tuple in self.lines:
+			wordcount, firstfont, charcount, width, stretch, \
+				ascent, descent = tuple
+			h = self.left + self.indent_left
+			if i == 0: h = h + self.indent_hang
+			# Room left over, to be divided over the stretchable
+			# space when justifying on both sides
+			extra = 0
+			if self.just == 'r': h = h + avail - width
+			elif self.just == 'c': h = h + (avail - width) / 2
+			elif self.just == 'lr' and stretch > 0:
+				extra = avail - width
+			v2 = v + ascent + descent
+			for j in range(i, i+wordcount):
+				word = self.words[j]
+				if type(word) == Int:
+					ok = anchorfunc(self, tuple, word, \
+							h, v)
+					if ok <> None: return ok
+					continue
+				fo, te, wi, sp, st, as, de = word
+				if extra > 0 and stretch > 0:
+					# This word gets its share of what is
+					# left of the extra room
+					ex = extra * st / stretch
+					extra = extra - ex
+					stretch = stretch - st
+				else:
+					ex = 0
+				h2 = h + wi + sp + ex
+				ok = wordfunc(self, tuple, word, h, v, \
+					h2, v2, (j==i), (j==i+wordcount-1))
+				if ok <> None: return ok
+				h = h2
+			v = v2
+			i = i + wordcount
+			avail = avail1
+	#
+	# Render a paragraph in "drawing object" d, using the rectangle
+	# given by (left, top, right) with an unspecified bottom.
+	# Return the computed bottom of the text.
+	def render(self, d, left, top, right):
+		# The line breaks are recomputed only if the width changed
+		if self.width <> right-left:
+			self.layout(right-left)
+		self.left = left
+		self.top = top
+		self.right = right
+		self.bottom = self.top + self.height
+		# No anchor is open at the start of the paragraph
+		self.anchorid = 0
+		try:
+			# The callbacks find the drawing object in self.d;
+			# it is reset to None afterwards, also on errors
+			self.d = d
+			self.visit(self.__class__._renderword, \
+				   self.__class__._renderanchor)
+		finally:
+			self.d = None
+		return self.bottom
+	#
+	def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+		# Word callback for visit(), used by render(): draw one word
+		# on self.d.  Always returns None so the walk continues.
+		if word[0] <> None: self.d.setfont(word[0])
+		# tuple[5] is the ascent of the line, word[5] that of the
+		# word, so all words of a line share one baseline
+		baseline = v + tuple[5]
+		self.d.text((h, baseline - word[5]), word[1])
+		# Words inside an anchor are underlined
+		if self.anchorid > 0:
+			self.d.line((h, baseline+2), (h2, baseline+2))
+	#
+	def _renderanchor(self, tuple, word, h, v):
+		# Anchor callback for visit(), used by render(): remember
+		# the marker; 0 (end of anchor) switches underlining off.
+		self.anchorid = word
+	#
+	# Return which anchor(s) was hit by the mouse
+	def hitcheck(self, mouseh, mousev):
+		# Returns a list with the id of the enclosing anchor for
+		# each word that contains the point (mouseh, mousev); the
+		# list is empty if no anchor was hit.  The paragraph must
+		# have been laid out and positioned before.
+		self.mouseh = mouseh
+		self.mousev = mousev
+		self.anchorid = 0
+		self.hits = []
+		self.visit(self.__class__._hitcheckword, \
+			   self.__class__._hitcheckanchor)
+		return self.hits
+	#
+	def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+		# Word callback for visit(), used by hitcheck(): record the
+		# current anchor if the mouse is inside this word's box.
+		if self.anchorid > 0 and h <= self.mouseh <= h2 and \
+			v <= self.mousev <= v2:
+			self.hits.append(self.anchorid)
+	#
+	def _hitcheckanchor(self, tuple, word, h, v):
+		# Anchor callback for visit(), used by hitcheck(): remember
+		# which anchor (0 for none) the following words belong to.
+		self.anchorid = word
+	#
+	# Return whether the given anchor id is present
+	def hasanchor(self, id):
+		# Both id and -id are looked for among the anchor markers
+		# in the word list.
+		# NOTE(review): nothing in this file inserts negative
+		# markers; presumably a caller does -- confirm.
+		return id in self.words or -id in self.words
+	#
+	# Extract the raw text from the word list, substituting one space
+	# for non-empty inter-word space, and terminating with '\n'
+	def extract(self):
+		text = ''
+		for w in self.words:
+			if type(w) <> Int:
+				word = w[1]
+				if w[3]: word = word + ' '
+				text = text + word
+		return text + '\n'
+	#
+	# Return which character position was hit by the mouse, as
+	# an offset in the entire text as returned by extract().
+	# Return None if the mouse was not in this paragraph
+	def whereis(self, d, mouseh, mousev):
+		# d is used to measure individual characters.  The paragraph
+		# must have been laid out and positioned before.
+		if mousev < self.top or mousev > self.bottom:
+			return None
+		self.mouseh = mouseh
+		self.mousev = mousev
+		# State for _whereisword(): the font in effect and the
+		# number of characters in the words passed so far
+		self.lastfont = None
+		self.charcount = 0
+		try:
+			self.d = d
+			return self.visit(self.__class__._whereisword, \
+					  self.__class__._whereisanchor)
+		finally:
+			self.d = None
+	#
+	def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+		# Word callback for visit(), used by whereis().  Returns
+		# None to continue with the next word if the mouse is not
+		# in this word, else the character offset that was hit.
+		fo, te, wi, sp, st, as, de = word
+		if fo <> None: self.lastfont = fo
+		# Remember where the text really starts
+		h = h1
+		# The first and last word of a line also catch everything
+		# to the left and to the right of the line
+		if isfirst: h1 = 0
+		if islast: h2 = 999999
+		if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
+			self.charcount = self.charcount + len(te) + (sp > 0)
+			return
+		if self.lastfont <> None:
+			self.d.setfont(self.lastfont)
+		# Find the character boundary nearest to the mouse: left of
+		# the middle of a character counts as before it
+		cc = 0
+		for c in te:
+			cw = self.d.textwidth(c)
+			if self.mouseh <= h + cw/2:
+				return self.charcount + cc
+			cc = cc+1
+			h = h+cw
+		self.charcount = self.charcount + cc
+		# The mouse is in the space after the word: choose between
+		# the positions before and after that space
+		if self.mouseh <= (h+h2) / 2:
+			return self.charcount
+		else:
+			return self.charcount + 1
+	#
+	def _whereisanchor(self, tuple, word, h, v):
+		# Anchor callback for visit(), used by whereis(): anchor
+		# markers are of no interest here.
+		pass
+	#
+	# Return screen position corresponding to position in paragraph.
+	# Return tuple (h, vtop, vbaseline, vbottom).
+	# This is more or less the inverse of whereis()
+	def screenpos(self, d, pos):
+		# d is used to measure text.  The paragraph must have been
+		# laid out and positioned before.
+		if pos < 0:
+			# Before the text: the left end of the first line
+			ascent, descent = self.lines[0][5:7]
+			return self.left, self.top, self.top + ascent, \
+				self.top + ascent + descent
+		# State for _screenposword(): the number of characters still
+		# to be skipped and the font in effect
+		self.pos = pos
+		self.lastfont = None
+		try:
+			self.d = d
+			ok = self.visit(self.__class__._screenposword, \
+					self.__class__._screenposanchor)
+		finally:
+			self.d = None
+		if ok == None:
+			# Beyond the text: the right end of the last line
+			ascent, descent = self.lines[-1][5:7]
+			ok = self.right, self.bottom - ascent - descent, \
+				self.bottom - descent, self.bottom
+		return ok
+	#
+	def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+		fo, te, wi, sp, st, as, de = word
+		if fo <> None: self.lastfont = fo
+		cc = len(te) + (sp > 0)
+		if self.pos > cc:
+			self.pos = self.pos - cc
+			return
+		if self.pos < cc:
+			self.d.setfont(self.lastfont)
+			h = h1 + self.d.textwidth(te[:self.pos])
+		else:
+			h = h2
+		ascent, descent = tuple[5:7]
+		return h, v1, v1+ascent, v2
+	#
+	def _screenposanchor(self, tuple, word, h, v):
+		# Anchor callback for visit(), used by screenpos(): anchor
+		# markers are of no interest here.
+		pass
+	#
+	# Invert the stretch of text between pos1 and pos2.
+	# If pos1 is None, the beginning is implied;
+	# if pos2 is None, the end is implied.
+	# Undoes its own effect when called again with the same arguments
+	def invert(self, d, pos1, pos2):
+		# pos1 and pos2 are character offsets as used by whereis()
+		# and screenpos(); both are converted to tuples
+		# (h, top, baseline, bottom) first.
+		if pos1 == None:
+			pos1 = self.left, self.top, self.top, self.top
+		else:
+			pos1 = self.screenpos(d, pos1)
+		if pos2 == None:
+			pos2 = self.right, self.bottom,self.bottom,self.bottom
+		else:
+			pos2 = self.screenpos(d, pos2)
+		h1, top1, baseline1, bottom1 = pos1
+		h2, top2, baseline2, bottom2 = pos2
+		if bottom1 <= top2:
+			# The positions are on different lines: invert the
+			# rest of the first line, then any whole lines in
+			# between, and let the final call below do the start
+			# of the last line
+			d.invert((h1, top1), (self.right, bottom1))
+			h1 = self.left
+			if bottom1 < top2:
+				d.invert((h1, bottom1), (self.right, top2))
+			top1, bottom1 = top2, bottom2
+		d.invert((h1, top1), (h2, bottom2))
+
+
+# Test class Para
+# XXX This was last used on the Mac, hence the weird fonts...
+def test():
+	# Interactive test: show one paragraph for each justification
+	# in a stdwin window and let the user select text with the
+	# mouse.  Runs until the window is closed.
+	import stdwin
+	from stdwinevents import *
+	words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
+		'the', 'lazy', 'dog.'
+	paralist = []
+	for just in 'l', 'r', 'lr', 'c':
+		p = Para()
+		p.just = just
+		p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
+		for word in words[1:-1]:
+			p.addword(stdwin, None, word, 1, 1)
+		p.addword(stdwin, None, words[-1], 2, 4)
+		p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
+		p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
+		paralist.append(p)
+	window = stdwin.open('Para.test()')
+	# The current selection: start and stop are character offsets
+	# in paragraph selpara
+	start = stop = selpara = None
+	while 1:
+		etype, win, detail = stdwin.getevent()
+		if etype == WE_CLOSE:
+			break
+		if etype == WE_SIZE:
+			window.change((0, 0), (1000, 1000))
+		if etype == WE_DRAW:
+			width, height = window.getwinsize()
+			d = None
+			try:
+				d = window.begindrawing()
+				d.cliprect(detail)
+				d.erase(detail)
+				v = 0
+				for p in paralist:
+					v = p.render(d, 0, v, width)
+					if p == selpara and \
+					   start <> None and stop <> None:
+						p.invert(d, start, stop)
+			finally:
+				if d: d.close()
+		if etype == WE_MOUSE_DOWN:
+			# Undo the highlighting of the old selection, then
+			# find the paragraph and offset where the new one
+			# starts
+			if selpara and start <> None and stop <> None:
+				d = window.begindrawing()
+				selpara.invert(d, start, stop)
+				d.close()
+			start = stop = selpara = None
+			mouseh, mousev = detail[0]
+			for p in paralist:
+				start = p.whereis(stdwin, mouseh, mousev)
+				if start <> None:
+					selpara = p
+					break
+		if etype == WE_MOUSE_UP and start <> None and selpara:
+			# The selection must end in the paragraph where it
+			# started, else it is dropped
+			mouseh, mousev = detail[0]
+			stop = selpara.whereis(stdwin, mouseh, mousev)
+			if stop == None: start = selpara = None
+			else:
+				if start > stop:
+					start, stop = stop, start
+				d = window.begindrawing()
+				selpara.invert(d, start, stop)
+				d.close()
+	window.close()
--- a/Lib/fmt.py
+++ b/Lib/fmt.py
@ -0,0 +1,621 @@
+# Text formatting abstractions
+
+
+import string
+import Para
+
+
+# A formatter back-end object has one method that is called by the formatter:
+# addpara(p), where p is a paragraph object.  For example:
+
+
+# Formatter back-end to do nothing at all with the paragraphs
+class NullBackEnd:
+	#
+	def __init__(self):
+		pass
+	#
+	# Receive a completed paragraph (and throw it away)
+	def addpara(self, p):
+		pass
+	#
+	# Hooks for the begin and end of an anchor; ignored here
+	def bgn_anchor(self, id):
+		pass
+	#
+	def end_anchor(self, id):
+		pass
+
+
+# Formatter back-end to collect the paragraphs in a list
+class SavingBackEnd(NullBackEnd):
+	#
+	def __init__(self):
+		self.paralist = []
+	#
+	def addpara(self, p):
+		self.paralist.append(p)
+	#
+	def hitcheck(self, h, v):
+		hits = []
+		for p in self.paralist:
+			if p.top <= v <= p.bottom:
+				for id in p.hitcheck(h, v):
+					if id not in hits:
+						hits.append(id)
+		return hits
+	#
+	def extract(self):
+		text = ''
+		for p in self.paralist:
+			text = text + (p.extract())
+		return text
+	#
+	def extractpart(self, long1, long2):
+		if long1 > long2: long1, long2 = long2, long1
+		para1, pos1 = long1
+		para2, pos2 = long2
+		text = ''
+		while para1 < para2:
+			ptext = self.paralist[para1].extract()
+			text = text + ptext[pos1:]
+			pos1 = 0
+			para1 = para1 + 1
+		ptext = self.paralist[para2].extract()
+		return text + ptext[pos1:pos2]
+	#
+	def whereis(self, d, h, v):
+		total = 0
+		for i in range(len(self.paralist)):
+			p = self.paralist[i]
+			result = p.whereis(d, h, v)
+			if result <> None:
+				return i, result
+		return None
+	#
+	def roundtowords(self, long1, long2):
+		i, offset = long1
+		text = self.paralist[i].extract()
+		while offset > 0 and text[offset-1] <> ' ': offset = offset-1
+		long1 = i, offset
+		#
+		i, offset = long2
+		text = self.paralist[i].extract()
+		n = len(text)
+		while offset < n-1 and text[offset] <> ' ': offset = offset+1
+		long2 = i, offset
+		#
+		return long1, long2
+	#
+	def roundtoparagraphs(self, long1, long2):
+		long1 = long1[0], 0
+		long2 = long2[0], len(self.paralist[long2[0]].extract())
+		return long1, long2
+
+
+# Formatter back-end to send the text directly to the drawing object
+class WritingBackEnd(NullBackEnd):
+	#
+	# d is the drawing object to render on, width the line width
+	def __init__(self, d, width):
+		self.d = d
+		self.width = width
+		# Vertical position where the next paragraph starts
+		self.lineno = 0
+	#
+	# Render the paragraph right below the previous one
+	def addpara(self, p):
+		self.lineno = p.render(self.d, 0, self.lineno, self.width)
+
+
+# A formatter receives a stream of formatting instructions and assembles
+# these into a stream of paragraphs on to a back-end.  The assembly is
+# parametrized by a text measurement object, which must match the output
+# operations of the back-end.  The back-end is responsible for splitting
+# paragraphs up in lines of a given maximum width.  (This is done because
+# in a windowing environment, when the window size changes, there is no
+# need to redo the assembly into paragraphs, but the splitting into lines
+# must be done taking the new window size into account.)
+
+
+# Formatter base class.  Initialize it with a text measurement object,
+# which is used for text measurements, and a back-end object,
+# which receives the completed paragraphs.  The formatting methods are:
+# setfont(font)
+# setleftindent(nspaces)
+# setjust(type) where type is 'l', 'c', 'r', or 'lr'
+# flush()
+# vspace(nlines)
+# needvspace(nlines)
+# addword(word, nspaces)
+class BaseFormatter:
+	#
+	def __init__(self, d, b):
+		# Drawing object used for text measurements
+		self.d = d
+		#
+		# BackEnd object receiving completed paragraphs
+		self.b = b
+		#
+		# Parameters of the formatting model
+		self.leftindent = 0
+		self.just = 'l'
+		self.font = None
+		self.blanklines = 0
+		#
+		# Parameters derived from the current font
+		self.space = d.textwidth(' ')
+		self.line = d.lineheight()
+		self.ascent = d.baseline()
+		self.descent = self.line - self.ascent
+		#
+		# Parameter derived from the default font
+		self.n_space = self.space
+		#
+		# Current paragraph being built
+		self.para = None
+		self.nospace = 1
+		#
+		# Font to set on the next word
+		self.nextfont = None
+	#
+	# Create an empty paragraph object; derived classes may override
+	# this to use another paragraph class
+	def newpara(self):
+		return Para.Para()
+	#
+	# Change the font for the words that follow and recompute the
+	# measurements derived from it.  A font of None is ignored
+	def setfont(self, font):
+		if font == None: return
+		self.font = self.nextfont = font
+		d = self.d
+		d.setfont(font)
+		self.space = d.textwidth(' ')
+		self.line = d.lineheight()
+		self.ascent = d.baseline()
+		self.descent = self.line - self.ascent
+	#
+	# Set the left indent, in blanks of the default font.  If the
+	# indent grows and the current paragraph is still short enough
+	# to fit in the added room, it becomes a hanging tag of the
+	# text that follows; otherwise the current paragraph is ended
+	def setleftindent(self, nspaces):
+		self.leftindent = int(self.n_space * nspaces)
+		if self.para:
+			hang = self.leftindent - self.para.indent_left
+			if hang > 0 and self.para.getlength() <= hang:
+				self.para.makehangingtag(hang)
+				self.nospace = 1
+			else:
+				self.flush()
+	#
+	# Set the right indent, in blanks of the default font, of the
+	# current paragraph and end that paragraph.
+	# NOTE(review): self.rightindent is not set by __init__ and
+	# addword() does not pass it on to new paragraphs -- confirm
+	# that this is intended
+	def setrightindent(self, nspaces):
+		self.rightindent = int(self.n_space * nspaces)
+		if self.para:
+			self.para.indent_right = self.rightindent
+			self.flush()
+	#
+	# Set the justification; this also affects the current paragraph
+	def setjust(self, just):
+		self.just = just
+		if self.para:
+			self.para.just = self.just
+	#
+	# End the current paragraph, if any, and pass it to the back-end
+	def flush(self):
+		if self.para:
+			self.b.addpara(self.para)
+			self.para = None
+			# Set the current font again on the measuring object
+			if self.font <> None:
+				self.d.setfont(self.font)
+		self.nospace = 1
+	#
+	# Add vertical space of nlines lines in the current font, as a
+	# paragraph holding one empty word of that height
+	def vspace(self, nlines):
+		self.flush()
+		if nlines > 0:
+			self.para = self.newpara()
+			tuple = None, '', 0, 0, 0, int(nlines*self.line), 0
+			self.para.words.append(tuple)
+			self.flush()
+			self.blanklines = self.blanklines + nlines
+	#
+	# Make sure that at least nlines of vertical space precede what
+	# follows, counting the space that is already there
+	def needvspace(self, nlines):
+		self.flush() # Just to be sure
+		if nlines > self.blanklines:
+			self.vspace(nlines - self.blanklines)
+	#
+	# Add a word to the current paragraph, starting a new paragraph
+	# if necessary.  space is the space following the word in
+	# blanks of the current font; it is used for the stretch too.
+	# An empty word at the start of a paragraph is ignored
+	def addword(self, text, space):
+		if self.nospace and not text:
+			return
+		self.nospace = 0
+		self.blanklines = 0
+		if not self.para:
+			self.para = self.newpara()
+			self.para.indent_left = self.leftindent
+			self.para.just = self.just
+			# The first word of a paragraph carries the font
+			self.nextfont = self.font
+		space = int(space * self.space)
+		self.para.words.append(self.nextfont, text, \
+			self.d.textwidth(text), space, space, \
+			self.ascent, self.descent)
+		self.nextfont = None
+	#
+	# Begin and end an anchor in the current paragraph.  If there is
+	# no current paragraph, one is started with an empty word
+	# (nospace is cleared so that addword() accepts that word)
+	def bgn_anchor(self, id):
+		if not self.para:
+			self.nospace = 0
+			self.addword('', 0)
+		self.para.bgn_anchor(id)
+	#
+	def end_anchor(self, id):
+		if not self.para:
+			self.nospace = 0
+			self.addword('', 0)
+		self.para.end_anchor(id)
+
+
+# Measuring object for measuring text as viewed on a tty
+class NullMeasurer:
+	#
+	def __init__(self):
+		pass
+	#
+	# There is only one font
+	def setfont(self, font):
+		pass
+	#
+	# Every character is one unit wide
+	def textwidth(self, text):
+		return len(text)
+	#
+	# Every line is one unit high, all of it below the baseline
+	def lineheight(self):
+		return 1
+	#
+	def baseline(self):
+		return 0
+
+
+# Drawing object for writing plain ASCII text to a file
+class FileWriter:
+	#
+	# fp is the file object to write to
+	def __init__(self, fp):
+		self.fp = fp
+		# Line and column where the next character written ends up
+		self.lineno, self.colno = 0, 0
+	#
+	def setfont(self, font):
+		pass
+	#
+	# Write str at column h of line v, using newlines and blanks to
+	# get there.  Empty strings are ignored; a string containing a
+	# newline raises ValueError
+	def text(self, (h, v), str):
+		if not str: return
+		if '\n' in str:
+			raise ValueError, 'can\'t write \\n'
+		while self.lineno < v:
+			self.fp.write('\n')
+			self.colno, self.lineno = 0, self.lineno + 1
+		while self.lineno > v:
+			# XXX This should never happen...
+			self.fp.write('\033[A') # ANSI up arrow
+			self.lineno = self.lineno - 1
+		if self.colno < h:
+			self.fp.write(' ' * (h - self.colno))
+		elif self.colno > h:
+			# Already beyond the column: back up with backspaces
+			self.fp.write('\b' * (self.colno - h))
+		self.colno = h
+		self.fp.write(str)
+		self.colno = h + len(str)
+
+
+# Formatting class to do nothing at all with the data
+class NullFormatter(BaseFormatter):
+	#
+	# Measure text as on a tty and throw the paragraphs away
+	def __init__(self):
+		d = NullMeasurer()
+		b = NullBackEnd()
+		BaseFormatter.__init__(self, d, b)
+
+
+# Formatting class to write directly to a file
+class WritingFormatter(BaseFormatter):
+	#
+	# fp is the file object to write to, width the line width in
+	# characters
+	def __init__(self, fp, width):
+		dm = NullMeasurer()
+		dw = FileWriter(fp)
+		b = WritingBackEnd(dw, width)
+		BaseFormatter.__init__(self, dm, b)
+		# Pretend a blank line was written already, so that no
+		# blank line is produced at the top of the output
+		self.blanklines = 1
+	#
+	# Suppress multiple blank lines
+	def needvspace(self, nlines):
+		BaseFormatter.needvspace(self, min(1, nlines))
+
+
+# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
+# _italic text_ and _underlined words_, and `quoted text'.
+# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
+# italic, bold, underline, quote).
+# Moreover, if the font is in upper case, the text is converted to
+# UPPER CASE.
+class FunnyFormatter(WritingFormatter):
+	#
+	# Put the font markers into the paragraph just before it is
+	# passed on to the back-end
+	def flush(self):
+		if self.para: finalize(self.para)
+		WritingFormatter.flush(self)
+
+
+# Surrounds *bold words* and _italic text_ in a paragraph with
+# appropriate markers, fixing the size (assuming these characters'
+# width is 1).
+openchar = \
+    {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
+closechar = \
+    {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
+def finalize(para):
+	oldfont = curfont = 'r'
+	para.words.append('r', '', 0, 0, 0, 0) # temporary, deleted at end
+	for i in range(len(para.words)):
+		fo, te, wi = para.words[i][:3]
+		if fo <> None: curfont = fo
+		if curfont <> oldfont:
+			if closechar.has_key(oldfont):
+				c = closechar[oldfont]
+				j = i-1
+				while j > 0 and para.words[j][1] == '': j = j-1
+				fo1, te1, wi1 = para.words[j][:3]
+				te1 = te1 + c
+				wi1 = wi1 + len(c)
+				para.words[j] = (fo1, te1, wi1) + \
+					para.words[j][3:]
+			if openchar.has_key(curfont) and te:
+				c = openchar[curfont]
+				te = c + te
+				wi = len(c) + wi
+				para.words[i] = (fo, te, wi) + \
+					para.words[i][3:]
+			if te: oldfont = curfont
+			else: oldfont = 'r'
+		if curfont in string.uppercase:
+			te = string.upper(te)
+			para.words[i] = (fo, te, wi) + para.words[i][3:]
+	del para.words[-1]
+
+
+# Formatter back-end to draw the text in a window.
+# This has an option to draw while the paragraphs are being added,
+# to minimize the delay before the user sees anything.
+# This manages the entire "document" of the window.
+class StdwinBackEnd(SavingBackEnd):
+	#
+	def __init__(self, window, drawnow):
+		self.window = window
+		self.drawnow = drawnow
+		self.width = window.getwinsize()[0]
+		self.selection = None
+		self.height = 0
+		window.setorigin(0, 0)
+		window.setdocsize(0, 0)
+		self.d = window.begindrawing()
+		SavingBackEnd.__init__(self)
+	#
+	def finish(self):
+		self.d.close()
+		self.d = None
+		self.window.setdocsize(0, self.height)
+	#
+	def addpara(self, p):
+		self.paralist.append(p)
+		if self.drawnow:
+			self.height = \
+				p.render(self.d, 0, self.height, self.width)
+		else:
+			p.layout(self.width)
+			p.left = 0
+			p.top = self.height
+			p.right = self.width
+			p.bottom = self.height + p.height
+			self.height = p.bottom
+	#
+	def resize(self):
+		self.window.change((0, 0), (self.width, self.height))
+		self.width = self.window.getwinsize()[0]
+		self.height = 0
+		for p in self.paralist:
+			p.layout(self.width)
+			p.left = 0
+			p.top = self.height
+			p.right = self.width
+			p.bottom = self.height + p.height
+			self.height = p.bottom
+		self.window.change((0, 0), (self.width, self.height))
+		self.window.setdocsize(0, self.height)
+	#
+	def redraw(self, area):
+		d = self.window.begindrawing()
+		(left, top), (right, bottom) = area
+		d.erase(area)
+		d.cliprect(area)
+		for p in self.paralist:
+			if top < p.bottom and p.top < bottom:
+				v = p.render(d, p.left, p.top, p.right)
+		if self.selection:
+			self.invert(d, self.selection)
+		d.close()
+	#
+	def setselection(self, new):
+		if new:
+			long1, long2 = new
+			pos1 = long1[:3]
+			pos2 = long2[:3]
+			new = pos1, pos2
+		if new <> self.selection:
+			d = self.window.begindrawing()
+			if self.selection:
+				self.invert(d, self.selection)
+			if new:
+				self.invert(d, new)
+			d.close()
+			self.selection = new
+	#
+	def getselection(self):
+		return self.selection
+	#
+	def extractselection(self):
+		if self.selection:
+			a, b = self.selection
+			return self.extractpart(a, b)
+		else:
+			return None
+	#
+	def invert(self, d, region):
+		long1, long2 = region
+		if long1 > long2: long1, long2 = long2, long1
+		para1, pos1 = long1
+		para2, pos2 = long2
+		while para1 < para2:
+			self.paralist[para1].invert(d, pos1, None)
+			pos1 = None
+			para1 = para1 + 1
+		self.paralist[para2].invert(d, pos1, pos2)
+	#
+	def search(self, prog):
+		import regex, string
+		if type(prog) == type(''):
+			prog = regex.compile(string.lower(prog))
+		if self.selection:
+			iold = self.selection[0][0]
+		else:
+			iold = -1
+		hit = None
+		for i in range(len(self.paralist)):
+			if i == iold or i < iold and hit:
+				continue
+			p = self.paralist[i]
+			text = string.lower(p.extract())
+			if prog.search(text) >= 0:
+				a, b = prog.regs[0]
+				long1 = i, a
+				long2 = i, b
+				hit = long1, long2
+				if i > iold:
+					break
+		if hit:
+			self.setselection(hit)
+			i = hit[0][0]
+			p = self.paralist[i]
+			self.window.show((p.left, p.top), (p.right, p.bottom))
+			return 1
+		else:
+			return 0
+	#
+	def showanchor(self, id):
+		for i in range(len(self.paralist)):
+			p = self.paralist[i]
+			if p.hasanchor(id):
+				long1 = i, 0
+				long2 = i, len(p.extract())
+				hit = long1, long2
+				self.setselection(hit)
+				self.window.show( \
+					(p.left, p.top), (p.right, p.bottom))
+				break
+
+
+# GL extensions
+
+class GLFontCache:
+	#
+	# Keeps track of the current font and caches font handles.
+	# A font key is a string 'name size'; the empty string stands
+	# for 'Times-Roman 12' and a key without a blank gets size 12
+	#
+	def __init__(self):
+		self.reset()
+		self.setfont('')
+	#
+	# Forget the current font and all cached handles
+	def reset(self):
+		self.fontkey = None
+		self.fonthandle = None
+		self.fontinfo = None
+		self.fontcache = {}
+	#
+	def close(self):
+		self.reset()
+	#
+	# Make the font given by fontkey the current font, setting
+	# self.fontkey, self.fonthandle and self.fontinfo
+	def setfont(self, fontkey):
+		if fontkey == '':
+			fontkey = 'Times-Roman 12'
+		elif ' ' not in fontkey:
+			fontkey = fontkey + ' 12'
+		if fontkey == self.fontkey:
+			return
+		if self.fontcache.has_key(fontkey):
+			handle = self.fontcache[fontkey]
+		else:
+			import string
+			i = string.index(fontkey, ' ')
+			name, sizestr = fontkey[:i], fontkey[i:]
+			# NOTE(review): eval() runs whatever follows the
+			# first blank of the font key as an expression; this
+			# is only safe if font keys never come from
+			# untrusted input -- confirm against the callers
+			size = eval(sizestr)
+			# key1 is the cache key for the unscaled font as
+			# found by fm.findfont(), key the one for the font
+			# scaled to size
+			key1 = name + ' 1'
+			key = name + ' ' + `size`
+			# NB key may differ from fontkey!
+			if self.fontcache.has_key(key):
+				handle = self.fontcache[key]
+			else:
+				if self.fontcache.has_key(key1):
+					handle = self.fontcache[key1]
+				else:
+					import fm
+					handle = fm.findfont(name)
+					self.fontcache[key1] = handle
+				handle = handle.scalefont(size)
+				self.fontcache[fontkey] = \
+					self.fontcache[key] = handle
+		self.fontkey = fontkey
+		# Only switch fonts if the handle really changed
+		if self.fonthandle <> handle:
+			self.fonthandle = handle
+			self.fontinfo = handle.getfontinfo()
+			handle.setfont()
+
+
+class GLMeasurer(GLFontCache):
+	#
+	# Text measurements in the current font, with the methods a
+	# formatter and Para.addword() call on a measuring object.
+	# NOTE(review): self.fontinfo[6] is used as the line height
+	# and self.fontinfo[3] as the part of it below the baseline;
+	# confirm against the fm documentation of getfontinfo()
+	#
+	def textwidth(self, text):
+		return self.fonthandle.getstrwidth(text)
+	#
+	def baseline(self):
+		return self.fontinfo[6] - self.fontinfo[3]
+	#
+	def lineheight(self):
+		return self.fontinfo[6]
+
+
+class GLWriter(GLFontCache):
+	#
+	# NOTES:
+	# (1) Use gl.ortho2 to use X pixel coordinates!
+	#
+	def text(self, (h, v), text):
+		import gl, fm
+		gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
+		fm.prstr(text)
+	#
+	def setfont(self, fontkey):
+		oldhandle = self.fonthandle
+		GLFontCache.setfont(fontkey)
+		if self.fonthandle <> oldhandle:
+			handle.setfont()
+
+
+# One object that can both measure and draw text; GLBackEnd uses it
+# as its drawing object
+class GLMeasurerWriter(GLMeasurer, GLWriter):
+	pass
+
+
+class GLBackEnd(SavingBackEnd):
+	#
+	def __init__(self, wid):
+		import gl
+		gl.winset(wid)
+		self.wid = wid
+		self.width = gl.getsize()[1]
+		self.height = 0
+		self.d = GLMeasurerWriter()
+		SavingBackEnd.__init__(self)
+	#
+	def finish(self):
+		pass
+	#
+	def addpara(self, p):
+		self.paralist.append(p)
+		self.height = p.render(self.d, 0, self.height, self.width)
+	#
+	def redraw(self):
+		import gl
+		gl.winset(self.wid)
+		width = gl.getsize()[1]
+		if width <> self.width:
+			setdocsize = 1
+			self.width = width
+			for p in self.paralist:
+				p.top = p.bottom = None
+		d = self.d
+		v = 0
+		for p in self.paralist:
+			v = p.render(d, 0, v, width)
--- a/Lib/htmllib.py
+++ b/Lib/htmllib.py
@ -0,0 +1,635 @@
+# A parser for HTML documents
+
+
+# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
+# describe hypertext documents
+#
+# SGML: Standard Generalized Markup Language
+#
+# WWW: World-Wide Web; a distributed hypertext system developed at CERN
+#
+# CERN: European Particle Physics Laboratory in Geneva, Switzerland
+
+
+# This file is only concerned with parsing and formatting HTML
+# documents, not with the other (hypertext and networking) aspects of
+# the WWW project.  (It does support highlighting of anchors.)
+
+
+import os
+import sys
+import regex
+import string
+import sgmllib
+
+
+# Basic HTML parser: an SGML parser with one extra entity and with
+# handlers for the tags that introduce literal text.
+class HTMLParser(sgmllib.SGMLParser):
+
+	# Copy base class entities and add some
+	# (a fresh dictionary is filled in, so the base class table is
+	# not modified)
+	entitydefs = {}
+	for key in sgmllib.SGMLParser.entitydefs.keys():
+		entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
+	entitydefs['bullet'] = '*'
+
+	# Provided -- handlers for tags introducing literal text
+	
+	# <listing>: call setliteral(), then report the start of
+	# literal mode
+	def start_listing(self, attrs):
+		self.setliteral('listing')
+		self.literal_bgn('listing', attrs)
+
+	def end_listing(self):
+		self.literal_end('listing')
+
+	# <xmp>: handled the same way as <listing>
+	def start_xmp(self, attrs):
+		self.setliteral('xmp')
+		self.literal_bgn('xmp', attrs)
+
+	def end_xmp(self):
+		self.literal_end('xmp')
+
+	# <plaintext>: call setnomoretags(); there is no end handler,
+	# so literal_end() is never called for this tag
+	def do_plaintext(self, attrs):
+		self.setnomoretags()
+		self.literal_bgn('plaintext', attrs)
+
+	# To be overridden -- begin/end literal mode
+	# (tag is 'listing', 'xmp' or 'plaintext')
+	def literal_bgn(self, tag, attrs): pass
+	def literal_end(self, tag): pass
+
+
+# Next level of sophistication -- collect anchors, title, nextid and isindex
+class CollectingParser(HTMLParser):
+	#
+	def __init__(self):
+		HTMLParser.__init__(self)
+		self.savetext = None
+		self.nextid = ''
+		self.isindex = 0
+		self.title = ''
+		self.inanchor = 0
+		self.anchors = []
+		self.anchornames = []
+		self.anchortypes = []
+	#
+	def start_a(self, attrs):
+		self.inanchor = 0
+		href = ''
+		name = ''
+		type = ''
+		for attrname, value in attrs:
+			if attrname == 'href':
+				href = value
+			if attrname == 'name=':
+				name = value
+			if attrname == 'type=':
+				type = string.lower(value)
+		if not (href or name):
+			return
+		self.anchors.append(href)
+		self.anchornames.append(name)
+		self.anchortypes.append(type)
+		self.inanchor = len(self.anchors)
+		if not href:
+			self.inanchor = -self.inanchor
+	#
+	def end_a(self):
+		if self.inanchor > 0:
+			# Don't show anchors pointing into the current document
+			if self.anchors[self.inanchor-1][:1] <> '#':
+				self.handle_data('[' + `self.inanchor` + ']')
+		self.inanchor = 0
+	#
+	def start_header(self, attrs): pass
+	def end_header(self): pass
+	#
+	# (head is the same as header)
+	def start_head(self, attrs): pass
+	def end_head(self): pass
+	#
+	def start_body(self, attrs): pass
+	def end_body(self): pass
+	#
+	def do_nextid(self, attrs):
+		self.nextid = attrs
+	#
+	def do_isindex(self, attrs):
+		self.isindex = 1
+	#
+	def start_title(self, attrs):
+		self.savetext = ''
+	#
+	def end_title(self):
+		if self.savetext <> None:
+			self.title = self.savetext
+			self.savetext = None
+	#
+	def handle_data(self, text):
+		if self.savetext is not None:
+			self.savetext = self.savetext + text
+
+
+# Formatting parser -- takes a formatter and a style sheet as arguments
+
+# XXX The use of style sheets should change: for each tag and end tag
+# there should be a style definition, and a style definition should
+# encompass many more parameters: font, justification, indentation,
+# vspace before, vspace after, hanging tag...
+
+# Patterns used by FormattingParser.handle_data() to split text:
+# wordprog matches a (possibly empty) run of non-blank characters,
+# spaceprog a (possibly empty) run of spaces, tabs and newlines
+wordprog = regex.compile('[^ \t\n]*')
+spaceprog = regex.compile('[ \t\n]*')
+
+class FormattingParser(CollectingParser):
+
+	def __init__(self, formatter, stylesheet):
+		CollectingParser.__init__(self)
+		self.fmt = formatter
+		self.stl = stylesheet
+		self.savetext = None
+		self.compact = 0
+		self.nofill = 0
+		self.resetfont()
+		self.setindent(self.stl.stdindent)
+
+	def resetfont(self):
+		self.fontstack = []
+		self.stylestack = []
+		self.fontset = self.stl.stdfontset
+		self.style = ROMAN
+		self.passfont()
+
+	def passfont(self):
+		font = self.fontset[self.style]
+		self.fmt.setfont(font)
+
+	def pushstyle(self, style):
+		self.stylestack.append(self.style)
+		self.style = min(style, len(self.fontset)-1)
+		self.passfont()
+
+	def popstyle(self):
+		self.style = self.stylestack[-1]
+		del self.stylestack[-1]
+		self.passfont()
+
+	def pushfontset(self, fontset, style):
+		self.fontstack.append(self.fontset)
+		self.fontset = fontset
+		self.pushstyle(style)
+
+	def popfontset(self):
+		self.fontset = self.fontstack[-1]
+		del self.fontstack[-1]
+		self.popstyle()
+
+	def flush(self):
+		self.fmt.flush()
+
+	def setindent(self, n):
+		self.fmt.setleftindent(n)
+
+	def needvspace(self, n):
+		self.fmt.needvspace(n)
+
+	def close(self):
+		HTMLParser.close(self)
+		self.fmt.flush()
+
+	def handle_literal(self, text):
+		lines = string.splitfields(text, '\n')
+		for i in range(1, len(lines)):
+			lines[i] = string.expandtabs(lines[i], 8)
+		for line in lines[:-1]:
+			self.fmt.addword(line, 0)
+			self.fmt.flush()
+			self.fmt.nospace = 0
+		for line in lines[-1:]:
+			self.fmt.addword(line, 0)
+
+	def handle_data(self, text):
+		if self.savetext is not None:
+			self.savetext = self.savetext + text
+			return
+		if self.literal:
+			self.handle_literal(text)
+			return
+		i = 0
+		n = len(text)
+		while i < n:
+			j = i + wordprog.match(text, i)
+			word = text[i:j]
+			i = j + spaceprog.match(text, j)
+			self.fmt.addword(word, i-j)
+			if self.nofill and '\n' in text[j:i]:
+				self.fmt.flush()
+				self.fmt.nospace = 0
+				i = j+1
+				while text[i-1] <> '\n': i = i+1
+
+	def literal_bgn(self, tag, attrs):
+		if tag == 'plaintext':
+			self.flush()
+		else:
+			self.needvspace(1)
+		self.pushfontset(self.stl.stdfontset, FIXED)
+		self.setindent(self.stl.literalindent)
+
+	def literal_end(self, tag):
+		self.needvspace(1)
+		self.popfontset()
+		self.setindent(self.stl.stdindent)
+
+	def start_title(self, attrs):
+		self.flush()
+		self.savetext = ''
+	# NB end_title is unchanged
+
+	def do_p(self, attrs):
+		if self.compact:
+			self.flush()
+		else:
+			self.needvspace(1)
+
+	def start_h1(self, attrs):
+		self.needvspace(2)
+		self.setindent(self.stl.h1indent)
+		self.pushfontset(self.stl.h1fontset, BOLD)
+		self.fmt.setjust('c')
+
+	def end_h1(self):
+		self.popfontset()
+		self.needvspace(2)
+		self.setindent(self.stl.stdindent)
+		self.fmt.setjust('l')
+
+	def start_h2(self, attrs):
+		self.needvspace(1)
+		self.setindent(self.stl.h2indent)
+		self.pushfontset(self.stl.h2fontset, BOLD)
+
+	def end_h2(self):
+		self.popfontset()
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	def start_h3(self, attrs):
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+		self.pushfontset(self.stl.h3fontset, BOLD)
+
+	def end_h3(self):
+		self.popfontset()
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	def start_h4(self, attrs):
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+		self.pushfontset(self.stl.stdfontset, BOLD)
+
+	def end_h4(self):
+		self.popfontset()
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	start_h5 = start_h4
+	end_h5 = end_h4
+
+	start_h6 = start_h5
+	end_h6 = end_h5
+
+	start_h7 = start_h6
+	end_h7 = end_h6
+
+	def start_ul(self, attrs):
+		self.needvspace(1)
+		for attrname, value in attrs:
+			if attrname == 'compact':
+				self.compact = 1
+				self.setindent(0)
+				break
+		else:
+			self.setindent(self.stl.ulindent)
+
+	start_dir = start_menu = start_ol = start_ul
+
+	do_li = do_p
+
+	def end_ul(self):
+		self.compact = 0
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	end_dir = end_menu = end_ol = end_ul
+
+	def start_dl(self, attrs):
+		for attrname, value in attrs:
+			if attrname == 'compact':
+				self.compact = 1
+		self.needvspace(1)
+
+	def end_dl(self):
+		self.compact = 0
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	def do_dt(self, attrs):
+		if self.compact:
+			self.flush()
+		else:
+			self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+
+	def do_dd(self, attrs):
+		self.fmt.addword('', 1)
+		self.setindent(self.stl.ddindent)
+
+	def start_address(self, attrs):
+		self.compact = 1
+		self.needvspace(1)
+		self.fmt.setjust('r')
+
+	def end_address(self):
+		self.compact = 0
+		self.needvspace(1)
+		self.setindent(self.stl.stdindent)
+		self.fmt.setjust('l')
+
+	def start_pre(self, attrs):
+		self.needvspace(1)
+		self.nofill = self.nofill + 1
+		self.pushstyle(FIXED)
+
+	def end_pre(self):
+		self.popstyle()
+		self.nofill = self.nofill - 1
+		self.needvspace(1)
+
+	start_typewriter = start_pre
+	end_typewriter = end_pre
+
+	def do_img(self, attrs):
+		self.fmt.addword('(image)', 0)
+
+	# Physical styles
+
+	def start_tt(self, attrs): self.pushstyle(FIXED)
+	def end_tt(self): self.popstyle()
+
+	def start_b(self, attrs): self.pushstyle(BOLD)
+	def end_b(self): self.popstyle()
+
+	def start_i(self, attrs): self.pushstyle(ITALIC)
+	def end_i(self): self.popstyle()
+
+	def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
+	def end_u(self): self.popstyle()
+
+	def start_r(self, attrs): self.pushstyle(ROMAN) # Not official
+	def end_r(self): self.popstyle()
+
+	# Logical styles
+
+	start_em = start_i
+	end_em = end_i
+
+	start_strong = start_b
+	end_strong = end_b
+
+	start_code = start_tt
+	end_code = end_tt
+
+	start_samp = start_tt
+	end_samp = end_tt
+
+	start_kbd = start_tt
+	end_kbd = end_tt
+
+	start_file = start_tt # unofficial
+	end_file = end_tt
+
+	start_var = start_i
+	end_var = end_i
+
+	start_dfn = start_i
+	end_dfn = end_i
+
+	start_cite = start_i
+	end_cite = end_i
+
+	start_hp1 = start_i
+	end_hp1 = start_i
+
+	start_hp2 = start_b
+	end_hp2 = end_b
+
+	def unknown_starttag(self, tag, attrs):
+		print '*** unknown <' + tag + '>'
+
+	def unknown_endtag(self, tag):
+		print '*** unknown </' + tag + '>'
+
+
+# An extension of the formatting parser which formats anchors differently.
+class AnchoringParser(FormattingParser):
+
+	def start_a(self, attrs):
+		FormattingParser.start_a(self, attrs)
+		if self.inanchor:
+			self.fmt.bgn_anchor(self.inanchor)
+
+	def end_a(self):
+		if self.inanchor:
+			self.fmt.end_anchor(self.inanchor)
+			self.inanchor = 0
+
+
+# Style sheet -- this is never instantiated, but the attributes
+# of the class object itself are used to specify fonts to be used
+# for various paragraph styles.
+# A font set is a non-empty list of fonts, in the order:
+# [roman, italic, bold, fixed].
+# When a style is not available the nearest lower style is used
+
+# Style numbers; FormattingParser uses them as indices in a font set
+ROMAN = 0
+ITALIC = 1
+BOLD = 2
+FIXED = 3
+
+# Style sheet without fonts; also the base class of the other style
+# sheets, which inherit the indents they do not override.
+class NullStylesheet:
+	# Fonts -- none
+	# (each font set has only one entry, so every style maps to it)
+	stdfontset = [None]
+	h1fontset = [None]
+	h2fontset = [None]
+	h3fontset = [None]
+	# Indents
+	# (passed to the formatter's setleftindent(); units depend on
+	# the formatter)
+	stdindent = 2
+	ddindent = 25
+	ulindent = 4
+	h1indent = 0
+	h2indent = 0
+	literalindent = 0
+
+
+# Style sheet with X11 font name patterns.
+# The header font sets have no fixed font; FormattingParser.pushstyle()
+# then uses the last (bold) entry instead.
+class X11Stylesheet(NullStylesheet):
+	stdfontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \
+		'-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \
+		]
+	h1fontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \
+		]
+	h2fontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \
+		]
+	h3fontset = [ \
+		'-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \
+		'-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \
+		'-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \
+		]
+	# Overrides the inherited definition indent
+	ddindent = 40
+
+
+class MacStylesheet(NullStylesheet):
+	stdfontset = [ \
+		('Geneva', 'p', 10), \
+		('Geneva', 'i', 10), \
+		('Geneva', 'b', 10), \
+		('Monaco', 'p', 10), \
+		]
+	h1fontset = [ \
+		('Geneva', 'p', 18), \
+		('Geneva', 'i', 18), \
+		('Geneva', 'b', 18), \
+		('Monaco', 'p', 18), \
+		]
+	h3fontset = [ \
+		('Geneva', 'p', 14), \
+		('Geneva', 'i', 14), \
+		('Geneva', 'b', 14), \
+		('Monaco', 'p', 14), \
+		]
+	h3fontset = [ \
+		('Geneva', 'p', 12), \
+		('Geneva', 'i', 12), \
+		('Geneva', 'b', 12), \
+		('Monaco', 'p', 12), \
+		]
+
+
+# Style sheet to use with stdwin: the Macintosh fonts when running
+# on a Mac, the X11 fonts everywhere else
+if os.name == 'mac':
+	StdwinStylesheet = MacStylesheet
+else:
+	StdwinStylesheet = X11Stylesheet
+
+
+# Style sheet for GL.  Each font is a string holding a font name and
+# a size -- presumably the font key that fmt.GLFontCache.setfont()
+# expects; verify against that method.
+class GLStylesheet(NullStylesheet):
+	stdfontset = [ \
+		'Helvetica 10', \
+		'Helvetica-Italic 10', \
+		'Helvetica-Bold 10', \
+		'Courier 10', \
+		]
+	h1fontset = [ \
+		'Helvetica 18', \
+		'Helvetica-Italic 18', \
+		'Helvetica-Bold 18', \
+		'Courier 18', \
+		]
+	h2fontset = [ \
+		'Helvetica 14', \
+		'Helvetica-Italic 14', \
+		'Helvetica-Bold 14', \
+		'Courier 14', \
+		]
+	h3fontset = [ \
+		'Helvetica 12', \
+		'Helvetica-Italic 12', \
+		'Helvetica-Bold 12', \
+		'Courier 12', \
+		]
+
+
+# Test program -- formats a document as plain text on standard output
+# and reports how long parsing and formatting took, exclusive of the
+# time needed to read the file
+
+def test():
+	import fmt
+	import time
+	if sys.argv[1:]: file = sys.argv[1]
+	else: file = 'test.html'
+	data = open(file, 'r').read()
+	t0 = time.time()
+	fmtr = fmt.WritingFormatter(sys.stdout, 79)
+	p = FormattingParser(fmtr, NullStylesheet)
+	p.feed(data)
+	p.close()
+	t1 = time.time()
+	print
+	print '*** Formatting time:', round(t1-t0, 3), 'seconds.'
+
+
+# Test program using stdwin
+
+def testStdwin():
+	import stdwin, fmt
+	from stdwinevents import *
+	if sys.argv[1:]: file = sys.argv[1]
+	else: file = 'test.html'
+	data = open(file, 'r').read()
+	window = stdwin.open('testStdwin')
+	b = None
+	while 1:
+		etype, ewin, edetail = stdwin.getevent()
+		if etype == WE_CLOSE:
+			break
+		if etype == WE_SIZE:
+			window.setdocsize(0, 0)
+			window.setorigin(0, 0)
+			window.change((0, 0), (10000, 30000)) # XXX
+		if etype == WE_DRAW:
+			if not b:
+				b = fmt.StdwinBackEnd(window, 1)
+				f = fmt.BaseFormatter(b.d, b)
+				p = FormattingParser(f, \
+							    MacStylesheet)
+				p.feed(data)
+				p.close()
+				b.finish()
+			else:
+				b.redraw(edetail)
+	window.close()
+
+
+# Test program using GL
+
+def testGL():
+	import gl, GL, fmt
+	if sys.argv[1:]: file = sys.argv[1]
+	else: file = 'test.html'
+	data = open(file, 'r').read()
+	W, H = 600, 600
+	gl.foreground()
+	gl.prefsize(W, H)
+	wid = gl.winopen('testGL')
+	gl.ortho2(0, W, H, 0)
+	gl.color(GL.WHITE)
+	gl.clear()
+	gl.color(GL.BLACK)
+	b = fmt.GLBackEnd(wid)
+	f = fmt.BaseFormatter(b.d, b)
+	p = FormattingParser(f, GLStylesheet)
+	p.feed(data)
+	p.close()
+	b.finish()
+	#
+	import time
+	time.sleep(5)
+
+
+# When run as a script, run the plain text test
+if __name__ == '__main__':
+	test()
--- a/Lib/lib-old/Para.py
+++ b/Lib/lib-old/Para.py
@ -0,0 +1,408 @@
+# Text formatting abstractions
+
+
+# Oft-used type object
+Int = type(0)
+
+
+# Represent a paragraph.  This is a list of words with associated
+# font and size information, plus indents and justification for the
+# entire paragraph.
+# Once the words have been added to a paragraph, it can be laid out
+# for different line widths.  Once laid out, it can be rendered at
+# different screen locations.  Once rendered, it can be queried
+# for mouse hits, and parts of the text can be highlighted
+class Para:
+	#
+	def __init__(self):
+		self.words = [] # The words
+		self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c'
+		self.indent_left = self.indent_right = self.indent_hang = 0
+		# Final lay-out parameters, may change
+		self.left = self.top = self.right = self.bottom = \
+			self.width = self.height = self.lines = None
+	#
+	# Add a word, computing size information for it.
+	# Words may also be added manually by appending to self.words
+	# Each word should be a 7-tuple:
+	# (font, text, width, space, stretch, ascent, descent)
+	def addword(self, d, font, text, space, stretch):
+		if font <> None:
+			d.setfont(font)
+		width = d.textwidth(text)
+		ascent = d.baseline()
+		descent = d.lineheight() - ascent
+		spw = d.textwidth(' ')
+		space = space * spw
+		stretch = stretch * spw
+		tuple = (font, text, width, space, stretch, ascent, descent)
+		self.words.append(tuple)
+	#
+	# Hooks to begin and end anchors -- insert numbers in the word list!
+	def bgn_anchor(self, id):
+		self.words.append(id)
+	#
+	def end_anchor(self, id):
+		self.words.append(0)
+	#
+	# Return the total length (width) of the text added so far, in pixels
+	def getlength(self):
+		total = 0
+		for word in self.words:
+			if type(word) <> Int:
+				total = total + word[2] + word[3]
+		return total
+	#
+	# Tab to a given position (relative to the current left indent):
+	# remove all stretch, add fixed space up to the new indent.
+	# If the current position is already beying the tab stop,
+	# don't add any new space (but still remove the stretch)
+	def tabto(self, tab):
+		total = 0
+		as, de = 1, 0
+		for i in range(len(self.words)):
+			word = self.words[i]
+			if type(word) == Int: continue
+			fo, te, wi, sp, st, as, de = word
+			self.words[i] = fo, te, wi, sp, 0, as, de
+			total = total + wi + sp
+		if total < tab:
+			self.words.append(None, '', 0, tab-total, 0, as, de)
+	#
+	# Make a hanging tag: tab to hang, increment indent_left by hang,
+	# and reset indent_hang to -hang
+	def makehangingtag(self, hang):
+		self.tabto(hang)
+		self.indent_left = self.indent_left + hang
+		self.indent_hang = -hang
+	#
+	# Decide where the line breaks will be given some screen width
+	def layout(self, linewidth):
+		self.width = linewidth
+		height = 0
+		self.lines = lines = []
+		avail1 = self.width - self.indent_left - self.indent_right
+		avail = avail1 - self.indent_hang
+		words = self.words
+		i = 0
+		n = len(words)
+		lastfont = None
+		while i < n:
+			firstfont = lastfont
+			charcount = 0
+			width = 0
+			stretch = 0
+			ascent = 0
+			descent = 0
+			lsp = 0
+			j = i
+			while i < n:
+				word = words[i]
+				if type(word) == Int:
+					if word > 0 and width >= avail:
+						break
+					i = i+1
+					continue
+				fo, te, wi, sp, st, as, de = word
+				if width + wi > avail and width > 0 and wi > 0:
+					break
+				if fo <> None:
+					lastfont = fo
+					if width == 0:
+						firstfont = fo
+				charcount = charcount + len(te) + (sp > 0)
+				width = width + wi + sp
+				lsp = sp
+				stretch = stretch + st
+				lst = st
+				ascent = max(ascent, as)
+				descent = max(descent, de)
+				i = i+1
+			while i > j and type(words[i-1]) == Int and \
+				words[i-1] > 0: i = i-1
+			width = width - lsp
+			if i < n:
+				stretch = stretch - lst
+			else:
+				stretch = 0
+			tuple = i-j, firstfont, charcount, width, stretch, \
+				ascent, descent
+			lines.append(tuple)
+			height = height + ascent + descent
+			avail = avail1
+		self.height = height
+	#
+	# Call a function for all words in a line
+	def visit(self, wordfunc, anchorfunc):
+		avail1 = self.width - self.indent_left - self.indent_right
+		avail = avail1 - self.indent_hang
+		v = self.top
+		i = 0
+		for tuple in self.lines:
+			wordcount, firstfont, charcount, width, stretch, \
+				ascent, descent = tuple
+			h = self.left + self.indent_left
+			if i == 0: h = h + self.indent_hang
+			extra = 0
+			if self.just == 'r': h = h + avail - width
+			elif self.just == 'c': h = h + (avail - width) / 2
+			elif self.just == 'lr' and stretch > 0:
+				extra = avail - width
+			v2 = v + ascent + descent
+			for j in range(i, i+wordcount):
+				word = self.words[j]
+				if type(word) == Int:
+					ok = anchorfunc(self, tuple, word, \
+							h, v)
+					if ok <> None: return ok
+					continue
+				fo, te, wi, sp, st, as, de = word
+				if extra > 0 and stretch > 0:
+					ex = extra * st / stretch
+					extra = extra - ex
+					stretch = stretch - st
+				else:
+					ex = 0
+				h2 = h + wi + sp + ex
+				ok = wordfunc(self, tuple, word, h, v, \
+					h2, v2, (j==i), (j==i+wordcount-1))
+				if ok <> None: return ok
+				h = h2
+			v = v2
+			i = i + wordcount
+			avail = avail1
+	#
+	# Render a paragraph in "drawing object" d, using the rectangle
+	# given by (left, top, right) with an unspecified bottom.
+	# Return the computed bottom of the text.
+	def render(self, d, left, top, right):
+		if self.width <> right-left:
+			self.layout(right-left)
+		self.left = left
+		self.top = top
+		self.right = right
+		self.bottom = self.top + self.height
+		self.anchorid = 0
+		try:
+			self.d = d
+			self.visit(self.__class__._renderword, \
+				   self.__class__._renderanchor)
+		finally:
+			self.d = None
+		return self.bottom
+	#
+	def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+		if word[0] <> None: self.d.setfont(word[0])
+		baseline = v + tuple[5]
+		self.d.text((h, baseline - word[5]), word[1])
+		if self.anchorid > 0:
+			self.d.line((h, baseline+2), (h2, baseline+2))
+	#
+	def _renderanchor(self, tuple, word, h, v):
+		self.anchorid = word
+	#
+	# Return which anchor(s) was hit by the mouse
+	def hitcheck(self, mouseh, mousev):
+		self.mouseh = mouseh
+		self.mousev = mousev
+		self.anchorid = 0
+		self.hits = []
+		self.visit(self.__class__._hitcheckword, \
+			   self.__class__._hitcheckanchor)
+		return self.hits
+	#
+	def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+		if self.anchorid > 0 and h <= self.mouseh <= h2 and \
+			v <= self.mousev <= v2:
+			self.hits.append(self.anchorid)
+	#
+	def _hitcheckanchor(self, tuple, word, h, v):
+		self.anchorid = word
+	#
+	# Return whether the given anchor id is present
+	def hasanchor(self, id):
+		return id in self.words or -id in self.words
+	#
+	# Extract the raw text from the word list, substituting one space
+	# for non-empty inter-word space, and terminating with '\n'
+	def extract(self):
+		text = ''
+		for w in self.words:
+			if type(w) <> Int:
+				word = w[1]
+				if w[3]: word = word + ' '
+				text = text + word
+		return text + '\n'
+	#
+	# Return which character position was hit by the mouse, as
+	# an offset in the entire text as returned by extract().
+	# Return None if the mouse was not in this paragraph
+	def whereis(self, d, mouseh, mousev):
+		if mousev < self.top or mousev > self.bottom:
+			return None
+		self.mouseh = mouseh
+		self.mousev = mousev
+		self.lastfont = None
+		self.charcount = 0
+		try:
+			self.d = d
+			return self.visit(self.__class__._whereisword, \
+					  self.__class__._whereisanchor)
+		finally:
+			self.d = None
+	#
+	def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+		fo, te, wi, sp, st, as, de = word
+		if fo <> None: self.lastfont = fo
+		h = h1
+		if isfirst: h1 = 0
+		if islast: h2 = 999999
+		if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
+			self.charcount = self.charcount + len(te) + (sp > 0)
+			return
+		if self.lastfont <> None:
+			self.d.setfont(self.lastfont)
+		cc = 0
+		for c in te:
+			cw = self.d.textwidth(c)
+			if self.mouseh <= h + cw/2:
+				return self.charcount + cc
+			cc = cc+1
+			h = h+cw
+		self.charcount = self.charcount + cc
+		if self.mouseh <= (h+h2) / 2:
+			return self.charcount
+		else:
+			return self.charcount + 1
+	#
+	def _whereisanchor(self, tuple, word, h, v):
+		pass
+	#
+	# Return screen position corresponding to position in paragraph.
+	# Return tuple (h, vtop, vbaseline, vbottom).
+	# This is more or less the inverse of whereis()
+	def screenpos(self, d, pos):
+		if pos < 0:
+			ascent, descent = self.lines[0][5:7]
+			return self.left, self.top, self.top + ascent, \
+				self.top + ascent + descent
+		self.pos = pos
+		self.lastfont = None
+		try:
+			self.d = d
+			ok = self.visit(self.__class__._screenposword, \
+					self.__class__._screenposanchor)
+		finally:
+			self.d = None
+		if ok == None:
+			ascent, descent = self.lines[-1][5:7]
+			ok = self.right, self.bottom - ascent - descent, \
+				self.bottom - descent, self.bottom
+		return ok
+	#
+	def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+		fo, te, wi, sp, st, as, de = word
+		if fo <> None: self.lastfont = fo
+		cc = len(te) + (sp > 0)
+		if self.pos > cc:
+			self.pos = self.pos - cc
+			return
+		if self.pos < cc:
+			self.d.setfont(self.lastfont)
+			h = h1 + self.d.textwidth(te[:self.pos])
+		else:
+			h = h2
+		ascent, descent = tuple[5:7]
+		return h, v1, v1+ascent, v2
+	#
+	def _screenposanchor(self, tuple, word, h, v):
+		pass
+	#
+	# Invert the stretch of text between pos1 and pos2.
+	# If pos1 is None, the beginning is implied;
+	# if pos2 is None, the end is implied.
+	# Undoes its own effect when called again with the same arguments
+	def invert(self, d, pos1, pos2):
+		if pos1 == None:
+			pos1 = self.left, self.top, self.top, self.top
+		else:
+			pos1 = self.screenpos(d, pos1)
+		if pos2 == None:
+			pos2 = self.right, self.bottom,self.bottom,self.bottom
+		else:
+			pos2 = self.screenpos(d, pos2)
+		h1, top1, baseline1, bottom1 = pos1
+		h2, top2, baseline2, bottom2 = pos2
+		if bottom1 <= top2:
+			d.invert((h1, top1), (self.right, bottom1))
+			h1 = self.left
+			if bottom1 < top2:
+				d.invert((h1, bottom1), (self.right, top2))
+			top1, bottom1 = top2, bottom2
+		d.invert((h1, top1), (h2, bottom2))
+
+
+# Test class Para
+# XXX This was last used on the Mac, hence the weird fonts...
+# Show the same sentence in four paragraphs, one for each kind of
+# justification, in a stdwin window.  Pressing and releasing the
+# mouse button within one paragraph selects (inverts) the text in
+# between.
+def test():
+	import stdwin
+	from stdwinevents import *
+	words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
+		'the', 'lazy', 'dog.'
+	paralist = []
+	for just in 'l', 'r', 'lr', 'c':
+		p = Para()
+		p.just = just
+		# The stdwin module itself is used to measure the text
+		p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
+		for word in words[1:-1]:
+			p.addword(stdwin, None, word, 1, 1)
+		p.addword(stdwin, None, words[-1], 2, 4)
+		p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
+		p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
+		paralist.append(p)
+	window = stdwin.open('Para.test()')
+	# The selection: character positions start and stop in
+	# paragraph selpara; all None when nothing is selected
+	start = stop = selpara = None
+	while 1:
+		etype, win, detail = stdwin.getevent()
+		if etype == WE_CLOSE:
+			break
+		if etype == WE_SIZE:
+			window.change((0, 0), (1000, 1000))
+		if etype == WE_DRAW:
+			width, height = window.getwinsize()
+			d = None
+			try:
+				d = window.begindrawing()
+				d.cliprect(detail)
+				d.erase(detail)
+				v = 0
+				for p in paralist:
+					v = p.render(d, 0, v, width)
+					# Show the selection again
+					if p == selpara and \
+					   start <> None and stop <> None:
+						p.invert(d, start, stop)
+			finally:
+				if d: d.close()
+		if etype == WE_MOUSE_DOWN:
+			# Remove the old selection from the screen; invert()
+			# undoes its own effect
+			if selpara and start <> None and stop <> None:
+				d = window.begindrawing()
+				selpara.invert(d, start, stop)
+				d.close()
+			start = stop = selpara = None
+			mouseh, mousev = detail[0]
+			# Find the paragraph and the position that was hit
+			for p in paralist:
+				start = p.whereis(stdwin, mouseh, mousev)
+				if start <> None:
+					selpara = p
+					break
+		if etype == WE_MOUSE_UP and start <> None and selpara:
+			mouseh, mousev = detail[0]
+			stop = selpara.whereis(stdwin, mouseh, mousev)
+			# Releasing outside the paragraph cancels the selection
+			if stop == None: start = selpara = None
+			else:
+				if start > stop:
+					start, stop = stop, start
+				d = window.begindrawing()
+				selpara.invert(d, start, stop)
+				d.close()
+	window.close()
--- a/Lib/lib-old/fmt.py
+++ b/Lib/lib-old/fmt.py
@ -0,0 +1,621 @@
+# Text formatting abstractions
+
+
+import string
+import Para
+
+
+# A formatter back-end object has one method that is called by the formatter:
+# addpara(p), where p is a paragraph object.  For example:
+
+
+# Formatter back-end to do nothing at all with the paragraphs
+class NullBackEnd:
+	# Every method accepts its arguments and does nothing.  The other
+	# back-ends in this file derive from this class.
+	#
+	def __init__(self):
+		pass
+	#
+	# Receive a completed paragraph p and discard it
+	def addpara(self, p):
+		pass
+	#
+	# Anchor hooks; the id argument is ignored
+	def bgn_anchor(self, id):
+		pass
+	#
+	def end_anchor(self, id):
+		pass
+
+
+# Formatter back-end to collect the paragraphs in a list
+class SavingBackEnd(NullBackEnd):
+	#
+	# Positions in the saved document are passed around as "longs":
+	# (paragraph index, offset) pairs, where the offset indexes the
+	# string returned by that paragraph's extract() method.
+	#
+	def __init__(self):
+		self.paralist = [] # Paragraphs, in the order received
+	#
+	# Append the completed paragraph p to the list
+	def addpara(self, p):
+		self.paralist.append(p)
+	#
+	# Return the list of distinct ids reported by p.hitcheck(h, v)
+	# for every paragraph p whose top..bottom range contains v
+	def hitcheck(self, h, v):
+		hits = []
+		for p in self.paralist:
+			if p.top <= v <= p.bottom:
+				for id in p.hitcheck(h, v):
+					if id not in hits:
+						hits.append(id)
+		return hits
+	#
+	# Return the concatenated extract() text of all paragraphs
+	def extract(self):
+		text = ''
+		for p in self.paralist:
+			text = text + (p.extract())
+		return text
+	#
+	# Return the text between two longs (swapped first if they are
+	# given in reverse order)
+	def extractpart(self, long1, long2):
+		if long1 > long2: long1, long2 = long2, long1
+		para1, pos1 = long1
+		para2, pos2 = long2
+		text = ''
+		# Whole tails of the paragraphs before the last one...
+		while para1 < para2:
+			ptext = self.paralist[para1].extract()
+			text = text + ptext[pos1:]
+			pos1 = 0
+			para1 = para1 + 1
+		# ...then the part of the last paragraph up to pos2
+		ptext = self.paralist[para2].extract()
+		return text + ptext[pos1:pos2]
+	#
+	# Ask each paragraph's whereis(d, h, v) in turn; return
+	# (paragraph index, result) for the first one that does not
+	# return None, or None if no paragraph does
+	def whereis(self, d, h, v):
+		for i in range(len(self.paralist)):
+			result = self.paralist[i].whereis(d, h, v)
+			if result <> None:
+				return i, result
+		return None
+	#
+	# Widen a region to word boundaries: long1 moves back until it
+	# is at offset 0 or just after a space, long2 moves forward until
+	# it is on a space or on the last character of its paragraph
+	def roundtowords(self, long1, long2):
+		i, offset = long1
+		text = self.paralist[i].extract()
+		while offset > 0 and text[offset-1] <> ' ': offset = offset-1
+		long1 = i, offset
+		#
+		i, offset = long2
+		text = self.paralist[i].extract()
+		n = len(text)
+		while offset < n-1 and text[offset] <> ' ': offset = offset+1
+		long2 = i, offset
+		#
+		return long1, long2
+	#
+	# Widen a region to whole paragraphs: from offset 0 of the first
+	# paragraph to the length of the last paragraph's text
+	def roundtoparagraphs(self, long1, long2):
+		long1 = long1[0], 0
+		long2 = long2[0], len(self.paralist[long2[0]].extract())
+		return long1, long2
+
+
+# Formatter back-end to send the text directly to the drawing object
+class WritingBackEnd(NullBackEnd):
+	#
+	# d is the drawing object the paragraphs are rendered to; width
+	# is handed to Para.render() as its last argument
+	def __init__(self, d, width):
+		self.d = d
+		self.width = width
+		self.lineno = 0
+	#
+	# Render p starting at vertical position self.lineno and keep the
+	# value returned by render() as the position for the next paragraph
+	def addpara(self, p):
+		self.lineno = p.render(self.d, 0, self.lineno, self.width)
+
+
+# A formatter receives a stream of formatting instructions and assembles
+# these into a stream of paragraphs on to a back-end.  The assembly is
+# parametrized by a text measurement object, which must match the output
+# operations of the back-end.  The back-end is responsible for splitting
+# paragraphs up in lines of a given maximum width.  (This is done because
+# in a windowing environment, when the window size changes, there is no
+# need to redo the assembly into paragraphs, but the splitting into lines
+# must be done taking the new window size into account.)
+
+
+# Formatter base class.  Initialize it with a text measurement object,
+# which is used for text measurements, and a back-end object,
+# which receives the completed paragraphs.  The formatting methods are:
+# setfont(font)
+# setleftindent(nspaces)
+# setrightindent(nspaces)
+# setjust(type) where type is 'l', 'c', 'r', or 'lr'
+# flush()
+# vspace(nlines)
+# needvspace(nlines)
+# addword(word, nspaces)
+# bgn_anchor(id)
+# end_anchor(id)
+class BaseFormatter:
+	#
+	def __init__(self, d, b):
+		# Drawing object used for text measurements
+		self.d = d
+		#
+		# BackEnd object receiving completed paragraphs
+		self.b = b
+		#
+		# Parameters of the formatting model
+		self.leftindent = 0
+		self.just = 'l'
+		self.font = None
+		self.blanklines = 0
+		#
+		# Parameters derived from the current font
+		self.space = d.textwidth(' ')
+		self.line = d.lineheight()
+		self.ascent = d.baseline()
+		self.descent = self.line - self.ascent
+		#
+		# Parameter derived from the default font
+		self.n_space = self.space
+		#
+		# Current paragraph being built
+		self.para = None
+		self.nospace = 1
+		#
+		# Font to set on the next word
+		self.nextfont = None
+	#
+	def newpara(self):
+		return Para.Para()
+	#
+	def setfont(self, font):
+		if font == None: return
+		self.font = self.nextfont = font
+		d = self.d
+		d.setfont(font)
+		self.space = d.textwidth(' ')
+		self.line = d.lineheight()
+		self.ascent = d.baseline()
+		self.descent = self.line - self.ascent
+	#
+	def setleftindent(self, nspaces):
+		self.leftindent = int(self.n_space * nspaces)
+		if self.para:
+			hang = self.leftindent - self.para.indent_left
+			if hang > 0 and self.para.getlength() <= hang:
+				self.para.makehangingtag(hang)
+				self.nospace = 1
+			else:
+				self.flush()
+	#
+	def setrightindent(self, nspaces):
+		self.rightindent = int(self.n_space * nspaces)
+		if self.para:
+			self.para.indent_right = self.rightindent
+			self.flush()
+	#
+	def setjust(self, just):
+		self.just = just
+		if self.para:
+			self.para.just = self.just
+	#
+	def flush(self):
+		if self.para:
+			self.b.addpara(self.para)
+			self.para = None
+			if self.font <> None:
+				self.d.setfont(self.font)
+		self.nospace = 1
+	#
+	def vspace(self, nlines):
+		self.flush()
+		if nlines > 0:
+			self.para = self.newpara()
+			tuple = None, '', 0, 0, 0, int(nlines*self.line), 0
+			self.para.words.append(tuple)
+			self.flush()
+			self.blanklines = self.blanklines + nlines
+	#
+	def needvspace(self, nlines):
+		self.flush() # Just to be sure
+		if nlines > self.blanklines:
+			self.vspace(nlines - self.blanklines)
+	#
+	def addword(self, text, space):
+		# Add the word `text' to the current paragraph, starting a
+		# new paragraph if there is none.  `space' is the number of
+		# spaces following the word, in units of the current font's
+		# space width; it is used for both the space and the stretch
+		# field of the word.
+		# An empty word is dropped while self.nospace is set (i.e.
+		# right after a flush or a hanging tag).
+		if self.nospace and not text:
+			return
+		self.nospace = 0
+		self.blanklines = 0
+		if not self.para:
+			self.para = self.newpara()
+			self.para.indent_left = self.leftindent
+			self.para.just = self.just
+			# The first word of a paragraph always carries the font
+			self.nextfont = self.font
+		space = int(space * self.space)
+		# Word tuple layout as documented in Para:
+		# (font, text, width, space, stretch, ascent, descent).
+		# Build the tuple explicitly instead of relying on append()
+		# turning several arguments into one tuple.
+		word = (self.nextfont, text, self.d.textwidth(text), \
+			space, space, self.ascent, self.descent)
+		self.para.words.append(word)
+		# Later words inherit the font until setfont() changes it
+		self.nextfont = None
+	#
+	def bgn_anchor(self, id):
+		if not self.para:
+			self.nospace = 0
+			self.addword('', 0)
+		self.para.bgn_anchor(id)
+	#
+	def end_anchor(self, id):
+		if not self.para:
+			self.nospace = 0
+			self.addword('', 0)
+		self.para.end_anchor(id)
+
+
+# Measuring object for measuring text as viewed on a tty
+class NullMeasurer:
+	# Every character is one unit wide and every line is one unit
+	# high, whatever font is set.
+	#
+	def __init__(self):
+		pass
+	#
+	# The font is ignored
+	def setfont(self, font):
+		pass
+	#
+	# Width of text: its length in characters
+	def textwidth(self, text):
+		return len(text)
+	#
+	def lineheight(self):
+		return 1
+	#
+	def baseline(self):
+		return 0
+
+
+# Drawing object for writing plain ASCII text to a file
+class FileWriter:
+	#
+	# fp is the file object written to; lineno and colno track the
+	# position where the next character written would end up
+	def __init__(self, fp):
+		self.fp = fp
+		self.lineno, self.colno = 0, 0
+	#
+	# The font is ignored
+	def setfont(self, font):
+		pass
+	#
+	# Write str so that it starts at column h of line v.
+	# An empty str is ignored; a str containing '\n' raises ValueError.
+	def text(self, (h, v), str):
+		if not str: return
+		if '\n' in str:
+			raise ValueError, 'can\'t write \\n'
+		# Move down by writing newlines (which also resets the column)
+		while self.lineno < v:
+			self.fp.write('\n')
+			self.colno, self.lineno = 0, self.lineno + 1
+		while self.lineno > v:
+			# XXX This should never happen...
+			self.fp.write('\033[A') # ANSI up arrow
+			self.lineno = self.lineno - 1
+		# Move right with spaces, or left with backspaces
+		if self.colno < h:
+			self.fp.write(' ' * (h - self.colno))
+		elif self.colno > h:
+			self.fp.write('\b' * (self.colno - h))
+		self.colno = h
+		self.fp.write(str)
+		self.colno = h + len(str)
+
+
+# Formatting class to do nothing at all with the data
+class NullFormatter(BaseFormatter):
+	#
+	def __init__(self):
+		d = NullMeasurer()
+		b = NullBackEnd()
+		BaseFormatter.__init__(self, d, b)
+
+
+# Formatting class to write directly to a file
+class WritingFormatter(BaseFormatter):
+	#
+	# Measure with a NullMeasurer and render each paragraph through a
+	# FileWriter on fp, using width as the line width
+	def __init__(self, fp, width):
+		dm = NullMeasurer()
+		dw = FileWriter(fp)
+		b = WritingBackEnd(dw, width)
+		BaseFormatter.__init__(self, dm, b)
+		# Start out as if one blank line was already written, so a
+		# needvspace() before the first word adds nothing
+		self.blanklines = 1
+	#
+	# Suppress multiple blank lines
+	def needvspace(self, nlines):
+		# Never ask the base class for more than one blank line
+		BaseFormatter.needvspace(self, min(1, nlines))
+
+
+# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
+# _italic text_ and _underlined words_, and `quoted text'.
+# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
+# italic, bold, underline, quote).
+# Moreover, if the font is in upper case, the text is converted to
+# UPPER CASE.
+class FunnyFormatter(WritingFormatter):
+	#
+	def flush(self):
+		if self.para: finalize(self.para)
+		WritingFormatter.flush(self)
+
+
+# Surrounds *bold words* and _italic text_ in a paragraph with
+# appropriate markers, fixing the size (assuming these characters'
+# width is 1).
+openchar = \
+    {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
+closechar = \
+    {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
+def finalize(para):
+	# Rewrite para.words in place.  curfont is the font in effect at
+	# the current word, oldfont the font whose opening marker is
+	# still outstanding ('r' when there is none).  Only the first
+	# three fields of a word (font, text, width) are changed; the
+	# remaining fields are carried over unchanged.
+	# Integer entries in the word list are the anchor markers added
+	# by Para.bgn_anchor/end_anchor; they are not words and are
+	# skipped.
+	Int = type(0)
+	oldfont = curfont = 'r'
+	# Temporary roman sentinel word, deleted at end: it makes the
+	# loop emit the closing marker for a font still open at the end
+	para.words.append(('r', '', 0, 0, 0, 0, 0))
+	for i in range(len(para.words)):
+		if type(para.words[i]) == Int:
+			continue
+		fo, te, wi = para.words[i][:3]
+		if fo <> None: curfont = fo
+		if curfont <> oldfont:
+			if closechar.has_key(oldfont):
+				c = closechar[oldfont]
+				# Attach the closing marker to the nearest
+				# preceding word that has text (or to entry 0)
+				j = i-1
+				while j > 0 and (type(para.words[j]) == Int \
+				   or para.words[j][1] == ''):
+					j = j-1
+				if type(para.words[j]) <> Int:
+					fo1, te1, wi1 = para.words[j][:3]
+					te1 = te1 + c
+					wi1 = wi1 + len(c)
+					para.words[j] = (fo1, te1, wi1) + \
+						para.words[j][3:]
+			if openchar.has_key(curfont) and te:
+				c = openchar[curfont]
+				te = c + te
+				wi = len(c) + wi
+				para.words[i] = (fo, te, wi) + \
+					para.words[i][3:]
+			# An empty word cannot carry the opening marker; leave
+			# it outstanding for the next word with text
+			if te: oldfont = curfont
+			else: oldfont = 'r'
+		if curfont in string.uppercase:
+			te = string.upper(te)
+			para.words[i] = (fo, te, wi) + para.words[i][3:]
+	del para.words[-1]
+
+
+# Formatter back-end to draw the text in a window.
+# This has an option to draw while the paragraphs are being added,
+# to minimize the delay before the user sees anything.
+# This manages the entire "document" of the window.
+class StdwinBackEnd(SavingBackEnd):
+	#
+	def __init__(self, window, drawnow):
+		self.window = window
+		self.drawnow = drawnow
+		self.width = window.getwinsize()[0]
+		self.selection = None
+		self.height = 0
+		window.setorigin(0, 0)
+		window.setdocsize(0, 0)
+		self.d = window.begindrawing()
+		SavingBackEnd.__init__(self)
+	#
+	def finish(self):
+		self.d.close()
+		self.d = None
+		self.window.setdocsize(0, self.height)
+	#
+	def addpara(self, p):
+		self.paralist.append(p)
+		if self.drawnow:
+			self.height = \
+				p.render(self.d, 0, self.height, self.width)
+		else:
+			p.layout(self.width)
+			p.left = 0
+			p.top = self.height
+			p.right = self.width
+			p.bottom = self.height + p.height
+			self.height = p.bottom
+	#
+	def resize(self):
+		self.window.change((0, 0), (self.width, self.height))
+		self.width = self.window.getwinsize()[0]
+		self.height = 0
+		for p in self.paralist:
+			p.layout(self.width)
+			p.left = 0
+			p.top = self.height
+			p.right = self.width
+			p.bottom = self.height + p.height
+			self.height = p.bottom
+		self.window.change((0, 0), (self.width, self.height))
+		self.window.setdocsize(0, self.height)
+	#
+	def redraw(self, area):
+		# Redraw the rectangle area = ((left, top), (right, bottom)):
+		# erase it, clip to it, render every paragraph that overlaps
+		# it vertically and re-invert the current selection, if any.
+		(left, top), (right, bottom) = area
+		d = self.window.begindrawing()
+		# Close the drawing object even if rendering fails
+		try:
+			d.erase(area)
+			d.cliprect(area)
+			for p in self.paralist:
+				if top < p.bottom and p.top < bottom:
+					p.render(d, p.left, p.top, p.right)
+			if self.selection:
+				self.invert(d, self.selection)
+		finally:
+			d.close()
+	#
+	def setselection(self, new):
+		# Make `new' the current selection and update the window.
+		# `new' is either false (no selection) or a pair of
+		# positions; at most the first three items of each position
+		# are kept.
+		if new:
+			long1, long2 = new
+			pos1 = long1[:3]
+			pos2 = long2[:3]
+			new = pos1, pos2
+		if new <> self.selection:
+			d = self.window.begindrawing()
+			# Close the drawing object even if inverting fails
+			try:
+				# Invert the old region again (presumably this
+				# undoes its highlight), then invert the new one
+				if self.selection:
+					self.invert(d, self.selection)
+				if new:
+					self.invert(d, new)
+			finally:
+				d.close()
+			self.selection = new
+	#
+	def getselection(self):
+		return self.selection
+	#
+	def extractselection(self):
+		if self.selection:
+			a, b = self.selection
+			return self.extractpart(a, b)
+		else:
+			return None
+	#
+	def invert(self, d, region):
+		long1, long2 = region
+		if long1 > long2: long1, long2 = long2, long1
+		para1, pos1 = long1
+		para2, pos2 = long2
+		while para1 < para2:
+			self.paralist[para1].invert(d, pos1, None)
+			pos1 = None
+			para1 = para1 + 1
+		self.paralist[para2].invert(d, pos1, pos2)
+	#
+	def search(self, prog):
+		import regex, string
+		if type(prog) == type(''):
+			prog = regex.compile(string.lower(prog))
+		if self.selection:
+			iold = self.selection[0][0]
+		else:
+			iold = -1
+		hit = None
+		for i in range(len(self.paralist)):
+			if i == iold or i < iold and hit:
+				continue
+			p = self.paralist[i]
+			text = string.lower(p.extract())
+			if prog.search(text) >= 0:
+				a, b = prog.regs[0]
+				long1 = i, a
+				long2 = i, b
+				hit = long1, long2
+				if i > iold:
+					break
+		if hit:
+			self.setselection(hit)
+			i = hit[0][0]
+			p = self.paralist[i]
+			self.window.show((p.left, p.top), (p.right, p.bottom))
+			return 1
+		else:
+			return 0
+	#
+	def showanchor(self, id):
+		for i in range(len(self.paralist)):
+			p = self.paralist[i]
+			if p.hasanchor(id):
+				long1 = i, 0
+				long2 = i, len(p.extract())
+				hit = long1, long2
+				self.setselection(hit)
+				self.window.show( \
+					(p.left, p.top), (p.right, p.bottom))
+				break
+
+
+# GL extensions
+
+class GLFontCache:
+	# Keeps track of the current font and caches font handles in
+	# self.fontcache, keyed by font key strings of the form
+	# 'name size'.
+	#
+	def __init__(self):
+		self.reset()
+		self.setfont('')
+	#
+	# Forget the current font and empty the cache
+	def reset(self):
+		self.fontkey = None
+		self.fonthandle = None
+		self.fontinfo = None
+		self.fontcache = {}
+	#
+	def close(self):
+		self.reset()
+	#
+	# Make fontkey the current font.  An empty key stands for
+	# 'Times-Roman 12'; a key without a space gets ' 12' appended.
+	def setfont(self, fontkey):
+		if fontkey == '':
+			fontkey = 'Times-Roman 12'
+		elif ' ' not in fontkey:
+			fontkey = fontkey + ' 12'
+		if fontkey == self.fontkey:
+			return
+		if self.fontcache.has_key(fontkey):
+			handle = self.fontcache[fontkey]
+		else:
+			import string
+			i = string.index(fontkey, ' ')
+			name, sizestr = fontkey[:i], fontkey[i:]
+			# NOTE(review): eval() evaluates the size part of the
+			# key as an arbitrary expression -- only safe as long
+			# as font keys never come from untrusted input
+			size = eval(sizestr)
+			# key1 caches the handle returned by fm.findfont(),
+			# key the handle scaled to the evaluated size
+			key1 = name + ' 1'
+			key = name + ' ' + `size`
+			# NB key may differ from fontkey!
+			if self.fontcache.has_key(key):
+				handle = self.fontcache[key]
+			else:
+				if self.fontcache.has_key(key1):
+					handle = self.fontcache[key1]
+				else:
+					import fm
+					handle = fm.findfont(name)
+					self.fontcache[key1] = handle
+				handle = handle.scalefont(size)
+				self.fontcache[fontkey] = \
+					self.fontcache[key] = handle
+		self.fontkey = fontkey
+		# Only touch the font state when the handle really changed
+		if self.fonthandle <> handle:
+			self.fonthandle = handle
+			self.fontinfo = handle.getfontinfo()
+			handle.setfont()
+
+
+class GLMeasurer(GLFontCache):
+	# Measurements use the handle and font info of the font most
+	# recently selected through GLFontCache.setfont().
+	#
+	def textwidth(self, text):
+		return self.fonthandle.getstrwidth(text)
+	#
+	# NOTE(review): fontinfo[6] is used as the line height and
+	# fontinfo[3] presumably is the descent -- confirm against the
+	# fm module's getfontinfo() documentation
+	def baseline(self):
+		return self.fontinfo[6] - self.fontinfo[3]
+	#
+	def lineheight(self):
+		return self.fontinfo[6]
+
+
+class GLWriter(GLFontCache):
+	#
+	# NOTES:
+	# (1) Use gl.ortho2 to use X pixel coordinates!
+	#
+	def text(self, (h, v), text):
+		import gl, fm
+		gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
+		fm.prstr(text)
+	#
+	def setfont(self, fontkey):
+		# Let the font cache select the font, then make the new
+		# handle the current font if the selection changed.
+		# The base class method is called unbound, so self must be
+		# passed explicitly; the handle to activate is the one the
+		# base class has just stored in self.fonthandle.
+		oldhandle = self.fonthandle
+		GLFontCache.setfont(self, fontkey)
+		if self.fonthandle <> oldhandle:
+			self.fonthandle.setfont()
+
+
+class GLMeasurerWriter(GLMeasurer, GLWriter):
+	pass
+
+
+class GLBackEnd(SavingBackEnd):
+	# Saving back-end that also renders each paragraph into the GL
+	# window with id wid, through a GLMeasurerWriter.
+	#
+	def __init__(self, wid):
+		import gl
+		gl.winset(wid)
+		self.wid = wid
+		# NOTE(review): item [1] of gl.getsize() is used as the
+		# width here and in redraw() -- confirm the order of the
+		# values returned by getsize()
+		self.width = gl.getsize()[1]
+		self.height = 0
+		self.d = GLMeasurerWriter()
+		SavingBackEnd.__init__(self)
+	#
+	def finish(self):
+		pass
+	#
+	# Save p and render it right away below the previous paragraph
+	def addpara(self, p):
+		self.paralist.append(p)
+		self.height = p.render(self.d, 0, self.height, self.width)
+	#
+	# Render all saved paragraphs again from the top, using the
+	# current size of the window
+	def redraw(self):
+		import gl
+		gl.winset(self.wid)
+		width = gl.getsize()[1]
+		if width <> self.width:
+			# NOTE(review): setdocsize is assigned but never
+			# read in this method
+			setdocsize = 1
+			self.width = width
+			for p in self.paralist:
+				p.top = p.bottom = None
+		d = self.d
+		v = 0
+		for p in self.paralist:
+			v = p.render(d, 0, v, width)
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@ -0,0 +1,321 @@
+# A parser for SGML, using the derived class as static DTD.
+
+# XXX This only supports those SGML features used by HTML.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import regex
+import string
+
+
+# Regular expressions used for parsing
+
+incomplete = regex.compile( \
+	  '<!-?\|</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*\|</?\|' + \
+	  '&#[a-zA-Z0-9]*\|&[a-zA-Z][a-zA-Z0-9]*\|&')
+entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
+charref = regex.compile('&#[a-zA-Z0-9]+;')
+starttagopen = regex.compile('<[a-zA-Z]')
+endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
+commentopen = regex.compile('<!--')
+
+
+# SGML parser base class -- find tags and call handler functions.
+# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
+# The dtd is defined by deriving a class which defines methods
+# with special names to handle tags: start_foo and end_foo to handle
+# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
+# (Tags are converted to lower case for this purpose.)  The data
+# between tags is passed to the parser by calling self.handle_data()
+# with some data as argument (the data may be split up in arbitrary
+# chunks).  Entity references are passed by calling
+# self.handle_entityref() with the entity reference as argument.
+
+class SGMLParser:
+
+	# Interface -- initialize and reset this instance
+	def __init__(self):
+		self.reset()
+
+	# Interface -- reset this instance.  Loses all unprocessed data
+	def reset(self):
+		self.rawdata = ''
+		self.stack = []
+		self.nomoretags = 0
+		self.literal = 0
+
+	# For derived classes only -- enter literal mode (CDATA) till EOF
+	def setnomoretags(self):
+		self.nomoretags = self.literal = 1
+
+	# For derived classes only -- enter literal mode (CDATA)
+	def setliteral(self, *args):
+		self.literal = 1
+
+	# Interface -- feed some data to the parser.  Call this as
+	# often as you want, with as little or as much text as you
+	# want (may include '\n').  (The text is appended to the buffer
+	# and processed as far as possible by goahead(); an incomplete
+	# trailing construct is kept until more data arrives or close()
+	# is called.)
+	def feed(self, data):
+		self.rawdata = self.rawdata + data
+		self.goahead(0)
+
+	# Interface -- handle the remaining data
+	def close(self):
+		self.goahead(1)
+
+	# Internal -- handle data as far as reasonable.  May leave state
+	# and data to be processed by a subsequent call.  If 'end' is
+	# true, force handling all data as if followed by EOF marker.
+	def goahead(self, end):
+		rawdata = self.rawdata
+		i = 0
+		n = len(rawdata)
+		while i < n:
+			if self.nomoretags:
+				self.handle_data(rawdata[i:n])
+				i = n
+				break
+			j = incomplete.search(rawdata, i)
+			if j < 0: j = n
+			if i < j: self.handle_data(rawdata[i:j])
+			i = j
+			if i == n: break
+			if rawdata[i] == '<':
+				if starttagopen.match(rawdata, i) >= 0:
+					if self.literal:
+						self.handle_data(rawdata[i])
+						i = i+1
+						continue
+					k = self.parse_starttag(i)
+					if k < 0: break
+					i = i + k
+					continue
+				k = endtag.match(rawdata, i)
+				if k >= 0:
+					j = i+k
+					self.parse_endtag(rawdata[i:j])
+					i = j
+					self.literal = 0
+					continue
+				if commentopen.match(rawdata, i) >= 0:
+					if self.literal:
+						self.handle_data(rawdata[i])
+						i = i+1
+						continue
+					k = self.parse_comment(i)
+					if k < 0: break
+					i = i+k
+					continue
+			elif rawdata[i] == '&':
+				k = charref.match(rawdata, i)
+				if k >= 0:
+					j = i+k
+					self.handle_charref(rawdata[i+2:j-1])
+					i = j
+					continue
+				k = entityref.match(rawdata, i)
+				if k >= 0:
+					j = i+k
+					self.handle_entityref(rawdata[i+1:j-1])
+					i = j
+					continue
+			else:
+				raise RuntimeError, 'neither < nor & ??'
+			# We get here only if incomplete matches but
+			# nothing else
+			k = incomplete.match(rawdata, i)
+			if k < 0: raise RuntimeError, 'no incomplete match ??'
+			j = i+k
+			if j == n: break # Really incomplete
+			self.handle_data(rawdata[i:j])
+			i = j
+		# end while
+		if end and i < n:
+			self.handle_data(rawdata[i:n])
+			i = n
+		self.rawdata = rawdata[i:]
+		# XXX if end: check for empty stack
+
+	# Internal -- parse comment, return length or -1 if not terminated
+	def parse_comment(self, i):
+		# Parse the comment starting at rawdata[i], which must be
+		# '<!--'.  Returns the number of characters consumed, or -1
+		# if the comment is not complete yet (no closing '--', or
+		# nothing but whitespace after it so far).
+		# handle_comment() is only called once the character after
+		# the closing '--' is available; otherwise a comment whose
+		# final '>' arrives in a later feed() would be reported
+		# once per attempt.
+		rawdata = self.rawdata
+		if rawdata[i:i+4] <> '<!--':
+			raise RuntimeError, 'unexpected call to parse_comment'
+		try:
+			j = string.index(rawdata, '--', i+4)
+		except string.index_error:
+			return -1
+		k = j+2
+		n = len(rawdata)
+		while k < n and rawdata[k] in ' \t\n': k = k+1
+		if k == n: return -1 # Wait for final '>'
+		self.handle_comment(rawdata[i+4: j])
+		if rawdata[k] == '>':
+			k = k+1
+		else:
+			# Malformed terminator: complain, but consume the
+			# comment up to (not including) the offending character
+			print '*** comment not terminated with >'
+			print repr(rawdata[k-5:k]), '*!*', repr(rawdata[k:k+5])
+		return k-i
+
+	# Internal -- handle starttag, return length or -1 if not terminated
+	def parse_starttag(self, i):
+		# Parse the start tag beginning at rawdata[i] (a '<' followed
+		# by a letter).  Returns the number of characters consumed,
+		# or -1 if there is no '>' in the buffer yet.
+		# The tag name and attribute names are converted to lower
+		# case; attrs is a list of (name, value) pairs, where value
+		# is '' for an attribute without '=' and quotes around a
+		# value are stripped.
+		# Dispatch: start_<tag>(attrs) if defined (the tag is pushed
+		# on self.stack first), else do_<tag>(attrs), else
+		# unknown_starttag(tag, attrs).
+		rawdata = self.rawdata
+		try:
+			j = string.index(rawdata, '>', i)
+		except string.index_error:
+			return -1
+		# Now parse the data between i+1 and j into a tag and attrs
+		attrs = []
+		tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*')
+		# The value is '...', "..." or a run of unquoted characters
+		attrfind = regex.compile( \
+		  '[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' + \
+		  '\([ \t\n]*=[ \t\n]*' + \
+		     '\(\'[^\']*\'\|"[^"]*"\|[-a-zA-Z0-9./:+*%?!()_#]+\)\)?')
+		k = tagfind.match(rawdata, i+1)
+		if k < 0:
+			raise RuntimeError, 'unexpected call to parse_starttag'
+		k = i+1+k
+		tag = string.lower(rawdata[i+1:k])
+		while k < j:
+			l = attrfind.match(rawdata, k)
+			if l < 0: break
+			regs = attrfind.regs
+			a1, b1 = regs[1]
+			a3, b3 = regs[3]
+			attrname = rawdata[a1:b1]
+			if '=' in rawdata[k:k+l]:
+				attrvalue = rawdata[a3:b3]
+				if attrvalue[:1] == '\'' == attrvalue[-1:] or \
+				   attrvalue[:1] == '"' == attrvalue[-1:]:
+					attrvalue = attrvalue[1:-1]
+			else:
+				attrvalue = ''
+			attrs.append((string.lower(attrname), attrvalue))
+			k = k + l
+		j = j+1
+		try:
+			method = getattr(self, 'start_' + tag)
+		except AttributeError:
+			try:
+				method = getattr(self, 'do_' + tag)
+			except AttributeError:
+				self.unknown_starttag(tag, attrs)
+				return j-i
+			# do_<tag>: no matching end tag, nothing to push
+			method(attrs)
+			return j-i
+		self.stack.append(tag)
+		method(attrs)
+		return j-i
+
+	# Internal -- parse endtag
+	def parse_endtag(self, data):
+		if data[:2] <> '</' or data[-1:] <> '>':
+			raise RuntimeError, 'unexpected call to parse_endtag'
+		tag = string.lower(string.strip(data[2:-1]))
+		try:
+			method = getattr(self, 'end_' + tag)
+		except AttributeError:
+			self.unknown_endtag(tag)
+			return
+		if self.stack and self.stack[-1] == tag:
+			del self.stack[-1]
+		else:
+			print '*** Unbalanced </' + tag + '>'
+			print '*** Stack:', self.stack
+			found = None
+			for i in range(len(self.stack)):
+				if self.stack[i] == tag: found = i
+			if found <> None:
+				del self.stack[found:]
+		method()
+
+	# Example -- handle character reference, no need to override
+	def handle_charref(self, name):
+		try:
+			n = string.atoi(name)
+		except string.atoi_error:
+			self.unknown_charref(name)
+			return
+		if not 0 <= n <= 255:
+			self.unknown_charref(name)
+			return
+		self.handle_data(chr(n))
+
+	# Definition of entities -- derived classes may override
+	entitydefs = \
+		{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
+
+	# Example -- handle entity reference, no need to override
+	def handle_entityref(self, name):
+		table = self.__class__.entitydefs
+		name = string.lower(name)
+		if table.has_key(name):
+			self.handle_data(table[name])
+		else:
+			self.unknown_entityref(name)
+			return
+
+	# Example -- handle data, should be overridden
+	def handle_data(self, data):
+		pass
+
+	# Example -- handle comment, could be overridden
+	def handle_comment(self, data):
+		pass
+
+	# To be overridden -- handlers for unknown objects
+	def unknown_starttag(self, tag, attrs): pass
+	def unknown_endtag(self, tag): pass
+	def unknown_charref(self, ref): pass
+	def unknown_entityref(self, ref): pass
+
+
+class TestSGML(SGMLParser):
+
+	def handle_data(self, data):
+		r = repr(data)
+		if len(r) > 72:
+			r = r[:35] + '...' + r[-35:]
+		print 'data:', r
+
+	def handle_comment(self, data):
+		r = repr(data)
+		if len(r) > 68:
+			r = r[:32] + '...' + r[-32:]
+		print 'comment:', r
+
+	def unknown_starttag(self, tag, attrs):
+		print 'start tag: <' + tag,
+		for name, value in attrs:
+			print name + '=' + '"' + value + '"',
+		print '>'
+
+	def unknown_endtag(self, tag):
+		print 'end tag: </' + tag + '>'
+
+	def unknown_entityref(self, ref):
+		print '*** unknown entity ref: &' + ref + ';'
+
+	def unknown_charref(self, ref):
+		print '*** unknown char ref: &#' + ref + ';'
+
+
+def test():
+	# Feed the file 'test.html' line by line to a TestSGML parser,
+	# which prints what it finds; close the parser at end of file.
+	file = 'test.html'
+	x = TestSGML()
+	f = open(file, 'r')
+	# Close the file even if the parser raises an exception
+	try:
+		while 1:
+			line = f.readline()
+			if not line:
+				x.close()
+				break
+			x.feed(line)
+	finally:
+		f.close()
+
+
+#test()