added html parser and supporting cast
This commit is contained in:
parent
eb9e9d2b2a
commit
7c750e1e09
|
@ -0,0 +1,408 @@
|
|||
# Text formatting abstractions
|
||||
|
||||
|
||||
# Oft-used type object
|
||||
# Type object of plain integers, i.e. whatever type(0) returns; code in this
# module compares type(x) against it to recognise integer entries.
Int = type(0)
|
||||
|
||||
|
||||
# Represent a paragraph. This is a list of words with associated
|
||||
# font and size information, plus indents and justification for the
|
||||
# entire paragraph.
|
||||
# Once the words have been added to a paragraph, it can be laid out
|
||||
# for different line widths. Once laid out, it can be rendered at
|
||||
# different screen locations. Once rendered, it can be queried
|
||||
# for mouse hits, and parts of the text can be highlighted
|
||||
class Para:
    """A paragraph: a list of words plus indents and justification.

    Words are added first (addword(), or by appending to self.words), then
    the paragraph is laid out for a line width (layout()), rendered at a
    screen position (render()), and finally queried for mouse hits or
    highlighted (hitcheck(), whereis(), screenpos(), invert()).

    Each entry of self.words is either a 7-tuple
        (font, text, width, space, stretch, ascent, descent)
    or an int marking an anchor boundary: the (positive) anchor id where
    an anchor begins and 0 where it ends.
    """

    def __init__(self):
        self.words = []         # The words (and int anchor markers)
        self.just = 'l'         # Justification: 'l', 'r', 'lr' or 'c'
        self.indent_left = self.indent_right = self.indent_hang = 0
        # Final lay-out parameters, may change
        self.left = self.top = self.right = self.bottom = \
            self.width = self.height = self.lines = None

    # Add a word, computing size information for it.
    # Words may also be added manually by appending to self.words
    # Each word should be a 7-tuple:
    # (font, text, width, space, stretch, ascent, descent)
    def addword(self, d, font, text, space, stretch):
        """Measure text with measuring object d and append it as a word.

        font is set on d first unless it is None.  space and stretch are
        given in units of the width of a space in the current font of d.
        """
        if font is not None:
            d.setfont(font)
        width = d.textwidth(text)
        ascent = d.baseline()
        descent = d.lineheight() - ascent
        spw = d.textwidth(' ')
        space = space * spw
        stretch = stretch * spw
        self.words.append(
            (font, text, width, space, stretch, ascent, descent))

    # Hooks to begin and end anchors -- insert numbers in the word list!
    def bgn_anchor(self, id):
        """Mark the start of anchor id by appending id to the word list."""
        self.words.append(id)

    def end_anchor(self, id):
        """Mark the end of the current anchor by appending 0."""
        self.words.append(0)

    def getlength(self):
        """Return the total width of the words added so far, in pixels."""
        total = 0
        for word in self.words:
            if not isinstance(word, int):
                total = total + word[2] + word[3]
        return total

    # Tab to a given position (relative to the current left indent):
    # remove all stretch, add fixed space up to the new indent.
    # If the current position is already beyond the tab stop,
    # don't add any new space (but still remove the stretch)
    def tabto(self, tab):
        """Remove all stretch and pad with fixed space up to position tab."""
        total = 0
        asc, de = 1, 0          # metrics used for the filler if no words
        for i in range(len(self.words)):
            word = self.words[i]
            if isinstance(word, int):
                continue
            fo, te, wi, sp, st, asc, de = word
            self.words[i] = fo, te, wi, sp, 0, asc, de
            total = total + wi + sp
        if total < tab:
            # Empty filler word whose space covers the remaining distance
            self.words.append((None, '', 0, tab-total, 0, asc, de))

    # Make a hanging tag: tab to hang, increment indent_left by hang,
    # and reset indent_hang to -hang
    def makehangingtag(self, hang):
        """Turn the words added so far into a hanging tag of width hang."""
        self.tabto(hang)
        self.indent_left = self.indent_left + hang
        self.indent_hang = -hang

    def layout(self, linewidth):
        """Decide where the line breaks fall for the given line width.

        Sets self.width, self.height and self.lines; each line is a tuple
        (nentries, firstfont, charcount, width, stretch, ascent, descent)
        where nentries counts words and anchor markers alike.
        """
        self.width = linewidth
        height = 0
        self.lines = lines = []
        avail1 = self.width - self.indent_left - self.indent_right
        avail = avail1 - self.indent_hang       # first line only
        words = self.words
        i = 0
        n = len(words)
        lastfont = None
        while i < n:
            firstfont = lastfont
            charcount = 0
            width = 0
            stretch = 0
            ascent = 0
            descent = 0
            lsp = 0             # trailing space of the last word on the line
            lst = 0             # trailing stretch of the last word on the line
            j = i
            while i < n:
                word = words[i]
                if isinstance(word, int):
                    # Break before an anchor start once the line is full.
                    # Requiring width > 0 guarantees every line consumes
                    # at least one word, so the outer loop always advances
                    # even when avail <= 0.
                    if word > 0 and width >= avail and width > 0:
                        break
                    i = i+1
                    continue
                fo, te, wi, sp, st, asc, de = word
                if width + wi > avail and width > 0 and wi > 0:
                    break
                if fo is not None:
                    lastfont = fo
                    if width == 0:
                        firstfont = fo
                charcount = charcount + len(te) + (sp > 0)
                width = width + wi + sp
                lsp = sp
                stretch = stretch + st
                lst = st
                ascent = max(ascent, asc)
                descent = max(descent, de)
                i = i+1
            # Move trailing anchor starts to the beginning of the next line
            while i > j and isinstance(words[i-1], int) and \
                    words[i-1] > 0:
                i = i-1
            width = width - lsp
            if i < n:
                stretch = stretch - lst
            else:
                stretch = 0     # never stretch the last line
            lines.append(
                (i-j, firstfont, charcount, width, stretch,
                 ascent, descent))
            height = height + ascent + descent
            avail = avail1
        self.height = height

    def visit(self, wordfunc, anchorfunc):
        """Call a function for every entry of every laid-out line.

        Words get wordfunc(self, line, word, h, v, h2, v2, isfirst, islast)
        and anchor markers get anchorfunc(self, line, word, h, v), where
        line is the tuple built by layout().  The first result that is not
        None stops the traversal and is returned; otherwise None.
        """
        avail1 = self.width - self.indent_left - self.indent_right
        avail = avail1 - self.indent_hang
        v = self.top
        i = 0
        for line in self.lines:
            wordcount, firstfont, charcount, width, stretch, \
                ascent, descent = line
            h = self.left + self.indent_left
            if i == 0:
                h = h + self.indent_hang
            extra = 0
            if self.just == 'r':
                h = h + avail - width
            elif self.just == 'c':
                # '//' keeps the integer result that '/' gave on ints
                h = h + (avail - width) // 2
            elif self.just == 'lr' and stretch > 0:
                extra = avail - width
            v2 = v + ascent + descent
            for j in range(i, i+wordcount):
                word = self.words[j]
                if isinstance(word, int):
                    ok = anchorfunc(self, line, word, h, v)
                    if ok is not None:
                        return ok
                    continue
                fo, te, wi, sp, st, asc, de = word
                if extra > 0 and stretch > 0:
                    # Hand out the extra space in proportion to stretch
                    ex = extra * st // stretch
                    extra = extra - ex
                    stretch = stretch - st
                else:
                    ex = 0
                h2 = h + wi + sp + ex
                ok = wordfunc(self, line, word, h, v,
                              h2, v2, (j == i), (j == i+wordcount-1))
                if ok is not None:
                    return ok
                h = h2
            v = v2
            i = i + wordcount
            avail = avail1

    # Render a paragraph in "drawing object" d, using the rectangle
    # given by (left, top, right) with an unspecified bottom.
    # Return the computed bottom of the text.
    def render(self, d, left, top, right):
        """Draw the paragraph with d and return the bottom coordinate.

        The paragraph is laid out again first if the width changed.
        """
        if self.width != right-left:
            self.layout(right-left)
        self.left = left
        self.top = top
        self.right = right
        self.bottom = self.top + self.height
        self.anchorid = 0
        try:
            self.d = d
            self.visit(self.__class__._renderword,
                       self.__class__._renderanchor)
        finally:
            self.d = None
        return self.bottom

    def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
        # Draw one word; words inside an anchor get a line drawn below
        if word[0] is not None:
            self.d.setfont(word[0])
        baseline = v + tuple[5]
        self.d.text((h, baseline - word[5]), word[1])
        if self.anchorid > 0:
            self.d.line((h, baseline+2), (h2, baseline+2))

    def _renderanchor(self, tuple, word, h, v):
        self.anchorid = word

    def hitcheck(self, mouseh, mousev):
        """Return the list of anchor ids hit by the mouse position."""
        self.mouseh = mouseh
        self.mousev = mousev
        self.anchorid = 0
        self.hits = []
        self.visit(self.__class__._hitcheckword,
                   self.__class__._hitcheckanchor)
        return self.hits

    def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
        if self.anchorid > 0 and h <= self.mouseh <= h2 and \
                v <= self.mousev <= v2:
            self.hits.append(self.anchorid)

    def _hitcheckanchor(self, tuple, word, h, v):
        self.anchorid = word

    def hasanchor(self, id):
        """Return whether id (or -id) occurs in the word list."""
        return id in self.words or -id in self.words

    # Extract the raw text from the word list, substituting one space
    # for non-empty inter-word space, and terminating with '\n'
    def extract(self):
        """Return the paragraph's raw text, terminated by a newline."""
        parts = []
        for w in self.words:
            if not isinstance(w, int):
                parts.append(w[1])
                if w[3]:
                    parts.append(' ')
        parts.append('\n')
        return ''.join(parts)

    # Return which character position was hit by the mouse, as
    # an offset in the entire text as returned by extract().
    # Return None if the mouse was not in this paragraph
    def whereis(self, d, mouseh, mousev):
        """Return the text offset under the mouse, or None if outside."""
        if mousev < self.top or mousev > self.bottom:
            return None
        self.mouseh = mouseh
        self.mousev = mousev
        self.lastfont = None
        self.charcount = 0
        try:
            self.d = d
            return self.visit(self.__class__._whereisword,
                              self.__class__._whereisanchor)
        finally:
            self.d = None

    def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
        fo, te, wi, sp, st, asc, de = word
        if fo is not None:
            self.lastfont = fo
        h = h1
        # The first and last word of a line also catch clicks in the margins
        if isfirst:
            h1 = 0
        if islast:
            h2 = 999999
        if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
            self.charcount = self.charcount + len(te) + (sp > 0)
            return
        if self.lastfont is not None:
            self.d.setfont(self.lastfont)
        cc = 0
        for c in te:
            cw = self.d.textwidth(c)
            if self.mouseh <= h + cw//2:
                return self.charcount + cc
            cc = cc+1
            h = h+cw
        self.charcount = self.charcount + cc
        if self.mouseh <= (h+h2) // 2:
            return self.charcount
        else:
            return self.charcount + 1

    def _whereisanchor(self, tuple, word, h, v):
        pass

    # Return screen position corresponding to position in paragraph.
    # Return tuple (h, vtop, vbaseline, vbottom).
    # This is more or less the inverse of whereis()
    def screenpos(self, d, pos):
        """Return (h, vtop, vbaseline, vbottom) for text offset pos."""
        if pos < 0:
            ascent, descent = self.lines[0][5:7]
            return self.left, self.top, self.top + ascent, \
                self.top + ascent + descent
        self.pos = pos
        self.lastfont = None
        try:
            self.d = d
            ok = self.visit(self.__class__._screenposword,
                            self.__class__._screenposanchor)
        finally:
            self.d = None
        if ok is None:
            # Past the end of the text: use the end of the last line
            ascent, descent = self.lines[-1][5:7]
            ok = self.right, self.bottom - ascent - descent, \
                self.bottom - descent, self.bottom
        return ok

    def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
        fo, te, wi, sp, st, asc, de = word
        if fo is not None:
            self.lastfont = fo
        cc = len(te) + (sp > 0)
        if self.pos > cc:
            self.pos = self.pos - cc
            return
        if self.pos < cc:
            # Same guard as _whereisword: no font may have been seen yet
            if self.lastfont is not None:
                self.d.setfont(self.lastfont)
            h = h1 + self.d.textwidth(te[:self.pos])
        else:
            h = h2
        ascent, descent = tuple[5:7]
        return h, v1, v1+ascent, v2

    def _screenposanchor(self, tuple, word, h, v):
        pass

    # Invert the stretch of text between pos1 and pos2.
    # If pos1 is None, the beginning is implied;
    # if pos2 is None, the end is implied.
    # Undoes its own effect when called again with the same arguments
    def invert(self, d, pos1, pos2):
        """Invert the text between offsets pos1 and pos2 using d.invert()."""
        if pos1 is None:
            pos1 = self.left, self.top, self.top, self.top
        else:
            pos1 = self.screenpos(d, pos1)
        if pos2 is None:
            pos2 = self.right, self.bottom, self.bottom, self.bottom
        else:
            pos2 = self.screenpos(d, pos2)
        h1, top1, baseline1, bottom1 = pos1
        h2, top2, baseline2, bottom2 = pos2
        if bottom1 <= top2:
            # Spans several lines: rest of the first line, then any full
            # lines in between; the last line is handled below
            d.invert((h1, top1), (self.right, bottom1))
            h1 = self.left
            if bottom1 < top2:
                d.invert((h1, bottom1), (self.right, top2))
            top1, bottom1 = top2, bottom2
        d.invert((h1, top1), (h2, bottom2))
|
||||
|
||||
|
||||
# Test class Para
|
||||
# XXX This was last used on the Mac, hence the weird fonts...
|
||||
def test():
    """Interactive test: show four paragraphs, one per justification.

    Opens a stdwin window, redraws on demand and lets the user select a
    stretch of text within one paragraph by pressing and releasing the
    mouse button.  Returns when the window is closed.
    """
    import stdwin
    # Explicit names: 'from ... import *' is not allowed inside a function
    from stdwinevents import WE_CLOSE, WE_SIZE, WE_DRAW, \
        WE_MOUSE_DOWN, WE_MOUSE_UP
    words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
        'the', 'lazy', 'dog.'
    paralist = []
    for just in 'l', 'r', 'lr', 'c':
        p = Para()
        p.just = just
        p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
        for word in words[1:-1]:
            p.addword(stdwin, None, word, 1, 1)
        p.addword(stdwin, None, words[-1], 2, 4)
        p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
        p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
        paralist.append(p)
    window = stdwin.open('Para.test()')
    start = stop = selpara = None
    while 1:
        etype, win, detail = stdwin.getevent()
        if etype == WE_CLOSE:
            break
        if etype == WE_SIZE:
            window.change((0, 0), (1000, 1000))
        if etype == WE_DRAW:
            width, height = window.getwinsize()
            d = None
            try:
                d = window.begindrawing()
                d.cliprect(detail)
                d.erase(detail)
                v = 0
                for p in paralist:
                    v = p.render(d, 0, v, width)
                    if p is selpara and \
                            start is not None and stop is not None:
                        p.invert(d, start, stop)
            finally:
                if d: d.close()
        if etype == WE_MOUSE_DOWN:
            if selpara and start is not None and stop is not None:
                # Undo the highlighting of the previous selection
                d = window.begindrawing()
                try:
                    selpara.invert(d, start, stop)
                finally:
                    d.close()
            start = stop = selpara = None
            mouseh, mousev = detail[0]
            for p in paralist:
                start = p.whereis(stdwin, mouseh, mousev)
                if start is not None:
                    selpara = p
                    break
        if etype == WE_MOUSE_UP and start is not None and selpara:
            mouseh, mousev = detail[0]
            stop = selpara.whereis(stdwin, mouseh, mousev)
            if stop is None:
                start = selpara = None
            else:
                if start > stop:
                    start, stop = stop, start
                d = window.begindrawing()
                try:
                    selpara.invert(d, start, stop)
                finally:
                    d.close()
    window.close()
|
|
@ -0,0 +1,621 @@
|
|||
# Text formatting abstractions
|
||||
|
||||
|
||||
import string
|
||||
import Para
|
||||
|
||||
|
||||
# A formatter back-end object has one method that is called by the formatter:
|
||||
# addpara(p), where p is a paragraph object. For example:
|
||||
|
||||
|
||||
# Formatter back-end to do nothing at all with the paragraphs
|
||||
class NullBackEnd:
    """Formatter back-end that ignores everything it is given.

    The other back-end classes in this file derive from it.
    """

    def __init__(self):
        pass

    def addpara(self, p):
        # Receive a completed paragraph object; ignored here
        pass

    def bgn_anchor(self, id):
        # Start of the anchor with the given id; ignored here
        pass

    def end_anchor(self, id):
        # End of the anchor with the given id; ignored here
        pass
|
||||
|
||||
|
||||
# Formatter back-end to collect the paragraphs in a list
|
||||
class SavingBackEnd(NullBackEnd):
    """Formatter back-end that collects the paragraphs in self.paralist.

    Positions in the saved document are "long" positions: pairs
    (paragraph index, character offset into that paragraph's extract()).
    """

    def __init__(self):
        self.paralist = []

    def addpara(self, p):
        self.paralist.append(p)

    def hitcheck(self, h, v):
        """Return the anchor ids hit at (h, v), without duplicates."""
        hits = []
        for p in self.paralist:
            if p.top <= v <= p.bottom:
                for anchor in p.hitcheck(h, v):
                    if anchor not in hits:
                        hits.append(anchor)
        return hits

    def extract(self):
        """Return the text of all paragraphs, concatenated."""
        return ''.join([p.extract() for p in self.paralist])

    def extractpart(self, long1, long2):
        """Return the text between two long positions (in either order)."""
        if long1 > long2:
            long1, long2 = long2, long1
        para1, pos1 = long1
        para2, pos2 = long2
        parts = []
        while para1 < para2:
            ptext = self.paralist[para1].extract()
            parts.append(ptext[pos1:])
            pos1 = 0            # later paragraphs start at their beginning
            para1 = para1 + 1
        ptext = self.paralist[para2].extract()
        parts.append(ptext[pos1:pos2])
        return ''.join(parts)

    def whereis(self, d, h, v):
        """Return the long position at (h, v), or None if in no paragraph."""
        for i in range(len(self.paralist)):
            result = self.paralist[i].whereis(d, h, v)
            if result is not None:
                return i, result
        return None

    def roundtowords(self, long1, long2):
        """Widen (long1, long2) so both ends fall on word boundaries."""
        i, offset = long1
        text = self.paralist[i].extract()
        while offset > 0 and text[offset-1] != ' ':
            offset = offset-1
        long1 = i, offset
        #
        i, offset = long2
        text = self.paralist[i].extract()
        n = len(text)
        # n-1 is the terminating newline; never move past it
        while offset < n-1 and text[offset] != ' ':
            offset = offset+1
        long2 = i, offset
        #
        return long1, long2

    def roundtoparagraphs(self, long1, long2):
        """Widen (long1, long2) to cover whole paragraphs."""
        long1 = long1[0], 0
        long2 = long2[0], len(self.paralist[long2[0]].extract())
        return long1, long2
|
||||
|
||||
|
||||
# Formatter back-end to send the text directly to the drawing object
|
||||
class WritingBackEnd(NullBackEnd):
    """Formatter back-end that renders each paragraph as soon as it arrives.

    d is the drawing object passed to the paragraph's render() and width
    is the right edge handed to it; paragraphs are not saved.
    """

    def __init__(self, d, width):
        self.d = d
        self.width = width
        self.lineno = 0         # vertical position for the next paragraph

    def addpara(self, p):
        # render() returns the paragraph's bottom, which becomes the top
        # of the next paragraph
        self.lineno = p.render(self.d, 0, self.lineno, self.width)
|
||||
|
||||
|
||||
# A formatter receives a stream of formatting instructions and assembles
|
||||
# these into a stream of paragraphs on to a back-end. The assembly is
|
||||
# parametrized by a text measurement object, which must match the output
|
||||
# operations of the back-end. The back-end is responsible for splitting
|
||||
# paragraphs up in lines of a given maximum width. (This is done because
|
||||
# in a windowing environment, when the window size changes, there is no
|
||||
# need to redo the assembly into paragraphs, but the splitting into lines
|
||||
# must be done taking the new window size into account.)
|
||||
|
||||
|
||||
# Formatter base class. Initialize it with a text measurement object,
|
||||
# which is used for text measurements, and a back-end object,
|
||||
# which receives the completed paragraphs. The formatting methods are:
|
||||
# setfont(font)
|
||||
# setleftindent(nspaces)
|
||||
# setjust(type) where type is 'l', 'c', 'r', or 'lr'
|
||||
# flush()
|
||||
# vspace(nlines)
|
||||
# needvspace(nlines)
|
||||
# addword(word, nspaces)
|
||||
class BaseFormatter:
    """Assemble formatting instructions into paragraphs for a back-end.

    d is a text measurement object (setfont, textwidth, lineheight,
    baseline) and b is the back-end whose addpara() receives each
    completed paragraph.
    """

    def __init__(self, d, b):
        # Drawing object used for text measurements
        self.d = d
        #
        # BackEnd object receiving completed paragraphs
        self.b = b
        #
        # Parameters of the formatting model
        self.leftindent = 0
        self.rightindent = 0    # defined up front; set by setrightindent()
        self.just = 'l'
        self.font = None
        self.blanklines = 0
        #
        # Parameters derived from the current font
        self.space = d.textwidth(' ')
        self.line = d.lineheight()
        self.ascent = d.baseline()
        self.descent = self.line - self.ascent
        #
        # Parameter derived from the default font
        self.n_space = self.space
        #
        # Current paragraph being built
        self.para = None
        self.nospace = 1
        #
        # Font to set on the next word
        self.nextfont = None

    def newpara(self):
        """Return a new, empty paragraph object."""
        return Para.Para()

    def setfont(self, font):
        """Switch to font (None is ignored) and refresh the font metrics."""
        if font is None:
            return
        self.font = self.nextfont = font
        d = self.d
        d.setfont(font)
        self.space = d.textwidth(' ')
        self.line = d.lineheight()
        self.ascent = d.baseline()
        self.descent = self.line - self.ascent

    def setleftindent(self, nspaces):
        """Set the left indent, in widths of a default-font space.

        If the current paragraph is short enough to fit in the added
        indent it becomes a hanging tag; otherwise it is flushed.
        """
        self.leftindent = int(self.n_space * nspaces)
        if self.para:
            hang = self.leftindent - self.para.indent_left
            if hang > 0 and self.para.getlength() <= hang:
                self.para.makehangingtag(hang)
                self.nospace = 1
            else:
                self.flush()

    def setrightindent(self, nspaces):
        """Set the right indent on the current paragraph and flush it."""
        self.rightindent = int(self.n_space * nspaces)
        if self.para:
            self.para.indent_right = self.rightindent
            self.flush()

    def setjust(self, just):
        """Set the justification: 'l', 'c', 'r' or 'lr'."""
        self.just = just
        if self.para:
            self.para.just = self.just

    def flush(self):
        """Hand the current paragraph, if any, to the back-end."""
        if self.para:
            self.b.addpara(self.para)
            self.para = None
            if self.font is not None:
                self.d.setfont(self.font)
        self.nospace = 1

    def vspace(self, nlines):
        """Flush, then emit nlines of vertical space as an empty paragraph."""
        self.flush()
        if nlines > 0:
            self.para = self.newpara()
            # One empty word whose ascent provides the height
            filler = None, '', 0, 0, 0, int(nlines*self.line), 0
            self.para.words.append(filler)
            self.flush()
            self.blanklines = self.blanklines + nlines

    def needvspace(self, nlines):
        """Make sure at least nlines blank lines precede what follows."""
        self.flush() # Just to be sure
        if nlines > self.blanklines:
            self.vspace(nlines - self.blanklines)

    def addword(self, text, space):
        """Add a word followed by space (in widths of a space character).

        An empty word right after a flush or hanging tag is dropped.
        """
        if self.nospace and not text:
            return
        self.nospace = 0
        self.blanklines = 0
        if not self.para:
            self.para = self.newpara()
            self.para.indent_left = self.leftindent
            self.para.just = self.just
            self.nextfont = self.font
        space = int(space * self.space)
        # list.append takes exactly one argument, so pass a real tuple
        self.para.words.append(
            (self.nextfont, text, self.d.textwidth(text),
             space, space, self.ascent, self.descent))
        self.nextfont = None

    def bgn_anchor(self, id):
        """Begin anchor id, starting a paragraph first if necessary."""
        if not self.para:
            # Force addword() to accept the empty word that opens the para
            self.nospace = 0
            self.addword('', 0)
        self.para.bgn_anchor(id)

    def end_anchor(self, id):
        """End anchor id, starting a paragraph first if necessary."""
        if not self.para:
            self.nospace = 0
            self.addword('', 0)
        self.para.end_anchor(id)
|
||||
|
||||
|
||||
# Measuring object for measuring text as viewed on a tty
|
||||
class NullMeasurer:
    """Measuring object for text as viewed on a tty.

    Fonts are ignored: every character is one unit wide and every line
    is one unit high.
    """

    def __init__(self):
        pass

    def setfont(self, font):
        pass

    def textwidth(self, text):
        # One unit per character
        return len(text)

    def lineheight(self):
        return 1

    def baseline(self):
        return 0
|
||||
|
||||
|
||||
# Drawing object for writing plain ASCII text to a file
|
||||
class FileWriter:
    """Drawing object for writing plain ASCII text to a file.

    Keeps track of the current line and column so that text() can be
    asked to put a string at a character-cell position (h, v).
    """

    def __init__(self, fp):
        self.fp = fp
        self.lineno, self.colno = 0, 0

    def setfont(self, font):
        pass

    def text(self, pos, str):
        """Write str at position pos, a pair (h, v) of column and line.

        Newlines and spaces are written to move forward; moving backward
        is done with ANSI cursor-up and backspace characters.  An empty
        string writes nothing.  Raises ValueError if str contains a
        newline.
        """
        h, v = pos
        if not str:
            return
        if '\n' in str:
            raise ValueError('can\'t write \\n')
        while self.lineno < v:
            self.fp.write('\n')
            self.colno, self.lineno = 0, self.lineno + 1
        while self.lineno > v:
            # XXX This should never happen...
            self.fp.write('\033[A') # ANSI up arrow
            self.lineno = self.lineno - 1
        if self.colno < h:
            self.fp.write(' ' * (h - self.colno))
        elif self.colno > h:
            self.fp.write('\b' * (self.colno - h))
        self.colno = h
        self.fp.write(str)
        self.colno = h + len(str)
|
||||
|
||||
|
||||
# Formatting class to do nothing at all with the data
|
||||
class NullFormatter(BaseFormatter):
    """Formatter that does nothing at all with the data.

    It pairs a NullMeasurer with a NullBackEnd.
    """

    def __init__(self):
        d = NullMeasurer()
        b = NullBackEnd()
        BaseFormatter.__init__(self, d, b)
|
||||
|
||||
|
||||
# Formatting class to write directly to a file
|
||||
class WritingFormatter(BaseFormatter):
    """Formatter that writes directly to the file fp.

    Text is measured with a NullMeasurer and written by a FileWriter
    through a WritingBackEnd created with the given width.
    """

    def __init__(self, fp, width):
        dm = NullMeasurer()
        dw = FileWriter(fp)
        b = WritingBackEnd(dw, width)
        BaseFormatter.__init__(self, dm, b)
        # Start as if one blank line was already written, so that a first
        # needvspace(1) adds nothing
        self.blanklines = 1

    # Suppress multiple blank lines
    def needvspace(self, nlines):
        # Never ask for more than one blank line
        BaseFormatter.needvspace(self, min(1, nlines))
|
||||
|
||||
|
||||
# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
|
||||
# _italic text_ and _underlined words_, and `quoted text'.
|
||||
# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
|
||||
# italic, bold, underline, quote).
|
||||
# Moreover, if the font is in upper case, the text is converted to
|
||||
# UPPER CASE.
|
||||
class FunnyFormatter(WritingFormatter):
    """WritingFormatter that marks font changes with ASCII characters.

    Before a paragraph is flushed it is passed to finalize(), which
    inserts the marker characters into the words.
    """

    def flush(self):
        # Decorate the paragraph in place before the base class emits it
        if self.para: finalize(self.para)
        WritingFormatter.flush(self)
|
||||
|
||||
|
||||
# Surrounds *bold words* and _italic text_ in a paragraph with
|
||||
# appropriate markers, fixing the size (assuming these characters'
|
||||
# width is 1).
|
||||
# Marker characters written before and after a run of words in a font
openchar = \
    {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
closechar = \
    {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}

def finalize(para):
    """Insert font marker characters into the words of para, in place.

    A run of words in a font listed in openchar/closechar gets the open
    character prepended to its first word and the close character
    appended to its last non-empty word; widths are adjusted assuming
    each marker is one unit wide.  Words in an upper case font are
    converted to upper case.  Integer anchor markers in the word list
    are left alone.
    """
    oldfont = curfont = 'r'
    # Sentinel word in the roman font so a trailing run gets closed;
    # temporary, deleted at end
    para.words.append(('r', '', 0, 0, 0, 0, 0))
    for i in range(len(para.words)):
        word = para.words[i]
        if isinstance(word, int):
            continue            # anchor marker, not a word
        fo, te, wi = word[:3]
        if fo is not None:
            curfont = fo
        if curfont != oldfont:
            if oldfont in closechar:
                c = closechar[oldfont]
                # Back up to the last word that has text, skipping
                # anchor markers and empty words
                j = i-1
                while j > 0 and (isinstance(para.words[j], int) or
                                 para.words[j][1] == ''):
                    j = j-1
                if not isinstance(para.words[j], int):
                    fo1, te1, wi1 = para.words[j][:3]
                    te1 = te1 + c
                    wi1 = wi1 + len(c)
                    para.words[j] = (fo1, te1, wi1) + \
                        para.words[j][3:]
            if curfont in openchar and te:
                c = openchar[curfont]
                te = c + te
                wi = len(c) + wi
                para.words[i] = (fo, te, wi) + \
                    para.words[i][3:]
            # An empty word does not open a run; retry on the next word
            if te:
                oldfont = curfont
            else:
                oldfont = 'r'
        if curfont in string.ascii_uppercase:
            te = te.upper()
            para.words[i] = (fo, te, wi) + para.words[i][3:]
    del para.words[-1]
|
||||
|
||||
|
||||
# Formatter back-end to draw the text in a window.
|
||||
# This has an option to draw while the paragraphs are being added,
|
||||
# to minimize the delay before the user sees anything.
|
||||
# This manages the entire "document" of the window.
|
||||
class StdwinBackEnd(SavingBackEnd):
    """Formatter back-end that draws the text in a stdwin window.

    With drawnow true, paragraphs are drawn while they are being added,
    to minimize the delay before the user sees anything.  The object
    manages the entire "document" of the window, including the current
    selection: a pair of long positions, or None.
    """

    def __init__(self, window, drawnow):
        self.window = window
        self.drawnow = drawnow
        self.width = window.getwinsize()[0]
        self.selection = None
        self.height = 0
        window.setorigin(0, 0)
        window.setdocsize(0, 0)
        # Drawing object used by addpara(); closed by finish()
        self.d = window.begindrawing()
        SavingBackEnd.__init__(self)

    def finish(self):
        """Close the drawing object and set the final document size."""
        self.d.close()
        self.d = None
        self.window.setdocsize(0, self.height)

    def _place(self, p):
        # Lay out p for the current width and stack it below the rest
        p.layout(self.width)
        p.left = 0
        p.top = self.height
        p.right = self.width
        p.bottom = self.height + p.height
        self.height = p.bottom

    def addpara(self, p):
        """Save paragraph p and draw it (drawnow) or just position it."""
        self.paralist.append(p)
        if self.drawnow:
            self.height = \
                p.render(self.d, 0, self.height, self.width)
        else:
            self._place(p)

    def resize(self):
        """Lay out all paragraphs again for the current window width."""
        self.window.change((0, 0), (self.width, self.height))
        self.width = self.window.getwinsize()[0]
        self.height = 0
        for p in self.paralist:
            self._place(p)
        self.window.change((0, 0), (self.width, self.height))
        self.window.setdocsize(0, self.height)

    def redraw(self, area):
        """Redraw the paragraphs (and selection) overlapping area."""
        d = self.window.begindrawing()
        try:
            top, bottom = area[0][1], area[1][1]
            d.erase(area)
            d.cliprect(area)
            for p in self.paralist:
                if top < p.bottom and p.top < bottom:
                    p.render(d, p.left, p.top, p.right)
            if self.selection:
                self.invert(d, self.selection)
        finally:
            d.close()

    def setselection(self, new):
        """Change the selection to new (a pair of long positions or None)."""
        if new:
            long1, long2 = new
            pos1 = long1[:3]
            pos2 = long2[:3]
            new = pos1, pos2
        if new != self.selection:
            d = self.window.begindrawing()
            try:
                # Inverting twice restores the display, so un-highlight
                # the old selection before highlighting the new one
                if self.selection:
                    self.invert(d, self.selection)
                if new:
                    self.invert(d, new)
            finally:
                d.close()
            self.selection = new

    def getselection(self):
        return self.selection

    def extractselection(self):
        """Return the selected text, or None if nothing is selected."""
        if self.selection:
            a, b = self.selection
            return self.extractpart(a, b)
        else:
            return None

    def invert(self, d, region):
        """Invert the display of region, a pair of long positions."""
        long1, long2 = region
        if long1 > long2:
            long1, long2 = long2, long1
        para1, pos1 = long1
        para2, pos2 = long2
        while para1 < para2:
            self.paralist[para1].invert(d, pos1, None)
            pos1 = None
            para1 = para1 + 1
        self.paralist[para2].invert(d, pos1, pos2)

    def search(self, prog):
        """Select and show the next paragraph match of prog; return 1 or 0.

        prog is a pattern string (lower-cased before compiling) or a
        compiled pattern.  The search starts after the paragraph holding
        the current selection and wraps around to earlier paragraphs.
        """
        # NOTE(review): this relies on the obsolete 'regex' module API
        # (search() returning an index, match offsets in prog.regs).
        # Porting to 're' would change the pattern syntax, so it is left
        # as is.
        import regex
        if type(prog) == type(''):
            prog = regex.compile(prog.lower())
        if self.selection:
            iold = self.selection[0][0]
        else:
            iold = -1
        hit = None
        for i in range(len(self.paralist)):
            # Skip the selected paragraph; before it, only the first
            # match is remembered as the wrap-around candidate
            if i == iold or i < iold and hit:
                continue
            p = self.paralist[i]
            text = p.extract().lower()
            if prog.search(text) >= 0:
                a, b = prog.regs[0]
                long1 = i, a
                long2 = i, b
                hit = long1, long2
                if i > iold:
                    break
        if hit:
            self.setselection(hit)
            i = hit[0][0]
            p = self.paralist[i]
            self.window.show((p.left, p.top), (p.right, p.bottom))
            return 1
        else:
            return 0

    def showanchor(self, id):
        """Select and show the first paragraph containing anchor id."""
        for i in range(len(self.paralist)):
            p = self.paralist[i]
            if p.hasanchor(id):
                long1 = i, 0
                long2 = i, len(p.extract())
                hit = long1, long2
                self.setselection(hit)
                self.window.show(
                    (p.left, p.top), (p.right, p.bottom))
                break
|
||||
|
||||
|
||||
# GL extensions
|
||||
|
||||
class GLFontCache:
    """Cache of font handles from the 'fm' module, keyed by 'name size'.

    Tracks the current font key, handle and font info.  The unscaled
    handle of each font name is cached under the key 'name 1'.
    """

    def __init__(self):
        self.reset()
        self.setfont('')

    def reset(self):
        """Forget the current font and empty the cache."""
        self.fontkey = None
        self.fonthandle = None
        self.fontinfo = None
        self.fontcache = {}

    def close(self):
        self.reset()

    def setfont(self, fontkey):
        """Make fontkey ('name' or 'name size') the current font.

        An empty key means 'Times-Roman 12'; a key without a size gets
        size 12.  Handles are looked up in the cache first and created
        with fm.findfont() and scalefont() otherwise.
        """
        if fontkey == '':
            fontkey = 'Times-Roman 12'
        elif ' ' not in fontkey:
            fontkey = fontkey + ' 12'
        if fontkey == self.fontkey:
            return
        if fontkey in self.fontcache:
            handle = self.fontcache[fontkey]
        else:
            i = fontkey.index(' ')
            name, sizestr = fontkey[:i], fontkey[i:]
            # NOTE(review): eval() runs whatever follows the font name;
            # only safe while font keys come from trusted code.
            size = eval(sizestr)
            key1 = name + ' 1'
            key = name + ' ' + repr(size)
            # NB key may differ from fontkey!
            if key in self.fontcache:
                handle = self.fontcache[key]
            else:
                if key1 in self.fontcache:
                    handle = self.fontcache[key1]
                else:
                    import fm
                    handle = fm.findfont(name)
                    self.fontcache[key1] = handle
                handle = handle.scalefont(size)
                self.fontcache[fontkey] = \
                    self.fontcache[key] = handle
        self.fontkey = fontkey
        if self.fonthandle != handle:
            self.fonthandle = handle
            self.fontinfo = handle.getfontinfo()
            handle.setfont()
|
||||
|
||||
|
||||
class GLMeasurer(GLFontCache):
    """Text measurement object based on the current GLFontCache font."""

    def textwidth(self, text):
        # Width of text as reported by the current font handle
        return self.fonthandle.getstrwidth(text)

    def baseline(self):
        # NOTE(review): presumably fontinfo[6] is the font height and
        # fontinfo[3] the part below the baseline -- confirm against the
        # fm module's getfontinfo()
        return self.fontinfo[6] - self.fontinfo[3]

    def lineheight(self):
        return self.fontinfo[6]
|
||||
|
||||
|
||||
class GLWriter(GLFontCache):
    """Drawing object that writes text with the 'gl' and 'fm' modules."""
    #
    # NOTES:
    # (1) Use gl.ortho2 to use X pixel coordinates!
    #
    def text(self, pos, text):
        """Draw text at position pos, a pair (h, v)."""
        import gl, fm
        h, v = pos
        # Same offset as GLMeasurer.baseline() computes
        gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
        fm.prstr(text)

    def setfont(self, fontkey):
        """Select fontkey and activate its handle if it changed."""
        oldhandle = self.fonthandle
        GLFontCache.setfont(self, fontkey)
        if self.fonthandle != oldhandle:
            self.fonthandle.setfont()
|
||||
|
||||
|
||||
class GLMeasurerWriter(GLMeasurer, GLWriter):
    """Object combining the GLMeasurer and GLWriter methods."""
    pass
|
||||
|
||||
|
||||
class GLBackEnd(SavingBackEnd):
    """Formatter back-end that draws the text in the GL window wid."""

    def __init__(self, wid):
        import gl
        gl.winset(wid)
        self.wid = wid
        # NOTE(review): index 1 of gl.getsize() is used as the width
        # here and in redraw() -- confirm the order of its result
        self.width = gl.getsize()[1]
        self.height = 0
        self.d = GLMeasurerWriter()
        SavingBackEnd.__init__(self)

    def finish(self):
        pass

    def addpara(self, p):
        """Save paragraph p and draw it below the previous ones."""
        self.paralist.append(p)
        self.height = p.render(self.d, 0, self.height, self.width)

    def redraw(self):
        """Draw all paragraphs again, for the window's current size."""
        import gl
        gl.winset(self.wid)
        width = gl.getsize()[1]
        if width != self.width:
            self.width = width
            for p in self.paralist:
                p.top = p.bottom = None
        d = self.d
        v = 0
        for p in self.paralist:
            v = p.render(d, 0, v, width)
        # Keep the position for the next addpara() in step with the
        # layout that was just drawn
        self.height = v
|
|
@ -0,0 +1,635 @@
|
|||
# A parser for HTML documents
|
||||
|
||||
|
||||
# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
|
||||
# describe hypertext documents
|
||||
#
|
||||
# SGML: Standard Generalized Markup Language
|
||||
#
|
||||
# WWW: World-Wide Web; a distributed hypertext system developed at CERN
|
||||
#
|
||||
# CERN: European Particle Physics Laboratory in Geneva, Switzerland
|
||||
|
||||
|
||||
# This file is only concerned with parsing and formatting HTML
|
||||
# documents, not with the other (hypertext and networking) aspects of
|
||||
# the WWW project. (It does support highlighting of anchors.)
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import regex
|
||||
import string
|
||||
import sgmllib
|
||||
|
||||
|
||||
# Basic HTML parser on top of sgmllib.SGMLParser.  It adds one entity
# definition and the handlers for the tags that introduce literal text;
# what happens when literal text begins and ends is left to subclasses.
class HTMLParser(sgmllib.SGMLParser):

    # Copy base class entities and add some
    # (the entries are copied into a new dictionary, so the base class's
    # own table is not modified by the addition below)
    entitydefs = {}
    for key in sgmllib.SGMLParser.entitydefs.keys():
        entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
    entitydefs['bullet'] = '*'

    # Provided -- handlers for tags introducing literal text

    # <listing>: put the parser in literal mode, then notify the subclass
    def start_listing(self, attrs):
        self.setliteral('listing')
        self.literal_bgn('listing', attrs)

    def end_listing(self):
        self.literal_end('listing')

    # <xmp>: handled exactly like <listing>
    def start_xmp(self, attrs):
        self.setliteral('xmp')
        self.literal_bgn('xmp', attrs)

    def end_xmp(self):
        self.literal_end('xmp')

    # <plaintext>: calls setnomoretags() instead of setliteral(), and
    # there is no end handler, so literal_end() is never called for it
    def do_plaintext(self, attrs):
        self.setnomoretags()
        self.literal_bgn('plaintext', attrs)

    # To be overridden -- begin/end literal mode
    # tag is 'listing', 'xmp' or 'plaintext'; attrs is passed through
    # unchanged from the tag handler
    def literal_bgn(self, tag, attrs): pass
    def literal_end(self, tag): pass
|
||||
|
||||
|
||||
# Next level of sophistication -- collect anchors, title, nextid and isindex
|
||||
class CollectingParser(HTMLParser):
    #
    def __init__(self):
        HTMLParser.__init__(self)
        self.savetext = None    # Text being saved (for the title), or None
        self.nextid = ''
        self.isindex = 0
        self.title = ''
        # 0 outside an anchor; otherwise the 1-based index of the current
        # anchor in the lists below, negated when it has no href
        self.inanchor = 0
        self.anchors = []       # href of each anchor ('' if none)
        self.anchornames = []   # name of each anchor ('' if none)
        self.anchortypes = []   # type of each anchor, lower case
    #
    # <a>: record the anchor's href, name and type.  Anchors that have
    # neither an href nor a name are ignored.
    def start_a(self, attrs):
        self.inanchor = 0
        href = ''
        name = ''
        atype = ''
        for attrname, value in attrs:
            # Attribute names are compared without a trailing '=' for
            # all three attributes, the same way 'href' always was
            if attrname == 'href':
                href = value
            if attrname == 'name':
                name = value
            if attrname == 'type':
                atype = string.lower(value)
        if not (href or name):
            return
        self.anchors.append(href)
        self.anchornames.append(name)
        self.anchortypes.append(atype)
        self.inanchor = len(self.anchors)
        if not href:
            self.inanchor = -self.inanchor
    #
    # </a>: emit a reference marker like '[3]' after the anchor text
    def end_a(self):
        if self.inanchor > 0:
            # Don't show anchors pointing into the current document
            if self.anchors[self.inanchor-1][:1] != '#':
                self.handle_data('[' + repr(self.inanchor) + ']')
        self.inanchor = 0
    #
    def start_header(self, attrs): pass
    def end_header(self): pass
    #
    # (head is the same as header)
    def start_head(self, attrs): pass
    def end_head(self): pass
    #
    def start_body(self, attrs): pass
    def end_body(self): pass
    #
    # <nextid>: the attribute list is stored as is
    def do_nextid(self, attrs):
        self.nextid = attrs
    #
    def do_isindex(self, attrs):
        self.isindex = 1
    #
    # <title>: start saving text instead of passing it on
    def start_title(self, attrs):
        self.savetext = ''
    #
    # </title>: the saved text becomes the title
    def end_title(self):
        if self.savetext is not None:
            self.title = self.savetext
            self.savetext = None
    #
    # Text is only kept while saving is active; otherwise it is dropped
    def handle_data(self, text):
        if self.savetext is not None:
            self.savetext = self.savetext + text
|
||||
|
||||
|
||||
# Formatting parser -- takes a formatter and a style sheet as arguments
|
||||
|
||||
# XXX The use of style sheets should change: for each tag and end tag
|
||||
# there should be a style definition, and a style definition should
|
||||
# encompass many more parameters: font, justification, indentation,
|
||||
# vspace before, vspace after, hanging tag...
|
||||
|
||||
wordprog = regex.compile('[^ \t\n]*')
|
||||
spaceprog = regex.compile('[ \t\n]*')
|
||||
|
||||
class FormattingParser(CollectingParser):
|
||||
|
||||
def __init__(self, formatter, stylesheet):
|
||||
CollectingParser.__init__(self)
|
||||
self.fmt = formatter
|
||||
self.stl = stylesheet
|
||||
self.savetext = None
|
||||
self.compact = 0
|
||||
self.nofill = 0
|
||||
self.resetfont()
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
def resetfont(self):
|
||||
self.fontstack = []
|
||||
self.stylestack = []
|
||||
self.fontset = self.stl.stdfontset
|
||||
self.style = ROMAN
|
||||
self.passfont()
|
||||
|
||||
def passfont(self):
|
||||
font = self.fontset[self.style]
|
||||
self.fmt.setfont(font)
|
||||
|
||||
def pushstyle(self, style):
|
||||
self.stylestack.append(self.style)
|
||||
self.style = min(style, len(self.fontset)-1)
|
||||
self.passfont()
|
||||
|
||||
def popstyle(self):
|
||||
self.style = self.stylestack[-1]
|
||||
del self.stylestack[-1]
|
||||
self.passfont()
|
||||
|
||||
def pushfontset(self, fontset, style):
|
||||
self.fontstack.append(self.fontset)
|
||||
self.fontset = fontset
|
||||
self.pushstyle(style)
|
||||
|
||||
def popfontset(self):
|
||||
self.fontset = self.fontstack[-1]
|
||||
del self.fontstack[-1]
|
||||
self.popstyle()
|
||||
|
||||
def flush(self):
|
||||
self.fmt.flush()
|
||||
|
||||
def setindent(self, n):
|
||||
self.fmt.setleftindent(n)
|
||||
|
||||
def needvspace(self, n):
|
||||
self.fmt.needvspace(n)
|
||||
|
||||
# Finish parsing, then flush whatever the formatter is still holding.
# The call goes through the direct base class (as __init__ does), so a
# close() added to CollectingParser would not be skipped.
def close(self):
    CollectingParser.close(self)
    self.fmt.flush()
|
||||
|
||||
def handle_literal(self, text):
|
||||
lines = string.splitfields(text, '\n')
|
||||
for i in range(1, len(lines)):
|
||||
lines[i] = string.expandtabs(lines[i], 8)
|
||||
for line in lines[:-1]:
|
||||
self.fmt.addword(line, 0)
|
||||
self.fmt.flush()
|
||||
self.fmt.nospace = 0
|
||||
for line in lines[-1:]:
|
||||
self.fmt.addword(line, 0)
|
||||
|
||||
def handle_data(self, text):
|
||||
if self.savetext is not None:
|
||||
self.savetext = self.savetext + text
|
||||
return
|
||||
if self.literal:
|
||||
self.handle_literal(text)
|
||||
return
|
||||
i = 0
|
||||
n = len(text)
|
||||
while i < n:
|
||||
j = i + wordprog.match(text, i)
|
||||
word = text[i:j]
|
||||
i = j + spaceprog.match(text, j)
|
||||
self.fmt.addword(word, i-j)
|
||||
if self.nofill and '\n' in text[j:i]:
|
||||
self.fmt.flush()
|
||||
self.fmt.nospace = 0
|
||||
i = j+1
|
||||
while text[i-1] <> '\n': i = i+1
|
||||
|
||||
def literal_bgn(self, tag, attrs):
|
||||
if tag == 'plaintext':
|
||||
self.flush()
|
||||
else:
|
||||
self.needvspace(1)
|
||||
self.pushfontset(self.stl.stdfontset, FIXED)
|
||||
self.setindent(self.stl.literalindent)
|
||||
|
||||
def literal_end(self, tag):
|
||||
self.needvspace(1)
|
||||
self.popfontset()
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
def start_title(self, attrs):
|
||||
self.flush()
|
||||
self.savetext = ''
|
||||
# NB end_title is unchanged
|
||||
|
||||
def do_p(self, attrs):
|
||||
if self.compact:
|
||||
self.flush()
|
||||
else:
|
||||
self.needvspace(1)
|
||||
|
||||
def start_h1(self, attrs):
|
||||
self.needvspace(2)
|
||||
self.setindent(self.stl.h1indent)
|
||||
self.pushfontset(self.stl.h1fontset, BOLD)
|
||||
self.fmt.setjust('c')
|
||||
|
||||
def end_h1(self):
|
||||
self.popfontset()
|
||||
self.needvspace(2)
|
||||
self.setindent(self.stl.stdindent)
|
||||
self.fmt.setjust('l')
|
||||
|
||||
def start_h2(self, attrs):
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.h2indent)
|
||||
self.pushfontset(self.stl.h2fontset, BOLD)
|
||||
|
||||
def end_h2(self):
|
||||
self.popfontset()
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
def start_h3(self, attrs):
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
self.pushfontset(self.stl.h3fontset, BOLD)
|
||||
|
||||
def end_h3(self):
|
||||
self.popfontset()
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
def start_h4(self, attrs):
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
self.pushfontset(self.stl.stdfontset, BOLD)
|
||||
|
||||
def end_h4(self):
|
||||
self.popfontset()
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
start_h5 = start_h4
|
||||
end_h5 = end_h4
|
||||
|
||||
start_h6 = start_h5
|
||||
end_h6 = end_h5
|
||||
|
||||
start_h7 = start_h6
|
||||
end_h7 = end_h6
|
||||
|
||||
def start_ul(self, attrs):
|
||||
self.needvspace(1)
|
||||
for attrname, value in attrs:
|
||||
if attrname == 'compact':
|
||||
self.compact = 1
|
||||
self.setindent(0)
|
||||
break
|
||||
else:
|
||||
self.setindent(self.stl.ulindent)
|
||||
|
||||
start_dir = start_menu = start_ol = start_ul
|
||||
|
||||
do_li = do_p
|
||||
|
||||
def end_ul(self):
|
||||
self.compact = 0
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
end_dir = end_menu = end_ol = end_ul
|
||||
|
||||
def start_dl(self, attrs):
|
||||
for attrname, value in attrs:
|
||||
if attrname == 'compact':
|
||||
self.compact = 1
|
||||
self.needvspace(1)
|
||||
|
||||
def end_dl(self):
|
||||
self.compact = 0
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
def do_dt(self, attrs):
|
||||
if self.compact:
|
||||
self.flush()
|
||||
else:
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
|
||||
def do_dd(self, attrs):
|
||||
self.fmt.addword('', 1)
|
||||
self.setindent(self.stl.ddindent)
|
||||
|
||||
def start_address(self, attrs):
|
||||
self.compact = 1
|
||||
self.needvspace(1)
|
||||
self.fmt.setjust('r')
|
||||
|
||||
def end_address(self):
|
||||
self.compact = 0
|
||||
self.needvspace(1)
|
||||
self.setindent(self.stl.stdindent)
|
||||
self.fmt.setjust('l')
|
||||
|
||||
def start_pre(self, attrs):
|
||||
self.needvspace(1)
|
||||
self.nofill = self.nofill + 1
|
||||
self.pushstyle(FIXED)
|
||||
|
||||
def end_pre(self):
|
||||
self.popstyle()
|
||||
self.nofill = self.nofill - 1
|
||||
self.needvspace(1)
|
||||
|
||||
start_typewriter = start_pre
|
||||
end_typewriter = end_pre
|
||||
|
||||
def do_img(self, attrs):
|
||||
self.fmt.addword('(image)', 0)
|
||||
|
||||
# Physical styles
|
||||
|
||||
def start_tt(self, attrs): self.pushstyle(FIXED)
|
||||
def end_tt(self): self.popstyle()
|
||||
|
||||
def start_b(self, attrs): self.pushstyle(BOLD)
|
||||
def end_b(self): self.popstyle()
|
||||
|
||||
def start_i(self, attrs): self.pushstyle(ITALIC)
|
||||
def end_i(self): self.popstyle()
|
||||
|
||||
def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
|
||||
def end_u(self): self.popstyle()
|
||||
|
||||
def start_r(self, attrs): self.pushstyle(ROMAN) # Not official
|
||||
def end_r(self): self.popstyle()
|
||||
|
||||
# Logical styles
|
||||
|
||||
start_em = start_i
|
||||
end_em = end_i
|
||||
|
||||
start_strong = start_b
|
||||
end_strong = end_b
|
||||
|
||||
start_code = start_tt
|
||||
end_code = end_tt
|
||||
|
||||
start_samp = start_tt
|
||||
end_samp = end_tt
|
||||
|
||||
start_kbd = start_tt
|
||||
end_kbd = end_tt
|
||||
|
||||
start_file = start_tt # unofficial
|
||||
end_file = end_tt
|
||||
|
||||
start_var = start_i
|
||||
end_var = end_i
|
||||
|
||||
start_dfn = start_i
|
||||
end_dfn = end_i
|
||||
|
||||
start_cite = start_i
|
||||
end_cite = end_i
|
||||
|
||||
# hp1 is rendered like <i>; the end tag must pop the style again
start_hp1 = start_i
end_hp1 = end_i
|
||||
|
||||
start_hp2 = start_b
|
||||
end_hp2 = end_b
|
||||
|
||||
def unknown_starttag(self, tag, attrs):
|
||||
print '*** unknown <' + tag + '>'
|
||||
|
||||
def unknown_endtag(self, tag):
|
||||
print '*** unknown </' + tag + '>'
|
||||
|
||||
|
||||
# An extension of the formatting parser which formats anchors differently.
|
||||
class AnchoringParser(FormattingParser):

    # Let the base class register the anchor, then tell the formatter
    # where it begins (nothing is reported for an ignored anchor)
    def start_a(self, attrs):
        FormattingParser.start_a(self, attrs)
        anchor = self.inanchor
        if not anchor:
            return
        self.fmt.bgn_anchor(anchor)

    # Tell the formatter where the anchor ends; unlike the inherited
    # end_a, no '[n]' marker text is emitted
    def end_a(self):
        anchor = self.inanchor
        if not anchor:
            return
        self.fmt.end_anchor(anchor)
        self.inanchor = 0
|
||||
|
||||
|
||||
# Style sheet -- this is never instantiated, but the attributes
|
||||
# of the class object itself are used to specify fonts to be used
|
||||
# for various paragraph styles.
|
||||
# A font set is a non-empty list of fonts, in the order:
|
||||
# [roman, italic, bold, fixed].
|
||||
# When a style is not available the nearest lower style is used
|
||||
|
||||
ROMAN = 0
|
||||
ITALIC = 1
|
||||
BOLD = 2
|
||||
FIXED = 3
|
||||
|
||||
class NullStylesheet:
|
||||
# Fonts -- none
|
||||
stdfontset = [None]
|
||||
h1fontset = [None]
|
||||
h2fontset = [None]
|
||||
h3fontset = [None]
|
||||
# Indents
|
||||
stdindent = 2
|
||||
ddindent = 25
|
||||
ulindent = 4
|
||||
h1indent = 0
|
||||
h2indent = 0
|
||||
literalindent = 0
|
||||
|
||||
|
||||
class X11Stylesheet(NullStylesheet):
|
||||
stdfontset = [ \
|
||||
'-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \
|
||||
'-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \
|
||||
'-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \
|
||||
'-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \
|
||||
]
|
||||
h1fontset = [ \
|
||||
'-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \
|
||||
'-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \
|
||||
'-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \
|
||||
]
|
||||
h2fontset = [ \
|
||||
'-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \
|
||||
'-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \
|
||||
'-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \
|
||||
]
|
||||
h3fontset = [ \
|
||||
'-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \
|
||||
'-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \
|
||||
'-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \
|
||||
]
|
||||
ddindent = 40
|
||||
|
||||
|
||||
# Style sheet using (name, style, size) font tuples.
# Each font set is in the order [roman, italic, bold, fixed].
class MacStylesheet(NullStylesheet):
    stdfontset = [ \
        ('Geneva', 'p', 10), \
        ('Geneva', 'i', 10), \
        ('Geneva', 'b', 10), \
        ('Monaco', 'p', 10), \
        ]
    h1fontset = [ \
        ('Geneva', 'p', 18), \
        ('Geneva', 'i', 18), \
        ('Geneva', 'b', 18), \
        ('Monaco', 'p', 18), \
        ]
    # The 14 point set belongs to h2; it used to be bound to h3fontset
    # and was then overwritten by the 12 point set below, which left h2
    # with the inherited [None]
    h2fontset = [ \
        ('Geneva', 'p', 14), \
        ('Geneva', 'i', 14), \
        ('Geneva', 'b', 14), \
        ('Monaco', 'p', 14), \
        ]
    h3fontset = [ \
        ('Geneva', 'p', 12), \
        ('Geneva', 'i', 12), \
        ('Geneva', 'b', 12), \
        ('Monaco', 'p', 12), \
        ]
|
||||
|
||||
|
||||
if os.name == 'mac':
|
||||
StdwinStylesheet = MacStylesheet
|
||||
else:
|
||||
StdwinStylesheet = X11Stylesheet
|
||||
|
||||
|
||||
class GLStylesheet(NullStylesheet):
|
||||
stdfontset = [ \
|
||||
'Helvetica 10', \
|
||||
'Helvetica-Italic 10', \
|
||||
'Helvetica-Bold 10', \
|
||||
'Courier 10', \
|
||||
]
|
||||
h1fontset = [ \
|
||||
'Helvetica 18', \
|
||||
'Helvetica-Italic 18', \
|
||||
'Helvetica-Bold 18', \
|
||||
'Courier 18', \
|
||||
]
|
||||
h2fontset = [ \
|
||||
'Helvetica 14', \
|
||||
'Helvetica-Italic 14', \
|
||||
'Helvetica-Bold 14', \
|
||||
'Courier 14', \
|
||||
]
|
||||
h3fontset = [ \
|
||||
'Helvetica 12', \
|
||||
'Helvetica-Italic 12', \
|
||||
'Helvetica-Bold 12', \
|
||||
'Courier 12', \
|
||||
]
|
||||
|
||||
|
||||
# Test program -- produces no output but times how long it takes
|
||||
# to send a document to a null formatter, exclusive of I/O
|
||||
|
||||
def test():
|
||||
import fmt
|
||||
import time
|
||||
if sys.argv[1:]: file = sys.argv[1]
|
||||
else: file = 'test.html'
|
||||
data = open(file, 'r').read()
|
||||
t0 = time.time()
|
||||
fmtr = fmt.WritingFormatter(sys.stdout, 79)
|
||||
p = FormattingParser(fmtr, NullStylesheet)
|
||||
p.feed(data)
|
||||
p.close()
|
||||
t1 = time.time()
|
||||
print
|
||||
print '*** Formatting time:', round(t1-t0, 3), 'seconds.'
|
||||
|
||||
|
||||
# Test program using stdwin
|
||||
|
||||
def testStdwin():
|
||||
import stdwin, fmt
|
||||
from stdwinevents import *
|
||||
if sys.argv[1:]: file = sys.argv[1]
|
||||
else: file = 'test.html'
|
||||
data = open(file, 'r').read()
|
||||
window = stdwin.open('testStdwin')
|
||||
b = None
|
||||
while 1:
|
||||
etype, ewin, edetail = stdwin.getevent()
|
||||
if etype == WE_CLOSE:
|
||||
break
|
||||
if etype == WE_SIZE:
|
||||
window.setdocsize(0, 0)
|
||||
window.setorigin(0, 0)
|
||||
window.change((0, 0), (10000, 30000)) # XXX
|
||||
if etype == WE_DRAW:
|
||||
if not b:
|
||||
b = fmt.StdwinBackEnd(window, 1)
|
||||
f = fmt.BaseFormatter(b.d, b)
|
||||
p = FormattingParser(f, \
|
||||
MacStylesheet)
|
||||
p.feed(data)
|
||||
p.close()
|
||||
b.finish()
|
||||
else:
|
||||
b.redraw(edetail)
|
||||
window.close()
|
||||
|
||||
|
||||
# Test program using GL
|
||||
|
||||
def testGL():
|
||||
import gl, GL, fmt
|
||||
if sys.argv[1:]: file = sys.argv[1]
|
||||
else: file = 'test.html'
|
||||
data = open(file, 'r').read()
|
||||
W, H = 600, 600
|
||||
gl.foreground()
|
||||
gl.prefsize(W, H)
|
||||
wid = gl.winopen('testGL')
|
||||
gl.ortho2(0, W, H, 0)
|
||||
gl.color(GL.WHITE)
|
||||
gl.clear()
|
||||
gl.color(GL.BLACK)
|
||||
b = fmt.GLBackEnd(wid)
|
||||
f = fmt.BaseFormatter(b.d, b)
|
||||
p = FormattingParser(f, GLStylesheet)
|
||||
p.feed(data)
|
||||
p.close()
|
||||
b.finish()
|
||||
#
|
||||
import time
|
||||
time.sleep(5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
|
@ -0,0 +1,408 @@
|
|||
# Text formatting abstractions
|
||||
|
||||
|
||||
# Oft-used type object
|
||||
Int = type(0)
|
||||
|
||||
|
||||
# Represent a paragraph. This is a list of words with associated
|
||||
# font and size information, plus indents and justification for the
|
||||
# entire paragraph.
|
||||
# Once the words have been added to a paragraph, it can be laid out
|
||||
# for different line widths. Once laid out, it can be rendered at
|
||||
# different screen locations. Once rendered, it can be queried
|
||||
# for mouse hits, and parts of the text can be highlighted
|
||||
class Para:
|
||||
#
|
||||
def __init__(self):
|
||||
self.words = [] # The words
|
||||
self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c'
|
||||
self.indent_left = self.indent_right = self.indent_hang = 0
|
||||
# Final lay-out parameters, may change
|
||||
self.left = self.top = self.right = self.bottom = \
|
||||
self.width = self.height = self.lines = None
|
||||
#
|
||||
# Add a word, computing size information for it.
|
||||
# Words may also be added manually by appending to self.words
|
||||
# Each word should be a 7-tuple:
|
||||
# (font, text, width, space, stretch, ascent, descent)
|
||||
def addword(self, d, font, text, space, stretch):
|
||||
if font <> None:
|
||||
d.setfont(font)
|
||||
width = d.textwidth(text)
|
||||
ascent = d.baseline()
|
||||
descent = d.lineheight() - ascent
|
||||
spw = d.textwidth(' ')
|
||||
space = space * spw
|
||||
stretch = stretch * spw
|
||||
tuple = (font, text, width, space, stretch, ascent, descent)
|
||||
self.words.append(tuple)
|
||||
#
|
||||
# Hooks to begin and end anchors -- insert numbers in the word list!
|
||||
def bgn_anchor(self, id):
|
||||
self.words.append(id)
|
||||
#
|
||||
def end_anchor(self, id):
|
||||
self.words.append(0)
|
||||
#
|
||||
# Return the total length (width) of the text added so far, in pixels
|
||||
def getlength(self):
|
||||
total = 0
|
||||
for word in self.words:
|
||||
if type(word) <> Int:
|
||||
total = total + word[2] + word[3]
|
||||
return total
|
||||
#
|
||||
# Tab to a given position (relative to the current left indent):
|
||||
# remove all stretch, add fixed space up to the new indent.
|
||||
# If the current position is already beyond the tab stop,
|
||||
# don't add any new space (but still remove the stretch)
|
||||
def tabto(self, tab):
    total = 0
    # Ascent/descent for the filler word if there are no words at all;
    # otherwise those of the last real word are used
    asc, de = 1, 0
    for i in range(len(self.words)):
        word = self.words[i]
        if type(word) == Int: continue      # Anchor marker
        fo, te, wi, sp, st, asc, de = word
        # Same word with its stretch removed
        self.words[i] = fo, te, wi, sp, 0, asc, de
        total = total + wi + sp
    if total < tab:
        # Empty filler word whose fixed space reaches the tab stop.
        # The 7-tuple is passed as one argument to append().
        self.words.append((None, '', 0, tab-total, 0, asc, de))
|
||||
#
|
||||
# Make a hanging tag: tab to hang, increment indent_left by hang,
|
||||
# and reset indent_hang to -hang
|
||||
def makehangingtag(self, hang):
|
||||
self.tabto(hang)
|
||||
self.indent_left = self.indent_left + hang
|
||||
self.indent_hang = -hang
|
||||
#
|
||||
# Decide where the line breaks will be given some screen width
|
||||
# Fills in self.lines with one tuple per line:
#   (wordcount, firstfont, charcount, width, stretch, ascent, descent)
# where wordcount counts entries of self.words (anchor markers
# included), and sets self.width and self.height.
def layout(self, linewidth):
    self.width = linewidth
    height = 0
    self.lines = lines = []
    avail1 = self.width - self.indent_left - self.indent_right
    # Only the first line is affected by the hanging indent
    avail = avail1 - self.indent_hang
    words = self.words
    i = 0
    n = len(words)
    lastfont = None
    while i < n:
        firstfont = lastfont
        charcount = 0
        width = 0
        stretch = 0
        ascent = 0
        descent = 0
        # Space and stretch of the last word put on this line.  Both
        # start at 0 so that a line holding only anchor markers does
        # not use an unset (or stale) value below.
        lsp = 0
        lst = 0
        j = i
        while i < n:
            word = words[i]
            if type(word) == Int:
                # Don't open an anchor at the end of a full line -- but
                # never break on a line that is still empty, since that
                # would make no progress
                if word > 0 and width > 0 and width >= avail:
                    break
                i = i+1
                continue
            fo, te, wi, sp, st, asc, de = word
            if width + wi > avail and width > 0 and wi > 0:
                break
            if fo != None:
                lastfont = fo
                if width == 0:
                    firstfont = fo
            charcount = charcount + len(te) + (sp > 0)
            width = width + wi + sp
            lsp = sp
            stretch = stretch + st
            lst = st
            ascent = max(ascent, asc)
            descent = max(descent, de)
            i = i+1
        # After a line break, hand trailing anchor openers over to the
        # next line.  At the end of the paragraph there is no next line:
        # backing up there would produce empty lines forever.
        while j < i < n and type(words[i-1]) == Int and \
              words[i-1] > 0: i = i-1
        # The space after the last word of a line is not shown
        width = width - lsp
        if i < n:
            stretch = stretch - lst
        else:
            # The last line of the paragraph is never stretched
            stretch = 0
        tuple = i-j, firstfont, charcount, width, stretch, \
                ascent, descent
        lines.append(tuple)
        height = height + ascent + descent
        avail = avail1
    self.height = height
|
||||
#
|
||||
# Call a function for all words in a line
|
||||
def visit(self, wordfunc, anchorfunc):
|
||||
avail1 = self.width - self.indent_left - self.indent_right
|
||||
avail = avail1 - self.indent_hang
|
||||
v = self.top
|
||||
i = 0
|
||||
for tuple in self.lines:
|
||||
wordcount, firstfont, charcount, width, stretch, \
|
||||
ascent, descent = tuple
|
||||
h = self.left + self.indent_left
|
||||
if i == 0: h = h + self.indent_hang
|
||||
extra = 0
|
||||
if self.just == 'r': h = h + avail - width
|
||||
elif self.just == 'c': h = h + (avail - width) / 2
|
||||
elif self.just == 'lr' and stretch > 0:
|
||||
extra = avail - width
|
||||
v2 = v + ascent + descent
|
||||
for j in range(i, i+wordcount):
|
||||
word = self.words[j]
|
||||
if type(word) == Int:
|
||||
ok = anchorfunc(self, tuple, word, \
|
||||
h, v)
|
||||
if ok <> None: return ok
|
||||
continue
|
||||
fo, te, wi, sp, st, as, de = word
|
||||
if extra > 0 and stretch > 0:
|
||||
ex = extra * st / stretch
|
||||
extra = extra - ex
|
||||
stretch = stretch - st
|
||||
else:
|
||||
ex = 0
|
||||
h2 = h + wi + sp + ex
|
||||
ok = wordfunc(self, tuple, word, h, v, \
|
||||
h2, v2, (j==i), (j==i+wordcount-1))
|
||||
if ok <> None: return ok
|
||||
h = h2
|
||||
v = v2
|
||||
i = i + wordcount
|
||||
avail = avail1
|
||||
#
|
||||
# Render a paragraph in "drawing object" d, using the rectangle
|
||||
# given by (left, top, right) with an unspecified bottom.
|
||||
# Return the computed bottom of the text.
|
||||
def render(self, d, left, top, right):
|
||||
if self.width <> right-left:
|
||||
self.layout(right-left)
|
||||
self.left = left
|
||||
self.top = top
|
||||
self.right = right
|
||||
self.bottom = self.top + self.height
|
||||
self.anchorid = 0
|
||||
try:
|
||||
self.d = d
|
||||
self.visit(self.__class__._renderword, \
|
||||
self.__class__._renderanchor)
|
||||
finally:
|
||||
self.d = None
|
||||
return self.bottom
|
||||
#
|
||||
def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
|
||||
if word[0] <> None: self.d.setfont(word[0])
|
||||
baseline = v + tuple[5]
|
||||
self.d.text((h, baseline - word[5]), word[1])
|
||||
if self.anchorid > 0:
|
||||
self.d.line((h, baseline+2), (h2, baseline+2))
|
||||
#
|
||||
def _renderanchor(self, tuple, word, h, v):
|
||||
self.anchorid = word
|
||||
#
|
||||
# Return which anchor(s) was hit by the mouse
|
||||
def hitcheck(self, mouseh, mousev):
|
||||
self.mouseh = mouseh
|
||||
self.mousev = mousev
|
||||
self.anchorid = 0
|
||||
self.hits = []
|
||||
self.visit(self.__class__._hitcheckword, \
|
||||
self.__class__._hitcheckanchor)
|
||||
return self.hits
|
||||
#
|
||||
def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
|
||||
if self.anchorid > 0 and h <= self.mouseh <= h2 and \
|
||||
v <= self.mousev <= v2:
|
||||
self.hits.append(self.anchorid)
|
||||
#
|
||||
def _hitcheckanchor(self, tuple, word, h, v):
|
||||
self.anchorid = word
|
||||
#
|
||||
# Return whether the given anchor id is present
|
||||
def hasanchor(self, id):
|
||||
return id in self.words or -id in self.words
|
||||
#
|
||||
# Extract the raw text from the word list, substituting one space
|
||||
# for non-empty inter-word space, and terminating with '\n'
|
||||
def extract(self):
|
||||
text = ''
|
||||
for w in self.words:
|
||||
if type(w) <> Int:
|
||||
word = w[1]
|
||||
if w[3]: word = word + ' '
|
||||
text = text + word
|
||||
return text + '\n'
|
||||
#
|
||||
# Return which character position was hit by the mouse, as
|
||||
# an offset in the entire text as returned by extract().
|
||||
# Return None if the mouse was not in this paragraph
|
||||
def whereis(self, d, mouseh, mousev):
|
||||
if mousev < self.top or mousev > self.bottom:
|
||||
return None
|
||||
self.mouseh = mouseh
|
||||
self.mousev = mousev
|
||||
self.lastfont = None
|
||||
self.charcount = 0
|
||||
try:
|
||||
self.d = d
|
||||
return self.visit(self.__class__._whereisword, \
|
||||
self.__class__._whereisanchor)
|
||||
finally:
|
||||
self.d = None
|
||||
#
|
||||
def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
|
||||
fo, te, wi, sp, st, as, de = word
|
||||
if fo <> None: self.lastfont = fo
|
||||
h = h1
|
||||
if isfirst: h1 = 0
|
||||
if islast: h2 = 999999
|
||||
if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
|
||||
self.charcount = self.charcount + len(te) + (sp > 0)
|
||||
return
|
||||
if self.lastfont <> None:
|
||||
self.d.setfont(self.lastfont)
|
||||
cc = 0
|
||||
for c in te:
|
||||
cw = self.d.textwidth(c)
|
||||
if self.mouseh <= h + cw/2:
|
||||
return self.charcount + cc
|
||||
cc = cc+1
|
||||
h = h+cw
|
||||
self.charcount = self.charcount + cc
|
||||
if self.mouseh <= (h+h2) / 2:
|
||||
return self.charcount
|
||||
else:
|
||||
return self.charcount + 1
|
||||
#
|
||||
def _whereisanchor(self, tuple, word, h, v):
|
||||
pass
|
||||
#
|
||||
# Return screen position corresponding to position in paragraph.
|
||||
# Return tuple (h, vtop, vbaseline, vbottom).
|
||||
# This is more or less the inverse of whereis()
|
||||
def screenpos(self, d, pos):
|
||||
if pos < 0:
|
||||
ascent, descent = self.lines[0][5:7]
|
||||
return self.left, self.top, self.top + ascent, \
|
||||
self.top + ascent + descent
|
||||
self.pos = pos
|
||||
self.lastfont = None
|
||||
try:
|
||||
self.d = d
|
||||
ok = self.visit(self.__class__._screenposword, \
|
||||
self.__class__._screenposanchor)
|
||||
finally:
|
||||
self.d = None
|
||||
if ok == None:
|
||||
ascent, descent = self.lines[-1][5:7]
|
||||
ok = self.right, self.bottom - ascent - descent, \
|
||||
self.bottom - descent, self.bottom
|
||||
return ok
|
||||
#
|
||||
def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
|
||||
fo, te, wi, sp, st, as, de = word
|
||||
if fo <> None: self.lastfont = fo
|
||||
cc = len(te) + (sp > 0)
|
||||
if self.pos > cc:
|
||||
self.pos = self.pos - cc
|
||||
return
|
||||
if self.pos < cc:
|
||||
self.d.setfont(self.lastfont)
|
||||
h = h1 + self.d.textwidth(te[:self.pos])
|
||||
else:
|
||||
h = h2
|
||||
ascent, descent = tuple[5:7]
|
||||
return h, v1, v1+ascent, v2
|
||||
#
|
||||
def _screenposanchor(self, tuple, word, h, v):
|
||||
pass
|
||||
#
|
||||
# Invert the stretch of text between pos1 and pos2.
|
||||
# If pos1 is None, the beginning is implied;
|
||||
# if pos2 is None, the end is implied.
|
||||
# Undoes its own effect when called again with the same arguments
|
||||
def invert(self, d, pos1, pos2):
|
||||
if pos1 == None:
|
||||
pos1 = self.left, self.top, self.top, self.top
|
||||
else:
|
||||
pos1 = self.screenpos(d, pos1)
|
||||
if pos2 == None:
|
||||
pos2 = self.right, self.bottom,self.bottom,self.bottom
|
||||
else:
|
||||
pos2 = self.screenpos(d, pos2)
|
||||
h1, top1, baseline1, bottom1 = pos1
|
||||
h2, top2, baseline2, bottom2 = pos2
|
||||
if bottom1 <= top2:
|
||||
d.invert((h1, top1), (self.right, bottom1))
|
||||
h1 = self.left
|
||||
if bottom1 < top2:
|
||||
d.invert((h1, bottom1), (self.right, top2))
|
||||
top1, bottom1 = top2, bottom2
|
||||
d.invert((h1, top1), (h2, bottom2))
|
||||
|
||||
|
||||
# Test class Para
|
||||
# XXX This was last used on the Mac, hence the weird fonts...
|
||||
def test():
|
||||
import stdwin
|
||||
from stdwinevents import *
|
||||
words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
|
||||
'the', 'lazy', 'dog.'
|
||||
paralist = []
|
||||
for just in 'l', 'r', 'lr', 'c':
|
||||
p = Para()
|
||||
p.just = just
|
||||
p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
|
||||
for word in words[1:-1]:
|
||||
p.addword(stdwin, None, word, 1, 1)
|
||||
p.addword(stdwin, None, words[-1], 2, 4)
|
||||
p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
|
||||
p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
|
||||
paralist.append(p)
|
||||
window = stdwin.open('Para.test()')
|
||||
start = stop = selpara = None
|
||||
while 1:
|
||||
etype, win, detail = stdwin.getevent()
|
||||
if etype == WE_CLOSE:
|
||||
break
|
||||
if etype == WE_SIZE:
|
||||
window.change((0, 0), (1000, 1000))
|
||||
if etype == WE_DRAW:
|
||||
width, height = window.getwinsize()
|
||||
d = None
|
||||
try:
|
||||
d = window.begindrawing()
|
||||
d.cliprect(detail)
|
||||
d.erase(detail)
|
||||
v = 0
|
||||
for p in paralist:
|
||||
v = p.render(d, 0, v, width)
|
||||
if p == selpara and \
|
||||
start <> None and stop <> None:
|
||||
p.invert(d, start, stop)
|
||||
finally:
|
||||
if d: d.close()
|
||||
if etype == WE_MOUSE_DOWN:
|
||||
if selpara and start <> None and stop <> None:
|
||||
d = window.begindrawing()
|
||||
selpara.invert(d, start, stop)
|
||||
d.close()
|
||||
start = stop = selpara = None
|
||||
mouseh, mousev = detail[0]
|
||||
for p in paralist:
|
||||
start = p.whereis(stdwin, mouseh, mousev)
|
||||
if start <> None:
|
||||
selpara = p
|
||||
break
|
||||
if etype == WE_MOUSE_UP and start <> None and selpara:
|
||||
mouseh, mousev = detail[0]
|
||||
stop = selpara.whereis(stdwin, mouseh, mousev)
|
||||
if stop == None: start = selpara = None
|
||||
else:
|
||||
if start > stop:
|
||||
start, stop = stop, start
|
||||
d = window.begindrawing()
|
||||
selpara.invert(d, start, stop)
|
||||
d.close()
|
||||
window.close()
|
|
@ -0,0 +1,621 @@
|
|||
# Text formatting abstractions
|
||||
|
||||
|
||||
import string
|
||||
import Para
|
||||
|
||||
|
||||
# A formatter back-end object has one method that is called by the formatter:
|
||||
# addpara(p), where p is a paragraph object. For example:
|
||||
|
||||
|
||||
# Formatter back-end to do nothing at all with the paragraphs
|
||||
# Every method accepts its arguments and does nothing; the other
# back-ends in this file derive from this class.
class NullBackEnd:
    #
    def __init__(self):
        pass
    #
    # Called with a paragraph object p; ignored here
    def addpara(self, p):
        pass
    #
    # Anchor begin hook; id is ignored here
    def bgn_anchor(self, id):
        pass
    #
    # Anchor end hook; id is ignored here
    def end_anchor(self, id):
        pass
|
||||
|
||||
|
||||
# Formatter back-end to collect the paragraphs in a list
|
||||
class SavingBackEnd(NullBackEnd):
    #
    # Start out with an empty list of paragraphs
    def __init__(self):
        self.paralist = []
    #
    # Store a completed paragraph at the end of the list
    def addpara(self, p):
        self.paralist.append(p)
    #
    # Return the list of distinct ids reported by hitcheck(h, v) of the
    # paragraphs whose top..bottom range contains v
    def hitcheck(self, h, v):
        found = []
        for para in self.paralist:
            if not (para.top <= v <= para.bottom):
                continue
            for anchor in para.hitcheck(h, v):
                if anchor in found:
                    continue
                found.append(anchor)
        return found
    #
    # Return the concatenated extract() text of all paragraphs
    def extract(self):
        result = ''
        for para in self.paralist:
            result = result + para.extract()
        return result
    #
    # Return the text between two positions, each a pair
    # (paragraph index, offset in that paragraph's text); the
    # positions may be given in either order
    def extractpart(self, long1, long2):
        if long2 < long1:
            long1, long2 = long2, long1
        (first, begin), (last, end) = long1, long2
        collected = ''
        # Tail of the first paragraph, then whole paragraphs, up to
        # but not including the last one
        for index in range(first, last):
            collected = collected + self.paralist[index].extract()[begin:]
            begin = 0
        return collected + self.paralist[last].extract()[begin:end]
    #
    # Ask each paragraph in turn for whereis(d, h, v); return
    # (paragraph index, result) for the first one that does not
    # answer None, or None if they all do
    def whereis(self, d, h, v):
        index = 0
        for para in self.paralist:
            pos = para.whereis(d, h, v)
            if pos is not None:
                return index, pos
            index = index + 1
        return None
    #
    # Widen the range long1..long2 to word boundaries: the start moves
    # back until a space precedes it, the end moves forward until it is
    # at a space (or at the last character of the paragraph)
    def roundtowords(self, long1, long2):
        para_no, pos = long1
        chars = self.paralist[para_no].extract()
        while pos > 0:
            if chars[pos-1] == ' ':
                break
            pos = pos - 1
        long1 = para_no, pos
        #
        para_no, pos = long2
        chars = self.paralist[para_no].extract()
        limit = len(chars) - 1
        while pos < limit:
            if chars[pos] == ' ':
                break
            pos = pos + 1
        long2 = para_no, pos
        #
        return long1, long2
    #
    # Widen the range long1..long2 to whole paragraphs
    def roundtoparagraphs(self, long1, long2):
        first = long1[0]
        last = long2[0]
        length = len(self.paralist[last].extract())
        return (first, 0), (last, length)
|
||||
|
||||
|
||||
# Formatter back-end to send the text directly to the drawing object
|
||||
class WritingBackEnd(NullBackEnd):
    #
    # d is the drawing object the paragraphs are rendered on; width is
    # passed on to each paragraph's render() call
    def __init__(self, d, width):
        self.lineno = 0
        self.width = width
        self.d = d
    #
    # Render the paragraph at the current vertical position and keep
    # the value render() returns as the position for the next one
    def addpara(self, p):
        drawer, top = self.d, self.lineno
        self.lineno = p.render(drawer, 0, top, self.width)
|
||||
|
||||
|
||||
# A formatter receives a stream of formatting instructions and assembles
|
||||
# these into a stream of paragraphs that is passed on to a back-end. The assembly is
|
||||
# parametrized by a text measurement object, which must match the output
|
||||
# operations of the back-end. The back-end is responsible for splitting
|
||||
# paragraphs up in lines of a given maximum width. (This is done because
|
||||
# in a windowing environment, when the window size changes, there is no
|
||||
# need to redo the assembly into paragraphs, but the splitting into lines
|
||||
# must be done taking the new window size into account.)
|
||||
|
||||
|
||||
# Formatter base class. Initialize it with a text measurement object,
|
||||
# which is used for text measurements, and a back-end object,
|
||||
# which receives the completed paragraphs. The formatting methods are:
|
||||
# setfont(font)
|
||||
# setleftindent(nspaces)
|
||||
# setjust(type) where type is 'l', 'c', 'r', or 'lr'
|
||||
# flush()
|
||||
# vspace(nlines)
|
||||
# needvspace(nlines)
|
||||
# addword(word, nspaces)
|
||||
class BaseFormatter:
    #
    # Set up the formatter.  d is the object used for text measurements
    # (its setfont, textwidth, lineheight and baseline methods are
    # called); b is the back-end whose addpara() method receives each
    # completed paragraph.
    def __init__(self, d, b):
        # Drawing object used for text measurements
        self.d = d
        #
        # BackEnd object receiving completed paragraphs
        self.b = b
        #
        # Parameters of the formatting model
        self.leftindent = 0
        self.rightindent = 0
        self.just = 'l'
        self.font = None
        self.blanklines = 0
        #
        # Parameters derived from the current font
        self.space = d.textwidth(' ')
        self.line = d.lineheight()
        self.ascent = d.baseline()
        self.descent = self.line - self.ascent
        #
        # Parameter derived from the default font
        self.n_space = self.space
        #
        # Current paragraph being built
        self.para = None
        self.nospace = 1
        #
        # Font to set on the next word
        self.nextfont = None
    #
    # Create a new, empty paragraph object
    def newpara(self):
        return Para.Para()
    #
    # Make font the current font and recompute the measurements that
    # are derived from it.  A font of None is ignored.
    def setfont(self, font):
        if font is None: return
        self.font = self.nextfont = font
        d = self.d
        d.setfont(font)
        self.space = d.textwidth(' ')
        self.line = d.lineheight()
        self.ascent = d.baseline()
        self.descent = self.line - self.ascent
    #
    # Set the left indent to nspaces times the width of a space in the
    # default font.  If a paragraph is in progress and its text fits in
    # the increase of the indent, it is turned into a hanging tag;
    # otherwise the paragraph is flushed.
    def setleftindent(self, nspaces):
        self.leftindent = int(self.n_space * nspaces)
        if self.para:
            hang = self.leftindent - self.para.indent_left
            if hang > 0 and self.para.getlength() <= hang:
                self.para.makehangingtag(hang)
                self.nospace = 1
            else:
                self.flush()
    #
    # Set the right indent, in the same units as setleftindent().  A
    # paragraph in progress gets the new indent and is flushed.
    def setrightindent(self, nspaces):
        self.rightindent = int(self.n_space * nspaces)
        if self.para:
            self.para.indent_right = self.rightindent
            self.flush()
    #
    # Set the justification ('l', 'c', 'r' or 'lr'); this also applies
    # to the paragraph in progress
    def setjust(self, just):
        self.just = just
        if self.para:
            self.para.just = self.just
    #
    # Pass the paragraph in progress (if any) to the back-end and make
    # sure the measurement object is using the current font again
    def flush(self):
        if self.para:
            self.b.addpara(self.para)
            self.para = None
            if self.font is not None:
                self.d.setfont(self.font)
        self.nospace = 1
    #
    # Flush, then emit a paragraph consisting of one empty word whose
    # ascent is nlines times the current line height
    def vspace(self, nlines):
        self.flush()
        if nlines > 0:
            self.para = self.newpara()
            blank = None, '', 0, 0, 0, int(nlines*self.line), 0
            self.para.words.append(blank)
            self.flush()
            self.blanklines = self.blanklines + nlines
    #
    # Make sure at least nlines of vertical space precede what follows,
    # counting the blank lines that were already added
    def needvspace(self, nlines):
        self.flush() # Just to be sure
        if nlines > self.blanklines:
            self.vspace(nlines - self.blanklines)
    #
    # Add a word to the paragraph in progress, starting a new paragraph
    # if necessary.  space is the number of spaces following the word,
    # in units of the width of a space in the current font.  An empty
    # word is dropped when no space is wanted at this point.
    def addword(self, text, space):
        if self.nospace and not text:
            return
        self.nospace = 0
        self.blanklines = 0
        if not self.para:
            self.para = self.newpara()
            self.para.indent_left = self.leftindent
            self.para.just = self.just
            self.nextfont = self.font
        space = int(space * self.space)
        # The word must be appended as ONE tuple: list.append() with
        # several arguments is rejected by newer interpreters
        word = (self.nextfont, text, \
                self.d.textwidth(text), space, space, \
                self.ascent, self.descent)
        self.para.words.append(word)
        self.nextfont = None
    #
    # Begin an anchor in the paragraph in progress; an empty word is
    # added first if there is no paragraph yet
    def bgn_anchor(self, id):
        if not self.para:
            self.nospace = 0
            self.addword('', 0)
        self.para.bgn_anchor(id)
    #
    # End an anchor in the paragraph in progress; an empty word is
    # added first if there is no paragraph yet
    def end_anchor(self, id):
        if not self.para:
            self.nospace = 0
            self.addword('', 0)
        self.para.end_anchor(id)
|
||||
|
||||
|
||||
# Measuring object for measuring text as viewed on a tty
|
||||
class NullMeasurer:
    #
    # All sizes are in character cells: every character is one unit
    # wide and every line is one unit high.
    #
    # Nothing to initialize
    def __init__(self):
        return
    #
    # The font makes no difference to the measurements
    def setfont(self, font):
        return
    #
    # Width of a text: its number of characters
    def textwidth(self, text):
        nchars = len(text)
        return nchars
    #
    # Height of a line
    def lineheight(self):
        return 1
    #
    # Distance from the top of a line to the baseline
    def baseline(self):
        return 0
|
||||
|
||||
|
||||
# Drawing object for writing plain ASCII text to a file
|
||||
class FileWriter:
|
||||
#
|
||||
def __init__(self, fp):
|
||||
self.fp = fp
|
||||
self.lineno, self.colno = 0, 0
|
||||
#
|
||||
def setfont(self, font):
|
||||
pass
|
||||
#
|
||||
def text(self, (h, v), str):
|
||||
if not str: return
|
||||
if '\n' in str:
|
||||
raise ValueError, 'can\'t write \\n'
|
||||
while self.lineno < v:
|
||||
self.fp.write('\n')
|
||||
self.colno, self.lineno = 0, self.lineno + 1
|
||||
while self.lineno > v:
|
||||
# XXX This should never happen...
|
||||
self.fp.write('\033[A') # ANSI up arrow
|
||||
self.lineno = self.lineno - 1
|
||||
if self.colno < h:
|
||||
self.fp.write(' ' * (h - self.colno))
|
||||
elif self.colno > h:
|
||||
self.fp.write('\b' * (self.colno - h))
|
||||
self.colno = h
|
||||
self.fp.write(str)
|
||||
self.colno = h + len(str)
|
||||
|
||||
|
||||
# Formatting class to do nothing at all with the data
|
||||
class NullFormatter(BaseFormatter):
    #
    # Combine the tty-style measurer with the back-end that ignores
    # every paragraph
    def __init__(self):
        measurer = NullMeasurer()
        sink = NullBackEnd()
        BaseFormatter.__init__(self, measurer, sink)
|
||||
|
||||
|
||||
# Formatting class to write directly to a file
|
||||
class WritingFormatter(BaseFormatter):
    #
    # fp is the file to write to, width the line width passed to the
    # back-end.  Text is measured in character cells.
    def __init__(self, fp, width):
        measurer = NullMeasurer()
        writer = FileWriter(fp)
        backend = WritingBackEnd(writer, width)
        BaseFormatter.__init__(self, measurer, backend)
        # Start out as if a blank line has just been written
        self.blanklines = 1
    #
    # Suppress multiple blank lines
    def needvspace(self, nlines):
        capped = min(1, nlines)
        BaseFormatter.needvspace(self, capped)
|
||||
|
||||
|
||||
# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
|
||||
# _italic text_ and _underlined words_, and `quoted text'.
|
||||
# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
|
||||
# italic, bold, underline, quote).
|
||||
# Moreover, if the font is in upper case, the text is converted to
|
||||
# UPPER CASE.
|
||||
class FunnyFormatter(WritingFormatter):
    #
    # Let finalize() insert the font markers into the paragraph in
    # progress (if there is one) before it is flushed the normal way
    def flush(self):
        pending = self.para
        if pending:
            finalize(pending)
        WritingFormatter.flush(self)
|
||||
|
||||
|
||||
# Surrounds *bold words* and _italic text_ in a paragraph with
|
||||
# appropriate markers, fixing the size (assuming these characters'
|
||||
# width is 1).
|
||||
openchar = \
|
||||
{'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
|
||||
closechar = \
|
||||
{'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
|
||||
def finalize(para):
|
||||
oldfont = curfont = 'r'
|
||||
para.words.append('r', '', 0, 0, 0, 0) # temporary, deleted at end
|
||||
for i in range(len(para.words)):
|
||||
fo, te, wi = para.words[i][:3]
|
||||
if fo <> None: curfont = fo
|
||||
if curfont <> oldfont:
|
||||
if closechar.has_key(oldfont):
|
||||
c = closechar[oldfont]
|
||||
j = i-1
|
||||
while j > 0 and para.words[j][1] == '': j = j-1
|
||||
fo1, te1, wi1 = para.words[j][:3]
|
||||
te1 = te1 + c
|
||||
wi1 = wi1 + len(c)
|
||||
para.words[j] = (fo1, te1, wi1) + \
|
||||
para.words[j][3:]
|
||||
if openchar.has_key(curfont) and te:
|
||||
c = openchar[curfont]
|
||||
te = c + te
|
||||
wi = len(c) + wi
|
||||
para.words[i] = (fo, te, wi) + \
|
||||
para.words[i][3:]
|
||||
if te: oldfont = curfont
|
||||
else: oldfont = 'r'
|
||||
if curfont in string.uppercase:
|
||||
te = string.upper(te)
|
||||
para.words[i] = (fo, te, wi) + para.words[i][3:]
|
||||
del para.words[-1]
|
||||
|
||||
|
||||
# Formatter back-end to draw the text in a window.
|
||||
# This has an option to draw while the paragraphs are being added,
|
||||
# to minimize the delay before the user sees anything.
|
||||
# This manages the entire "document" of the window.
|
||||
class StdwinBackEnd(SavingBackEnd):
    #
    # window is the window to draw in; if drawnow is true, addpara()
    # renders each paragraph as soon as it arrives.  A drawing object
    # is opened here and stays open until finish() is called.
    def __init__(self, window, drawnow):
        self.window = window
        self.drawnow = drawnow
        self.selection = None
        self.height = 0
        self.width = window.getwinsize()[0]
        window.setorigin(0, 0)
        window.setdocsize(0, 0)
        self.d = window.begindrawing()
        SavingBackEnd.__init__(self)
    #
    # Close the drawing object opened by __init__ and tell the window
    # the height of the document
    def finish(self):
        self.d.close()
        self.d = None
        self.window.setdocsize(0, self.height)
    #
    # Internal -- lay out paragraph p for the current width and place
    # it below everything placed so far
    def _place(self, p):
        p.layout(self.width)
        top = self.height
        p.left = 0
        p.top = top
        p.right = self.width
        p.bottom = top + p.height
        self.height = p.bottom
    #
    # Save the paragraph; either render it right away or only compute
    # its place in the document
    def addpara(self, p):
        self.paralist.append(p)
        if not self.drawnow:
            self._place(p)
            return
        self.height = p.render(self.d, 0, self.height, self.width)
    #
    # Lay out all paragraphs again for the current window width
    def resize(self):
        self.window.change((0, 0), (self.width, self.height))
        self.width = self.window.getwinsize()[0]
        self.height = 0
        for p in self.paralist:
            self._place(p)
        self.window.change((0, 0), (self.width, self.height))
        self.window.setdocsize(0, self.height)
    #
    # Erase area, a pair ((left, top), (right, bottom)), and render the
    # paragraphs that overlap it vertically, followed by the selection
    def redraw(self, area):
        d = self.window.begindrawing()
        (left, top), (right, bottom) = area
        d.erase(area)
        d.cliprect(area)
        for p in self.paralist:
            if p.bottom <= top or bottom <= p.top:
                continue
            p.render(d, p.left, p.top, p.right)
        if self.selection:
            self.invert(d, self.selection)
        d.close()
    #
    # Change the selection to new (a pair of positions, or a false
    # value for no selection), inverting the old and the new selection
    # on the screen if they differ
    def setselection(self, new):
        if new:
            start, stop = new
            new = start[:3], stop[:3]
        if new == self.selection:
            return
        d = self.window.begindrawing()
        if self.selection:
            self.invert(d, self.selection)
        if new:
            self.invert(d, new)
        d.close()
        self.selection = new
    #
    # Return the current selection
    def getselection(self):
        return self.selection
    #
    # Return the selected text, or None if there is no selection
    def extractselection(self):
        if not self.selection:
            return None
        start, stop = self.selection
        return self.extractpart(start, stop)
    #
    # Invert the region, a pair of (paragraph index, position) pairs
    # in either order, using drawing object d
    def invert(self, d, region):
        long1, long2 = region
        if long2 < long1:
            long1, long2 = long2, long1
        (first, begin), (last, end) = long1, long2
        # All paragraphs before the last are inverted through their end
        for index in range(first, last):
            self.paralist[index].invert(d, begin, None)
            begin = None
        self.paralist[last].invert(d, begin, end)
    #
    # Search the lower-cased text of the paragraphs for prog, a
    # compiled regex or a string (which is lower-cased and compiled).
    # The first match in a paragraph after the one holding the
    # selection wins; failing that, the first match in a paragraph
    # before it.  A match is selected and shown; return 1 if there was
    # one, else 0.
    def search(self, prog):
        import regex
        import string
        if type(prog) == type(''):
            prog = regex.compile(string.lower(prog))
        # Index of the paragraph where the selection starts, or -1
        iold = -1
        if self.selection:
            iold = self.selection[0][0]
        hit = None
        for index in range(len(self.paralist)):
            if index == iold:
                continue
            if index < iold and hit:
                continue
            lowered = string.lower(self.paralist[index].extract())
            if prog.search(lowered) < 0:
                continue
            begin, end = prog.regs[0]
            hit = (index, begin), (index, end)
            if index > iold:
                break
        if not hit:
            return 0
        self.setselection(hit)
        p = self.paralist[hit[0][0]]
        self.window.show((p.left, p.top), (p.right, p.bottom))
        return 1
    #
    # Select and show the whole text of the first paragraph for which
    # hasanchor(id) is true
    def showanchor(self, id):
        index = 0
        for p in self.paralist:
            if p.hasanchor(id):
                whole = (index, 0), (index, len(p.extract()))
                self.setselection(whole)
                self.window.show((p.left, p.top), (p.right, p.bottom))
                break
            index = index + 1
|
||||
|
||||
|
||||
# GL extensions
|
||||
|
||||
class GLFontCache:
|
||||
#
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
self.setfont('')
|
||||
#
|
||||
def reset(self):
|
||||
self.fontkey = None
|
||||
self.fonthandle = None
|
||||
self.fontinfo = None
|
||||
self.fontcache = {}
|
||||
#
|
||||
def close(self):
|
||||
self.reset()
|
||||
#
|
||||
def setfont(self, fontkey):
|
||||
if fontkey == '':
|
||||
fontkey = 'Times-Roman 12'
|
||||
elif ' ' not in fontkey:
|
||||
fontkey = fontkey + ' 12'
|
||||
if fontkey == self.fontkey:
|
||||
return
|
||||
if self.fontcache.has_key(fontkey):
|
||||
handle = self.fontcache[fontkey]
|
||||
else:
|
||||
import string
|
||||
i = string.index(fontkey, ' ')
|
||||
name, sizestr = fontkey[:i], fontkey[i:]
|
||||
size = eval(sizestr)
|
||||
key1 = name + ' 1'
|
||||
key = name + ' ' + `size`
|
||||
# NB key may differ from fontkey!
|
||||
if self.fontcache.has_key(key):
|
||||
handle = self.fontcache[key]
|
||||
else:
|
||||
if self.fontcache.has_key(key1):
|
||||
handle = self.fontcache[key1]
|
||||
else:
|
||||
import fm
|
||||
handle = fm.findfont(name)
|
||||
self.fontcache[key1] = handle
|
||||
handle = handle.scalefont(size)
|
||||
self.fontcache[fontkey] = \
|
||||
self.fontcache[key] = handle
|
||||
self.fontkey = fontkey
|
||||
if self.fonthandle <> handle:
|
||||
self.fonthandle = handle
|
||||
self.fontinfo = handle.getfontinfo()
|
||||
handle.setfont()
|
||||
|
||||
|
||||
class GLMeasurer(GLFontCache):
    #
    # Width of text according to the current font handle
    def textwidth(self, text):
        handle = self.fonthandle
        return handle.getstrwidth(text)
    #
    # Baseline position: item 6 minus item 3 of the current font info
    # NOTE(review): the meaning of these two items is defined by the
    # font manager's getfontinfo() -- confirm against its documentation
    def baseline(self):
        info = self.fontinfo
        return info[6] - info[3]
    #
    # Line height: item 6 of the current font info
    def lineheight(self):
        info = self.fontinfo
        return info[6]
|
||||
|
||||
|
||||
class GLWriter(GLFontCache):
|
||||
#
|
||||
# NOTES:
|
||||
# (1) Use gl.ortho2 to use X pixel coordinates!
|
||||
#
|
||||
def text(self, (h, v), text):
|
||||
import gl, fm
|
||||
gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
|
||||
fm.prstr(text)
|
||||
#
|
||||
def setfont(self, fontkey):
|
||||
oldhandle = self.fonthandle
|
||||
GLFontCache.setfont(fontkey)
|
||||
if self.fonthandle <> oldhandle:
|
||||
handle.setfont()
|
||||
|
||||
|
||||
class GLMeasurerWriter(GLMeasurer, GLWriter):
    #
    # One object offering both the measuring and the writing methods;
    # nothing is added to the two base classes
    pass
|
||||
|
||||
|
||||
class GLBackEnd(SavingBackEnd):
    #
    # wid is the id of the GL window to draw in; it is made the current
    # window and its size is read
    # NOTE(review): item 1 of gl.getsize() is used as the width --
    # confirm the order of the values it returns
    def __init__(self, wid):
        import gl
        gl.winset(wid)
        self.wid = wid
        self.height = 0
        self.width = gl.getsize()[1]
        self.d = GLMeasurerWriter()
        SavingBackEnd.__init__(self)
    #
    # Nothing to finish
    def finish(self):
        return
    #
    # Save the paragraph and render it below the previous ones
    def addpara(self, p):
        self.paralist.append(p)
        bottom = p.render(self.d, 0, self.height, self.width)
        self.height = bottom
    #
    # Render all paragraphs again, top to bottom.  If the width has
    # changed, the top and bottom of every paragraph are reset first.
    def redraw(self):
        import gl
        gl.winset(self.wid)
        width = gl.getsize()[1]
        if self.width != width:
            setdocsize = 1 # not used anywhere
            self.width = width
            for p in self.paralist:
                p.top = p.bottom = None
        drawer = self.d
        v = 0
        for p in self.paralist:
            v = p.render(drawer, 0, v, width)
|
|
@ -0,0 +1,321 @@
|
|||
# A parser for SGML, using the derived class as static DTD.
|
||||
|
||||
# XXX This only supports those SGML features used by HTML.
|
||||
|
||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
||||
# character data -- the normal case), RCDATA (replaceable character
|
||||
# data -- only char and entity references and end tags are special)
|
||||
# and CDATA (character data -- only end tags are special).
|
||||
|
||||
|
||||
import regex
|
||||
import string
|
||||
|
||||
|
||||
# Regular expressions used for parsing
|
||||
|
||||
incomplete = regex.compile( \
|
||||
'<!-?\|</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*\|</?\|' + \
|
||||
'&#[a-zA-Z0-9]*\|&[a-zA-Z][a-zA-Z0-9]*\|&')
|
||||
entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
|
||||
charref = regex.compile('&#[a-zA-Z0-9]+;')
|
||||
starttagopen = regex.compile('<[a-zA-Z]')
|
||||
endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
|
||||
commentopen = regex.compile('<!--')
|
||||
|
||||
|
||||
# SGML parser base class -- find tags and call handler functions.
|
||||
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
|
||||
# The dtd is defined by deriving a class which defines methods
|
||||
# with special names to handle tags: start_foo and end_foo to handle
|
||||
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
|
||||
# (Tags are converted to lower case for this purpose.) The data
|
||||
# between tags is passed to the parser by calling self.handle_data()
|
||||
# with some data as argument (the data may be split up in arbitrary
|
||||
# chunks). Entity references are passed by calling
|
||||
# self.handle_entityref() with the entity reference as argument.
|
||||
|
||||
class SGMLParser:
|
||||
|
||||
# Interface -- initialize and reset this instance
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
|
||||
# Interface -- reset this instance. Loses all unprocessed data
|
||||
def reset(self):
|
||||
self.rawdata = ''
|
||||
self.stack = []
|
||||
self.nomoretags = 0
|
||||
self.literal = 0
|
||||
|
||||
# For derived classes only -- enter literal mode (CDATA) till EOF
|
||||
def setnomoretags(self):
|
||||
self.nomoretags = self.literal = 1
|
||||
|
||||
# For derived classes only -- enter literal mode (CDATA)
|
||||
def setliteral(self, *args):
|
||||
self.literal = 1
|
||||
|
||||
# Interface -- feed some data to the parser. Call this as
|
||||
# often as you want, with as little or as much text as you
|
||||
# want (may include '\n'). (This just saves the text, all the
|
||||
# processing is done by process() or close().)
|
||||
def feed(self, data):
|
||||
self.rawdata = self.rawdata + data
|
||||
self.goahead(0)
|
||||
|
||||
# Interface -- handle the remaining data
|
||||
def close(self):
|
||||
self.goahead(1)
|
||||
|
||||
# Internal -- handle data as far as reasonable. May leave state
|
||||
# and data to be processed by a subsequent call. If 'end' is
|
||||
# true, force handling all data as if followed by EOF marker.
|
||||
def goahead(self, end):
|
||||
rawdata = self.rawdata
|
||||
i = 0
|
||||
n = len(rawdata)
|
||||
while i < n:
|
||||
if self.nomoretags:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = n
|
||||
break
|
||||
j = incomplete.search(rawdata, i)
|
||||
if j < 0: j = n
|
||||
if i < j: self.handle_data(rawdata[i:j])
|
||||
i = j
|
||||
if i == n: break
|
||||
if rawdata[i] == '<':
|
||||
if starttagopen.match(rawdata, i) >= 0:
|
||||
if self.literal:
|
||||
self.handle_data(rawdata[i])
|
||||
i = i+1
|
||||
continue
|
||||
k = self.parse_starttag(i)
|
||||
if k < 0: break
|
||||
i = i + k
|
||||
continue
|
||||
k = endtag.match(rawdata, i)
|
||||
if k >= 0:
|
||||
j = i+k
|
||||
self.parse_endtag(rawdata[i:j])
|
||||
i = j
|
||||
self.literal = 0
|
||||
continue
|
||||
if commentopen.match(rawdata, i) >= 0:
|
||||
if self.literal:
|
||||
self.handle_data(rawdata[i])
|
||||
i = i+1
|
||||
continue
|
||||
k = self.parse_comment(i)
|
||||
if k < 0: break
|
||||
i = i+k
|
||||
continue
|
||||
elif rawdata[i] == '&':
|
||||
k = charref.match(rawdata, i)
|
||||
if k >= 0:
|
||||
j = i+k
|
||||
self.handle_charref(rawdata[i+2:j-1])
|
||||
i = j
|
||||
continue
|
||||
k = entityref.match(rawdata, i)
|
||||
if k >= 0:
|
||||
j = i+k
|
||||
self.handle_entityref(rawdata[i+1:j-1])
|
||||
i = j
|
||||
continue
|
||||
else:
|
||||
raise RuntimeError, 'neither < nor & ??'
|
||||
# We get here only if incomplete matches but
|
||||
# nothing else
|
||||
k = incomplete.match(rawdata, i)
|
||||
if k < 0: raise RuntimeError, 'no incomplete match ??'
|
||||
j = i+k
|
||||
if j == n: break # Really incomplete
|
||||
self.handle_data(rawdata[i:j])
|
||||
i = j
|
||||
# end while
|
||||
if end and i < n:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = n
|
||||
self.rawdata = rawdata[i:]
|
||||
# XXX if end: check for empty stack
|
||||
|
||||
# Internal -- parse comment, return length or -1 if not ternimated
|
||||
def parse_comment(self, i):
|
||||
rawdata = self.rawdata
|
||||
if rawdata[i:i+4] <> '<!--':
|
||||
raise RuntimeError, 'unexpected call to handle_comment'
|
||||
try:
|
||||
j = string.index(rawdata, '--', i+4)
|
||||
except string.index_error:
|
||||
return -1
|
||||
self.handle_comment(rawdata[i+4: j])
|
||||
j = j+2
|
||||
n = len(rawdata)
|
||||
while j < n and rawdata[j] in ' \t\n': j = j+1
|
||||
if j == n: return -1 # Wait for final '>'
|
||||
if rawdata[j] == '>':
|
||||
j = j+1
|
||||
else:
|
||||
print '*** comment not terminated with >'
|
||||
print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
|
||||
return j-i
|
||||
|
||||
# Internal -- handle starttag, return length or -1 if not terminated
|
||||
def parse_starttag(self, i):
|
||||
rawdata = self.rawdata
|
||||
try:
|
||||
j = string.index(rawdata, '>', i)
|
||||
except string.index_error:
|
||||
return -1
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*')
|
||||
attrfind = regex.compile( \
|
||||
'[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' + \
|
||||
'\([ \t\n]*=[ \t\n]*' + \
|
||||
'\(\'[^\']*\';\|"[^"]*"\|[-a-zA-Z0-9./:+*%?!()_#]+\)\)?')
|
||||
k = tagfind.match(rawdata, i+1)
|
||||
if k < 0:
|
||||
raise RuntimeError, 'unexpected call to parse_starttag'
|
||||
k = i+1+k
|
||||
tag = string.lower(rawdata[i+1:k])
|
||||
while k < j:
|
||||
l = attrfind.match(rawdata, k)
|
||||
if l < 0: break
|
||||
regs = attrfind.regs
|
||||
a1, b1 = regs[1]
|
||||
a2, b2 = regs[2]
|
||||
a3, b3 = regs[3]
|
||||
attrname = rawdata[a1:b1]
|
||||
if '=' in rawdata[k:k+l]:
|
||||
attrvalue = rawdata[a3:b3]
|
||||
if attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
else:
|
||||
attrvalue = ''
|
||||
attrs.append(string.lower(attrname), attrvalue)
|
||||
k = k + l
|
||||
j = j+1
|
||||
try:
|
||||
method = getattr(self, 'start_' + tag)
|
||||
except AttributeError:
|
||||
try:
|
||||
method = getattr(self, 'do_' + tag)
|
||||
except AttributeError:
|
||||
self.unknown_starttag(tag, attrs)
|
||||
return j-i
|
||||
method(attrs)
|
||||
return j-i
|
||||
self.stack.append(tag)
|
||||
method(attrs)
|
||||
return j-i
|
||||
|
||||
# Internal -- parse endtag
|
||||
def parse_endtag(self, data):
|
||||
if data[:2] <> '</' or data[-1:] <> '>':
|
||||
raise RuntimeError, 'unexpected call to parse_endtag'
|
||||
tag = string.lower(string.strip(data[2:-1]))
|
||||
try:
|
||||
method = getattr(self, 'end_' + tag)
|
||||
except AttributeError:
|
||||
self.unknown_endtag(tag)
|
||||
return
|
||||
if self.stack and self.stack[-1] == tag:
|
||||
del self.stack[-1]
|
||||
else:
|
||||
print '*** Unbalanced </' + tag + '>'
|
||||
print '*** Stack:', self.stack
|
||||
found = None
|
||||
for i in range(len(self.stack)):
|
||||
if self.stack[i] == tag: found = i
|
||||
if found <> None:
|
||||
del self.stack[found:]
|
||||
method()
|
||||
|
||||
# Example -- handle character reference, no need to override
|
||||
def handle_charref(self, name):
|
||||
try:
|
||||
n = string.atoi(name)
|
||||
except string.atoi_error:
|
||||
self.unknown_charref(name)
|
||||
return
|
||||
if not 0 <= n <= 255:
|
||||
self.unknown_charref(name)
|
||||
return
|
||||
self.handle_data(chr(n))
|
||||
|
||||
# Definition of entities -- derived classes may override
|
||||
entitydefs = \
|
||||
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
|
||||
|
||||
# Example -- handle entity reference, no need to override
|
||||
def handle_entityref(self, name):
|
||||
table = self.__class__.entitydefs
|
||||
name = string.lower(name)
|
||||
if table.has_key(name):
|
||||
self.handle_data(table[name])
|
||||
else:
|
||||
self.unknown_entityref(name)
|
||||
return
|
||||
|
||||
# Example -- handle data, should be overridden
|
||||
def handle_data(self, data):
|
||||
pass
|
||||
|
||||
# Example -- handle comment, could be overridden
|
||||
def handle_comment(self, data):
|
||||
pass
|
||||
|
||||
# To be overridden -- handlers for unknown objects
|
||||
def unknown_starttag(self, tag, attrs): pass
|
||||
def unknown_endtag(self, tag): pass
|
||||
def unknown_charref(self, ref): pass
|
||||
def unknown_entityref(self, ref): pass
|
||||
|
||||
|
||||
class TestSGML(SGMLParser):
|
||||
|
||||
def handle_data(self, data):
|
||||
r = repr(data)
|
||||
if len(r) > 72:
|
||||
r = r[:35] + '...' + r[-35:]
|
||||
print 'data:', r
|
||||
|
||||
def handle_comment(self, data):
|
||||
r = repr(data)
|
||||
if len(r) > 68:
|
||||
r = r[:32] + '...' + r[-32:]
|
||||
print 'comment:', r
|
||||
|
||||
def unknown_starttag(self, tag, attrs):
|
||||
print 'start tag: <' + tag,
|
||||
for name, value in attrs:
|
||||
print name + '=' + '"' + value + '"',
|
||||
print '>'
|
||||
|
||||
def unknown_endtag(self, tag):
|
||||
print 'end tag: </' + tag + '>'
|
||||
|
||||
def unknown_entityref(self, ref):
|
||||
print '*** unknown entity ref: &' + ref + ';'
|
||||
|
||||
def unknown_charref(self, ref):
|
||||
print '*** unknown char ref: &#' + ref + ';'
|
||||
|
||||
|
||||
# Feed the file 'test.html' line by line to a TestSGML parser, which
# prints what it finds.  The file is closed again even if the parser
# raises an exception.
def test():
    file = 'test.html'
    f = open(file, 'r')
    try:
        x = TestSGML()
        while 1:
            line = f.readline()
            if not line:
                break
            x.feed(line)
        # End of file: let the parser handle what is left
        x.close()
    finally:
        f.close()
|
||||
|
||||
|
||||
#test()
|
Loading…
Reference in New Issue