329 lines
8.2 KiB
Python
Executable File
329 lines
8.2 KiB
Python
Executable File
# Tools for info file processing.
|
|
|
|
# XXX Need to be more careful with reading ahead searching for nodes.
|
|
|
|
|
|
import regexp
|
|
import string
|
|
|
|
|
|
# Exported exceptions.
#
# Both are plain strings that this module uses as exception objects:
# try_open() does "raise NoSuchFile, file" and find_node() does
# "raise NoSuchNode, node", so the offending name travels along as
# the exception's argument.
#
NoSuchFile = 'no such file'
NoSuchNode = 'no such node'
|
|
|
|
|
|
# The search path for info files; this is site-specific.
# Directory names should end in a pathname delimiter,
# so they can simply be concatenated to a relative pathname.
# try_open() tries the entries in list order; the empty first entry
# means the file name is first tried exactly as given.
#
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX
|
|
|
|
|
|
# Tunable constants.
#
# BLOCKSIZE is the unit that find_node() and backup_node() round their
# starting file positions down to.  FUZZ is how far before the hinted
# offset find_node() begins looking.  CHUNKSIZE is the amount requested
# by each f.read() call in the scanning loops.
#
BLOCKSIZE = 512 # Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data
|
|
|
|
|
|
# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
#
# Each name below is the bound 'match' method of a compiled pattern.
# NOTE(review): the call sites in this file pass (string) or
# (string, startindex) and unpack the result as a sequence of
# (start, end) index pairs -- the whole match first, then one pair per
# parenthesized group -- with a false value meaning "not found".  That
# is presumably how the regexp module behaves; confirm before porting.
#
# findheader:    \037, an optional \014 (form feed), a newline, and then
#                the complete following line (captured, newline included).
# findescape:    a single \037 character.
# parseheader:   "Node:" or "node:", optional blanks/tabs, then the name
#                (captured) up to a tab, comma or newline.
# findfirstline: one line including its terminating newline.
# findnode, findprev, findnext, findup:
#                the "Node:", "Prev...:", "Next:" and "Up:" fields; the
#                value up to a tab, comma or newline is captured.
#                findprev accepts any run of the letters i, o, u, s after
#                "Prev", so both "Prev:" and "Previous:" match.
# findmenu:      a "* Menu:" (or "* menu:") line start.
# findmenuitem:  "* TOPIC:" followed by either a second ':' or a node
#                reference; TOPIC and the reference are captured.
# findfootnote:  "*Note TOPIC:" (or "*note") followed by either a second
#                ':' or a node reference; both parts are captured.
# parsenoderef:  "(FILE)NODE"; FILE and NODE are captured.
#
findheader = regexp.compile('\037\014?\n(.*\n)').match
findescape = regexp.compile('\037').match
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findfirstline = regexp.compile('^.*\n').match
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
findmenu = regexp.compile('^\* [mM]enu:').match
findmenuitem = regexp.compile( \
        '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
findfootnote = regexp.compile( \
        '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
|
|
|
|
|
|
# Get a node and all information pertaining to it.
|
|
# This doesn't work if there is an indirect tag table,
|
|
# and in general you are better off using icache.get_node() instead.
|
|
# Functions get_whole_file() and get_file_node() provide part of
# the functionality used by icache.
|
|
# Raise NoSuchFile or NoSuchNode as appropriate.
|
|
#
|
|
def get_node(curfile, ref):
    # Resolve the reference 'ref' relative to 'curfile' and fetch the
    # result.  Returns the 6-tuple
    #   (file, node, header, menu, footnotes, text)
    # produced by get_whole_file() or get_file_node().
    # NoSuchFile / NoSuchNode raised by those helpers pass through.
    file, node = parse_ref(curfile, ref)
    if node != '*':
        # Ordinary node; there is no tags table here, so no offset hint.
        return get_file_node(file, 0, node)
    # '*' stands for the whole file taken as one node.
    return get_whole_file(file)
|
|
#
|
|
def get_whole_file(file):
    # Return a whole info file as if it were a single node named '*'.
    # Result: (file, '*', header, menu, footnotes, text) where header is
    # three empty strings, menu and footnotes are empty lists, and text
    # is the complete file contents.
    # Raises NoSuchFile (via try_open) if the file cannot be opened.
    f = try_open(file) # May raise NoSuchFile
    try:
        text = f.read()
    finally:
        # Release the file even if the read fails.
        f.close()
    header, menu, footnotes = ('', '', ''), [], []
    return file, '*', header, menu, footnotes, text
|
|
#
|
|
def get_file_node(file, offset, node):
    # Fetch one node from an info file and analyze it.
    #   file   -- file name, looked up through try_open()
    #   offset -- position hint handed to find_node() (0 if unknown)
    #   node   -- name of the node wanted
    # Result: (file, node, header, menu, footnotes, text), where node,
    # header, menu and footnotes come from analyze_node(text).
    # Raises NoSuchFile or NoSuchNode as appropriate.
    f = try_open(file) # May raise NoSuchFile
    try:
        text = find_node(f, offset, node) # May raise NoSuchNode
    finally:
        # The node text is fully in memory by now (or the search
        # failed); either way the file is no longer needed.
        f.close()
    node, header, menu, footnotes = analyze_node(text)
    return file, node, header, menu, footnotes, text
|
|
|
|
|
|
# Parse a node reference into a file (possibly default) and node name.
|
|
# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
|
|
# Default file is the curfile argument; default node is Top.
|
|
# A node value of '*' is a special case: the whole file should
|
|
# be interpreted (by the caller!) as a single node.
|
|
#
|
|
def parse_ref(curfile, ref):
    # Split a node reference into (file, node).
    # "(FILE)NODE" and "(FILE)" name a file explicitly; anything else is
    # taken as a node name inside curfile.  An empty file part falls
    # back to curfile and an empty node part falls back to 'Top'.
    regs = parsenoderef(ref)
    if regs:
        # regs: whole match, then the FILE group, then the NODE group.
        (lo, hi), (file_lo, file_hi), (node_lo, node_hi) = regs
        file = ref[file_lo:file_hi]
        node = ref[node_lo:node_hi]
    else:
        file = curfile
        node = ref
    if not file:
        file = curfile # (Is this necessary?)
    if not node:
        node = 'Top'
    return file, node
|
|
|
|
|
|
# Extract node name, links, menu and footnotes from the node text.
|
|
#
|
|
def _collect_refs(text, matcher, pos):
    # Helper for analyze_node(): gather a (topic, ref) pair for every
    # match of 'matcher' in text, scanning forward from index pos.
    # A ref consisting of just ':' (the "TOPIC::" form) means the
    # reference equals the topic.
    refs = []
    while 1:
        match = matcher(text, pos)
        if not match:
            break
        # Continue the next search at the end of this match.
        (a, pos), (a1, b1), (a2, b2) = match
        topic, ref = text[a1:b1], text[a2:b2]
        if ref == ':':
            ref = topic
        refs.append((topic, ref))
    return refs

def analyze_node(text):
    # Take a node's text apart.
    #   text -- node text, starting with its header line
    # Returns (node, (prev, next, up), menu, footnotes):
    #   node, prev, next, up -- header fields ('' when absent)
    #   menu      -- list of (topic, ref) pairs following "* Menu:"
    #   footnotes -- list of (topic, ref) pairs for "*Note" references
    #
    # Get node name and links from the header line.
    # Only the header line is searched, so that text such as "Up:"
    # in the node body cannot be mistaken for a missing link.
    #
    match = findfirstline(text)
    if match:
        (a, b) = match[0]
        line = text[a:b]
    else:
        # No newline anywhere: the whole text is the header line.
        line = text
    node = get_it(line, findnode)
    prev = get_it(line, findprev)
    next = get_it(line, findnext)
    up = get_it(line, findup)
    #
    # Get the menu items, if there is a menu
    #
    menu = []
    match = findmenu(text)
    if match:
        (a, b) = match[0]
        # Menu items are looked for after the "* Menu:" marker only.
        menu = _collect_refs(text, findmenuitem, b)
    #
    # Get the footnotes (searched throughout the whole node)
    #
    footnotes = _collect_refs(text, findfootnote, 0)
    #
    return node, (prev, next, up), menu, footnotes
|
|
#
|
|
def get_it(line, matcher):
    # Apply 'matcher' to 'line' and return the text of its first group,
    # or '' when the matcher reports no match.
    # The matcher must return a pair of (start, end) index pairs:
    # the whole match followed by the group.
    regs = matcher(line)
    if regs:
        (whole_lo, whole_hi), (lo, hi) = regs
        return line[lo:hi]
    return ''
|
|
|
|
|
|
# Find a node in an open file.
|
|
# The offset (from the tags table) is a hint about the node's position.
|
|
# Pass zero if there is no tags table.
|
|
# Raise NoSuchNode if the node isn't found.
|
|
# NB: This seeks around in the file.
|
|
#
|
|
def find_node(f, offset, node):
    # Locate a node in the open file f and return its text, header
    # line first, as completed by read_node().
    #   f      -- open file object; it is repositioned with seek/read
    #   offset -- hint for where the node starts, 0 if unknown
    #   node   -- node name; the comparison is done in lower case
    # Raises NoSuchNode (argument: the lowercased name) if no node
    # header with that name is found from the start position onward.
    node = string.lower(node) # Just to be sure
    #
    # Position a little before the given offset,
    # so we may find the node even if it has moved around
    # in the file a little.  The position is aligned on BLOCKSIZE
    # and never goes below zero.
    #
    offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
    f.seek(offset)
    #
    # Loop, hunting for a matching node header.
    #
    while 1:
        buf = f.read(CHUNKSIZE)
        if not buf:
            break
        # i is the index in buf from which the next search starts.
        i = 0
        while 1:
            match = findheader(buf, i)
            if match:
                (a,b), (a1,b1) = match
                # a1:b1 spans the header line that follows the separator.
                start = a1
                line = buf[a1:b1]
                i = b
                match = parseheader(line)
                if match:
                    (a,b), (a1,b1) = match
                    key = string.lower(line[a1:b1])
                    if key == node:
                        # Got it! Now read the rest.
                        return read_node(f, buf[start:])
            elif findescape(buf, i):
                # A separator is present but no complete header follows
                # it in buf yet: append more data and try again.
                next = f.read(CHUNKSIZE)
                if not next:
                    break
                buf = buf + next
            else:
                # Nothing of interest left in buf; fetch a fresh chunk.
                break
    #
    # If we get here, we didn't find it. Too bad.
    #
    raise NoSuchNode, node
|
|
|
|
|
|
# Finish off getting a node (subroutine for find_node()).
|
|
# The node begins at the start of buf and may end in buf;
|
|
# if it doesn't end there, read additional data from f.
|
|
#
|
|
def read_node(f, buf):
    # Complete a node whose text begins at the start of buf.
    # Data is appended from f, CHUNKSIZE at a time, until the next
    # \037 separator shows up or the file is exhausted.  The text up
    # to that point is returned with trailing newlines removed.
    scan_from = 0
    while 1:
        regs = findescape(buf, scan_from)
        if regs:
            # The node ends where the separator starts.
            end = regs[0][0]
            break
        more = f.read(CHUNKSIZE)
        if not more:
            # End of file: the node runs to the end of the data.
            end = len(buf)
            break
        # Only the newly appended part needs to be searched.
        scan_from = len(buf)
        buf = buf + more
    # Strip trailing newlines
    while end > 0 and buf[end-1] == '\n':
        end = end - 1
    return buf[:end]
|
|
|
|
|
|
# Read backwards starting at offset until the beginning of a node is found.
|
|
# Then return a buffer containing the beginning of the node,
|
|
# with f positioned just after the buffer.
|
|
# The buffer will contain at least the full header line of the node;
|
|
# the caller should finish off with read_node() if it is the right node.
|
|
# (It is also possible that the buffer extends beyond the node!)
|
|
# Return an empty string if there is no node before the given offset.
|
|
#
|
|
def backup_node(f, offset):
    # Search backwards from 'offset' for the start of a node.
    #   f      -- open file object; it is repositioned with seek/read
    #   offset -- file position before which to look
    # Returns the data from the last node header found in the examined
    # window up to the end of that window, or '' if no header is found
    # before the beginning of the file is reached.
    #
    # The first window starts on a BLOCKSIZE boundary roughly CHUNKSIZE
    # before offset (clamped at 0) and ends at offset.
    start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
    end = offset
    while start < end:
        f.seek(start)
        buf = f.read(end-start)
        i = 0
        # hit is the index in buf of the last header line seen so far.
        hit = -1
        while 1:
            match = findheader(buf, i)
            if match:
                (a,b), (a1,b1) = match
                hit = a1
                i = b
            elif end < offset and findescape(buf, i):
                # A separator without a complete header after it, in a
                # window that stops short of offset: extend the window
                # forward, at most BLOCKSIZE at a time and never beyond
                # offset.
                next = f.read(min(offset-end, BLOCKSIZE))
                if not next:
                    break
                buf = buf + next
                end = end + len(next)
            else:
                break
        if hit >= 0:
            return buf[hit:]
        # Nothing here; move the window one CHUNKSIZE further back.
        end = start
        start = max(0, end - CHUNKSIZE)
    return ''
|
|
|
|
|
|
# Make a tag table for the given file by scanning the file.
|
|
# The file must be open for reading, and positioned at the beginning
|
|
# (or wherever the hunt for tags must begin; it is read till the end).
|
|
#
|
|
def make_tags(f):
    # Build a tag table by scanning f from its current position to
    # the end of the file.
    # Returns a dictionary mapping each lowercased node name to the
    # tuple ('', start, line), where start is the file position of the
    # node's header line and line is the text of that header line.
    # When a name occurs more than once a 'Duplicate node:' message is
    # printed and the later entry replaces the earlier one.
    tags = {}
    while 1:
        # File position corresponding to index 0 of buf.
        offset = f.tell()
        buf = f.read(CHUNKSIZE)
        if not buf:
            break
        i = 0
        while 1:
            match = findheader(buf, i)
            if match:
                (a,b), (a1,b1) = match
                start = offset+a1
                line = buf[a1:b1]
                i = b
                match = parseheader(line)
                if match:
                    (a,b), (a1,b1) = match
                    key = string.lower(line[a1:b1])
                    if tags.has_key(key):
                        print 'Duplicate node:',
                        print key
                    tags[key] = '', start, line
            elif findescape(buf, i):
                # A separator is present but no complete header follows
                # it in buf yet: append more data and try again.
                next = f.read(CHUNKSIZE)
                if not next:
                    break
                buf = buf + next
            else:
                break
    return tags
|
|
|
|
|
|
# Try to open a file, return a file object if succeeds.
|
|
# Raise NoSuchFile if the file can't be opened.
|
|
# Should treat absolute pathnames special.
|
|
#
|
|
def try_open(file):
|
|
for dir in INFOPATH:
|
|
try:
|
|
return open(dir + file, 'r')
|
|
except IOError:
|
|
pass
|
|
raise NoSuchFile, file
|
|
|
|
|
|
# A little test for the speed of make_tags().
|
|
#
|
|
TESTFILE = 'texinfo-1'
|
|
def test_make_tags():
|
|
import time
|
|
f = try_open(TESTFILE)
|
|
t1 = time.time()
|
|
tags = make_tags(f)
|
|
t2 = time.time()
|
|
print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'
|