cpython/Demo/ibrowse/ifile.py

# Tools for info file processing.

# XXX Need to be more careful with reading ahead searching for nodes.


import regexp
import string


# Exported exceptions.
#
NoSuchFile = 'no such file'
NoSuchNode = 'no such node'


# The search path for info files; this is site-specific.
# Directory names should end in a partname delimiter,
# so they can simply be concatenated to a relative pathname.
#
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:']	# Mac
INFOPATH = ['', '/usr/local/emacs/info/']	# X11 on UNIX


# Tunable constants.
#
BLOCKSIZE = 512			# Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE		# Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE		# Qty to read at once when reading lots of data


# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
#
findheader = regexp.compile('\037\014?\n(.*\n)').match
findescape = regexp.compile('\037').match
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findfirstline = regexp.compile('^.*\n').match
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
findmenu = regexp.compile('^\* [mM]enu:').match
findmenuitem = regexp.compile( \
	'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
findfootnote = regexp.compile( \
	'\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match


# Get a node and all information pertaining to it.
# This doesn't work if there is an indirect tag table,
# and in general you are better off using icache.get_node() instead.
# Functions get_whole_file() and get_file_node() provide part
# functionality used by icache.
# Raise NoSuchFile or NoSuchNode as appropriate.
#
def get_node(curfile, ref):
	file, node = parse_ref(curfile, ref)
	if node == '*':
		return get_whole_file(file)
	else:
		return get_file_node(file, 0, node)
#
def get_whole_file(file):
	f = try_open(file) # May raise NoSuchFile
	text = f.read()
	header, menu, footnotes = ('', '', ''), [], []
	return file, '*', header, menu, footnotes, text
#
def get_file_node(file, offset, node):
	f = try_open(file) # May raise NoSuchFile
	text = find_node(f, offset, node) # May raise NoSuchNode
	node, header, menu, footnotes = analyze_node(text)
	return file, node, header, menu, footnotes, text


# Parse a node reference into a file (possibly default) and node name.
# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
# Default file is the curfile argument; default node is Top.
# A node value of '*' is a special case: the whole file should
# be interpreted (by the caller!) as a single node.
#
def parse_ref(curfile, ref):
	match = parsenoderef(ref)
	if not match:
		file, node = curfile, ref
	else:
		(a, b), (a1, b1), (a2, b2) = match
		file, node = ref[a1:b1], ref[a2:b2]
	if not file:
		file = curfile # (Is this necessary?)
	if not node:
		node = 'Top'
	return file, node


# Extract node name, links, menu and footnotes from the node text.
#
def analyze_node(text):
	#
	# Get node name and links from the header line
	#
	match = findfirstline(text)
	if match:
		(a, b) = match[0]
		line = text[a:b]
	else:
		line = ''
	node = get_it(text, findnode)
	prev = get_it(text, findprev)
	next = get_it(text, findnext)
	up = get_it(text, findup)
	#
	# Get the menu items, if there is a menu
	#
	menu = []
	match = findmenu(text)
	if match:
		(a, b) = match[0]
		while 1:
			match = findmenuitem(text, b)
			if not match:
				break
			(a, b), (a1, b1), (a2, b2) = match
			topic, ref = text[a1:b1], text[a2:b2]
			if ref == ':':
				ref = topic
			menu.append((topic, ref))
	#
	# Get the footnotes
	#
	footnotes = []
	b = 0
	while 1:
		match = findfootnote(text, b)
		if not match:
			break
		(a, b), (a1, b1), (a2, b2) = match
		topic, ref = text[a1:b1], text[a2:b2]
		if ref == ':':
			ref = topic
		footnotes.append((topic, ref))
	#
	return node, (prev, next, up), menu, footnotes
#
def get_it(line, matcher):
	match = matcher(line)
	if not match:
		return ''
	else:
		(a, b), (a1, b1) = match
		return line[a1:b1]


# Find a node in an open file.
# The offset (from the tags table) is a hint about the node's position.
# Pass zero if there is no tags table.
# Raise NoSuchNode if the node isn't found.
# NB: This seeks around in the file.
#
def find_node(f, offset, node):
	node = string.lower(node) # Just to be sure
	#
	# Position a little before the given offset,
	# so we may find the node even if it has moved around
	# in the file a little.
	#
	offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
	f.seek(offset)
	#
	# Loop, hunting for a matching node header.
	#
	while 1:
		buf = f.read(CHUNKSIZE)
		if not buf:
			break
		i = 0
		while 1:
			match = findheader(buf, i)
			if match:
				(a,b), (a1,b1) = match
				start = a1
				line = buf[a1:b1]
				i = b
				match = parseheader(line)
				if match:
					(a,b), (a1,b1) = match
					key = string.lower(line[a1:b1])
					if key == node:
						# Got it!  Now read the rest.
						return read_node(f, buf[start:])
			elif findescape(buf, i):
				next = f.read(CHUNKSIZE)
				if not next:
					break
				buf = buf + next
			else:
				break
	#
	# If we get here, we didn't find it.  Too bad.
	#
	raise NoSuchNode, node


# Finish off getting a node (subroutine for find_node()).
# The node begins at the start of buf and may end in buf;
# if it doesn't end there, read additional data from f.
#
def read_node(f, buf):
	i = 0
	match = findescape(buf, i)
	while not match:
		next = f.read(CHUNKSIZE)
		if not next:
			end = len(buf)
			break
		i = len(buf)
		buf = buf + next
		match = findescape(buf, i)
	else:
		# Got a match
		(a, b) = match[0]
		end = a
	# Strip trailing newlines
	while end > 0 and buf[end-1] == '\n':
		end = end-1
	buf = buf[:end]
	return buf


# Read reverse starting at offset until the beginning of a node is found.
# Then return a buffer containing the beginning of the node,
# with f positioned just after the buffer.
# The buffer will contain at least the full header line of the node;
# the caller should finish off with read_node() if it is the right node.
# (It is also possible that the buffer extends beyond the node!)
# Return an empty string if there is no node before the given offset.
#
def backup_node(f, offset):
	start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
	end = offset
	while start < end:
		f.seek(start)
		buf = f.read(end-start)
		i = 0
		hit = -1
		while 1:
			match = findheader(buf, i)
			if match:
				(a,b), (a1,b1) = match
				hit = a1
				i = b
			elif end < offset and findescape(buf, i):
				next = f.read(min(offset-end, BLOCKSIZE))
				if not next:
					break
				buf = buf + next
				end = end + len(next)
			else:
				break
		if hit >= 0:
			return buf[hit:]
		end = start
		start = max(0, end - CHUNKSIZE)
	return ''


# Make a tag table for the given file by scanning the file.
# The file must be open for reading, and positioned at the beginning
# (or wherever the hunt for tags must begin; it is read till the end).
#
def make_tags(f):
	tags = {}
	while 1:
		offset = f.tell()
		buf = f.read(CHUNKSIZE)
		if not buf:
			break
		i = 0
		while 1:
			match = findheader(buf, i)
			if match:
				(a,b), (a1,b1) = match
				start = offset+a1
				line = buf[a1:b1]
				i = b
				match = parseheader(line)
				if match:
					(a,b), (a1,b1) = match
					key = string.lower(line[a1:b1])
					if tags.has_key(key):
						print 'Duplicate node:',
						print key
					tags[key] = '', start, line
			elif findescape(buf, i):
				next = f.read(CHUNKSIZE)
				if not next:
					break
				buf = buf + next
			else:
				break
	return tags


# Try to open a file, return a file object if succeeds.
# Raise NoSuchFile if the file can't be opened.
# Should treat absolute pathnames special.
#
def try_open(file):
	for dir in INFOPATH:
		try:
			return open(dir + file, 'r')
		except IOError:
			pass
	raise NoSuchFile, file


# A little test for the speed of make_tags().
#
TESTFILE = 'texinfo-1'
def test_make_tags():
	import time
	f = try_open(TESTFILE)
	t1 = time.time()
	tags = make_tags(f)
	t2 = time.time()
	print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'