"""Tools for info file processing.

An info file is scanned for node separators (the character with octal
code 037, optionally followed by a form feed) each of which is followed
by a header line such as ``File: f,  Node: Top,  Next: Intro``.  The
functions here locate nodes, split them into header links, menu items
and footnotes, and can build a tag table mapping node names to offsets.
"""

# XXX Need to be more careful with reading ahead searching for nodes.

import re
import string  # NOTE(review): unused below; kept in case importers reach it via this module


# Exported exceptions.
#
class NoSuchFile(Exception):
    """No such file: no INFOPATH prefix led to a file that could be opened."""


class NoSuchNode(Exception):
    """No such node: the scan hit end of file without a matching header."""


# The search path for info files; this is site-specific.
# Directory names should end in a pathname delimiter,
# so they can simply be concatenated to a relative pathname.
#
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX

# Tunable constants.
#
BLOCKSIZE = 512            # Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE         # Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE    # Qty to read at once when reading lots of data


# Regular expressions used.
# The patterns are raw strings so that every backslash escape reaches
# the regular expression compiler untouched.
#
def _matcher(pattern):
    """Compile *pattern* and return a ``match(text, offset=0)`` function.

    The rest of this module expects matchers that *search* forward from
    ``offset`` and report a tuple of ``(start, end)`` index pairs, one
    per group with the whole match first; an empty tuple means "not
    found".  This adapter provides that calling convention on top of
    the standard ``re`` module.  ``re.MULTILINE`` is used because
    patterns such as the menu matcher must anchor ``^`` at the start of
    any line of a multi-line node text.
    """
    prog = re.compile(pattern, re.MULTILINE)
    group_numbers = range(prog.groups + 1)

    def match(text, offset=0):
        found = prog.search(text, offset)
        if found is None:
            return ()
        return tuple(found.span(g) for g in group_numbers)

    return match


findheader = _matcher(r'\037\014?\n(.*\n)')
findescape = _matcher(r'\037')
parseheader = _matcher(r'[nN]ode:[ \t]*([^\t,\n]*)')
findfirstline = _matcher(r'^.*\n')
findnode = _matcher(r'[nN]ode:[ \t]*([^\t,\n]*)')
findprev = _matcher(r'[pP]rev[ious]*:[ \t]*([^\t,\n]*)')
findnext = _matcher(r'[nN]ext:[ \t]*([^\t,\n]*)')
findup = _matcher(r'[uU]p:[ \t]*([^\t,\n]*)')
findmenu = _matcher(r'^\* [mM]enu:')
findmenuitem = _matcher(
    r'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)')
findfootnote = _matcher(
    r'\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)')
parsenoderef = _matcher(r'^\((.*)\)(.*)$')


def get_node(curfile, ref):
    """Get a node and all information pertaining to it.

    This doesn't work if there is an indirect tag table, and in general
    you are better off using icache.get_node() instead.  Functions
    get_whole_file() and get_file_node() provide part functionality
    used by icache.

    Returns the 6-tuple ``(file, node, header, menu, footnotes, text)``
    produced by get_whole_file() or get_file_node().
    Raises NoSuchFile or NoSuchNode as appropriate.
    """
    file, node = parse_ref(curfile, ref)
    if node == '*':
        return get_whole_file(file)
    return get_file_node(file, 0, node)


def get_whole_file(file):
    """Return the entire file as one pseudo-node named '*'.

    The header is three empty strings and the menu and footnote lists
    are empty.  Raises NoSuchFile if the file cannot be opened.
    """
    # The handle is closed as soon as the text has been read.
    with try_open(file) as f:  # May raise NoSuchFile
        text = f.read()
    header, menu, footnotes = ('', '', ''), [], []
    return file, '*', header, menu, footnotes, text


def get_file_node(file, offset, node):
    """Return ``(file, node, header, menu, footnotes, text)`` for one node.

    *offset* is passed to find_node() as a position hint.  The returned
    node name is the one parsed from the node's own header.
    Raises NoSuchFile or NoSuchNode as appropriate.
    """
    with try_open(file) as f:  # May raise NoSuchFile
        text = find_node(f, offset, node)  # May raise NoSuchNode
    node, header, menu, footnotes = analyze_node(text)
    return file, node, header, menu, footnotes, text


def parse_ref(curfile, ref):
    """Parse a node reference into a file (possibly default) and node name.

    Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
    Default file is the curfile argument; default node is Top.
    A node value of '*' is a special case: the whole file should
    be interpreted (by the caller!) as a single node.
    """
    match = parsenoderef(ref)
    if not match:
        file, node = curfile, ref
    else:
        (a, b), (a1, b1), (a2, b2) = match
        file, node = ref[a1:b1], ref[a2:b2]
    if not file:
        file = curfile  # (Is this necessary?)
    if not node:
        node = 'Top'
    return file, node


def _collect_refs(text, matcher, start):
    """Return ``[(topic, ref), ...]`` for successive *matcher* hits from *start*.

    A reference written as a bare ':' (the ``Topic::`` shorthand) is
    replaced by the topic itself.
    """
    refs = []
    pos = start
    while True:
        match = matcher(text, pos)
        if not match:
            break
        (a, pos), (a1, b1), (a2, b2) = match
        topic, ref = text[a1:b1], text[a2:b2]
        if ref == ':':
            ref = topic
        # One (topic, ref) pair per entry.
        refs.append((topic, ref))
    return refs


def analyze_node(text):
    """Extract node name, links, menu and footnotes from the node text.

    Returns ``(node, (prev, next, up), menu, footnotes)`` where menu and
    footnotes are lists of ``(topic, ref)`` pairs.  Fields missing from
    the header come back as empty strings.
    """
    #
    # Get node name and links from the header line only, so that text
    # such as "Next:" in the body is never mistaken for a link.
    #
    match = findfirstline(text)
    if match:
        a, b = match[0]
        line = text[a:b]
    else:
        # No newline anywhere: the whole text is the header line.
        line = text
    node = get_it(line, findnode)
    prev = get_it(line, findprev)
    next_ = get_it(line, findnext)
    up = get_it(line, findup)
    #
    # Get the menu items, if there is a menu
    #
    menu = []
    match = findmenu(text)
    if match:
        a, b = match[0]
        menu = _collect_refs(text, findmenuitem, b)
    #
    # Get the footnotes
    #
    footnotes = _collect_refs(text, findfootnote, 0)
    #
    return node, (prev, next_, up), menu, footnotes


def get_it(line, matcher):
    """Return the text of *matcher*'s first group in *line*, or '' if no match."""
    match = matcher(line)
    if not match:
        return ''
    a1, b1 = match[1]
    return line[a1:b1]


def _iter_headers(f):
    """Yield ``(base, buf, start, line)`` for each node header read from *f*.

    Reading begins at the current position of *f*.  *base* is the value
    of ``f.tell()`` where *buf* begins, *start* is the index of the
    header line within *buf*, and *line* is the header line including
    its newline.  When a separator is seen but its header line is not
    yet complete, another chunk is appended to *buf* before retrying.
    """
    while True:
        base = f.tell()
        buf = f.read(CHUNKSIZE)
        if not buf:
            return
        i = 0
        while True:
            match = findheader(buf, i)
            if match:
                (a, i), (start, stop) = match
                yield base, buf, start, buf[start:stop]
            elif findescape(buf, i):
                more = f.read(CHUNKSIZE)
                if not more:
                    break
                buf = buf + more
            else:
                break


def find_node(f, offset, node):
    """Find a node in an open file and return its text.

    The offset (from the tags table) is a hint about the node's position.
    Pass zero if there is no tags table.  Node names are compared
    case-insensitively.
    Raise NoSuchNode if the node isn't found.
    NB: This seeks around in the file.
    """
    node = node.lower()  # Just to be sure
    #
    # Position a little before the given offset,
    # so we may find the node even if it has moved around
    # in the file a little.  Floor division keeps the offset an integer.
    #
    offset = max(0, ((offset - FUZZ) // BLOCKSIZE) * BLOCKSIZE)
    f.seek(offset)
    #
    # Loop, hunting for a matching node header.
    #
    for base, buf, start, line in _iter_headers(f):
        match = parseheader(line)
        if match:
            a1, b1 = match[1]
            if line[a1:b1].lower() == node:
                # Got it!  Now read the rest.
                return read_node(f, buf[start:])
    #
    # If we get here, we didn't find it.  Too bad.
    #
    raise NoSuchNode(node)


def read_node(f, buf):
    """Finish off getting a node (subroutine for find_node()).

    The node begins at the start of buf and may end in buf;
    if it doesn't end there, read additional data from f.
    The node ends at the next separator character or at end of file;
    trailing newlines are stripped from the result.
    """
    i = 0
    match = findescape(buf, i)
    while not match:
        more = f.read(CHUNKSIZE)
        if not more:
            # End of file: the node runs to the end of what we have.
            end = len(buf)
            break
        # Only the newly appended data needs searching.
        i = len(buf)
        buf = buf + more
        match = findescape(buf, i)
    else:
        # Got a match
        end, b = match[0]
    # Strip trailing newlines
    return buf[:end].rstrip('\n')


def backup_node(f, offset):
    """Read backwards from offset until the beginning of a node is found.

    Then return a buffer containing the beginning of the node,
    with f positioned just after the buffer.
    The buffer will contain at least the full header line of the node;
    the caller should finish off with read_node() if it is the right node.
    (It is also possible that the buffer extends beyond the node!)
    Return an empty string if there is no node before the given offset.
    """
    start = max(0, ((offset - CHUNKSIZE) // BLOCKSIZE) * BLOCKSIZE)
    end = offset
    while start < end:
        f.seek(start)
        buf = f.read(end - start)
        i = 0
        hit = -1
        while True:
            match = findheader(buf, i)
            if match:
                # Remember the last header found in this window.
                (a, i), (hit, b1) = match
            elif end < offset and findescape(buf, i):
                # A separator whose header line is cut off: extend the
                # window, but never past the original offset.
                more = f.read(min(offset - end, BLOCKSIZE))
                if not more:
                    break
                buf = buf + more
                end = end + len(more)
            else:
                break
        if hit >= 0:
            return buf[hit:]
        # Nothing here; slide the window one chunk towards the start.
        end = start
        start = max(0, end - CHUNKSIZE)
    return ''


def make_tags(f):
    """Make a tag table for the given file by scanning the file.

    The file must be open for reading, and positioned at the beginning
    (or wherever the hunt for tags must begin; it is read till the end).
    Returns a dictionary mapping the lower-cased node name to the tuple
    ``('', offset_of_header_line, header_line)``.  A duplicate node name
    is reported on standard output and the later entry wins.
    """
    tags = {}
    for base, buf, start, line in _iter_headers(f):
        match = parseheader(line)
        if not match:
            continue
        a1, b1 = match[1]
        key = line[a1:b1].lower()
        if key in tags:
            print('Duplicate node:', key)
        tags[key] = '', base + start, line
    return tags


def try_open(file):
    """Try to open a file, return a file object if succeeds.

    Each INFOPATH prefix is tried in order.
    Raise NoSuchFile if the file can't be opened.
    Should treat absolute pathnames special.

    The file is opened as Latin-1 text with newline translation turned
    off, so every byte becomes exactly one character and string indices
    line up with positions in the file.
    NOTE(review): text-mode tell() is documented as an opaque number;
    make_tags() adds indices to it, which relies on this one-byte codec.
    Confirm before switching encodings.
    """
    for prefix in INFOPATH:
        try:
            return open(prefix + file, 'r', encoding='latin-1', newline='\n')
        except OSError:
            # Deliberate: fall through to the next search-path entry.
            pass
    raise NoSuchFile(file)


# A little test for the speed of make_tags().
#
TESTFILE = 'texinfo-1'

def test_make_tags():
    """Time make_tags() on TESTFILE and print the elapsed seconds."""
    import time
    with try_open(TESTFILE) as f:
        t1 = time.time()
        make_tags(f)
        t2 = time.time()
    print('Making tag table for', repr(TESTFILE), 'took', t2 - t1, 'sec.')

# This is a manual benchmark that needs TESTFILE on INFOPATH; keep unit
# test collectors from picking it up because of its name.
test_make_tags.__test__ = False