#! /usr/local/bin/python

# Perform massive identifier substitution on C source files.
# This actually tokenizes the files (to some extent) so it can
# avoid making substitutions inside strings or comments.
# Inside strings, substitutions are never made; inside comments,
# it is a user option (off by default; enable it with -c).
#
# The substitutions are read from one or more files whose lines,
# when not empty, after stripping comments starting with #,
# must contain exactly two words separated by whitespace: the
# old identifier and its replacement.
#
# The option -r reverses the sense of the substitutions (this may be
# useful to undo a particular substitution).
#
# If the old identifier is prefixed with a '*' (with no intervening
# whitespace), then it will not be substituted inside comments.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a C file (ends in .h or .c).  The special filename '-' means
# operate in filter mode: read stdin, write stdout.
#
# Symbolic links are always ignored (except as explicit directory
# arguments).
#
# The original files are kept as back-up with a "~" suffix.
#
# Changes made are reported to stdout in a diff-like format.
#
# NB: by changing only the function fixline() you can turn this
# into a program for different changes to C source files; by
# changing the function wanted() you can make a different selection of
# files.

import sys
import regex
import string
import os
from stat import *
import getopt

# Output shorthands: errors and debug traces go to stderr, the change
# report goes to stdout.
err = dbg = sys.stderr.write
rep = sys.stdout.write

def usage():
	# Write the command synopsis and the substitution-file rules
	# through err(), one call per output line.
	messages = (
		'Usage: ' + sys.argv[0] +
		' [-c] [-r] [-s file] ... file-or-directory ...\n',
		'\n',
		'-c           : substitute inside comments\n',
		'-r           : reverse direction for following -s options\n',
		'-s substfile : add a file of substitutions\n',
		'\n',
		'Each non-empty non-comment line in a substitution file must\n',
		'contain exactly two words: an identifier and its replacement.\n',
		'Comments start with a # character and end at end of line.\n',
		'If an identifier is preceded with a *, it is not substituted\n',
		'inside a comment even when -c is specified.\n',
	)
	for text in messages:
		err(text)

def main():
	# Parse the command line, load the substitution files, then fix
	# every file or directory argument.  The process exits with
	# status 2 on a usage error, 1 if any argument failed, else 0.
	try:
		opts, args = getopt.getopt(sys.argv[1:], 'crs:')
	except getopt.error, msg:
		err('Options error: ' + str(msg) + '\n')
		usage()
		sys.exit(2)
	if not args: # No arguments
		usage()
		sys.exit(2)
	# Options are applied in command-line order, so a -r only
	# affects the -s files that follow it.
	for flag, value in opts:
		if flag == '-c':
			setdocomments()
		elif flag == '-r':
			setreverse()
		elif flag == '-s':
			addsubst(value)
	failed = 0
	for target in args:
		if os.path.isdir(target):
			status = recursedown(target)
		elif os.path.islink(target):
			err(target + ': will not process symbolic links\n')
			status = 1
		else:
			status = fix(target)
		if status: failed = 1
	sys.exit(failed)

# File-name filter applied while walking directories: a base name made
# of letters, digits and underscores, followed by '.c' or '.h'.
# Edit this regular expression to select a different set of files.
Wanted = '^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
	# True when regex.match() gives a non-negative result for name.
	matched = regex.match(Wanted, name)
	return matched >= 0

def recursedown(dirname):
	dbg('recursedown(' + `dirname` + ')\n')
	bad = 0
	try:
		names = os.listdir(dirname)
	except os.error, msg:
		err(dirname + ': cannot list directory: ' + str(msg) + '\n')
		return 1
	names.sort()
	subdirs = []
	for name in names:
		if name in (os.curdir, os.pardir): continue
		fullname = os.path.join(dirname, name)
		if os.path.islink(fullname): pass
		elif os.path.isdir(fullname):
			subdirs.append(fullname)
		elif wanted(name):
			if fix(fullname): bad = 1
	for fullname in subdirs:
		if recursedown(fullname): bad = 1
	return bad

def fix(filename):
##	dbg('fix(' + `filename` + ')\n')
	if filename == '-':
		# Filter mode
		f = sys.stdin
		g = sys.stdout
	else:
		# File replacement mode
		try:
			f = open(filename, 'r')
		except IOError, msg:
			err(filename + ': cannot open: ' + str(msg) + '\n')
			return 1
		head, tail = os.path.split(filename)
		tempname = os.path.join(head, '@' + tail)
		g = None
	# If we find a match, we rewind the file and start over but
	# now copy everything to a temp file.
	lineno = 0
	initfixline()
	while 1:
		line = f.readline()
		if not line: break
		lineno = lineno + 1
		while line[-2:] == '\\\n':
			nextline = f.readline()
			if not nextline: break
			line = line + nextline
			lineno = lineno + 1
		newline = fixline(line)
		if newline != line:
			if g is None:
				try:
					g = open(tempname, 'w')
				except IOError, msg:
					f.close()
					err(tempname+': cannot create: '+
					    str(msg)+'\n')
					return 1
				f.seek(0)
				lineno = 0
				initfixline()
				rep(filename + ':\n')
				continue # restart from the beginning
			rep(`lineno` + '\n')
			rep('< ' + line)
			rep('> ' + newline)
		if g is not None:
			g.write(newline)

	# End of file
	if filename == '-': return 0 # Done in filter mode
	f.close()
	if not g: return 0 # No changes

	# Finishing touch -- move files

	# First copy the file's mode to the temp file
	try:
		statbuf = os.stat(filename)
		os.chmod(tempname, statbuf[ST_MODE] & 07777)
	except os.error, msg:
		err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
	# Then make a backup of the original file as filename~
	try:
		os.rename(filename, filename + '~')
	except os.error, msg:
		err(filename + ': warning: backup failed (' + str(msg) + ')\n')
	# Now move the temp file to the original file
	try:
		os.rename(tempname, filename)
	except os.error, msg:
		err(filename + ': rename failed (' + str(msg) + ')\n')
		return 1
	# Return succes
	return 0

# Tokenizing ANSI C (partly)
#
# The patterns below are written for the "regex" module, where \( \)
# group and \| separates alternatives.  They are combined into two
# compiled programs: one used outside comments, one used inside.

# NOTE(review): this requires at least two characters, so one-letter
# identifiers are never matched (and hence never substituted) --
# confirm that this is intended.
Identifier = '[a-zA-Z_][a-zA-Z0-9_]+'
# String and character constants; a backslash escapes the next character
String = '"\([^\n\\"]\|\\\\.\)*"'
Char = '\'\([^\n\\\']\|\\\\.\)*\''
CommentStart = '/\*'
CommentEnd = '\*/'

# Numbers are matched as tokens so that letters inside them (hex
# digits, exponents, suffixes) are not mistaken for identifiers
Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = Hexnumber + '\|' + Octnumber + '\|' + Decnumber
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = '\([0-9]+\.[0-9]*\|\.[0-9]+\)\(' + Exponent + '\)?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '\|' + Expfloat
Number = Floatnumber + '\|' + Intnumber

# Anything else is an operator -- don't list this explicitly because of '/*'

# Tokens looked for outside a comment; strings and character constants
# are included so their contents are skipped as a whole
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '\(' + string.joinfields(OutsideComment, '\|') + '\)'
OutsideCommentProgram = regex.compile(OutsideCommentPattern)

# Tokens looked for inside a comment; only the end-of-comment marker
# switches back to the outside program
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '\(' + string.joinfields(InsideComment, '\|') + '\)'
InsideCommentProgram = regex.compile(InsideCommentPattern)

def initfixline():
	# Reset the tokenizer state to "outside a comment"; fix() calls
	# this before each pass over a file.
	global Program
	Program = OutsideCommentProgram

def fixline(line):
	global Program
##	print '-->', `line`
	i = 0
	while i < len(line):
		i = Program.search(line, i)
		if i < 0: break
		found = Program.group(0)
##		if Program is InsideCommentProgram: print '...',
##		else: print '   ',
##		print found
		if len(found) == 2:
			if found == '/*':
				Program = InsideCommentProgram
			elif found == '*/':
				Program = OutsideCommentProgram
		n = len(found)
		if Dict.has_key(found):
			subst = Dict[found]
			if Program is InsideCommentProgram:
				if not Docomments:
					print 'Found in comment:', found
					continue
				if NotInComment.has_key(found):
##					print 'Ignored in comment:',
##					print found, '-->', subst
##					print 'Line:', line,
					subst = found
##				else:
##					print 'Substituting in comment:',
##					print found, '-->', subst
##					print 'Line:', line,
			line = line[:i] + subst + line[i+n:]
			n = len(subst)
		i = i + n
	return line

# Nonzero when substitutions are also made inside comments (-c option)
Docomments = 0
def setdocomments():
	# Turn on substitution inside comments
	global Docomments
	Docomments = 1

# True while substitution files are read with the old and the new
# identifier swapped; each -r option toggles it
Reverse = 0
def setreverse():
	# Flip the direction used by later addsubst() calls
	global Reverse
	Reverse = not Reverse

Dict = {}
NotInComment = {}
def addsubst(substfile):
	try:
		fp = open(substfile, 'r')
	except IOError, msg:
		err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
		sys.exit(1)
	lineno = 0
	while 1:
		line = fp.readline()
		if not line: break
		lineno = lineno + 1
		try:
			i = string.index(line, '#')
		except string.index_error:
			i = -1		# Happens to delete trailing \n
		words = string.split(line[:i])
		if not words: continue
		if len(words) <> 2:
			err(substfile + ':' + `lineno` +
				  ': warning: bad line: ' + line)
			continue
		if Reverse:
			[value, key] = words
		else:
			[key, value] = words
		if value[0] == '*':
			value = value[1:]
		if key[0] == '*':
			key = key[1:]
			NotInComment[key] = value
		if Dict.has_key(key):
			err(substfile + ':' + `lineno` +
				  ': warning: overriding: ' +
				  key + ' ' + value + '\n')
			err(substfile + ':' + `lineno` +
				  ': warning: previous: ' + Dict[key] + '\n')
		Dict[key] = value
	fp.close()

# Only run when executed as a script, so that importing this module
# does not start processing sys.argv
if __name__ == '__main__':
	main()