Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to

make pygettext more compatible with GNU xgettext, specifically:

Added -E/--escape for allowing pass-thru of iso8859-1 characters above
7 bits.

Added -o/--output option for renaming the output file from
messages.pot (there's overlap with -d/--default-domain, but GNU
xgettext has them both).

Added -p/--output-dir for specifying the output directory for
messages.pot.

Added -V/--version for printing the version number.

Added -w/--width for specifying the output page width (this is because
pygettext, like GNU xgettext, now puts several locations on the same
line to cut down on vertical space).

Added -x/--exclude-file for specifying a list of strings that are not
to be extracted from the input files.

Bumped version number to 1.0

Try to import fintl and use fintl.gettext as _ if available.  The
fallback is to use an identity definition of _().

Moved the escape creation to a function make_escapes() so that its
behavior can be controlled by the -E option.

__openseen(): Support the -x option.

write(): Support -w option and vertical space preserving feature.

main(): Support new options.
This commit is contained in:
Barry Warsaw 2000-02-26 20:56:47 +00:00
parent abc52169b7
commit c8f0892d12
1 changed files with 140 additions and 40 deletions

View File

@ -1,5 +1,8 @@
#! /usr/bin/env python #! /usr/bin/env python
# Originally written by Barry Warsaw <bwarsaw@python.org> # Originally written by Barry Warsaw <bwarsaw@python.org>
#
# minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
"""pygettext -- Python equivalent of xgettext(1) """pygettext -- Python equivalent of xgettext(1)
@ -35,7 +38,8 @@ below for how to augment this.
[2] http://www.gnu.org/software/gettext/gettext.html [2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
where ever possible. where ever possible. However some options are still missing or are not fully
implemented.
Usage: pygettext [options] filename ... Usage: pygettext [options] filename ...
@ -45,9 +49,17 @@ Options:
--extract-all --extract-all
Extract all strings Extract all strings
-d default-domain -d name
--default-domain=default-domain --default-domain=name
Rename the default output file from messages.pot to default-domain.pot Rename the default output file from messages.pot to name.pot
-E
--escape
replace non-ASCII characters with octal escape sequences.
-h
--help
print this help message and exit
-k [word] -k [word]
--keyword[=word] --keyword[=word]
@ -73,13 +85,31 @@ Options:
If style is omitted, Gnu is used. The style name is case If style is omitted, Gnu is used. The style name is case
insensitive. By default, locations are included. insensitive. By default, locations are included.
-o filename
--output=filename
Rename the default output file from messages.pot to filename.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-v -v
--verbose --verbose
Print the names of the files being processed. Print the names of the files being processed.
--help -V
-h --version
print this help message and exit Print the version of pygettext and exit.
-w columns
--width=columns
Set width of output to columns.
-x filename
--exclude-file=filename
Specify a file that contains a list of strings that are not to be
extracted from the input files. Each string to be excluded must
appear on a line by itself in the file.
""" """
@ -90,12 +120,16 @@ import time
import getopt import getopt
import tokenize import tokenize
__version__ = '0.2' __version__ = '1.0'
# for selftesting # for selftesting
def _(s): return s try:
import fintl
_ = fintl.gettext
except ImportError:
def _(s): return s
# The normal pot-file header. msgmerge and EMACS' po-mode work better if # The normal pot-file header. msgmerge and EMACS' po-mode work better if
@ -125,21 +159,31 @@ def usage(code, msg=''):
print msg print msg
sys.exit(code) sys.exit(code)
escapes = [] escapes = []
for i in range(256):
if i < 32 or i > 127:
escapes.append("\\%03o" % i)
else:
escapes.append(chr(i))
escapes[ord('\\')] = '\\\\' def make_escapes(pass_iso8859):
escapes[ord('\t')] = '\\t' global escapes
escapes[ord('\r')] = '\\r' for i in range(256):
escapes[ord('\n')] = '\\n' if pass_iso8859:
escapes[ord('\"')] = '\\"' # Allow iso-8859 characters to pass through so that e.g. 'msgid
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise
# we escape any character outside the 32..126 range.
i = i % 128
if 32 <= i <= 126:
escapes.append(chr(i))
else:
escapes.append("\\%03o" % i)
escapes[ord('\\')] = '\\\\'
escapes[ord('\t')] = '\\t'
escapes[ord('\r')] = '\\r'
escapes[ord('\n')] = '\\n'
escapes[ord('\"')] = '\\"'
def escape(s): def escape(s):
global escapes
s = list(s) s = list(s)
for i in range(len(s)): for i in range(len(s)):
s[i] = escapes[ord(s[i])] s[i] = escapes[ord(s[i])]
@ -200,12 +244,13 @@ class TokenEater:
# were no strings inside _(), then just ignore this entry. # were no strings inside _(), then just ignore this entry.
if self.__data: if self.__data:
msg = string.join(self.__data, '') msg = string.join(self.__data, '')
entry = (self.__curfile, self.__lineno) if not msg in self.__options.toexclude:
linenos = self.__messages.get(msg) entry = (self.__curfile, self.__lineno)
if linenos is None: linenos = self.__messages.get(msg)
self.__messages[msg] = [entry] if linenos is None:
else: self.__messages[msg] = [entry]
linenos.append(entry) else:
linenos.append(entry)
self.__state = self.__waiting self.__state = self.__waiting
elif ttype == tokenize.STRING: elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring)) self.__data.append(safe_eval(tstring))
@ -222,20 +267,30 @@ class TokenEater:
sys.stdout = fp sys.stdout = fp
# The time stamp in the header doesn't have the same format # The time stamp in the header doesn't have the same format
# as that generated by xgettext... # as that generated by xgettext...
print pot_header % {'time': timestamp, 'version':__version__} print pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items(): for k, v in self.__messages.items():
for filename, lineno in v: # location comments are different b/w Solaris and GNU:
# location comments are different b/w Solaris and GNU if options.location == options.SOLARIS:
d = {'filename': filename, for filename, lineno in v:
'lineno': lineno} d = {'filename': filename, 'lineno': lineno}
if options.location == options.SOLARIS:
print _('# File: %(filename)s, line: %(lineno)d') % d print _('# File: %(filename)s, line: %(lineno)d') % d
elif options.location == options.GNU: elif options.location == options.GNU:
print _('#: %(filename)s:%(lineno)d') % d # fit as many locations on one line, as long as the
# resulting line length doesn't exceed 'options.width'
locline = '#:'
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
s = _(' %(filename)s:%(lineno)d') % d
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print locline
locline = "#:" + s
if len(locline) > 2:
print locline
# TBD: sorting, normalizing # TBD: sorting, normalizing
print 'msgid', normalize(k) print 'msgid', normalize(k)
print 'msgstr ""' print 'msgstr ""\n'
print
finally: finally:
sys.stdout = sys.__stdout__ sys.stdout = sys.__stdout__
@ -245,9 +300,11 @@ def main():
try: try:
opts, args = getopt.getopt( opts, args = getopt.getopt(
sys.argv[1:], sys.argv[1:],
'k:d:n:hv', 'ad:Ehk:n:o:p:Vvw:x:',
['keyword', 'default-domain', 'help', ['extract-all', 'default-domain', 'escape', 'help', 'keyword',
'add-location=', 'no-location', 'verbose']) 'add-location', 'no-location', 'output=', 'output-dir=',
'verbose', 'version', 'width=', 'exclude-file=',
])
except getopt.error, msg: except getopt.error, msg:
usage(1, msg) usage(1, msg)
@ -257,10 +314,15 @@ def main():
GNU = 1 GNU = 1
SOLARIS = 2 SOLARIS = 2
# defaults # defaults
extractall = 0 # FIXME: currently this option has no effect at all.
escape = 0
keywords = [] keywords = []
outpath = ''
outfile = 'messages.pot' outfile = 'messages.pot'
location = GNU location = GNU
verbose = 0 verbose = 0
width = 78
excludefilename = ''
options = Options() options = Options()
locations = {'gnu' : options.GNU, locations = {'gnu' : options.GNU,
@ -271,12 +333,16 @@ def main():
for opt, arg in opts: for opt, arg in opts:
if opt in ('-h', '--help'): if opt in ('-h', '--help'):
usage(0) usage(0)
elif opt in ('-a', '--extract-all'):
options.extractall = 1
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
elif opt in ('-k', '--keyword'): elif opt in ('-k', '--keyword'):
if arg is None: if arg is None:
default_keywords = [] default_keywords = []
options.keywords.append(arg) options.keywords.append(arg)
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-n', '--add-location'): elif opt in ('-n', '--add-location'):
if arg is None: if arg is None:
arg = 'gnu' arg = 'gnu'
@ -287,12 +353,44 @@ def main():
usage(1, _('Invalid value for --add-location: %(arg)s') % d) usage(1, _('Invalid value for --add-location: %(arg)s') % d)
elif opt in ('--no-location',): elif opt in ('--no-location',):
options.location = 0 options.location = 0
elif opt in ('-o', '--output'):
options.outfile = arg
elif opt in ('-p', '--output-dir'):
options.outpath = arg
elif opt in ('-v', '--verbose'): elif opt in ('-v', '--verbose'):
options.verbose = 1 options.verbose = 1
elif opt in ('-V', '--version'):
print _('pygettext.py (xgettext for Python) %s') % __version__
sys.exit(0)
elif opt in ('-w', '--width'):
try:
options.width = int(arg)
except ValueError:
d = {'arg':arg}
usage(1, _('Invalid value for --width: %(arg)s, must be int')
% d)
elif opt in ('-x', '--exclude-file'):
options.excludefilename = arg
# calculate escapes
make_escapes(options.escapes)
# calculate all keywords # calculate all keywords
options.keywords.extend(default_keywords) options.keywords.extend(default_keywords)
# initialize list of strings to exclude
if options.excludefilename:
try:
fp = open(options.excludefilename)
options.toexclude = fp.readlines()
fp.close()
except IOError:
sys.stderr.write(_("Can't read --exclude-file: %s") %
options.excludefilename)
sys.exit(1)
else:
options.toexclude = []
# slurp through all the files # slurp through all the files
eater = TokenEater(options) eater = TokenEater(options)
for filename in args: for filename in args:
@ -303,6 +401,8 @@ def main():
tokenize.tokenize(fp.readline, eater) tokenize.tokenize(fp.readline, eater)
fp.close() fp.close()
if options.outpath:
options.outfile = os.path.join(options.outpath, options.outfile)
fp = open(options.outfile, 'w') fp = open(options.outfile, 'w')
eater.write(fp) eater.write(fp)
fp.close() fp.close()