Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to

make pygettext more compatible with GNU xgettext, specifically:

Added -E/--escape for allowing pass-thru of iso8859-1 characters above
7 bits.

Added -o/--output option for renaming the output file from
messages.pot (there's overlap with -d/--default-domain, but GNU
xgettext has them both).

Added -p/--output-dir for specifying the output directory for
messages.pot.

Added -V/--version for printing the version number.

Added -w/--width for specifying the output page width (this is because
pygettext, like GNU xgettext, will now put several locations on the
same line to cut down on vertical space).

Added -x/--exclude-file for specifying a list of strings that are not
to be extracted from the input files.

Bumped version number to 1.0

Try to import fintl and use fintl.gettext as _ if available.  The
fallback is to use an identity definition of _().

Moved the escape creation to a function make_escapes() so that its
behavior can be controlled by the -E option.

__openseen(): Support the -x option.

write(): Support -w option and vertical space preserving feature.

main(): Support new options.
This commit is contained in:
Barry Warsaw 2000-02-26 20:56:47 +00:00
parent abc52169b7
commit c8f0892d12
1 changed files with 140 additions and 40 deletions

View File

@ -1,5 +1,8 @@
#! /usr/bin/env python
# Originally written by Barry Warsaw <bwarsaw@python.org>
#
# minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
"""pygettext -- Python equivalent of xgettext(1)
@ -35,7 +38,8 @@ below for how to augment this.
[2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
where ever possible.
where ever possible. However some options are still missing or are not fully
implemented.
Usage: pygettext [options] filename ...
@ -45,9 +49,17 @@ Options:
--extract-all
Extract all strings
-d default-domain
--default-domain=default-domain
Rename the default output file from messages.pot to default-domain.pot
-d name
--default-domain=name
Rename the default output file from messages.pot to name.pot
-E
--escape
replace non-ASCII characters with octal escape sequences.
-h
--help
print this help message and exit
-k [word]
--keyword[=word]
@ -73,13 +85,31 @@ Options:
If style is omitted, Gnu is used. The style name is case
insensitive. By default, locations are included.
-o filename
--output=filename
Rename the default output file from messages.pot to filename.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-v
--verbose
Print the names of the files being processed.
--help
-h
print this help message and exit
-V
--version
Print the version of pygettext and exit.
-w columns
--width=columns
Set width of output to columns.
-x filename
--exclude-file=filename
Specify a file that contains a list of strings that are not to be
extracted from the input files. Each string to be excluded must
appear on a line by itself in the file.
"""
@ -90,12 +120,16 @@ import time
import getopt
import tokenize
__version__ = '0.2'
__version__ = '1.0'
# for selftesting
def _(s): return s
try:
import fintl
_ = fintl.gettext
except ImportError:
def _(s): return s
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
@ -125,21 +159,31 @@ def usage(code, msg=''):
print msg
sys.exit(code)
escapes = []
for i in range(256):
if i < 32 or i > 127:
escapes.append("\\%03o" % i)
else:
escapes.append(chr(i))
escapes[ord('\\')] = '\\\\'
escapes[ord('\t')] = '\\t'
escapes[ord('\r')] = '\\r'
escapes[ord('\n')] = '\\n'
escapes[ord('\"')] = '\\"'
def make_escapes(pass_iso8859):
    """Build the global `escapes` table: one output string per character code.

    After the call, escapes[n] holds the text to emit for the character
    with ordinal n (0..255).

    pass_iso8859 -- if true, characters whose low seven bits fall in the
        printable range 32..126 are emitted unchanged, so that e.g.
        'msgid "Höhe"' does not become 'msgid "H\\366he"'.  If false, every
        character outside 32..126 is replaced by a three digit octal
        escape.

    Backslash, tab, carriage return, newline and double quote always get
    their symbolic escapes.  The table is rebuilt from scratch on every
    call, so calling this function more than once is safe.
    """
    global escapes
    if pass_iso8859:
        # Decide printability on the low seven bits only, but keep the
        # original ordinal for the emitted character or octal escape.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Rebind rather than append, so a stale table never shadows the new one.
    escapes = table
def escape(s):
global escapes
s = list(s)
for i in range(len(s)):
s[i] = escapes[ord(s[i])]
@ -200,12 +244,13 @@ class TokenEater:
# were no strings inside _(), then just ignore this entry.
if self.__data:
msg = string.join(self.__data, '')
entry = (self.__curfile, self.__lineno)
linenos = self.__messages.get(msg)
if linenos is None:
self.__messages[msg] = [entry]
else:
linenos.append(entry)
if not msg in self.__options.toexclude:
entry = (self.__curfile, self.__lineno)
linenos = self.__messages.get(msg)
if linenos is None:
self.__messages[msg] = [entry]
else:
linenos.append(entry)
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
@ -222,20 +267,30 @@ class TokenEater:
sys.stdout = fp
# The time stamp in the header doesn't have the same format
# as that generated by xgettext...
print pot_header % {'time': timestamp, 'version':__version__}
print pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items():
for filename, lineno in v:
# location comments are different b/w Solaris and GNU
d = {'filename': filename,
'lineno': lineno}
if options.location == options.SOLARIS:
# location comments are different b/w Solaris and GNU:
if options.location == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
print _('# File: %(filename)s, line: %(lineno)d') % d
elif options.location == options.GNU:
print _('#: %(filename)s:%(lineno)d') % d
elif options.location == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceed 'options.width'
locline = '#:'
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
s = _(' %(filename)s:%(lineno)d') % d
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print locline
locline = "#:" + s
if len(locline) > 2:
print locline
# TBD: sorting, normalizing
print 'msgid', normalize(k)
print 'msgstr ""'
print
print 'msgstr ""\n'
finally:
sys.stdout = sys.__stdout__
@ -245,9 +300,11 @@ def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
'k:d:n:hv',
['keyword', 'default-domain', 'help',
'add-location=', 'no-location', 'verbose'])
'ad:Ehk:n:o:p:Vvw:x:',
['extract-all', 'default-domain', 'escape', 'help', 'keyword',
'add-location', 'no-location', 'output=', 'output-dir=',
'verbose', 'version', 'width=', 'exclude-file=',
])
except getopt.error, msg:
usage(1, msg)
@ -257,10 +314,15 @@ def main():
GNU = 1
SOLARIS = 2
# defaults
extractall = 0 # FIXME: currently this option has no effect at all.
escape = 0
keywords = []
outpath = ''
outfile = 'messages.pot'
location = GNU
verbose = 0
width = 78
excludefilename = ''
options = Options()
locations = {'gnu' : options.GNU,
@ -271,12 +333,16 @@ def main():
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-a', '--extract-all'):
options.extractall = 1
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
elif opt in ('-k', '--keyword'):
if arg is None:
default_keywords = []
options.keywords.append(arg)
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-n', '--add-location'):
if arg is None:
arg = 'gnu'
@ -287,12 +353,44 @@ def main():
usage(1, _('Invalid value for --add-location: %(arg)s') % d)
elif opt in ('--no-location',):
options.location = 0
elif opt in ('-o', '--output'):
options.outfile = arg
elif opt in ('-p', '--output-dir'):
options.outpath = arg
elif opt in ('-v', '--verbose'):
options.verbose = 1
elif opt in ('-V', '--version'):
print _('pygettext.py (xgettext for Python) %s') % __version__
sys.exit(0)
elif opt in ('-w', '--width'):
try:
options.width = int(arg)
except ValueError:
d = {'arg':arg}
usage(1, _('Invalid value for --width: %(arg)s, must be int')
% d)
elif opt in ('-x', '--exclude-file'):
options.excludefilename = arg
# calculate escapes
make_escapes(options.escapes)
# calculate all keywords
options.keywords.extend(default_keywords)
# initialize list of strings to exclude
if options.excludefilename:
try:
fp = open(options.excludefilename)
options.toexclude = fp.readlines()
fp.close()
except IOError:
sys.stderr.write(_("Can't read --exclude-file: %s") %
options.excludefilename)
sys.exit(1)
else:
options.toexclude = []
# slurp through all the files
eater = TokenEater(options)
for filename in args:
@ -303,6 +401,8 @@ def main():
tokenize.tokenize(fp.readline, eater)
fp.close()
if options.outpath:
options.outfile = os.path.join(options.outpath, options.outfile)
fp = open(options.outfile, 'w')
eater.write(fp)
fp.close()