Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to
make pygettext more compatible with GNU xgettext, specifically: Added -E/--escape for allowing pass-thru of iso8859-1 characters above 7 bits. Added -o/--output option for renaming the output file from messages.pot (there's overlap with -d/--default-domain, but GNU xgettext has them both). Added -p/--output-dir for specifying the output directory for messages.pot. Added -V/--version for printing the version number. Added -w/--width for specifying the output page width (this is because pygettext, like GNU xgettext, will now put several locations on the same line to cut down on vertical space). Added -x/--exclude-file for specifying a list of strings that are not to be extracted from the input files. Bumped version number to 1.0. Try to import fintl and use fintl.gettext as _ if available; the fallback is to use the identity definition of _(). Moved the escape creation to a function make_escapes() so that its behavior can be controlled by the -E option. __openseen(): Support the -x option. write(): Support -w option and vertical space preserving feature. main(): Support new options.
This commit is contained in:
parent
abc52169b7
commit
c8f0892d12
|
@ -1,5 +1,8 @@
|
||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
# Originally written by Barry Warsaw <bwarsaw@python.org>
|
# Originally written by Barry Warsaw <bwarsaw@python.org>
|
||||||
|
#
|
||||||
|
# minimally patched to make it even more xgettext compatible
|
||||||
|
# by Peter Funk <pf@artcom-gmbh.de>
|
||||||
|
|
||||||
"""pygettext -- Python equivalent of xgettext(1)
|
"""pygettext -- Python equivalent of xgettext(1)
|
||||||
|
|
||||||
|
@ -35,7 +38,8 @@ below for how to augment this.
|
||||||
[2] http://www.gnu.org/software/gettext/gettext.html
|
[2] http://www.gnu.org/software/gettext/gettext.html
|
||||||
|
|
||||||
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
|
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
|
||||||
where ever possible.
|
wherever possible. However, some options are still missing or are not fully
|
||||||
|
implemented.
|
||||||
|
|
||||||
Usage: pygettext [options] filename ...
|
Usage: pygettext [options] filename ...
|
||||||
|
|
||||||
|
@ -45,9 +49,17 @@ Options:
|
||||||
--extract-all
|
--extract-all
|
||||||
Extract all strings
|
Extract all strings
|
||||||
|
|
||||||
-d default-domain
|
-d name
|
||||||
--default-domain=default-domain
|
--default-domain=name
|
||||||
Rename the default output file from messages.pot to default-domain.pot
|
Rename the default output file from messages.pot to name.pot
|
||||||
|
|
||||||
|
-E
|
||||||
|
--escape
|
||||||
|
Replace non-ASCII characters with octal escape sequences.
|
||||||
|
|
||||||
|
-h
|
||||||
|
--help
|
||||||
|
Print this help message and exit.
|
||||||
|
|
||||||
-k [word]
|
-k [word]
|
||||||
--keyword[=word]
|
--keyword[=word]
|
||||||
|
@ -73,13 +85,31 @@ Options:
|
||||||
If style is omitted, Gnu is used. The style name is case
|
If style is omitted, Gnu is used. The style name is case
|
||||||
insensitive. By default, locations are included.
|
insensitive. By default, locations are included.
|
||||||
|
|
||||||
|
-o filename
|
||||||
|
--output=filename
|
||||||
|
Rename the default output file from messages.pot to filename.
|
||||||
|
|
||||||
|
-p dir
|
||||||
|
--output-dir=dir
|
||||||
|
Output files will be placed in directory dir.
|
||||||
|
|
||||||
-v
|
-v
|
||||||
--verbose
|
--verbose
|
||||||
Print the names of the files being processed.
|
Print the names of the files being processed.
|
||||||
|
|
||||||
--help
|
-V
|
||||||
-h
|
--version
|
||||||
print this help message and exit
|
Print the version of pygettext and exit.
|
||||||
|
|
||||||
|
-w columns
|
||||||
|
--width=columns
|
||||||
|
Set width of output to columns.
|
||||||
|
|
||||||
|
-x filename
|
||||||
|
--exclude-file=filename
|
||||||
|
Specify a file that contains a list of strings that are not to be
|
||||||
|
extracted from the input files. Each string to be excluded must
|
||||||
|
appear on a line by itself in the file.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -90,12 +120,16 @@ import time
|
||||||
import getopt
|
import getopt
|
||||||
import tokenize
|
import tokenize
|
||||||
|
|
||||||
__version__ = '0.2'
|
__version__ = '1.0'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# for selftesting
|
# for selftesting
|
||||||
def _(s): return s
|
try:
|
||||||
|
import fintl
|
||||||
|
_ = fintl.gettext
|
||||||
|
except ImportError:
|
||||||
|
def _(s): return s
|
||||||
|
|
||||||
|
|
||||||
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
|
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
|
||||||
|
@ -125,21 +159,31 @@ def usage(code, msg=''):
|
||||||
print msg
|
print msg
|
||||||
sys.exit(code)
|
sys.exit(code)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
escapes = []
|
escapes = []
|
||||||
for i in range(256):
|
|
||||||
if i < 32 or i > 127:
|
|
||||||
escapes.append("\\%03o" % i)
|
|
||||||
else:
|
|
||||||
escapes.append(chr(i))
|
|
||||||
|
|
||||||
escapes[ord('\\')] = '\\\\'
|
def make_escapes(pass_iso8859):
|
||||||
escapes[ord('\t')] = '\\t'
|
global escapes
|
||||||
escapes[ord('\r')] = '\\r'
|
for i in range(256):
|
||||||
escapes[ord('\n')] = '\\n'
|
if pass_iso8859:
|
||||||
escapes[ord('\"')] = '\\"'
|
# Allow iso-8859 characters to pass through so that e.g. 'msgid
|
||||||
|
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise
|
||||||
|
# we escape any character outside the 32..126 range.
|
||||||
|
i = i % 128
|
||||||
|
if 32 <= i <= 126:
|
||||||
|
escapes.append(chr(i))
|
||||||
|
else:
|
||||||
|
escapes.append("\\%03o" % i)
|
||||||
|
escapes[ord('\\')] = '\\\\'
|
||||||
|
escapes[ord('\t')] = '\\t'
|
||||||
|
escapes[ord('\r')] = '\\r'
|
||||||
|
escapes[ord('\n')] = '\\n'
|
||||||
|
escapes[ord('\"')] = '\\"'
|
||||||
|
|
||||||
|
|
||||||
def escape(s):
|
def escape(s):
|
||||||
|
global escapes
|
||||||
s = list(s)
|
s = list(s)
|
||||||
for i in range(len(s)):
|
for i in range(len(s)):
|
||||||
s[i] = escapes[ord(s[i])]
|
s[i] = escapes[ord(s[i])]
|
||||||
|
@ -200,12 +244,13 @@ class TokenEater:
|
||||||
# were no strings inside _(), then just ignore this entry.
|
# were no strings inside _(), then just ignore this entry.
|
||||||
if self.__data:
|
if self.__data:
|
||||||
msg = string.join(self.__data, '')
|
msg = string.join(self.__data, '')
|
||||||
entry = (self.__curfile, self.__lineno)
|
if not msg in self.__options.toexclude:
|
||||||
linenos = self.__messages.get(msg)
|
entry = (self.__curfile, self.__lineno)
|
||||||
if linenos is None:
|
linenos = self.__messages.get(msg)
|
||||||
self.__messages[msg] = [entry]
|
if linenos is None:
|
||||||
else:
|
self.__messages[msg] = [entry]
|
||||||
linenos.append(entry)
|
else:
|
||||||
|
linenos.append(entry)
|
||||||
self.__state = self.__waiting
|
self.__state = self.__waiting
|
||||||
elif ttype == tokenize.STRING:
|
elif ttype == tokenize.STRING:
|
||||||
self.__data.append(safe_eval(tstring))
|
self.__data.append(safe_eval(tstring))
|
||||||
|
@ -222,20 +267,30 @@ class TokenEater:
|
||||||
sys.stdout = fp
|
sys.stdout = fp
|
||||||
# The time stamp in the header doesn't have the same format
|
# The time stamp in the header doesn't have the same format
|
||||||
# as that generated by xgettext...
|
# as that generated by xgettext...
|
||||||
print pot_header % {'time': timestamp, 'version':__version__}
|
print pot_header % {'time': timestamp, 'version': __version__}
|
||||||
for k, v in self.__messages.items():
|
for k, v in self.__messages.items():
|
||||||
for filename, lineno in v:
|
# location comments are different b/w Solaris and GNU:
|
||||||
# location comments are different b/w Solaris and GNU
|
if options.location == options.SOLARIS:
|
||||||
d = {'filename': filename,
|
for filename, lineno in v:
|
||||||
'lineno': lineno}
|
d = {'filename': filename, 'lineno': lineno}
|
||||||
if options.location == options.SOLARIS:
|
|
||||||
print _('# File: %(filename)s, line: %(lineno)d') % d
|
print _('# File: %(filename)s, line: %(lineno)d') % d
|
||||||
elif options.location == options.GNU:
|
elif options.location == options.GNU:
|
||||||
print _('#: %(filename)s:%(lineno)d') % d
|
# fit as many locations on one line, as long as the
|
||||||
|
# resulting line length doesn't exceed 'options.width'
|
||||||
|
locline = '#:'
|
||||||
|
for filename, lineno in v:
|
||||||
|
d = {'filename': filename, 'lineno': lineno}
|
||||||
|
s = _(' %(filename)s:%(lineno)d') % d
|
||||||
|
if len(locline) + len(s) <= options.width:
|
||||||
|
locline = locline + s
|
||||||
|
else:
|
||||||
|
print locline
|
||||||
|
locline = "#:" + s
|
||||||
|
if len(locline) > 2:
|
||||||
|
print locline
|
||||||
# TBD: sorting, normalizing
|
# TBD: sorting, normalizing
|
||||||
print 'msgid', normalize(k)
|
print 'msgid', normalize(k)
|
||||||
print 'msgstr ""'
|
print 'msgstr ""\n'
|
||||||
print
|
|
||||||
finally:
|
finally:
|
||||||
sys.stdout = sys.__stdout__
|
sys.stdout = sys.__stdout__
|
||||||
|
|
||||||
|
@ -245,9 +300,11 @@ def main():
|
||||||
try:
|
try:
|
||||||
opts, args = getopt.getopt(
|
opts, args = getopt.getopt(
|
||||||
sys.argv[1:],
|
sys.argv[1:],
|
||||||
'k:d:n:hv',
|
'ad:Ehk:n:o:p:Vvw:x:',
|
||||||
['keyword', 'default-domain', 'help',
|
['extract-all', 'default-domain', 'escape', 'help', 'keyword',
|
||||||
'add-location=', 'no-location', 'verbose'])
|
'add-location', 'no-location', 'output=', 'output-dir=',
|
||||||
|
'verbose', 'version', 'width=', 'exclude-file=',
|
||||||
|
])
|
||||||
except getopt.error, msg:
|
except getopt.error, msg:
|
||||||
usage(1, msg)
|
usage(1, msg)
|
||||||
|
|
||||||
|
@ -257,10 +314,15 @@ def main():
|
||||||
GNU = 1
|
GNU = 1
|
||||||
SOLARIS = 2
|
SOLARIS = 2
|
||||||
# defaults
|
# defaults
|
||||||
|
extractall = 0 # FIXME: currently this option has no effect at all.
|
||||||
|
escape = 0
|
||||||
keywords = []
|
keywords = []
|
||||||
|
outpath = ''
|
||||||
outfile = 'messages.pot'
|
outfile = 'messages.pot'
|
||||||
location = GNU
|
location = GNU
|
||||||
verbose = 0
|
verbose = 0
|
||||||
|
width = 78
|
||||||
|
excludefilename = ''
|
||||||
|
|
||||||
options = Options()
|
options = Options()
|
||||||
locations = {'gnu' : options.GNU,
|
locations = {'gnu' : options.GNU,
|
||||||
|
@ -271,12 +333,16 @@ def main():
|
||||||
for opt, arg in opts:
|
for opt, arg in opts:
|
||||||
if opt in ('-h', '--help'):
|
if opt in ('-h', '--help'):
|
||||||
usage(0)
|
usage(0)
|
||||||
|
elif opt in ('-a', '--extract-all'):
|
||||||
|
options.extractall = 1
|
||||||
|
elif opt in ('-d', '--default-domain'):
|
||||||
|
options.outfile = arg + '.pot'
|
||||||
|
elif opt in ('-E', '--escape'):
|
||||||
|
options.escape = 1
|
||||||
elif opt in ('-k', '--keyword'):
|
elif opt in ('-k', '--keyword'):
|
||||||
if arg is None:
|
if arg is None:
|
||||||
default_keywords = []
|
default_keywords = []
|
||||||
options.keywords.append(arg)
|
options.keywords.append(arg)
|
||||||
elif opt in ('-d', '--default-domain'):
|
|
||||||
options.outfile = arg + '.pot'
|
|
||||||
elif opt in ('-n', '--add-location'):
|
elif opt in ('-n', '--add-location'):
|
||||||
if arg is None:
|
if arg is None:
|
||||||
arg = 'gnu'
|
arg = 'gnu'
|
||||||
|
@ -287,12 +353,44 @@ def main():
|
||||||
usage(1, _('Invalid value for --add-location: %(arg)s') % d)
|
usage(1, _('Invalid value for --add-location: %(arg)s') % d)
|
||||||
elif opt in ('--no-location',):
|
elif opt in ('--no-location',):
|
||||||
options.location = 0
|
options.location = 0
|
||||||
|
elif opt in ('-o', '--output'):
|
||||||
|
options.outfile = arg
|
||||||
|
elif opt in ('-p', '--output-dir'):
|
||||||
|
options.outpath = arg
|
||||||
elif opt in ('-v', '--verbose'):
|
elif opt in ('-v', '--verbose'):
|
||||||
options.verbose = 1
|
options.verbose = 1
|
||||||
|
elif opt in ('-V', '--version'):
|
||||||
|
print _('pygettext.py (xgettext for Python) %s') % __version__
|
||||||
|
sys.exit(0)
|
||||||
|
elif opt in ('-w', '--width'):
|
||||||
|
try:
|
||||||
|
options.width = int(arg)
|
||||||
|
except ValueError:
|
||||||
|
d = {'arg':arg}
|
||||||
|
usage(1, _('Invalid value for --width: %(arg)s, must be int')
|
||||||
|
% d)
|
||||||
|
elif opt in ('-x', '--exclude-file'):
|
||||||
|
options.excludefilename = arg
|
||||||
|
|
||||||
|
# calculate escapes
|
||||||
|
make_escapes(options.escapes)
|
||||||
|
|
||||||
# calculate all keywords
|
# calculate all keywords
|
||||||
options.keywords.extend(default_keywords)
|
options.keywords.extend(default_keywords)
|
||||||
|
|
||||||
|
# initialize list of strings to exclude
|
||||||
|
if options.excludefilename:
|
||||||
|
try:
|
||||||
|
fp = open(options.excludefilename)
|
||||||
|
options.toexclude = fp.readlines()
|
||||||
|
fp.close()
|
||||||
|
except IOError:
|
||||||
|
sys.stderr.write(_("Can't read --exclude-file: %s") %
|
||||||
|
options.excludefilename)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
options.toexclude = []
|
||||||
|
|
||||||
# slurp through all the files
|
# slurp through all the files
|
||||||
eater = TokenEater(options)
|
eater = TokenEater(options)
|
||||||
for filename in args:
|
for filename in args:
|
||||||
|
@ -303,6 +401,8 @@ def main():
|
||||||
tokenize.tokenize(fp.readline, eater)
|
tokenize.tokenize(fp.readline, eater)
|
||||||
fp.close()
|
fp.close()
|
||||||
|
|
||||||
|
if options.outpath:
|
||||||
|
options.outfile = os.path.join(options.outpath, options.outfile)
|
||||||
fp = open(options.outfile, 'w')
|
fp = open(options.outfile, 'w')
|
||||||
eater.write(fp)
|
eater.write(fp)
|
||||||
fp.close()
|
fp.close()
|
||||||
|
|
Loading…
Reference in New Issue