From c8f0892d1236df81af1811cf182692f28c85f916 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Sat, 26 Feb 2000 20:56:47 +0000 Subject: [PATCH] Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to make pygettext more compatible with GNU xgettext, specifically: Added -E/--escape for allowing pass-thru of iso8859-1 characters above 7 bits. Added -o/--output option for renaming the output file from messages.pot (there's overlap with -d/--default-domain, but GNU xgettext has them both). Added -p/--output-dir for specifying the output directory for messages.pot. Added -V/--version for printing the version number. Added -w/--width for specifying the output page width (this is because now pygettext, like GNU xgettext will put several locations on the same line to cut down on vertical space). Added -x/--exclude-file for specifying a list of strings that are not to be extracted from the input files. Bumped version number to 1.0 Try to import fintl and use fintl.gettext as _ if available. Fall back is to use identity definition of _(). Moved the escape creation to a function make_escapes() so that its behavior can be controlled by the -E option. __openseen(): Support the -x option. write(): Support -w option and vertical space preserving feature. main(): Support new options. --- Tools/i18n/pygettext.py | 180 +++++++++++++++++++++++++++++++--------- 1 file changed, 140 insertions(+), 40 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index fcd6b9512d3..4ff4962d62b 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -1,5 +1,8 @@ #! /usr/bin/env python # Originally written by Barry Warsaw +# +# minimally patched to make it even more xgettext compatible +# by Peter Funk """pygettext -- Python equivalent of xgettext(1) @@ -35,7 +38,8 @@ below for how to augment this. 
[2] http://www.gnu.org/software/gettext/gettext.html NOTE: pygettext attempts to be option and feature compatible with GNU xgettext -where ever possible. +wherever possible. However some options are still missing or are not fully +implemented. Usage: pygettext [options] filename ... @@ -45,9 +49,17 @@ Options: --extract-all Extract all strings - -d default-domain - --default-domain=default-domain - Rename the default output file from messages.pot to default-domain.pot + -d name + --default-domain=name + Rename the default output file from messages.pot to name.pot + + -E + --escape + replace non-ASCII characters with octal escape sequences. + + -h + --help + print this help message and exit -k [word] --keyword[=word] @@ -73,13 +85,31 @@ Options: If style is omitted, Gnu is used. The style name is case insensitive. By default, locations are included. + -o filename + --output=filename + Rename the default output file from messages.pot to filename. + + -p dir + --output-dir=dir + Output files will be placed in directory dir. + -v --verbose Print the names of the files being processed. - --help - -h - print this help message and exit + -V + --version + Print the version of pygettext and exit. + + -w columns + --width=columns + Set width of output to columns. + + -x filename + --exclude-file=filename + Specify a file that contains a list of strings that are not to be + extracted from the input files. Each string to be excluded must + appear on a line by itself in the file. """ @@ -90,12 +120,16 @@ import time import getopt import tokenize -__version__ = '0.2' +__version__ = '1.0' # for selftesting -def _(s): return s +try: + import fintl + _ = fintl.gettext +except ImportError: + def _(s): return s # The normal pot-file header. 
msgmerge and EMACS' po-mode work better if @@ -125,21 +159,31 @@ def usage(code, msg=''): print msg sys.exit(code) + escapes = [] -for i in range(256): - if i < 32 or i > 127: - escapes.append("\\%03o" % i) - else: - escapes.append(chr(i)) -escapes[ord('\\')] = '\\\\' -escapes[ord('\t')] = '\\t' -escapes[ord('\r')] = '\\r' -escapes[ord('\n')] = '\\n' -escapes[ord('\"')] = '\\"' +def make_escapes(pass_iso8859): + global escapes + for i in range(256): + if pass_iso8859: + # Allow iso-8859 characters to pass through so that e.g. 'msgid + # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise + # we escape any character outside the 32..126 range. + i = i % 128 + if 32 <= i <= 126: + escapes.append(chr(i)) + else: + escapes.append("\\%03o" % i) + escapes[ord('\\')] = '\\\\' + escapes[ord('\t')] = '\\t' + escapes[ord('\r')] = '\\r' + escapes[ord('\n')] = '\\n' + escapes[ord('\"')] = '\\"' + def escape(s): + global escapes s = list(s) for i in range(len(s)): s[i] = escapes[ord(s[i])] @@ -200,12 +244,13 @@ class TokenEater: # were no strings inside _(), then just ignore this entry. if self.__data: msg = string.join(self.__data, '') - entry = (self.__curfile, self.__lineno) - linenos = self.__messages.get(msg) - if linenos is None: - self.__messages[msg] = [entry] - else: - linenos.append(entry) + if not msg in self.__options.toexclude: + entry = (self.__curfile, self.__lineno) + linenos = self.__messages.get(msg) + if linenos is None: + self.__messages[msg] = [entry] + else: + linenos.append(entry) self.__state = self.__waiting elif ttype == tokenize.STRING: self.__data.append(safe_eval(tstring)) @@ -222,20 +267,30 @@ class TokenEater: sys.stdout = fp # The time stamp in the header doesn't have the same format # as that generated by xgettext... 
- print pot_header % {'time': timestamp, 'version':__version__} + print pot_header % {'time': timestamp, 'version': __version__} for k, v in self.__messages.items(): - for filename, lineno in v: - # location comments are different b/w Solaris and GNU - d = {'filename': filename, - 'lineno': lineno} - if options.location == options.SOLARIS: + # location comments are different b/w Solaris and GNU: + if options.location == options.SOLARIS: + for filename, lineno in v: + d = {'filename': filename, 'lineno': lineno} print _('# File: %(filename)s, line: %(lineno)d') % d - elif options.location == options.GNU: - print _('#: %(filename)s:%(lineno)d') % d + elif options.location == options.GNU: + # fit as many locations on one line, as long as the + # resulting line length doesn't exceed 'options.width' + locline = '#:' + for filename, lineno in v: + d = {'filename': filename, 'lineno': lineno} + s = _(' %(filename)s:%(lineno)d') % d + if len(locline) + len(s) <= options.width: + locline = locline + s + else: + print locline + locline = "#:" + s + if len(locline) > 2: + print locline # TBD: sorting, normalizing print 'msgid', normalize(k) - print 'msgstr ""' - print + print 'msgstr ""\n' finally: sys.stdout = sys.__stdout__ @@ -245,9 +300,11 @@ def main(): try: opts, args = getopt.getopt( sys.argv[1:], - 'k:d:n:hv', - ['keyword', 'default-domain', 'help', - 'add-location=', 'no-location', 'verbose']) + 'ad:Ehk:n:o:p:Vvw:x:', + ['extract-all', 'default-domain', 'escape', 'help', 'keyword', + 'add-location', 'no-location', 'output=', 'output-dir=', + 'verbose', 'version', 'width=', 'exclude-file=', + ]) except getopt.error, msg: usage(1, msg) @@ -257,10 +314,15 @@ def main(): GNU = 1 SOLARIS = 2 # defaults + extractall = 0 # FIXME: currently this option has no effect at all. 
+ escape = 0 keywords = [] + outpath = '' outfile = 'messages.pot' location = GNU verbose = 0 + width = 78 + excludefilename = '' options = Options() locations = {'gnu' : options.GNU, @@ -271,12 +333,16 @@ def main(): for opt, arg in opts: if opt in ('-h', '--help'): usage(0) + elif opt in ('-a', '--extract-all'): + options.extractall = 1 + elif opt in ('-d', '--default-domain'): + options.outfile = arg + '.pot' + elif opt in ('-E', '--escape'): + options.escape = 1 elif opt in ('-k', '--keyword'): if arg is None: default_keywords = [] options.keywords.append(arg) - elif opt in ('-d', '--default-domain'): - options.outfile = arg + '.pot' elif opt in ('-n', '--add-location'): if arg is None: arg = 'gnu' @@ -287,12 +353,44 @@ def main(): usage(1, _('Invalid value for --add-location: %(arg)s') % d) elif opt in ('--no-location',): options.location = 0 + elif opt in ('-o', '--output'): + options.outfile = arg + elif opt in ('-p', '--output-dir'): + options.outpath = arg elif opt in ('-v', '--verbose'): options.verbose = 1 + elif opt in ('-V', '--version'): + print _('pygettext.py (xgettext for Python) %s') % __version__ + sys.exit(0) + elif opt in ('-w', '--width'): + try: + options.width = int(arg) + except ValueError: + d = {'arg':arg} + usage(1, _('Invalid value for --width: %(arg)s, must be int') + % d) + elif opt in ('-x', '--exclude-file'): + options.excludefilename = arg + + # calculate escapes + make_escapes(options.escapes) # calculate all keywords options.keywords.extend(default_keywords) + # initialize list of strings to exclude + if options.excludefilename: + try: + fp = open(options.excludefilename) + options.toexclude = fp.readlines() + fp.close() + except IOError: + sys.stderr.write(_("Can't read --exclude-file: %s") % + options.excludefilename) + sys.exit(1) + else: + options.toexclude = [] + # slurp through all the files eater = TokenEater(options) for filename in args: @@ -303,6 +401,8 @@ def main(): tokenize.tokenize(fp.readline, eater) fp.close() + 
if options.outpath: + options.outfile = os.path.join(options.outpath, options.outfile) fp = open(options.outfile, 'w') eater.write(fp) fp.close()