From c8f0892d1236df81af1811cf182692f28c85f916 Mon Sep 17 00:00:00 2001
From: Barry Warsaw <barry@python.org>
Date: Sat, 26 Feb 2000 20:56:47 +0000
Subject: [PATCH] Changes submitted by Peter Funk (some fixes/additions by
 B.Warsaw) to make pygettext more compatible with GNU xgettext, specifically:

Added -E/--escape for allowing pass-thru of iso8859-1 characters above
7 bits.

Added -o/--output option for renaming the output file from
messages.pot (there's overlap with -d/--default-domain, but GNU
xgettext has them both).

Added -p/--output-dir for specifying the output directory for
messages.pot.

Added -V/--version for printing the version number.

Added -w/--width for specifying the output page width (this is because
now pygettext, like GNU xgettext, will put several locations on the
same line to cut down on vertical space).

Added -x/--exclude-file for specifying a list of strings that are not
to be extracted from the input files.

Bumped version number to 1.0

Try to import fintl and use fintl.gettext as _ if available.  Fall
back is to use identity definition of _().

Moved the escape creation to a function make_escapes() so that its
behavior can be controlled by the -E option.

__openseen(): Support the -x option.

write(): Support -w option and vertical space preserving feature.

main(): Support new options.
---
 Tools/i18n/pygettext.py | 180 +++++++++++++++++++++++++++++++---------
 1 file changed, 140 insertions(+), 40 deletions(-)

diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index fcd6b9512d3..4ff4962d62b 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,5 +1,8 @@
 #! /usr/bin/env python
 # Originally written by Barry Warsaw <bwarsaw@python.org>
+#
+# minimally patched to make it even more xgettext compatible 
+# by Peter Funk <pf@artcom-gmbh.de>
 
 """pygettext -- Python equivalent of xgettext(1)
 
@@ -35,7 +38,8 @@ below for how to augment this.
  [2] http://www.gnu.org/software/gettext/gettext.html
 
 NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible.
+where ever possible.  However some options are still missing or are not fully
+implemented.
 
 Usage: pygettext [options] filename ...
 
@@ -45,9 +49,17 @@ Options:
     --extract-all
         Extract all strings
 
-    -d default-domain
-    --default-domain=default-domain
-        Rename the default output file from messages.pot to default-domain.pot 
+    -d name
+    --default-domain=name
+        Rename the default output file from messages.pot to name.pot 
+
+    -E
+    --escape
+        replace non-ASCII characters with octal escape sequences.
+
+    -h
+    --help
+        print this help message and exit
 
     -k [word]
     --keyword[=word]
@@ -73,13 +85,31 @@ Options:
         If style is omitted, Gnu is used.  The style name is case
         insensitive.  By default, locations are included.
 
+    -o filename
+    --output=filename
+        Rename the default output file from messages.pot to filename.
+
+    -p dir
+    --output-dir=dir
+        Output files will be placed in directory dir.
+
     -v
     --verbose
         Print the names of the files being processed.
 
-    --help
-    -h
-        print this help message and exit
+    -V
+    --version
+        Print the version of pygettext and exit.
+
+    -w columns
+    --width=columns
+        Set width of output to columns.
+
+    -x filename
+    --exclude-file=filename
+        Specify a file that contains a list of strings that are not to be
+        extracted from the input files.  Each string to be excluded must
+        appear on a line by itself in the file.
 
 """
 
@@ -90,12 +120,16 @@ import time
 import getopt
 import tokenize
 
-__version__ = '0.2'
+__version__ = '1.0'
 
 
 
 # for selftesting
-def _(s): return s
+try:
+    import fintl
+    _ = fintl.gettext
+except ImportError:
+    def _(s): return s
 
 
 # The normal pot-file header. msgmerge and EMACS' po-mode work better if
@@ -125,21 +159,31 @@ def usage(code, msg=''):
         print msg
     sys.exit(code)
 
+
 
 escapes = []
-for i in range(256):
-    if i < 32 or i > 127:
-        escapes.append("\\%03o" % i)
-    else:
-        escapes.append(chr(i))
 
-escapes[ord('\\')] = '\\\\'
-escapes[ord('\t')] = '\\t'
-escapes[ord('\r')] = '\\r'
-escapes[ord('\n')] = '\\n'
-escapes[ord('\"')] = '\\"'
+def make_escapes(pass_iso8859):
+    """Append one escape entry per 8-bit character to global `escapes'."""
+    global escapes
+    # With pass_iso8859 the printable test is done modulo 128, so that e.g.
+    # 'msgid "Höhe"' does not become 'msgid "H\366he"'; the entry itself is
+    # always built from the real code i, never from the folded value.
+    mod = pass_iso8859 and 128 or 256
+    for i in range(256):
+        if 32 <= (i % mod) <= 126:
+            escapes.append(chr(i))
+        else:
+            escapes.append("\\%03o" % i)
+    escapes[ord('\\')] = '\\\\'
+    escapes[ord('\t')] = '\\t'
+    escapes[ord('\r')] = '\\r'
+    escapes[ord('\n')] = '\\n'
+    escapes[ord('\"')] = '\\"'
+
 
 def escape(s):
+    global escapes
     s = list(s)
     for i in range(len(s)):
         s[i] = escapes[ord(s[i])]
@@ -200,12 +244,13 @@ class TokenEater:
             # were no strings inside _(), then just ignore this entry.
             if self.__data:
                 msg = string.join(self.__data, '')
-                entry = (self.__curfile, self.__lineno)
-                linenos = self.__messages.get(msg)
-                if linenos is None:
-                    self.__messages[msg] = [entry]
-                else:
-                    linenos.append(entry)
+                if not msg in self.__options.toexclude:
+                    entry = (self.__curfile, self.__lineno)
+                    linenos = self.__messages.get(msg)
+                    if linenos is None:
+                        self.__messages[msg] = [entry]
+                    else:
+                        linenos.append(entry)
             self.__state = self.__waiting
         elif ttype == tokenize.STRING:
             self.__data.append(safe_eval(tstring))
@@ -222,20 +267,30 @@ class TokenEater:
             sys.stdout = fp
             # The time stamp in the header doesn't have the same format
             # as that generated by xgettext...
-            print pot_header % {'time': timestamp, 'version':__version__}
+            print pot_header % {'time': timestamp, 'version': __version__}
             for k, v in self.__messages.items():
-                for filename, lineno in v:
-                    # location comments are different b/w Solaris and GNU
-                    d = {'filename': filename,
-                         'lineno': lineno}
-                    if options.location == options.SOLARIS:
+                # location comments are different b/w Solaris and GNU:
+                if options.location == options.SOLARIS:
+                    for filename, lineno in v:
+                        d = {'filename': filename, 'lineno': lineno}
                         print _('# File: %(filename)s, line: %(lineno)d') % d
-                    elif options.location == options.GNU:
-                        print _('#: %(filename)s:%(lineno)d') % d
+                elif options.location == options.GNU:
+                    # fit as many locations on one line as 'options.width'
+                    # allows; an over-long location still gets its own line
+                    locline = '#:'
+                    for filename, lineno in v:
+                        d = {'filename': filename, 'lineno': lineno}
+                        s = _(' %(filename)s:%(lineno)d') % d
+                        if len(locline) > 2 and \
+                           len(locline) + len(s) > options.width:
+                            print locline
+                            locline = '#:'
+                        locline = locline + s
+                    if len(locline) > 2:
+                        print locline
                 # TBD: sorting, normalizing
                 print 'msgid', normalize(k)
-                print 'msgstr ""'
-                print
+                print 'msgstr ""\n'
         finally:
             sys.stdout = sys.__stdout__
 
@@ -245,9 +300,11 @@ def main():
     try:
         opts, args = getopt.getopt(
             sys.argv[1:],
-            'k:d:n:hv',
-            ['keyword', 'default-domain', 'help',
-             'add-location=', 'no-location', 'verbose'])
+            'ad:Ehk:n:o:p:Vvw:x:',
+            # long options that take an argument must end in '='
+            ['extract-all', 'default-domain=', 'escape', 'help', 'keyword=',
+             'add-location=', 'no-location', 'output=', 'output-dir=',
+             'verbose', 'version', 'width=', 'exclude-file='])
     except getopt.error, msg:
         usage(1, msg)
 
@@ -257,10 +314,15 @@ def main():
         GNU = 1
         SOLARIS = 2
         # defaults
+        extractall = 0 # FIXME: currently this option has no effect at all.
+        escape = 0
         keywords = []
+        outpath = ''
         outfile = 'messages.pot'
         location = GNU
         verbose = 0
+        width = 78
+        excludefilename = ''
 
     options = Options()
     locations = {'gnu' : options.GNU,
@@ -271,12 +333,16 @@ def main():
     for opt, arg in opts:
         if opt in ('-h', '--help'):
             usage(0)
+        elif opt in ('-a', '--extract-all'):
+            options.extractall = 1
+        elif opt in ('-d', '--default-domain'):
+            options.outfile = arg + '.pot'
+        elif opt in ('-E', '--escape'):
+            options.escape = 1
         elif opt in ('-k', '--keyword'):
             if arg is None:
                 default_keywords = []
             options.keywords.append(arg)
-        elif opt in ('-d', '--default-domain'):
-            options.outfile = arg + '.pot'
         elif opt in ('-n', '--add-location'):
             if arg is None:
                 arg = 'gnu'
@@ -287,12 +353,44 @@ def main():
                 usage(1, _('Invalid value for --add-location: %(arg)s') % d)
         elif opt in ('--no-location',):
             options.location = 0
+        elif opt in ('-o', '--output'):
+            options.outfile = arg
+        elif opt in ('-p', '--output-dir'):
+            options.outpath = arg
         elif opt in ('-v', '--verbose'):
             options.verbose = 1
+        elif opt in ('-V', '--version'):
+            print _('pygettext.py (xgettext for Python) %s') % __version__
+            sys.exit(0)
+        elif opt in ('-w', '--width'):
+            try:
+                options.width = int(arg)
+            except ValueError:
+                d = {'arg':arg}
+                usage(1, _('Invalid value for --width: %(arg)s, must be int')
+                      % d)
+        elif opt in ('-x', '--exclude-file'):
+            options.excludefilename = arg
+
+    # calculate escapes: -E means escape, so pass-through is its negation
+    make_escapes(not options.escape)
 
     # calculate all keywords
     options.keywords.extend(default_keywords)
 
+    # initialize list of strings to exclude (one per line, newline removed)
+    if options.excludefilename:
+        try:
+            fp = open(options.excludefilename)
+            options.toexclude = filter(None, string.split(fp.read(), '\n'))
+            fp.close()
+        except IOError:
+            sys.stderr.write(_("Can't read --exclude-file: %s") %
+                             options.excludefilename)
+            sys.exit(1)
+    else:
+        options.toexclude = []
+
     # slurp through all the files
     eater = TokenEater(options)
     for filename in args:
@@ -303,6 +401,8 @@ def main():
         tokenize.tokenize(fp.readline, eater)
         fp.close()
 
+    if options.outpath:
+        options.outfile = os.path.join(options.outpath, options.outfile)
     fp = open(options.outfile, 'w')
     eater.write(fp)
     fp.close()