Initial implementation
commit e27db5a393, parent 31e5e37196

@@ -1 +1,269 @@
#! /usr/bin/env python

"""pygettext -- Python equivalent of xgettext(1)

Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs. Martin
von Loewis' work[1] helps considerably in this regard.

There's one hole though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python introduces
a few wrinkles, such as dual quoting characters, triple quoted strings, and
raw strings. xgettext understands none of this.

Enter pygettext, which uses Python's standard tokenize module to scan Python
source code, generating .pot files identical to what GNU xgettext[2] generates
for C and C++ code. From there, the standard GNU tools can be used.
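
For example (file name and line number are illustrative), given a module
hello.py containing

    print _("Hello world")

pygettext writes a .pot entry along the lines of:

    #: hello.py:1
    msgid "Hello world"
    msgstr ""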

A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
gettext_noop. But those can be a lot of text to include all over your code.
C and C++ have a trick: they use the C preprocessor. Most internationalized C
source includes a #define for gettext() to _() so that what has to be written
in the source is much less. Thus these are both translatable strings:

    gettext("Translatable String")
    _("Translatable String")

Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.

[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html


NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
wherever possible.

Usage: pygettext [options] filename ...

Options:

    -a
    --extract-all
        Extract all strings

    -d default-domain
    --default-domain=default-domain
        Rename the default output file from messages.pot to default-domain.pot

    -k [word]
    --keyword[=word]
        Additional keywords to look for. Without `word' means not to use the
        default keywords. The default keywords, which are always looked for
        if not explicitly disabled: _

        The default keyword list is different than GNU xgettext. You can have
        multiple -k flags on the command line.

    --no-location
        Do not write filename/lineno location comments

    -n [style]
    --add-location[=style]
        Write filename/lineno location comments indicating where each
        extracted string is found in the source. These lines appear before
        each msgid. Two styles are supported:

        Solaris  # File: filename, line: line-number
        Gnu      #: filename:line

        If style is omitted, Gnu is used. The style name is case
        insensitive. By default, locations are included.

    -h
    --help
        Print this help message and exit
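
Example (module and keyword names are illustrative):

    pygettext.py -d mymodule -k N_ *.py

scans the given files for strings wrapped in _() or N_() and writes the
extracted messages to mymodule.pot.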

"""

import os
import sys
import string
import time
import getopt
import tokenize

__version__ = '0.1'


def usage(code, msg=''):
    print __doc__ % globals()
    if msg:
        print msg
    sys.exit(code)


def normalize(s):
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
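    #
    # Illustrative example: the source token 'foo "bar"' (single quotes in
    # the source) comes back as "foo \"bar\"" -- double quoted, with any
    # embedded double quotes escaped.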
    #
    # unwrap quotes, safely
    s = eval(s, {'__builtins__':{}}, {})
    # now escape any embedded double quotes
    parts = []
    last = 0
    i = string.find(s, '"')
    while i >= 0:
        # find the number of preceding backslashes
        j = i
        n = 0
        # (check s[j-1], the character before the quote, not s[i], which is
        # the quote itself; otherwise no backslashes would ever be counted)
        while j > 0 and s[j-1] == '\\':
            j = j - 1
            n = n + 1
        if (n % 2) == 0:
            parts.append(s[last:j])
            parts.append('\\')
            parts.append(s[j:i])
        else:
            parts.append(s[last:i])
        last = i
        i = string.find(s, '"', i+1)
    else:
        parts.append(s[last:])
    if parts:
        return '"' + string.join(parts, '') + '"'
    else:
        return '"' + s + '"'


class TokenEater:
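    # A small state machine fed one token at a time by tokenize.tokenize():
    # __waiting looks for a keyword such as _, __keywordseen expects the
    # opening parenthesis, and __openseen collects string tokens until the
    # closing parenthesis, at which point the message is recorded.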
    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1

    def __call__(self, ttype, tstring, stup, etup, line):
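        # Token-eater callback for tokenize.tokenize(): ttype is the token
        # type, tstring the token text, stup and etup are (row, col) start
        # and end positions, and line is the raw source line.  Only the
        # starting row is used here.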
        # dispatch
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        if ttype == tokenize.NAME and tstring in self.__options.keywords:
            self.__state = self.__keywordseen

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings. Record the
            # line number of the first line of the strings and update the list
            # of messages seen. Reset state for the next batch. If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                msg = string.join(self.__data, '')
                entry = (self.__curfile, self.__lineno)
                linenos = self.__messages.get(msg)
                if linenos is None:
                    self.__messages[msg] = [entry]
                else:
                    linenos.append(entry)
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(normalize(tstring))
        # TBD: should we warn if we see anything else?

    def set_filename(self, filename):
        self.__curfile = filename

    def write(self, fp):
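        # Illustrative shape of one emitted entry (file name and line
        # number are examples only):
        #
        #     #: hello.py:1
        #     msgid "Hello world"
        #     msgstr ""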
        options = self.__options
        timestamp = time.ctime(time.time())
        # common header
        try:
            sys.stdout = fp
            print '# POT file generated by pygettext.py', __version__
            print '#', timestamp
            print '#'
            for k, v in self.__messages.items():
                for filename, lineno in v:
                    # location comments are different b/w Solaris and GNU
                    if options.location == options.SOLARIS:
                        print '# File: %s,' % filename, 'line: %d' % lineno
                    elif options.location == options.GNU:
                        print '#: %s:%d' % (filename, lineno)
                # TBD: sorting, normalizing
                print 'msgid', k
                # .po format requires msgstr to be a quoted (empty) string
                print 'msgstr ""'
                print
        finally:
            sys.stdout = sys.__stdout__


def main():
    default_keywords = ['_']
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'k:d:n:h',
            # long options taking a value need a trailing '='
            ['keyword=', 'default-domain=', 'help',
             'add-location=', 'no-location'])
    except getopt.error, msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        keywords = []
        outfile = 'messages.pot'
        location = GNU

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-k', '--keyword'):
            # getopt gives '' (not None) when no value is supplied
            if not arg:
                default_keywords = []
            else:
                options.keywords.append(arg)
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-n', '--add-location'):
            if not arg:
                arg = 'gnu'
            try:
                options.location = locations[string.lower(arg)]
            except KeyError:
                usage(1, 'Invalid value for --add-location: ' + arg)
        elif opt in ('--no-location',):
            options.location = 0

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        fp = open(filename)
        eater.set_filename(filename)
        tokenize.tokenize(fp.readline, eater)
        fp.close()

    fp = open(options.outfile, 'w')
    eater.write(fp)
    fp.close()


if __name__ == '__main__':
    main()