2010-03-11 18:53:45 -04:00
|
|
|
|
#! /usr/bin/env python3
|
2002-11-22 04:36:54 -04:00
|
|
|
|
# -*- coding: iso-8859-1 -*-
|
2013-12-22 21:45:38 -04:00
|
|
|
|
# Originally written by Barry Warsaw <barry@python.org>
|
2000-02-26 16:56:47 -04:00
|
|
|
|
#
|
2003-04-16 15:08:23 -03:00
|
|
|
|
# Minimally patched to make it even more xgettext compatible
|
2000-02-26 16:56:47 -04:00
|
|
|
|
# by Peter Funk <pf@artcom-gmbh.de>
|
2002-11-22 04:36:54 -04:00
|
|
|
|
#
|
|
|
|
|
# 2002-11-22 Jürgen Hermann <jh@web.de>
|
|
|
|
|
# Added checks that _() only contains string literals, and
|
|
|
|
|
# command line args are resolved to module lists, i.e. you
|
|
|
|
|
# can now pass a filename, a module or package name, or a
|
|
|
|
|
# directory (including globbing chars, important for Win32).
|
|
|
|
|
# Made docstring fit in 80 chars wide displays using pydoc.
|
|
|
|
|
#
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2002-11-22 04:36:54 -04:00
|
|
|
|
# for selftesting
|
|
|
|
|
try:
|
|
|
|
|
import fintl
|
|
|
|
|
_ = fintl.gettext
|
|
|
|
|
except ImportError:
|
|
|
|
|
_ = lambda s: s
|
|
|
|
|
|
|
|
|
|
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
2002-11-22 04:36:54 -04:00
|
|
|
|
internationalization of C programs. Most of these tools are independent of
|
|
|
|
|
the programming language and can be used from within Python programs.
|
2003-04-16 15:08:23 -03:00
|
|
|
|
Martin von Loewis' work[1] helps considerably in this regard.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
1999-11-03 14:47:52 -04:00
|
|
|
|
There's one problem though; xgettext is the program that scans source code
|
2002-11-22 04:36:54 -04:00
|
|
|
|
looking for message strings, but it groks only C (or C++). Python
|
|
|
|
|
introduces a few wrinkles, such as dual quoting characters, triple quoted
|
2003-04-16 15:08:23 -03:00
|
|
|
|
strings, and raw strings. xgettext understands none of this.
|
2002-11-22 04:36:54 -04:00
|
|
|
|
|
|
|
|
|
Enter pygettext, which uses Python's standard tokenize module to scan
|
|
|
|
|
Python source code, generating .pot files identical to what GNU xgettext[2]
|
|
|
|
|
generates for C and C++ code. From there, the standard GNU tools can be
|
2003-04-16 15:08:23 -03:00
|
|
|
|
used.
|
2002-11-22 04:36:54 -04:00
|
|
|
|
|
|
|
|
|
A word about marking Python strings as candidates for translation. GNU
|
|
|
|
|
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
|
|
|
|
and gettext_noop. But those can be a lot of text to include all over your
|
|
|
|
|
code. C and C++ have a trick: they use the C preprocessor. Most
|
|
|
|
|
internationalized C source includes a #define for gettext() to _() so that
|
|
|
|
|
what has to be written in the source is much less. Thus these are both
|
2003-04-16 15:08:23 -03:00
|
|
|
|
translatable strings:
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
gettext("Translatable String")
|
|
|
|
|
_("Translatable String")
|
|
|
|
|
|
|
|
|
|
Python of course has no preprocessor so this doesn't work so well. Thus,
|
|
|
|
|
pygettext searches only for _() by default, but see the -k/--keyword flag
|
|
|
|
|
below for how to augment this.
|
|
|
|
|
|
|
|
|
|
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
|
|
|
|
[2] http://www.gnu.org/software/gettext/gettext.html
|
|
|
|
|
|
2002-11-22 04:36:54 -04:00
|
|
|
|
NOTE: pygettext attempts to be option and feature compatible with GNU
|
|
|
|
|
xgettext where ever possible. However some options are still missing or are
|
|
|
|
|
not fully implemented. Also, xgettext's use of command line switches with
|
|
|
|
|
option arguments is broken, and in these cases, pygettext just defines
|
2003-04-16 15:08:23 -03:00
|
|
|
|
additional switches.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
Usage: pygettext [options] inputfile ...
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
|
|
|
|
|
-a
|
|
|
|
|
--extract-all
|
2001-07-27 13:47:18 -03:00
|
|
|
|
Extract all strings.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-02-26 16:56:47 -04:00
|
|
|
|
-d name
|
|
|
|
|
--default-domain=name
|
2001-07-27 13:47:18 -03:00
|
|
|
|
Rename the default output file from messages.pot to name.pot.
|
2000-02-26 16:56:47 -04:00
|
|
|
|
|
|
|
|
|
-E
|
|
|
|
|
--escape
|
2000-10-27 01:56:28 -03:00
|
|
|
|
Replace non-ASCII characters with octal escape sequences.
|
|
|
|
|
|
|
|
|
|
-D
|
|
|
|
|
--docstrings
|
2002-11-22 04:36:54 -04:00
|
|
|
|
Extract module, class, method, and function docstrings. These do
|
|
|
|
|
not need to be wrapped in _() markers, and in fact cannot be for
|
|
|
|
|
Python to consider them docstrings. (See also the -X option).
|
2000-02-26 16:56:47 -04:00
|
|
|
|
|
|
|
|
|
-h
|
|
|
|
|
--help
|
2001-07-27 13:47:18 -03:00
|
|
|
|
Print this help message and exit.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
-k word
|
|
|
|
|
--keyword=word
|
|
|
|
|
Keywords to look for in addition to the default set, which are:
|
|
|
|
|
%(DEFAULTKEYWORDS)s
|
|
|
|
|
|
|
|
|
|
You can have multiple -k flags on the command line.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
-K
|
|
|
|
|
--no-default-keywords
|
|
|
|
|
Disable the default set of keywords (see above). Any keywords
|
|
|
|
|
explicitly added with the -k/--keyword option are still recognized.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
--no-location
|
2000-03-08 11:18:35 -04:00
|
|
|
|
Do not write filename/lineno location comments.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
-n
|
|
|
|
|
--add-location
|
1999-08-13 17:59:48 -03:00
|
|
|
|
Write filename/lineno location comments indicating where each
|
|
|
|
|
extracted string is found in the source. These lines appear before
|
2000-03-08 11:18:35 -04:00
|
|
|
|
each msgid. The style of comments is controlled by the -S/--style
|
|
|
|
|
option. This is the default.
|
|
|
|
|
|
2000-10-27 01:56:28 -03:00
|
|
|
|
-o filename
|
|
|
|
|
--output=filename
|
|
|
|
|
Rename the default output file from messages.pot to filename. If
|
|
|
|
|
filename is `-' then the output is sent to standard out.
|
|
|
|
|
|
|
|
|
|
-p dir
|
|
|
|
|
--output-dir=dir
|
|
|
|
|
Output files will be placed in directory dir.
|
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
-S stylename
|
|
|
|
|
--style stylename
|
|
|
|
|
Specify which style to use for location comments. Two styles are
|
|
|
|
|
supported:
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
Solaris # File: filename, line: line-number
|
2000-03-08 11:18:35 -04:00
|
|
|
|
GNU #: filename:line
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
The style name is case insensitive. GNU style is the default.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
1999-11-03 14:47:52 -04:00
|
|
|
|
-v
|
|
|
|
|
--verbose
|
|
|
|
|
Print the names of the files being processed.
|
|
|
|
|
|
2000-02-26 16:56:47 -04:00
|
|
|
|
-V
|
|
|
|
|
--version
|
|
|
|
|
Print the version of pygettext and exit.
|
|
|
|
|
|
|
|
|
|
-w columns
|
|
|
|
|
--width=columns
|
|
|
|
|
Set width of output to columns.
|
|
|
|
|
|
|
|
|
|
-x filename
|
|
|
|
|
--exclude-file=filename
|
|
|
|
|
Specify a file that contains a list of strings that are not be
|
|
|
|
|
extracted from the input files. Each string to be excluded must
|
|
|
|
|
appear on a line by itself in the file.
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2001-07-27 13:47:18 -03:00
|
|
|
|
-X filename
|
|
|
|
|
--no-docstrings=filename
|
|
|
|
|
Specify a file that contains a list of files (one per line) that
|
|
|
|
|
should not have their docstrings extracted. This is only useful in
|
|
|
|
|
conjunction with the -D option above.
|
2000-03-08 11:18:35 -04:00
|
|
|
|
|
2001-07-27 13:47:18 -03:00
|
|
|
|
If `inputfile' is -, standard input is read.
|
2002-11-22 04:36:54 -04:00
|
|
|
|
""")
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
import os
|
2016-04-12 13:46:10 -03:00
|
|
|
|
import importlib.machinery
|
|
|
|
|
import importlib.util
|
1999-08-13 17:59:48 -03:00
|
|
|
|
import sys
|
2003-04-16 15:08:23 -03:00
|
|
|
|
import glob
|
1999-08-13 17:59:48 -03:00
|
|
|
|
import time
|
|
|
|
|
import getopt
|
2020-11-09 18:50:45 -04:00
|
|
|
|
import ast
|
2002-11-22 04:36:54 -04:00
|
|
|
|
import token
|
1999-08-13 17:59:48 -03:00
|
|
|
|
import tokenize
|
|
|
|
|
|
2002-11-22 04:36:54 -04:00
|
|
|
|
__version__ = '1.5'
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
default_keywords = ['_']
|
|
|
|
|
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
EMPTYSTRING = ''
|
1999-11-03 14:47:52 -04:00
|
|
|
|
|
|
|
|
|
|
2000-03-08 11:18:35 -04:00
|
|
|
|
|
2001-07-27 13:47:18 -03:00
|
|
|
|
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
|
|
|
|
|
# there.
|
1999-11-03 14:47:52 -04:00
|
|
|
|
pot_header = _('''\
|
|
|
|
|
# SOME DESCRIPTIVE TITLE.
|
|
|
|
|
# Copyright (C) YEAR ORGANIZATION
|
|
|
|
|
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
|
|
|
|
|
#
|
|
|
|
|
msgid ""
|
|
|
|
|
msgstr ""
|
|
|
|
|
"Project-Id-Version: PACKAGE VERSION\\n"
|
2001-03-01 18:56:17 -04:00
|
|
|
|
"POT-Creation-Date: %(time)s\\n"
|
|
|
|
|
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
|
1999-11-03 14:47:52 -04:00
|
|
|
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
|
|
|
|
|
"Language-Team: LANGUAGE <LL@li.org>\\n"
|
|
|
|
|
"MIME-Version: 1.0\\n"
|
2013-02-09 16:37:22 -04:00
|
|
|
|
"Content-Type: text/plain; charset=%(charset)s\\n"
|
|
|
|
|
"Content-Transfer-Encoding: %(encoding)s\\n"
|
1999-11-03 14:47:52 -04:00
|
|
|
|
"Generated-By: pygettext.py %(version)s\\n"
|
|
|
|
|
|
|
|
|
|
''')
|
|
|
|
|
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    The help text is the module ``__doc__`` interpolated with the module
    globals.  If ``msg`` is truthy it is printed on its own line after the
    help text.  The process then exits with status ``code``.
    """
    outputs = [__doc__ % globals()]
    if msg:
        outputs.append(msg)
    for text in outputs:
        print(text, file=sys.stderr)
    sys.exit(code)
|
|
|
|
|
|
2000-02-26 16:56:47 -04:00
|
|
|
|
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2013-02-09 16:37:22 -04:00
|
|
|
|
def make_escapes(pass_nonascii):
    """Build the global escape table and select the global escape function.

    Sets the module globals ``escapes`` (a list indexed by character/byte
    value giving the replacement text) and ``escape`` (the function that
    applies it).
    """
    global escapes, escape
    if pass_nonascii:
        # Allow non-ascii characters to pass through, so that e.g.
        # 'msgid "Höhe"' does not result in 'msgid "H\366he"'.  Only the
        # ASCII range needs a table in this mode.
        table_size, escape = 128, escape_ascii
    else:
        # Otherwise we escape any character outside the 32..126 range.
        table_size, escape = 256, escape_nonascii
    # Printable ASCII maps to itself, everything else to an octal escape.
    table = [chr(code) if 32 <= code < 127 else r"\%03o" % code
             for code in range(table_size)]
    # Characters with a dedicated C-style escape override the defaults.
    for char, replacement in (('\\', r'\\'),
                              ('\t', r'\t'),
                              ('\r', r'\r'),
                              ('\n', r'\n'),
                              ('\"', r'\"')):
        table[ord(char)] = replacement
    escapes = table
|
|
|
|
|
|
2000-02-26 16:56:47 -04:00
|
|
|
|
|
2013-02-09 16:37:22 -04:00
|
|
|
|
def escape_ascii(s, encoding):
    """Escape only the ASCII characters of ``s`` using the global table.

    Characters with a code point of 128 or above are passed through
    unchanged.  ``encoding`` is accepted for signature parity with
    escape_nonascii() and is not used.
    """
    pieces = []
    for char in s:
        code = ord(char)
        pieces.append(char if code >= 128 else escapes[code])
    return ''.join(pieces)
|
1999-11-03 14:47:52 -04:00
|
|
|
|
|
2013-02-09 16:37:22 -04:00
|
|
|
|
def escape_nonascii(s, encoding):
    """Encode ``s`` with ``encoding`` and escape it byte by byte.

    Each byte of the encoded string is replaced by its entry in the global
    ``escapes`` table; the replacements are concatenated and returned.
    """
    encoded = s.encode(encoding)
    return ''.join([escapes[byte] for byte in encoded])
|
1999-11-03 14:47:52 -04:00
|
|
|
|
|
|
|
|
|
|
2018-04-19 03:23:03 -03:00
|
|
|
|
def is_literal_string(s):
    """Return whether the token text ``s`` starts like a plain string literal.

    Accepted forms are a leading quote character, or a single ``r``/``R``/
    ``u``/``U`` prefix immediately followed by a quote character.
    """
    quotes = ('"', "'")
    first = s[0]
    if first in quotes:
        return True
    if first in 'rRuU':
        return s[1] in quotes
    return False
|
|
|
|
|
|
|
|
|
|
|
1999-11-03 14:47:52 -04:00
|
|
|
|
def safe_eval(s):
    """Return the value of the Python literal in the source text ``s``.

    Used to unwrap the quotes of a string token.  ast.literal_eval() only
    accepts literal syntax, whereas eval() with an emptied ``__builtins__``
    still evaluates arbitrary expressions such as attribute access.

    Raises ValueError or SyntaxError if ``s`` is not a valid literal.
    """
    # unwrap quotes, safely
    return ast.literal_eval(s)
|
|
|
|
|
|
|
|
|
|
|
2013-02-09 16:37:22 -04:00
|
|
|
|
def normalize(s, encoding):
    """Convert the string ``s`` into the quoted form used in .po files.

    A single-line string becomes one double-quoted, escaped string.  A
    multi-line string becomes an empty ``""`` line followed by one quoted
    line per source line, each (except possibly the last) ending in ``\\n``.
    Escaping is done by the module-level ``escape`` function.
    """
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
    if '\n' not in s:
        return '"' + escape(s, encoding) + '"'
    parts = s.split('\n')
    if parts[-1] == '':
        # The text ended with a newline: drop the empty tail and put the
        # newline back on the real last line so it gets escaped with it.
        parts.pop()
        parts[-1] += '\n'
    escaped = [escape(part, encoding) for part in parts]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
2002-11-22 04:36:54 -04:00
|
|
|
|
|
|
|
|
|
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'"""
    return any(char in str for char in set)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    If ``name`` does not exist on disk it is first treated as a glob pattern
    (when it contains any of ``*?[]``) and otherwise as an importable module
    name, which is resolved to the origin of its import spec.  A directory is
    walked recursively (skipping ``CVS`` directories) and every file with the
    Python source suffix is returned.  Returns an empty list when nothing
    can be found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if any(char in name for char in "*?[]"):
            found = []
            for match in glob.glob(name):
                found.extend(getFilesForName(match))
            return found

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            spec = None
        # find_spec() returns None, rather than raising, when no module of
        # that name exists, so the spec must be checked before use.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        found = []
        # get extension for python source files
        py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if 'CVS' in dirs:
                dirs.remove('CVS')
            # add all *.py files to list
            found.extend(
                os.path.join(root, filename) for filename in files
                if os.path.splitext(filename)[1] == py_ext
                )
        return found
    if os.path.exists(name):
        # a single file
        return [name]

    return []
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TokenEater:
    """State machine that consumes tokenize tokens and records messages.

    An instance is called once per token (see ``__call__``); the current
    state is a bound method stored in ``self.__state``.  Collected messages
    are kept in ``self.__messages``, a dict mapping each message string to a
    dict of ``(filename, lineno) -> isdocstring`` entries, and are emitted in
    .pot format by ``write()``.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch to the current state; only the start row is passed on
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for docstrings, keywords and f-strings."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                # tokenize.tokenize() always yields an ENCODING token first;
                # like comments and blank lines it must not end the search
                # for the module docstring.
                if ttype in (tokenize.COMMENT, tokenize.NL,
                             tokenize.ENCODING):
                    return
                # Any other token means there is no module docstring.  Fall
                # through so that this token is still examined below.
                self.__freshmodule = 0
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen
            return
        if ttype == tokenize.STRING:
            self.__scan_fstring(tstring, lineno)

    def __scan_fstring(self, tstring, lineno):
        """Record keyword calls found inside the f-string token ``tstring``.

        Tokens that do not parse to an f-string are ignored.  Calls to a
        keyword function that do not have exactly one positional constant
        argument and no keyword arguments are reported on stderr and skipped.
        """
        maybe_fstring = ast.parse(tstring, mode='eval').body
        if not isinstance(maybe_fstring, ast.JoinedStr):
            return
        for value in maybe_fstring.values:
            if not isinstance(value, ast.FormattedValue):
                continue
            for call in ast.walk(value):
                if not isinstance(call, ast.Call):
                    continue
                func = call.func
                if isinstance(func, ast.Name):
                    func_name = func.id
                elif isinstance(func, ast.Attribute):
                    func_name = func.attr
                else:
                    continue

                if func_name not in self.__options.keywords:
                    continue
                if len(call.args) != 1:
                    self.__warn_fstring_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected amount of'
                        ' positional arguments in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if call.keywords:
                    self.__warn_fstring_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected keyword arguments'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                arg = call.args[0]
                if not isinstance(arg, ast.Constant):
                    self.__warn_fstring_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected argument type'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if isinstance(arg.value, str):
                    self.__addentry(arg.value, lineno)

    def __warn_fstring_call(self, template, tstring, call, lineno):
        """Print a warning about a gettext call inside an f-string."""
        print(template % {
            # fall back to the whole token if the segment cannot be located
            'source_segment': ast.get_source_segment(tstring, call) or tstring,
            'file': self.__curfile,
            'lineno': lineno
            }, file=sys.stderr)

    def __suiteseen(self, ttype, tstring, lineno):
        """After 'class'/'def': wait for the colon that opens the suite."""
        # skip over any enclosure pairs until we see the colon
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                # we see a colon and we're not in an enclosure: end of def
                self.__state = self.__suitedocstring
            elif tstring in '([{':
                self.__enclosurecount += 1
            elif tstring in ')]}':
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """After the suite colon: take the first string as the docstring."""
        # ignore any intervening noise
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT, tokenize.NL):
            # there was no class docstring; hand the token to the idle state
            # so that a keyword call or nested class/def that starts the
            # suite is not lost
            self.__state = self.__waiting
            self.__waiting(ttype, tstring, lineno)

    def __keywordseen(self, ttype, tstring, lineno):
        """After a keyword name: expect the opening parenthesis."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Inside keyword(...): collect string literals until ')'."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record ``msg`` at (current file, lineno) unless it is excluded."""
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file: remember its name and expect a module
        docstring again."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the header and all collected messages to ``fp`` in .pot
        format."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        print(pot_header % {'time': timestamp, 'version': __version__,
                            'charset': encoding,
                            'encoding': '8bit'}, file=fp)
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
|
2000-10-27 01:56:28 -03:00
|
|
|
|
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Command line entry point.

    Parses ``sys.argv`` with getopt, resolves the positional arguments to
    input files, feeds every token of every input to a TokenEater and finally
    writes the collected messages to the output file (or standard output
    when the output name is ``-``).  Exits through usage() on option errors.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # takes a filename, like its short form -X
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            with open(arg) as fp:
                for line in fp:
                    # strip only the line terminator, so that a final line
                    # without one keeps its last character
                    options.nodocstrings[line.rstrip('\n')] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                # one string per line; drop the line terminators so the
                # entries can match the extracted messages
                options.toexclude = [line.rstrip('\n') for line in fp]
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
|
1999-08-13 17:59:48 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
main()
|
2000-05-02 16:28:30 -03:00
|
|
|
|
# some more test strings
|
2003-04-16 15:08:23 -03:00
|
|
|
|
# this one creates a warning
|
|
|
|
|
_('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
|
2002-11-22 04:36:54 -04:00
|
|
|
|
_('more' 'than' 'one' 'string')
|