Issue #17156: pygettext.py now uses the encoding of the source file and
correctly writes and escapes non-ASCII characters.
This commit is contained in:
parent
041d553319
commit
b6ed17344b
|
@ -215,6 +215,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #17156: pygettext.py now uses the encoding of the source file and
|
||||
correctly writes and escapes non-ASCII characters.
|
||||
|
||||
- Issue #16564: Fixed regression relative to Python2 in the operation of
|
||||
email.encoders.encode_noop when used with binary data.
|
||||
|
||||
|
|
|
@ -189,8 +189,8 @@ msgstr ""
|
|||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\\n"
|
||||
"MIME-Version: 1.0\\n"
|
||||
"Content-Type: text/plain; charset=CHARSET\\n"
|
||||
"Content-Transfer-Encoding: ENCODING\\n"
|
||||
"Content-Type: text/plain; charset=%(charset)s\\n"
|
||||
"Content-Transfer-Encoding: %(encoding)s\\n"
|
||||
"Generated-By: pygettext.py %(version)s\\n"
|
||||
|
||||
''')
|
||||
|
@ -204,35 +204,32 @@ def usage(code, msg=''):
|
|||
|
||||
|
||||
|
||||
escapes = []
|
||||
|
||||
def make_escapes(pass_iso8859):
|
||||
global escapes
|
||||
if pass_iso8859:
|
||||
# Allow iso-8859 characters to pass through so that e.g. 'msgid
|
||||
def make_escapes(pass_nonascii):
|
||||
global escapes, escape
|
||||
if pass_nonascii:
|
||||
# Allow non-ascii characters to pass through so that e.g. 'msgid
|
||||
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
|
||||
# escape any character outside the 32..126 range.
|
||||
mod = 128
|
||||
escape = escape_ascii
|
||||
else:
|
||||
mod = 256
|
||||
for i in range(256):
|
||||
if 32 <= (i % mod) <= 126:
|
||||
escapes.append(chr(i))
|
||||
else:
|
||||
escapes.append("\\%03o" % i)
|
||||
escapes[ord('\\')] = '\\\\'
|
||||
escapes[ord('\t')] = '\\t'
|
||||
escapes[ord('\r')] = '\\r'
|
||||
escapes[ord('\n')] = '\\n'
|
||||
escapes[ord('\"')] = '\\"'
|
||||
escape = escape_nonascii
|
||||
escapes = [r"\%03o" % i for i in range(mod)]
|
||||
for i in range(32, 127):
|
||||
escapes[i] = chr(i)
|
||||
escapes[ord('\\')] = r'\\'
|
||||
escapes[ord('\t')] = r'\t'
|
||||
escapes[ord('\r')] = r'\r'
|
||||
escapes[ord('\n')] = r'\n'
|
||||
escapes[ord('\"')] = r'\"'
|
||||
|
||||
|
||||
def escape(s):
|
||||
global escapes
|
||||
s = list(s)
|
||||
for i in range(len(s)):
|
||||
s[i] = escapes[ord(s[i])]
|
||||
return EMPTYSTRING.join(s)
|
||||
def escape_ascii(s, encoding):
|
||||
return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
|
||||
|
||||
def escape_nonascii(s, encoding):
|
||||
return ''.join(escapes[b] for b in s.encode(encoding))
|
||||
|
||||
|
||||
def safe_eval(s):
|
||||
|
@ -240,18 +237,18 @@ def safe_eval(s):
|
|||
return eval(s, {'__builtins__':{}}, {})
|
||||
|
||||
|
||||
def normalize(s):
|
||||
def normalize(s, encoding):
|
||||
# This converts the various Python string types into a format that is
|
||||
# appropriate for .po files, namely much closer to C style.
|
||||
lines = s.split('\n')
|
||||
if len(lines) == 1:
|
||||
s = '"' + escape(s) + '"'
|
||||
s = '"' + escape(s, encoding) + '"'
|
||||
else:
|
||||
if not lines[-1]:
|
||||
del lines[-1]
|
||||
lines[-1] = lines[-1] + '\n'
|
||||
for i in range(len(lines)):
|
||||
lines[i] = escape(lines[i])
|
||||
lines[i] = escape(lines[i], encoding)
|
||||
lineterm = '\\n"\n"'
|
||||
s = '""\n"' + lineterm.join(lines) + '"'
|
||||
return s
|
||||
|
@ -448,7 +445,10 @@ class TokenEater:
|
|||
timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
|
||||
# The time stamp in the header doesn't have the same format as that
|
||||
# generated by xgettext...
|
||||
print(pot_header % {'time': timestamp, 'version': __version__}, file=fp)
|
||||
encoding = fp.encoding if fp.encoding else 'UTF-8'
|
||||
print(pot_header % {'time': timestamp, 'version': __version__,
|
||||
'charset': encoding,
|
||||
'encoding': '8bit'}, file=fp)
|
||||
# Sort the entries. First sort each particular entry's keys, then
|
||||
# sort all the entries by their first item.
|
||||
reverse = {}
|
||||
|
@ -492,7 +492,7 @@ class TokenEater:
|
|||
print(locline, file=fp)
|
||||
if isdocstring:
|
||||
print('#, docstring', file=fp)
|
||||
print('msgid', normalize(k), file=fp)
|
||||
print('msgid', normalize(k, encoding), file=fp)
|
||||
print('msgstr ""\n', file=fp)
|
||||
|
||||
|
||||
|
@ -588,7 +588,7 @@ def main():
|
|||
fp.close()
|
||||
|
||||
# calculate escapes
|
||||
make_escapes(options.escape)
|
||||
make_escapes(not options.escape)
|
||||
|
||||
# calculate all keywords
|
||||
options.keywords.extend(default_keywords)
|
||||
|
@ -621,17 +621,17 @@ def main():
|
|||
if filename == '-':
|
||||
if options.verbose:
|
||||
print(_('Reading standard input'))
|
||||
fp = sys.stdin
|
||||
fp = sys.stdin.buffer
|
||||
closep = 0
|
||||
else:
|
||||
if options.verbose:
|
||||
print(_('Working on %s') % filename)
|
||||
fp = open(filename)
|
||||
fp = open(filename, 'rb')
|
||||
closep = 1
|
||||
try:
|
||||
eater.set_filename(filename)
|
||||
try:
|
||||
tokens = tokenize.generate_tokens(fp.readline)
|
||||
tokens = tokenize.tokenize(fp.readline)
|
||||
for _token in tokens:
|
||||
eater(*_token)
|
||||
except tokenize.TokenError as e:
|
||||
|
|
Loading…
Reference in New Issue