Issue #17156: pygettext.py now uses the encoding of the source file and correctly
writes and escapes non-ASCII characters.
This commit is contained in:
parent
041d553319
commit
b6ed17344b
|
@ -215,6 +215,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #17156: pygettext.py now uses the encoding of the source file and correctly
|
||||||
|
writes and escapes non-ascii characters.
|
||||||
|
|
||||||
- Issue #16564: Fixed regression relative to Python2 in the operation of
|
- Issue #16564: Fixed regression relative to Python2 in the operation of
|
||||||
email.encoders.encode_noop when used with binary data.
|
email.encoders.encode_noop when used with binary data.
|
||||||
|
|
||||||
|
|
|
@ -189,8 +189,8 @@ msgstr ""
|
||||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
|
||||||
"Language-Team: LANGUAGE <LL@li.org>\\n"
|
"Language-Team: LANGUAGE <LL@li.org>\\n"
|
||||||
"MIME-Version: 1.0\\n"
|
"MIME-Version: 1.0\\n"
|
||||||
"Content-Type: text/plain; charset=CHARSET\\n"
|
"Content-Type: text/plain; charset=%(charset)s\\n"
|
||||||
"Content-Transfer-Encoding: ENCODING\\n"
|
"Content-Transfer-Encoding: %(encoding)s\\n"
|
||||||
"Generated-By: pygettext.py %(version)s\\n"
|
"Generated-By: pygettext.py %(version)s\\n"
|
||||||
|
|
||||||
''')
|
''')
|
||||||
|
@ -204,35 +204,32 @@ def usage(code, msg=''):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
escapes = []
|
def make_escapes(pass_nonascii):
|
||||||
|
global escapes, escape
|
||||||
def make_escapes(pass_iso8859):
|
if pass_nonascii:
|
||||||
global escapes
|
# Allow non-ascii characters to pass through so that e.g. 'msgid
|
||||||
if pass_iso8859:
|
|
||||||
# Allow iso-8859 characters to pass through so that e.g. 'msgid
|

||||||
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
|
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
|
||||||
# escape any character outside the 32..126 range.
|
# escape any character outside the 32..126 range.
|
||||||
mod = 128
|
mod = 128
|
||||||
|
escape = escape_ascii
|
||||||
else:
|
else:
|
||||||
mod = 256
|
mod = 256
|
||||||
for i in range(256):
|
escape = escape_nonascii
|
||||||
if 32 <= (i % mod) <= 126:
|
escapes = [r"\%03o" % i for i in range(mod)]
|
||||||
escapes.append(chr(i))
|
for i in range(32, 127):
|
||||||
else:
|
escapes[i] = chr(i)
|
||||||
escapes.append("\\%03o" % i)
|
escapes[ord('\\')] = r'\\'
|
||||||
escapes[ord('\\')] = '\\\\'
|
escapes[ord('\t')] = r'\t'
|
||||||
escapes[ord('\t')] = '\\t'
|
escapes[ord('\r')] = r'\r'
|
||||||
escapes[ord('\r')] = '\\r'
|
escapes[ord('\n')] = r'\n'
|
||||||
escapes[ord('\n')] = '\\n'
|
escapes[ord('\"')] = r'\"'
|
||||||
escapes[ord('\"')] = '\\"'
|
|
||||||
|
|
||||||
|
|
||||||
def escape(s):
|
def escape_ascii(s, encoding):
|
||||||
global escapes
|
return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
|
||||||
s = list(s)
|
|
||||||
for i in range(len(s)):
|
def escape_nonascii(s, encoding):
|
||||||
s[i] = escapes[ord(s[i])]
|
return ''.join(escapes[b] for b in s.encode(encoding))
|
||||||
return EMPTYSTRING.join(s)
|
|
||||||
|
|
||||||
|
|
||||||
def safe_eval(s):
|
def safe_eval(s):
|
||||||
|
@ -240,18 +237,18 @@ def safe_eval(s):
|
||||||
return eval(s, {'__builtins__':{}}, {})
|
return eval(s, {'__builtins__':{}}, {})
|
||||||
|
|
||||||
|
|
||||||
def normalize(s):
|
def normalize(s, encoding):
|
||||||
# This converts the various Python string types into a format that is
|
# This converts the various Python string types into a format that is
|
||||||
# appropriate for .po files, namely much closer to C style.
|
# appropriate for .po files, namely much closer to C style.
|
||||||
lines = s.split('\n')
|
lines = s.split('\n')
|
||||||
if len(lines) == 1:
|
if len(lines) == 1:
|
||||||
s = '"' + escape(s) + '"'
|
s = '"' + escape(s, encoding) + '"'
|
||||||
else:
|
else:
|
||||||
if not lines[-1]:
|
if not lines[-1]:
|
||||||
del lines[-1]
|
del lines[-1]
|
||||||
lines[-1] = lines[-1] + '\n'
|
lines[-1] = lines[-1] + '\n'
|
||||||
for i in range(len(lines)):
|
for i in range(len(lines)):
|
||||||
lines[i] = escape(lines[i])
|
lines[i] = escape(lines[i], encoding)
|
||||||
lineterm = '\\n"\n"'
|
lineterm = '\\n"\n"'
|
||||||
s = '""\n"' + lineterm.join(lines) + '"'
|
s = '""\n"' + lineterm.join(lines) + '"'
|
||||||
return s
|
return s
|
||||||
|
@ -448,7 +445,10 @@ class TokenEater:
|
||||||
timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
|
timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
|
||||||
# The time stamp in the header doesn't have the same format as that
|
# The time stamp in the header doesn't have the same format as that
|
||||||
# generated by xgettext...
|
# generated by xgettext...
|
||||||
print(pot_header % {'time': timestamp, 'version': __version__}, file=fp)
|
encoding = fp.encoding if fp.encoding else 'UTF-8'
|
||||||
|
print(pot_header % {'time': timestamp, 'version': __version__,
|
||||||
|
'charset': encoding,
|
||||||
|
'encoding': '8bit'}, file=fp)
|
||||||
# Sort the entries. First sort each particular entry's keys, then
|
# Sort the entries. First sort each particular entry's keys, then
|
||||||
# sort all the entries by their first item.
|
# sort all the entries by their first item.
|
||||||
reverse = {}
|
reverse = {}
|
||||||
|
@ -492,7 +492,7 @@ class TokenEater:
|
||||||
print(locline, file=fp)
|
print(locline, file=fp)
|
||||||
if isdocstring:
|
if isdocstring:
|
||||||
print('#, docstring', file=fp)
|
print('#, docstring', file=fp)
|
||||||
print('msgid', normalize(k), file=fp)
|
print('msgid', normalize(k, encoding), file=fp)
|
||||||
print('msgstr ""\n', file=fp)
|
print('msgstr ""\n', file=fp)
|
||||||
|
|
||||||
|
|
||||||
|
@ -588,7 +588,7 @@ def main():
|
||||||
fp.close()
|
fp.close()
|
||||||
|
|
||||||
# calculate escapes
|
# calculate escapes
|
||||||
make_escapes(options.escape)
|
make_escapes(not options.escape)
|
||||||
|
|
||||||
# calculate all keywords
|
# calculate all keywords
|
||||||
options.keywords.extend(default_keywords)
|
options.keywords.extend(default_keywords)
|
||||||
|
@ -621,17 +621,17 @@ def main():
|
||||||
if filename == '-':
|
if filename == '-':
|
||||||
if options.verbose:
|
if options.verbose:
|
||||||
print(_('Reading standard input'))
|
print(_('Reading standard input'))
|
||||||
fp = sys.stdin
|
fp = sys.stdin.buffer
|
||||||
closep = 0
|
closep = 0
|
||||||
else:
|
else:
|
||||||
if options.verbose:
|
if options.verbose:
|
||||||
print(_('Working on %s') % filename)
|
print(_('Working on %s') % filename)
|
||||||
fp = open(filename)
|
fp = open(filename, 'rb')
|
||||||
closep = 1
|
closep = 1
|
||||||
try:
|
try:
|
||||||
eater.set_filename(filename)
|
eater.set_filename(filename)
|
||||||
try:
|
try:
|
||||||
tokens = tokenize.generate_tokens(fp.readline)
|
tokens = tokenize.tokenize(fp.readline)
|
||||||
for _token in tokens:
|
for _token in tokens:
|
||||||
eater(*_token)
|
eater(*_token)
|
||||||
except tokenize.TokenError as e:
|
except tokenize.TokenError as e:
|
||||||
|
|
Loading…
Reference in New Issue