From ba9c6645f795c7bf82ee097a240cbd060605afe7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 11 May 2011 00:57:29 +0200 Subject: [PATCH] Issue #10419: Fix build_scripts command of distutils to correctly handle non-ASCII scripts. Open and write the script in binary mode, but ensure that the shebang is decodable from UTF-8 and from the encoding of the script. --- Lib/distutils/command/build_scripts.py | 47 ++++++++++++++++++++------ Misc/NEWS | 4 +++ 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/Lib/distutils/command/build_scripts.py b/Lib/distutils/command/build_scripts.py index 8b08bfeaf02..b3c767e319e 100644 --- a/Lib/distutils/command/build_scripts.py +++ b/Lib/distutils/command/build_scripts.py @@ -11,9 +11,11 @@ from distutils.core import Command from distutils.dep_util import newer from distutils.util import convert_path, Mixin2to3 from distutils import log +import sys +import tokenize # check if Python is called on the first line with this expression -first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$') +first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') class build_scripts(Command): @@ -74,12 +76,14 @@ class build_scripts(Command): # that way, we'll get accurate feedback if we can read the # script. 
try: - f = open(script, "r") + f = open(script, "rb") except IOError: if not self.dry_run: raise f = None else: + encoding, lines = tokenize.detect_encoding(f.readline) + f.seek(0) first_line = f.readline() if not first_line: self.warn("%s is an empty file (skipping)" % script) @@ -88,25 +92,46 @@ class build_scripts(Command): match = first_line_re.match(first_line) if match: adjust = True - post_interp = match.group(1) or '' + post_interp = match.group(1) or b'' if adjust: log.info("copying and adjusting %s -> %s", script, self.build_dir) updated_files.append(outfile) if not self.dry_run: - outf = open(outfile, "w") if not sysconfig.python_build: - outf.write("#!%s%s\n" % - (self.executable, - post_interp)) + executable = self.executable else: - outf.write("#!%s%s\n" % - (os.path.join( + executable = os.path.join( sysconfig.get_config_var("BINDIR"), "python%s%s" % (sysconfig.get_config_var("VERSION"), - sysconfig.get_config_var("EXE"))), - post_interp)) + sysconfig.get_config_var("EXE"))) + executable = executable.encode(sys.getfilesystemencoding(), + 'surrogateescape') + shebang = b"#!" + executable + post_interp + b"\n" + # Python parser starts to read a script using UTF-8 until + # it gets a #coding:xxx cookie. The shebang has to be the + # first line of a file, the #coding:xxx cookie cannot be + # written before. So the shebang has to be decodable from + # UTF-8. + try: + shebang.decode('utf-8') + except UnicodeDecodeError: + raise ValueError( + "The shebang ({!r}) is not decodable " + "from utf-8".format(shebang)) + # If the script is encoded to a custom encoding (use a + # #coding:xxx cookie), the shebang has to be decodable from + # the script encoding too. 
+ try: + shebang.decode(encoding) + except UnicodeDecodeError: + raise ValueError( + "The shebang ({!r}) is not decodable " + "from the script encoding ({})" + .format(shebang, encoding)) + outf = open(outfile, "wb") + outf.write(shebang) outf.writelines(f.readlines()) outf.close() if f: diff --git a/Misc/NEWS b/Misc/NEWS index cdb63bed6fc..b77a9a6130c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -69,6 +69,10 @@ Core and Builtins Library ------- +- Issue #10419: Fix build_scripts command of distutils to correctly handle + non-ASCII scripts. Open and write the script in binary mode, but ensure that + the shebang is decodable from UTF-8 and from the encoding of the script. + - Issue #12012: ssl.PROTOCOL_SSLv2 becomes optional. - Issue #11164: Stop trying to use _xmlplus in the xml module.