From 7fed7bd8bb628f0f09c6011871a4ce68afb41b18 Mon Sep 17 00:00:00 2001 From: andyclegg Date: Mon, 23 Oct 2017 03:01:19 +0100 Subject: [PATCH] bpo-31756: subprocess.run should alias universal_newlines to text (#4049) Improve human friendliness of the Popen API: Add text=False as a keyword-only argument to subprocess.Popen along with a Popen attribute .text_mode and set this based on the encoding/errors/universal_newlines/text arguments. The universal_newlines parameter and attribute are maintained for backwards compatibility. --- Doc/library/subprocess.rst | 19 +++-- Lib/subprocess.py | 76 +++++++++++++------ Lib/test/test_subprocess.py | 73 +++++++++--------- .../2017-10-20-12-57-52.bpo-31756.IxCvGB.rst | 3 + 4 files changed, 104 insertions(+), 67 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-10-20-12-57-52.bpo-31756.IxCvGB.rst diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 21b1299d6f8..693355cce7f 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -61,7 +61,7 @@ compatibility with older versions, see the :ref:`call-function-trio` section. The *input* argument is passed to :meth:`Popen.communicate` and thus to the subprocess's stdin. If used it must be a byte sequence, or a string if - *encoding* or *errors* is specified or *universal_newlines* is true. When + *encoding* or *errors* is specified or *text* is true. When used, the internal :class:`Popen` object is automatically created with ``stdin=PIPE``, and the *stdin* argument may not be used as well. @@ -70,10 +70,11 @@ compatibility with older versions, see the :ref:`call-function-trio` section. exception hold the arguments, the exit code, and stdout and stderr if they were captured. - If *encoding* or *errors* are specified, or *universal_newlines* is true, + If *encoding* or *errors* are specified, or *text* is true, file objects for stdin, stdout and stderr are opened in text mode using the specified *encoding* and *errors* or the :class:`io.TextIOWrapper` default. - Otherwise, file objects are opened in binary mode. + The *universal_newlines* argument is equivalent to *text* and is provided + for backwards compatibility. By default, file objects are opened in binary mode. Examples:: @@ -95,6 +96,10 @@ compatibility with older versions, see the :ref:`call-function-trio` section. Added *encoding* and *errors* parameters + .. versionchanged:: 3.7 + + Added the *text* parameter, as a more understandable alias of *universal_newlines* + .. class:: CompletedProcess The return value from :func:`run`, representing a process that has finished. @@ -114,8 +119,8 @@ compatibility with older versions, see the :ref:`call-function-trio` section. .. attribute:: stdout Captured stdout from the child process. A bytes sequence, or a string if - :func:`run` was called with an encoding or errors. ``None`` if stdout was not - captured. + :func:`run` was called with an encoding, errors, or text=True. + ``None`` if stdout was not captured. If you ran the process with ``stderr=subprocess.STDOUT``, stdout and stderr will be combined in this attribute, and :attr:`stderr` will be @@ -124,8 +129,8 @@ compatibility with older versions, see the :ref:`call-function-trio` section. .. attribute:: stderr Captured stderr from the child process. A bytes sequence, or a string if - :func:`run` was called with an encoding or errors. ``None`` if stderr was not - captured. + :func:`run` was called with an encoding, errors, or text=True. + ``None`` if stderr was not captured. .. method:: check_returncode() diff --git a/Lib/subprocess.py b/Lib/subprocess.py index dd994e2aaf1..c7e568fafec 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -320,8 +320,11 @@ def check_output(*popenargs, timeout=None, **kwargs): ... input=b"when in the course of fooman events\n") b'when in the course of barman events\n' - If universal_newlines=True is passed, the "input" argument must be a - string and the return value will be a string rather than bytes. + By default, all communication is in bytes, and therefore any "input" + should be bytes, and the return value wil be bytes. If in text mode, + any "input" should be a string, and the return value will be a string + decoded according to locale encoding, or by "encoding" if set. Text mode + is triggered by setting any of text, encoding, errors or universal_newlines. """ if 'stdout' in kwargs: raise ValueError('stdout argument not allowed, it will be overridden.') @@ -384,15 +387,17 @@ def run(*popenargs, input=None, timeout=None, check=False, **kwargs): exception will be raised. There is an optional argument "input", allowing you to - pass a string to the subprocess's stdin. If you use this argument + pass bytes or a string to the subprocess's stdin. If you use this argument you may not also use the Popen constructor's "stdin" argument, as it will be used internally. - The other arguments are the same as for the Popen constructor. + By default, all communication is in bytes, and therefore any "input" should + be bytes, and the stdout and stderr will be bytes. If in text mode, any + "input" should be a string, and stdout and stderr will be strings decoded + according to locale encoding, or by "encoding" if set. Text mode is + triggered by setting any of text, encoding, errors or universal_newlines. - If universal_newlines=True is passed, the "input" argument must be a - string and stdout/stderr in the returned object will be strings rather than - bytes. + The other arguments are the same as for the Popen constructor. """ if input is not None: if 'stdin' in kwargs: @@ -513,7 +518,7 @@ def getstatusoutput(cmd): (-15, '') """ try: - data = check_output(cmd, shell=True, universal_newlines=True, stderr=STDOUT) + data = check_output(cmd, shell=True, text=True, stderr=STDOUT) exitcode = 0 except CalledProcessError as ex: data = ex.output @@ -565,8 +570,10 @@ class Popen(object): env: Defines the environment variables for the new process. - universal_newlines: If true, use universal line endings for file - objects stdin, stdout and stderr. + text: If true, decode stdin, stdout and stderr using the given encoding + (if set) or the system default otherwise. + + universal_newlines: Alias of text, provided for backwards compatibility. startupinfo and creationflags (Windows only) @@ -587,10 +594,10 @@ class Popen(object): def __init__(self, args, bufsize=-1, executable=None, stdin=None, stdout=None, stderr=None, preexec_fn=None, close_fds=_PLATFORM_DEFAULT_CLOSE_FDS, - shell=False, cwd=None, env=None, universal_newlines=False, + shell=False, cwd=None, env=None, universal_newlines=None, startupinfo=None, creationflags=0, restore_signals=True, start_new_session=False, - pass_fds=(), *, encoding=None, errors=None): + pass_fds=(), *, encoding=None, errors=None, text=None): """Create new Popen instance.""" _cleanup() # Held while anything is calling waitpid before returncode has been @@ -642,10 +649,16 @@ class Popen(object): self.stderr = None self.pid = None self.returncode = None - self.universal_newlines = universal_newlines self.encoding = encoding self.errors = errors + # Validate the combinations of text and universal_newlines + if (text is not None and universal_newlines is not None + and bool(universal_newlines) != bool(text)): + raise SubprocessError('Cannot disambiguate when both text ' + 'and universal_newlines are supplied but ' + 'different. Pass one or the other.') + # Input and output objects. The general principle is like # this: # @@ -677,25 +690,25 @@ class Popen(object): if errread != -1: errread = msvcrt.open_osfhandle(errread.Detach(), 0) - text_mode = encoding or errors or universal_newlines + self.text_mode = encoding or errors or text or universal_newlines self._closed_child_pipe_fds = False try: if p2cwrite != -1: self.stdin = io.open(p2cwrite, 'wb', bufsize) - if text_mode: + if self.text_mode: self.stdin = io.TextIOWrapper(self.stdin, write_through=True, line_buffering=(bufsize == 1), encoding=encoding, errors=errors) if c2pread != -1: self.stdout = io.open(c2pread, 'rb', bufsize) - if text_mode: + if self.text_mode: self.stdout = io.TextIOWrapper(self.stdout, encoding=encoding, errors=errors) if errread != -1: self.stderr = io.open(errread, 'rb', bufsize) - if text_mode: + if self.text_mode: self.stderr = io.TextIOWrapper(self.stderr, encoding=encoding, errors=errors) @@ -735,6 +748,16 @@ class Popen(object): raise + @property + def universal_newlines(self): + # universal_newlines as retained as an alias of text_mode for API + # compatability. bpo-31756 + return self.text_mode + + @universal_newlines.setter + def universal_newlines(self, universal_newlines): + self.text_mode = bool(universal_newlines) + def _translate_newlines(self, data, encoding, errors): data = data.decode(encoding, errors) return data.replace("\r\n", "\n").replace("\r", "\n") @@ -805,12 +828,16 @@ class Popen(object): reached. Wait for process to terminate. The optional "input" argument should be data to be sent to the - child process (if self.universal_newlines is True, this should - be a string; if it is False, "input" should be bytes), or - None, if no data should be sent to the child. + child process, or None, if no data should be sent to the child. + communicate() returns a tuple (stdout, stderr). - communicate() returns a tuple (stdout, stderr). These will be - bytes or, if self.universal_newlines was True, a string. + By default, all communication is in bytes, and therefore any + "input" should be bytes, and the (stdout, stderr) will be bytes. + If in text mode (indicated by self.text_mode), any "input" should + be a string, and (stdout, stderr) will be strings decoded + according to locale encoding, or by "encoding" if set. Text mode + is triggered by setting any of text, encoding, errors or + universal_newlines. """ if self._communication_started and input: @@ -1533,7 +1560,7 @@ class Popen(object): # Translate newlines, if requested. # This also turns bytes into strings. - if self.encoding or self.errors or self.universal_newlines: + if self.text_mode: if stdout is not None: stdout = self._translate_newlines(stdout, self.stdout.encoding, @@ -1553,8 +1580,7 @@ class Popen(object): if self.stdin and self._input is None: self._input_offset = 0 self._input = input - if input is not None and ( - self.encoding or self.errors or self.universal_newlines): + if input is not None and self.text_mode: self._input = self._input.encode(self.stdin.encoding, self.stdin.errors) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 3ba5c028517..fff1b0db105 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -845,41 +845,44 @@ class ProcessTestCase(BaseTestCase): self.assertEqual(stdout, b"bananasplit") self.assertStderrEqual(stderr, b"") - def test_universal_newlines(self): - p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' + SETBINARY + - 'buf = sys.stdout.buffer;' - 'buf.write(sys.stdin.readline().encode());' - 'buf.flush();' - 'buf.write(b"line2\\n");' - 'buf.flush();' - 'buf.write(sys.stdin.read().encode());' - 'buf.flush();' - 'buf.write(b"line4\\n");' - 'buf.flush();' - 'buf.write(b"line5\\r\\n");' - 'buf.flush();' - 'buf.write(b"line6\\r");' - 'buf.flush();' - 'buf.write(b"\\nline7");' - 'buf.flush();' - 'buf.write(b"\\nline8");'], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - universal_newlines=1) - with p: - p.stdin.write("line1\n") - p.stdin.flush() - self.assertEqual(p.stdout.readline(), "line1\n") - p.stdin.write("line3\n") - p.stdin.close() - self.addCleanup(p.stdout.close) - self.assertEqual(p.stdout.readline(), - "line2\n") - self.assertEqual(p.stdout.read(6), - "line3\n") - self.assertEqual(p.stdout.read(), - "line4\nline5\nline6\nline7\nline8") + def test_universal_newlines_and_text(self): + args = [ + sys.executable, "-c", + 'import sys,os;' + SETBINARY + + 'buf = sys.stdout.buffer;' + 'buf.write(sys.stdin.readline().encode());' + 'buf.flush();' + 'buf.write(b"line2\\n");' + 'buf.flush();' + 'buf.write(sys.stdin.read().encode());' + 'buf.flush();' + 'buf.write(b"line4\\n");' + 'buf.flush();' + 'buf.write(b"line5\\r\\n");' + 'buf.flush();' + 'buf.write(b"line6\\r");' + 'buf.flush();' + 'buf.write(b"\\nline7");' + 'buf.flush();' + 'buf.write(b"\\nline8");'] + + for extra_kwarg in ('universal_newlines', 'text'): + p = subprocess.Popen(args, **{'stdin': subprocess.PIPE, + 'stdout': subprocess.PIPE, + extra_kwarg: True}) + with p: + p.stdin.write("line1\n") + p.stdin.flush() + self.assertEqual(p.stdout.readline(), "line1\n") + p.stdin.write("line3\n") + p.stdin.close() + self.addCleanup(p.stdout.close) + self.assertEqual(p.stdout.readline(), + "line2\n") + self.assertEqual(p.stdout.read(6), + "line3\n") + self.assertEqual(p.stdout.read(), + "line4\nline5\nline6\nline7\nline8") def test_universal_newlines_communicate(self): # universal newlines through communicate() diff --git a/Misc/NEWS.d/next/Library/2017-10-20-12-57-52.bpo-31756.IxCvGB.rst b/Misc/NEWS.d/next/Library/2017-10-20-12-57-52.bpo-31756.IxCvGB.rst new file mode 100644 index 00000000000..99064e8cb76 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-20-12-57-52.bpo-31756.IxCvGB.rst @@ -0,0 +1,3 @@ +Add a ``subprocess.Popen(text=False)`` keyword argument to `subprocess` +functions to be more explicit about when the library should attempt to +decode outputs into text. Patch by Andrew Clegg.