Return complete lines from codec stream readers
even if there is an exception in later lines, resulting in correct line numbers for decoding errors in source code. Fixes #1178484. Will backport to 2.4.
This commit is contained in:
parent
6d2b346140
commit
56066d2e55
|
@ -394,7 +394,7 @@ order to be compatible to the Python codec registry.
|
|||
be extended with \function{register_error()}.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{methoddesc}{read}{\optional{size\optional{, chars}}}
|
||||
\begin{methoddesc}{read}{\optional{size\optional{, chars\optional{, firstline}}}}
|
||||
Decodes data from the stream and returns the resulting object.
|
||||
|
||||
\var{chars} indicates the number of characters to read from the
|
||||
|
@ -408,12 +408,16 @@ order to be compatible to the Python codec registry.
|
|||
decode as much as possible. \var{size} is intended to prevent having
|
||||
to decode huge files in one step.
|
||||
|
||||
\var{firstline} indicates that it is sufficient to return only
|
||||
the first line if there are decoding errors on later lines.
|
||||
|
||||
The method should use a greedy read strategy meaning that it should
|
||||
read as much data as is allowed within the definition of the encoding
|
||||
and the given size, e.g. if optional encoding endings or state
|
||||
markers are available on the stream, these should be read too.
|
||||
|
||||
\versionchanged[\var{chars} argument added]{2.4}
|
||||
\versionchanged[\var{firstline} argument added]{2.4.2}
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}{readline}{\optional{size\optional{, keepends}}}
|
||||
|
|
|
@ -236,7 +236,7 @@ class StreamReader(Codec):
|
|||
def decode(self, input, errors='strict'):
|
||||
raise NotImplementedError
|
||||
|
||||
def read(self, size=-1, chars=-1):
|
||||
def read(self, size=-1, chars=-1, firstline=False):
|
||||
|
||||
""" Decodes data from the stream self.stream and returns the
|
||||
resulting object.
|
||||
|
@ -253,6 +253,11 @@ class StreamReader(Codec):
|
|||
is intended to prevent having to decode huge files in one
|
||||
step.
|
||||
|
||||
If firstline is true and a UnicodeDecodeError happens
|
||||
after the first line terminator in the input, only the first line
|
||||
will be returned; the rest of the input will be kept until the
|
||||
next call to read().
|
||||
|
||||
The method should use a greedy read strategy meaning that
|
||||
it should read as much data as is allowed within the
|
||||
definition of the encoding and the given size, e.g. if
|
||||
|
@ -275,7 +280,16 @@ class StreamReader(Codec):
|
|||
newdata = self.stream.read(size)
|
||||
# decode bytes (those remaining from the last call included)
|
||||
data = self.bytebuffer + newdata
|
||||
try:
|
||||
newchars, decodedbytes = self.decode(data, self.errors)
|
||||
except UnicodeDecodeError, exc:
|
||||
if firstline:
|
||||
newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
|
||||
lines = newchars.splitlines(True)
|
||||
if len(lines)<=1:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
# keep undecoded bytes until the next call
|
||||
self.bytebuffer = data[decodedbytes:]
|
||||
# put new characters in the character buffer
|
||||
|
@ -306,7 +320,7 @@ class StreamReader(Codec):
|
|||
line = ""
|
||||
# If size is given, we call read() only once
|
||||
while True:
|
||||
data = self.read(readsize)
|
||||
data = self.read(readsize, firstline=True)
|
||||
if data:
|
||||
# If we're at a "\r" read one extra character (which might
|
||||
# be a "\n") to get a proper line ending. If the stream is
|
||||
|
|
|
@ -191,6 +191,10 @@ Extension Modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- Bug #1178484: Return complete lines from codec stream readers
|
||||
even if there is an exception in later lines, resulting in
|
||||
correct line numbers for decoding errors in source code.
|
||||
|
||||
- Bug #1192315: Disallow negative arguments to clear() in pdb.
|
||||
|
||||
- Patch #827386: Support absolute source paths in msvccompiler.py.
|
||||
|
|
Loading…
Reference in New Issue