From 2403a787b98376205cd11469ddff292cbd41d601 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 26 Jan 2014 19:20:24 +0200 Subject: [PATCH] Issue #8260: The read(), readline() and readlines() methods of codecs.StreamReader returned incomplete data when they were called after readline() or read(size). Based on patch by Amaury Forgeot d'Arc. --- Lib/codecs.py | 15 +++++++-------- Lib/test/test_codecs.py | 34 ++++++++++++++++++++++++++++++++++ Misc/NEWS | 4 ++++ 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/Lib/codecs.py b/Lib/codecs.py index f4cd60a14f2..79a918dd196 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -456,16 +456,13 @@ class StreamReader(Codec): # read until we get the required number of characters (if available) while True: - # can the request can be satisfied from the character buffer? - if chars < 0: - if size < 0: - if self.charbuffer: - break - elif len(self.charbuffer) >= size: - break - else: + # can the request be satisfied from the character buffer? 
+ if chars >= 0: if len(self.charbuffer) >= chars: break + elif size >= 0: + if len(self.charbuffer) >= size: + break # we need more data if size < 0: newdata = self.stream.read() @@ -473,6 +470,8 @@ class StreamReader(Codec): newdata = self.stream.read(size) # decode bytes (those remaining from the last call included) data = self.bytebuffer + newdata + if not data: + break try: newchars, decodedbytes = self.decode(data, self.errors) except UnicodeDecodeError, exc: diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index c9a25154aef..d0f96af326a 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -124,6 +124,40 @@ class ReadTest(unittest.TestCase): size*u"a", ) + def test_mixed_readline_and_read(self): + lines = ["Humpty Dumpty sat on a wall,\n", + "Humpty Dumpty had a great fall.\r\n", + "All the king's horses and all the king's men\r", + "Couldn't put Humpty together again."] + data = ''.join(lines) + def getreader(): + stream = StringIO.StringIO(data.encode(self.encoding)) + return codecs.getreader(self.encoding)(stream) + + # Issue #8260: Test readline() followed by read() + f = getreader() + self.assertEqual(f.readline(), lines[0]) + self.assertEqual(f.read(), ''.join(lines[1:])) + self.assertEqual(f.read(), '') + + # Issue #16636: Test readline() followed by readlines() + f = getreader() + self.assertEqual(f.readline(), lines[0]) + self.assertEqual(f.readlines(), lines[1:]) + self.assertEqual(f.read(), '') + + # Test read() followed by read() + f = getreader() + self.assertEqual(f.read(size=40, chars=5), data[:5]) + self.assertEqual(f.read(), data[5:]) + self.assertEqual(f.read(), '') + + # Issue #12446: Test read() followed by readlines() + f = getreader() + self.assertEqual(f.read(size=40, chars=5), data[:5]) + self.assertEqual(f.readlines(), [lines[0][5:]] + lines[1:]) + self.assertEqual(f.read(), '') + def test_bug1175396(self): s = [ '<%!--===================================================\r\n', diff --git a/Misc/NEWS 
b/Misc/NEWS index 3a2d2da8a5d..d1f7b7b6982 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -38,6 +38,10 @@ Core and Builtins Library ------- +- Issue #8260: The read(), readline() and readlines() methods of + codecs.StreamReader returned incomplete data when they were called after + readline() or read(size). Based on patch by Amaury Forgeot d'Arc. + - Issue #20374: Fix build with GNU readline >= 6.3. - Issue #14548: Make multiprocessing finalizers check pid before