From edc0d2338d4df38ddf9d9d37d7d0f0e14d9fc628 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Sat, 17 Jul 2010 01:40:30 +0000 Subject: [PATCH] Merged revisions 82924 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/release27-maint ................ r82924 | r.david.murray | 2010-07-16 21:35:16 -0400 (Fri, 16 Jul 2010) | 11 lines Merged revisions 82922 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r82922 | r.david.murray | 2010-07-16 21:19:57 -0400 (Fri, 16 Jul 2010) | 4 lines #1555570: correctly handle a \r\n that is split by the read buffer. Patch and test by Tony Nelson. ........ ................ --- Lib/email/feedparser.py | 4 ++++ Lib/email/test/test_email.py | 33 +++++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 40 insertions(+) diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 163fadafd4a..5ff266fa30a 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -104,6 +104,10 @@ class BufferedSubFile(object): # data after the final RE. In the case of a NL/CR terminated string, # this is the empty string. self._partial = parts.pop() + #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r: + # is there a \n to follow later? + if not self._partial and parts and parts[-1].endswith('\r'): + self._partial = parts.pop(-2)+parts.pop() # parts is a list of strings, alternating between the line contents # and the eol character(s). Gather up a list of lines after # re-attaching the newlines. diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 6c23693cf11..9b1287747a4 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2445,6 +2445,39 @@ Do you like this message? -Me """) + def test_pushCR_LF(self): + '''FeedParser BufferedSubFile.push() assumed it received complete + line endings. A CR ending one push() followed by a LF starting + the next push() added an empty line. + ''' + imt = [ + ("a\r \n", 2), + ("b", 0), + ("c\n", 1), + ("", 0), + ("d\r\n", 1), + ("e\r", 0), + ("\nf", 1), + ("\r\n", 1), + ] + from email.feedparser import BufferedSubFile, NeedMoreData + bsf = BufferedSubFile() + om = [] + nt = 0 + for il, n in imt: + bsf.push(il) + nt += n + n1 = 0 + while True: + ol = bsf.readline() + if ol == NeedMoreData: + break + om.append(ol) + n1 += 1 + self.assertTrue(n == n1) + self.assertTrue(len(om) == nt) + self.assertTrue(''.join([il for il, n in imt]) == ''.join(om)) + class TestParsers(TestEmailBase): diff --git a/Misc/NEWS b/Misc/NEWS index fdd9a42d125..0688d0b6ba7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -81,6 +81,9 @@ C-API Library ------- +- Issue #1555570: email no longer inserts extra blank lines when a \r\n + combo crosses an 8192 byte boundary. + - Issue #9164: Ensure sysconfig handles dupblice archs while building on OSX - Issue #7646: The fnmatch pattern cache no longer grows without bound.