#5610: use \Z not $ so we don't eat extra chars when body part ends with \r\n.
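If a body part ended with \r\n (followed by the newline that precedes the boundary), feedparser's end-of-line pattern, which used '$' to terminate its search for the newline, would match on the \r\n instead of the final newline, because '$' also matches just before a trailing newline. feedparser then thought it needed to strip two characters to account for the line end before the boundary, which made it chop one too many characters off the end of the body part. '\Z' matches only at the very end of the string, so using it makes the match correct. Patch and test by Tony Nelson.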
This commit is contained in:
parent
23152ea5bd
commit
61746d580e
|
@ -28,7 +28,7 @@ from email import message
|
|||
|
||||
NLCRE = re.compile('\r\n|\r|\n')
|
||||
NLCRE_bol = re.compile('(\r\n|\r|\n)')
|
||||
NLCRE_eol = re.compile('(\r\n|\r|\n)$')
|
||||
# Matches the line ending at the very end of a string. \Z is used rather than
# '$' because '$' also matches just before a trailing newline, which would make
# the pattern report "\r\n" for text ending in "\r\n\n". The raw string keeps
# \Z from being treated as an (invalid) str escape sequence.
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
|
||||
NLCRE_crack = re.compile('(\r\n|\r|\n)')
|
||||
# RFC 2822 section 3.6.8, Optional fields: ftext is %d33-57 / %d59-126, i.e. any character
|
||||
# except controls, SP, and ":".
|
||||
|
|
|
@ -2610,6 +2610,24 @@ Here's the message body
|
|||
eq(headers, ['A', 'B', 'CC'])
|
||||
eq(msg.get_payload(), 'body')
|
||||
|
||||
def test_CRLFLF_at_end_of_part(self):
|
||||
# issue 5610: feedparser should not eat two chars from body part ending
|
||||
# with "\r\n\n".
|
||||
m = (
|
||||
"From: foo@bar.com\n"
|
||||
"To: baz\n"
|
||||
"Mime-Version: 1.0\n"
|
||||
"Content-Type: multipart/mixed; boundary=BOUNDARY\n"
|
||||
"\n"
|
||||
"--BOUNDARY\n"
|
||||
"Content-Type: text/plain\n"
|
||||
"\n"
|
||||
"body ending with CRLF newline\r\n"
|
||||
"\n"
|
||||
"--BOUNDARY--\n"
|
||||
)
|
||||
msg = email.message_from_string(m)
|
||||
self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
|
||||
|
||||
|
||||
class TestBase64(unittest.TestCase):
|
||||
|
|
Loading…
Reference in New Issue