From 68157da8b42b26408af5d157d2dba4fcf29c6320 Mon Sep 17 00:00:00 2001 From: Abhilash Raj Date: Sun, 8 Dec 2019 17:35:38 -0800 Subject: [PATCH] bpo-38698: Add a new InvalidMessageID token to email header parser. (GH-17503) This adds a new InvalidMessageID token to the email header parser which can be used to represent invalid message-id headers in the parse tree. --- Lib/email/_header_value_parser.py | 20 ++++++++-- .../test_email/test__header_value_parser.py | 40 +++++++++++++++++-- .../2019-12-07-21-49-50.bpo-38698.HxoSym.rst | 3 ++ 3 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index abdef8189ca..cb013225ec6 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -850,10 +850,15 @@ class MsgID(TokenList): # message-id tokens may not be folded. return str(self) + policy.linesep + class MessageID(MsgID): token_type = 'message-id' +class InvalidMessageID(MessageID): + token_type = 'invalid-message-id' + + class Header(TokenList): token_type = 'header' @@ -2110,11 +2115,18 @@ def parse_message_id(value): message_id = MessageID() try: token, value = get_msg_id(value) - except errors.HeaderParseError: - message_id.defects.append(errors.InvalidHeaderDefect( - "Expected msg-id but found {!r}".format(value))) - else: message_id.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + message_id = InvalidMessageID(token) + message_id.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + else: + # Value after parsing a valid msg_id should be None. + if value: + message_id.defects.append(errors.InvalidHeaderDefect( + "Unexpected {!r}".format(value))) + return message_id # diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 71168f3183d..d59d70117b8 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2639,10 +2639,44 @@ class TestParser(TestParserMixin, TestEmailBase): self.assertEqual(msg_id.token_type, 'msg-id') def test_get_msg_id_invalid_expected_msg_id_not_found(self): - text = "Message-Id: 935-XPB-567:0:86089:180874:0:45327:9:90305:17843586-40@example.com" + text = "935-XPB-567:0:45327:9:90305:17843586-40@example.com" msg_id = parser.parse_message_id(text) - self.assertDefectsEqual(msg_id.all_defects, - [errors.InvalidHeaderDefect]) + self.assertDefectsEqual( + msg_id.all_defects, + [errors.InvalidHeaderDefect]) + + def test_parse_invalid_message_id(self): + message_id = self._test_parse_x( + parser.parse_message_id, + "935-XPB-567:0:45327:9:90305:17843586-40@example.com", + "935-XPB-567:0:45327:9:90305:17843586-40@example.com", + "935-XPB-567:0:45327:9:90305:17843586-40@example.com", + [errors.InvalidHeaderDefect], + ) + self.assertEqual(message_id.token_type, 'invalid-message-id') + + def test_parse_valid_message_id(self): + message_id = self._test_parse_x( + parser.parse_message_id, + "", + "", + "", + [], + ) + self.assertEqual(message_id.token_type, 'message-id') + + def test_parse_message_id_with_remaining(self): + message_id = self._test_parse_x( + parser.parse_message_id, + "thensomething", + "", + "", + [errors.InvalidHeaderDefect], + [], + ) + self.assertEqual(message_id.token_type, 'message-id') + self.assertEqual(str(message_id.all_defects[0]), + "Unexpected 'thensomething'") def test_get_msg_id_no_angle_start(self): with self.assertRaises(errors.HeaderParseError): diff --git a/Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst b/Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst new file mode 100644 index 00000000000..b930dea0fa7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst @@ -0,0 +1,3 @@ +Add a new ``InvalidMessageID`` token to email parser to represent invalid +Message-ID headers. Also, add defects when there is remaining value after +parsing the header.