diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 885097c7dda..53d71f50225 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -37,6 +37,8 @@ NLCRE_crack = re.compile(r'(\r\n|\r|\n)') headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' +boundaryendRE = re.compile( + r'(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') NeedMoreData = object() @@ -327,9 +329,10 @@ class FeedParser: # this onto the input stream until we've scanned past the # preamble. separator = '--' + boundary - boundaryre = re.compile( - '(?P' + re.escape(separator) + - r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + def boundarymatch(line): + if not line.startswith(separator): + return None + return boundaryendRE.match(line, len(separator)) capturing_preamble = True preamble = [] linesep = False @@ -341,7 +344,7 @@ class FeedParser: continue if line == '': break - mo = boundaryre.match(line) + mo = boundarymatch(line) if mo: # If we're looking at the end boundary, we're done with # this multipart. If there was a newline at the end of @@ -373,13 +376,13 @@ class FeedParser: if line is NeedMoreData: yield NeedMoreData continue - mo = boundaryre.match(line) + mo = boundarymatch(line) if not mo: self._input.unreadline(line) break # Recurse to parse this subpart; the input stream points # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) + self._input.push_eof_matcher(boundarymatch) for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData diff --git a/Misc/NEWS.d/next/Library/2023-07-11-16-36-22.gh-issue-106628.Kx8Zvc.rst b/Misc/NEWS.d/next/Library/2023-07-11-16-36-22.gh-issue-106628.Kx8Zvc.rst new file mode 100644 index 00000000000..6fa276e901f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-11-16-36-22.gh-issue-106628.Kx8Zvc.rst @@ -0,0 +1,2 @@ +Speed up parsing of emails by about 20% by not compiling a new regular +expression for every single email.