mirror of https://github.com/python/cpython
gh-106628: email parsing speedup (gh-106629)
This commit is contained in:
parent
af51bd7cda
commit
7e6ce48872
|
@ -37,6 +37,8 @@ NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
|
|||
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
|
||||
EMPTYSTRING = ''
|
||||
NL = '\n'
|
||||
boundaryendRE = re.compile(
|
||||
r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
|
||||
|
||||
NeedMoreData = object()
|
||||
|
||||
|
@ -327,9 +329,10 @@ class FeedParser:
|
|||
# this onto the input stream until we've scanned past the
|
||||
# preamble.
|
||||
separator = '--' + boundary
|
||||
boundaryre = re.compile(
|
||||
'(?P<sep>' + re.escape(separator) +
|
||||
r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
|
||||
def boundarymatch(line):
    # Decide whether *line* is a boundary line for the multipart being
    # parsed.  `separator` ('--' + boundary) and the precompiled
    # `boundaryendRE` come from the surrounding scope.
    #
    # A plain prefix test stands in for a regex built around the escaped
    # separator; the shared pattern is then applied only to what follows
    # the separator (optional closing '--', trailing blanks/tabs, optional
    # line ending), starting at the offset just past it.
    #
    # Returns the match object (named groups: end, ws, linesep) when the
    # line is a boundary, otherwise None.
    if line.startswith(separator):
        return boundaryendRE.match(line, len(separator))
    return None
|
||||
capturing_preamble = True
|
||||
preamble = []
|
||||
linesep = False
|
||||
|
@ -341,7 +344,7 @@ class FeedParser:
|
|||
continue
|
||||
if line == '':
|
||||
break
|
||||
mo = boundaryre.match(line)
|
||||
mo = boundarymatch(line)
|
||||
if mo:
|
||||
# If we're looking at the end boundary, we're done with
|
||||
# this multipart. If there was a newline at the end of
|
||||
|
@ -373,13 +376,13 @@ class FeedParser:
|
|||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
mo = boundaryre.match(line)
|
||||
mo = boundarymatch(line)
|
||||
if not mo:
|
||||
self._input.unreadline(line)
|
||||
break
|
||||
# Recurse to parse this subpart; the input stream points
|
||||
# at the subpart's first line.
|
||||
self._input.push_eof_matcher(boundaryre.match)
|
||||
self._input.push_eof_matcher(boundarymatch)
|
||||
for retval in self._parsegen():
|
||||
if retval is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Speed up parsing of emails by about 20% by not compiling a new regular
|
||||
expression for every single email.
|
Loading…
Reference in New Issue