bpo-41002: Optimize HTTPResponse.read with a given amount (GH-20943)

I've done the implementation for both non-chunked and chunked reads. I haven't benchmarked chunked reads because I don't currently have a convenient way to generate a high-bandwidth chunked stream, but I don't see any reason that it shouldn't enjoy the same benefits that the non-chunked case does. I've used the benchmark attached to the bpo bug to verify that performance now matches the unsized read case.

Automerge-Triggered-By: @methane
This commit is contained in:
Bruce Merry 2020-06-25 08:30:21 +02:00 committed by GitHub
parent cf18c9e9d4
commit 152f0b8bee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 10 deletions

View File

@ -448,18 +448,25 @@ class HTTPResponse(io.BufferedIOBase):
self._close_conn()
return b""
if self.chunked:
return self._read_chunked(amt)
if amt is not None:
# Amount is given, implement using readinto
b = bytearray(amt)
n = self.readinto(b)
return memoryview(b)[:n].tobytes()
if self.length is not None and amt > self.length:
# clip the read to the "end of response"
amt = self.length
s = self.fp.read(amt)
if not s and amt:
# Ideally, we would raise IncompleteRead if the content-length
# wasn't satisfied, but it might break compatibility.
self._close_conn()
elif self.length is not None:
self.length -= len(s)
if not self.length:
self._close_conn()
return s
else:
# Amount is not given (unbounded read) so we must check self.length
# and self.chunked
if self.chunked:
return self._readall_chunked()
if self.length is None:
s = self.fp.read()
else:
@ -560,7 +567,7 @@ class HTTPResponse(io.BufferedIOBase):
self.chunk_left = chunk_left
return chunk_left
def _readall_chunked(self):
def _read_chunked(self, amt=None):
assert self.chunked != _UNKNOWN
value = []
try:
@ -568,7 +575,15 @@ class HTTPResponse(io.BufferedIOBase):
chunk_left = self._get_chunk_left()
if chunk_left is None:
break
if amt is not None and amt <= chunk_left:
value.append(self._safe_read(amt))
self.chunk_left = chunk_left - amt
break
value.append(self._safe_read(chunk_left))
if amt is not None:
amt -= chunk_left
self.chunk_left = 0
return b''.join(value)
except IncompleteRead:

View File

@ -569,6 +569,33 @@ class BasicTest(TestCase):
resp.close()
self.assertTrue(resp.closed)
def test_partial_reads_past_end(self):
# if we have Content-Length, clip reads to the end
body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText"
sock = FakeSocket(body)
resp = client.HTTPResponse(sock)
resp.begin()
self.assertEqual(resp.read(10), b'Text')
self.assertTrue(resp.isclosed())
self.assertFalse(resp.closed)
resp.close()
self.assertTrue(resp.closed)
def test_partial_readintos_past_end(self):
# if we have Content-Length, clip readintos to the end
body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText"
sock = FakeSocket(body)
resp = client.HTTPResponse(sock)
resp.begin()
b = bytearray(10)
n = resp.readinto(b)
self.assertEqual(n, 4)
self.assertEqual(bytes(b)[:4], b'Text')
self.assertTrue(resp.isclosed())
self.assertFalse(resp.closed)
resp.close()
self.assertTrue(resp.closed)
def test_partial_reads_no_content_length(self):
# when no length is present, the socket should be gracefully closed when
# all data was read

View File

@ -0,0 +1 @@
Improve performance of HTTPResponse.read with a given amount. Patch by Bruce Merry.