From 351adda54bed3afbbf6db7725699679e68722d7d Mon Sep 17 00:00:00 2001 From: Pierre Quentel Date: Sun, 2 Apr 2017 12:26:12 +0200 Subject: [PATCH] bpo-29654 : Support If-Modified-Since HTTP header (browser cache) (#298) Return 304 response if file was not modified. --- Doc/library/http.server.rst | 12 +++++--- Doc/whatsnew/3.7.rst | 8 +++++ Lib/http/server.py | 39 +++++++++++++++++++++--- Lib/test/test_httpservers.py | 59 +++++++++++++++++++++++++++++++++++- Misc/NEWS | 3 ++ 5 files changed, 112 insertions(+), 9 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index fb5c1df611d..ee1c37c6319 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -343,11 +343,13 @@ of which this module provides three different variants: :func:`os.listdir` to scan the directory, and returns a ``404`` error response if the :func:`~os.listdir` fails. - If the request was mapped to a file, it is opened and the contents are - returned. Any :exc:`OSError` exception in opening the requested file is - mapped to a ``404``, ``'File not found'`` error. Otherwise, the content + If the request was mapped to a file, it is opened. Any :exc:`OSError` + exception in opening the requested file is mapped to a ``404``, + ``'File not found'`` error. If there was an ``'If-Modified-Since'`` + header in the request, and the file was not modified after this time, + a ``304``, ``'Not Modified'`` response is sent. Otherwise, the content type is guessed by calling the :meth:`guess_type` method, which in turn - uses the *extensions_map* variable. + uses the *extensions_map* variable, and the file contents are returned. A ``'Content-type:'`` header with the guessed content type is output, followed by a ``'Content-Length:'`` header with the file's size and a @@ -360,6 +362,8 @@ of which this module provides three different variants: For example usage, see the implementation of the :func:`test` function invocation in the :mod:`http.server` module. + .. 
versionchanged:: 3.7 + Added support for the ``'If-Modified-Since'`` header. The :class:`SimpleHTTPRequestHandler` class can be used in the following manner in order to create a very basic webserver serving files relative to diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 19e04bb19ef..12f65ff9a59 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -95,6 +95,14 @@ New Modules Improved Modules ================ +http.server +----------- + +:class:`~http.server.SimpleHTTPRequestHandler` supports the HTTP +If-Modified-Since header. The server returns the 304 response status if the +target file was not modified after the time specified in the header. +(Contributed by Pierre Quentel in :issue:`29654`.) + locale ------ diff --git a/Lib/http/server.py b/Lib/http/server.py index 61ddecc7efe..429490b73a8 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -87,6 +87,9 @@ __all__ = [ "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", ] +import argparse +import copy +import datetime import email.utils import html import http.client @@ -101,8 +104,6 @@ import socketserver import sys import time import urllib.parse -import copy -import argparse from http import HTTPStatus @@ -686,12 +687,42 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): except OSError: self.send_error(HTTPStatus.NOT_FOUND, "File not found") return None + try: + fs = os.fstat(f.fileno()) + # Use browser cache if possible + if ("If-Modified-Since" in self.headers + and "If-None-Match" not in self.headers): + # compare If-Modified-Since and time of last file modification + try: + ims = email.utils.parsedate_to_datetime( + self.headers["If-Modified-Since"]) + except (TypeError, IndexError, OverflowError, ValueError): + # ignore ill-formed values + pass + else: + if ims.tzinfo is None: + # obsolete format with no timezone, cf. 
+ # https://tools.ietf.org/html/rfc7231#section-7.1.1.1 + ims = ims.replace(tzinfo=datetime.timezone.utc) + if ims.tzinfo is datetime.timezone.utc: + # compare to UTC datetime of last modification + last_modif = datetime.datetime.fromtimestamp( + fs.st_mtime, datetime.timezone.utc) + # remove microseconds, like in If-Modified-Since + last_modif = last_modif.replace(microsecond=0) + + if last_modif <= ims: + self.send_response(HTTPStatus.NOT_MODIFIED) + self.end_headers() + f.close() + return None + self.send_response(HTTPStatus.OK) self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.send_header("Last-Modified", + self.date_time_string(fs.st_mtime)) self.end_headers() return f except: diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 5049538e664..dafcb0cbd56 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -14,11 +14,14 @@ import re import base64 import ntpath import shutil -import urllib.parse +import email.message +import email.utils import html import http.client +import urllib.parse import tempfile import time +import datetime from io import BytesIO import unittest @@ -333,6 +336,13 @@ class SimpleHTTPServerTestCase(BaseTestCase): self.base_url = '/' + self.tempdir_name with open(os.path.join(self.tempdir, 'test'), 'wb') as temp: temp.write(self.data) + mtime = os.fstat(temp.fileno()).st_mtime + # compute last modification datetime for browser cache tests + last_modif = datetime.datetime.fromtimestamp(mtime, + datetime.timezone.utc) + self.last_modif_datetime = last_modif.replace(microsecond=0) + self.last_modif_header = email.utils.formatdate( + last_modif.timestamp(), usegmt=True) def tearDown(self): try: @@ -444,6 +454,44 @@ class SimpleHTTPServerTestCase(BaseTestCase): self.assertEqual(response.getheader('content-type'), 'application/octet-stream') + def 
test_browser_cache(self): + """Check that when a request to /test is sent with the request header + If-Modified-Since set to date of last modification, the server returns + status code 304, not 200 + """ + headers = email.message.Message() + headers['If-Modified-Since'] = self.last_modif_header + response = self.request(self.base_url + '/test', headers=headers) + self.check_status_and_reason(response, HTTPStatus.NOT_MODIFIED) + + # one hour after last modification : must return 304 + new_dt = self.last_modif_datetime + datetime.timedelta(hours=1) + headers = email.message.Message() + headers['If-Modified-Since'] = email.utils.format_datetime(new_dt, + usegmt=True) + response = self.request(self.base_url + '/test', headers=headers) + self.check_status_and_reason(response, HTTPStatus.NOT_MODIFIED) + + def test_browser_cache_file_changed(self): + # with If-Modified-Since earlier than Last-Modified, must return 200 + dt = self.last_modif_datetime + # build datetime object : 365 days before last modification + old_dt = dt - datetime.timedelta(days=365) + headers = email.message.Message() + headers['If-Modified-Since'] = email.utils.format_datetime(old_dt, + usegmt=True) + response = self.request(self.base_url + '/test', headers=headers) + self.check_status_and_reason(response, HTTPStatus.OK) + + def test_browser_cache_with_If_None_Match_header(self): + # if If-None-Match header is present, ignore If-Modified-Since + + headers = email.message.Message() + headers['If-Modified-Since'] = self.last_modif_header + headers['If-None-Match'] = "*" + response = self.request(self.base_url + '/test', headers=headers) + self.check_status_and_reason(response, HTTPStatus.OK) + def test_invalid_requests(self): response = self.request('/', method='FOO') self.check_status_and_reason(response, HTTPStatus.NOT_IMPLEMENTED) @@ -453,6 +501,15 @@ class SimpleHTTPServerTestCase(BaseTestCase): response = self.request('/', method='GETs') self.check_status_and_reason(response, 
HTTPStatus.NOT_IMPLEMENTED) + def test_last_modified(self): + """Checks that the datetime returned in Last-Modified response header + is the actual datetime of last modification, rounded to the second + """ + response = self.request(self.base_url + '/test') + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + last_modif_header = response.headers['Last-modified'] + self.assertEqual(last_modif_header, self.last_modif_header) + def test_path_without_leading_slash(self): response = self.request(self.tempdir_name + '/test') self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) diff --git a/Misc/NEWS b/Misc/NEWS index a9acaf8e62f..2e9cce6f12c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -303,6 +303,9 @@ Extension Modules Library ------- +- bpo-29654: Support If-Modified-Since HTTP header (browser cache). Patch + by Pierre Quentel. + - bpo-29931: Fixed comparison check for ipaddress.ip_interface objects. Patch by Sanjay Sundaresan.