Update some www.unicode.org URLs to use HTTPS. (GH-18912)

This commit is contained in:
Benjamin Peterson 2020-03-10 21:10:59 -07:00 committed by GitHub
parent 051b9d08d1
commit 51796e5d26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 14 additions and 14 deletions

View File

@ -156,9 +156,9 @@ UTF-8 has several convenient properties:
References References
---------- ----------
The `Unicode Consortium site <http://www.unicode.org>`_ has character charts, a The `Unicode Consortium site <https://www.unicode.org>`_ has character charts, a
glossary, and PDF versions of the Unicode specification. Be prepared for some glossary, and PDF versions of the Unicode specification. Be prepared for some
difficult reading. `A chronology <http://www.unicode.org/history/>`_ of the difficult reading. `A chronology <https://www.unicode.org/history/>`_ of the
origin and development of Unicode is also available on the site. origin and development of Unicode is also available on the site.
On the Computerphile YouTube channel, Tom Scott briefly On the Computerphile YouTube channel, Tom Scott briefly
@ -393,7 +393,7 @@ These are grouped into categories such as "Letter", "Number", "Punctuation", or
from the above output, ``'Ll'`` means 'Letter, lowercase', ``'No'`` means from the above output, ``'Ll'`` means 'Letter, lowercase', ``'No'`` means
"Number, other", ``'Mn'`` is "Mark, nonspacing", and ``'So'`` is "Symbol, "Number, other", ``'Mn'`` is "Mark, nonspacing", and ``'So'`` is "Symbol,
other". See other". See
`the General Category Values section of the Unicode Character Database documentation <http://www.unicode.org/reports/tr44/#General_Category_Values>`_ for a `the General Category Values section of the Unicode Character Database documentation <https://www.unicode.org/reports/tr44/#General_Category_Values>`_ for a
list of category codes. list of category codes.

View File

@ -352,7 +352,7 @@ Notes:
The numeric literals accepted include the digits ``0`` to ``9`` or any The numeric literals accepted include the digits ``0`` to ``9`` or any
Unicode equivalent (code points with the ``Nd`` property). Unicode equivalent (code points with the ``Nd`` property).
See http://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt See https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
for a complete list of code points with the ``Nd`` property. for a complete list of code points with the ``Nd`` property.

View File

@ -18,7 +18,7 @@
This module provides access to the Unicode Character Database (UCD) which This module provides access to the Unicode Character Database (UCD) which
defines character properties for all Unicode characters. The data contained in defines character properties for all Unicode characters. The data contained in
this database is compiled from the `UCD version 13.0.0 this database is compiled from the `UCD version 13.0.0
<http://www.unicode.org/Public/13.0.0/ucd>`_. <https://www.unicode.org/Public/13.0.0/ucd>`_.
The module uses the same names and symbols as defined by Unicode The module uses the same names and symbols as defined by Unicode
Standard Annex #44, `"Unicode Character Database" Standard Annex #44, `"Unicode Character Database"
@ -175,6 +175,6 @@ Examples:
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#] http://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt .. [#] https://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt .. [#] https://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt

View File

@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for:
* *Nd* - decimal numbers * *Nd* - decimal numbers
* *Pc* - connector punctuations * *Pc* - connector punctuations
* *Other_ID_Start* - explicit list of characters in `PropList.txt * *Other_ID_Start* - explicit list of characters in `PropList.txt
<http://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards <https://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards
compatibility compatibility
* *Other_ID_Continue* - likewise * *Other_ID_Continue* - likewise
@ -934,4 +934,4 @@ occurrence outside string literals and comments is an unconditional error:
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#] http://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt .. [#] https://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt

View File

@ -1752,7 +1752,7 @@ class UnicodeTest(string_tests.CommonTest,
# Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
# only the start byte and the continuation byte(s) are now considered # only the start byte and the continuation byte(s) are now considered
# invalid, instead of the number of bytes specified by the start byte. # invalid, instead of the number of bytes specified by the start byte.
# See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95, # See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
# table 3-8, Row 2) for more information about the algorithm used. # table 3-8, Row 2) for more information about the algorithm used.
FFFD = '\ufffd' FFFD = '\ufffd'
sequences = [ sequences = [

View File

@ -176,7 +176,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertRaises(TypeError, self.db.combining, 'xx') self.assertRaises(TypeError, self.db.combining, 'xx')
def test_pr29(self): def test_pr29(self):
# http://www.unicode.org/review/pr-29.html # https://www.unicode.org/review/pr-29.html
# See issues #1054943 and #10254. # See issues #1054943 and #10254.
composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161", composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161",
'Li\u030dt-s\u1e73\u0301', 'Li\u030dt-s\u1e73\u0301',

View File

@ -153,7 +153,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
/* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
will result in surrogates in range D800-DFFF. Surrogates are will result in surrogates in range D800-DFFF. Surrogates are
not valid UTF-8 so they are rejected. not valid UTF-8 so they are rejected.
See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
(table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
goto InvalidContinuation1; goto InvalidContinuation1;
} }

View File

@ -893,9 +893,9 @@ def open_data(template, version):
import urllib.request import urllib.request
if version == '3.2.0': if version == '3.2.0':
# irregular url structure # irregular url structure
url = ('http://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,) url = ('https://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,)
else: else:
url = ('http://www.unicode.org/Public/%s/ucd/'+template) % (version, '') url = ('https://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
os.makedirs(DATA_DIR, exist_ok=True) os.makedirs(DATA_DIR, exist_ok=True)
urllib.request.urlretrieve(url, filename=local) urllib.request.urlretrieve(url, filename=local)
if local.endswith('.txt'): if local.endswith('.txt'):