Update some www.unicode.org URLs to use HTTPS. (GH-18912)
This commit is contained in:
parent
051b9d08d1
commit
51796e5d26
|
@ -156,9 +156,9 @@ UTF-8 has several convenient properties:
|
||||||
References
|
References
|
||||||
----------
|
----------
|
||||||
|
|
||||||
The `Unicode Consortium site <http://www.unicode.org>`_ has character charts, a
|
The `Unicode Consortium site <https://www.unicode.org>`_ has character charts, a
|
||||||
glossary, and PDF versions of the Unicode specification. Be prepared for some
|
glossary, and PDF versions of the Unicode specification. Be prepared for some
|
||||||
difficult reading. `A chronology <http://www.unicode.org/history/>`_ of the
|
difficult reading. `A chronology <https://www.unicode.org/history/>`_ of the
|
||||||
origin and development of Unicode is also available on the site.
|
origin and development of Unicode is also available on the site.
|
||||||
|
|
||||||
On the Computerphile YouTube channel, Tom Scott briefly
|
On the Computerphile YouTube channel, Tom Scott briefly
|
||||||
|
@ -393,7 +393,7 @@ These are grouped into categories such as "Letter", "Number", "Punctuation", or
|
||||||
from the above output, ``'Ll'`` means 'Letter, lowercase', ``'No'`` means
|
from the above output, ``'Ll'`` means 'Letter, lowercase', ``'No'`` means
|
||||||
"Number, other", ``'Mn'`` is "Mark, nonspacing", and ``'So'`` is "Symbol,
|
"Number, other", ``'Mn'`` is "Mark, nonspacing", and ``'So'`` is "Symbol,
|
||||||
other". See
|
other". See
|
||||||
`the General Category Values section of the Unicode Character Database documentation <http://www.unicode.org/reports/tr44/#General_Category_Values>`_ for a
|
`the General Category Values section of the Unicode Character Database documentation <https://www.unicode.org/reports/tr44/#General_Category_Values>`_ for a
|
||||||
list of category codes.
|
list of category codes.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -352,7 +352,7 @@ Notes:
|
||||||
The numeric literals accepted include the digits ``0`` to ``9`` or any
|
The numeric literals accepted include the digits ``0`` to ``9`` or any
|
||||||
Unicode equivalent (code points with the ``Nd`` property).
|
Unicode equivalent (code points with the ``Nd`` property).
|
||||||
|
|
||||||
See http://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
|
See https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
|
||||||
for a complete list of code points with the ``Nd`` property.
|
for a complete list of code points with the ``Nd`` property.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
This module provides access to the Unicode Character Database (UCD) which
|
This module provides access to the Unicode Character Database (UCD) which
|
||||||
defines character properties for all Unicode characters. The data contained in
|
defines character properties for all Unicode characters. The data contained in
|
||||||
this database is compiled from the `UCD version 13.0.0
|
this database is compiled from the `UCD version 13.0.0
|
||||||
<http://www.unicode.org/Public/13.0.0/ucd>`_.
|
<https://www.unicode.org/Public/13.0.0/ucd>`_.
|
||||||
|
|
||||||
The module uses the same names and symbols as defined by Unicode
|
The module uses the same names and symbols as defined by Unicode
|
||||||
Standard Annex #44, `"Unicode Character Database"
|
Standard Annex #44, `"Unicode Character Database"
|
||||||
|
@ -175,6 +175,6 @@ Examples:
|
||||||
|
|
||||||
.. rubric:: Footnotes
|
.. rubric:: Footnotes
|
||||||
|
|
||||||
.. [#] http://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt
|
.. [#] https://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt
|
||||||
|
|
||||||
.. [#] http://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt
|
.. [#] https://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt
|
||||||
|
|
|
@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for:
|
||||||
* *Nd* - decimal numbers
|
* *Nd* - decimal numbers
|
||||||
* *Pc* - connector punctuations
|
* *Pc* - connector punctuations
|
||||||
* *Other_ID_Start* - explicit list of characters in `PropList.txt
|
* *Other_ID_Start* - explicit list of characters in `PropList.txt
|
||||||
<http://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards
|
<https://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards
|
||||||
compatibility
|
compatibility
|
||||||
* *Other_ID_Continue* - likewise
|
* *Other_ID_Continue* - likewise
|
||||||
|
|
||||||
|
@ -934,4 +934,4 @@ occurrence outside string literals and comments is an unconditional error:
|
||||||
|
|
||||||
.. rubric:: Footnotes
|
.. rubric:: Footnotes
|
||||||
|
|
||||||
.. [#] http://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt
|
.. [#] https://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt
|
||||||
|
|
|
@ -1752,7 +1752,7 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
# Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
|
# Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
|
||||||
# only the start byte and the continuation byte(s) are now considered
|
# only the start byte and the continuation byte(s) are now considered
|
||||||
# invalid, instead of the number of bytes specified by the start byte.
|
# invalid, instead of the number of bytes specified by the start byte.
|
||||||
# See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
|
# See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
|
||||||
# table 3-8, Row 2) for more information about the algorithm used.
|
# table 3-8, Row 2) for more information about the algorithm used.
|
||||||
FFFD = '\ufffd'
|
FFFD = '\ufffd'
|
||||||
sequences = [
|
sequences = [
|
||||||
|
|
|
@ -176,7 +176,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
|
||||||
self.assertRaises(TypeError, self.db.combining, 'xx')
|
self.assertRaises(TypeError, self.db.combining, 'xx')
|
||||||
|
|
||||||
def test_pr29(self):
|
def test_pr29(self):
|
||||||
# http://www.unicode.org/review/pr-29.html
|
# https://www.unicode.org/review/pr-29.html
|
||||||
# See issues #1054943 and #10254.
|
# See issues #1054943 and #10254.
|
||||||
composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161",
|
composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161",
|
||||||
'Li\u030dt-s\u1e73\u0301',
|
'Li\u030dt-s\u1e73\u0301',
|
||||||
|
|
|
@ -153,7 +153,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
||||||
/* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
|
/* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
|
||||||
will result in surrogates in range D800-DFFF. Surrogates are
|
will result in surrogates in range D800-DFFF. Surrogates are
|
||||||
not valid UTF-8 so they are rejected.
|
not valid UTF-8 so they are rejected.
|
||||||
See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
|
See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
|
||||||
(table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
|
(table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
|
||||||
goto InvalidContinuation1;
|
goto InvalidContinuation1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -893,9 +893,9 @@ def open_data(template, version):
|
||||||
import urllib.request
|
import urllib.request
|
||||||
if version == '3.2.0':
|
if version == '3.2.0':
|
||||||
# irregular url structure
|
# irregular url structure
|
||||||
url = ('http://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,)
|
url = ('https://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,)
|
||||||
else:
|
else:
|
||||||
url = ('http://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
|
url = ('https://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
|
||||||
os.makedirs(DATA_DIR, exist_ok=True)
|
os.makedirs(DATA_DIR, exist_ok=True)
|
||||||
urllib.request.urlretrieve(url, filename=local)
|
urllib.request.urlretrieve(url, filename=local)
|
||||||
if local.endswith('.txt'):
|
if local.endswith('.txt'):
|
||||||
|
|
Loading…
Reference in New Issue