diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index 51bd64bfc23..4825b39f150 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -156,9 +156,9 @@ UTF-8 has several convenient properties: References ---------- -The `Unicode Consortium site `_ has character charts, a +The `Unicode Consortium site `_ has character charts, a glossary, and PDF versions of the Unicode specification. Be prepared for some -difficult reading. `A chronology `_ of the +difficult reading. `A chronology `_ of the origin and development of Unicode is also available on the site. On the Computerphile Youtube channel, Tom Scott briefly @@ -393,7 +393,7 @@ These are grouped into categories such as "Letter", "Number", "Punctuation", or from the above output, ``'Ll'`` means 'Letter, lowercase', ``'No'`` means "Number, other", ``'Mn'`` is "Mark, nonspacing", and ``'So'`` is "Symbol, other". See -`the General Category Values section of the Unicode Character Database documentation `_ for a +`the General Category Values section of the Unicode Character Database documentation `_ for a list of category codes. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 2f46babf5db..a1364d472da 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -352,7 +352,7 @@ Notes: The numeric literals accepted include the digits ``0`` to ``9`` or any Unicode equivalent (code points with the ``Nd`` property). - See http://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt + See https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt for a complete list of code points with the ``Nd`` property. diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst index 124170f198f..dade3f265b5 100644 --- a/Doc/library/unicodedata.rst +++ b/Doc/library/unicodedata.rst @@ -18,7 +18,7 @@ This module provides access to the Unicode Character Database (UCD) which defines character properties for all Unicode characters. The data contained in this database is compiled from the `UCD version 13.0.0 -`_. +`_. The module uses the same names and symbols as defined by Unicode Standard Annex #44, `"Unicode Character Database" @@ -175,6 +175,6 @@ Examples: .. rubric:: Footnotes -.. [#] http://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt +.. [#] https://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt -.. [#] http://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt +.. [#] https://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index c5157ef6f0d..7e912aabbc7 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for: * *Nd* - decimal numbers * *Pc* - connector punctuations * *Other_ID_Start* - explicit list of characters in `PropList.txt - `_ to support backwards + `_ to support backwards compatibility * *Other_ID_Continue* - likewise @@ -934,4 +934,4 @@ occurrence outside string literals and comments is an unconditional error: .. rubric:: Footnotes -.. [#] http://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt +.. [#] https://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 8636f2b0bd5..28398896467 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1752,7 +1752,7 @@ class UnicodeTest(string_tests.CommonTest, # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # only the start byte and the continuation byte(s) are now considered # invalid, instead of the number of bytes specified by the start byte. - # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95, + # See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95, # table 3-8, Row 2) for more information about the algorithm used. FFFD = '\ufffd' sequences = [ diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 4b5c8927618..60b1aac758d 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -176,7 +176,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): self.assertRaises(TypeError, self.db.combining, 'xx') def test_pr29(self): - # http://www.unicode.org/review/pr-29.html + # https://www.unicode.org/review/pr-29.html # See issues #1054943 and #10254. composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161", 'Li\u030dt-s\u1e73\u0301', diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index eb42e071751..39c155321e4 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -153,7 +153,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF will result in surrogates in range D800-DFFF. Surrogates are not valid UTF-8 so they are rejected. - See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf + See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ goto InvalidContinuation1; } diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index c9fcd83f540..810b285de2c 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -893,9 +893,9 @@ def open_data(template, version): import urllib.request if version == '3.2.0': # irregular url structure - url = ('http://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,) + url = ('https://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,) else: - url = ('http://www.unicode.org/Public/%s/ucd/'+template) % (version, '') + url = ('https://www.unicode.org/Public/%s/ucd/'+template) % (version, '') os.makedirs(DATA_DIR, exist_ok=True) urllib.request.urlretrieve(url, filename=local) if local.endswith('.txt'):