mirror of https://github.com/python/cpython
bpo-46932: Update bundled libexpat to 2.4.7 (GH-31736)
This commit is contained in:
parent
f193631387
commit
176835c3d5
|
@ -0,0 +1 @@
|
|||
Update bundled libexpat to 2.4.7
|
|
@ -15,6 +15,7 @@
|
|||
Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
|
||||
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
|
||||
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
|
||||
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -174,8 +175,10 @@ struct XML_cp {
|
|||
};
|
||||
|
||||
/* This is called for an element declaration. See above for
|
||||
description of the model argument. It's the caller's responsibility
|
||||
to free model when finished with it.
|
||||
description of the model argument. It's the user code's responsibility
|
||||
to free model when finished with it. See XML_FreeContentModel.
|
||||
There is no need to free the model from the handler; it can be kept
|
||||
around and freed at a later stage.
|
||||
*/
|
||||
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
|
||||
const XML_Char *name,
|
||||
|
@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding);
|
|||
and the local part will be concatenated without any separator.
|
||||
It is a programming error to use the separator '\0' with namespace
|
||||
triplets (see XML_SetReturnNSTriplet).
|
||||
If a namespace separator is chosen that can be part of a URI or
|
||||
part of an XML name, splitting an expanded name back into its
|
||||
1, 2 or 3 original parts on application level in the element handler
|
||||
may end up vulnerable, so these are advised against; sane choices for
|
||||
a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
|
||||
|
||||
Note that Expat does not validate namespace URIs (beyond encoding)
|
||||
against RFC 3986 today (and is not required to do so with regard to
|
||||
the XML 1.0 namespaces specification) but it may start doing that
|
||||
in future releases. Before that, an application using Expat must
|
||||
be ready to receive namespace URIs containing non-URI characters.
|
||||
*/
|
||||
XMLPARSEAPI(XML_Parser)
|
||||
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
|
||||
|
@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
|
|||
const XML_Char *pubid,
|
||||
int has_internal_subset);
|
||||
|
||||
/* This is called for the start of the DOCTYPE declaration when the
|
||||
/* This is called for the end of the DOCTYPE declaration when the
|
||||
closing > is encountered, but after processing any external
|
||||
subset.
|
||||
*/
|
||||
|
@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
|||
*/
|
||||
#define XML_MAJOR_VERSION 2
|
||||
#define XML_MINOR_VERSION 4
|
||||
#define XML_MICRO_VERSION 6
|
||||
#define XML_MICRO_VERSION 7
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
|
||||
/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
|
||||
__ __ _
|
||||
___\ \/ /_ __ __ _| |_
|
||||
/ _ \\ /| '_ \ / _` | __|
|
||||
|
@ -34,6 +34,7 @@
|
|||
Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
|
||||
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
|
||||
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
|
||||
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -133,7 +134,7 @@
|
|||
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
|
||||
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
|
||||
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
|
||||
* Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
|
||||
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
|
||||
* Windows >=Vista (rand_s): _WIN32. \
|
||||
\
|
||||
If insist on not using any of these, bypass this error by defining \
|
||||
|
@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
|
|||
return XML_ParserCreate_MM(encodingName, NULL, tmp);
|
||||
}
|
||||
|
||||
// "xml=http://www.w3.org/XML/1998/namespace"
|
||||
static const XML_Char implicitContext[]
|
||||
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
|
||||
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
|
||||
|
@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
|
|||
return XML_ERROR_NONE;
|
||||
}
|
||||
|
||||
static XML_Bool
|
||||
is_rfc3986_uri_char(XML_Char candidate) {
|
||||
// For the RFC 3986 ANBF grammar see
|
||||
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
|
||||
|
||||
switch (candidate) {
|
||||
// From rule "ALPHA" (uppercase half)
|
||||
case 'A':
|
||||
case 'B':
|
||||
case 'C':
|
||||
case 'D':
|
||||
case 'E':
|
||||
case 'F':
|
||||
case 'G':
|
||||
case 'H':
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'O':
|
||||
case 'P':
|
||||
case 'Q':
|
||||
case 'R':
|
||||
case 'S':
|
||||
case 'T':
|
||||
case 'U':
|
||||
case 'V':
|
||||
case 'W':
|
||||
case 'X':
|
||||
case 'Y':
|
||||
case 'Z':
|
||||
|
||||
// From rule "ALPHA" (lowercase half)
|
||||
case 'a':
|
||||
case 'b':
|
||||
case 'c':
|
||||
case 'd':
|
||||
case 'e':
|
||||
case 'f':
|
||||
case 'g':
|
||||
case 'h':
|
||||
case 'i':
|
||||
case 'j':
|
||||
case 'k':
|
||||
case 'l':
|
||||
case 'm':
|
||||
case 'n':
|
||||
case 'o':
|
||||
case 'p':
|
||||
case 'q':
|
||||
case 'r':
|
||||
case 's':
|
||||
case 't':
|
||||
case 'u':
|
||||
case 'v':
|
||||
case 'w':
|
||||
case 'x':
|
||||
case 'y':
|
||||
case 'z':
|
||||
|
||||
// From rule "DIGIT"
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
|
||||
// From rule "pct-encoded"
|
||||
case '%':
|
||||
|
||||
// From rule "unreserved"
|
||||
case '-':
|
||||
case '.':
|
||||
case '_':
|
||||
case '~':
|
||||
|
||||
// From rule "gen-delims"
|
||||
case ':':
|
||||
case '/':
|
||||
case '?':
|
||||
case '#':
|
||||
case '[':
|
||||
case ']':
|
||||
case '@':
|
||||
|
||||
// From rule "sub-delims"
|
||||
case '!':
|
||||
case '$':
|
||||
case '&':
|
||||
case '\'':
|
||||
case '(':
|
||||
case ')':
|
||||
case '*':
|
||||
case '+':
|
||||
case ',':
|
||||
case ';':
|
||||
case '=':
|
||||
return XML_TRUE;
|
||||
|
||||
default:
|
||||
return XML_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* addBinding() overwrites the value of prefix->binding without checking.
|
||||
Therefore one must keep track of the old value outside of addBinding().
|
||||
*/
|
||||
static enum XML_Error
|
||||
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
|
||||
const XML_Char *uri, BINDING **bindingsPtr) {
|
||||
// "http://www.w3.org/XML/1998/namespace"
|
||||
static const XML_Char xmlNamespace[]
|
||||
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
|
||||
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
|
||||
|
@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
|
|||
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
|
||||
ASCII_e, '\0'};
|
||||
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
|
||||
// "http://www.w3.org/2000/xmlns/"
|
||||
static const XML_Char xmlnsNamespace[]
|
||||
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
|
||||
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
|
||||
|
@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
|
|||
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
|
||||
isXMLNS = XML_FALSE;
|
||||
|
||||
// NOTE: While Expat does not validate namespace URIs against RFC 3986,
|
||||
// we have to at least make sure that the XML processor on top of
|
||||
// Expat (that is splitting tag names by namespace separator into
|
||||
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
|
||||
// by an attacker putting additional namespace separator characters
|
||||
// into namespace declarations. That would be ambiguous and not to
|
||||
// be expected.
|
||||
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
|
||||
// NOTE: While Expat does not validate namespace URIs against RFC 3986
|
||||
// today (and is not REQUIRED to do so with regard to the XML 1.0
|
||||
// namespaces specification) we have to at least make sure that
|
||||
// the application on top of Expat (that is likely splitting expanded
|
||||
// element names ("qualified names") of form
|
||||
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
|
||||
// in its element handler code) cannot be confused by an attacker
|
||||
// putting additional namespace separator characters into namespace
|
||||
// declarations. That would be ambiguous and not to be expected.
|
||||
//
|
||||
// While the HTML API docs of function XML_ParserCreateNS have been
|
||||
// advising against use of a namespace separator character that can
|
||||
// appear in a URI for >20 years now, some widespread applications
|
||||
// are using URI characters (':' (colon) in particular) for a
|
||||
// namespace separator, in practice. To keep these applications
|
||||
// functional, we only reject namespace URIs containing the
|
||||
// application-chosen namespace separator if the chosen separator
|
||||
// is a non-URI character with regard to RFC 3986.
|
||||
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
|
||||
&& ! is_rfc3986_uri_char(uri[len])) {
|
||||
return XML_ERROR_SYNTAX;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue