bpo-46932: Update bundled libexpat to 2.4.7 (GH-31736)

This commit is contained in:
Steve Dower 2022-03-07 21:46:18 +00:00 committed by GitHub
parent f193631387
commit 176835c3d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 156 additions and 14 deletions

View File

@ -0,0 +1 @@
Update bundled libexpat to 2.4.7

View File

@ -15,6 +15,7 @@
Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -174,8 +175,10 @@ struct XML_cp {
};
/* This is called for an element declaration. See above for
description of the model argument. It's the caller's responsibility
to free model when finished with it.
description of the model argument. It's the user code's responsibility
to free model when finished with it. See XML_FreeContentModel.
There is no need to free the model from within the handler; it can be
kept around and freed at a later stage.
*/
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding);
and the local part will be concatenated without any separator.
It is a programming error to use the separator '\0' with namespace
triplets (see XML_SetReturnNSTriplet).
If a namespace separator is chosen that can be part of a URI or
part of an XML name, splitting an expanded name back into its
1, 2 or 3 original parts at the application level in the element
handler may end up vulnerable, so such separators are advised against;
sane choices for a namespace separator are e.g. '\n' (line feed) and
'|' (pipe).
Note that Expat does not validate namespace URIs (beyond encoding)
against RFC 3986 today (and is not required to do so with regard to
the XML 1.0 namespaces specification) but it may start doing that
in future releases. Before that, an application using Expat must
be ready to receive namespace URIs containing non-URI characters.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *pubid,
int has_internal_subset);
/* This is called for the start of the DOCTYPE declaration when the
/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 4
#define XML_MICRO_VERSION 6
#define XML_MICRO_VERSION 7
#ifdef __cplusplus
}

View File

@ -1,4 +1,4 @@
/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@ -34,6 +34,7 @@
Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -133,7 +134,7 @@
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
* Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
* Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
return XML_ParserCreate_MM(encodingName, NULL, tmp);
}
// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}
/* Tell whether a single character belongs to the set of characters that
   the RFC 3986 ABNF grammar uses to build URIs, see
   https://datatracker.ietf.org/doc/html/rfc3986#appendix-A

   Returns XML_TRUE if candidate is one of the characters listed below,
   and XML_FALSE for everything else (including '\0').
*/
static XML_Bool
is_rfc3986_uri_char(XML_Char candidate) {
  static const char uriChars[]
      = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" // rule "ALPHA" (uppercase half)
        "abcdefghijklmnopqrstuvwxyz" // rule "ALPHA" (lowercase half)
        "0123456789"                 // rule "DIGIT"
        "%"                          // rule "pct-encoded"
        "-._~"                       // rule "unreserved"
        ":/?#[]@"                    // rule "gen-delims"
        "!$&'()*+,;=";               // rule "sub-delims"

  // The scan stops at the table's terminator without comparing against it,
  // so a candidate of '\0' is never reported as a URI character.
  for (const char *cursor = uriChars; *cursor != '\0'; cursor++) {
    if (candidate == (XML_Char)*cursor) {
      return XML_TRUE;
    }
  }
  return XML_FALSE;
}
/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
static enum XML_Error
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
const XML_Char *uri, BINDING **bindingsPtr) {
// "http://www.w3.org/XML/1998/namespace"
static const XML_Char xmlNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
ASCII_e, '\0'};
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
// "http://www.w3.org/2000/xmlns/"
static const XML_Char xmlnsNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
// NOTE: While Expat does not validate namespace URIs against RFC 3986,
// we have to at least make sure that the XML processor on top of
// Expat (that is splitting tag names by namespace separator into
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
// by an attacker putting additional namespace separator characters
// into namespace declarations. That would be ambiguous and not to
// be expected.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
// NOTE: While Expat does not validate namespace URIs against RFC 3986
// today (and is not REQUIRED to do so with regard to the XML 1.0
// namespaces specification), we have to at least make sure that
// the application on top of Expat (that is likely splitting expanded
// element names ("qualified names") of form
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
// in its element handler code) cannot be confused by an attacker
// putting additional namespace separator characters into namespace
// declarations. That would be ambiguous and not to be expected.
//
// While the HTML API docs of function XML_ParserCreateNS have been
// advising against use of a namespace separator character that can
// appear in a URI for >20 years now, some widespread applications
// in practice use URI characters (':' (colon) in particular) as the
// namespace separator. To keep these applications functional, we
// only reject namespace URIs containing the application-chosen
// namespace separator if the chosen separator is a non-URI character
// with regard to RFC 3986.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
&& ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX;
}
}