bpo-33956: update vendored expat to 2.2.5 (GH-7925)

This commit is contained in:
Benjamin Peterson 2018-06-26 19:25:45 -07:00 committed by GitHub
parent 58ed7307ea
commit 4e21100fa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1350 additions and 1461 deletions

View File

@ -0,0 +1 @@
Update vendored Expat library copy to version 2.2.5.

View File

@ -1076,7 +1076,7 @@ XML_GetFeatureList(void);
*/ */
#define XML_MAJOR_VERSION 2 #define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 2 #define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 4 #define XML_MICRO_VERSION 5
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -35,12 +35,8 @@
/* External API definitions */ /* External API definitions */
/* Namespace external symbols to allow multiple libexpat versions to
co-exist. */
#include "pyexpatns.h"
#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__) #if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
#define XML_USE_MSC_EXTENSIONS 1 # define XML_USE_MSC_EXTENSIONS 1
#endif #endif
/* Expat tries very hard to make the API boundary very specifically /* Expat tries very hard to make the API boundary very specifically
@ -66,11 +62,11 @@
system headers may assume the cdecl convention. system headers may assume the cdecl convention.
*/ */
#ifndef XMLCALL #ifndef XMLCALL
#if defined(_MSC_VER) # if defined(_MSC_VER)
#define XMLCALL __cdecl # define XMLCALL __cdecl
#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER) # elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER)
#define XMLCALL __attribute__((cdecl)) # define XMLCALL __attribute__((cdecl))
#else # else
/* For any platform which uses this definition and supports more than /* For any platform which uses this definition and supports more than
one calling convention, we need to extend this definition to one calling convention, we need to extend this definition to
declare the convention used on that platform, if it's possible to declare the convention used on that platform, if it's possible to
@ -81,41 +77,41 @@
pre-processor and how to specify the same calling convention as the pre-processor and how to specify the same calling convention as the
platform's malloc() implementation. platform's malloc() implementation.
*/ */
#define XMLCALL # define XMLCALL
#endif # endif
#endif /* not defined XMLCALL */ #endif /* not defined XMLCALL */
#if !defined(XML_STATIC) && !defined(XMLIMPORT) #if !defined(XML_STATIC) && !defined(XMLIMPORT)
#ifndef XML_BUILDING_EXPAT # ifndef XML_BUILDING_EXPAT
/* using Expat from an application */ /* using Expat from an application */
#ifdef XML_USE_MSC_EXTENSIONS # ifdef XML_USE_MSC_EXTENSIONS
#define XMLIMPORT __declspec(dllimport) # define XMLIMPORT __declspec(dllimport)
#endif # endif
#endif # endif
#endif /* not defined XML_STATIC */ #endif /* not defined XML_STATIC */
#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4) #if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
#define XMLIMPORT __attribute__ ((visibility ("default"))) # define XMLIMPORT __attribute__ ((visibility ("default")))
#endif #endif
/* If we didn't define it above, define it away: */ /* If we didn't define it above, define it away: */
#ifndef XMLIMPORT #ifndef XMLIMPORT
#define XMLIMPORT # define XMLIMPORT
#endif #endif
#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
#define XML_ATTR_MALLOC __attribute__((__malloc__)) # define XML_ATTR_MALLOC __attribute__((__malloc__))
#else #else
#define XML_ATTR_MALLOC # define XML_ATTR_MALLOC
#endif #endif
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) #if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) # define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
#else #else
#define XML_ATTR_ALLOC_SIZE(x) # define XML_ATTR_ALLOC_SIZE(x)
#endif #endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
@ -125,33 +121,35 @@ extern "C" {
#endif #endif
#ifdef XML_UNICODE_WCHAR_T #ifdef XML_UNICODE_WCHAR_T
# define XML_UNICODE # ifndef XML_UNICODE
# define XML_UNICODE
# endif
# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) # if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2)
# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" # error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc"
# endif # endif
#endif #endif
#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ #ifdef XML_UNICODE /* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T # ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char; typedef wchar_t XML_Char;
typedef wchar_t XML_LChar; typedef wchar_t XML_LChar;
#else # else
typedef unsigned short XML_Char; typedef unsigned short XML_Char;
typedef char XML_LChar; typedef char XML_LChar;
#endif /* XML_UNICODE_WCHAR_T */ # endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */ #else /* Information is UTF-8 encoded. */
typedef char XML_Char; typedef char XML_Char;
typedef char XML_LChar; typedef char XML_LChar;
#endif /* XML_UNICODE */ #endif /* XML_UNICODE */
#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ #ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */
#if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400 # if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
typedef __int64 XML_Index; typedef __int64 XML_Index;
typedef unsigned __int64 XML_Size; typedef unsigned __int64 XML_Size;
#else # else
typedef long long XML_Index; typedef long long XML_Index;
typedef unsigned long long XML_Size; typedef unsigned long long XML_Size;
#endif # endif
#else #else
typedef long XML_Index; typedef long XML_Index;
typedef unsigned long XML_Size; typedef unsigned long XML_Size;

View File

@ -116,7 +116,7 @@ extern "C" {
void void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef); _INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef);
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -84,7 +84,7 @@ HMODULE _Expat_LoadLibrary(LPCTSTR filename)
/* Get a handle to kernel32 so we can access its functions at runtime */ /* Get a handle to kernel32 so we can access its functions at runtime */
HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32")); HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32"));
if(!hKernel32) if(!hKernel32)
return NULL; return NULL; /* LCOV_EXCL_LINE */
/* Attempt to find LoadLibraryEx() which is only available on Windows 2000 /* Attempt to find LoadLibraryEx() which is only available on Windows 2000
and above */ and above */

File diff suppressed because it is too large Load Diff

View File

@ -31,8 +31,17 @@
*/ */
#include <stddef.h> #include <stddef.h>
#include <stdbool.h> #include <string.h> /* memcpy */
#include <string.h> // memcpy
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
/* for vs2012/11.0/1700 and earlier Visual Studio compilers */
# define bool int
# define false 0
# define true 1
#else
# include <stdbool.h>
#endif
#ifdef _WIN32 #ifdef _WIN32
#include "winconfig.h" #include "winconfig.h"
@ -57,7 +66,6 @@
{ PREFIX(prologTok), PREFIX(contentTok), \ { PREFIX(prologTok), PREFIX(contentTok), \
PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(sameName), \
PREFIX(nameMatchesAscii), \ PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \ PREFIX(nameLength), \
PREFIX(skipS), \ PREFIX(skipS), \
@ -354,7 +362,7 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
}; };
void void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef) _INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef)
{ {
const char * fromLim = *fromLimRef; const char * fromLim = *fromLimRef;
size_t walked = 0; size_t walked = 0;
@ -405,18 +413,22 @@ utf8_toUtf8(const ENCODING *UNUSED_P(enc),
} }
/* Avoid copying partial characters (from incomplete input). */ /* Avoid copying partial characters (from incomplete input). */
const char * const fromLimBefore = fromLim; {
align_limit_to_full_utf8_characters(*fromP, &fromLim); const char * const fromLimBefore = fromLim;
if (fromLim < fromLimBefore) { _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
input_incomplete = true; if (fromLim < fromLimBefore) {
input_incomplete = true;
}
} }
const ptrdiff_t bytesToCopy = fromLim - *fromP; {
memcpy((void *)*toP, (const void *)*fromP, (size_t)bytesToCopy); const ptrdiff_t bytesToCopy = fromLim - *fromP;
*fromP += bytesToCopy; memcpy(*toP, *fromP, bytesToCopy);
*toP += bytesToCopy; *fromP += bytesToCopy;
*toP += bytesToCopy;
}
if (output_exhausted) // needs to go first if (output_exhausted) /* needs to go first */
return XML_CONVERT_OUTPUT_EXHAUSTED; return XML_CONVERT_OUTPUT_EXHAUSTED;
else if (input_incomplete) else if (input_incomplete)
return XML_CONVERT_INPUT_INCOMPLETE; return XML_CONVERT_INPUT_INCOMPLETE;
@ -1452,9 +1464,8 @@ unknown_toUtf8(const ENCODING *enc,
return XML_CONVERT_OUTPUT_EXHAUSTED; return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++; (*fromP)++;
} }
do { memcpy(*toP, utf8, n);
*(*toP)++ = *utf8++; *toP += n;
} while (--n != 0);
} }
} }

View File

@ -167,9 +167,6 @@ enum XML_Convert_Result {
struct encoding { struct encoding {
SCANNER scanners[XML_N_STATES]; SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES]; SCANNER literalScanners[XML_N_LITERAL_TYPES];
int (PTRCALL *sameName)(const ENCODING *,
const char *,
const char *);
int (PTRCALL *nameMatchesAscii)(const ENCODING *, int (PTRCALL *nameMatchesAscii)(const ENCODING *,
const char *, const char *,
const char *, const char *,
@ -260,8 +257,6 @@ struct encoding {
#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
(((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))

View File

@ -1653,79 +1653,6 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
return 0; return 0;
} }
/* This function does not appear to be called from anywhere within the
* library code. It is used via the macro XmlSameName(), which is
* defined but never used. Since it appears in the encoding function
* table, removing it is not a thing to be undertaken lightly. For
* the moment, we simply exclude it from coverage tests.
*
* LCOV_EXCL_START
*/
/* Compare the two names starting at ptr1 and ptr2, one character at a
 * time, driven by the byte type of the current character at ptr1.
 *
 * Returns 0 as soon as a compared byte differs, or when ptr1 has reached
 * a byte type not handled as part of a name while ptr2 still points at
 * one that is.  Returns 1 when both names end at the same position.
 *
 * NOTE(review): the meaning of the BT_* byte types is inferred from
 * their identifiers; BYTE_TYPE, MINBPC and the BT_* constants are
 * defined outside this block -- confirm against their definitions.
 */
static int PTRCALL
PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{
for (;;) {
/* Classify the current character of the first name. */
switch (BYTE_TYPE(enc, ptr1)) {
/* Each LEAD_CASE(n) expands to a case label that compares one byte and
 * then falls into the next label.  Together with the final comparison
 * after the macro uses, BT_LEAD4 compares four bytes, BT_LEAD3 three
 * and BT_LEAD2 two.
 */
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
if (*ptr1++ != *ptr2++) \
return 0;
LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
#undef LEAD_CASE
/* fall through */
/* Last byte of the multi-byte sequence started by a BT_LEADn case. */
if (*ptr1++ != *ptr2++)
return 0;
break;
/* Byte types treated as part of a name (presumably the name characters
 * of the encoding): compare MINBPC(enc) bytes, one comparison per byte.
 */
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XML_NS
case BT_COLON:
#endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
if (*ptr2++ != *ptr1++)
return 0;
if (MINBPC(enc) > 1) {
if (*ptr2++ != *ptr1++)
return 0;
if (MINBPC(enc) > 2) {
if (*ptr2++ != *ptr1++)
return 0;
if (MINBPC(enc) > 3) {
if (*ptr2++ != *ptr1++)
return 0;
}
}
}
break;
/* Any other byte type at ptr1 ends the first name. */
default:
/* Shortcut when MINBPC(enc) is 1: an identical byte at ptr2 means the
 * second name ends here as well.
 */
if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
return 1;
/* Otherwise the names match only if ptr2 is not at one of the byte
 * types handled as part of a name above.
 */
switch (BYTE_TYPE(enc, ptr2)) {
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
case BT_NONASCII:
case BT_NMSTRT:
#ifdef XML_NS
case BT_COLON:
#endif
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
return 0;
default:
return 1;
}
}
}
/* not reached */
}
/* LCOV_EXCL_STOP */
static int PTRCALL static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2) const char *end1, const char *ptr2)
@ -1733,7 +1660,7 @@ PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (end1 - ptr1 < MINBPC(enc)) { if (end1 - ptr1 < MINBPC(enc)) {
/* This line cannot be executed. The incoming data has already /* This line cannot be executed. The incoming data has already
* been tokenized once, so incomplete characters like this have * been tokenized once, so imcomplete characters like this have
* already been eliminated from the input. Retaining the * already been eliminated from the input. Retaining the
* paranoia check is still valuable, however. * paranoia check is still valuable, however.
*/ */