bpo-30947: Update libexpat from 2.2.1 to 2.2.3 (#3106)

* bpo-30947: Update libexpat from 2.2.1 to 2.2.3

* Add NEWS entry

* Add new loadlibrary.c

* expat_external.h: restore include "pyexpatns.h"

* PCbuild: add expat/loadlibrary.c

* Define XML_POOR_ENTROPY to compile expat
This commit is contained in:
Victor Stinner 2017-08-18 23:43:54 +02:00 committed by GitHub
parent c99d41f9c0
commit 93d0cb58b4
13 changed files with 734 additions and 103 deletions

View File

@ -0,0 +1,2 @@
Upgrade libexpat embedded copy from version 2.2.1 to 2.2.3 to get security
fixes.

View File

@ -24,7 +24,6 @@ extern "C" {
struct XML_ParserStruct; struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser; typedef struct XML_ParserStruct *XML_Parser;
/* Should this be defined using stdbool.h when C99 is available? */
typedef unsigned char XML_Bool; typedef unsigned char XML_Bool;
#define XML_TRUE ((XML_Bool) 1) #define XML_TRUE ((XML_Bool) 1)
#define XML_FALSE ((XML_Bool) 0) #define XML_FALSE ((XML_Bool) 0)
@ -1049,7 +1048,7 @@ XML_GetFeatureList(void);
*/ */
#define XML_MAJOR_VERSION 2 #define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 2 #define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 1 #define XML_MICRO_VERSION 3
#ifdef __cplusplus #ifdef __cplusplus
} }

141
Modules/expat/loadlibrary.c Normal file
View File

@ -0,0 +1,141 @@
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 2016 - 2017, Steve Holme, <steve_holme@hotmail.com>.
*
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
* THIRD PARTY RIGHTS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
* THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in this Software without prior written authorization of the
* copyright holder.
*
***************************************************************************/
#if defined(_WIN32)
#include <windows.h>
#include <tchar.h>
HMODULE _Expat_LoadLibrary(LPCTSTR filename);
#if !defined(LOAD_WITH_ALTERED_SEARCH_PATH)
#define LOAD_WITH_ALTERED_SEARCH_PATH 0x00000008
#endif
#if !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
#define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
#endif
/* We use our own typedef here since some headers might lack these */
typedef HMODULE (APIENTRY *LOADLIBRARYEX_FN)(LPCTSTR, HANDLE, DWORD);
/* See function definitions in winbase.h */
#ifdef UNICODE
# ifdef _WIN32_WCE
# define LOADLIBARYEX L"LoadLibraryExW"
# else
# define LOADLIBARYEX "LoadLibraryExW"
# endif
#else
# define LOADLIBARYEX "LoadLibraryExA"
#endif
/*
 * _Expat_LoadLibrary()
 *
 * Loads a DLL at run time, picking the most restrictive search behaviour
 * that the running version of Windows offers.
 *
 * Parameters:
 *
 * filename  [in] - File name or full path of the DLL to load. A bare file
 *                  name (one containing neither '\' nor '/') is loaded
 *                  from the Windows system directory.
 *
 * Returns the module handle on success; otherwise NULL.
 */
HMODULE _Expat_LoadLibrary(LPCTSTR filename)
{
  HMODULE kernel32;
  LOADLIBRARYEX_FN loadLibraryEx;
  UINT sysDirLen;
  TCHAR *fullPath;
  HMODULE module = NULL;

  /* kernel32 is queried at run time so that its optional entry points can
     be probed instead of being linked against directly */
  kernel32 = GetModuleHandle(TEXT("kernel32"));
  if(!kernel32)
    return NULL;

  /* LoadLibraryEx() is only available on Windows 2000 and above; the
     pointer stays NULL when it is missing */
  loadLibraryEx = (LOADLIBRARYEX_FN) GetProcAddress(kernel32, LOADLIBARYEX);

  /* Case 1: the caller already supplied a path. Both back slashes and
     forward slashes are accepted as separators at the API level, even
     though the command prompt does not support the latter */
  if(_tcspbrk(filename, TEXT("\\/"))) {
    if(loadLibraryEx)
      return loadLibraryEx(filename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
    /** !checksrc! disable BANNEDFUNC 1 **/
    return LoadLibrary(filename);
  }

  /* Case 2: bare file name and KB2533623 is installed (detected through the
     presence of AddDllDirectory). LOAD_LIBRARY_SEARCH_SYSTEM32 is only
     supported on Windows Vista, Windows Server 2008, Windows 7 and Windows
     Server 2008 R2 with this patch, or natively on Windows 8 and above */
  if(loadLibraryEx && GetProcAddress(kernel32, "AddDllDirectory"))
    return loadLibraryEx(filename, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);

  /* Case 3: bare file name on an older system; build
     "<system directory>\<filename>" by hand */
  sysDirLen = GetSystemDirectory(NULL, 0);
  if(!sysDirLen)
    return NULL;

  /* sysDirLen already includes room for the null terminator; the extra
     character is for the path separator */
  fullPath = malloc(sizeof(TCHAR) * (sysDirLen + 1 + _tcslen(filename)));
  if(fullPath && GetSystemDirectory(fullPath, sysDirLen)) {
    /* Append the separator and the file name to the system directory */
    _tcscpy(fullPath + _tcslen(fullPath), TEXT("\\"));
    _tcscpy(fullPath + _tcslen(fullPath), filename);

    /* Load the DLL from the Windows system directory */
    if(loadLibraryEx)
      module = loadLibraryEx(fullPath, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
    else
      /** !checksrc! disable BANNEDFUNC 1 **/
      module = LoadLibrary(fullPath);
  }
  free(fullPath);

  return module;
}
#else /* defined(_WIN32) */
/* ISO C requires a translation unit to contain at least one declaration
[-Wempty-translation-unit] */
typedef int _TRANSLATION_UNIT_LOAD_LIBRARY_C_NOT_EMTPY;
#endif /* defined(_WIN32) */

View File

@ -2,9 +2,8 @@
* siphash.h - SipHash-2-4 in a single header file * siphash.h - SipHash-2-4 in a single header file
* -------------------------------------------------------------------------- * --------------------------------------------------------------------------
* Derived by William Ahern from the reference implementation[1] published[2] * Derived by William Ahern from the reference implementation[1] published[2]
* by Jean-Philippe Aumasson and Daniel J. Berstein. Licensed in kind.
* by Jean-Philippe Aumasson and Daniel J. Berstein. * by Jean-Philippe Aumasson and Daniel J. Berstein.
* Minimal changes by Sebastian Pipping on top, details below. * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below.
* Licensed under the CC0 Public Domain Dedication license. * Licensed under the CC0 Public Domain Dedication license.
* *
* 1. https://www.131002.net/siphash/siphash24.c * 1. https://www.131002.net/siphash/siphash24.c
@ -12,14 +11,25 @@
* -------------------------------------------------------------------------- * --------------------------------------------------------------------------
* HISTORY: * HISTORY:
* *
* 2017-06-10 (Sebastian Pipping) * 2017-07-25 (Vadim Zeitlin)
* - Fix use of SIPHASH_MAIN macro
*
* 2017-07-05 (Sebastian Pipping)
* - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++
* - Add const qualifiers at two places
* - Ensure <=80 characters line length (assuming tab width 4)
*
* 2017-06-23 (Victor Stinner)
* - Address Win64 compile warnings
*
* 2017-06-18 (Sebastian Pipping)
* - Clarify license note in the header * - Clarify license note in the header
* - Address C89 issues: * - Address C89 issues:
* - Stop using inline keyword (and let compiler decide) * - Stop using inline keyword (and let compiler decide)
* - Turn integer suffix ULL to UL
* - Replace _Bool by int * - Replace _Bool by int
* - Turn macro siphash24 into a function * - Turn macro siphash24 into a function
* - Address invalid conversion (void pointer) by explicit cast * - Address invalid conversion (void pointer) by explicit cast
* - Address lack of stdint.h for Visual Studio 2003 to 2008
* - Always expose sip24_valid (for self-tests) * - Always expose sip24_valid (for self-tests)
* *
* 2012-11-04 - Born. (William Ahern) * 2012-11-04 - Born. (William Ahern)
@ -76,7 +86,23 @@
#define SIPHASH_H #define SIPHASH_H
#include <stddef.h> /* size_t */ #include <stddef.h> /* size_t */
#include <stdint.h> /* uint64_t uint32_t uint8_t */
/*
 * Fixed-width integer types used throughout this header.
 * Visual Studio 2003 to 2008 lack <stdint.h>, so for those compilers the
 * three types needed here are mapped onto MSVC's sized __intN types;
 * every other compiler gets them from <stdint.h>.
 */
#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600)
/* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
typedef unsigned __int8 uint8_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h> /* uint64_t uint32_t uint8_t */
#endif
/*
 * Workaround to not require a C++11 compiler for using ULL suffix
 * if this code is included and compiled as C++; related GCC warning is:
 * warning: use of C++11 long long integer constant [-Wlong-long]
 *
 * Builds a 64-bit constant from two 32-bit halves: `high` supplies bits
 * 63..32 and `low` bits 31..0. Both arguments and the whole expansion are
 * parenthesized so that arguments containing low-precedence operators
 * (e.g. `c ? a : b`) are grouped as the caller wrote them.
 */
#define _SIP_ULL(high, low) ((((uint64_t)(high)) << 32) | (low))
#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ( (x) >> (64 - (b)))) #define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ( (x) >> (64 - (b))))
@ -158,11 +184,12 @@ static void sip_round(struct siphash *H, const int rounds) {
} /* sip_round() */ } /* sip_round() */
static struct siphash *sip24_init(struct siphash *H, const struct sipkey *key) { static struct siphash *sip24_init(struct siphash *H,
H->v0 = 0x736f6d6570736575UL ^ key->k[0]; const struct sipkey *key) {
H->v1 = 0x646f72616e646f6dUL ^ key->k[1]; H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
H->v2 = 0x6c7967656e657261UL ^ key->k[0]; H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
H->v3 = 0x7465646279746573UL ^ key->k[1]; H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
H->p = H->buf; H->p = H->buf;
H->c = 0; H->c = 0;
@ -173,7 +200,8 @@ static struct siphash *sip24_init(struct siphash *H, const struct sipkey *key) {
#define sip_endof(a) (&(a)[sizeof (a) / sizeof *(a)]) #define sip_endof(a) (&(a)[sizeof (a) / sizeof *(a)])
static struct siphash *sip24_update(struct siphash *H, const void *src, size_t len) { static struct siphash *sip24_update(struct siphash *H, const void *src,
size_t len) {
const unsigned char *p = (const unsigned char *)src, *pe = p + len; const unsigned char *p = (const unsigned char *)src, *pe = p + len;
uint64_t m; uint64_t m;
@ -198,7 +226,7 @@ static struct siphash *sip24_update(struct siphash *H, const void *src, size_t l
static uint64_t sip24_final(struct siphash *H) { static uint64_t sip24_final(struct siphash *H) {
char left = (char)(H->p - H->buf); const char left = (char)(H->p - H->buf);
uint64_t b = (H->c + left) << 56; uint64_t b = (H->c + left) << 56;
switch (left) { switch (left) {
@ -222,7 +250,8 @@ static uint64_t sip24_final(struct siphash *H) {
} /* sip24_final() */ } /* sip24_final() */
static uint64_t siphash24(const void *src, size_t len, const struct sipkey *key) { static uint64_t siphash24(const void *src, size_t len,
const struct sipkey *key) {
struct siphash state = SIPHASH_INITIALIZER; struct siphash state = SIPHASH_INITIALIZER;
return sip24_final(sip24_update(sip24_init(&state, key), src, len)); return sip24_final(sip24_update(sip24_init(&state, key), src, len));
} /* siphash24() */ } /* siphash24() */
@ -310,7 +339,8 @@ static int sip24_valid(void) {
struct sipkey k; struct sipkey k;
size_t i; size_t i;
sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"); sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011"
"\012\013\014\015\016\017");
for (i = 0; i < sizeof in; ++i) { for (i = 0; i < sizeof in; ++i) {
in[i] = (unsigned char)i; in[i] = (unsigned char)i;
@ -323,12 +353,12 @@ static int sip24_valid(void) {
} /* sip24_valid() */ } /* sip24_valid() */
#if SIPHASH_MAIN #ifdef SIPHASH_MAIN
#include <stdio.h> #include <stdio.h>
int main(void) { int main(void) {
int ok = sip24_valid(); const int ok = sip24_valid();
if (ok) if (ok)
puts("OK"); puts("OK");

View File

@ -1,10 +1,12 @@
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission. See the file COPYING for copying permission.
77fea421d361dca90041d0040ecf1dca651167fadf2af79e990e35168d70d933 (2.2.1+) 101bfd65d1ff3d1511cf6671e6aae65f82cd97df6f4da137d46d510731830ad9 (2.2.3+)
*/ */
#define _GNU_SOURCE 1 /* syscall prototype */ #if !defined(_GNU_SOURCE)
# define _GNU_SOURCE 1 /* syscall prototype */
#endif
#include <stddef.h> #include <stddef.h>
#include <string.h> /* memset(), memcpy() */ #include <string.h> /* memset(), memcpy() */
@ -19,6 +21,8 @@
#include <sys/time.h> /* gettimeofday() */ #include <sys/time.h> /* gettimeofday() */
#include <sys/types.h> /* getpid() */ #include <sys/types.h> /* getpid() */
#include <unistd.h> /* getpid() */ #include <unistd.h> /* getpid() */
#include <fcntl.h> /* O_RDONLY */
#include <errno.h>
#endif #endif
#define XML_BUILDING_EXPAT 1 #define XML_BUILDING_EXPAT 1
@ -33,6 +37,57 @@
#include "expat.h" #include "expat.h"
#include "siphash.h" #include "siphash.h"
#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
# if defined(HAVE_GETRANDOM)
# include <sys/random.h> /* getrandom */
# else
# include <unistd.h> /* syscall */
# include <sys/syscall.h> /* SYS_getrandom */
# endif
# if ! defined(GRND_NONBLOCK)
# define GRND_NONBLOCK 0x0001
# endif /* defined(GRND_NONBLOCK) */
#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
#if defined(HAVE_LIBBSD) \
&& (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
# include <bsd/stdlib.h>
#endif
#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
#endif
#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
&& !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
&& !defined(XML_DEV_URANDOM) \
&& !defined(_WIN32) \
&& !defined(XML_POOR_ENTROPY)
# error \
You do not have support for any sources of high quality entropy \
enabled. For end user security, that is probably not what you want. \
\
Your options include: \
* Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
* Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
* BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
* BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
* Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
* Windows (RtlGenRandom): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
XML_POOR_ENTROPY; you have been warned. \
\
For CMake, one way to pass the define is: \
cmake -DCMAKE_C_FLAGS="-pipe -O2 -DHAVE_SYSCALL_GETRANDOM" . \
\
If you have reasons to patch this detection code away or need changes \
to the build system, please open a bug. Thank you!
#endif
#ifdef XML_UNICODE #ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert #define XmlConvert XmlUtf16Convert
@ -436,6 +491,9 @@ static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc, getElementType(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end); const char *ptr, const char *end);
static XML_Char *copyString(const XML_Char *s,
const XML_Memory_Handling_Suite *memsuite);
static unsigned long generate_hash_secret_salt(XML_Parser parser); static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser);
@ -696,21 +754,13 @@ static const XML_Char implicitContext[] = {
#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
# include <errno.h>
# if defined(HAVE_GETRANDOM)
# include <sys/random.h> /* getrandom */
# else
# include <unistd.h> /* syscall */
# include <sys/syscall.h> /* SYS_getrandom */
# endif
/* Obtain entropy on Linux 3.17+ */ /* Obtain entropy on Linux 3.17+ */
static int static int
writeRandomBytes_getrandom(void * target, size_t count) { writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
int success = 0; /* full count bytes written? */ int success = 0; /* full count bytes written? */
size_t bytesWrittenTotal = 0; size_t bytesWrittenTotal = 0;
const unsigned int getrandomFlags = 0; const unsigned int getrandomFlags = GRND_NONBLOCK;
do { do {
void * const currentTarget = (void*)((char*)target + bytesWrittenTotal); void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
@ -728,7 +778,7 @@ writeRandomBytes_getrandom(void * target, size_t count) {
if (bytesWrittenTotal >= count) if (bytesWrittenTotal >= count)
success = 1; success = 1;
} }
} while (! success && (errno == EINTR || errno == EAGAIN)); } while (! success && (errno == EINTR));
return success; return success;
} }
@ -736,12 +786,67 @@ writeRandomBytes_getrandom(void * target, size_t count) {
#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
/* Extract entropy from /dev/urandom.
 *
 * Fills the `count` bytes starting at `target` with data read from
 * /dev/urandom.
 *
 * Returns 1 if all `count` bytes were written (trivially so when `count`
 * is zero), or 0 if the device could not be opened or reading stopped
 * before the buffer was full.
 */
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      /* A short read is not an error: keep going until the buffer is full */
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore == 0) || (errno != EINTR)) {
      /* errno is only meaningful once read() has reported failure (-1);
         stop on end-of-file or on any error other than an interrupted
         call, which is the one case worth retrying */
      break;
    }
  }

  close(fd);
  return (bytesWrittenTotal >= count) ? 1 : 0;
}
#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
#if defined(HAVE_ARC4RANDOM)
/* Fill the `count` bytes starting at `target` with output of arc4random().
 *
 * Each 32-bit value drawn is emitted least significant byte first; the
 * last value drawn is only partially consumed when `count` is not a
 * multiple of four.
 */
static void
writeRandomBytes_arc4random(void * target, size_t count) {
  uint8_t * const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    const size_t remaining = count - written;
    /* Never emit more bytes than one 32-bit word holds or the buffer needs */
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[written + k] = (uint8_t)(word >> (k * 8));
    }
    written += chunk;
  }
}
#endif /* defined(HAVE_ARC4RANDOM) */
#ifdef _WIN32 #ifdef _WIN32
typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG); typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
/* Obtain entropy on Windows XP / Windows Server 2003 and later. /* Obtain entropy on Windows XP / Windows Server 2003 and later.
* Hint on RtlGenRandom and the following article from libsodioum. * Hint on RtlGenRandom and the following article from libsodium.
* *
* Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
* https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/ * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
@ -749,7 +854,7 @@ typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
static int static int
writeRandomBytes_RtlGenRandom(void * target, size_t count) { writeRandomBytes_RtlGenRandom(void * target, size_t count) {
int success = 0; /* full count bytes written? */ int success = 0; /* full count bytes written? */
const HMODULE advapi32 = LoadLibrary("ADVAPI32.DLL"); const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
if (advapi32) { if (advapi32) {
const RTLGENRANDOM_FUNC RtlGenRandom const RTLGENRANDOM_FUNC RtlGenRandom
@ -768,6 +873,8 @@ writeRandomBytes_RtlGenRandom(void * target, size_t count) {
#endif /* _WIN32 */ #endif /* _WIN32 */
#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
static unsigned long static unsigned long
gather_time_entropy(void) gather_time_entropy(void)
{ {
@ -780,16 +887,20 @@ gather_time_entropy(void)
int gettimeofday_res; int gettimeofday_res;
gettimeofday_res = gettimeofday(&tv, NULL); gettimeofday_res = gettimeofday(&tv, NULL);
#if defined(NDEBUG)
(void)gettimeofday_res;
#else
assert (gettimeofday_res == 0); assert (gettimeofday_res == 0);
#endif /* defined(NDEBUG) */
/* Microseconds time is <20 bits entropy */ /* Microseconds time is <20 bits entropy */
return tv.tv_usec; return tv.tv_usec;
#endif #endif
} }
#if defined(HAVE_ARC4RANDOM_BUF) && defined(HAVE_LIBBSD) #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
# include <bsd/stdlib.h>
#endif
static unsigned long static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) { ENTROPY_DEBUG(const char * label, unsigned long entropy) {
@ -808,10 +919,12 @@ generate_hash_secret_salt(XML_Parser parser)
{ {
unsigned long entropy; unsigned long entropy;
(void)parser; (void)parser;
#if defined(HAVE_ARC4RANDOM_BUF) || defined(__CloudABI__) #if defined(HAVE_ARC4RANDOM_BUF)
(void)gather_time_entropy;
arc4random_buf(&entropy, sizeof(entropy)); arc4random_buf(&entropy, sizeof(entropy));
return ENTROPY_DEBUG("arc4random_buf", entropy); return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
return ENTROPY_DEBUG("arc4random", entropy);
#else #else
/* Try high quality providers first .. */ /* Try high quality providers first .. */
#ifdef _WIN32 #ifdef _WIN32
@ -819,10 +932,15 @@ generate_hash_secret_salt(XML_Parser parser)
return ENTROPY_DEBUG("RtlGenRandom", entropy); return ENTROPY_DEBUG("RtlGenRandom", entropy);
} }
#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
if (writeRandomBytes_getrandom((void *)&entropy, sizeof(entropy))) { if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
return ENTROPY_DEBUG("getrandom", entropy); return ENTROPY_DEBUG("getrandom", entropy);
} }
#endif #endif
#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
return ENTROPY_DEBUG("/dev/urandom", entropy);
}
#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
/* .. and self-made low quality for backup: */ /* .. and self-made low quality for backup: */
/* Process ID is 0 bits entropy if attacker has local access */ /* Process ID is 0 bits entropy if attacker has local access */
@ -833,7 +951,7 @@ generate_hash_secret_salt(XML_Parser parser)
return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
} else { } else {
return ENTROPY_DEBUG("fallback(8)", return ENTROPY_DEBUG("fallback(8)",
entropy * (unsigned long)2305843009213693951); entropy * (unsigned long)2305843009213693951ULL);
} }
#endif #endif
} }
@ -962,6 +1080,8 @@ parserCreate(const XML_Char *encodingName,
nsAttsVersion = 0; nsAttsVersion = 0;
nsAttsPower = 0; nsAttsPower = 0;
protocolEncodingName = NULL;
poolInit(&tempPool, &(parser->m_mem)); poolInit(&tempPool, &(parser->m_mem));
poolInit(&temp2Pool, &(parser->m_mem)); poolInit(&temp2Pool, &(parser->m_mem));
parserInit(parser, encodingName); parserInit(parser, encodingName);
@ -988,9 +1108,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName)
{ {
processor = prologInitProcessor; processor = prologInitProcessor;
XmlPrologStateInit(&prologState); XmlPrologStateInit(&prologState);
protocolEncodingName = (encodingName != NULL if (encodingName != NULL) {
? poolCopyString(&tempPool, encodingName) protocolEncodingName = copyString(encodingName, &(parser->m_mem));
: NULL); }
curBase = NULL; curBase = NULL;
XmlInitEncoding(&initEncoding, &encoding, 0); XmlInitEncoding(&initEncoding, &encoding, 0);
userData = NULL; userData = NULL;
@ -1103,6 +1223,8 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
unknownEncodingRelease(unknownEncodingData); unknownEncodingRelease(unknownEncodingData);
poolClear(&tempPool); poolClear(&tempPool);
poolClear(&temp2Pool); poolClear(&temp2Pool);
FREE((void *)protocolEncodingName);
protocolEncodingName = NULL;
parserInit(parser, encodingName); parserInit(parser, encodingName);
dtdReset(_dtd, &parser->m_mem); dtdReset(_dtd, &parser->m_mem);
return XML_TRUE; return XML_TRUE;
@ -1119,10 +1241,16 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
*/ */
if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
return XML_STATUS_ERROR; return XML_STATUS_ERROR;
/* Get rid of any previous encoding name */
FREE((void *)protocolEncodingName);
if (encodingName == NULL) if (encodingName == NULL)
/* No new encoding name */
protocolEncodingName = NULL; protocolEncodingName = NULL;
else { else {
protocolEncodingName = poolCopyString(&tempPool, encodingName); /* Copy the new encoding name into allocated memory */
protocolEncodingName = copyString(encodingName, &(parser->m_mem));
if (!protocolEncodingName) if (!protocolEncodingName)
return XML_STATUS_ERROR; return XML_STATUS_ERROR;
} }
@ -1357,6 +1485,7 @@ XML_ParserFree(XML_Parser parser)
destroyBindings(inheritedBindings, parser); destroyBindings(inheritedBindings, parser);
poolDestroy(&tempPool); poolDestroy(&tempPool);
poolDestroy(&temp2Pool); poolDestroy(&temp2Pool);
FREE((void *)protocolEncodingName);
#ifdef XML_DTD #ifdef XML_DTD
/* external parameter entity parsers share the DTD structure /* external parameter entity parsers share the DTD structure
parser->m_dtd with the root parser, so we must not destroy it parser->m_dtd with the root parser, so we must not destroy it
@ -1748,7 +1877,8 @@ enum XML_Status XMLCALL
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
{ {
if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
errorCode = XML_ERROR_INVALID_ARGUMENT; if (parser != NULL)
parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
return XML_STATUS_ERROR; return XML_STATUS_ERROR;
} }
switch (ps_parsing) { switch (ps_parsing) {
@ -1783,9 +1913,22 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
if (errorCode == XML_ERROR_NONE) { if (errorCode == XML_ERROR_NONE) {
switch (ps_parsing) { switch (ps_parsing) {
case XML_SUSPENDED: case XML_SUSPENDED:
/* It is hard to be certain, but it seems that this case
* cannot occur. This code is cleaning up a previous parse
* with no new data (since len == 0). Changing the parsing
* state requires getting to execute a handler function, and
* there doesn't seem to be an opportunity for that while in
* this circumstance.
*
* Given the uncertainty, we retain the code but exclude it
* from coverage tests.
*
* LCOV_EXCL_START
*/
XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
positionPtr = bufferPtr; positionPtr = bufferPtr;
return XML_STATUS_SUSPENDED; return XML_STATUS_SUSPENDED;
/* LCOV_EXCL_STOP */
case XML_INITIALIZED: case XML_INITIALIZED:
case XML_PARSING: case XML_PARSING:
ps_parsing = XML_FINISHED; ps_parsing = XML_FINISHED;
@ -2974,9 +3117,17 @@ doContent(XML_Parser parser,
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
break; break;
default: default:
/* All of the tokens produced by XmlContentTok() have their own
* explicit cases, so this default is not strictly necessary.
* However it is a useful safety net, so we retain the code and
* simply exclude it from the coverage tests.
*
* LCOV_EXCL_START
*/
if (defaultHandler) if (defaultHandler)
reportDefault(parser, enc, s, next); reportDefault(parser, enc, s, next);
break; break;
/* LCOV_EXCL_STOP */
} }
*eventPP = s = next; *eventPP = s = next;
switch (ps_parsing) { switch (ps_parsing) {
@ -3067,13 +3218,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
#endif #endif
attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
if (temp == NULL) if (temp == NULL) {
attsSize = oldAttsSize;
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
}
atts = temp; atts = temp;
#ifdef XML_ATTR_INFO #ifdef XML_ATTR_INFO
temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
if (temp2 == NULL) if (temp2 == NULL) {
attsSize = oldAttsSize;
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
}
attInfo = temp2; attInfo = temp2;
#endif #endif
if (n > oldAttsSize) if (n > oldAttsSize)
@ -3210,6 +3365,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
int j; /* hash table index */ int j; /* hash table index */
unsigned long version = nsAttsVersion; unsigned long version = nsAttsVersion;
int nsAttsSize = (int)1 << nsAttsPower; int nsAttsSize = (int)1 << nsAttsPower;
unsigned char oldNsAttsPower = nsAttsPower;
/* size of hash table must be at least 2 * (# of prefixed attributes) */ /* size of hash table must be at least 2 * (# of prefixed attributes) */
if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
NS_ATT *temp; NS_ATT *temp;
@ -3219,8 +3375,11 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
nsAttsPower = 3; nsAttsPower = 3;
nsAttsSize = (int)1 << nsAttsPower; nsAttsSize = (int)1 << nsAttsPower;
temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
if (!temp) if (!temp) {
/* Restore actual size of memory in nsAtts */
nsAttsPower = oldNsAttsPower;
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
}
nsAtts = temp; nsAtts = temp;
version = 0; /* force re-initialization of nsAtts hash table */ version = 0; /* force re-initialization of nsAtts hash table */
} }
@ -3247,8 +3406,23 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
((XML_Char *)s)[-1] = 0; /* clear flag */ ((XML_Char *)s)[-1] = 0; /* clear flag */
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
if (!id || !id->prefix) if (!id || !id->prefix) {
return XML_ERROR_NO_MEMORY; /* This code is walking through the appAtts array, dealing
* with (in this case) a prefixed attribute name. To be in
* the array, the attribute must have already been bound, so
* has to have passed through the hash table lookup once
* already. That implies that an entry for it already
* exists, so the lookup above will return a pointer to
* already allocated memory. There is no opportunity for
* the allocator to fail, so the condition above cannot be
* fulfilled.
*
* Since it is difficult to be certain that the above
* analysis is complete, we retain the test and merely
* remove the code from coverage tests.
*/
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
}
b = id->prefix->binding; b = id->prefix->binding;
if (!b) if (!b)
return XML_ERROR_UNBOUND_PREFIX; return XML_ERROR_UNBOUND_PREFIX;
@ -3625,8 +3799,16 @@ doCdataSection(XML_Parser parser,
} }
return XML_ERROR_UNCLOSED_CDATA_SECTION; return XML_ERROR_UNCLOSED_CDATA_SECTION;
default: default:
/* Every token returned by XmlCdataSectionTok() has its own
* explicit case, so this default case will never be executed.
* We retain it as a safety net and exclude it from the coverage
* statistics.
*
* LCOV_EXCL_START
*/
*eventPP = next; *eventPP = next;
return XML_ERROR_UNEXPECTED_STATE; return XML_ERROR_UNEXPECTED_STATE;
/* LCOV_EXCL_STOP */
} }
*eventPP = s = next; *eventPP = s = next;
@ -3686,8 +3868,20 @@ doIgnoreSection(XML_Parser parser,
eventEndPP = &eventEndPtr; eventEndPP = &eventEndPtr;
} }
else { else {
/* It's not entirely clear, but it seems the following two lines
* of code cannot be executed. The only occasions on which 'enc'
* is not 'parser->m_encoding' are when this function is called
* from the internal entity processing, and IGNORE sections are an
* error in internal entities.
*
* Since it really isn't clear that this is true, we keep the code
* and just remove it from our coverage tests.
*
* LCOV_EXCL_START
*/
eventPP = &(openInternalEntities->internalEventPtr); eventPP = &(openInternalEntities->internalEventPtr);
eventEndPP = &(openInternalEntities->internalEventEndPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr);
/* LCOV_EXCL_STOP */
} }
*eventPP = s; *eventPP = s;
*startPtr = NULL; *startPtr = NULL;
@ -3720,8 +3914,16 @@ doIgnoreSection(XML_Parser parser,
} }
return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
default: default:
/* All of the tokens that XmlIgnoreSectionTok() returns have
* explicit cases to handle them, so this default case is never
* executed. We keep it as a safety net anyway, and remove it
* from our test coverage statistics.
*
* LCOV_EXCL_START
*/
*eventPP = next; *eventPP = next;
return XML_ERROR_UNEXPECTED_STATE; return XML_ERROR_UNEXPECTED_STATE;
/* LCOV_EXCL_STOP */
} }
/* not reached */ /* not reached */
} }
@ -3734,6 +3936,7 @@ initializeEncoding(XML_Parser parser)
const char *s; const char *s;
#ifdef XML_UNICODE #ifdef XML_UNICODE
char encodingBuf[128]; char encodingBuf[128];
/* See comments about `protocolEncodingName` in parserInit() */
if (!protocolEncodingName) if (!protocolEncodingName)
s = NULL; s = NULL;
else { else {
@ -3817,7 +4020,14 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
reportDefault(parser, encoding, s, next); reportDefault(parser, encoding, s, next);
if (protocolEncodingName == NULL) { if (protocolEncodingName == NULL) {
if (newEncoding) { if (newEncoding) {
if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { /* Check that the specified encoding does not conflict with what
* the parser has already deduced. Do we have the same number
* of bytes in the smallest representation of a character? If
* this is UTF-16, is it the same endianness?
*/
if (newEncoding->minBytesPerChar != encoding->minBytesPerChar
|| (newEncoding->minBytesPerChar == 2 &&
newEncoding != encoding)) {
eventPtr = encodingName; eventPtr = encodingName;
return XML_ERROR_INCORRECT_ENCODING; return XML_ERROR_INCORRECT_ENCODING;
} }
@ -3962,15 +4172,14 @@ entityValueInitProcessor(XML_Parser parser,
result = processXmlDecl(parser, 0, start, next); result = processXmlDecl(parser, 0, start, next);
if (result != XML_ERROR_NONE) if (result != XML_ERROR_NONE)
return result; return result;
switch (ps_parsing) { /* At this point, ps_parsing cannot be XML_SUSPENDED. For that
case XML_SUSPENDED: * to happen, a parameter entity parsing handler must have
*nextPtr = next; * attempted to suspend the parser, which fails and raises an
return XML_ERROR_NONE; * error. The parser can be aborted, but can't be suspended.
case XML_FINISHED: */
if (ps_parsing == XML_FINISHED)
return XML_ERROR_ABORTED; return XML_ERROR_ABORTED;
default: *nextPtr = next;
*nextPtr = next;
}
/* stop scanning for text declaration - we found one */ /* stop scanning for text declaration - we found one */
processor = entityValueProcessor; processor = entityValueProcessor;
return entityValueProcessor(parser, next, end, nextPtr); return entityValueProcessor(parser, next, end, nextPtr);
@ -4293,8 +4502,14 @@ doProlog(XML_Parser parser,
&dtd->paramEntities, &dtd->paramEntities,
externalSubsetName, externalSubsetName,
sizeof(ENTITY)); sizeof(ENTITY));
if (!entity) if (!entity) {
return XML_ERROR_NO_MEMORY; /* The external subset name "#" will have already been
* inserted into the hash table at the start of the
* external entity parsing, so no allocation will happen
* and lookup() cannot fail.
*/
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
}
if (useForeignDTD) if (useForeignDTD)
entity->base = curBase; entity->base = curBase;
dtd->paramEntityRead = XML_FALSE; dtd->paramEntityRead = XML_FALSE;
@ -4773,8 +4988,10 @@ doProlog(XML_Parser parser,
if (prologState.level >= groupSize) { if (prologState.level >= groupSize) {
if (groupSize) { if (groupSize) {
char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
if (temp == NULL) if (temp == NULL) {
groupSize /= 2;
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
}
groupConnector = temp; groupConnector = temp;
if (dtd->scaffIndex) { if (dtd->scaffIndex) {
int *temp = (int *)REALLOC(dtd->scaffIndex, int *temp = (int *)REALLOC(dtd->scaffIndex,
@ -4786,8 +5003,10 @@ doProlog(XML_Parser parser,
} }
else { else {
groupConnector = (char *)MALLOC(groupSize = 32); groupConnector = (char *)MALLOC(groupSize = 32);
if (!groupConnector) if (!groupConnector) {
groupSize = 0;
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
}
} }
} }
groupConnector[prologState.level] = 0; groupConnector[prologState.level] = 0;
@ -4850,8 +5069,29 @@ doProlog(XML_Parser parser,
: !dtd->hasParamEntityRefs)) { : !dtd->hasParamEntityRefs)) {
if (!entity) if (!entity)
return XML_ERROR_UNDEFINED_ENTITY; return XML_ERROR_UNDEFINED_ENTITY;
else if (!entity->is_internal) else if (!entity->is_internal) {
return XML_ERROR_ENTITY_DECLARED_IN_PE; /* It's hard to exhaustively search the code to be sure,
* but there doesn't seem to be a way of executing the
* following line. There are two cases:
*
* If 'standalone' is false, the DTD must have no
* parameter entities or we wouldn't have passed the outer
* 'if' statement. That means the only entity in the hash
* table is the external subset name "#" which cannot be
* given as a parameter entity name in XML syntax, so the
* lookup must have returned NULL and we don't even reach
* the test for an internal entity.
*
* If 'standalone' is true, it does not seem to be
* possible to create entities taking this code path that
* are not internal entities, so fail the test above.
*
* Because this analysis is very uncertain, the code is
* being left in place and merely removed from the
* coverage test statistics.
*/
return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
}
} }
else if (!entity) { else if (!entity) {
dtd->keepProcessing = dtd->standalone; dtd->keepProcessing = dtd->standalone;
@ -5323,11 +5563,15 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
&& (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
break; break;
n = XmlEncode(n, (ICHAR *)buf); n = XmlEncode(n, (ICHAR *)buf);
if (!n) { /* The XmlEncode() functions can never return 0 here. That
if (enc == encoding) * error return happens if the code point passed in is either
eventPtr = ptr; * negative or greater than or equal to 0x110000. The
return XML_ERROR_BAD_CHAR_REF; * XmlCharRefNumber() functions will all return a number
} * strictly less than 0x110000 or a negative value if an error
* occurred. The negative value is intercepted above, so
* XmlEncode() is never passed a value it might return an
* error for.
*/
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (!poolAppendChar(pool, buf[i])) if (!poolAppendChar(pool, buf[i]))
return XML_ERROR_NO_MEMORY; return XML_ERROR_NO_MEMORY;
@ -5401,8 +5645,26 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
break; break;
} }
if (entity->open) { if (entity->open) {
if (enc == encoding) if (enc == encoding) {
eventPtr = ptr; /* It does not appear that this line can be executed.
*
* The "if (entity->open)" check catches recursive entity
* definitions. In order to be called with an open
* entity, it must have gone through this code before and
* been through the recursive call to
* appendAttributeValue() some lines below. That call
* sets the local encoding ("enc") to the parser's
* internal encoding (internal_utf8 or internal_utf16),
* which can never be the same as the principal encoding.
* It doesn't appear there is another code path that gets
* here with entity->open being TRUE.
*
* Since it is not certain that this logic is watertight,
* we keep the line and merely exclude it from coverage
* tests.
*/
eventPtr = ptr; /* LCOV_EXCL_LINE */
}
return XML_ERROR_RECURSIVE_ENTITY_REF; return XML_ERROR_RECURSIVE_ENTITY_REF;
} }
if (entity->notation) { if (entity->notation) {
@ -5429,9 +5691,21 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
} }
break; break;
default: default:
/* The only token returned by XmlAttributeValueTok() that does
* not have an explicit case here is XML_TOK_PARTIAL_CHAR.
* Getting that would require an entity name to contain an
* incomplete XML character (e.g. \xE2\x82); however previous
* tokenisers will have already recognised and rejected such
* names before XmlAttributeValueTok() gets a look-in. This
* default case should be retained as a safety net, but the code
* excluded from coverage tests.
*
* LCOV_EXCL_START
*/
if (enc == encoding) if (enc == encoding)
eventPtr = ptr; eventPtr = ptr;
return XML_ERROR_UNEXPECTED_STATE; return XML_ERROR_UNEXPECTED_STATE;
/* LCOV_EXCL_STOP */
} }
ptr = next; ptr = next;
} }
@ -5564,12 +5838,15 @@ storeEntityValue(XML_Parser parser,
goto endEntityValue; goto endEntityValue;
} }
n = XmlEncode(n, (ICHAR *)buf); n = XmlEncode(n, (ICHAR *)buf);
if (!n) { /* The XmlEncode() functions can never return 0 here. That
if (enc == encoding) * error return happens if the code point passed in is either
eventPtr = entityTextPtr; * negative or greater than or equal to 0x110000. The
result = XML_ERROR_BAD_CHAR_REF; * XmlCharRefNumber() functions will all return a number
goto endEntityValue; * strictly less than 0x110000 or a negative value if an error
} * occurred. The negative value is intercepted above, so
* XmlEncode() is never passed a value it might return an
* error for.
*/
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (pool->end == pool->ptr && !poolGrow(pool)) { if (pool->end == pool->ptr && !poolGrow(pool)) {
result = XML_ERROR_NO_MEMORY; result = XML_ERROR_NO_MEMORY;
@ -5590,10 +5867,18 @@ storeEntityValue(XML_Parser parser,
result = XML_ERROR_INVALID_TOKEN; result = XML_ERROR_INVALID_TOKEN;
goto endEntityValue; goto endEntityValue;
default: default:
/* This default case should be unnecessary -- all the tokens
* that XmlEntityValueTok() can return have their own explicit
* cases -- but should be retained for safety. We do however
* exclude it from the coverage statistics.
*
* LCOV_EXCL_START
*/
if (enc == encoding) if (enc == encoding)
eventPtr = entityTextPtr; eventPtr = entityTextPtr;
result = XML_ERROR_UNEXPECTED_STATE; result = XML_ERROR_UNEXPECTED_STATE;
goto endEntityValue; goto endEntityValue;
/* LCOV_EXCL_STOP */
} }
entityTextPtr = next; entityTextPtr = next;
} }
@ -5691,8 +5976,25 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
eventEndPP = &eventEndPtr; eventEndPP = &eventEndPtr;
} }
else { else {
/* To get here, two things must be true; the parser must be
* using a character encoding that is not the same as the
* encoding passed in, and the encoding passed in must need
* conversion to the internal format (UTF-8 unless XML_UNICODE
* is defined). The only occasions on which the encoding passed
* in is not the same as the parser's encoding are when it is
* the internal encoding (e.g. a previously defined parameter
* entity, already converted to internal format). This by
* definition doesn't need conversion, so the whole branch never
* gets executed.
*
* For safety's sake we don't delete these lines and merely
* exclude them from coverage statistics.
*
* LCOV_EXCL_START
*/
eventPP = &(openInternalEntities->internalEventPtr); eventPP = &(openInternalEntities->internalEventPtr);
eventEndPP = &(openInternalEntities->internalEventEndPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr);
/* LCOV_EXCL_STOP */
} }
do { do {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
@ -5861,9 +6163,30 @@ getContext(XML_Parser parser)
len = dtd->defaultPrefix.binding->uriLen; len = dtd->defaultPrefix.binding->uriLen;
if (namespaceSeparator) if (namespaceSeparator)
len--; len--;
for (i = 0; i < len; i++) for (i = 0; i < len; i++) {
if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) {
return NULL; /* Because of memory caching, I don't believe this line can be
* executed.
*
* This is part of a loop copying the default prefix binding
* URI into the parser's temporary string pool. Previously,
* that URI was copied into the same string pool, with a
* terminating NUL character, as part of setContext(). When
* the pool was cleared, that leaves a block definitely big
* enough to hold the URI on the free block list of the pool.
* The URI copy in getContext() therefore cannot run out of
* memory.
*
* If the pool is used between the setContext() and
* getContext() calls, the worst it can do is leave a bigger
* block on the front of the free list. Given that this is
* all somewhat inobvious and program logic can be changed, we
* don't delete the line but we do exclude it from the test
* coverage statistics.
*/
return NULL; /* LCOV_EXCL_LINE */
}
}
needSep = XML_TRUE; needSep = XML_TRUE;
} }
@ -5875,8 +6198,15 @@ getContext(XML_Parser parser)
PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
if (!prefix) if (!prefix)
break; break;
if (!prefix->binding) if (!prefix->binding) {
continue; /* This test appears to be (justifiable) paranoia. There does
* not seem to be a way of injecting a prefix without a binding
* that doesn't get errored long before this function is called.
* The test should remain for safety's sake, so we instead
* exclude the following line from the coverage statistics.
*/
continue; /* LCOV_EXCL_LINE */
}
if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
return NULL; return NULL;
for (s = prefix->name; *s; s++) for (s = prefix->name; *s; s++)
@ -6547,8 +6877,20 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s)
static const XML_Char * static const XML_Char *
poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
{ {
if (!pool->ptr && !poolGrow(pool)) if (!pool->ptr && !poolGrow(pool)) {
return NULL; /* The following line is unreachable given the current usage of
* poolCopyStringN(). Currently it is called from exactly one
* place to copy the text of a simple general entity. By that
* point, the name of the entity is already stored in the pool, so
* pool->ptr cannot be NULL.
*
* If poolCopyStringN() is used elsewhere as it well might be,
* this line may well become executable again. Regardless, this
* sort of check shouldn't be removed lightly, so we just exclude
* it from the coverage statistics.
*/
return NULL; /* LCOV_EXCL_LINE */
}
for (; n > 0; --n, s++) { for (; n > 0; --n, s++) {
if (!poolAppendChar(pool, *s)) if (!poolAppendChar(pool, *s))
return NULL; return NULL;
@ -6641,8 +6983,19 @@ poolGrow(STRING_POOL *pool)
int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
size_t bytesToAllocate; size_t bytesToAllocate;
if (blockSize < 0) // NOTE: Needs to be calculated prior to calling `realloc`
return XML_FALSE; // to avoid dangling pointers:
const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
if (blockSize < 0) {
/* This condition traps a situation where more than
* INT_MAX/2 bytes have already been allocated. This isn't
* readily testable, since it is unlikely that an average
* machine will have that much memory, so we exclude it from the
* coverage statistics.
*/
return XML_FALSE; /* LCOV_EXCL_LINE */
}
bytesToAllocate = poolBytesToAllocateFor(blockSize); bytesToAllocate = poolBytesToAllocateFor(blockSize);
if (bytesToAllocate == 0) if (bytesToAllocate == 0)
@ -6654,7 +7007,7 @@ poolGrow(STRING_POOL *pool)
return XML_FALSE; return XML_FALSE;
pool->blocks = temp; pool->blocks = temp;
pool->blocks->size = blockSize; pool->blocks->size = blockSize;
pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->ptr = pool->blocks->s + offsetInsideBlock;
pool->start = pool->blocks->s; pool->start = pool->blocks->s;
pool->end = pool->start + blockSize; pool->end = pool->start + blockSize;
} }
@ -6663,8 +7016,18 @@ poolGrow(STRING_POOL *pool)
int blockSize = (int)(pool->end - pool->start); int blockSize = (int)(pool->end - pool->start);
size_t bytesToAllocate; size_t bytesToAllocate;
if (blockSize < 0) if (blockSize < 0) {
return XML_FALSE; /* This condition traps a situation where either more than
* INT_MAX bytes have already been allocated (which is prevented
* by various pieces of program logic, not least this one, never
* mind the unlikelihood of actually having that much memory) or
* the pool control fields have been corrupted (which could
* conceivably happen in an extremely buggy user handler
* function). Either way it isn't readily testable, so we
* exclude it from the coverage statistics.
*/
return XML_FALSE; /* LCOV_EXCL_LINE */
}
if (blockSize < INIT_BLOCK_SIZE) if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE; blockSize = INIT_BLOCK_SIZE;
@ -6827,3 +7190,26 @@ getElementType(XML_Parser parser,
} }
return ret; return ret;
} }
/* Duplicate the NUL-terminated string `s`.
 *
 * The copy (terminator included) is obtained from memsuite->malloc_fcn.
 * Returns the new string, or NULL if the allocation fails.
 *
 * The length is counted in a size_t rather than an int so that a very
 * long input cannot overflow the counter (signed overflow is undefined
 * behaviour and would lead to an undersized allocation).
 */
static XML_Char *
copyString(const XML_Char *s,
           const XML_Memory_Handling_Suite *memsuite)
{
  size_t charsRequired = 0;
  XML_Char *result;

  /* First determine how long the string is */
  while (s[charsRequired] != 0) {
    charsRequired++;
  }
  /* Include the terminator */
  charsRequired++;

  /* Now allocate space for the copy */
  result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
  if (result == NULL)
    return NULL;
  /* Copy the original into place */
  memcpy(result, s, charsRequired * sizeof(XML_Char));
  return result;
}

View File

@ -170,7 +170,14 @@ prolog1(PROLOG_STATE *state,
case XML_TOK_COMMENT: case XML_TOK_COMMENT:
return XML_ROLE_COMMENT; return XML_ROLE_COMMENT;
case XML_TOK_BOM: case XML_TOK_BOM:
return XML_ROLE_NONE; /* This case can never arise. To reach this role function, the
* parse must have passed through prolog0 and therefore have had
* some form of input, even if only a space. At that point, a
* byte order mark is no longer a valid character (though
* technically it should be interpreted as a non-breaking space),
* so will be rejected by the tokenizing stages.
*/
return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
case XML_TOK_DECL_OPEN: case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc, if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc), ptr + 2 * MIN_BYTES_PER_CHAR(enc),
@ -1285,6 +1292,26 @@ declClose(PROLOG_STATE *state,
return common(state, tok); return common(state, tok);
} }
/* This function will only be invoked if the internal logic of the
* parser has broken down. It is used in two cases:
*
* 1: When the XML prolog has been finished. At this point the
* processor (the parser level above these role handlers) should
* switch from prologProcessor to contentProcessor and reinitialise
* the handler function.
*
* 2: When an error has been detected (via common() below). At this
* point again the processor should be switched to errorProcessor,
* which will never call a handler.
*
* The result of this is that error() can only be called if the
* processor switch failed to happen, which is an internal error and
* therefore we shouldn't be able to provoke it simply by using the
* library. It is a necessary backstop, however, so we merely exclude
* it from the coverage statistics.
*
* LCOV_EXCL_START
*/
static int PTRCALL static int PTRCALL
error(PROLOG_STATE *UNUSED_P(state), error(PROLOG_STATE *UNUSED_P(state),
int UNUSED_P(tok), int UNUSED_P(tok),
@ -1294,6 +1321,7 @@ error(PROLOG_STATE *UNUSED_P(state),
{ {
return XML_ROLE_NONE; return XML_ROLE_NONE;
} }
/* LCOV_EXCL_STOP */
static int FASTCALL static int FASTCALL
common(PROLOG_STATE *state, int tok) common(PROLOG_STATE *state, int tok)

View File

@ -1019,7 +1019,11 @@ streqci(const char *s1, const char *s2)
if (ASCII_a <= c1 && c1 <= ASCII_z) if (ASCII_a <= c1 && c1 <= ASCII_z)
c1 += ASCII_A - ASCII_a; c1 += ASCII_A - ASCII_a;
if (ASCII_a <= c2 && c2 <= ASCII_z) if (ASCII_a <= c2 && c2 <= ASCII_z)
c2 += ASCII_A - ASCII_a; /* The following line will never get executed. streqci() is
* only called from two places, both of which guarantee to put
* upper-case strings into s2.
*/
c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
if (c1 != c2) if (c1 != c2)
return 0; return 0;
if (!c1) if (!c1)
@ -1291,7 +1295,7 @@ XmlUtf8Encode(int c, char *buf)
}; };
if (c < 0) if (c < 0)
return 0; return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
if (c < min2) { if (c < min2) {
buf[0] = (char)(c | UTF8_cval1); buf[0] = (char)(c | UTF8_cval1);
return 1; return 1;
@ -1314,7 +1318,7 @@ XmlUtf8Encode(int c, char *buf)
buf[3] = (char)((c & 0x3f) | 0x80); buf[3] = (char)((c & 0x3f) | 0x80);
return 4; return 4;
} }
return 0; return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
} }
int FASTCALL int FASTCALL
@ -1465,6 +1469,9 @@ XmlInitUnknownEncoding(void *mem,
else if (c < 0) { else if (c < 0) {
if (c < -4) if (c < -4)
return 0; return 0;
/* Multi-byte sequences need a converter function */
if (!convert)
return 0;
e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
e->utf8[i][0] = 0; e->utf8[i][0] = 0;
e->utf16[i] = 0; e->utf16[i] = 0;

View File

@ -1198,8 +1198,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *start; const char *start;
if (ptr >= end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end)) else if (! HAS_CHAR(enc, ptr, end)) {
return XML_TOK_PARTIAL; /* This line cannot be executed. The incoming data has already
* been tokenized once, so incomplete characters like this have
* already been eliminated from the input. Retaining the paranoia
* check is still valuable, however.
*/
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
}
start = ptr; start = ptr;
while (HAS_CHAR(enc, ptr, end)) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@ -1258,8 +1264,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *start; const char *start;
if (ptr >= end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end)) else if (! HAS_CHAR(enc, ptr, end)) {
return XML_TOK_PARTIAL; /* This line cannot be executed. The incoming data has already
* been tokenized once, so incomplete characters like this have
* already been eliminated from the input. Retaining the paranoia
* check is still valuable, however.
*/
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
}
start = ptr; start = ptr;
while (HAS_CHAR(enc, ptr, end)) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@ -1614,6 +1626,14 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
return 0; return 0;
} }
/* This function does not appear to be called from anywhere within the
* library code. It is used via the macro XmlSameName(), which is
* defined but never used. Since it appears in the encoding function
* table, removing it is not a thing to be undertaken lightly. For
* the moment, we simply exclude it from coverage tests.
*
* LCOV_EXCL_START
*/
static int PTRCALL static int PTRCALL
PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{ {
@ -1677,14 +1697,21 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
} }
/* not reached */ /* not reached */
} }
/* LCOV_EXCL_STOP */
static int PTRCALL static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2) const char *end1, const char *ptr2)
{ {
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (end1 - ptr1 < MINBPC(enc)) if (end1 - ptr1 < MINBPC(enc)) {
return 0; /* This line cannot be executed. THe incoming data has already
* been tokenized once, so imcomplete characters like this have
* already been eliminated from the input. Retaining the
* paranoia check is still valuable, however.
*/
return 0; /* LCOV_EXCL_LINE */
}
if (!CHAR_MATCHES(enc, ptr1, *ptr2)) if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0; return 0;
} }

View File

@ -87,6 +87,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\Modules\_elementtree.c" /> <ClCompile Include="..\Modules\_elementtree.c" />
<ClCompile Include="..\Modules\expat\loadlibrary.c" />
<ClCompile Include="..\Modules\expat\xmlparse.c" /> <ClCompile Include="..\Modules\expat\xmlparse.c" />
<ClCompile Include="..\Modules\expat\xmlrole.c" /> <ClCompile Include="..\Modules\expat\xmlrole.c" />
<ClCompile Include="..\Modules\expat\xmltok.c" /> <ClCompile Include="..\Modules\expat\xmltok.c" />

View File

@ -33,6 +33,9 @@
<ClInclude Include="..\Modules\expat\latin1tab.h"> <ClInclude Include="..\Modules\expat\latin1tab.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<!-- loadlibrary.c is a compiled source file (ClCompile in the .vcxproj), so file it under Source Files -->
<ClCompile Include="..\Modules\expat\loadlibrary.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClInclude Include="..\Modules\expat\macconfig.h"> <ClInclude Include="..\Modules\expat\macconfig.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
@ -69,4 +72,4 @@
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -68,6 +68,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\Modules\pyexpat.c" /> <ClCompile Include="..\Modules\pyexpat.c" />
<ClCompile Include="..\Modules\expat\loadlibrary.c" />
<ClCompile Include="..\Modules\expat\xmlparse.c" /> <ClCompile Include="..\Modules\expat\xmlparse.c" />
<ClCompile Include="..\Modules\expat\xmlrole.c" /> <ClCompile Include="..\Modules\expat\xmlrole.c" />
<ClCompile Include="..\Modules\expat\xmltok.c" /> <ClCompile Include="..\Modules\expat\xmltok.c" />
@ -84,4 +85,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -20,6 +20,9 @@
<ClCompile Include="..\Modules\pyexpat.c"> <ClCompile Include="..\Modules\pyexpat.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\Modules\expat\loadlibrary.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Modules\expat\xmlparse.c"> <ClCompile Include="..\Modules\expat\xmlparse.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
@ -30,4 +33,4 @@
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -1525,6 +1525,9 @@ class PyBuildExt(build_ext):
expat_inc = [os.path.join(os.getcwd(), srcdir, 'Modules', 'expat')] expat_inc = [os.path.join(os.getcwd(), srcdir, 'Modules', 'expat')]
define_macros = [ define_macros = [
('HAVE_EXPAT_CONFIG_H', '1'), ('HAVE_EXPAT_CONFIG_H', '1'),
# bpo-30947: Python uses best available entropy sources to
# call XML_SetHashSalt(), expat entropy sources are not needed
('XML_POOR_ENTROPY', '1'),
] ]
expat_lib = [] expat_lib = []
expat_sources = ['expat/xmlparse.c', expat_sources = ['expat/xmlparse.c',