bpo-30947: Update libexpat from 2.2.1 to 2.2.3 (#3106)

* bpo-30947: Update libexpat from 2.2.1 to 2.2.3 * Add NEWS entry * Add new loadlibrary.c * expat_external.h: restore include "pyexpatns.h" * PCbuild: add expat/loadlibrary.c * Define XML_POOR_ENTROPY to compile expat
2017-08-18 23:43:54 +02:00 · 2017-08-18 23:43:54 +02:00 · 93d0cb58b4
parent c99d41f9c0
commit 93d0cb58b4
13 changed files with 734 additions and 103 deletions
--- a/Misc/NEWS.d/next/Security/2017-08-16-16-35-59.bpo-30947.iNMmm4.rst
+++ b/Misc/NEWS.d/next/Security/2017-08-16-16-35-59.bpo-30947.iNMmm4.rst
@ -0,0 +1,2 @@
+Upgrade libexpat embedded copy from version 2.2.1 to 2.2.3 to get security
+fixes.
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@ -24,7 +24,6 @@ extern "C" {
 struct XML_ParserStruct;
 typedef struct XML_ParserStruct *XML_Parser;

-/* Should this be defined using stdbool.h when C99 is available? */
 typedef unsigned char XML_Bool;
 #define XML_TRUE   ((XML_Bool) 1)
 #define XML_FALSE  ((XML_Bool) 0)
@ -1049,7 +1048,7 @@ XML_GetFeatureList(void);
 */
 #define XML_MAJOR_VERSION 2
 #define XML_MINOR_VERSION 2
-#define XML_MICRO_VERSION 1
+#define XML_MICRO_VERSION 3

 #ifdef __cplusplus
 }
--- a/Modules/expat/loadlibrary.c
+++ b/Modules/expat/loadlibrary.c
@ -0,0 +1,141 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 2016 - 2017, Steve Holme, <steve_holme@hotmail.com>.
+ *
+ * All rights reserved.
+ *
+ * Permission to  use, copy,  modify, and distribute  this software  for any
+ * purpose with  or without fee is  hereby granted, provided that  the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
+ * EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
+ * MERCHANTABILITY, FITNESS FOR A  PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+ * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR  OTHERWISE, ARISING FROM, OUT OF OR  IN CONNECTION WITH
+ * THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice,  the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or other
+ * dealings  in this  Software without  prior written  authorization of  the
+ * copyright holder.
+ *
+ ***************************************************************************/
+
+#if defined(_WIN32)
+
+#include <windows.h>
+#include <tchar.h>
+
+
+HMODULE _Expat_LoadLibrary(LPCTSTR filename);
+
+
+#if !defined(LOAD_WITH_ALTERED_SEARCH_PATH)
+#define LOAD_WITH_ALTERED_SEARCH_PATH  0x00000008
+#endif
+
+#if !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
+#define LOAD_LIBRARY_SEARCH_SYSTEM32   0x00000800
+#endif
+
+/* We use our own typedef here since some headers might lack these */
+typedef HMODULE (APIENTRY *LOADLIBRARYEX_FN)(LPCTSTR, HANDLE, DWORD);
+
+/* See function definitions in winbase.h */
+#ifdef UNICODE
+#  ifdef _WIN32_WCE
+#    define LOADLIBARYEX  L"LoadLibraryExW"
+#  else
+#    define LOADLIBARYEX  "LoadLibraryExW"
+#  endif
+#else
+#  define LOADLIBARYEX    "LoadLibraryExA"
+#endif
+
+
+/*
+ * _Expat_LoadLibrary()
+ *
+ * This is used to dynamically load DLLs using the most secure method available
+ * for the version of Windows that we are running on.
+ *
+ * Parameters:
+ *
+ * filename  [in] - The filename or full path of the DLL to load. If only the
+ *                  filename is passed then the DLL will be loaded from the
+ *                  Windows system directory.
+ *
+ * Returns the handle of the module on success; otherwise NULL.
+ */
+HMODULE _Expat_LoadLibrary(LPCTSTR filename)
+{
+  HMODULE hModule = NULL;
+  LOADLIBRARYEX_FN pLoadLibraryEx = NULL;
+
+  /* Get a handle to kernel32 so we can access it's functions at runtime */
+  HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32"));
+  if(!hKernel32)
+    return NULL;
+
+  /* Attempt to find LoadLibraryEx() which is only available on Windows 2000
+     and above */
+  pLoadLibraryEx = (LOADLIBRARYEX_FN) GetProcAddress(hKernel32, LOADLIBARYEX);
+
+  /* Detect if there's already a path in the filename and load the library if
+     there is. Note: Both back slashes and forward slashes have been supported
+     since the earlier days of DOS at an API level although they are not
+     supported by command prompt */
+  if(_tcspbrk(filename, TEXT("\\/"))) {
+    /** !checksrc! disable BANNEDFUNC 1 **/
+    hModule = pLoadLibraryEx ?
+      pLoadLibraryEx(filename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) :
+      LoadLibrary(filename);
+  }
+  /* Detect if KB2533623 is installed, as LOAD_LIBARY_SEARCH_SYSTEM32 is only
+     supported on Windows Vista, Windows Server 2008, Windows 7 and Windows
+     Server 2008 R2 with this patch or natively on Windows 8 and above */
+  else if(pLoadLibraryEx && GetProcAddress(hKernel32, "AddDllDirectory")) {
+    /* Load the DLL from the Windows system directory */
+    hModule = pLoadLibraryEx(filename, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
+  }
+  else {
+    /* Attempt to get the Windows system path */
+    UINT systemdirlen = GetSystemDirectory(NULL, 0);
+    if(systemdirlen) {
+      /* Allocate space for the full DLL path (Room for the null terminator
+         is included in systemdirlen) */
+      size_t filenamelen = _tcslen(filename);
+      TCHAR *path = malloc(sizeof(TCHAR) * (systemdirlen + 1 + filenamelen));
+      if(path && GetSystemDirectory(path, systemdirlen)) {
+        /* Calculate the full DLL path */
+        _tcscpy(path + _tcslen(path), TEXT("\\"));
+        _tcscpy(path + _tcslen(path), filename);
+
+        /* Load the DLL from the Windows system directory */
+        /** !checksrc! disable BANNEDFUNC 1 **/
+        hModule = pLoadLibraryEx ?
+          pLoadLibraryEx(path, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) :
+          LoadLibrary(path);
+
+      }
+      free(path);
+    }
+  }
+
+  return hModule;
+}
+
+#else /* defined(_WIN32) */
+
+/* ISO C requires a translation unit to contain at least one declaration
+   [-Wempty-translation-unit] */
+typedef int _TRANSLATION_UNIT_LOAD_LIBRARY_C_NOT_EMTPY;
+
+#endif /* defined(_WIN32) */
--- a/Modules/expat/siphash.h
+++ b/Modules/expat/siphash.h
@ -2,9 +2,8 @@
 * siphash.h - SipHash-2-4 in a single header file
 * --------------------------------------------------------------------------
 * Derived by William Ahern from the reference implementation[1] published[2]
- * by Jean-Philippe Aumasson and Daniel J. Berstein. Licensed in kind.
 * by Jean-Philippe Aumasson and Daniel J. Berstein.
- * Minimal changes by Sebastian Pipping on top, details below.
+ * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below.
 * Licensed under the CC0 Public Domain Dedication license.
 *
 * 1. https://www.131002.net/siphash/siphash24.c
@ -12,14 +11,25 @@
 * --------------------------------------------------------------------------
 * HISTORY:
 *
- * 2017-06-10  (Sebastian Pipping)
+ * 2017-07-25  (Vadim Zeitlin)
+ *   - Fix use of SIPHASH_MAIN macro
+ *
+ * 2017-07-05  (Sebastian Pipping)
+ *   - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++
+ *   - Add const qualifiers at two places
+ *   - Ensure <=80 characters line length (assuming tab width 4)
+ *
+ * 2017-06-23  (Victor Stinner)
+ *   - Address Win64 compile warnings
+ *
+ * 2017-06-18  (Sebastian Pipping)
 *   - Clarify license note in the header
 *   - Address C89 issues:
 *     - Stop using inline keyword (and let compiler decide)
- *     - Turn integer suffix ULL to UL
 *     - Replace _Bool by int
 *     - Turn macro siphash24 into a function
 *     - Address invalid conversion (void pointer) by explicit cast
+ *   - Address lack of stdint.h for Visual Studio 2003 to 2008
 *   - Always expose sip24_valid (for self-tests)
 *
 * 2012-11-04 - Born.  (William Ahern)
@ -76,7 +86,23 @@
 #define SIPHASH_H

 #include <stddef.h> /* size_t */
-#include <stdint.h> /* uint64_t uint32_t uint8_t */
+
+#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600)
+  /* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
+  typedef unsigned __int8   uint8_t;
+  typedef unsigned __int32 uint32_t;
+  typedef unsigned __int64 uint64_t;
+#else
+ #include <stdint.h> /* uint64_t uint32_t uint8_t */
+#endif
+
+
+/*
+ * Workaround to not require a C++11 compiler for using ULL suffix
+ * if this code is included and compiled as C++; related GCC warning is:
+ * warning: use of C++11 long long integer constant [-Wlong-long]
+ */
+#define _SIP_ULL(high, low)  (((uint64_t)high << 32) | low)


 #define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ( (x) >> (64 - (b))))
@ -158,11 +184,12 @@ static void sip_round(struct siphash *H, const int rounds) {
 } /* sip_round() */


-static struct siphash *sip24_init(struct siphash *H, const struct sipkey *key) {
-	H->v0 = 0x736f6d6570736575UL ^ key->k[0];
-	H->v1 = 0x646f72616e646f6dUL ^ key->k[1];
-	H->v2 = 0x6c7967656e657261UL ^ key->k[0];
-	H->v3 = 0x7465646279746573UL ^ key->k[1];
+static struct siphash *sip24_init(struct siphash *H,
+		const struct sipkey *key) {
+	H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
+	H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
+	H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
+	H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];

 	H->p = H->buf;
 	H->c = 0;
@ -173,7 +200,8 @@ static struct siphash *sip24_init(struct siphash *H, const struct sipkey *key) {

 #define sip_endof(a) (&(a)[sizeof (a) / sizeof *(a)])

-static struct siphash *sip24_update(struct siphash *H, const void *src, size_t len) {
+static struct siphash *sip24_update(struct siphash *H, const void *src,
+		size_t len) {
 	const unsigned char *p = (const unsigned char *)src, *pe = p + len;
 	uint64_t m;

@ -198,7 +226,7 @@ static struct siphash *sip24_update(struct siphash *H, const void *src, size_t l


 static uint64_t sip24_final(struct siphash *H) {
-	char left = (char)(H->p - H->buf);
+	const char left = (char)(H->p - H->buf);
 	uint64_t b = (H->c + left) << 56;

 	switch (left) {
@ -222,7 +250,8 @@ static uint64_t sip24_final(struct siphash *H) {
 } /* sip24_final() */


-static uint64_t siphash24(const void *src, size_t len, const struct sipkey *key) {
+static uint64_t siphash24(const void *src, size_t len,
+		const struct sipkey *key) {
 	struct siphash state = SIPHASH_INITIALIZER;
 	return sip24_final(sip24_update(sip24_init(&state, key), src, len));
 } /* siphash24() */
@ -310,7 +339,8 @@ static int sip24_valid(void) {
 	struct sipkey k;
 	size_t i;

-	sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017");
+	sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011"
+			"\012\013\014\015\016\017");

 	for (i = 0; i < sizeof in; ++i) {
 		in[i] = (unsigned char)i;
@ -323,12 +353,12 @@ static int sip24_valid(void) {
 } /* sip24_valid() */


-#if SIPHASH_MAIN
+#ifdef SIPHASH_MAIN

 #include <stdio.h>

 int main(void) {
-	int ok = sip24_valid();
+	const int ok = sip24_valid();

 	if (ok)
 		puts("OK");
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@ -1,10 +1,12 @@
 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
   See the file COPYING for copying permission.

-   77fea421d361dca90041d0040ecf1dca651167fadf2af79e990e35168d70d933 (2.2.1+)
+   101bfd65d1ff3d1511cf6671e6aae65f82cd97df6f4da137d46d510731830ad9 (2.2.3+)
 */

-#define _GNU_SOURCE 1                   /* syscall prototype */
+#if !defined(_GNU_SOURCE)
+# define _GNU_SOURCE 1                  /* syscall prototype */
+#endif

 #include <stddef.h>
 #include <string.h>                     /* memset(), memcpy() */
@ -19,6 +21,8 @@
 #include <sys/time.h>                   /* gettimeofday() */
 #include <sys/types.h>                  /* getpid() */
 #include <unistd.h>                     /* getpid() */
+#include <fcntl.h>                      /* O_RDONLY */
+#include <errno.h>
 #endif

 #define XML_BUILDING_EXPAT 1
@ -33,6 +37,57 @@
 #include "expat.h"
 #include "siphash.h"

+#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
+# if defined(HAVE_GETRANDOM)
+#  include <sys/random.h>    /* getrandom */
+# else
+#  include <unistd.h>        /* syscall */
+#  include <sys/syscall.h>   /* SYS_getrandom */
+# endif
+# if ! defined(GRND_NONBLOCK)
+#  define GRND_NONBLOCK  0x0001
+# endif  /* defined(GRND_NONBLOCK) */
+#endif  /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
+
+#if defined(HAVE_LIBBSD) \
+    && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
+# include <bsd/stdlib.h>
+#endif
+
+#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
+# define LOAD_LIBRARY_SEARCH_SYSTEM32  0x00000800
+#endif
+
+#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
+    && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
+    && !defined(XML_DEV_URANDOM) \
+    && !defined(_WIN32) \
+    && !defined(XML_POOR_ENTROPY)
+# error  \
+    You do not have support for any sources of high quality entropy \
+    enabled.  For end user security, that is probably not what you want. \
+    \
+    Your options include: \
+      * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
+      * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
+      * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
+      * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
+      * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
+      * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
+      * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
+      * Windows (RtlGenRandom): _WIN32. \
+    \
+    If insist on not using any of these, bypass this error by defining \
+    XML_POOR_ENTROPY; you have been warned. \
+    \
+    For CMake, one way to pass the define is: \
+        cmake -DCMAKE_C_FLAGS="-pipe -O2 -DHAVE_SYSCALL_GETRANDOM" . \
+    \
+    If you have reasons to patch this detection code away or need changes \
+    to the build system, please open a bug.  Thank you!
+#endif
+
+
 #ifdef XML_UNICODE
 #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
 #define XmlConvert XmlUtf16Convert
@ -436,6 +491,9 @@ static ELEMENT_TYPE *
 getElementType(XML_Parser parser, const ENCODING *enc,
               const char *ptr, const char *end);

+static XML_Char *copyString(const XML_Char *s,
+                            const XML_Memory_Handling_Suite *memsuite);
+
 static unsigned long generate_hash_secret_salt(XML_Parser parser);
 static XML_Bool startParsing(XML_Parser parser);

@ -696,21 +754,13 @@ static const XML_Char implicitContext[] = {


 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
-# include <errno.h>
-
-# if defined(HAVE_GETRANDOM)
-#  include <sys/random.h>    /* getrandom */
-# else
-#  include <unistd.h>        /* syscall */
-#  include <sys/syscall.h>   /* SYS_getrandom */
-# endif

 /* Obtain entropy on Linux 3.17+ */
 static int
-writeRandomBytes_getrandom(void * target, size_t count) {
+writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  int success = 0;  /* full count bytes written? */
  size_t bytesWrittenTotal = 0;
-  const unsigned int getrandomFlags = 0;
+  const unsigned int getrandomFlags = GRND_NONBLOCK;

  do {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
@ -728,7 +778,7 @@ writeRandomBytes_getrandom(void * target, size_t count) {
      if (bytesWrittenTotal >= count)
        success = 1;
    }
-  } while (! success && (errno == EINTR || errno == EAGAIN));
+  } while (! success && (errno == EINTR));

  return success;
 }
@ -736,12 +786,67 @@ writeRandomBytes_getrandom(void * target, size_t count) {
 #endif  /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */


+#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
+
+/* Extract entropy from /dev/urandom */
+static int
+writeRandomBytes_dev_urandom(void * target, size_t count) {
+  int success = 0;  /* full count bytes written? */
+  size_t bytesWrittenTotal = 0;
+
+  const int fd = open("/dev/urandom", O_RDONLY);
+  if (fd < 0) {
+    return 0;
+  }
+
+  do {
+    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
+    const size_t bytesToWrite = count - bytesWrittenTotal;
+
+    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);
+
+    if (bytesWrittenMore > 0) {
+      bytesWrittenTotal += bytesWrittenMore;
+      if (bytesWrittenTotal >= count)
+        success = 1;
+    }
+  } while (! success && (errno == EINTR));
+
+  close(fd);
+  return success;
+}
+
+#endif  /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
+
+
+#if defined(HAVE_ARC4RANDOM)
+
+static void
+writeRandomBytes_arc4random(void * target, size_t count) {
+  size_t bytesWrittenTotal = 0;
+
+  while (bytesWrittenTotal < count) {
+    const uint32_t random32 = arc4random();
+    size_t i = 0;
+
+    for (; (i < sizeof(random32)) && (bytesWrittenTotal < count);
+        i++, bytesWrittenTotal++) {
+      const uint8_t random8 = (uint8_t)(random32 >> (i * 8));
+      ((uint8_t *)target)[bytesWrittenTotal] = random8;
+    }
+  }
+}
+
+#endif  /* defined(HAVE_ARC4RANDOM) */
+
+
 #ifdef _WIN32

 typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
+HMODULE _Expat_LoadLibrary(LPCTSTR filename);  /* see loadlibrary.c */

 /* Obtain entropy on Windows XP / Windows Server 2003 and later.
- * Hint on RtlGenRandom and the following article from libsodioum.
+ * Hint on RtlGenRandom and the following article from libsodium.
 *
 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
@ -749,7 +854,7 @@ typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
 static int
 writeRandomBytes_RtlGenRandom(void * target, size_t count) {
  int success = 0;  /* full count bytes written? */
-  const HMODULE advapi32 = LoadLibrary("ADVAPI32.DLL");
+  const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));

  if (advapi32) {
    const RTLGENRANDOM_FUNC RtlGenRandom
@ -768,6 +873,8 @@ writeRandomBytes_RtlGenRandom(void * target, size_t count) {
 #endif /* _WIN32 */


+#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
+
 static unsigned long
 gather_time_entropy(void)
 {
@ -780,16 +887,20 @@ gather_time_entropy(void)
  int gettimeofday_res;

  gettimeofday_res = gettimeofday(&tv, NULL);
+
+#if defined(NDEBUG)
+  (void)gettimeofday_res;
+#else
  assert (gettimeofday_res == 0);
+#endif  /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return tv.tv_usec;
 #endif
 }

-#if defined(HAVE_ARC4RANDOM_BUF) && defined(HAVE_LIBBSD)
-# include <bsd/stdlib.h>
-#endif
+#endif  /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
+

 static unsigned long
 ENTROPY_DEBUG(const char * label, unsigned long entropy) {
@ -808,10 +919,12 @@ generate_hash_secret_salt(XML_Parser parser)
 {
  unsigned long entropy;
  (void)parser;
-#if defined(HAVE_ARC4RANDOM_BUF) || defined(__CloudABI__)
-  (void)gather_time_entropy;
+#if defined(HAVE_ARC4RANDOM_BUF)
  arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
+#elif defined(HAVE_ARC4RANDOM)
+  writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
+  return ENTROPY_DEBUG("arc4random", entropy);
 #else
  /* Try high quality providers first .. */
 #ifdef _WIN32
@ -819,10 +932,15 @@ generate_hash_secret_salt(XML_Parser parser)
    return ENTROPY_DEBUG("RtlGenRandom", entropy);
  }
 #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
-  if (writeRandomBytes_getrandom((void *)&entropy, sizeof(entropy))) {
+  if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
 #endif
+#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
+  if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
+    return ENTROPY_DEBUG("/dev/urandom", entropy);
+  }
+#endif  /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Process ID is 0 bits entropy if attacker has local access */
@ -833,7 +951,7 @@ generate_hash_secret_salt(XML_Parser parser)
    return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
  } else {
    return ENTROPY_DEBUG("fallback(8)",
-        entropy * (unsigned long)2305843009213693951);
+        entropy * (unsigned long)2305843009213693951ULL);
  }
 #endif
 }
@ -962,6 +1080,8 @@ parserCreate(const XML_Char *encodingName,
  nsAttsVersion = 0;
  nsAttsPower = 0;

+  protocolEncodingName = NULL;
+
  poolInit(&tempPool, &(parser->m_mem));
  poolInit(&temp2Pool, &(parser->m_mem));
  parserInit(parser, encodingName);
@ -988,9 +1108,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName)
 {
  processor = prologInitProcessor;
  XmlPrologStateInit(&prologState);
-  protocolEncodingName = (encodingName != NULL
-                          ? poolCopyString(&tempPool, encodingName)
-                          : NULL);
+  if (encodingName != NULL) {
+    protocolEncodingName = copyString(encodingName, &(parser->m_mem));
+  }
  curBase = NULL;
  XmlInitEncoding(&initEncoding, &encoding, 0);
  userData = NULL;
@ -1103,6 +1223,8 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
    unknownEncodingRelease(unknownEncodingData);
  poolClear(&tempPool);
  poolClear(&temp2Pool);
+  FREE((void *)protocolEncodingName);
+  protocolEncodingName = NULL;
  parserInit(parser, encodingName);
  dtdReset(_dtd, &parser->m_mem);
  return XML_TRUE;
@ -1119,10 +1241,16 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
  */
  if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
    return XML_STATUS_ERROR;
+
+  /* Get rid of any previous encoding name */
+  FREE((void *)protocolEncodingName);
+
  if (encodingName == NULL)
+    /* No new encoding name */
    protocolEncodingName = NULL;
  else {
-    protocolEncodingName = poolCopyString(&tempPool, encodingName);
+    /* Copy the new encoding name into allocated memory */
+    protocolEncodingName = copyString(encodingName, &(parser->m_mem));
    if (!protocolEncodingName)
      return XML_STATUS_ERROR;
  }
@ -1357,6 +1485,7 @@ XML_ParserFree(XML_Parser parser)
  destroyBindings(inheritedBindings, parser);
  poolDestroy(&tempPool);
  poolDestroy(&temp2Pool);
+  FREE((void *)protocolEncodingName);
 #ifdef XML_DTD
  /* external parameter entity parsers share the DTD structure
     parser->m_dtd with the root parser, so we must not destroy it
@ -1748,7 +1877,8 @@ enum XML_Status XMLCALL
 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
 {
  if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
-    errorCode = XML_ERROR_INVALID_ARGUMENT;
+    if (parser != NULL)
+      parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
    return XML_STATUS_ERROR;
  }
  switch (ps_parsing) {
@ -1783,9 +1913,22 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
    if (errorCode == XML_ERROR_NONE) {
      switch (ps_parsing) {
      case XML_SUSPENDED:
+        /* It is hard to be certain, but it seems that this case
+         * cannot occur.  This code is cleaning up a previous parse
+         * with no new data (since len == 0).  Changing the parsing
+         * state requires getting to execute a handler function, and
+         * there doesn't seem to be an opportunity for that while in
+         * this circumstance.
+         *
+         * Given the uncertainty, we retain the code but exclude it
+         * from coverage tests.
+         *
+         * LCOV_EXCL_START
+         */
        XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
        positionPtr = bufferPtr;
        return XML_STATUS_SUSPENDED;
+        /* LCOV_EXCL_STOP */
      case XML_INITIALIZED:
      case XML_PARSING:
        ps_parsing = XML_FINISHED;
@ -2974,9 +3117,17 @@ doContent(XML_Parser parser,
        return XML_ERROR_NO_MEMORY;
      break;
    default:
+      /* All of the tokens produced by XmlContentTok() have their own
+       * explicit cases, so this default is not strictly necessary.
+       * However it is a useful safety net, so we retain the code and
+       * simply exclude it from the coverage tests.
+       *
+       * LCOV_EXCL_START
+       */
      if (defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
+      /* LCOV_EXCL_STOP */
    }
    *eventPP = s = next;
    switch (ps_parsing) {
@ -3067,13 +3218,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
 #endif
    attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
    temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
-    if (temp == NULL)
+    if (temp == NULL) {
+      attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
+    }
    atts = temp;
 #ifdef XML_ATTR_INFO
    temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
-    if (temp2 == NULL)
+    if (temp2 == NULL) {
+      attsSize = oldAttsSize;
      return XML_ERROR_NO_MEMORY;
+    }
    attInfo = temp2;
 #endif
    if (n > oldAttsSize)
@ -3210,6 +3365,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
    int j;  /* hash table index */
    unsigned long version = nsAttsVersion;
    int nsAttsSize = (int)1 << nsAttsPower;
+    unsigned char oldNsAttsPower = nsAttsPower;
    /* size of hash table must be at least 2 * (# of prefixed attributes) */
    if ((nPrefixes << 1) >> nsAttsPower) {  /* true for nsAttsPower = 0 */
      NS_ATT *temp;
@ -3219,8 +3375,11 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
        nsAttsPower = 3;
      nsAttsSize = (int)1 << nsAttsPower;
      temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
-      if (!temp)
+      if (!temp) {
+        /* Restore actual size of memory in nsAtts */
+        nsAttsPower = oldNsAttsPower;
        return XML_ERROR_NO_MEMORY;
+      }
      nsAtts = temp;
      version = 0;  /* force re-initialization of nsAtts hash table */
    }
@ -3247,8 +3406,23 @@ storeAtts(XML_Parser parser, const ENCODING *enc,

        ((XML_Char *)s)[-1] = 0;  /* clear flag */
        id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
-        if (!id || !id->prefix)
-          return XML_ERROR_NO_MEMORY;
+        if (!id || !id->prefix) {
+          /* This code is walking through the appAtts array, dealing
+           * with (in this case) a prefixed attribute name.  To be in
+           * the array, the attribute must have already been bound, so
+           * has to have passed through the hash table lookup once
+           * already.  That implies that an entry for it already
+           * exists, so the lookup above will return a pointer to
+           * already allocated memory.  There is no opportunaity for
+           * the allocator to fail, so the condition above cannot be
+           * fulfilled.
+           *
+           * Since it is difficult to be certain that the above
+           * analysis is complete, we retain the test and merely
+           * remove the code from coverage tests.
+           */
+          return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
+        }
        b = id->prefix->binding;
        if (!b)
          return XML_ERROR_UNBOUND_PREFIX;
@ -3625,8 +3799,16 @@ doCdataSection(XML_Parser parser,
      }
      return XML_ERROR_UNCLOSED_CDATA_SECTION;
    default:
+      /* Every token returned by XmlCdataSectionTok() has its own
+       * explicit case, so this default case will never be executed.
+       * We retain it as a safety net and exclude it from the coverage
+       * statistics.
+       *
+       * LCOV_EXCL_START
+      */
      *eventPP = next;
      return XML_ERROR_UNEXPECTED_STATE;
+      /* LCOV_EXCL_STOP */
    }

    *eventPP = s = next;
@ -3686,8 +3868,20 @@ doIgnoreSection(XML_Parser parser,
    eventEndPP = &eventEndPtr;
  }
  else {
+    /* It's not entirely clear, but it seems the following two lines
+     * of code cannot be executed.  The only occasions on which 'enc'
+     * is not 'parser->m_encoding' are when this function is called
+     * from the internal entity processing, and IGNORE sections are an
+     * error in internal entities.
+     *
+     * Since it really isn't clear that this is true, we keep the code
+     * and just remove it from our coverage tests.
+     *
+     * LCOV_EXCL_START
+     */
    eventPP = &(openInternalEntities->internalEventPtr);
    eventEndPP = &(openInternalEntities->internalEventEndPtr);
+    /* LCOV_EXCL_STOP */
  }
  *eventPP = s;
  *startPtr = NULL;
@ -3720,8 +3914,16 @@ doIgnoreSection(XML_Parser parser,
    }
    return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
  default:
+    /* All of the tokens that XmlIgnoreSectionTok() returns have
+     * explicit cases to handle them, so this default case is never
+     * executed.  We keep it as a safety net anyway, and remove it
+     * from our test coverage statistics.
+     *
+     * LCOV_EXCL_START
+     */
    *eventPP = next;
    return XML_ERROR_UNEXPECTED_STATE;
+    /* LCOV_EXCL_STOP */
  }
  /* not reached */
 }
@ -3734,6 +3936,7 @@ initializeEncoding(XML_Parser parser)
  const char *s;
 #ifdef XML_UNICODE
  char encodingBuf[128];
+  /* See comments abount `protoclEncodingName` in parserInit() */
  if (!protocolEncodingName)
    s = NULL;
  else {
@ -3817,7 +4020,14 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
    reportDefault(parser, encoding, s, next);
  if (protocolEncodingName == NULL) {
    if (newEncoding) {
-      if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
+      /* Check that the specified encoding does not conflict with what
+       * the parser has already deduced.  Do we have the same number
+       * of bytes in the smallest representation of a character?  If
+       * this is UTF-16, is it the same endianness?
+       */
+      if (newEncoding->minBytesPerChar != encoding->minBytesPerChar
+          || (newEncoding->minBytesPerChar == 2 &&
+              newEncoding != encoding)) {
        eventPtr = encodingName;
        return XML_ERROR_INCORRECT_ENCODING;
      }
@ -3962,15 +4172,14 @@ entityValueInitProcessor(XML_Parser parser,
      result = processXmlDecl(parser, 0, start, next);
      if (result != XML_ERROR_NONE)
        return result;
-      switch (ps_parsing) {
-      case XML_SUSPENDED:
-        *nextPtr = next;
-        return XML_ERROR_NONE;
-      case XML_FINISHED:
+      /* At this point, ps_parsing cannot be XML_SUSPENDED.  For that
+       * to happen, a parameter entity parsing handler must have
+       * attempted to suspend the parser, which fails and raises an
+       * error.  The parser can be aborted, but can't be suspended.
+       */
+      if (ps_parsing == XML_FINISHED)
        return XML_ERROR_ABORTED;
-      default:
-        *nextPtr = next;
-      }
+      *nextPtr = next;
      /* stop scanning for text declaration - we found one */
      processor = entityValueProcessor;
      return entityValueProcessor(parser, next, end, nextPtr);
@ -4293,8 +4502,14 @@ doProlog(XML_Parser parser,
                                            &dtd->paramEntities,
                                            externalSubsetName,
                                            sizeof(ENTITY));
-          if (!entity)
-            return XML_ERROR_NO_MEMORY;
+          if (!entity) {
+            /* The external subset name "#" will have already been
+             * inserted into the hash table at the start of the
+             * external entity parsing, so no allocation will happen
+             * and lookup() cannot fail.
+             */
+            return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
+          }
          if (useForeignDTD)
            entity->base = curBase;
          dtd->paramEntityRead = XML_FALSE;
@ -4773,8 +4988,10 @@ doProlog(XML_Parser parser,
      if (prologState.level >= groupSize) {
        if (groupSize) {
          char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
-          if (temp == NULL)
+          if (temp == NULL) {
+            groupSize /= 2;
            return XML_ERROR_NO_MEMORY;
+          }
          groupConnector = temp;
          if (dtd->scaffIndex) {
            int *temp = (int *)REALLOC(dtd->scaffIndex,
@ -4786,8 +5003,10 @@ doProlog(XML_Parser parser,
        }
        else {
          groupConnector = (char *)MALLOC(groupSize = 32);
-          if (!groupConnector)
+          if (!groupConnector) {
+            groupSize = 0;
            return XML_ERROR_NO_MEMORY;
+          }
        }
      }
      groupConnector[prologState.level] = 0;
@ -4850,8 +5069,29 @@ doProlog(XML_Parser parser,
             : !dtd->hasParamEntityRefs)) {
          if (!entity)
            return XML_ERROR_UNDEFINED_ENTITY;
-          else if (!entity->is_internal)
-            return XML_ERROR_ENTITY_DECLARED_IN_PE;
+          else if (!entity->is_internal) {
+            /* It's hard to exhaustively search the code to be sure,
+             * but there doesn't seem to be a way of executing the
+             * following line.  There are two cases:
+             *
+             * If 'standalone' is false, the DTD must have no
+             * parameter entities or we wouldn't have passed the outer
+             * 'if' statement.  That measn the only entity in the hash
+             * table is the external subset name "#" which cannot be
+             * given as a parameter entity name in XML syntax, so the
+             * lookup must have returned NULL and we don't even reach
+             * the test for an internal entity.
+             *
+             * If 'standalone' is true, it does not seem to be
+             * possible to create entities taking this code path that
+             * are not internal entities, so fail the test above.
+             *
+             * Because this analysis is very uncertain, the code is
+             * being left in place and merely removed from the
+             * coverage test statistics.
+             */
+            return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
+          }
        }
        else if (!entity) {
          dtd->keepProcessing = dtd->standalone;
@ -5323,11 +5563,15 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
            && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
          break;
        n = XmlEncode(n, (ICHAR *)buf);
-        if (!n) {
-          if (enc == encoding)
-            eventPtr = ptr;
-          return XML_ERROR_BAD_CHAR_REF;
-        }
+        /* The XmlEncode() functions can never return 0 here.  That
+         * error return happens if the code point passed in is either
+         * negative or greater than or equal to 0x110000.  The
+         * XmlCharRefNumber() functions will all return a number
+         * strictly less than 0x110000 or a negative value if an error
+         * occurred.  The negative value is intercepted above, so
+         * XmlEncode() is never passed a value it might return an
+         * error for.
+         */
        for (i = 0; i < n; i++) {
          if (!poolAppendChar(pool, buf[i]))
            return XML_ERROR_NO_MEMORY;
@ -5401,8 +5645,26 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
          break;
        }
        if (entity->open) {
-          if (enc == encoding)
-            eventPtr = ptr;
+          if (enc == encoding) {
+            /* It does not appear that this line can be executed.
+             *
+             * The "if (entity->open)" check catches recursive entity
+             * definitions.  In order to be called with an open
+             * entity, it must have gone through this code before and
+             * been through the recursive call to
+             * appendAttributeValue() some lines below.  That call
+             * sets the local encoding ("enc") to the parser's
+             * internal encoding (internal_utf8 or internal_utf16),
+             * which can never be the same as the principle encoding.
+             * It doesn't appear there is another code path that gets
+             * here with entity->open being TRUE.
+             *
+             * Since it is not certain that this logic is watertight,
+             * we keep the line and merely exclude it from coverage
+             * tests.
+             */
+            eventPtr = ptr; /* LCOV_EXCL_LINE */
+          }
          return XML_ERROR_RECURSIVE_ENTITY_REF;
        }
        if (entity->notation) {
@ -5429,9 +5691,21 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
      }
      break;
    default:
+      /* The only token returned by XmlAttributeValueTok() that does
+       * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
+       * Getting that would require an entity name to contain an
+       * incomplete XML character (e.g. \xE2\x82); however previous
+       * tokenisers will have already recognised and rejected such
+       * names before XmlAttributeValueTok() gets a look-in.  This
+       * default case should be retained as a safety net, but the code
+       * excluded from coverage tests.
+       *
+       * LCOV_EXCL_START
+       */
      if (enc == encoding)
        eventPtr = ptr;
      return XML_ERROR_UNEXPECTED_STATE;
+      /* LCOV_EXCL_STOP */
    }
    ptr = next;
  }
@ -5564,12 +5838,15 @@ storeEntityValue(XML_Parser parser,
          goto endEntityValue;
        }
        n = XmlEncode(n, (ICHAR *)buf);
-        if (!n) {
-          if (enc == encoding)
-            eventPtr = entityTextPtr;
-          result = XML_ERROR_BAD_CHAR_REF;
-          goto endEntityValue;
-        }
+        /* The XmlEncode() functions can never return 0 here.  That
+         * error return happens if the code point passed in is either
+         * negative or greater than or equal to 0x110000.  The
+         * XmlCharRefNumber() functions will all return a number
+         * strictly less than 0x110000 or a negative value if an error
+         * occurred.  The negative value is intercepted above, so
+         * XmlEncode() is never passed a value it might return an
+         * error for.
+         */
        for (i = 0; i < n; i++) {
          if (pool->end == pool->ptr && !poolGrow(pool)) {
            result = XML_ERROR_NO_MEMORY;
@ -5590,10 +5867,18 @@ storeEntityValue(XML_Parser parser,
      result = XML_ERROR_INVALID_TOKEN;
      goto endEntityValue;
    default:
+      /* This default case should be unnecessary -- all the tokens
+       * that XmlEntityValueTok() can return have their own explicit
+       * cases -- but should be retained for safety.  We do however
+       * exclude it from the coverage statistics.
+       *
+       * LCOV_EXCL_START
+       */
      if (enc == encoding)
        eventPtr = entityTextPtr;
      result = XML_ERROR_UNEXPECTED_STATE;
      goto endEntityValue;
+      /* LCOV_EXCL_STOP */
    }
    entityTextPtr = next;
  }
@ -5691,8 +5976,25 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
      eventEndPP = &eventEndPtr;
    }
    else {
+      /* To get here, two things must be true; the parser must be
+       * using a character encoding that is not the same as the
+       * encoding passed in, and the encoding passed in must need
+       * conversion to the internal format (UTF-8 unless XML_UNICODE
+       * is defined).  The only occasions on which the encoding passed
+       * in is not the same as the parser's encoding are when it is
+       * the internal encoding (e.g. a previously defined parameter
+       * entity, already converted to internal format).  This by
+       * definition doesn't need conversion, so the whole branch never
+       * gets executed.
+       *
+       * For safety's sake we don't delete these lines and merely
+       * exclude them from coverage statistics.
+       *
+       * LCOV_EXCL_START
+       */
      eventPP = &(openInternalEntities->internalEventPtr);
      eventEndPP = &(openInternalEntities->internalEventEndPtr);
+      /* LCOV_EXCL_STOP */
    }
    do {
      ICHAR *dataPtr = (ICHAR *)dataBuf;
@ -5861,9 +6163,30 @@ getContext(XML_Parser parser)
    len = dtd->defaultPrefix.binding->uriLen;
    if (namespaceSeparator)
      len--;
-    for (i = 0; i < len; i++)
-      if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
-        return NULL;
+    for (i = 0; i < len; i++) {
+      if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) {
+        /* Because of memory caching, I don't believe this line can be
+         * executed.
+         *
+         * This is part of a loop copying the default prefix binding
+         * URI into the parser's temporary string pool.  Previously,
+         * that URI was copied into the same string pool, with a
+         * terminating NUL character, as part of setContext().  When
+         * the pool was cleared, that leaves a block definitely big
+         * enough to hold the URI on the free block list of the pool.
+         * The URI copy in getContext() therefore cannot run out of
+         * memory.
+         *
+         * If the pool is used between the setContext() and
+         * getContext() calls, the worst it can do is leave a bigger
+         * block on the front of the free list.  Given that this is
+         * all somewhat inobvious and program logic can be changed, we
+         * don't delete the line but we do exclude it from the test
+         * coverage statistics.
+         */
+        return NULL; /* LCOV_EXCL_LINE */
+      }
+    }
    needSep = XML_TRUE;
  }

@ -5875,8 +6198,15 @@ getContext(XML_Parser parser)
    PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
    if (!prefix)
      break;
-    if (!prefix->binding)
-      continue;
+    if (!prefix->binding) {
+      /* This test appears to be (justifiable) paranoia.  There does
+       * not seem to be a way of injecting a prefix without a binding
+       * that doesn't get errored long before this function is called.
+       * The test should remain for safety's sake, so we instead
+       * exclude the following line from the coverage statistics.
+       */
+      continue; /* LCOV_EXCL_LINE */
+    }
    if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
      return NULL;
    for (s = prefix->name; *s; s++)
@ -6547,8 +6877,20 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s)
 static const XML_Char *
 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
 {
-  if (!pool->ptr && !poolGrow(pool))
-    return NULL;
+  if (!pool->ptr && !poolGrow(pool)) {
+    /* The following line is unreachable given the current usage of
+     * poolCopyStringN().  Currently it is called from exactly one
+     * place to copy the text of a simple general entity.  By that
+     * point, the name of the entity is already stored in the pool, so
+     * pool->ptr cannot be NULL.
+     *
+     * If poolCopyStringN() is used elsewhere as it well might be,
+     * this line may well become executable again.  Regardless, this
+     * sort of check shouldn't be removed lightly, so we just exclude
+     * it from the coverage statistics.
+     */
+    return NULL; /* LCOV_EXCL_LINE */
+  }
  for (; n > 0; --n, s++) {
    if (!poolAppendChar(pool, *s))
      return NULL;
@ -6641,8 +6983,19 @@ poolGrow(STRING_POOL *pool)
    int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
    size_t bytesToAllocate;

-    if (blockSize < 0)
-      return XML_FALSE;
+    // NOTE: Needs to be calculated prior to calling `realloc`
+    //       to avoid dangling pointers:
+    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
+
+    if (blockSize < 0) {
+      /* This condition traps a situation where either more than
+       * INT_MAX/2 bytes have already been allocated.  This isn't
+       * readily testable, since it is unlikely that an average
+       * machine will have that much memory, so we exclude it from the
+       * coverage statistics.
+       */
+      return XML_FALSE; /* LCOV_EXCL_LINE */
+    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
@ -6654,7 +7007,7 @@ poolGrow(STRING_POOL *pool)
      return XML_FALSE;
    pool->blocks = temp;
    pool->blocks->size = blockSize;
-    pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
+    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  }
@ -6663,8 +7016,18 @@ poolGrow(STRING_POOL *pool)
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

-    if (blockSize < 0)
-      return XML_FALSE;
+    if (blockSize < 0) {
+      /* This condition traps a situation where either more than
+       * INT_MAX bytes have already been allocated (which is prevented
+       * by various pieces of program logic, not least this one, never
+       * mind the unlikelihood of actually having that much memory) or
+       * the pool control fields have been corrupted (which could
+       * conceivably happen in an extremely buggy user handler
+       * function).  Either way it isn't readily testable, so we
+       * exclude it from the coverage statistics.
+       */
+      return XML_FALSE;  /* LCOV_EXCL_LINE */
+    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
@ -6827,3 +7190,26 @@ getElementType(XML_Parser parser,
  }
  return ret;
 }
+
+static XML_Char *
+copyString(const XML_Char *s,
+           const XML_Memory_Handling_Suite *memsuite)
+{
+    int charsRequired = 0;
+    XML_Char *result;
+
+    /* First determine how long the string is */
+    while (s[charsRequired] != 0) {
+      charsRequired++;
+    }
+    /* Include the terminator */
+    charsRequired++;
+
+    /* Now allocate space for the copy */
+    result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
+    if (result == NULL)
+        return NULL;
+    /* Copy the original into place */
+    memcpy(result, s, charsRequired * sizeof(XML_Char));
+    return result;
+}
--- a/Modules/expat/xmlrole.c
+++ b/Modules/expat/xmlrole.c
@ -170,7 +170,14 @@ prolog1(PROLOG_STATE *state,
  case XML_TOK_COMMENT:
    return XML_ROLE_COMMENT;
  case XML_TOK_BOM:
-    return XML_ROLE_NONE;
+    /* This case can never arise.  To reach this role function, the
+     * parse must have passed through prolog0 and therefore have had
+     * some form of input, even if only a space.  At that point, a
+     * byte order mark is no longer a valid character (though
+     * technically it should be interpreted as a non-breaking space),
+     * so will be rejected by the tokenizing stages.
+     */
+    return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
  case XML_TOK_DECL_OPEN:
    if (!XmlNameMatchesAscii(enc,
                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
@ -1285,6 +1292,26 @@ declClose(PROLOG_STATE *state,
  return common(state, tok);
 }

+/* This function will only be invoked if the internal logic of the
+ * parser has broken down.  It is used in two cases:
+ *
+ * 1: When the XML prolog has been finished.  At this point the
+ * processor (the parser level above these role handlers) should
+ * switch from prologProcessor to contentProcessor and reinitialise
+ * the handler function.
+ *
+ * 2: When an error has been detected (via common() below).  At this
+ * point again the processor should be switched to errorProcessor,
+ * which will never call a handler.
+ *
+ * The result of this is that error() can only be called if the
+ * processor switch failed to happen, which is an internal error and
+ * therefore we shouldn't be able to provoke it simply by using the
+ * library.  It is a necessary backstop, however, so we merely exclude
+ * it from the coverage statistics.
+ *
+ * LCOV_EXCL_START
+ */
 static int PTRCALL
 error(PROLOG_STATE *UNUSED_P(state),
      int UNUSED_P(tok),
@ -1294,6 +1321,7 @@ error(PROLOG_STATE *UNUSED_P(state),
 {
  return XML_ROLE_NONE;
 }
+/* LCOV_EXCL_STOP */

 static int FASTCALL
 common(PROLOG_STATE *state, int tok)
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@ -1019,7 +1019,11 @@ streqci(const char *s1, const char *s2)
    if (ASCII_a <= c1 && c1 <= ASCII_z)
      c1 += ASCII_A - ASCII_a;
    if (ASCII_a <= c2 && c2 <= ASCII_z)
-      c2 += ASCII_A - ASCII_a;
+      /* The following line will never get executed.  streqci() is
+       * only called from two places, both of which guarantee to put
+       * upper-case strings into s2.
+       */
+      c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
    if (c1 != c2)
      return 0;
    if (!c1)
@ -1291,7 +1295,7 @@ XmlUtf8Encode(int c, char *buf)
  };

  if (c < 0)
-    return 0;
+    return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
  if (c < min2) {
    buf[0] = (char)(c | UTF8_cval1);
    return 1;
@ -1314,7 +1318,7 @@ XmlUtf8Encode(int c, char *buf)
    buf[3] = (char)((c & 0x3f) | 0x80);
    return 4;
  }
-  return 0;
+  return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
 }

 int FASTCALL
@ -1465,6 +1469,9 @@ XmlInitUnknownEncoding(void *mem,
    else if (c < 0) {
      if (c < -4)
        return 0;
+      /* Multi-byte sequences need a converter function */
+      if (!convert)
+        return 0;
      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
      e->utf8[i][0] = 0;
      e->utf16[i] = 0;
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@ -1198,8 +1198,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
  const char *start;
  if (ptr >= end)
    return XML_TOK_NONE;
-  else if (! HAS_CHAR(enc, ptr, end))
-    return XML_TOK_PARTIAL;
+  else if (! HAS_CHAR(enc, ptr, end)) {
+    /* This line cannot be executed.  The incoming data has already
+     * been tokenized once, so incomplete characters like this have
+     * already been eliminated from the input.  Retaining the paranoia
+     * check is still valuable, however.
+     */
+    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
+  }
  start = ptr;
  while (HAS_CHAR(enc, ptr, end)) {
    switch (BYTE_TYPE(enc, ptr)) {
@ -1258,8 +1264,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
  const char *start;
  if (ptr >= end)
    return XML_TOK_NONE;
-  else if (! HAS_CHAR(enc, ptr, end))
-    return XML_TOK_PARTIAL;
+  else if (! HAS_CHAR(enc, ptr, end)) {
+    /* This line cannot be executed.  The incoming data has already
+     * been tokenized once, so incomplete characters like this have
+     * already been eliminated from the input.  Retaining the paranoia
+     * check is still valuable, however.
+     */
+    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
+  }
  start = ptr;
  while (HAS_CHAR(enc, ptr, end)) {
    switch (BYTE_TYPE(enc, ptr)) {
@ -1614,6 +1626,14 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
  return 0;
 }

+/* This function does not appear to be called from anywhere within the
+ * library code.  It is used via the macro XmlSameName(), which is
+ * defined but never used.  Since it appears in the encoding function
+ * table, removing it is not a thing to be undertaken lightly.  For
+ * the moment, we simply exclude it from coverage tests.
+ *
+ * LCOV_EXCL_START
+ */
 static int PTRCALL
 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
 {
@ -1677,14 +1697,21 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
  }
  /* not reached */
 }
+/* LCOV_EXCL_STOP */

 static int PTRCALL
 PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
                         const char *end1, const char *ptr2)
 {
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
-    if (end1 - ptr1 < MINBPC(enc))
-      return 0;
+    if (end1 - ptr1 < MINBPC(enc)) {
+      /* This line cannot be executed.  THe incoming data has already
+       * been tokenized once, so imcomplete characters like this have
+       * already been eliminated from the input.  Retaining the
+       * paranoia check is still valuable, however.
+       */
+      return 0; /* LCOV_EXCL_LINE */
+    }
    if (!CHAR_MATCHES(enc, ptr1, *ptr2))
      return 0;
  }
--- a/PCbuild/_elementtree.vcxproj
+++ b/PCbuild/_elementtree.vcxproj
@ -87,6 +87,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\Modules\_elementtree.c" />
+    <ClCompile Include="..\Modules\expat\loadlibrary.c" />
    <ClCompile Include="..\Modules\expat\xmlparse.c" />
    <ClCompile Include="..\Modules\expat\xmlrole.c" />
    <ClCompile Include="..\Modules\expat\xmltok.c" />
--- a/PCbuild/_elementtree.vcxproj.filters
+++ b/PCbuild/_elementtree.vcxproj.filters
@ -33,6 +33,9 @@
    <ClInclude Include="..\Modules\expat\latin1tab.h">
      <Filter>Header Files</Filter>
    </ClInclude>
+    <ClInclude Include="..\Modules\expat\loadlibrary.c">
+      <Filter>Header Files</Filter>
+    </ClInclude>
    <ClInclude Include="..\Modules\expat\macconfig.h">
      <Filter>Header Files</Filter>
    </ClInclude>
--- a/PCbuild/pyexpat.vcxproj
+++ b/PCbuild/pyexpat.vcxproj
@ -68,6 +68,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\Modules\pyexpat.c" />
+    <ClCompile Include="..\Modules\expat\loadlibrary.c" />
    <ClCompile Include="..\Modules\expat\xmlparse.c" />
    <ClCompile Include="..\Modules\expat\xmlrole.c" />
    <ClCompile Include="..\Modules\expat\xmltok.c" />
--- a/PCbuild/pyexpat.vcxproj.filters
+++ b/PCbuild/pyexpat.vcxproj.filters
@ -20,6 +20,9 @@
    <ClCompile Include="..\Modules\pyexpat.c">
      <Filter>Source Files</Filter>
    </ClCompile>
+    <ClCompile Include="..\Modules\expat\loadlibrary.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
    <ClCompile Include="..\Modules\expat\xmlparse.c">
      <Filter>Source Files</Filter>
    </ClCompile>
--- a/setup.py
+++ b/setup.py
@ -1525,6 +1525,9 @@ class PyBuildExt(build_ext):
            expat_inc = [os.path.join(os.getcwd(), srcdir, 'Modules', 'expat')]
            define_macros = [
                ('HAVE_EXPAT_CONFIG_H', '1'),
+                # bpo-30947: Python uses best available entropy sources to
+                # call XML_SetHashSalt(), expat entropy sources are not needed
+                ('XML_POOR_ENTROPY', '1'),
            ]
            expat_lib = []
            expat_sources = ['expat/xmlparse.c',