From f1e751e933aa8c39c0e9cfa4cdc3f8f9f0538202 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 20 Oct 2023 20:03:11 +0200
Subject: [PATCH] gh-111089: PyUnicode_AsUTF8AndSize() sets size on error
 (#111106)

On error, PyUnicode_AsUTF8AndSize() now sets the size argument to -1
(if it is not NULL), so that callers never read an undefined value.
---
 Doc/c-api/unicode.rst       | 4 ++--
 Modules/_testcapi/unicode.c | 2 +-
 Objects/unicodeobject.c     | 9 ++++++++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index d17e63dc089..5fa37963e07 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -971,8 +971,8 @@ These are the UTF-8 codec APIs:
    returned buffer always has an extra null byte appended (not included in
    *size*), regardless of whether there are any other null code points.
 
-   In the case of an error, ``NULL`` is returned with an exception set and no
-   *size* is stored.
+   On error, set an exception, set *size* to ``-1`` (if *size* is not
+   ``NULL``) and return ``NULL``.
 
    This caches the UTF-8 representation of the string in the Unicode object, and
    subsequent calls will return a pointer to the same buffer.  The caller is not
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index d52d88a65d8..a10183dddec 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -634,7 +634,7 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
     NULLABLE(unicode);
     s = PyUnicode_AsUTF8AndSize(unicode, &size);
     if (s == NULL) {
-        assert(size == UNINITIALIZED_SIZE);
+        assert(size == -1);
         return NULL;
     }
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 07d1b6e726b..80b19567c63 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3820,17 +3820,24 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
+        if (psize) {
+            *psize = -1;
+        }
         return NULL;
     }
 
     if (PyUnicode_UTF8(unicode) == NULL) {
         if (unicode_fill_utf8(unicode) == -1) {
+            if (psize) {
+                *psize = -1;
+            }
             return NULL;
         }
     }
 
-    if (psize)
+    if (psize) {
         *psize = PyUnicode_UTF8_LENGTH(unicode);
+    }
     return PyUnicode_UTF8(unicode);
 }