From 0dcf67e56d891832b53a82ee0abb60dcc2e0148e Mon Sep 17 00:00:00 2001
From: Fredrik Lundh
Date: Tue, 26 Jun 2001 20:01:56 +0000
Subject: [PATCH] more unicode tweaks: make unichr(0xdddddddd) behave like
 u"\Udddddddd" wrt surrogates. (this extends the valid range from 65535 to
 1114111)

---
Review notes (text between "---" and the diffstat is not part of the commit
message):
- The ValueError text now reads range(0x110000).  The bounds check accepts
  0x10ffff itself and range(n) excludes n, so range(0x10ffff) understated
  the valid range by one.
- Only the text of one added line changed, so the hunk counts and the
  diffstat are unchanged; the post-image blob id on the "index" line was not
  recomputed.

 Python/bltinmodule.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 4da984f8da4..ed5519f2574 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -308,23 +308,34 @@ static PyObject *
 builtin_unichr(PyObject *self, PyObject *args)
 {
 	long x;
-	Py_UNICODE s[1];
+	Py_UNICODE s[2];
 
 	if (!PyArg_ParseTuple(args, "l:unichr", &x))
 		return NULL;
-	if (x < 0 || x >= 65536) {
+
+	if (x < 0 || x > 0x10ffff) {
 		PyErr_SetString(PyExc_ValueError,
-				"unichr() arg not in range(65536)");
+				"unichr() arg not in range(0x110000)");
 		return NULL;
 	}
-	s[0] = (Py_UNICODE)x;
-	return PyUnicode_FromUnicode(s, 1);
+
+	if (x <= 0xffff) {
+		/* UCS-2 character */
+		s[0] = (Py_UNICODE) x;
+		return PyUnicode_FromUnicode(s, 1);
+	} else {
+		/* UCS-4 character. store as two surrogate characters */
+		x -= 0x10000L;
+		s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
+		s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
+		return PyUnicode_FromUnicode(s, 2);
+	}
 }
 
 static char unichr_doc[] =
 "unichr(i) -> Unicode character\n\
 \n\
-Return a Unicode string of one character with ordinal i; 0 <= i < 65536.";
+Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.";
 
 
 static PyObject *