Issue #8603: Create a bytes version of os.environ for Unix

Create os.environb mapping and os.getenvb() function, os.unsetenv() encodes str
argument to the file system encoding with the surrogateescape error handler
(instead of utf8/strict) and accepts bytes, and posix.environ keys and values
are bytes.
This commit is contained in:
Victor Stinner 2010-05-06 22:05:07 +00:00
parent d930b63583
commit 84ae118006
7 changed files with 190 additions and 54 deletions

View File

@ -107,6 +107,10 @@ process and user.
to modify the environment as well as query the environment. :func:`putenv` will
be called automatically when the mapping is modified.
On Unix, keys and values are converted using the encoding returned by
:func:`sys.getfilesystemencoding` and the ``'surrogateescape'`` error handler.
Use :data:`environb` if you would like to use a different encoding.
.. note::
Calling :func:`putenv` directly does not change ``os.environ``, so it's better
@ -128,6 +132,16 @@ process and user.
one of the :meth:`pop` or :meth:`clear` methods is called.
.. data:: environb
Bytes version of :data:`environ`: a mapping object representing the
environment as byte strings. :data:`environ` and :data:`environb` are
synchronized (modifying :data:`environb` updates :data:`environ`, and vice
versa).
Availability: Unix.
.. function:: chdir(path)
fchdir(fd)
getcwd()
@ -251,7 +265,19 @@ process and user.
.. function:: getenv(key, default=None)
Return the value of the environment variable *key* if it exists, or
*default* if it doesn't. Availability: most flavors of Unix, Windows.
*default* if it doesn't. *key*, *default* and the result are str.
Availability: most flavors of Unix, Windows.
On Unix, keys and values are decoded with the encoding returned by
:func:`sys.getfilesystemencoding` and the ``'surrogateescape'`` error handler.
Use :func:`os.getenvb` if you would like to use a different encoding.
.. function:: getenvb(key, default=None)
Return the value of the environment variable *key* if it exists, or
*default* if it doesn't. *key*, *default* and the result are bytes.
Availability: most flavors of Unix.
.. function:: putenv(key, value)

View File

@ -69,17 +69,22 @@ In addition to many functions described in the :mod:`os` module documentation,
.. data:: environ
A dictionary representing the string environment at the time the interpreter
was started. For example, ``environ['HOME']`` is the pathname of your home
directory, equivalent to ``getenv("HOME")`` in C.
was started. Keys and values are bytes on Unix and str on Windows. For
example, ``environ[b'HOME']`` (``environ['HOME']`` on Windows) is the
pathname of your home directory, equivalent to ``getenv("HOME")`` in C.
Modifying this dictionary does not affect the string environment passed on by
:func:`execv`, :func:`popen` or :func:`system`; if you need to change the
environment, pass ``environ`` to :func:`execve` or add variable assignments and
export statements to the command string for :func:`system` or :func:`popen`.
.. versionchanged:: 3.2
On Unix, keys and values are bytes.
.. note::
The :mod:`os` module provides an alternate implementation of ``environ`` which
updates the environment on modification. Note also that updating ``os.environ``
will render this dictionary obsolete. Use of the :mod:`os` module version of
this is recommended over direct access to the :mod:`posix` module.
The :mod:`os` module provides an alternate implementation of ``environ``
which updates the environment on modification. Note also that updating
:data:`os.environ` will render this dictionary obsolete. Use of the
:mod:`os` module version of this is recommended over direct access to the
:mod:`posix` module.

View File

@ -387,29 +387,33 @@ def get_exec_path(env=None):
from _abcoll import MutableMapping # Can't use collections (bootstrap)
class _Environ(MutableMapping):
def __init__(self, environ, keymap, putenv, unsetenv):
self.keymap = keymap
def __init__(self, data, encodekey, decodekey, encodevalue, decodevalue, putenv, unsetenv):
self.encodekey = encodekey
self.decodekey = decodekey
self.encodevalue = encodevalue
self.decodevalue = decodevalue
self.putenv = putenv
self.unsetenv = unsetenv
self.data = data = {}
for key, value in environ.items():
data[keymap(key)] = str(value)
self.data = data
def __getitem__(self, key):
return self.data[self.keymap(key)]
value = self.data[self.encodekey(key)]
return self.decodevalue(value)
def __setitem__(self, key, value):
value = str(value)
key = self.encodekey(key)
value = self.encodevalue(value)
self.putenv(key, value)
self.data[self.keymap(key)] = value
self.data[key] = value
def __delitem__(self, key):
key = self.encodekey(key)
self.unsetenv(key)
del self.data[self.keymap(key)]
del self.data[key]
def __iter__(self):
for key in self.data:
yield key
yield self.decodekey(key)
def __len__(self):
return len(self.data)
@ -439,22 +443,67 @@ except NameError:
else:
__all__.append("unsetenv")
if name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE
_keymap = lambda key: str(key.upper())
else: # Where Env Var Names Can Be Mixed Case
_keymap = lambda key: str(key)
def _createenviron():
    """Build the str-based environment mapping for the current platform.

    On 'os2' and 'nt' the keys are upper-cased and a fresh dict is built
    from the raw ``environ``; keys and values stay str.  On every other
    platform the raw ``environ`` dict is wrapped directly and str keys and
    values are converted with the file system encoding and the
    'surrogateescape' error handler.

    Returns an ``_Environ`` instance wired to ``_putenv``/``_unsetenv``.
    """
    def _require_str(value):
        # The str view of the environment accepts str only.
        if isinstance(value, str):
            return value
        raise TypeError("str expected, not %s" % type(value).__name__)

    if name not in ('os2', 'nt'):
        # Where Env Var Names Can Be Mixed Case
        def encode(value):
            text = _require_str(value)
            return text.encode(sys.getfilesystemencoding(), 'surrogateescape')

        def decode(value):
            return value.decode(sys.getfilesystemencoding(), 'surrogateescape')

        encodekey = encode
        # Wrap the raw mapping itself rather than a copy.
        data = environ
    else:
        # Where Env Var Names Must Be UPPERCASE
        encode = _require_str
        decode = str

        def encodekey(key):
            return encode(key).upper()

        data = {encodekey(key): value for key, value in environ.items()}

    # Argument order: data, key encode/decode, value encode/decode, setters.
    return _Environ(data,
                    encodekey, decode,
                    encode, decode,
                    _putenv, _unsetenv)
environ = _Environ(environ, _keymap, _putenv, _unsetenv)
# unicode environ
environ = _createenviron()
del _createenviron
def getenv(key, default=None):
"""Get an environment variable, return None if it doesn't exist.
The optional second argument can specify an alternate default."""
if isinstance(key, bytes):
key = key.decode(sys.getfilesystemencoding(), "surrogateescape")
The optional second argument can specify an alternate default.
key, default and the result are str."""
return environ.get(key, default)
__all__.append("getenv")
if name not in ('os2', 'nt'):
    def _check_bytes(value):
        """Return *value* unchanged when it is bytes, else raise TypeError."""
        if isinstance(value, bytes):
            return value
        raise TypeError("bytes expected, not %s" % type(value).__name__)

    # bytes environ
    # Built on environ.data, i.e. the same underlying dict as the str
    # mapping, so a change made through one view is visible in the other.
    environb = _Environ(
        environ.data,
        _check_bytes, bytes,    # key encode / decode
        _check_bytes, bytes,    # value encode / decode
        _putenv, _unsetenv)
    # The helper is only needed to build environb; drop the module-level name.
    del _check_bytes

    def getenvb(key, default=None):
        """Look up environment variable *key* in the bytes environment.

        Returns *default* (None unless given) when the variable is not set.
        key, default and the result are bytes."""
        return environb.get(key, default)

    __all__.append("getenvb")
def _exists(name):
return name in globals()

View File

@ -369,12 +369,15 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol):
def setUp(self):
    """Snapshot the process environment and install the reference items.

    The bytes snapshot is taken only when ``os.environb`` exists: the os
    module defines it only on platforms other than 'os2' and 'nt', so
    touching it unconditionally would raise AttributeError there.
    """
    self.__save = dict(os.environ)
    if hasattr(os, "environb"):
        self.__saveb = dict(os.environb)
    for key, value in self._reference().items():
        os.environ[key] = value

def tearDown(self):
    """Restore the environment captured by setUp().

    The bytes mapping is restored only when ``os.environb`` exists,
    mirroring the guard used when the snapshot was taken.
    """
    os.environ.clear()
    os.environ.update(self.__save)
    if hasattr(os, "environb"):
        os.environb.clear()
        os.environb.update(self.__saveb)
def _reference(self):
return {"KEY1":"VALUE1", "KEY2":"VALUE2", "KEY3":"VALUE3"}
@ -439,6 +442,24 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol):
# Supplied PATH environment variable
self.assertSequenceEqual(test_path, os.get_exec_path(test_env))
@unittest.skipIf(sys.platform == "win32", "POSIX specific test")
def test_environb(self):
# os.environ -> os.environb
value = 'euro\u20ac'
try:
value_bytes = value.encode(sys.getfilesystemencoding(), 'surrogateescape')
except UnicodeEncodeError:
raise unittest.SkipTest("U+20AC character is not encodable to %s" % sys.getfilesystemencoding())
os.environ['unicode'] = value
self.assertEquals(os.environ['unicode'], value)
self.assertEquals(os.environb[b'unicode'], value_bytes)
# os.environb -> os.environ
value = b'\xff'
os.environb[b'bytes'] = value
self.assertEquals(os.environb[b'bytes'], value)
value_str = value.decode(sys.getfilesystemencoding(), 'surrogateescape')
self.assertEquals(os.environ['bytes'], value_str)
class WalkTests(unittest.TestCase):
"""Tests for os.walk()."""

View File

@ -803,8 +803,6 @@ class POSIXProcessTestCase(BaseTestCase):
def test_undecodable_env(self):
for key, value in (('test', 'abc\uDCFF'), ('test\uDCFF', '42')):
value_repr = repr(value).encode("ascii")
# test str with surrogates
script = "import os; print(repr(os.getenv(%s)))" % repr(key)
env = os.environ.copy()
@ -813,19 +811,19 @@ class POSIXProcessTestCase(BaseTestCase):
[sys.executable, "-c", script],
env=env)
stdout = stdout.rstrip(b'\n\r')
self.assertEquals(stdout, value_repr)
self.assertEquals(stdout.decode('ascii'), repr(value))
# test bytes
key = key.encode("ascii", "surrogateescape")
value = value.encode("ascii", "surrogateescape")
script = "import os; print(repr(os.getenv(%s)))" % repr(key)
script = "import os; print(repr(os.getenvb(%s)))" % repr(key)
env = os.environ.copy()
env[key] = value
stdout = subprocess.check_output(
[sys.executable, "-c", script],
env=env)
stdout = stdout.rstrip(b'\n\r')
self.assertEquals(stdout, value_repr)
self.assertEquals(stdout.decode('ascii'), repr(value))
@unittest.skipUnless(mswindows, "Windows specific tests")

View File

@ -348,6 +348,12 @@ C-API
Library
-------
- Issue #8603: Create a bytes version of os.environ for Unix: create
os.environb mapping and os.getenvb() function, os.unsetenv() encodes str
argument to the file system encoding with the surrogateescape error handler
(instead of utf8/strict) and accepts bytes, and posix.environ keys and values
are bytes.
- Issue #8573: asyncore _strerror() function might throw ValueError.
- Issue #8483: asyncore.dispatcher's __getattr__ method produced confusing

View File

@ -498,14 +498,12 @@ convertenviron(void)
char *p = strchr(*e, '=');
if (p == NULL)
continue;
k = PyUnicode_Decode(*e, (int)(p-*e),
Py_FileSystemDefaultEncoding, "surrogateescape");
k = PyBytes_FromStringAndSize(*e, (int)(p-*e));
if (k == NULL) {
PyErr_Clear();
continue;
}
v = PyUnicode_Decode(p+1, strlen(p+1),
Py_FileSystemDefaultEncoding, "surrogateescape");
v = PyBytes_FromStringAndSize(p+1, strlen(p+1));
if (v == NULL) {
PyErr_Clear();
Py_DECREF(k);
@ -5301,7 +5299,7 @@ posix_putenv(PyObject *self, PyObject *args)
char *s1, *s2;
char *newenv;
#endif
PyObject *newstr;
PyObject *newstr = NULL;
size_t len;
#ifdef MS_WINDOWS
@ -5324,15 +5322,19 @@ posix_putenv(PyObject *self, PyObject *args)
APIRET rc;
rc = DosSetExtLIBPATH(s2, BEGIN_LIBPATH);
if (rc != NO_ERROR)
return os2_error(rc);
if (rc != NO_ERROR) {
os2_error(rc);
goto error;
}
} else if (stricmp(s1, "ENDLIBPATH") == 0) {
APIRET rc;
rc = DosSetExtLIBPATH(s2, END_LIBPATH);
if (rc != NO_ERROR)
return os2_error(rc);
if (rc != NO_ERROR) {
os2_error(rc);
goto error;
}
} else {
#endif
/* XXX This can leak memory -- not easy to fix :-( */
@ -5342,36 +5344,40 @@ posix_putenv(PyObject *self, PyObject *args)
len = wcslen(s1) + wcslen(s2) + 2;
newstr = PyUnicode_FromUnicode(NULL, (int)len - 1);
#else
len = strlen(s1) + strlen(s2) + 2;
len = PyBytes_GET_SIZE(os1) + PyBytes_GET_SIZE(os2) + 2;
newstr = PyBytes_FromStringAndSize(NULL, (int)len - 1);
#endif
if (newstr == NULL)
return PyErr_NoMemory();
if (newstr == NULL) {
PyErr_NoMemory();
goto error;
}
#ifdef MS_WINDOWS
newenv = PyUnicode_AsUnicode(newstr);
_snwprintf(newenv, len, L"%s=%s", s1, s2);
if (_wputenv(newenv)) {
Py_DECREF(newstr);
posix_error();
return NULL;
goto error;
}
#else
newenv = PyBytes_AS_STRING(newstr);
PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
if (putenv(newenv)) {
Py_DECREF(newstr);
Py_DECREF(os1);
Py_DECREF(os2);
posix_error();
return NULL;
goto error;
}
#endif
/* Install the first arg and newstr in posix_putenv_garbage;
* this will cause previous value to be collected. This has to
* happen after the real putenv() call because the old value
* was still accessible until then. */
if (PyDict_SetItem(posix_putenv_garbage,
PyTuple_GET_ITEM(args, 0), newstr)) {
#ifdef MS_WINDOWS
PyTuple_GET_ITEM(args, 0),
#else
os1,
#endif
newstr)) {
/* really not much we can do; just leak */
PyErr_Clear();
}
@ -5382,12 +5388,20 @@ posix_putenv(PyObject *self, PyObject *args)
#if defined(PYOS_OS2)
}
#endif
#ifndef MS_WINDOWS
Py_DECREF(os1);
Py_DECREF(os2);
#endif
Py_INCREF(Py_None);
return Py_None;
Py_RETURN_NONE;
error:
#ifndef MS_WINDOWS
Py_DECREF(os1);
Py_DECREF(os2);
#endif
Py_XDECREF(newstr);
return NULL;
}
#endif /* putenv */
@ -5399,10 +5413,20 @@ Delete an environment variable.");
static PyObject *
posix_unsetenv(PyObject *self, PyObject *args)
{
#ifdef MS_WINDOWS
char *s1;
if (!PyArg_ParseTuple(args, "s:unsetenv", &s1))
return NULL;
#else
PyObject *os1;
char *s1;
if (!PyArg_ParseTuple(args, "O&:unsetenv",
PyUnicode_FSConverter, &os1))
return NULL;
s1 = PyBytes_AsString(os1);
#endif
unsetenv(s1);
@ -5412,13 +5436,20 @@ posix_unsetenv(PyObject *self, PyObject *args)
* old value was still accessible until then.
*/
if (PyDict_DelItem(posix_putenv_garbage,
PyTuple_GET_ITEM(args, 0))) {
#ifdef MS_WINDOWS
PyTuple_GET_ITEM(args, 0)
#else
os1
#endif
)) {
/* really not much we can do; just leak */
PyErr_Clear();
}
Py_INCREF(Py_None);
return Py_None;
#ifndef MS_WINDOWS
Py_DECREF(os1);
#endif
Py_RETURN_NONE;
}
#endif /* unsetenv */