Add lzma.{encode,decode}_filter_properties().

2012-05-06 23:01:27 +02:00 · 2012-05-06 23:01:27 +02:00 · f55b329edc
parent 75d5d8c781
commit f55b329edc
4 changed files with 252 additions and 4 deletions
--- a/Doc/library/lzma.rst
+++ b/Doc/library/lzma.rst
@ -235,6 +235,32 @@ Miscellaneous
   feature set.
 .. function:: encode_filter_properties(filter)
   Return a :class:`bytes` object encoding the options (properties) of the
   filter specified by *filter* (a dictionary).
   *filter* is interpreted as a filter specifier, as described in
   :ref:`filter-chain-specs`.
   The returned data does not include the filter ID itself, only the options.
   This function is primarily of interest to users implementing custom file
   formats.
 .. function:: decode_filter_properties(filter_id, encoded_props)
   Return a dictionary describing a filter with ID *filter_id*, and options
   (properties) decoded from the :class:`bytes` object *encoded_props*.
   The returned dictionary is a filter specifier, as described in
   :ref:`filter-chain-specs`.
   This function is primarily of interest to users implementing custom file
   formats.
 .. _filter-chain-specs:
 Specifying custom filter chains
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@ -19,6 +19,7 @@ __all__ = [
    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "compress", "decompress", "check_is_supported",
    "encode_filter_properties", "decode_filter_properties",
 ]
 import io
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@ -944,6 +944,49 @@ class MiscellaneousTestCase(unittest.TestCase):
        # This value should not be a valid check ID.
        self.assertFalse(lzma.check_is_supported(lzma.CHECK_UNKNOWN))
    def test_encode_filter_properties(self):
        encode = lzma.encode_filter_properties

        # A filter specifier must be a dict.
        self.assertRaises(TypeError, encode, b"not a dict")
        # Unknown filter ID.
        self.assertRaises(ValueError, encode, {"id": 0x100})
        # Unknown option key for an otherwise valid filter.
        self.assertRaises(ValueError, encode,
                          {"id": lzma.FILTER_LZMA2, "junk": 12})
        # Option value rejected by liblzma itself.
        self.assertRaises(lzma.LZMAError, encode,
                          {"id": lzma.FILTER_DELTA, "dist": 9001})

        # Test with parameters used by zipfile module.
        zipfile_spec = {
            "id": lzma.FILTER_LZMA1,
            "pb": 2,
            "lp": 0,
            "lc": 3,
            "dict_size": 8 << 20,
        }
        self.assertEqual(encode(zipfile_spec), b"]\x00\x00\x80\x00")
    def test_decode_filter_properties(self):
        decode = lzma.decode_filter_properties

        # The encoded properties must be a bytes-like object.
        self.assertRaises(TypeError, decode,
                          lzma.FILTER_X86, {"should be": bytes})
        # Property string of the wrong length for the delta filter.
        self.assertRaises(lzma.LZMAError, decode,
                          lzma.FILTER_DELTA, b"too long")

        # Test with parameters used by zipfile module.
        filterspec = decode(lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00")
        expected_fields = (
            ("id", lzma.FILTER_LZMA1),
            ("pb", 2),
            ("lp", 0),
            ("lc", 3),
            ("dict_size", 8 << 20),
        )
        for key, expected in expected_fields:
            self.assertEqual(filterspec[key], expected)
    def test_filter_properties_roundtrip(self):
        # Decoding, re-encoding and decoding again must yield the same
        # filter specifier as the first decode.
        filter_id = lzma.FILTER_LZMA1
        original = lzma.decode_filter_properties(
                filter_id, b"]\x00\x00\x80\x00")
        roundtripped = lzma.decode_filter_properties(
                filter_id, lzma.encode_filter_properties(original))
        self.assertEqual(original, roundtripped)
 # Test data:
--- a/Modules/_lzmamodule.c
+++ b/Modules/_lzmamodule.c
@ -137,6 +137,9 @@ grow_buffer(PyObject **buf)
      uint32_t - the "I" (unsigned int) specifier is the right size, but
      silently ignores overflows on conversion.
      lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right
      size, but like "I" it silently ignores overflows on conversion.
      lzma_mode and lzma_match_finder - these are enumeration types, and
      so the size of each is implementation-defined. Worse, different
      enum types can be of different sizes within the same program, so
@ -147,12 +150,12 @@ grow_buffer(PyObject **buf)
    static int \
    FUNCNAME(PyObject *obj, void *ptr) \
    { \
-        unsigned long val; \
+        unsigned PY_LONG_LONG val; \
        \
-        val = PyLong_AsUnsignedLong(obj); \
+        val = PyLong_AsUnsignedLongLong(obj); \
        if (PyErr_Occurred()) \
            return 0; \
-        if ((unsigned long)(TYPE)val != val) { \
+        if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \
            PyErr_SetString(PyExc_OverflowError, \
                            "Value too large for " #TYPE " type"); \
            return 0; \
@ -162,13 +165,17 @@ grow_buffer(PyObject **buf)
    }
 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
 #undef INT_TYPE_CONVERTER_FUNC
-/* Filter specifier parsing functions. */
+/* Filter specifier parsing.
   This code handles converting filter specifiers (Python dicts) into
   the C lzma_filter structs expected by liblzma. */
 static void *
 parse_filter_spec_lzma(PyObject *spec)
@ -358,6 +365,88 @@ parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
 }
 /* Filter specifier construction.
   This code handles converting C lzma_filter structs into
   Python-level filter specifiers (represented as dicts). */
 /* Store the integer *value* in the dict *spec* under the identifier *key*.
    Returns -1 if the Python int could not be created; otherwise returns
    the result of _PyDict_SetItemId(). */
 static int
 spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value)
 {
    int rc = -1;
    PyObject *boxed = PyLong_FromUnsignedLongLong(value);

    if (boxed != NULL) {
        rc = _PyDict_SetItemId(spec, key, boxed);
        /* Drop our own reference whether or not the insertion worked. */
        Py_DECREF(boxed);
    }
    return rc;
 }
 /* Build a Python-level filter specifier (a new dict) from the C struct *f*.

    The dict always contains "id". The remaining keys depend on the filter
    type and are limited to the option fields that the encoded filter
    properties actually carry; other members of the options struct may be
    uninitialized after lzma_properties_decode() and must not be read.

    Returns a new reference, or NULL with an exception set on failure
    (ValueError for an unrecognized filter ID). */
 static PyObject *
 build_filter_spec(const lzma_filter *f)
 {
    PyObject *spec;

    spec = PyDict_New();
    if (spec == NULL)
        return NULL;

 /* Copy SOURCE->FIELD into spec under the key "FIELD"; jumps to the error
    label on failure. */
 #define ADD_FIELD(SOURCE, FIELD) \
    do { \
        _Py_IDENTIFIER(FIELD); \
        if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
            goto error;\
    } while (0)

    ADD_FIELD(f, id);

    switch (f->id) {
        /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at
           the lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
           dict_size field is used. Exporting anything else would expose
           uninitialized memory. */
        case LZMA_FILTER_LZMA1: {
            lzma_options_lzma *options = f->options;
            ADD_FIELD(options, lc);
            ADD_FIELD(options, lp);
            ADD_FIELD(options, pb);
            ADD_FIELD(options, dict_size);
            break;
        }
        case LZMA_FILTER_LZMA2: {
            lzma_options_lzma *options = f->options;
            ADD_FIELD(options, dict_size);
            break;
        }
        case LZMA_FILTER_DELTA: {
            lzma_options_delta *options = f->options;
            ADD_FIELD(options, dist);
            break;
        }
        case LZMA_FILTER_X86:
        case LZMA_FILTER_POWERPC:
        case LZMA_FILTER_IA64:
        case LZMA_FILTER_ARM:
        case LZMA_FILTER_ARMTHUMB:
        case LZMA_FILTER_SPARC: {
            lzma_options_bcj *options = f->options;
            /* BCJ filters decoded from an empty property string have no
               options struct at all (f->options is NULL); in that case
               the spec carries only the filter ID. */
            if (options != NULL) {
                ADD_FIELD(options, start_offset);
            }
            break;
        }
        default:
            /* Cast so the argument type matches the %llu conversion
               regardless of how lzma_vli is defined on this platform. */
            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu",
                         (unsigned PY_LONG_LONG)f->id);
            goto error;
    }

 #undef ADD_FIELD

    return spec;

 error:
    Py_DECREF(spec);
    return NULL;
 }
 /* LZMACompressor class. */
 static PyObject *
@ -1005,11 +1094,100 @@ check_is_supported(PyObject *self, PyObject *args)
 }
 PyDoc_STRVAR(encode_filter_properties_doc,
 "encode_filter_properties(filter) -> bytes\n"
 "\n"
 "Return a bytes object encoding the options (properties) of the filter\n"
 "specified by *filter* (a dict).\n"
 "\n"
 "The result does not include the filter ID itself, only the options.\n"
 "\n"
 "This function is primarily of interest to users implementing custom\n"
 "file formats.\n");

 /* Python-callable wrapper: parse the filter specifier argument into an
    lzma_filter, ask liblzma how many bytes its properties need, and encode
    them into a freshly allocated bytes object.
    Returns a new reference, or NULL with an exception set. */
 static PyObject *
 encode_filter_properties(PyObject *self, PyObject *args)
 {
    PyObject *spec_obj;
    PyObject *encoded = NULL;
    lzma_filter filter;
    uint32_t nbytes;

    if (!PyArg_ParseTuple(args, "O:encode_filter_properties", &spec_obj))
        return NULL;
    if (parse_filter_spec(&filter, spec_obj) == NULL)
        return NULL;

    /* From here on filter.options must be released on every path, so all
       exits go through the single cleanup label below. */
    if (catch_lzma_error(lzma_properties_size(&nbytes, &filter)))
        goto done;

    encoded = PyBytes_FromStringAndSize(NULL, nbytes);
    if (encoded == NULL)
        goto done;

    if (catch_lzma_error(lzma_properties_encode(
            &filter, (uint8_t *)PyBytes_AS_STRING(encoded)))) {
        /* Encoding failed: discard the partially filled buffer. */
        Py_DECREF(encoded);
        encoded = NULL;
    }

 done:
    PyMem_Free(filter.options);
    return encoded;
 }
 PyDoc_STRVAR(decode_filter_properties_doc,
 "decode_filter_properties(filter_id, encoded_props) -> dict\n"
 "\n"
 "Return a dict describing a filter with ID *filter_id*, and options\n"
 "(properties) decoded from the bytes object *encoded_props*.\n"
 "\n"
 "This function is primarily of interest to users implementing custom\n"
 "file formats.\n");

 /* Python-callable wrapper: decode an encoded property string for the given
    filter ID via liblzma and convert the resulting lzma_filter into a
    filter specifier dict.
    Returns a new reference, or NULL with an exception set. */
 static PyObject *
 decode_filter_properties(PyObject *self, PyObject *args)
 {
    Py_buffer props;
    lzma_filter filter;
    lzma_ret status;
    PyObject *spec = NULL;

    if (!PyArg_ParseTuple(args, "O&y*:decode_filter_properties",
                          lzma_vli_converter, &filter.id, &props))
        return NULL;

    status = lzma_properties_decode(&filter, NULL, props.buf, props.len);
    /* The input buffer is not needed once decoding has been attempted. */
    PyBuffer_Release(&props);

    if (!catch_lzma_error(status)) {
        spec = build_filter_spec(&filter);
        /* We use vanilla free() here instead of PyMem_Free() -
           filter.options was allocated by lzma_properties_decode() using
           the default allocator. */
        free(filter.options);
    }
    return spec;
 }
 /* Module initialization. */
 /* Table of module-level functions exported by this extension. Each entry
    gives the Python-visible name, the C implementation, the calling
    convention flag, and the docstring object. */
 static PyMethodDef module_methods[] = {
    {"check_is_supported", (PyCFunction)check_is_supported,
     METH_VARARGS, check_is_supported_doc},
    /* Filter property (de)serialization helpers. */
    {"encode_filter_properties", (PyCFunction)encode_filter_properties,
     METH_VARARGS, encode_filter_properties_doc},
    {"decode_filter_properties", (PyCFunction)decode_filter_properties,
     METH_VARARGS, decode_filter_properties_doc},
    /* Terminating entry. */
    {NULL}
 };