Merge tag 'samba-4.19.1' into v4-19-stable
[Samba.git] / lib / compression / pycompression.c
blob3be3620b1cf145c4837abbd3b55a45719d1a4eb4
1 /*
2 Samba Unix SMB/CIFS implementation.
4 Python bindings for compression functions.
6 Copyright (C) Petr Viktorin 2015
7 Copyright (C) Douglas Bagnall 2022
9 ** NOTE! The following LGPL license applies to the talloc
10 ** library. This does NOT imply that all of Samba is released
11 ** under the LGPL
13 This library is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 3 of the License, or (at your option) any later version.
18 This library is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
23 You should have received a copy of the GNU Lesser General Public
24 License along with this library; if not, see <http://www.gnu.org/licenses/>.
27 #include "includes.h"
28 #include <talloc.h>
29 #include <Python.h>
30 #include "lzxpress.h"
31 #include "lzxpress_huffman.h"
33 /* CompressionError is filled out in module init */
34 static PyObject *CompressionError = NULL;
36 static PyObject *plain_compress(PyObject *mod, PyObject *args)
38 uint8_t *src = NULL;
39 Py_ssize_t src_len;
40 char *dest = NULL;
41 Py_ssize_t dest_len;
42 PyObject *dest_obj = NULL;
43 size_t alloc_len;
44 int ret;
46 if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) {
47 return NULL;
51 * 9/8 + 4 is the worst case growth, but we add room.
53 * alloc_len can't overflow as src_len is ssize_t while alloc_len is
54 * size_t.
56 alloc_len = src_len + src_len / 8 + 500;
58 dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
59 if (dest_obj == NULL) {
60 return NULL;
62 dest = PyBytes_AS_STRING(dest_obj);
64 dest_len = lzxpress_compress(src,
65 src_len,
66 (uint8_t *)dest,
67 alloc_len);
68 if (dest_len < 0) {
69 PyErr_SetString(CompressionError, "unable to compress data");
70 Py_DECREF(dest_obj);
71 return NULL;
74 ret = _PyBytes_Resize(&dest_obj, dest_len);
75 if (ret != 0) {
77 * Don't try to free dest_obj, as we're in deep MemoryError
78 * territory here.
80 return NULL;
82 return dest_obj;
86 static PyObject *plain_decompress(PyObject *mod, PyObject *args)
88 uint8_t *src = NULL;
89 Py_ssize_t src_len;
90 char *dest = NULL;
91 Py_ssize_t dest_len;
92 PyObject *dest_obj = NULL;
93 Py_ssize_t alloc_len = 0;
94 Py_ssize_t given_len = 0;
95 int ret;
97 if (!PyArg_ParseTuple(args, "s#|n", &src, &src_len, &given_len)) {
98 return NULL;
100 if (given_len != 0) {
102 * With plain decompression, we don't *need* the exact output
103 * size (as we do with LZ77+Huffman), but it certainly helps
104 * when guessing the size.
106 alloc_len = given_len;
107 } else if (src_len > UINT32_MAX) {
109 * The underlying decompress function will reject this, but by
110 * checking here we can give a better message and be clearer
111 * about overflow risks.
113 * Note, the limit is actually the smallest of UINT32_MAX and
114 * SSIZE_MAX, but src_len is ssize_t so it already can't
115 * exceed that.
117 PyErr_Format(CompressionError,
118 "The maximum size for compressed data is 4GB "
119 "cannot decompress %zu bytes.", src_len);
120 } else {
122 * The data can expand massively (though not beyond the
123 * 4GB limit) so we guess a big number for small inputs
124 * (we expect small inputs), and a relatively conservative
125 * number for big inputs.
127 if (src_len <= 3333333) {
128 alloc_len = 10000000;
129 } else if (src_len > UINT32_MAX / 3) {
130 alloc_len = UINT32_MAX;
131 } else {
132 alloc_len = src_len * 3;
136 dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
137 if (dest_obj == NULL) {
138 return NULL;
140 dest = PyBytes_AS_STRING(dest_obj);
142 dest_len = lzxpress_decompress(src,
143 src_len,
144 (uint8_t *)dest,
145 alloc_len);
146 if (dest_len < 0) {
147 if (alloc_len == given_len) {
148 PyErr_Format(CompressionError,
149 "unable to decompress data into a buffer "
150 "of %zd bytes.", alloc_len);
151 } else {
152 PyErr_Format(CompressionError,
153 "unable to decompress data into a buffer "
154 "of %zd bytes. If you know the length, "
155 "supply it as the second argument.",
156 alloc_len);
158 Py_DECREF(dest_obj);
159 return NULL;
162 ret = _PyBytes_Resize(&dest_obj, dest_len);
163 if (ret != 0) {
165 * Don't try to free dest_obj, as we're in deep MemoryError
166 * territory here.
168 return NULL;
170 return dest_obj;
175 static PyObject *huffman_compress(PyObject *mod, PyObject *args)
177 uint8_t *src = NULL;
178 Py_ssize_t src_len;
179 char *dest = NULL;
180 Py_ssize_t dest_len;
181 PyObject *dest_obj = NULL;
182 size_t alloc_len;
183 int ret;
184 struct lzxhuff_compressor_mem cmp_mem;
186 if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) {
187 return NULL;
190 * worst case is roughly 256 per 64k or less.
192 * alloc_len won't overflow as src_len is ssize_t while alloc_len is
193 * size_t.
195 alloc_len = src_len + src_len / 8 + 500;
197 dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
198 if (dest_obj == NULL) {
199 return NULL;
201 dest = PyBytes_AS_STRING(dest_obj);
203 dest_len = lzxpress_huffman_compress(&cmp_mem,
204 src,
205 src_len,
206 (uint8_t *)dest,
207 alloc_len);
208 if (dest_len < 0) {
209 PyErr_SetString(CompressionError, "unable to compress data");
210 Py_DECREF(dest_obj);
211 return NULL;
214 ret = _PyBytes_Resize(&dest_obj, dest_len);
215 if (ret != 0) {
216 return NULL;
218 return dest_obj;
222 static PyObject *huffman_decompress(PyObject *mod, PyObject *args)
224 uint8_t *src = NULL;
225 Py_ssize_t src_len;
226 char *dest = NULL;
227 Py_ssize_t dest_len;
228 PyObject *dest_obj = NULL;
229 Py_ssize_t given_len = 0;
231 * Here it is always necessary to supply the exact length.
234 if (!PyArg_ParseTuple(args, "s#n", &src, &src_len, &given_len)) {
235 return NULL;
238 dest_obj = PyBytes_FromStringAndSize(NULL, given_len);
239 if (dest_obj == NULL) {
240 return NULL;
242 dest = PyBytes_AS_STRING(dest_obj);
244 dest_len = lzxpress_huffman_decompress(src,
245 src_len,
246 (uint8_t *)dest,
247 given_len);
248 if (dest_len != given_len) {
249 PyErr_Format(CompressionError,
250 "unable to decompress data into a %zd bytes.",
251 given_len);
252 Py_DECREF(dest_obj);
253 return NULL;
255 /* no resize here */
256 return dest_obj;
260 static PyMethodDef mod_methods[] = {
261 { "plain_compress", (PyCFunction)plain_compress, METH_VARARGS,
262 "compress bytes using lzxpress plain compression"},
263 { "plain_decompress", (PyCFunction)plain_decompress, METH_VARARGS,
264 "decompress lzxpress plain compressed bytes"},
265 { "huffman_compress", (PyCFunction)huffman_compress, METH_VARARGS,
266 "compress bytes using lzxpress plain compression"},
267 { "huffman_decompress", (PyCFunction)huffman_decompress, METH_VARARGS,
268 "decompress lzxpress plain compressed bytes"},
273 #define MODULE_DOC PyDoc_STR("LZXpress compression/decompression bindings")
275 static struct PyModuleDef moduledef = {
276 PyModuleDef_HEAD_INIT,
277 .m_name = "compression",
278 .m_doc = MODULE_DOC,
279 .m_size = -1,
280 .m_methods = mod_methods,
284 static PyObject *module_init(void)
286 PyObject *m = PyModule_Create(&moduledef);
287 if (m == NULL) {
288 return NULL;
291 CompressionError = PyErr_NewException(
292 "compression.CompressionError",
293 PyExc_Exception,
294 NULL);
295 PyModule_AddObject(m, "CompressionError", CompressionError);
297 return m;
300 PyMODINIT_FUNC PyInit_compression(void);
301 PyMODINIT_FUNC PyInit_compression(void)
303 return module_init();