r23784: use the GPLv3 boilerplate as recommended by the FSF and the license text
[Samba/bb.git] / source / python / py_tdbpack.c
blobe504f30b863605a571fc8bccdf53bfbf89be003f
1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002, 2003
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 #include "Python.h"
27 /* This symbol is used in both config.h and Python.h which causes an
28 annoying compiler warning. */
30 #ifdef HAVE_FSTAT
31 #undef HAVE_FSTAT
32 #endif
34 /* This module is supposed to be standalone, however for portability
35 it would be good to use the FUNCTION_MACRO preprocessor define. */
37 #include "include/config.h"
39 #ifdef HAVE_FUNCTION_MACRO
40 #define FUNCTION_MACRO (__FUNCTION__)
41 #else
42 #define FUNCTION_MACRO (__FILE__)
43 #endif
45 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
46 static PyObject * pytdbpack_str(char ch,
47 PyObject *val_iter, PyObject *packed_list,
48 const char *encoding);
49 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
51 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
53 static PyObject *pytdbpack_data(const char *format_str,
54 PyObject *val_seq,
55 PyObject *val_list);
57 static PyObject *
58 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
60 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
63 static PyObject *pytdbpack_bad_type(char ch,
64 const char *expected,
65 PyObject *val_obj);
/* Module-level docstring, handed to Py_InitModule3() in inittdbpack().

   The "Pointer" entry is documented under 'p' (lower case): that is the
   code the packer and unpacker treat as a 32-bit number, while 'P' is
   handled as a NUL-terminated string like 'f'. */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format. It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
" 'f': NUL-terminated string in codepage iso8859-1\n"
" \n"
" 'P': same as 'f'\n"
"\n"
" 'F': NUL-terminated string in iso-8859-1\n"
"\n"
" 'd': 4 byte little-endian unsigned number\n"
"\n"
" 'w': 2 byte little-endian unsigned number\n"
"\n"
" 'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
" really just an \"exists\" or \"does not exist\" flag. The boolean\n"
" value of the Python object is used.\n"
" \n"
" 'B': 4-byte LE length, followed by that many bytes of binary data.\n"
" Corresponds to a Python integer giving the length, followed by a byte\n"
" string of the appropriate length.\n"
"\n"
" '$': Special flag indicating that the preceding format code should be\n"
" repeated while data remains. This is only supported for unpacking.\n"
"\n"
" Every code corresponds to a single Python object, except 'B' which\n"
" corresponds to two values (length and contents), and '$', which produces\n"
" however many make sense.\n";
/* Docstring for the module's pack() function; referenced from the method
   table entry for "pack". */
static const char pytdbpack_doc[] =
    "pack(format, values) -> buffer\n"
    "Pack Python objects into Samba binary format according to format string.\n"
    "\n"
    "arguments:\n"
    " format -- string of tdbpack format characters\n"
    " values -- sequence of value objects corresponding 1:1 to format characters\n"
    "\n"
    "returns:\n"
    " buffer -- string containing packed data\n"
    "\n"
    "raises:\n"
    " IndexError -- if there are too few values for the format\n"
    " ValueError -- if any of the format characters is illegal\n"
    " TypeError -- if the format is not a string, or values is not a sequence,\n"
    " or any of the values is of the wrong type for the corresponding\n"
    " format character\n"
    "\n"
    "notes:\n"
    " For historical reasons, it is not an error to pass more values than are consumed\n"
    " by the format.\n";
/* Docstring for the module's unpack() function; referenced from the method
   table entry for "unpack". */
static const char pytdbunpack_doc[] =
    "unpack(format, buffer) -> (values, rest)\n"
    "Unpack Samba binary data according to format string.\n"
    "\n"
    "arguments:\n"
    " format -- string of tdbpack characters\n"
    " buffer -- string of packed binary data\n"
    "\n"
    "returns:\n"
    " 2-tuple of:\n"
    " values -- sequence of values corresponding 1:1 to format characters\n"
    " rest -- string containing data that was not decoded, or '' if the\n"
    " whole string was consumed\n"
    "\n"
    "raises:\n"
    " IndexError -- if there is insufficient data in the buffer for the\n"
    " format (or if the data is corrupt and contains a variable-length\n"
    " field extending past the end)\n"
    " ValueError -- if any of the format characters is illegal\n"
    "\n"
    "notes:\n"
    " Because unconsumed data is returned, you can feed it back in to the\n"
    " unpacker to extract further fields. Alternatively, if you wish to modify\n"
    " some fields near the start of the data, you may be able to save time by\n"
    " only unpacking and repacking the necessary part.\n";
/* Encoding used for Unix-codepage strings.  It is NULL, meaning that the
   Samba default encoding *must* be the same as the Python default
   encoding. */
const char *pytdb_unix_encoding = NULL;

/* Encoding name for DOS-codepage strings. */
const char *pytdb_dos_encoding = "cp850";
170 * Pack objects to bytes.
172 * All objects are first individually encoded onto a list, and then the list
173 * of strings is concatenated. This is faster than concatenating strings,
174 * and reasonably simple to code.
176 static PyObject *
177 pytdbpack(PyObject *self,
178 PyObject *args)
180 char *format_str;
181 PyObject *val_seq, *val_iter = NULL,
182 *packed_list = NULL, *packed_str = NULL,
183 *empty_str = NULL;
185 /* TODO: Test passing wrong types or too many arguments */
186 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
187 return NULL;
189 if (!(val_iter = PyObject_GetIter(val_seq)))
190 goto out;
192 /* Create list to hold strings until we're done, then join them all. */
193 if (!(packed_list = PyList_New(0)))
194 goto out;
196 if (!pytdbpack_data(format_str, val_iter, packed_list))
197 goto out;
199 /* this function is not officially documented but it works */
200 if (!(empty_str = PyString_InternFromString("")))
201 goto out;
203 packed_str = _PyString_Join(empty_str, packed_list);
205 out:
206 Py_XDECREF(empty_str);
207 Py_XDECREF(val_iter);
208 Py_XDECREF(packed_list);
210 return packed_str;
215 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
216 PACKED_BUF.
218 The string has already been checked out, so we know that VAL_SEQ is large
219 enough to hold the packed data, and that there are enough value items.
220 (However, their types may not have been thoroughly checked yet.)
222 In addition, val_seq is a Python Fast sequence.
224 Returns NULL for error (with exception set), or None.
226 PyObject *
227 pytdbpack_data(const char *format_str,
228 PyObject *val_iter,
229 PyObject *packed_list)
231 int format_i, val_i = 0;
233 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
234 char ch = format_str[format_i];
236 switch (ch) {
237 /* dispatch to the appropriate packer for this type,
238 which should pull things off the iterator, and
239 append them to the packed_list */
240 case 'w':
241 case 'd':
242 case 'p':
243 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
244 return NULL;
245 break;
247 case 'f':
248 case 'P':
249 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
250 return NULL;
251 break;
253 case 'B':
254 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
255 return NULL;
256 break;
258 default:
259 PyErr_Format(PyExc_ValueError,
260 "%s: format character '%c' is not supported",
261 FUNCTION_MACRO, ch);
262 return NULL;
266 return packed_list;
270 static PyObject *
271 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
273 unsigned long val_long;
274 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
275 PyObject *new_list = NULL;
276 unsigned char pack_buf[4];
278 if (!(val_obj = PyIter_Next(val_iter)))
279 goto out;
281 if (!(long_obj = PyNumber_Long(val_obj))) {
282 pytdbpack_bad_type(ch, "Number", val_obj);
283 goto out;
286 val_long = PyLong_AsUnsignedLong(long_obj);
287 pack_le_uint32(val_long, pack_buf);
289 /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
290 the first two bytes. */
292 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
293 goto out;
295 if (PyList_Append(packed_list, result_obj) != -1)
296 new_list = packed_list;
298 out:
299 Py_XDECREF(val_obj);
300 Py_XDECREF(long_obj);
301 Py_XDECREF(result_obj);
303 return new_list;
308 * Take one string from the iterator val_iter, convert it to 8-bit, and return
309 * it.
311 * If the input is neither a string nor Unicode, an exception is raised.
313 * If the input is Unicode, then it is converted to the appropriate encoding.
315 * If the input is a String, and encoding is not null, then it is converted to
316 * Unicode using the default decoding method, and then converted to the
317 * encoding. If the encoding is NULL, then the string is written out as-is --
318 * this is used when the default Python encoding is the same as the Samba
319 * encoding.
321 * I hope this approach avoids being too fragile w.r.t. being passed either
322 * Unicode or String objects.
324 static PyObject *
325 pytdbpack_str(char ch,
326 PyObject *val_iter, PyObject *packed_list, const char *encoding)
328 PyObject *val_obj = NULL;
329 PyObject *unicode_obj = NULL;
330 PyObject *coded_str = NULL;
331 PyObject *nul_str = NULL;
332 PyObject *new_list = NULL;
334 if (!(val_obj = PyIter_Next(val_iter)))
335 goto out;
337 if (PyUnicode_Check(val_obj)) {
338 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
339 goto out;
341 else if (PyString_Check(val_obj) && !encoding) {
342 /* For efficiency, we assume that the Python interpreter has
343 the same default string encoding as Samba's native string
344 encoding. On the PSA, both are always 8859-1. */
345 coded_str = val_obj;
346 Py_INCREF(coded_str);
348 else if (PyString_Check(val_obj)) {
349 /* String, but needs to be converted */
350 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
351 goto out;
352 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
353 goto out;
355 else {
356 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
357 goto out;
360 if (!nul_str)
361 /* this is constant and often-used; hold it forever */
362 if (!(nul_str = PyString_FromStringAndSize("", 1)))
363 goto out;
365 if ((PyList_Append(packed_list, coded_str) != -1)
366 && (PyList_Append(packed_list, nul_str) != -1))
367 new_list = packed_list;
369 out:
370 Py_XDECREF(val_obj);
371 Py_XDECREF(unicode_obj);
372 Py_XDECREF(coded_str);
374 return new_list;
379 * Pack (LENGTH, BUFFER) pair onto the list.
381 * The buffer must already be a String, not Unicode, because it contains 8-bit
382 * untranslated data. In some cases it will actually be UTF_16_LE data.
384 static PyObject *
385 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
387 PyObject *val_obj;
388 PyObject *new_list = NULL;
390 /* pull off integer and stick onto list */
391 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
392 return NULL;
394 /* this assumes that the string is the right length; the old code did
395 the same. */
396 if (!(val_obj = PyIter_Next(val_iter)))
397 return NULL;
399 if (!PyString_Check(val_obj)) {
400 pytdbpack_bad_type('B', "String", val_obj);
401 goto out;
404 if (PyList_Append(packed_list, val_obj) != -1)
405 new_list = packed_list;
407 out:
408 Py_XDECREF(val_obj);
409 return new_list;
413 static PyObject *pytdbpack_bad_type(char ch,
414 const char *expected,
415 PyObject *val_obj)
417 PyObject *r = PyObject_Repr(val_obj);
418 if (!r)
419 return NULL;
420 PyErr_Format(PyExc_TypeError,
421 "tdbpack: format '%c' requires %s, not %s",
422 ch, expected, PyString_AS_STRING(r));
423 Py_DECREF(r);
424 return val_obj;
/*
  Store the low 32 bits of VAL_LONG into PBUF[0..3], least significant byte
  first.

  XXX: glib and Samba have quicker macro for doing the endianness
  conversions, but I don't know of one in plain libc, and it's probably not
  a big deal.  I realize this is kind of dumb because we'll almost always be
  on x86, but being safe is important.
*/
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    int byte_no;

    for (byte_no = 0; byte_no < 4; byte_no++)
        pbuf[byte_no] = (unsigned char) ((val_long >> (8 * byte_no)) & 0xff);
}
#if 0 /* not used */
/* Copy LEN bytes from FROM to *PBUF, then advance *PBUF past them. */
static void pack_bytes(long len, const char *from,
                       unsigned char **pbuf)
{
    unsigned char *dest = *pbuf;

    memcpy(dest, from, len);
    *pbuf = dest + len;
}
#endif
453 static PyObject *
454 pytdbunpack(PyObject *self,
455 PyObject *args)
457 char *format_str, *packed_str, *ppacked;
458 PyObject *val_list = NULL, *ret_tuple = NULL;
459 PyObject *rest_string = NULL;
460 int format_len, packed_len;
461 char last_format = '#'; /* invalid */
462 int i;
464 /* get arguments */
465 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
466 return NULL;
468 format_len = strlen(format_str);
470 /* Allocate list to hold results. Initially empty, and we append
471 results as we go along. */
472 val_list = PyList_New(0);
473 if (!val_list)
474 goto failed;
475 ret_tuple = PyTuple_New(2);
476 if (!ret_tuple)
477 goto failed;
479 /* For every object, unpack. */
480 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
481 last_format = format_str[i];
482 /* packed_len is reduced in place */
483 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
484 goto failed;
487 /* If the last character was '$', keep going until out of space */
488 if (format_str[i] == '$') {
489 if (i == 0) {
490 PyErr_Format(PyExc_ValueError,
491 "%s: '$' may not be first character in format",
492 FUNCTION_MACRO);
493 return NULL;
495 while (packed_len > 0)
496 if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
497 goto failed;
500 /* save leftovers for next time */
501 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
502 if (!rest_string)
503 goto failed;
505 /* return (values, rest) tuple; give up references to them */
506 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
507 val_list = NULL;
508 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
509 val_list = NULL;
510 return ret_tuple;
512 failed:
513 /* handle failure: deallocate anything. XDECREF forms handle NULL
514 pointers for objects that haven't been allocated yet. */
515 Py_XDECREF(val_list);
516 Py_XDECREF(ret_tuple);
517 Py_XDECREF(rest_string);
518 return NULL;
522 static void
523 pytdbunpack_err_too_short(void)
525 PyErr_Format(PyExc_IndexError,
526 "%s: data too short for unpack format", FUNCTION_MACRO);
530 static PyObject *
531 pytdbunpack_uint32(char **pbuf, int *plen)
533 unsigned long v;
534 unsigned char *b;
536 if (*plen < 4) {
537 pytdbunpack_err_too_short();
538 return NULL;
541 b = *pbuf;
542 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
544 (*pbuf) += 4;
545 (*plen) -= 4;
547 return PyLong_FromUnsignedLong(v);
551 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
553 long v;
554 unsigned char *b;
556 if (*plen < 2) {
557 pytdbunpack_err_too_short();
558 return NULL;
561 b = *pbuf;
562 v = b[0] | b[1]<<8;
564 (*pbuf) += 2;
565 (*plen) -= 2;
567 return PyInt_FromLong(v);
571 static PyObject *
572 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
574 int len;
575 char *nul_ptr, *start;
577 start = *pbuf;
579 nul_ptr = memchr(start, '\0', *plen);
580 if (!nul_ptr) {
581 pytdbunpack_err_too_short();
582 return NULL;
585 len = nul_ptr - start;
587 *pbuf += len + 1; /* skip \0 */
588 *plen -= len + 1;
590 return PyString_Decode(start, len, encoding, NULL);
594 static PyObject *
595 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
597 /* first get 32-bit len */
598 long slen;
599 unsigned char *b;
600 unsigned char *start;
601 PyObject *str_obj = NULL, *len_obj = NULL;
603 if (*plen < 4) {
604 pytdbunpack_err_too_short();
605 return NULL;
608 b = *pbuf;
609 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
611 if (slen < 0) { /* surely you jest */
612 PyErr_Format(PyExc_ValueError,
613 "%s: buffer seems to have negative length", FUNCTION_MACRO);
614 return NULL;
617 (*pbuf) += 4;
618 (*plen) -= 4;
619 start = *pbuf;
621 if (*plen < slen) {
622 PyErr_Format(PyExc_IndexError,
623 "%s: not enough data to unpack buffer: "
624 "need %d bytes, have %d", FUNCTION_MACRO,
625 (int) slen, *plen);
626 return NULL;
629 (*pbuf) += slen;
630 (*plen) -= slen;
632 if (!(len_obj = PyInt_FromLong(slen)))
633 goto failed;
635 if (PyList_Append(val_list, len_obj) == -1)
636 goto failed;
638 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
639 goto failed;
641 if (PyList_Append(val_list, str_obj) == -1)
642 goto failed;
644 Py_DECREF(len_obj);
645 Py_DECREF(str_obj);
647 return val_list;
649 failed:
650 Py_XDECREF(len_obj); /* handles NULL */
651 Py_XDECREF(str_obj);
652 return NULL;
656 /* Unpack a single field from packed data, according to format character CH.
657 Remaining data is at *PBUF, of *PLEN.
659 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
660 been consumed.
662 Returns a reference to None, or NULL for failure.
664 static PyObject *pytdbunpack_item(char ch,
665 char **pbuf,
666 int *plen,
667 PyObject *val_list)
669 PyObject *unpacked;
671 if (ch == 'w') { /* 16-bit int */
672 unpacked = pytdbunpack_int16(pbuf, plen);
674 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
675 /* pointers can just come through as integers */
676 unpacked = pytdbunpack_uint32(pbuf, plen);
678 else if (ch == 'f' || ch == 'P') { /* nul-term string */
679 unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
681 else if (ch == 'B') { /* length, buffer */
682 return pytdbunpack_buffer(pbuf, plen, val_list);
684 else {
685 PyErr_Format(PyExc_ValueError,
686 "%s: format character '%c' is not supported",
687 FUNCTION_MACRO, ch);
689 return NULL;
692 /* otherwise OK */
693 if (!unpacked)
694 return NULL;
696 if (PyList_Append(val_list, unpacked) == -1)
697 val_list = NULL;
699 /* PyList_Append takes a new reference to the inserted object.
700 Therefore, we no longer need the original reference. */
701 Py_DECREF(unpacked);
703 return val_list;
711 static PyMethodDef pytdbpack_methods[] = {
712 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
713 { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
716 DL_EXPORT(void)
717 inittdbpack(void)
719 Py_InitModule3("tdbpack", pytdbpack_methods,
720 (char *) pytdbpack_docstring);