Quieten compiler warning.
[Samba/gebeck_regimport.git] / source3 / python / py_tdbpack.c
blobf0718b717ed8c7e7267423a2e7e915ac2f9aaf38
/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-

   Python wrapper for Samba tdb pack/unpack functions
   Copyright (C) Martin Pool 2002, 2003


   NOTE PYTHON STYLE GUIDE
   http://www.python.org/peps/pep-0007.html


   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
26 #include "Python.h"
28 /* This symbol is used in both config.h and Python.h which causes an
29 annoying compiler warning. */
31 #ifdef HAVE_FSTAT
32 #undef HAVE_FSTAT
33 #endif
35 /* This module is supposed to be standalone, however for portability
36 it would be good to use the FUNCTION_MACRO preprocessor define. */
38 #include "include/config.h"
40 #ifdef HAVE_FUNCTION_MACRO
41 #define FUNCTION_MACRO (__FUNCTION__)
42 #else
43 #define FUNCTION_MACRO (__FILE__)
44 #endif
46 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
47 static PyObject * pytdbpack_str(char ch,
48 PyObject *val_iter, PyObject *packed_list,
49 const char *encoding);
50 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
52 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
54 static PyObject *pytdbpack_data(const char *format_str,
55 PyObject *val_seq,
56 PyObject *val_list);
58 static PyObject *
59 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
61 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
64 static PyObject *pytdbpack_bad_type(char ch,
65 const char *expected,
66 PyObject *val_obj);
/* Module docstring handed to Py_InitModule3() by inittdbpack().

   Written as adjacent string literals with explicit "\n" escapes: a raw
   newline inside a string literal is not valid C.  The "Pointer" entry is
   listed under 'p', the character the pack and unpack code actually
   treats as a 32-bit number ('P' is handled as a string, like 'f'). */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format. It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
"'f': NUL-terminated string in codepage iso8859-1\n"
"\n"
"'P': same as 'f'\n"
"\n"
"'F': NUL-terminated string in iso-8859-1\n"
"\n"
"'d': 4 byte little-endian unsigned number\n"
"\n"
"'w': 2 byte little-endian unsigned number\n"
"\n"
"'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
"really just an \"exists\" or \"does not exist\" flag. The boolean\n"
"value of the Python object is used.\n"
"\n"
"'B': 4-byte LE length, followed by that many bytes of binary data.\n"
"Corresponds to a Python integer giving the length, followed by a byte\n"
"string of the appropriate length.\n"
"\n"
"'$': Special flag indicating that the preceding format code should be\n"
"repeated while data remains. This is only supported for unpacking.\n"
"\n"
"Every code corresponds to a single Python object, except 'B' which\n"
"corresponds to two values (length and contents), and '$', which produces\n"
"however many make sense.\n";
/* Docstring for the module's "pack" method (see pytdbpack_methods).

   Written as adjacent string literals with explicit "\n" escapes: a raw
   newline inside a string literal is not valid C. */
static char const pytdbpack_doc[] =
"pack(format, values) -> buffer\n"
"Pack Python objects into Samba binary format according to format string.\n"
"\n"
"arguments:\n"
"format -- string of tdbpack format characters\n"
"values -- sequence of value objects corresponding 1:1 to format characters\n"
"\n"
"returns:\n"
"buffer -- string containing packed data\n"
"\n"
"raises:\n"
"IndexError -- if there are too few values for the format\n"
"ValueError -- if any of the format characters is illegal\n"
"TypeError -- if the format is not a string, or values is not a sequence,\n"
"or any of the values is of the wrong type for the corresponding\n"
"format character\n"
"\n"
"notes:\n"
"For historical reasons, it is not an error to pass more values than are consumed\n"
"by the format.\n";
/* Docstring for the module's "unpack" method (see pytdbpack_methods).

   Written as adjacent string literals with explicit "\n" escapes: a raw
   newline inside a string literal is not valid C. */
static char const pytdbunpack_doc[] =
"unpack(format, buffer) -> (values, rest)\n"
"Unpack Samba binary data according to format string.\n"
"\n"
"arguments:\n"
"format -- string of tdbpack characters\n"
"buffer -- string of packed binary data\n"
"\n"
"returns:\n"
"2-tuple of:\n"
"values -- sequence of values corresponding 1:1 to format characters\n"
"rest -- string containing data that was not decoded, or '' if the\n"
"whole string was consumed\n"
"\n"
"raises:\n"
"IndexError -- if there is insufficient data in the buffer for the\n"
"format (or if the data is corrupt and contains a variable-length\n"
"field extending past the end)\n"
"ValueError -- if any of the format characters is illegal\n"
"\n"
"notes:\n"
"Because unconsumed data is returned, you can feed it back in to the\n"
"unpacker to extract further fields. Alternatively, if you wish to modify\n"
"some fields near the start of the data, you may be able to save time by\n"
"only unpacking and repacking the necessary part.\n";
/* Name of the DOS codepage encoding.  NOTE(review): nothing in the code
   visible in this file reads this variable -- confirm whether another
   translation unit uses it before changing or removing it. */
const char *pytdb_dos_encoding = "cp850";

/* Encoding handed to the string pack/unpack helpers.  It is NULL, meaning
   that the Samba default encoding *must* be the same as the Python default
   encoding. */
const char *pytdb_unix_encoding = NULL;
175 * Pack objects to bytes.
177 * All objects are first individually encoded onto a list, and then the list
178 * of strings is concatenated. This is faster than concatenating strings,
179 * and reasonably simple to code.
181 static PyObject *
182 pytdbpack(PyObject *self,
183 PyObject *args)
185 char *format_str;
186 PyObject *val_seq, *val_iter = NULL,
187 *packed_list = NULL, *packed_str = NULL,
188 *empty_str = NULL;
190 /* TODO: Test passing wrong types or too many arguments */
191 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
192 return NULL;
194 if (!(val_iter = PyObject_GetIter(val_seq)))
195 goto out;
197 /* Create list to hold strings until we're done, then join them all. */
198 if (!(packed_list = PyList_New(0)))
199 goto out;
201 if (!pytdbpack_data(format_str, val_iter, packed_list))
202 goto out;
204 /* this function is not officially documented but it works */
205 if (!(empty_str = PyString_InternFromString("")))
206 goto out;
208 packed_str = _PyString_Join(empty_str, packed_list);
210 out:
211 Py_XDECREF(empty_str);
212 Py_XDECREF(val_iter);
213 Py_XDECREF(packed_list);
215 return packed_str;
220 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
221 PACKED_BUF.
223 The string has already been checked out, so we know that VAL_SEQ is large
224 enough to hold the packed data, and that there are enough value items.
225 (However, their types may not have been thoroughly checked yet.)
227 In addition, val_seq is a Python Fast sequence.
229 Returns NULL for error (with exception set), or None.
231 PyObject *
232 pytdbpack_data(const char *format_str,
233 PyObject *val_iter,
234 PyObject *packed_list)
236 int format_i, val_i = 0;
238 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
239 char ch = format_str[format_i];
241 switch (ch) {
242 /* dispatch to the appropriate packer for this type,
243 which should pull things off the iterator, and
244 append them to the packed_list */
245 case 'w':
246 case 'd':
247 case 'p':
248 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
249 return NULL;
250 break;
252 case 'f':
253 case 'P':
254 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
255 return NULL;
256 break;
258 case 'B':
259 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
260 return NULL;
261 break;
263 default:
264 PyErr_Format(PyExc_ValueError,
265 "%s: format character '%c' is not supported",
266 FUNCTION_MACRO, ch);
267 return NULL;
271 return packed_list;
275 static PyObject *
276 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
278 unsigned long val_long;
279 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
280 PyObject *new_list = NULL;
281 unsigned char pack_buf[4];
283 if (!(val_obj = PyIter_Next(val_iter)))
284 goto out;
286 if (!(long_obj = PyNumber_Long(val_obj))) {
287 pytdbpack_bad_type(ch, "Number", val_obj);
288 goto out;
291 val_long = PyLong_AsUnsignedLong(long_obj);
292 pack_le_uint32(val_long, pack_buf);
294 /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
295 the first two bytes. */
297 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
298 goto out;
300 if (PyList_Append(packed_list, result_obj) != -1)
301 new_list = packed_list;
303 out:
304 Py_XDECREF(val_obj);
305 Py_XDECREF(long_obj);
306 Py_XDECREF(result_obj);
308 return new_list;
313 * Take one string from the iterator val_iter, convert it to 8-bit, and return
314 * it.
316 * If the input is neither a string nor Unicode, an exception is raised.
318 * If the input is Unicode, then it is converted to the appropriate encoding.
320 * If the input is a String, and encoding is not null, then it is converted to
321 * Unicode using the default decoding method, and then converted to the
322 * encoding. If the encoding is NULL, then the string is written out as-is --
323 * this is used when the default Python encoding is the same as the Samba
324 * encoding.
326 * I hope this approach avoids being too fragile w.r.t. being passed either
327 * Unicode or String objects.
329 static PyObject *
330 pytdbpack_str(char ch,
331 PyObject *val_iter, PyObject *packed_list, const char *encoding)
333 PyObject *val_obj = NULL;
334 PyObject *unicode_obj = NULL;
335 PyObject *coded_str = NULL;
336 PyObject *nul_str = NULL;
337 PyObject *new_list = NULL;
339 if (!(val_obj = PyIter_Next(val_iter)))
340 goto out;
342 if (PyUnicode_Check(val_obj)) {
343 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
344 goto out;
346 else if (PyString_Check(val_obj) && !encoding) {
347 /* For efficiency, we assume that the Python interpreter has
348 the same default string encoding as Samba's native string
349 encoding. On the PSA, both are always 8859-1. */
350 coded_str = val_obj;
351 Py_INCREF(coded_str);
353 else if (PyString_Check(val_obj)) {
354 /* String, but needs to be converted */
355 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
356 goto out;
357 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
358 goto out;
360 else {
361 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
362 goto out;
365 if (!nul_str)
366 /* this is constant and often-used; hold it forever */
367 if (!(nul_str = PyString_FromStringAndSize("", 1)))
368 goto out;
370 if ((PyList_Append(packed_list, coded_str) != -1)
371 && (PyList_Append(packed_list, nul_str) != -1))
372 new_list = packed_list;
374 out:
375 Py_XDECREF(val_obj);
376 Py_XDECREF(unicode_obj);
377 Py_XDECREF(coded_str);
379 return new_list;
384 * Pack (LENGTH, BUFFER) pair onto the list.
386 * The buffer must already be a String, not Unicode, because it contains 8-bit
387 * untranslated data. In some cases it will actually be UTF_16_LE data.
389 static PyObject *
390 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
392 PyObject *val_obj;
393 PyObject *new_list = NULL;
395 /* pull off integer and stick onto list */
396 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
397 return NULL;
399 /* this assumes that the string is the right length; the old code did
400 the same. */
401 if (!(val_obj = PyIter_Next(val_iter)))
402 return NULL;
404 if (!PyString_Check(val_obj)) {
405 pytdbpack_bad_type('B', "String", val_obj);
406 goto out;
409 if (PyList_Append(packed_list, val_obj) != -1)
410 new_list = packed_list;
412 out:
413 Py_XDECREF(val_obj);
414 return new_list;
418 static PyObject *pytdbpack_bad_type(char ch,
419 const char *expected,
420 PyObject *val_obj)
422 PyObject *r = PyObject_Repr(val_obj);
423 if (!r)
424 return NULL;
425 PyErr_Format(PyExc_TypeError,
426 "tdbpack: format '%c' requires %s, not %s",
427 ch, expected, PyString_AS_STRING(r));
428 Py_DECREF(r);
429 return val_obj;
/*
  Store the low 32 bits of VAL_LONG into the four bytes at PBUF, least
  significant byte first.

  XXX: glib and Samba have quicker macros for doing the endianness
  conversions, but I don't know of one in plain libc, and it's probably not
  a big deal.  I realize this is kind of dumb because we'll almost always
  be on x86, but being safe is important.
*/
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    int byte_i;

    for (byte_i = 0; byte_i < 4; byte_i++) {
        pbuf[byte_i] = (unsigned char) ((val_long >> (8 * byte_i)) & 0xff);
    }
}
/*
 * Copy LEN bytes from FROM to the position *PBUF and advance *PBUF past
 * the copied data.
 *
 * NOTE(review): no caller is visible in this file -- confirm whether this
 * helper is still needed.
 */
static void pack_bytes(long len, const char *from,
                       unsigned char **pbuf)
{
    unsigned char *dest = *pbuf;

    memcpy(dest, from, len);
    *pbuf = dest + len;
}
457 static PyObject *
458 pytdbunpack(PyObject *self,
459 PyObject *args)
461 char *format_str, *packed_str, *ppacked;
462 PyObject *val_list = NULL, *ret_tuple = NULL;
463 PyObject *rest_string = NULL;
464 int format_len, packed_len;
465 char last_format = '#'; /* invalid */
466 int i;
468 /* get arguments */
469 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
470 return NULL;
472 format_len = strlen(format_str);
474 /* Allocate list to hold results. Initially empty, and we append
475 results as we go along. */
476 val_list = PyList_New(0);
477 if (!val_list)
478 goto failed;
479 ret_tuple = PyTuple_New(2);
480 if (!ret_tuple)
481 goto failed;
483 /* For every object, unpack. */
484 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
485 last_format = format_str[i];
486 /* packed_len is reduced in place */
487 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
488 goto failed;
491 /* If the last character was '$', keep going until out of space */
492 if (format_str[i] == '$') {
493 if (i == 0) {
494 PyErr_Format(PyExc_ValueError,
495 "%s: '$' may not be first character in format",
496 FUNCTION_MACRO);
497 return NULL;
499 while (packed_len > 0)
500 if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
501 goto failed;
504 /* save leftovers for next time */
505 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
506 if (!rest_string)
507 goto failed;
509 /* return (values, rest) tuple; give up references to them */
510 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
511 val_list = NULL;
512 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
513 val_list = NULL;
514 return ret_tuple;
516 failed:
517 /* handle failure: deallocate anything. XDECREF forms handle NULL
518 pointers for objects that haven't been allocated yet. */
519 Py_XDECREF(val_list);
520 Py_XDECREF(ret_tuple);
521 Py_XDECREF(rest_string);
522 return NULL;
526 static void
527 pytdbunpack_err_too_short(void)
529 PyErr_Format(PyExc_IndexError,
530 "%s: data too short for unpack format", FUNCTION_MACRO);
534 static PyObject *
535 pytdbunpack_uint32(char **pbuf, int *plen)
537 unsigned long v;
538 unsigned char *b;
540 if (*plen < 4) {
541 pytdbunpack_err_too_short();
542 return NULL;
545 b = *pbuf;
546 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
548 (*pbuf) += 4;
549 (*plen) -= 4;
551 return PyLong_FromUnsignedLong(v);
555 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
557 long v;
558 unsigned char *b;
560 if (*plen < 2) {
561 pytdbunpack_err_too_short();
562 return NULL;
565 b = *pbuf;
566 v = b[0] | b[1]<<8;
568 (*pbuf) += 2;
569 (*plen) -= 2;
571 return PyInt_FromLong(v);
575 static PyObject *
576 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
578 int len;
579 char *nul_ptr, *start;
581 start = *pbuf;
583 nul_ptr = memchr(start, '\0', *plen);
584 if (!nul_ptr) {
585 pytdbunpack_err_too_short();
586 return NULL;
589 len = nul_ptr - start;
591 *pbuf += len + 1; /* skip \0 */
592 *plen -= len + 1;
594 return PyString_Decode(start, len, encoding, NULL);
598 static PyObject *
599 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
601 /* first get 32-bit len */
602 long slen;
603 unsigned char *b;
604 unsigned char *start;
605 PyObject *str_obj = NULL, *len_obj = NULL;
607 if (*plen < 4) {
608 pytdbunpack_err_too_short();
609 return NULL;
612 b = *pbuf;
613 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
615 if (slen < 0) { /* surely you jest */
616 PyErr_Format(PyExc_ValueError,
617 "%s: buffer seems to have negative length", FUNCTION_MACRO);
618 return NULL;
621 (*pbuf) += 4;
622 (*plen) -= 4;
623 start = *pbuf;
625 if (*plen < slen) {
626 PyErr_Format(PyExc_IndexError,
627 "%s: not enough data to unpack buffer: "
628 "need %d bytes, have %d", FUNCTION_MACRO,
629 (int) slen, *plen);
630 return NULL;
633 (*pbuf) += slen;
634 (*plen) -= slen;
636 if (!(len_obj = PyInt_FromLong(slen)))
637 goto failed;
639 if (PyList_Append(val_list, len_obj) == -1)
640 goto failed;
642 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
643 goto failed;
645 if (PyList_Append(val_list, str_obj) == -1)
646 goto failed;
648 Py_DECREF(len_obj);
649 Py_DECREF(str_obj);
651 return val_list;
653 failed:
654 Py_XDECREF(len_obj); /* handles NULL */
655 Py_XDECREF(str_obj);
656 return NULL;
660 /* Unpack a single field from packed data, according to format character CH.
661 Remaining data is at *PBUF, of *PLEN.
663 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
664 been consumed.
666 Returns a reference to None, or NULL for failure.
668 static PyObject *pytdbunpack_item(char ch,
669 char **pbuf,
670 int *plen,
671 PyObject *val_list)
673 PyObject *unpacked;
675 if (ch == 'w') { /* 16-bit int */
676 unpacked = pytdbunpack_int16(pbuf, plen);
678 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
679 /* pointers can just come through as integers */
680 unpacked = pytdbunpack_uint32(pbuf, plen);
682 else if (ch == 'f' || ch == 'P') { /* nul-term string */
683 unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
685 else if (ch == 'B') { /* length, buffer */
686 return pytdbunpack_buffer(pbuf, plen, val_list);
688 else {
689 PyErr_Format(PyExc_ValueError,
690 "%s: format character '%c' is not supported",
691 FUNCTION_MACRO, ch);
693 return NULL;
696 /* otherwise OK */
697 if (!unpacked)
698 return NULL;
700 if (PyList_Append(val_list, unpacked) == -1)
701 val_list = NULL;
703 /* PyList_Append takes a new reference to the inserted object.
704 Therefore, we no longer need the original reference. */
705 Py_DECREF(unpacked);
707 return val_list;
715 static PyMethodDef pytdbpack_methods[] = {
716 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
717 { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
720 DL_EXPORT(void)
721 inittdbpack(void)
723 Py_InitModule3("tdbpack", pytdbpack_methods,
724 (char *) pytdbpack_docstring);