off by one bug in string length; CR 1159
[Samba.git] / source / python / py_tdbpack.c
blob6181a4918e6a7e6870bbd51e6f0370a7be01d75d
1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #include "Python.h"
28 /* This module is supposed to be standalone, however for portability
29 it would be good to use the FUNCTION_MACRO preprocessor define. */
31 #include "include/config.h"
33 #ifdef HAVE_FUNCTION_MACRO
34 #define FUNCTION_MACRO (__FUNCTION__)
35 #else
36 #define FUNCTION_MACRO (__FILE__)
37 #endif
39 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
40 static PyObject * pytdbpack_str(char ch,
41 PyObject *val_iter, PyObject *packed_list,
42 const char *encoding);
43 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
45 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
47 static PyObject *pytdbpack_data(const char *format_str,
48 PyObject *val_seq,
49 PyObject *val_list);
51 static PyObject *
52 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
54 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
57 static PyObject *pytdbpack_bad_type(char ch,
58 const char *expected,
59 PyObject *val_obj);
/* Module docstring, passed to Py_InitModule3() by inittdbpack().

   Written as adjacent string literals (one per output line) because a
   literal that spans raw newlines is not valid standard C.

   NOTE(review): 'F' is described below, but neither pytdbpack_data() nor
   pytdbunpack_item() in this file accepts it -- confirm whether it should
   be implemented or dropped from the text. */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format. It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
"'f': NUL-terminated string in codepage iso8859-1\n"
"\n"
"'P': same as 'f'\n"
"\n"
"'F': NUL-terminated string in iso-8859-1\n"
"\n"
"'d': 4 byte little-endian unsigned number\n"
"\n"
"'w': 2 byte little-endian unsigned number\n"
"\n"
"'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
"really just an \"exists\" or \"does not exist\" flag. The boolean\n"
"value of the Python object is used.\n"
"\n"
"'B': 4-byte LE length, followed by that many bytes of binary data.\n"
"Corresponds to a Python integer giving the length, followed by a byte\n"
"string of the appropriate length.\n"
"\n"
"'$': Special flag indicating that the preceding format code should be\n"
"repeated while data remains. This is only supported for unpacking.\n"
"\n"
"Every code corresponds to a single Python object, except 'B' which\n"
"corresponds to two values (length and contents), and '$', which produces\n"
"however many make sense.\n";
/* Docstring for the "pack" method (see pytdbpack_methods).  Written as
   adjacent string literals because a literal spanning raw newlines is not
   valid standard C. */
static char const pytdbpack_doc[] =
"pack(format, values) -> buffer\n"
"Pack Python objects into Samba binary format according to format string.\n"
"\n"
"arguments:\n"
"format -- string of tdbpack format characters\n"
"values -- sequence of value objects corresponding 1:1 to format characters\n"
"\n"
"returns:\n"
"buffer -- string containing packed data\n"
"\n"
"raises:\n"
"IndexError -- if there are too few values for the format\n"
"ValueError -- if any of the format characters is illegal\n"
"TypeError -- if the format is not a string, or values is not a sequence,\n"
"or any of the values is of the wrong type for the corresponding\n"
"format character\n"
"\n"
"notes:\n"
"For historical reasons, it is not an error to pass more values than are consumed\n"
"by the format.\n";
/* Docstring for the "unpack" method (see pytdbpack_methods).  Written as
   adjacent string literals because a literal spanning raw newlines is not
   valid standard C. */
static char const pytdbunpack_doc[] =
"unpack(format, buffer) -> (values, rest)\n"
"Unpack Samba binary data according to format string.\n"
"\n"
"arguments:\n"
"format -- string of tdbpack characters\n"
"buffer -- string of packed binary data\n"
"\n"
"returns:\n"
"2-tuple of:\n"
"values -- sequence of values corresponding 1:1 to format characters\n"
"rest -- string containing data that was not decoded, or '' if the\n"
"whole string was consumed\n"
"\n"
"raises:\n"
"IndexError -- if there is insufficient data in the buffer for the\n"
"format (or if the data is corrupt and contains a variable-length\n"
"field extending past the end)\n"
"ValueError -- if any of the format characters is illegal\n"
"\n"
"notes:\n"
"Because unconsumed data is returned, you can feed it back in to the\n"
"unpacker to extract further fields. Alternatively, if you wish to modify\n"
"some fields near the start of the data, you may be able to save time by\n"
"only unpacking and repacking the necessary part.\n";
/* Codec name for DOS-codepage strings. */
const char *pytdb_dos_encoding = "cp850";

/* Codec name for Unix-codepage strings.  NULL means "use Python's default
   encoding", so the Samba default encoding *must* match the Python default
   encoding.  This is the value handed to pytdbpack_str() and
   pytdbunpack_string() for the 'f' and 'P' format codes. */
const char *pytdb_unix_encoding = NULL;
168 * Pack objects to bytes.
170 * All objects are first individually encoded onto a list, and then the list
171 * of strings is concatenated. This is faster than concatenating strings,
172 * and reasonably simple to code.
174 static PyObject *
175 pytdbpack(PyObject *self,
176 PyObject *args)
178 char *format_str;
179 PyObject *val_seq, *val_iter = NULL,
180 *packed_list = NULL, *packed_str = NULL,
181 *empty_str = NULL;
183 /* TODO: Test passing wrong types or too many arguments */
184 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
185 return NULL;
187 if (!(val_iter = PyObject_GetIter(val_seq)))
188 goto out;
190 /* Create list to hold strings until we're done, then join them all. */
191 if (!(packed_list = PyList_New(0)))
192 goto out;
194 if (!pytdbpack_data(format_str, val_iter, packed_list))
195 goto out;
197 /* this function is not officially documented but it works */
198 if (!(empty_str = PyString_InternFromString("")))
199 goto out;
201 packed_str = _PyString_Join(empty_str, packed_list);
203 out:
204 Py_XDECREF(empty_str);
205 Py_XDECREF(val_iter);
206 Py_XDECREF(packed_list);
208 return packed_str;
213 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
214 PACKED_BUF.
216 The string has already been checked out, so we know that VAL_SEQ is large
217 enough to hold the packed data, and that there are enough value items.
218 (However, their types may not have been thoroughly checked yet.)
220 In addition, val_seq is a Python Fast sequence.
222 Returns NULL for error (with exception set), or None.
224 PyObject *
225 pytdbpack_data(const char *format_str,
226 PyObject *val_iter,
227 PyObject *packed_list)
229 int format_i, val_i = 0;
231 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
232 char ch = format_str[format_i];
234 switch (ch) {
235 /* dispatch to the appropriate packer for this type,
236 which should pull things off the iterator, and
237 append them to the packed_list */
238 case 'w':
239 case 'd':
240 case 'p':
241 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
242 return NULL;
243 break;
245 case 'f':
246 case 'P':
247 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
248 return NULL;
249 break;
251 case 'B':
252 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
253 return NULL;
254 break;
256 default:
257 PyErr_Format(PyExc_ValueError,
258 "%s: format character '%c' is not supported",
259 FUNCTION_MACRO, ch);
260 return NULL;
264 return packed_list;
268 static PyObject *
269 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
271 unsigned long val_long;
272 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
273 PyObject *new_list = NULL;
274 unsigned char pack_buf[4];
276 if (!(val_obj = PyIter_Next(val_iter)))
277 goto out;
279 if (!(long_obj = PyNumber_Long(val_obj))) {
280 pytdbpack_bad_type(ch, "Number", val_obj);
281 goto out;
284 val_long = PyLong_AsUnsignedLong(long_obj);
285 pack_le_uint32(val_long, pack_buf);
287 /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
288 the first two bytes. */
290 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
291 goto out;
293 if (PyList_Append(packed_list, result_obj) != -1)
294 new_list = packed_list;
296 out:
297 Py_XDECREF(val_obj);
298 Py_XDECREF(long_obj);
299 Py_XDECREF(result_obj);
301 return new_list;
306 * Take one string from the iterator val_iter, convert it to 8-bit, and return
307 * it.
309 * If the input is neither a string nor Unicode, an exception is raised.
311 * If the input is Unicode, then it is converted to the appropriate encoding.
313 * If the input is a String, and encoding is not null, then it is converted to
314 * Unicode using the default decoding method, and then converted to the
315 * encoding. If the encoding is NULL, then the string is written out as-is --
316 * this is used when the default Python encoding is the same as the Samba
317 * encoding.
319 * I hope this approach avoids being too fragile w.r.t. being passed either
320 * Unicode or String objects.
322 static PyObject *
323 pytdbpack_str(char ch,
324 PyObject *val_iter, PyObject *packed_list, const char *encoding)
326 PyObject *val_obj = NULL;
327 PyObject *unicode_obj = NULL;
328 PyObject *coded_str = NULL;
329 PyObject *nul_str = NULL;
330 PyObject *new_list = NULL;
332 if (!(val_obj = PyIter_Next(val_iter)))
333 goto out;
335 if (PyUnicode_Check(val_obj)) {
336 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
337 goto out;
339 else if (PyString_Check(val_obj) && !encoding) {
340 /* For efficiency, we assume that the Python interpreter has
341 the same default string encoding as Samba's native string
342 encoding. On the PSA, both are always 8859-1. */
343 coded_str = val_obj;
344 Py_INCREF(coded_str);
346 else if (PyString_Check(val_obj)) {
347 /* String, but needs to be converted */
348 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
349 goto out;
350 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
351 goto out;
353 else {
354 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
355 goto out;
358 if (!nul_str)
359 /* this is constant and often-used; hold it forever */
360 if (!(nul_str = PyString_FromStringAndSize("", 1)))
361 goto out;
363 if ((PyList_Append(packed_list, coded_str) != -1)
364 && (PyList_Append(packed_list, nul_str) != -1))
365 new_list = packed_list;
367 out:
368 Py_XDECREF(val_obj);
369 Py_XDECREF(unicode_obj);
370 Py_XDECREF(coded_str);
372 return new_list;
377 * Pack (LENGTH, BUFFER) pair onto the list.
379 * The buffer must already be a String, not Unicode, because it contains 8-bit
380 * untranslated data. In some cases it will actually be UTF_16_LE data.
382 static PyObject *
383 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
385 PyObject *val_obj;
386 PyObject *new_list = NULL;
388 /* pull off integer and stick onto list */
389 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
390 return NULL;
392 /* this assumes that the string is the right length; the old code did
393 the same. */
394 if (!(val_obj = PyIter_Next(val_iter)))
395 return NULL;
397 if (!PyString_Check(val_obj)) {
398 pytdbpack_bad_type('B', "String", val_obj);
399 goto out;
402 if (PyList_Append(packed_list, val_obj) != -1)
403 new_list = packed_list;
405 out:
406 Py_XDECREF(val_obj);
407 return new_list;
411 static PyObject *pytdbpack_bad_type(char ch,
412 const char *expected,
413 PyObject *val_obj)
415 PyObject *r = PyObject_Repr(val_obj);
416 if (!r)
417 return NULL;
418 PyErr_Format(PyExc_TypeError,
419 "tdbpack: format '%c' requires %s, not %s",
420 ch, expected, PyString_AS_STRING(r));
421 Py_DECREF(r);
422 return val_obj;
/*
  Store the low 32 bits of VAL_LONG into PBUF[0..3], least significant byte
  first.

  XXX: glib and Samba have quicker macros for doing the endianness
  conversions, but I don't know of one in plain libc, and it's probably not a
  big deal.  Doing it byte by byte keeps the result independent of the host
  byte order.
*/
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    int byte_index;

    for (byte_index = 0; byte_index < 4; byte_index++) {
        pbuf[byte_index] = (val_long >> (8 * byte_index)) & 0xff;
    }
}
/* Copy LEN bytes from FROM to the position *PBUF points at, then advance
   *PBUF past the copied bytes. */
static void pack_bytes(long len, const char *from,
                       unsigned char **pbuf)
{
    unsigned char *dest = *pbuf;

    memcpy(dest, from, len);
    *pbuf = dest + len;
}
450 static PyObject *
451 pytdbunpack(PyObject *self,
452 PyObject *args)
454 char *format_str, *packed_str, *ppacked;
455 PyObject *val_list = NULL, *ret_tuple = NULL;
456 PyObject *rest_string = NULL;
457 int format_len, packed_len;
458 char last_format = '#'; /* invalid */
459 int i;
461 /* get arguments */
462 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
463 return NULL;
465 format_len = strlen(format_str);
467 /* Allocate list to hold results. Initially empty, and we append
468 results as we go along. */
469 val_list = PyList_New(0);
470 if (!val_list)
471 goto failed;
472 ret_tuple = PyTuple_New(2);
473 if (!ret_tuple)
474 goto failed;
476 /* For every object, unpack. */
477 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
478 last_format = format_str[i];
479 /* packed_len is reduced in place */
480 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
481 goto failed;
484 /* If the last character was '$', keep going until out of space */
485 if (format_str[i] == '$') {
486 if (i == 0) {
487 PyErr_Format(PyExc_ValueError,
488 "%s: '$' may not be first character in format",
489 FUNCTION_MACRO);
490 return NULL;
492 while (packed_len > 0)
493 if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
494 goto failed;
497 /* save leftovers for next time */
498 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
499 if (!rest_string)
500 goto failed;
502 /* return (values, rest) tuple; give up references to them */
503 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
504 val_list = NULL;
505 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
506 val_list = NULL;
507 return ret_tuple;
509 failed:
510 /* handle failure: deallocate anything. XDECREF forms handle NULL
511 pointers for objects that haven't been allocated yet. */
512 Py_XDECREF(val_list);
513 Py_XDECREF(ret_tuple);
514 Py_XDECREF(rest_string);
515 return NULL;
519 static void
520 pytdbunpack_err_too_short(void)
522 PyErr_Format(PyExc_IndexError,
523 "%s: data too short for unpack format", FUNCTION_MACRO);
527 static PyObject *
528 pytdbunpack_uint32(char **pbuf, int *plen)
530 unsigned long v;
531 unsigned char *b;
533 if (*plen < 4) {
534 pytdbunpack_err_too_short();
535 return NULL;
538 b = *pbuf;
539 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
541 (*pbuf) += 4;
542 (*plen) -= 4;
544 return PyLong_FromUnsignedLong(v);
548 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
550 long v;
551 unsigned char *b;
553 if (*plen < 2) {
554 pytdbunpack_err_too_short();
555 return NULL;
558 b = *pbuf;
559 v = b[0] | b[1]<<8;
561 (*pbuf) += 2;
562 (*plen) -= 2;
564 return PyInt_FromLong(v);
568 static PyObject *
569 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
571 int len;
572 char *nul_ptr, *start;
574 start = *pbuf;
576 nul_ptr = memchr(start, '\0', *plen);
577 if (!nul_ptr) {
578 pytdbunpack_err_too_short();
579 return NULL;
582 len = nul_ptr - start;
584 *pbuf += len + 1; /* skip \0 */
585 *plen -= len + 1;
587 return PyString_Decode(start, len, encoding, NULL);
591 static PyObject *
592 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
594 /* first get 32-bit len */
595 long slen;
596 unsigned char *b;
597 unsigned char *start;
598 PyObject *str_obj = NULL, *len_obj = NULL;
600 if (*plen < 4) {
601 pytdbunpack_err_too_short();
602 return NULL;
605 b = *pbuf;
606 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
608 if (slen < 0) { /* surely you jest */
609 PyErr_Format(PyExc_ValueError,
610 "%s: buffer seems to have negative length", FUNCTION_MACRO);
611 return NULL;
614 (*pbuf) += 4;
615 (*plen) -= 4;
616 start = *pbuf;
618 if (*plen < slen) {
619 PyErr_Format(PyExc_IndexError,
620 "%s: not enough data to unpack buffer: "
621 "need %d bytes, have %d", FUNCTION_MACRO,
622 (int) slen, *plen);
623 return NULL;
626 (*pbuf) += slen;
627 (*plen) -= slen;
629 if (!(len_obj = PyInt_FromLong(slen)))
630 goto failed;
632 if (PyList_Append(val_list, len_obj) == -1)
633 goto failed;
635 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
636 goto failed;
638 if (PyList_Append(val_list, str_obj) == -1)
639 goto failed;
641 return val_list;
643 failed:
644 Py_XDECREF(len_obj); /* handles NULL */
645 Py_XDECREF(str_obj);
646 return NULL;
650 /* Unpack a single field from packed data, according to format character CH.
651 Remaining data is at *PBUF, of *PLEN.
653 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
654 been consumed.
656 Returns a reference to None, or NULL for failure.
658 static PyObject *pytdbunpack_item(char ch,
659 char **pbuf,
660 int *plen,
661 PyObject *val_list)
663 PyObject *unpacked;
665 if (ch == 'w') { /* 16-bit int */
666 unpacked = pytdbunpack_int16(pbuf, plen);
668 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
669 /* pointers can just come through as integers */
670 unpacked = pytdbunpack_uint32(pbuf, plen);
672 else if (ch == 'f' || ch == 'P') { /* nul-term string */
673 unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
675 else if (ch == 'B') { /* length, buffer */
676 return pytdbunpack_buffer(pbuf, plen, val_list);
678 else {
679 PyErr_Format(PyExc_ValueError,
680 "%s: format character '%c' is not supported",
681 FUNCTION_MACRO, ch);
683 return NULL;
686 /* otherwise OK */
687 if (!unpacked)
688 return NULL;
690 if (PyList_Append(val_list, unpacked) == -1)
691 val_list = NULL;
693 /* PyList_Append takes a new reference to the inserted object.
694 Therefore, we no longer need the original reference. */
695 Py_DECREF(unpacked);
697 return val_list;
705 static PyMethodDef pytdbpack_methods[] = {
706 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
707 { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
710 DL_EXPORT(void)
711 inittdbpack(void)
713 Py_InitModule3("tdbpack", pytdbpack_methods,
714 (char *) pytdbpack_docstring);