Modules/_struct.c

   1 /* struct module -- pack values into and (out of) bytes objects */
   2
   3 /* New version supporting byte order, alignment and size options,
   4    character strings, and unsigned numbers */
   5
   6 #define PY_SSIZE_T_CLEAN
   7
   8 #include "Python.h"
   9 #include "structseq.h"
  10 #include "structmember.h"
  11 #include <ctype.h>
  12
  13 static PyTypeObject PyStructType;
  14
  15 /* The translation function for each format character is table driven */
  16 typedef struct _formatdef {
  17     char format;
  18     Py_ssize_t size;
  19     Py_ssize_t alignment;
  20     PyObject* (*unpack)(const char *,
  21                         const struct _formatdef *);
  22     int (*pack)(char *, PyObject *,
  23                 const struct _formatdef *);
  24 } formatdef;
  25
  26 typedef struct _formatcode {
  27     const struct _formatdef *fmtdef;
  28     Py_ssize_t offset;
  29     Py_ssize_t size;
  30 } formatcode;
  31
  32 /* Struct object interface */
  33
  34 typedef struct {
  35     PyObject_HEAD
  36     Py_ssize_t s_size;
  37     Py_ssize_t s_len;
  38     formatcode *s_codes;
  39     PyObject *s_format;
  40     PyObject *weakreflist; /* List of weak references */
  41 } PyStructObject;
  42
  43
  44 #define PyStruct_Check(op) PyObject_TypeCheck(op, &PyStructType)
  45 #define PyStruct_CheckExact(op) (Py_TYPE(op) == &PyStructType)
  46
  47
  48 /* Exception */
  49
  50 static PyObject *StructError;
  51
  52
  53 /* Define various structs to figure out the alignments of types */
  54
  55
  56 typedef struct { char c; short x; } st_short;
  57 typedef struct { char c; int x; } st_int;
  58 typedef struct { char c; long x; } st_long;
  59 typedef struct { char c; float x; } st_float;
  60 typedef struct { char c; double x; } st_double;
  61 typedef struct { char c; void *x; } st_void_p;
  62
  63 #define SHORT_ALIGN (sizeof(st_short) - sizeof(short))
  64 #define INT_ALIGN (sizeof(st_int) - sizeof(int))
  65 #define LONG_ALIGN (sizeof(st_long) - sizeof(long))
  66 #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float))
  67 #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double))
  68 #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *))
  69
  70 /* We can't support q and Q in native mode unless the compiler does;
  71    in std mode, they're 8 bytes on all platforms. */
  72 #ifdef HAVE_LONG_LONG
  73 typedef struct { char c; PY_LONG_LONG x; } s_long_long;
  74 #define LONG_LONG_ALIGN (sizeof(s_long_long) - sizeof(PY_LONG_LONG))
  75 #endif
  76
  77 #ifdef HAVE_C99_BOOL
  78 #define BOOL_TYPE _Bool
  79 typedef struct { char c; _Bool x; } s_bool;
  80 #define BOOL_ALIGN (sizeof(s_bool) - sizeof(BOOL_TYPE))
  81 #else
  82 #define BOOL_TYPE char
  83 #define BOOL_ALIGN 0
  84 #endif
  85
  86 #define STRINGIFY(x)    #x
  87
  88 #ifdef __powerc
  89 #pragma options align=reset
  90 #endif
  91
  92 /* Helper to get a PyLongObject.  Caller should decref. */
  93
  94 static PyObject *
  95 get_pylong(PyObject *v)
  96 {
  97     assert(v != NULL);
  98     if (!PyLong_Check(v)) {
  99         PyErr_SetString(StructError,
 100                         "required argument is not an integer");
 101         return NULL;
 102     }
 103
 104     Py_INCREF(v);
 105     return v;
 106 }
 107
 108 /* Helper routine to get a C long and raise the appropriate error if it isn't
 109    one */
 110
 111 static int
 112 get_long(PyObject *v, long *p)
 113 {
 114     long x;
 115
 116     if (!PyLong_Check(v)) {
 117         PyErr_SetString(StructError,
 118                         "required argument is not an integer");
 119         return -1;
 120     }
 121     x = PyLong_AsLong(v);
 122     if (x == -1 && PyErr_Occurred()) {
 123         if (PyErr_ExceptionMatches(PyExc_OverflowError))
 124             PyErr_SetString(StructError,
 125                             "argument out of range");
 126         return -1;
 127     }
 128     *p = x;
 129     return 0;
 130 }
 131
 132
 133 /* Same, but handling unsigned long */
 134
 135 #ifndef PY_STRUCT_OVERFLOW_MASKING
 136 static int
 137 get_ulong(PyObject *v, unsigned long *p)
 138 {
 139     unsigned long x;
 140
 141     if (!PyLong_Check(v)) {
 142         PyErr_SetString(StructError,
 143                         "required argument is not an integer");
 144         return -1;
 145     }
 146     x = PyLong_AsUnsignedLong(v);
 147     if (x == (unsigned long)-1 && PyErr_Occurred()) {
 148         if (PyErr_ExceptionMatches(PyExc_OverflowError))
 149             PyErr_SetString(StructError,
 150                             "argument out of range");
 151         return -1;
 152     }
 153     *p = x;
 154     return 0;
 155 }
 156 #endif  /* PY_STRUCT_OVERFLOW_MASKING */
 157
 158 #ifdef HAVE_LONG_LONG
 159
 160 /* Same, but handling native long long. */
 161
 162 static int
 163 get_longlong(PyObject *v, PY_LONG_LONG *p)
 164 {
 165     PY_LONG_LONG x;
 166     if (!PyLong_Check(v)) {
 167         PyErr_SetString(StructError,
 168                         "required argument is not an integer");
 169         return -1;
 170     }
 171     x = PyLong_AsLongLong(v);
 172     if (x == -1 && PyErr_Occurred()) {
 173         if (PyErr_ExceptionMatches(PyExc_OverflowError))
 174             PyErr_SetString(StructError,
 175                             "argument out of range");
 176         return -1;
 177     }
 178     *p = x;
 179     return 0;
 180 }
 181
 182 /* Same, but handling native unsigned long long. */
 183
 184 static int
 185 get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p)
 186 {
 187     unsigned PY_LONG_LONG x;
 188     if (!PyLong_Check(v)) {
 189         PyErr_SetString(StructError,
 190                         "required argument is not an integer");
 191         return -1;
 192     }
 193     x = PyLong_AsUnsignedLongLong(v);
 194     if (x == -1 && PyErr_Occurred()) {
 195         if (PyErr_ExceptionMatches(PyExc_OverflowError))
 196             PyErr_SetString(StructError,
 197                             "argument out of range");
 198         return -1;
 199     }
 200     *p = x;
 201     return 0;
 202 }
 203
 204 #endif
 205
 206
 207 #define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag)
 208
 209
 210 /* Floating point helpers */
 211
 212 static PyObject *
 213 unpack_float(const char *p,  /* start of 4-byte string */
 214          int le)             /* true for little-endian, false for big-endian */
 215 {
 216     double x;
 217
 218     x = _PyFloat_Unpack4((unsigned char *)p, le);
 219     if (x == -1.0 && PyErr_Occurred())
 220         return NULL;
 221     return PyFloat_FromDouble(x);
 222 }
 223
 224 static PyObject *
 225 unpack_double(const char *p,  /* start of 8-byte string */
 226           int le)         /* true for little-endian, false for big-endian */
 227 {
 228     double x;
 229
 230     x = _PyFloat_Unpack8((unsigned char *)p, le);
 231     if (x == -1.0 && PyErr_Occurred())
 232         return NULL;
 233     return PyFloat_FromDouble(x);
 234 }
 235
 236 /* Helper to format the range error exceptions */
 237 static int
 238 _range_error(const formatdef *f, int is_unsigned)
 239 {
 240     /* ulargest is the largest unsigned value with f->size bytes.
 241      * Note that the simpler:
 242      *     ((size_t)1 << (f->size * 8)) - 1
 243      * doesn't work when f->size == sizeof(size_t) because C doesn't
 244      * define what happens when a left shift count is >= the number of
 245      * bits in the integer being shifted; e.g., on some boxes it doesn't
 246      * shift at all when they're equal.
 247      */
 248     const size_t ulargest = (size_t)-1 >> ((SIZEOF_SIZE_T - f->size)*8);
 249     assert(f->size >= 1 && f->size <= SIZEOF_SIZE_T);
 250     if (is_unsigned)
 251         PyErr_Format(StructError,
 252             "'%c' format requires 0 <= number <= %zu",
 253             f->format,
 254             ulargest);
 255     else {
 256         const Py_ssize_t largest = (Py_ssize_t)(ulargest >> 1);
 257         PyErr_Format(StructError,
 258             "'%c' format requires %zd <= number <= %zd",
 259             f->format,
 260             ~ largest,
 261             largest);
 262     }
 263
 264     return -1;
 265 }
 266
 267
 268
/* A large number of small routines follow, with names of the form

   [bln][up]_TYPE

   [bln] distinguishes among big-endian, little-endian and native.
   [up] distinguishes between unpack (from struct) and pack (to struct).
   TYPE is one of char, byte, ubyte, etc.
*/
 277
 278 /* Native mode routines. ****************************************************/
 279 /* NOTE:
 280    In all n[up]_<type> routines handling types larger than 1 byte, there is
 281    *no* guarantee that the p pointer is properly aligned for each type,
 282    therefore memcpy is called.  An intermediate variable is used to
 283    compensate for big-endian architectures.
 284    Normally both the intermediate variable and the memcpy call will be
 285    skipped by C optimisation in little-endian architectures (gcc >= 2.91
 286    does this). */
 287
 288 static PyObject *
 289 nu_char(const char *p, const formatdef *f)
 290 {
 291     return PyBytes_FromStringAndSize(p, 1);
 292 }
 293
 294 static PyObject *
 295 nu_byte(const char *p, const formatdef *f)
 296 {
 297     return PyLong_FromLong((long) *(signed char *)p);
 298 }
 299
 300 static PyObject *
 301 nu_ubyte(const char *p, const formatdef *f)
 302 {
 303     return PyLong_FromLong((long) *(unsigned char *)p);
 304 }
 305
 306 static PyObject *
 307 nu_short(const char *p, const formatdef *f)
 308 {
 309     short x;
 310     memcpy((char *)&x, p, sizeof x);
 311     return PyLong_FromLong((long)x);
 312 }
 313
 314 static PyObject *
 315 nu_ushort(const char *p, const formatdef *f)
 316 {
 317     unsigned short x;
 318     memcpy((char *)&x, p, sizeof x);
 319     return PyLong_FromLong((long)x);
 320 }
 321
 322 static PyObject *
 323 nu_int(const char *p, const formatdef *f)
 324 {
 325     int x;
 326     memcpy((char *)&x, p, sizeof x);
 327     return PyLong_FromLong((long)x);
 328 }
 329
 330 static PyObject *
 331 nu_uint(const char *p, const formatdef *f)
 332 {
 333     unsigned int x;
 334     memcpy((char *)&x, p, sizeof x);
 335 #if (SIZEOF_LONG > SIZEOF_INT)
 336     return PyLong_FromLong((long)x);
 337 #else
 338     if (x <= ((unsigned int)LONG_MAX))
 339         return PyLong_FromLong((long)x);
 340     return PyLong_FromUnsignedLong((unsigned long)x);
 341 #endif
 342 }
 343
 344 static PyObject *
 345 nu_long(const char *p, const formatdef *f)
 346 {
 347     long x;
 348     memcpy((char *)&x, p, sizeof x);
 349     return PyLong_FromLong(x);
 350 }
 351
 352 static PyObject *
 353 nu_ulong(const char *p, const formatdef *f)
 354 {
 355     unsigned long x;
 356     memcpy((char *)&x, p, sizeof x);
 357     if (x <= LONG_MAX)
 358         return PyLong_FromLong((long)x);
 359     return PyLong_FromUnsignedLong(x);
 360 }
 361
 362 /* Native mode doesn't support q or Q unless the platform C supports
 363    long long (or, on Windows, __int64). */
 364
 365 #ifdef HAVE_LONG_LONG
 366
 367 static PyObject *
 368 nu_longlong(const char *p, const formatdef *f)
 369 {
 370     PY_LONG_LONG x;
 371     memcpy((char *)&x, p, sizeof x);
 372     if (x >= LONG_MIN && x <= LONG_MAX)
 373         return PyLong_FromLong(Py_SAFE_DOWNCAST(x, PY_LONG_LONG, long));
 374     return PyLong_FromLongLong(x);
 375 }
 376
 377 static PyObject *
 378 nu_ulonglong(const char *p, const formatdef *f)
 379 {
 380     unsigned PY_LONG_LONG x;
 381     memcpy((char *)&x, p, sizeof x);
 382     if (x <= LONG_MAX)
 383         return PyLong_FromLong(Py_SAFE_DOWNCAST(x, unsigned PY_LONG_LONG, long));
 384     return PyLong_FromUnsignedLongLong(x);
 385 }
 386
 387 #endif
 388
 389 static PyObject *
 390 nu_bool(const char *p, const formatdef *f)
 391 {
 392     BOOL_TYPE x;
 393     memcpy((char *)&x, p, sizeof x);
 394     return PyBool_FromLong(x != 0);
 395 }
 396
 397
 398 static PyObject *
 399 nu_float(const char *p, const formatdef *f)
 400 {
 401     float x;
 402     memcpy((char *)&x, p, sizeof x);
 403     return PyFloat_FromDouble((double)x);
 404 }
 405
 406 static PyObject *
 407 nu_double(const char *p, const formatdef *f)
 408 {
 409     double x;
 410     memcpy((char *)&x, p, sizeof x);
 411     return PyFloat_FromDouble(x);
 412 }
 413
 414 static PyObject *
 415 nu_void_p(const char *p, const formatdef *f)
 416 {
 417     void *x;
 418     memcpy((char *)&x, p, sizeof x);
 419     return PyLong_FromVoidPtr(x);
 420 }
 421
 422 static int
 423 np_byte(char *p, PyObject *v, const formatdef *f)
 424 {
 425     long x;
 426     if (get_long(v, &x) < 0)
 427         return -1;
 428     if (x < -128 || x > 127){
 429         PyErr_SetString(StructError,
 430                         "byte format requires -128 <= number <= 127");
 431         return -1;
 432     }
 433     *p = (char)x;
 434     return 0;
 435 }
 436
 437 static int
 438 np_ubyte(char *p, PyObject *v, const formatdef *f)
 439 {
 440     long x;
 441     if (get_long(v, &x) < 0)
 442         return -1;
 443     if (x < 0 || x > 255){
 444         PyErr_SetString(StructError,
 445                         "ubyte format requires 0 <= number <= 255");
 446         return -1;
 447     }
 448     *p = (char)x;
 449     return 0;
 450 }
 451
 452 static int
 453 np_char(char *p, PyObject *v, const formatdef *f)
 454 {
 455     if (PyUnicode_Check(v)) {
 456         v = _PyUnicode_AsDefaultEncodedString(v, NULL);
 457         if (v == NULL)
 458             return -1;
 459     }
 460     if (!PyBytes_Check(v) || PyBytes_Size(v) != 1) {
 461         PyErr_SetString(StructError,
 462                         "char format requires bytes or string of length 1");
 463         return -1;
 464     }
 465     *p = *PyBytes_AsString(v);
 466     return 0;
 467 }
 468
 469 static int
 470 np_short(char *p, PyObject *v, const formatdef *f)
 471 {
 472     long x;
 473     short y;
 474     if (get_long(v, &x) < 0)
 475         return -1;
 476     if (x < SHRT_MIN || x > SHRT_MAX){
 477         PyErr_SetString(StructError,
 478                         "short format requires " STRINGIFY(SHRT_MIN)
 479                         " <= number <= " STRINGIFY(SHRT_MAX));
 480         return -1;
 481     }
 482     y = (short)x;
 483     memcpy(p, (char *)&y, sizeof y);
 484     return 0;
 485 }
 486
 487 static int
 488 np_ushort(char *p, PyObject *v, const formatdef *f)
 489 {
 490     long x;
 491     unsigned short y;
 492     if (get_long(v, &x) < 0)
 493         return -1;
 494     if (x < 0 || x > USHRT_MAX){
 495         PyErr_SetString(StructError,
 496                         "ushort format requires 0 <= number <= " STRINGIFY(USHRT_MAX));
 497         return -1;
 498     }
 499     y = (unsigned short)x;
 500     memcpy(p, (char *)&y, sizeof y);
 501     return 0;
 502 }
 503
 504 static int
 505 np_int(char *p, PyObject *v, const formatdef *f)
 506 {
 507     long x;
 508     int y;
 509     if (get_long(v, &x) < 0)
 510         return -1;
 511 #if (SIZEOF_LONG > SIZEOF_INT)
 512     if ((x < ((long)INT_MIN)) || (x > ((long)INT_MAX)))
 513         RANGE_ERROR(x, f, 0, -1);
 514 #endif
 515     y = (int)x;
 516     memcpy(p, (char *)&y, sizeof y);
 517     return 0;
 518 }
 519
 520 static int
 521 np_uint(char *p, PyObject *v, const formatdef *f)
 522 {
 523     unsigned long x;
 524     unsigned int y;
 525     if (get_ulong(v, &x) < 0)
 526         return -1;
 527     y = (unsigned int)x;
 528 #if (SIZEOF_LONG > SIZEOF_INT)
 529     if (x > ((unsigned long)UINT_MAX))
 530         RANGE_ERROR(y, f, 1, -1);
 531 #endif
 532     memcpy(p, (char *)&y, sizeof y);
 533     return 0;
 534 }
 535
 536 static int
 537 np_long(char *p, PyObject *v, const formatdef *f)
 538 {
 539     long x;
 540     if (get_long(v, &x) < 0)
 541         return -1;
 542     memcpy(p, (char *)&x, sizeof x);
 543     return 0;
 544 }
 545
 546 static int
 547 np_ulong(char *p, PyObject *v, const formatdef *f)
 548 {
 549     unsigned long x;
 550     if (get_ulong(v, &x) < 0)
 551         return -1;
 552     memcpy(p, (char *)&x, sizeof x);
 553     return 0;
 554 }
 555
 556 #ifdef HAVE_LONG_LONG
 557
 558 static int
 559 np_longlong(char *p, PyObject *v, const formatdef *f)
 560 {
 561     PY_LONG_LONG x;
 562     if (get_longlong(v, &x) < 0)
 563         return -1;
 564     memcpy(p, (char *)&x, sizeof x);
 565     return 0;
 566 }
 567
 568 static int
 569 np_ulonglong(char *p, PyObject *v, const formatdef *f)
 570 {
 571     unsigned PY_LONG_LONG x;
 572     if (get_ulonglong(v, &x) < 0)
 573         return -1;
 574     memcpy(p, (char *)&x, sizeof x);
 575     return 0;
 576 }
 577 #endif
 578
 579
 580 static int
 581 np_bool(char *p, PyObject *v, const formatdef *f)
 582 {
 583     BOOL_TYPE y;
 584     y = PyObject_IsTrue(v);
 585     memcpy(p, (char *)&y, sizeof y);
 586     return 0;
 587 }
 588
 589 static int
 590 np_float(char *p, PyObject *v, const formatdef *f)
 591 {
 592     float x = (float)PyFloat_AsDouble(v);
 593     if (x == -1 && PyErr_Occurred()) {
 594         PyErr_SetString(StructError,
 595                         "required argument is not a float");
 596         return -1;
 597     }
 598     memcpy(p, (char *)&x, sizeof x);
 599     return 0;
 600 }
 601
 602 static int
 603 np_double(char *p, PyObject *v, const formatdef *f)
 604 {
 605     double x = PyFloat_AsDouble(v);
 606     if (x == -1 && PyErr_Occurred()) {
 607         PyErr_SetString(StructError,
 608                         "required argument is not a float");
 609         return -1;
 610     }
 611     memcpy(p, (char *)&x, sizeof(double));
 612     return 0;
 613 }
 614
 615 static int
 616 np_void_p(char *p, PyObject *v, const formatdef *f)
 617 {
 618     void *x;
 619
 620     v = get_pylong(v);
 621     if (v == NULL)
 622         return -1;
 623     assert(PyLong_Check(v));
 624     x = PyLong_AsVoidPtr(v);
 625     Py_DECREF(v);
 626     if (x == NULL && PyErr_Occurred())
 627         return -1;
 628     memcpy(p, (char *)&x, sizeof x);
 629     return 0;
 630 }
 631
 632 static formatdef native_table[] = {
 633     {'x',       sizeof(char),   0,              NULL},
 634     {'b',       sizeof(char),   0,              nu_byte,        np_byte},
 635     {'B',       sizeof(char),   0,              nu_ubyte,       np_ubyte},
 636     {'c',       sizeof(char),   0,              nu_char,        np_char},
 637     {'s',       sizeof(char),   0,              NULL},
 638     {'p',       sizeof(char),   0,              NULL},
 639     {'h',       sizeof(short),  SHORT_ALIGN,    nu_short,       np_short},
 640     {'H',       sizeof(short),  SHORT_ALIGN,    nu_ushort,      np_ushort},
 641     {'i',       sizeof(int),    INT_ALIGN,      nu_int,         np_int},
 642     {'I',       sizeof(int),    INT_ALIGN,      nu_uint,        np_uint},
 643     {'l',       sizeof(long),   LONG_ALIGN,     nu_long,        np_long},
 644     {'L',       sizeof(long),   LONG_ALIGN,     nu_ulong,       np_ulong},
 645 #ifdef HAVE_LONG_LONG
 646     {'q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong},
 647     {'Q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
 648 #endif
 649     {'?',       sizeof(BOOL_TYPE),      BOOL_ALIGN,     nu_bool,        np_bool},
 650     {'f',       sizeof(float),  FLOAT_ALIGN,    nu_float,       np_float},
 651     {'d',       sizeof(double), DOUBLE_ALIGN,   nu_double,      np_double},
 652     {'P',       sizeof(void *), VOID_P_ALIGN,   nu_void_p,      np_void_p},
 653     {0}
 654 };
 655
 656 /* Big-endian routines. *****************************************************/
 657
 658 static PyObject *
 659 bu_int(const char *p, const formatdef *f)
 660 {
 661     long x = 0;
 662     Py_ssize_t i = f->size;
 663     const unsigned char *bytes = (const unsigned char *)p;
 664     do {
 665         x = (x<<8) | *bytes++;
 666     } while (--i > 0);
 667     /* Extend the sign bit. */
 668     if (SIZEOF_LONG > f->size)
 669         x |= -(x & (1L << ((8 * f->size) - 1)));
 670     return PyLong_FromLong(x);
 671 }
 672
 673 static PyObject *
 674 bu_uint(const char *p, const formatdef *f)
 675 {
 676     unsigned long x = 0;
 677     Py_ssize_t i = f->size;
 678     const unsigned char *bytes = (const unsigned char *)p;
 679     do {
 680         x = (x<<8) | *bytes++;
 681     } while (--i > 0);
 682     if (x <= LONG_MAX)
 683         return PyLong_FromLong((long)x);
 684     return PyLong_FromUnsignedLong(x);
 685 }
 686
 687 static PyObject *
 688 bu_longlong(const char *p, const formatdef *f)
 689 {
 690 #ifdef HAVE_LONG_LONG
 691     PY_LONG_LONG x = 0;
 692     Py_ssize_t i = f->size;
 693     const unsigned char *bytes = (const unsigned char *)p;
 694     do {
 695         x = (x<<8) | *bytes++;
 696     } while (--i > 0);
 697     /* Extend the sign bit. */
 698     if (SIZEOF_LONG_LONG > f->size)
 699         x |= -(x & ((PY_LONG_LONG)1 << ((8 * f->size) - 1)));
 700     if (x >= LONG_MIN && x <= LONG_MAX)
 701         return PyLong_FromLong(Py_SAFE_DOWNCAST(x, PY_LONG_LONG, long));
 702     return PyLong_FromLongLong(x);
 703 #else
 704     return _PyLong_FromByteArray((const unsigned char *)p,
 705                                   8,
 706                                   0, /* little-endian */
 707                       1  /* signed */);
 708 #endif
 709 }
 710
 711 static PyObject *
 712 bu_ulonglong(const char *p, const formatdef *f)
 713 {
 714 #ifdef HAVE_LONG_LONG
 715     unsigned PY_LONG_LONG x = 0;
 716     Py_ssize_t i = f->size;
 717     const unsigned char *bytes = (const unsigned char *)p;
 718     do {
 719         x = (x<<8) | *bytes++;
 720     } while (--i > 0);
 721     if (x <= LONG_MAX)
 722         return PyLong_FromLong(Py_SAFE_DOWNCAST(x, unsigned PY_LONG_LONG, long));
 723     return PyLong_FromUnsignedLongLong(x);
 724 #else
 725     return _PyLong_FromByteArray((const unsigned char *)p,
 726                                   8,
 727                                   0, /* little-endian */
 728                       0  /* signed */);
 729 #endif
 730 }
 731
 732 static PyObject *
 733 bu_float(const char *p, const formatdef *f)
 734 {
 735     return unpack_float(p, 0);
 736 }
 737
 738 static PyObject *
 739 bu_double(const char *p, const formatdef *f)
 740 {
 741     return unpack_double(p, 0);
 742 }
 743
 744 static PyObject *
 745 bu_bool(const char *p, const formatdef *f)
 746 {
 747     char x;
 748     memcpy((char *)&x, p, sizeof x);
 749     return PyBool_FromLong(x != 0);
 750 }
 751
 752 static int
 753 bp_int(char *p, PyObject *v, const formatdef *f)
 754 {
 755     long x;
 756     Py_ssize_t i;
 757     if (get_long(v, &x) < 0)
 758         return -1;
 759     i = f->size;
 760     if (i != SIZEOF_LONG) {
 761         if ((i == 2) && (x < -32768 || x > 32767))
 762             RANGE_ERROR(x, f, 0, 0xffffL);
 763 #if (SIZEOF_LONG != 4)
 764         else if ((i == 4) && (x < -2147483648L || x > 2147483647L))
 765             RANGE_ERROR(x, f, 0, 0xffffffffL);
 766 #endif
 767     }
 768     do {
 769         p[--i] = (char)x;
 770         x >>= 8;
 771     } while (i > 0);
 772     return 0;
 773 }
 774
 775 static int
 776 bp_uint(char *p, PyObject *v, const formatdef *f)
 777 {
 778     unsigned long x;
 779     Py_ssize_t i;
 780     if (get_ulong(v, &x) < 0)
 781         return -1;
 782     i = f->size;
 783     if (i != SIZEOF_LONG) {
 784         unsigned long maxint = 1;
 785         maxint <<= (unsigned long)(i * 8);
 786         if (x >= maxint)
 787             RANGE_ERROR(x, f, 1, maxint - 1);
 788     }
 789     do {
 790         p[--i] = (char)x;
 791         x >>= 8;
 792     } while (i > 0);
 793     return 0;
 794 }
 795
 796 static int
 797 bp_longlong(char *p, PyObject *v, const formatdef *f)
 798 {
 799     int res;
 800     v = get_pylong(v);
 801     if (v == NULL)
 802         return -1;
 803     res = _PyLong_AsByteArray((PyLongObject *)v,
 804                               (unsigned char *)p,
 805                               8,
 806                               0, /* little_endian */
 807                   1  /* signed */);
 808     Py_DECREF(v);
 809     return res;
 810 }
 811
 812 static int
 813 bp_ulonglong(char *p, PyObject *v, const formatdef *f)
 814 {
 815     int res;
 816     v = get_pylong(v);
 817     if (v == NULL)
 818         return -1;
 819     res = _PyLong_AsByteArray((PyLongObject *)v,
 820                               (unsigned char *)p,
 821                               8,
 822                               0, /* little_endian */
 823                   0  /* signed */);
 824     Py_DECREF(v);
 825     return res;
 826 }
 827
 828 static int
 829 bp_float(char *p, PyObject *v, const formatdef *f)
 830 {
 831     double x = PyFloat_AsDouble(v);
 832     if (x == -1 && PyErr_Occurred()) {
 833         PyErr_SetString(StructError,
 834                         "required argument is not a float");
 835         return -1;
 836     }
 837     return _PyFloat_Pack4(x, (unsigned char *)p, 0);
 838 }
 839
 840 static int
 841 bp_double(char *p, PyObject *v, const formatdef *f)
 842 {
 843     double x = PyFloat_AsDouble(v);
 844     if (x == -1 && PyErr_Occurred()) {
 845         PyErr_SetString(StructError,
 846                         "required argument is not a float");
 847         return -1;
 848     }
 849     return _PyFloat_Pack8(x, (unsigned char *)p, 0);
 850 }
 851
 852 static int
 853 bp_bool(char *p, PyObject *v, const formatdef *f)
 854 {
 855     char y;
 856     y = PyObject_IsTrue(v);
 857     memcpy(p, (char *)&y, sizeof y);
 858     return 0;
 859 }
 860
 861 static formatdef bigendian_table[] = {
 862     {'x',       1,              0,              NULL},
 863     {'b',       1,              0,              nu_byte,        np_byte},
 864     {'B',       1,              0,              nu_ubyte,       np_ubyte},
 865     {'c',       1,              0,              nu_char,        np_char},
 866     {'s',       1,              0,              NULL},
 867     {'p',       1,              0,              NULL},
 868     {'h',       2,              0,              bu_int,         bp_int},
 869     {'H',       2,              0,              bu_uint,        bp_uint},
 870     {'i',       4,              0,              bu_int,         bp_int},
 871     {'I',       4,              0,              bu_uint,        bp_uint},
 872     {'l',       4,              0,              bu_int,         bp_int},
 873     {'L',       4,              0,              bu_uint,        bp_uint},
 874     {'q',       8,              0,              bu_longlong,    bp_longlong},
 875     {'Q',       8,              0,              bu_ulonglong,   bp_ulonglong},
 876     {'?',       1,              0,              bu_bool,        bp_bool},
 877     {'f',       4,              0,              bu_float,       bp_float},
 878     {'d',       8,              0,              bu_double,      bp_double},
 879     {0}
 880 };
 881
 882 /* Little-endian routines. *****************************************************/
 883
 884 static PyObject *
 885 lu_int(const char *p, const formatdef *f)
 886 {
 887     long x = 0;
 888     Py_ssize_t i = f->size;
 889     const unsigned char *bytes = (const unsigned char *)p;
 890     do {
 891         x = (x<<8) | bytes[--i];
 892     } while (i > 0);
 893     /* Extend the sign bit. */
 894     if (SIZEOF_LONG > f->size)
 895         x |= -(x & (1L << ((8 * f->size) - 1)));
 896     return PyLong_FromLong(x);
 897 }
 898
 899 static PyObject *
 900 lu_uint(const char *p, const formatdef *f)
 901 {
 902     unsigned long x = 0;
 903     Py_ssize_t i = f->size;
 904     const unsigned char *bytes = (const unsigned char *)p;
 905     do {
 906         x = (x<<8) | bytes[--i];
 907     } while (i > 0);
 908     if (x <= LONG_MAX)
 909         return PyLong_FromLong((long)x);
 910     return PyLong_FromUnsignedLong((long)x);
 911 }
 912
 913 static PyObject *
 914 lu_longlong(const char *p, const formatdef *f)
 915 {
 916 #ifdef HAVE_LONG_LONG
 917     PY_LONG_LONG x = 0;
 918     Py_ssize_t i = f->size;
 919     const unsigned char *bytes = (const unsigned char *)p;
 920     do {
 921         x = (x<<8) | bytes[--i];
 922     } while (i > 0);
 923     /* Extend the sign bit. */
 924     if (SIZEOF_LONG_LONG > f->size)
 925         x |= -(x & ((PY_LONG_LONG)1 << ((8 * f->size) - 1)));
 926     if (x >= LONG_MIN && x <= LONG_MAX)
 927         return PyLong_FromLong(Py_SAFE_DOWNCAST(x, PY_LONG_LONG, long));
 928     return PyLong_FromLongLong(x);
 929 #else
 930     return _PyLong_FromByteArray((const unsigned char *)p,
 931                                   8,
 932                                   1, /* little-endian */
 933                       1  /* signed */);
 934 #endif
 935 }
 936
 937 static PyObject *
 938 lu_ulonglong(const char *p, const formatdef *f)
 939 {
 940 #ifdef HAVE_LONG_LONG
 941     unsigned PY_LONG_LONG x = 0;
 942     Py_ssize_t i = f->size;
 943     const unsigned char *bytes = (const unsigned char *)p;
 944     do {
 945         x = (x<<8) | bytes[--i];
 946     } while (i > 0);
 947     if (x <= LONG_MAX)
 948         return PyLong_FromLong(Py_SAFE_DOWNCAST(x, unsigned PY_LONG_LONG, long));
 949     return PyLong_FromUnsignedLongLong(x);
 950 #else
 951     return _PyLong_FromByteArray((const unsigned char *)p,
 952                                   8,
 953                                   1, /* little-endian */
 954                       0  /* signed */);
 955 #endif
 956 }
 957
 958 static PyObject *
 959 lu_float(const char *p, const formatdef *f)
 960 {
 961     return unpack_float(p, 1);
 962 }
 963
 964 static PyObject *
 965 lu_double(const char *p, const formatdef *f)
 966 {
 967     return unpack_double(p, 1);
 968 }
 969
 970 static int
 971 lp_int(char *p, PyObject *v, const formatdef *f)
 972 {
 973     long x;
 974     Py_ssize_t i;
 975     if (get_long(v, &x) < 0)
 976         return -1;
 977     i = f->size;
 978     if (i != SIZEOF_LONG) {
 979         if ((i == 2) && (x < -32768 || x > 32767))
 980             RANGE_ERROR(x, f, 0, 0xffffL);
 981 #if (SIZEOF_LONG != 4)
 982         else if ((i == 4) && (x < -2147483648L || x > 2147483647L))
 983             RANGE_ERROR(x, f, 0, 0xffffffffL);
 984 #endif
 985     }
 986     do {
 987         *p++ = (char)x;
 988         x >>= 8;
 989     } while (--i > 0);
 990     return 0;
 991 }
 992
 993 static int
 994 lp_uint(char *p, PyObject *v, const formatdef *f)
 995 {
 996     unsigned long x;
 997     Py_ssize_t i;
 998     if (get_ulong(v, &x) < 0)
 999         return -1;
1000     i = f->size;
1001     if (i != SIZEOF_LONG) {
1002         unsigned long maxint = 1;
1003         maxint <<= (unsigned long)(i * 8);
1004         if (x >= maxint)
1005             RANGE_ERROR(x, f, 1, maxint - 1);
1006     }
1007     do {
1008         *p++ = (char)x;
1009         x >>= 8;
1010     } while (--i > 0);
1011     return 0;
1012 }
1013
1014 static int
1015 lp_longlong(char *p, PyObject *v, const formatdef *f)
1016 {
1017     int res;
1018     v = get_pylong(v);
1019     if (v == NULL)
1020         return -1;
1021     res = _PyLong_AsByteArray((PyLongObject*)v,
1022                               (unsigned char *)p,
1023                               8,
1024                               1, /* little_endian */
1025                   1  /* signed */);
1026     Py_DECREF(v);
1027     return res;
1028 }
1029
1030 static int
1031 lp_ulonglong(char *p, PyObject *v, const formatdef *f)
1032 {
1033     int res;
1034     v = get_pylong(v);
1035     if (v == NULL)
1036         return -1;
1037     res = _PyLong_AsByteArray((PyLongObject*)v,
1038                               (unsigned char *)p,
1039                               8,
1040                               1, /* little_endian */
1041                   0  /* signed */);
1042     Py_DECREF(v);
1043     return res;
1044 }
1045
1046 static int
1047 lp_float(char *p, PyObject *v, const formatdef *f)
1048 {
1049     double x = PyFloat_AsDouble(v);
1050     if (x == -1 && PyErr_Occurred()) {
1051         PyErr_SetString(StructError,
1052                         "required argument is not a float");
1053         return -1;
1054     }
1055     return _PyFloat_Pack4(x, (unsigned char *)p, 1);
1056 }
1057
1058 static int
1059 lp_double(char *p, PyObject *v, const formatdef *f)
1060 {
1061     double x = PyFloat_AsDouble(v);
1062     if (x == -1 && PyErr_Occurred()) {
1063         PyErr_SetString(StructError,
1064                         "required argument is not a float");
1065         return -1;
1066     }
1067     return _PyFloat_Pack8(x, (unsigned char *)p, 1);
1068 }
1069
/* Format table used for '<' (and for '=' on hosts whose first byte of an
   int holds the low-order bits, see whichtable()).

   Columns: format character, size in bytes, alignment, unpack function,
   pack function.  Every alignment is 0, so align() never inserts padding
   for these codes.  'x', 's' and 'p' carry no conversion functions:
   'x' produces no value, and 's'/'p' are handled directly in
   s_pack_internal() and s_unpack_internal().  The table is terminated
   by an entry whose format character is 0. */
static formatdef lilendian_table[] = {
    {'x',       1,              0,              NULL},
    {'b',       1,              0,              nu_byte,        np_byte},
    {'B',       1,              0,              nu_ubyte,       np_ubyte},
    {'c',       1,              0,              nu_char,        np_char},
    {'s',       1,              0,              NULL},
    {'p',       1,              0,              NULL},
    {'h',       2,              0,              lu_int,         lp_int},
    {'H',       2,              0,              lu_uint,        lp_uint},
    {'i',       4,              0,              lu_int,         lp_int},
    {'I',       4,              0,              lu_uint,        lp_uint},
    {'l',       4,              0,              lu_int,         lp_int},
    {'L',       4,              0,              lu_uint,        lp_uint},
    {'q',       8,              0,              lu_longlong,    lp_longlong},
    {'Q',       8,              0,              lu_ulonglong,   lp_ulonglong},
    /* The standard bool representation does not depend on byte order but
       may differ from the native one, so the bu_bool/bp_bool functions
       are reused here. */
    {'?',       1,              0,              bu_bool,        bp_bool},
    {'f',       4,              0,              lu_float,       lp_float},
    {'d',       8,              0,              lu_double,      lp_double},
    {0}
};
1091
1092
1093 static const formatdef *
1094 whichtable(char **pfmt)
1095 {
1096     const char *fmt = (*pfmt)++; /* May be backed out of later */
1097     switch (*fmt) {
1098     case '<':
1099         return lilendian_table;
1100     case '>':
1101     case '!': /* Network byte order is big-endian */
1102         return bigendian_table;
1103     case '=': { /* Host byte order -- different from native in aligment! */
1104         int n = 1;
1105         char *p = (char *) &n;
1106         if (*p == 1)
1107             return lilendian_table;
1108         else
1109             return bigendian_table;
1110     }
1111     default:
1112         --*pfmt; /* Back out of pointer increment */
1113         /* Fall through */
1114     case '@':
1115         return native_table;
1116     }
1117 }
1118
1119
1120 /* Get the table entry for a format code */
1121
1122 static const formatdef *
1123 getentry(int c, const formatdef *f)
1124 {
1125     for (; f->format != '\0'; f++) {
1126         if (f->format == c) {
1127             return f;
1128         }
1129     }
1130     PyErr_SetString(StructError, "bad char in struct format");
1131     return NULL;
1132 }
1133
1134
1135 /* Align a size according to a format code.  Return -1 on overflow. */
1136
1137 static Py_ssize_t
1138 align(Py_ssize_t size, char c, const formatdef *e)
1139 {
1140     Py_ssize_t extra;
1141
1142     if (e->format == c) {
1143         if (e->alignment && size > 0) {
1144             extra = (e->alignment - 1) - (size - 1) % (e->alignment);
1145             if (extra > PY_SSIZE_T_MAX - size)
1146                 return -1;
1147             size += extra;
1148         }
1149     }
1150     return size;
1151 }
1152
1153
1154 /* calculate the size of a format string */
1155
1156 static int
1157 prepare_s(PyStructObject *self)
1158 {
1159     const formatdef *f;
1160     const formatdef *e;
1161     formatcode *codes;
1162
1163     const char *s;
1164     const char *fmt;
1165     char c;
1166     Py_ssize_t size, len, num, itemsize;
1167
1168     fmt = PyBytes_AS_STRING(self->s_format);
1169
1170     f = whichtable((char **)&fmt);
1171
1172     s = fmt;
1173     size = 0;
1174     len = 0;
1175     while ((c = *s++) != '\0') {
1176         if (isspace(Py_CHARMASK(c)))
1177             continue;
1178         if ('0' <= c && c <= '9') {
1179             num = c - '0';
1180             while ('0' <= (c = *s++) && c <= '9') {
1181                 /* overflow-safe version of
1182                    if (num*10 + (c - '0') > PY_SSIZE_T_MAX) { ... } */
1183                 if (num >= PY_SSIZE_T_MAX / 10 && (
1184                         num > PY_SSIZE_T_MAX / 10 ||
1185                         (c - '0') > PY_SSIZE_T_MAX % 10))
1186                     goto overflow;
1187                 num = num*10 + (c - '0');
1188             }
1189             if (c == '\0')
1190                 break;
1191         }
1192         else
1193             num = 1;
1194
1195         e = getentry(c, f);
1196         if (e == NULL)
1197             return -1;
1198
1199         switch (c) {
1200             case 's': /* fall through */
1201             case 'p': len++; break;
1202             case 'x': break;
1203             default: len += num; break;
1204         }
1205
1206         itemsize = e->size;
1207         size = align(size, c, e);
1208         if (size == -1)
1209             goto overflow;
1210
1211         /* if (size + num * itemsize > PY_SSIZE_T_MAX) { ... } */
1212         if (num > (PY_SSIZE_T_MAX - size) / itemsize)
1213             goto overflow;
1214         size += num * itemsize;
1215     }
1216
1217     /* check for overflow */
1218     if ((len + 1) > (PY_SSIZE_T_MAX / sizeof(formatcode))) {
1219         PyErr_NoMemory();
1220         return -1;
1221     }
1222
1223     self->s_size = size;
1224     self->s_len = len;
1225     codes = PyMem_MALLOC((len + 1) * sizeof(formatcode));
1226     if (codes == NULL) {
1227         PyErr_NoMemory();
1228         return -1;
1229     }
1230     self->s_codes = codes;
1231
1232     s = fmt;
1233     size = 0;
1234     while ((c = *s++) != '\0') {
1235         if (isspace(Py_CHARMASK(c)))
1236             continue;
1237         if ('0' <= c && c <= '9') {
1238             num = c - '0';
1239             while ('0' <= (c = *s++) && c <= '9')
1240                 num = num*10 + (c - '0');
1241             if (c == '\0')
1242                 break;
1243         }
1244         else
1245             num = 1;
1246
1247         e = getentry(c, f);
1248
1249         size = align(size, c, e);
1250         if (c == 's' || c == 'p') {
1251             codes->offset = size;
1252             codes->size = num;
1253             codes->fmtdef = e;
1254             codes++;
1255             size += num;
1256         } else if (c == 'x') {
1257             size += num;
1258         } else {
1259             while (--num >= 0) {
1260                 codes->offset = size;
1261                 codes->size = e->size;
1262                 codes->fmtdef = e;
1263                 codes++;
1264                 size += e->size;
1265             }
1266         }
1267     }
1268     codes->fmtdef = NULL;
1269     codes->offset = size;
1270     codes->size = 0;
1271
1272     return 0;
1273
1274   overflow:
1275     PyErr_SetString(StructError,
1276                     "total struct size too long");
1277     return -1;
1278 }
1279
1280 static PyObject *
1281 s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1282 {
1283     PyObject *self;
1284
1285     assert(type != NULL && type->tp_alloc != NULL);
1286
1287     self = type->tp_alloc(type, 0);
1288     if (self != NULL) {
1289         PyStructObject *s = (PyStructObject*)self;
1290         Py_INCREF(Py_None);
1291         s->s_format = Py_None;
1292         s->s_codes = NULL;
1293         s->s_size = -1;
1294         s->s_len = -1;
1295     }
1296     return self;
1297 }
1298
1299 static int
1300 s_init(PyObject *self, PyObject *args, PyObject *kwds)
1301 {
1302     PyStructObject *soself = (PyStructObject *)self;
1303     PyObject *o_format = NULL;
1304     int ret = 0;
1305     static char *kwlist[] = {"format", 0};
1306
1307     assert(PyStruct_Check(self));
1308
1309     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:Struct", kwlist,
1310                                      &o_format))
1311         return -1;
1312
1313     if (PyUnicode_Check(o_format)) {
1314         o_format = PyUnicode_AsASCIIString(o_format);
1315         if (o_format == NULL)
1316             return -1;
1317     }
1318     /* XXX support buffer interface, too */
1319     else {
1320         Py_INCREF(o_format);
1321     }
1322
1323     if (!PyBytes_Check(o_format)) {
1324         Py_DECREF(o_format);
1325         PyErr_Format(PyExc_TypeError,
1326                      "Struct() argument 1 must be bytes, not %.200s",
1327                      Py_TYPE(o_format)->tp_name);
1328         return -1;
1329     }
1330
1331     Py_CLEAR(soself->s_format);
1332     soself->s_format = o_format;
1333
1334     ret = prepare_s(soself);
1335     return ret;
1336 }
1337
1338 static void
1339 s_dealloc(PyStructObject *s)
1340 {
1341     if (s->weakreflist != NULL)
1342         PyObject_ClearWeakRefs((PyObject *)s);
1343     if (s->s_codes != NULL) {
1344         PyMem_FREE(s->s_codes);
1345     }
1346     Py_XDECREF(s->s_format);
1347     Py_TYPE(s)->tp_free((PyObject *)s);
1348 }
1349
1350 static PyObject *
1351 s_unpack_internal(PyStructObject *soself, char *startfrom) {
1352     formatcode *code;
1353     Py_ssize_t i = 0;
1354     PyObject *result = PyTuple_New(soself->s_len);
1355     if (result == NULL)
1356         return NULL;
1357
1358     for (code = soself->s_codes; code->fmtdef != NULL; code++) {
1359         PyObject *v;
1360         const formatdef *e = code->fmtdef;
1361         const char *res = startfrom + code->offset;
1362         if (e->format == 's') {
1363             v = PyBytes_FromStringAndSize(res, code->size);
1364         } else if (e->format == 'p') {
1365             Py_ssize_t n = *(unsigned char*)res;
1366             if (n >= code->size)
1367                 n = code->size - 1;
1368             v = PyBytes_FromStringAndSize(res + 1, n);
1369         } else {
1370             v = e->unpack(res, e);
1371         }
1372         if (v == NULL)
1373             goto fail;
1374         PyTuple_SET_ITEM(result, i++, v);
1375     }
1376
1377     return result;
1378 fail:
1379     Py_DECREF(result);
1380     return NULL;
1381 }
1382
1383
1384 PyDoc_STRVAR(s_unpack__doc__,
1385 "S.unpack(buffer) -> (v1, v2, ...)\n\
1386 \n\
1387 Return a tuple containing values unpacked according to the format\n\
1388 string S.format.  Requires len(buffer) == S.size.  See help(struct)\n\
1389 for more on format strings.");
1390
1391 static PyObject *
1392 s_unpack(PyObject *self, PyObject *input)
1393 {
1394     Py_buffer vbuf;
1395     PyObject *result;
1396     PyStructObject *soself = (PyStructObject *)self;
1397
1398     assert(PyStruct_Check(self));
1399     assert(soself->s_codes != NULL);
1400     if (PyObject_GetBuffer(input, &vbuf, PyBUF_SIMPLE) < 0)
1401         return NULL;
1402     if (vbuf.len != soself->s_size) {
1403         PyErr_Format(StructError,
1404                      "unpack requires a bytes argument of length %zd",
1405                      soself->s_size);
1406         PyBuffer_Release(&vbuf);
1407         return NULL;
1408     }
1409     result = s_unpack_internal(soself, vbuf.buf);
1410     PyBuffer_Release(&vbuf);
1411     return result;
1412 }
1413
1414 PyDoc_STRVAR(s_unpack_from__doc__,
1415 "S.unpack_from(buffer, offset=0) -> (v1, v2, ...)\n\
1416 \n\
1417 Return a tuple containing values unpacked according to the format\n\
1418 string S.format.  Requires len(buffer[offset:]) >= S.size.  See\n\
1419 help(struct) for more on format strings.");
1420
1421 static PyObject *
1422 s_unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
1423 {
1424     static char *kwlist[] = {"buffer", "offset", 0};
1425
1426     PyObject *input;
1427     Py_ssize_t offset = 0;
1428     Py_buffer vbuf;
1429     PyObject *result;
1430     PyStructObject *soself = (PyStructObject *)self;
1431
1432     assert(PyStruct_Check(self));
1433     assert(soself->s_codes != NULL);
1434
1435     if (!PyArg_ParseTupleAndKeywords(args, kwds,
1436                                      "O|n:unpack_from", kwlist,
1437                                      &input, &offset))
1438         return NULL;
1439     if (PyObject_GetBuffer(input, &vbuf, PyBUF_SIMPLE) < 0)
1440         return NULL;
1441     if (offset < 0)
1442         offset += vbuf.len;
1443     if (offset < 0 || vbuf.len - offset < soself->s_size) {
1444         PyErr_Format(StructError,
1445             "unpack_from requires a buffer of at least %zd bytes",
1446             soself->s_size);
1447         PyBuffer_Release(&vbuf);
1448         return NULL;
1449     }
1450     result = s_unpack_internal(soself, (char*)vbuf.buf + offset);
1451     PyBuffer_Release(&vbuf);
1452     return result;
1453 }
1454
1455
1456 /*
1457  * Guts of the pack function.
1458  *
1459  * Takes a struct object, a tuple of arguments, and offset in that tuple of
1460  * argument for where to start processing the arguments for packing, and a
1461  * character buffer for writing the packed string.  The caller must insure
1462  * that the buffer may contain the required length for packing the arguments.
1463  * 0 is returned on success, 1 is returned if there is an error.
1464  *
1465  */
1466 static int
1467 s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char* buf)
1468 {
1469     formatcode *code;
1470     /* XXX(nnorwitz): why does i need to be a local?  can we use
1471        the offset parameter or do we need the wider width? */
1472     Py_ssize_t i;
1473
1474     memset(buf, '\0', soself->s_size);
1475     i = offset;
1476     for (code = soself->s_codes; code->fmtdef != NULL; code++) {
1477         Py_ssize_t n;
1478         PyObject *v = PyTuple_GET_ITEM(args, i++);
1479         const formatdef *e = code->fmtdef;
1480         char *res = buf + code->offset;
1481         if (e->format == 's') {
1482             int isstring;
1483             void *p;
1484             if (PyUnicode_Check(v)) {
1485                 v = _PyUnicode_AsDefaultEncodedString(v, NULL);
1486                 if (v == NULL)
1487                     return -1;
1488             }
1489             isstring = PyBytes_Check(v);
1490             if (!isstring && !PyByteArray_Check(v)) {
1491                 PyErr_SetString(StructError,
1492                                 "argument for 's' must be a bytes or string");
1493                 return -1;
1494             }
1495             if (isstring) {
1496                 n = PyBytes_GET_SIZE(v);
1497                 p = PyBytes_AS_STRING(v);
1498             }
1499             else {
1500                 n = PyByteArray_GET_SIZE(v);
1501                 p = PyByteArray_AS_STRING(v);
1502             }
1503             if (n > code->size)
1504                 n = code->size;
1505             if (n > 0)
1506                 memcpy(res, p, n);
1507         } else if (e->format == 'p') {
1508             int isstring;
1509             void *p;
1510             if (PyUnicode_Check(v)) {
1511                 v = _PyUnicode_AsDefaultEncodedString(v, NULL);
1512                 if (v == NULL)
1513                     return -1;
1514             }
1515             isstring = PyBytes_Check(v);
1516             if (!isstring && !PyByteArray_Check(v)) {
1517                 PyErr_SetString(StructError,
1518                                 "argument for 'p' must be a bytes or string");
1519                 return -1;
1520             }
1521             if (isstring) {
1522                 n = PyBytes_GET_SIZE(v);
1523                 p = PyBytes_AS_STRING(v);
1524             }
1525             else {
1526                 n = PyByteArray_GET_SIZE(v);
1527                 p = PyByteArray_AS_STRING(v);
1528             }
1529             if (n > (code->size - 1))
1530                 n = code->size - 1;
1531             if (n > 0)
1532                 memcpy(res + 1, p, n);
1533             if (n > 255)
1534                 n = 255;
1535             *res = Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char);
1536         } else {
1537             if (e->pack(res, v, e) < 0) {
1538                 if (PyLong_Check(v) && PyErr_ExceptionMatches(PyExc_OverflowError))
1539                     PyErr_SetString(StructError,
1540                                     "long too large to convert to int");
1541                 return -1;
1542             }
1543         }
1544     }
1545
1546     /* Success */
1547     return 0;
1548 }
1549
1550
1551 PyDoc_STRVAR(s_pack__doc__,
1552 "S.pack(v1, v2, ...) -> bytes\n\
1553 \n\
1554 Return a bytes object containing values v1, v2, ... packed according\n\
1555 to the format string S.format.  See help(struct) for more on format\n\
1556 strings.");
1557
1558 static PyObject *
1559 s_pack(PyObject *self, PyObject *args)
1560 {
1561     PyStructObject *soself;
1562     PyObject *result;
1563
1564     /* Validate arguments. */
1565     soself = (PyStructObject *)self;
1566     assert(PyStruct_Check(self));
1567     assert(soself->s_codes != NULL);
1568     if (PyTuple_GET_SIZE(args) != soself->s_len)
1569     {
1570         PyErr_Format(StructError,
1571             "pack requires exactly %zd arguments", soself->s_len);
1572         return NULL;
1573     }
1574
1575     /* Allocate a new string */
1576     result = PyBytes_FromStringAndSize((char *)NULL, soself->s_size);
1577     if (result == NULL)
1578         return NULL;
1579
1580     /* Call the guts */
1581     if ( s_pack_internal(soself, args, 0, PyBytes_AS_STRING(result)) != 0 ) {
1582         Py_DECREF(result);
1583         return NULL;
1584     }
1585
1586     return result;
1587 }
1588
1589 PyDoc_STRVAR(s_pack_into__doc__,
1590 "S.pack_into(buffer, offset, v1, v2, ...)\n\
1591 \n\
1592 Pack the values v1, v2, ... according to the format string S.format\n\
1593 and write the packed bytes into the writable buffer buf starting at\n\
1594 offset.  Note that the offset is a required argument.  See\n\
1595 help(struct) for more on format strings.");
1596
1597 static PyObject *
1598 s_pack_into(PyObject *self, PyObject *args)
1599 {
1600     PyStructObject *soself;
1601     char *buffer;
1602     Py_ssize_t buffer_len, offset;
1603
1604     /* Validate arguments.  +1 is for the first arg as buffer. */
1605     soself = (PyStructObject *)self;
1606     assert(PyStruct_Check(self));
1607     assert(soself->s_codes != NULL);
1608     if (PyTuple_GET_SIZE(args) != (soself->s_len + 2))
1609     {
1610         PyErr_Format(StructError,
1611                      "pack_into requires exactly %zd arguments",
1612                      (soself->s_len + 2));
1613         return NULL;
1614     }
1615
1616     /* Extract a writable memory buffer from the first argument */
1617     if ( PyObject_AsWriteBuffer(PyTuple_GET_ITEM(args, 0),
1618                                                             (void**)&buffer, &buffer_len) == -1 ) {
1619         return NULL;
1620     }
1621     assert( buffer_len >= 0 );
1622
1623     /* Extract the offset from the first argument */
1624     offset = PyNumber_AsSsize_t(PyTuple_GET_ITEM(args, 1), PyExc_IndexError);
1625     if (offset == -1 && PyErr_Occurred())
1626         return NULL;
1627
1628     /* Support negative offsets. */
1629     if (offset < 0)
1630         offset += buffer_len;
1631
1632     /* Check boundaries */
1633     if (offset < 0 || (buffer_len - offset) < soself->s_size) {
1634         PyErr_Format(StructError,
1635                      "pack_into requires a buffer of at least %zd bytes",
1636                      soself->s_size);
1637         return NULL;
1638     }
1639
1640     /* Call the guts */
1641     if ( s_pack_internal(soself, args, 2, buffer + offset) != 0 ) {
1642         return NULL;
1643     }
1644
1645     Py_RETURN_NONE;
1646 }
1647
1648 static PyObject *
1649 s_get_format(PyStructObject *self, void *unused)
1650 {
1651     Py_INCREF(self->s_format);
1652     return self->s_format;
1653 }
1654
1655 static PyObject *
1656 s_get_size(PyStructObject *self, void *unused)
1657 {
1658     return PyLong_FromSsize_t(self->s_size);
1659 }
1660
/* Method table of the Struct type (installed as tp_methods).
   pack and pack_into take their values positionally, unpack takes
   exactly one object, and unpack_from also accepts keyword arguments,
   which is why it needs the PyCFunction cast. */

static struct PyMethodDef s_methods[] = {
    {"pack",            s_pack,         METH_VARARGS, s_pack__doc__},
    {"pack_into",       s_pack_into,    METH_VARARGS, s_pack_into__doc__},
    {"unpack",          s_unpack,       METH_O, s_unpack__doc__},
    {"unpack_from",     (PyCFunction)s_unpack_from, METH_VARARGS|METH_KEYWORDS,
                    s_unpack_from__doc__},
    {NULL,       NULL}          /* sentinel */
};
1671
/* Docstring of the Struct type itself (installed as tp_doc). */
PyDoc_STRVAR(s__doc__,
"Struct(fmt) --> compiled struct object\n"
"\n"
"Return a new Struct object which writes and reads binary data according to\n"
"the format string fmt.  See help(struct) for more on format strings.");
1677
/* Byte offset of member x inside PyStructObject. */
#define OFF(x) offsetof(PyStructObject, x)

/* Computed attributes of Struct instances.  Both entries have a NULL
   setter, i.e. only a getter is provided for "format" and "size". */
static PyGetSetDef s_getsetlist[] = {
    {"format", (getter)s_get_format, (setter)NULL, "struct format string", NULL},
    {"size", (getter)s_get_size, (setter)NULL, "struct size in bytes", NULL},
    {NULL} /* sentinel */
};
1685
/* Type object for Struct.  The type pointer in the header is left NULL
   here and filled in by PyInit__struct() before PyType_Ready() runs.
   The type can be subclassed (Py_TPFLAGS_BASETYPE) and supports weak
   references through the weakreflist member. */
static
PyTypeObject PyStructType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "Struct",                                   /* tp_name */
    sizeof(PyStructObject),                     /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)s_dealloc,      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    PyObject_GenericSetAttr,            /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    s__doc__,                           /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyStructObject, weakreflist),      /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    s_methods,                          /* tp_methods */
    NULL,                               /* tp_members */
    s_getsetlist,               /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    s_init,                             /* tp_init */
    PyType_GenericAlloc,/* tp_alloc */
    s_new,                              /* tp_new */
    PyObject_Del,               /* tp_free */
};
1728
1729
1730 /* ---- Standalone functions  ---- */
1731
1732 #define MAXCACHE 100
1733 static PyObject *cache = NULL;
1734
1735 static PyObject *
1736 cache_struct(PyObject *fmt)
1737 {
1738     PyObject * s_object;
1739
1740     if (cache == NULL) {
1741         cache = PyDict_New();
1742         if (cache == NULL)
1743             return NULL;
1744     }
1745
1746     s_object = PyDict_GetItem(cache, fmt);
1747     if (s_object != NULL) {
1748         Py_INCREF(s_object);
1749         return s_object;
1750     }
1751
1752     s_object = PyObject_CallFunctionObjArgs((PyObject *)(&PyStructType), fmt, NULL);
1753     if (s_object != NULL) {
1754         if (PyDict_Size(cache) >= MAXCACHE)
1755             PyDict_Clear(cache);
1756         /* Attempt to cache the result */
1757         if (PyDict_SetItem(cache, fmt, s_object) == -1)
1758             PyErr_Clear();
1759     }
1760     return s_object;
1761 }
1762
1763 PyDoc_STRVAR(clearcache_doc,
1764 "Clear the internal cache.");
1765
1766 static PyObject *
1767 clearcache(PyObject *self)
1768 {
1769     Py_CLEAR(cache);
1770     Py_RETURN_NONE;
1771 }
1772
1773 PyDoc_STRVAR(calcsize_doc,
1774 "calcsize(fmt) -> integer\n\
1775 \n\
1776 Return size in bytes of the struct described by the format string fmt.");
1777
1778 static PyObject *
1779 calcsize(PyObject *self, PyObject *fmt)
1780 {
1781     Py_ssize_t n;
1782     PyObject *s_object = cache_struct(fmt);
1783     if (s_object == NULL)
1784         return NULL;
1785     n = ((PyStructObject *)s_object)->s_size;
1786     Py_DECREF(s_object);
1787     return PyLong_FromSsize_t(n);
1788 }
1789
1790 PyDoc_STRVAR(pack_doc,
1791 "pack(fmt, v1, v2, ...) -> bytes\n\
1792 \n\
1793 Return a bytes object containing the values v1, v2, ... packed according\n\
1794 to the format string fmt.  See help(struct) for more on format strings.");
1795
1796 static PyObject *
1797 pack(PyObject *self, PyObject *args)
1798 {
1799     PyObject *s_object, *fmt, *newargs, *result;
1800     Py_ssize_t n = PyTuple_GET_SIZE(args);
1801
1802     if (n == 0) {
1803         PyErr_SetString(PyExc_TypeError, "missing format argument");
1804         return NULL;
1805     }
1806     fmt = PyTuple_GET_ITEM(args, 0);
1807     newargs = PyTuple_GetSlice(args, 1, n);
1808     if (newargs == NULL)
1809         return NULL;
1810
1811     s_object = cache_struct(fmt);
1812     if (s_object == NULL) {
1813         Py_DECREF(newargs);
1814         return NULL;
1815     }
1816     result = s_pack(s_object, newargs);
1817     Py_DECREF(newargs);
1818     Py_DECREF(s_object);
1819     return result;
1820 }
1821
1822 PyDoc_STRVAR(pack_into_doc,
1823 "pack_into(fmt, buffer, offset, v1, v2, ...)\n\
1824 \n\
1825 Pack the values v1, v2, ... according to the format string fmt and write\n\
1826 the packed bytes into the writable buffer buf starting at offset.  Note\n\
1827 that the offset is a required argument.  See help(struct) for more\n\
1828 on format strings.");
1829
1830 static PyObject *
1831 pack_into(PyObject *self, PyObject *args)
1832 {
1833     PyObject *s_object, *fmt, *newargs, *result;
1834     Py_ssize_t n = PyTuple_GET_SIZE(args);
1835
1836     if (n == 0) {
1837         PyErr_SetString(PyExc_TypeError, "missing format argument");
1838         return NULL;
1839     }
1840     fmt = PyTuple_GET_ITEM(args, 0);
1841     newargs = PyTuple_GetSlice(args, 1, n);
1842     if (newargs == NULL)
1843         return NULL;
1844
1845     s_object = cache_struct(fmt);
1846     if (s_object == NULL) {
1847         Py_DECREF(newargs);
1848         return NULL;
1849     }
1850     result = s_pack_into(s_object, newargs);
1851     Py_DECREF(newargs);
1852     Py_DECREF(s_object);
1853     return result;
1854 }
1855
1856 PyDoc_STRVAR(unpack_doc,
1857 "unpack(fmt, buffer) -> (v1, v2, ...)\n\
1858 \n\
1859 Return a tuple containing values unpacked according to the format string\n\
1860 fmt.  Requires len(buffer) == calcsize(fmt). See help(struct) for more\n\
1861 on format strings.");
1862
1863 static PyObject *
1864 unpack(PyObject *self, PyObject *args)
1865 {
1866     PyObject *s_object, *fmt, *inputstr, *result;
1867
1868     if (!PyArg_UnpackTuple(args, "unpack", 2, 2, &fmt, &inputstr))
1869         return NULL;
1870
1871     s_object = cache_struct(fmt);
1872     if (s_object == NULL)
1873         return NULL;
1874     result = s_unpack(s_object, inputstr);
1875     Py_DECREF(s_object);
1876     return result;
1877 }
1878
1879 PyDoc_STRVAR(unpack_from_doc,
1880 "unpack_from(fmt, buffer, offset=0) -> (v1, v2, ...)\n\
1881 \n\
1882 Return a tuple containing values unpacked according to the format string\n\
1883 fmt.  Requires len(buffer[offset:]) >= calcsize(fmt).  See help(struct)\n\
1884 for more on format strings.");
1885
1886 static PyObject *
1887 unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
1888 {
1889     PyObject *s_object, *fmt, *newargs, *result;
1890     Py_ssize_t n = PyTuple_GET_SIZE(args);
1891
1892     if (n == 0) {
1893         PyErr_SetString(PyExc_TypeError, "missing format argument");
1894         return NULL;
1895     }
1896     fmt = PyTuple_GET_ITEM(args, 0);
1897     newargs = PyTuple_GetSlice(args, 1, n);
1898     if (newargs == NULL)
1899         return NULL;
1900
1901     s_object = cache_struct(fmt);
1902     if (s_object == NULL) {
1903         Py_DECREF(newargs);
1904         return NULL;
1905     }
1906     result = s_unpack_from(s_object, newargs, kwds);
1907     Py_DECREF(newargs);
1908     Py_DECREF(s_object);
1909     return result;
1910 }
1911
/* Module-level functions of _struct.  Apart from _clearcache, each one
   obtains a Struct for its format via cache_struct(); calcsize reads its
   size and the others forward to the matching Struct method. */
static struct PyMethodDef module_functions[] = {
    {"_clearcache",     (PyCFunction)clearcache,        METH_NOARGS,    clearcache_doc},
    {"calcsize",        calcsize,       METH_O,         calcsize_doc},
    {"pack",            pack,           METH_VARARGS,   pack_doc},
    {"pack_into",       pack_into,      METH_VARARGS,   pack_into_doc},
    {"unpack",          unpack,         METH_VARARGS,   unpack_doc},
    {"unpack_from",     (PyCFunction)unpack_from,
                    METH_VARARGS|METH_KEYWORDS,         unpack_from_doc},
    {NULL,       NULL}          /* sentinel */
};
1922
1923
1924 /* Module initialization */
1925
/* Docstring of the _struct module (passed to PyModule_Create() through
   _structmodule).  It summarises the byte-order prefixes and the format
   characters accepted in format strings. */
PyDoc_STRVAR(module_doc,
"Functions to convert between Python values and C structs.\n\
Python bytes objects are used to hold the data representing the C struct\n\
and also as format strings (explained below) to describe the layout of data\n\
in the C struct.\n\
\n\
The optional first format char indicates byte order, size and alignment:\n\
  @: native order, size & alignment (default)\n\
  =: native order, std. size & alignment\n\
  <: little-endian, std. size & alignment\n\
  >: big-endian, std. size & alignment\n\
  !: same as >\n\
\n\
The remaining chars indicate types of args and must match exactly;\n\
these can be preceded by a decimal repeat count:\n\
  x: pad byte (no data); c:char; b:signed byte; B:unsigned byte;\n\
  ?: _Bool (requires C99; if not available, char is used instead)\n\
  h:short; H:unsigned short; i:int; I:unsigned int;\n\
  l:long; L:unsigned long; f:float; d:double.\n\
Special cases (preceding decimal count indicates length):\n\
  s:string (array of char); p: pascal string (with count byte).\n\
Special case (only available in native format):\n\
  P:an integer type that is wide enough to hold a pointer.\n\
Special case (not in native mode unless 'long long' in platform C):\n\
  q:long long; Q:unsigned long long\n\
Whitespace between formats is ignored.\n\
\n\
The variable struct.error is an exception raised on errors.\n");
1954
1955
/* Module definition handed to PyModule_Create() in PyInit__struct(). */
static struct PyModuleDef _structmodule = {
    PyModuleDef_HEAD_INIT,
    "_struct",              /* module name */
    module_doc,             /* module docstring */
    -1,                     /* m_size */
    module_functions,       /* module-level functions */
    /* The remaining optional slots are unused. */
    NULL,
    NULL,
    NULL,
    NULL
};
1967
1968 PyMODINIT_FUNC
1969 PyInit__struct(void)
1970 {
1971     PyObject *ver, *m;
1972
1973     ver = PyBytes_FromString("0.3");
1974     if (ver == NULL)
1975         return NULL;
1976
1977     m = PyModule_Create(&_structmodule);
1978     if (m == NULL)
1979         return NULL;
1980
1981     Py_TYPE(&PyStructType) = &PyType_Type;
1982     if (PyType_Ready(&PyStructType) < 0)
1983         return NULL;
1984
1985     /* Check endian and swap in faster functions */
1986     {
1987         int one = 1;
1988         formatdef *native = native_table;
1989         formatdef *other, *ptr;
1990         if ((int)*(unsigned char*)&one)
1991             other = lilendian_table;
1992         else
1993             other = bigendian_table;
1994         /* Scan through the native table, find a matching
1995            entry in the endian table and swap in the
1996            native implementations whenever possible
1997            (64-bit platforms may not have "standard" sizes) */
1998         while (native->format != '\0' && other->format != '\0') {
1999             ptr = other;
2000             while (ptr->format != '\0') {
2001                 if (ptr->format == native->format) {
2002                     /* Match faster when formats are
2003                        listed in the same order */
2004                     if (ptr == other)
2005                         other++;
2006                     /* Only use the trick if the
2007                        size matches */
2008                     if (ptr->size != native->size)
2009                         break;
2010                     /* Skip float and double, could be
2011                        "unknown" float format */
2012                     if (ptr->format == 'd' || ptr->format == 'f')
2013                         break;
2014                     ptr->pack = native->pack;
2015                     ptr->unpack = native->unpack;
2016                     break;
2017                 }
2018                 ptr++;
2019             }
2020             native++;
2021         }
2022     }
2023
2024     /* Add some symbolic constants to the module */
2025     if (StructError == NULL) {
2026         StructError = PyErr_NewException("struct.error", NULL, NULL);
2027         if (StructError == NULL)
2028             return NULL;
2029     }
2030
2031     Py_INCREF(StructError);
2032     PyModule_AddObject(m, "error", StructError);
2033
2034     Py_INCREF((PyObject*)&PyStructType);
2035     PyModule_AddObject(m, "Struct", (PyObject*)&PyStructType);
2036
2037     PyModule_AddObject(m, "__version__", ver);
2038
2039     return m;
2040 }