Objects/dictobject.c

   1
   2 /* Dictionary object implementation using a hash table */
   3
   4 /* The distribution includes a separate file, Objects/dictnotes.txt,
   5    describing explorations into dictionary design and optimization.
   6    It covers typical dictionary use patterns, the parameters for
   7    tuning dictionaries, and several ideas for possible optimizations.
   8 */
   9
  10 #include "Python.h"
  11
  12
  13 /* Set a key error with the specified argument, wrapping it in a
  14  * tuple automatically so that tuple keys are not unpacked as the
  15  * exception arguments. */
  16 static void
  17 set_key_error(PyObject *arg)
  18 {
  19         PyObject *tup;
  20         tup = PyTuple_Pack(1, arg);
  21         if (!tup)
  22                 return; /* caller will expect error to be set anyway */
  23         PyErr_SetObject(PyExc_KeyError, tup);
  24         Py_DECREF(tup);
  25 }
  26
  27 /* Define this out if you don't want conversion statistics on exit. */
  28 #undef SHOW_CONVERSION_COUNTS
  29
  30 /* See large comment block below.  This must be >= 1. */
  31 #define PERTURB_SHIFT 5
  32
  33 /*
  34 Major subtleties ahead:  Most hash schemes depend on having a "good" hash
  35 function, in the sense of simulating randomness.  Python doesn't:  its most
  36 important hash functions (for strings and ints) are very regular in common
  37 cases:
  38
  39 >>> map(hash, (0, 1, 2, 3))
  40 [0, 1, 2, 3]
  41 >>> map(hash, ("namea", "nameb", "namec", "named"))
  42 [-1658398457, -1658398460, -1658398459, -1658398462]
  43 >>>
  44
  45 This isn't necessarily bad!  To the contrary, in a table of size 2**i, taking
  46 the low-order i bits as the initial table index is extremely fast, and there
  47 are no collisions at all for dicts indexed by a contiguous range of ints.
  48 The same is approximately true when keys are "consecutive" strings.  So this
  49 gives better-than-random behavior in common cases, and that's very desirable.
  50
  51 OTOH, when collisions occur, the tendency to fill contiguous slices of the
  52 hash table makes a good collision resolution strategy crucial.  Taking only
  53 the last i bits of the hash code is also vulnerable:  for example, consider
  54 [i << 16 for i in range(20000)] as a set of keys.  Since ints are their own
  55 hash codes, and this fits in a dict of size 2**15, the last 15 bits of every
  56 hash code are all 0:  they *all* map to the same table index.
  57
  58 But catering to unusual cases should not slow the usual ones, so we just take
  59 the last i bits anyway.  It's up to collision resolution to do the rest.  If
  60 we *usually* find the key we're looking for on the first try (and, it turns
  61 out, we usually do -- the table load factor is kept under 2/3, so the odds
  62 are solidly in our favor), then it makes best sense to keep the initial index
  63 computation dirt cheap.
  64
  65 The first half of collision resolution is to visit table indices via this
  66 recurrence:
  67
  68     j = ((5*j) + 1) mod 2**i
  69
  70 For any initial j in range(2**i), repeating that 2**i times generates each
  71 int in range(2**i) exactly once (see any text on random-number generation for
  72 proof).  By itself, this doesn't help much:  like linear probing (setting
  73 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
  74 order.  This would be bad, except that's not the only thing we do, and it's
  75 actually *good* in the common cases where hash keys are consecutive.  In an
  76 example that's really too small to make this entirely clear, for a table of
  77 size 2**3 the order of indices is:
  78
  79     0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
  80
  81 If two things come in at index 5, the first place we look after is index 2,
  82 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
  83 Linear probing is deadly in this case because there the fixed probe order
  84 is the *same* as the order consecutive keys are likely to arrive.  But it's
  85 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
  86 and certain that consecutive hash codes do not.
  87
  88 The other half of the strategy is to get the other bits of the hash code
  89 into play.  This is done by initializing a (unsigned) vrbl "perturb" to the
  90 full hash code, and changing the recurrence to:
  91
  92     j = (5*j) + 1 + perturb;
  93     perturb >>= PERTURB_SHIFT;
  94     use j % 2**i as the next table index;
  95
  96 Now the probe sequence depends (eventually) on every bit in the hash code,
  97 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
  98 because it quickly magnifies small differences in the bits that didn't affect
  99 the initial index.  Note that because perturb is unsigned, if the recurrence
 100 is executed often enough perturb eventually becomes and remains 0.  At that
 101 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
 102 that's certain to find an empty slot eventually (since it generates every int
 103 in range(2**i), and we make sure there's always at least one empty slot).
 104
 105 Selecting a good value for PERTURB_SHIFT is a balancing act.  You want it
 106 small so that the high bits of the hash code continue to affect the probe
 107 sequence across iterations; but you want it large so that in really bad cases
 108 the high-order hash bits have an effect on early iterations.  5 was "the
 109 best" in minimizing total collisions across experiments Tim Peters ran (on
 110 both normal and pathological cases), but 4 and 6 weren't significantly worse.
 111
 112 Historical:  Reimer Behrends contributed the idea of using a polynomial-based
 113 approach, using repeated multiplication by x in GF(2**n) where an irreducible
 114 polynomial for each table size was chosen such that x was a primitive root.
 115 Christian Tismer later extended that to use division by x instead, as an
 116 efficient way to get the high bits of the hash code into play.  This scheme
 117 also gave excellent collision statistics, but was more expensive:  two
 118 if-tests were required inside the loop; computing "the next" index took about
 119 the same number of operations but without as much potential parallelism
 120 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
 121 above, and then shifting perturb can be done while the table index is being
 122 masked); and the PyDictObject struct required a member to hold the table's
 123 polynomial.  In Tim's experiments the current scheme ran faster, produced
 124 equally good collision statistics, needed less code & used less memory.
 125
 126 Theoretical Python 2.5 headache:  hash codes are only C "long", but
 127 sizeof(Py_ssize_t) > sizeof(long) may be possible.  In that case, and if a
 128 dict is genuinely huge, then only the slots directly reachable via indexing
 129 by a C long can be the first slot in a probe sequence.  The probe sequence
 130 will still eventually reach every slot in the table, but the collision rate
 131 on initial probes may be much higher than this scheme was designed for.
 132 Getting a hash code as fat as Py_ssize_t is the only real cure.  But in
 133 practice, this probably won't make a lick of difference for many years (at
 134 which point everyone will have terabytes of RAM on 64-bit boxes).
 135 */
 136
 137 /* Object used as dummy key to fill deleted entries */
 138 static PyObject *dummy = NULL; /* Initialized by first call to newPyDictObject() */
 139
 140 #ifdef Py_REF_DEBUG
 141 PyObject *
 142 _PyDict_Dummy(void)
 143 {
 144         return dummy;
 145 }
 146 #endif
 147
 148 /* forward declarations */
 149 static PyDictEntry *
 150 lookdict_string(PyDictObject *mp, PyObject *key, long hash);
 151
 152 #ifdef SHOW_CONVERSION_COUNTS
 153 static long created = 0L;
 154 static long converted = 0L;
 155
 156 static void
 157 show_counts(void)
 158 {
 159         fprintf(stderr, "created %ld string dicts\n", created);
 160         fprintf(stderr, "converted %ld to normal dicts\n", converted);
 161         fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
 162 }
 163 #endif
 164
 165 /* Debug statistic to compare allocations with reuse through the free list */
 166 #undef SHOW_ALLOC_COUNT
 167 #ifdef SHOW_ALLOC_COUNT
 168 static size_t count_alloc = 0;
 169 static size_t count_reuse = 0;
 170
 171 static void
 172 show_alloc(void)
 173 {
 174         fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n",
 175                 count_alloc);
 176         fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T
 177                 "d\n", count_reuse);
 178         fprintf(stderr, "%.2f%% reuse rate\n\n",
 179                 (100.0*count_reuse/(count_alloc+count_reuse)));
 180 }
 181 #endif
 182
 183 /* Initialization macros.
 184    There are two ways to create a dict:  PyDict_New() is the main C API
 185    function, and the tp_new slot maps to dict_new().  In the latter case we
 186    can save a little time over what PyDict_New does because it's guaranteed
 187    that the PyDictObject struct is already zeroed out.
 188    Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have
 189    an excellent reason not to).
 190 */
 191
 192 #define INIT_NONZERO_DICT_SLOTS(mp) do {                                \
 193         (mp)->ma_table = (mp)->ma_smalltable;                           \
 194         (mp)->ma_mask = PyDict_MINSIZE - 1;                             \
 195     } while(0)
 196
 197 #define EMPTY_TO_MINSIZE(mp) do {                                       \
 198         memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable));    \
 199         (mp)->ma_used = (mp)->ma_fill = 0;                              \
 200         INIT_NONZERO_DICT_SLOTS(mp);                                    \
 201     } while(0)
 202
 203 /* Dictionary reuse scheme to save calls to malloc, free, and memset */
 204 #ifndef PyDict_MAXFREELIST
 205 #define PyDict_MAXFREELIST 80
 206 #endif
 207 static PyDictObject *free_list[PyDict_MAXFREELIST];
 208 static int numfree = 0;
 209
 210 void
 211 PyDict_Fini(void)
 212 {
 213         PyDictObject *op;
 214
 215         while (numfree) {
 216                 op = free_list[--numfree];
 217                 assert(PyDict_CheckExact(op));
 218                 PyObject_GC_Del(op);
 219         }
 220 }
 221
 222 PyObject *
 223 PyDict_New(void)
 224 {
 225         register PyDictObject *mp;
 226         if (dummy == NULL) { /* Auto-initialize dummy */
 227                 dummy = PyString_FromString("<dummy key>");
 228                 if (dummy == NULL)
 229                         return NULL;
 230 #ifdef SHOW_CONVERSION_COUNTS
 231                 Py_AtExit(show_counts);
 232 #endif
 233 #ifdef SHOW_ALLOC_COUNT
 234                 Py_AtExit(show_alloc);
 235 #endif
 236         }
 237         if (numfree) {
 238                 mp = free_list[--numfree];
 239                 assert (mp != NULL);
 240                 assert (Py_TYPE(mp) == &PyDict_Type);
 241                 _Py_NewReference((PyObject *)mp);
 242                 if (mp->ma_fill) {
 243                         EMPTY_TO_MINSIZE(mp);
 244                 }
 245                 assert (mp->ma_used == 0);
 246                 assert (mp->ma_table == mp->ma_smalltable);
 247                 assert (mp->ma_mask == PyDict_MINSIZE - 1);
 248 #ifdef SHOW_ALLOC_COUNT
 249                 count_reuse++;
 250 #endif
 251         } else {
 252                 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
 253                 if (mp == NULL)
 254                         return NULL;
 255                 EMPTY_TO_MINSIZE(mp);
 256 #ifdef SHOW_ALLOC_COUNT
 257                 count_alloc++;
 258 #endif
 259         }
 260         mp->ma_lookup = lookdict_string;
 261 #ifdef SHOW_CONVERSION_COUNTS
 262         ++created;
 263 #endif
 264         _PyObject_GC_TRACK(mp);
 265         return (PyObject *)mp;
 266 }
 267
 268 /*
 269 The basic lookup function used by all operations.
 270 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
 271 Open addressing is preferred over chaining since the link overhead for
 272 chaining would be substantial (100% with typical malloc overhead).
 273
 274 The initial probe index is computed as hash mod the table size. Subsequent
 275 probe indices are computed as explained earlier.
 276
 277 All arithmetic on hash should ignore overflow.
 278
 279 (The details in this version are due to Tim Peters, building on many past
 280 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
 281 Christian Tismer).
 282
 283 lookdict() is general-purpose, and may return NULL if (and only if) a
 284 comparison raises an exception (this was new in Python 2.5).
 285 lookdict_string() below is specialized to string keys, comparison of which can
 286 never raise an exception; that function can never return NULL.  For both, when
 287 the key isn't found a PyDictEntry* is returned for which the me_value field is
 288 NULL; this is the slot in the dict at which the key would have been found, and
 289 the caller can (if it wishes) add the <key, value> pair to the returned
 290 PyDictEntry*.
 291 */
 292 static PyDictEntry *
 293 lookdict(PyDictObject *mp, PyObject *key, register long hash)
 294 {
 295         register size_t i;
 296         register size_t perturb;
 297         register PyDictEntry *freeslot;
 298         register size_t mask = (size_t)mp->ma_mask;
 299         PyDictEntry *ep0 = mp->ma_table;
 300         register PyDictEntry *ep;
 301         register int cmp;
 302         PyObject *startkey;
 303
 304         i = (size_t)hash & mask;
 305         ep = &ep0[i];
 306         if (ep->me_key == NULL || ep->me_key == key)
 307                 return ep;
 308
 309         if (ep->me_key == dummy)
 310                 freeslot = ep;
 311         else {
 312                 if (ep->me_hash == hash) {
 313                         startkey = ep->me_key;
 314                         Py_INCREF(startkey);
 315                         cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
 316                         Py_DECREF(startkey);
 317                         if (cmp < 0)
 318                                 return NULL;
 319                         if (ep0 == mp->ma_table && ep->me_key == startkey) {
 320                                 if (cmp > 0)
 321                                         return ep;
 322                         }
 323                         else {
 324                                 /* The compare did major nasty stuff to the
 325                                  * dict:  start over.
 326                                  * XXX A clever adversary could prevent this
 327                                  * XXX from terminating.
 328                                  */
 329                                 return lookdict(mp, key, hash);
 330                         }
 331                 }
 332                 freeslot = NULL;
 333         }
 334
 335         /* In the loop, me_key == dummy is by far (factor of 100s) the
 336            least likely outcome, so test for that last. */
 337         for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
 338                 i = (i << 2) + i + perturb + 1;
 339                 ep = &ep0[i & mask];
 340                 if (ep->me_key == NULL)
 341                         return freeslot == NULL ? ep : freeslot;
 342                 if (ep->me_key == key)
 343                         return ep;
 344                 if (ep->me_hash == hash && ep->me_key != dummy) {
 345                         startkey = ep->me_key;
 346                         Py_INCREF(startkey);
 347                         cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
 348                         Py_DECREF(startkey);
 349                         if (cmp < 0)
 350                                 return NULL;
 351                         if (ep0 == mp->ma_table && ep->me_key == startkey) {
 352                                 if (cmp > 0)
 353                                         return ep;
 354                         }
 355                         else {
 356                                 /* The compare did major nasty stuff to the
 357                                  * dict:  start over.
 358                                  * XXX A clever adversary could prevent this
 359                                  * XXX from terminating.
 360                                  */
 361                                 return lookdict(mp, key, hash);
 362                         }
 363                 }
 364                 else if (ep->me_key == dummy && freeslot == NULL)
 365                         freeslot = ep;
 366         }
 367         assert(0);      /* NOT REACHED */
 368         return 0;
 369 }
 370
 371 /*
 372  * Hacked up version of lookdict which can assume keys are always strings;
 373  * this assumption allows testing for errors during PyObject_RichCompareBool()
 374  * to be dropped; string-string comparisons never raise exceptions.  This also
 375  * means we don't need to go through PyObject_RichCompareBool(); we can always
 376  * use _PyString_Eq() directly.
 377  *
 378  * This is valuable because dicts with only string keys are very common.
 379  */
 380 static PyDictEntry *
 381 lookdict_string(PyDictObject *mp, PyObject *key, register long hash)
 382 {
 383         register size_t i;
 384         register size_t perturb;
 385         register PyDictEntry *freeslot;
 386         register size_t mask = (size_t)mp->ma_mask;
 387         PyDictEntry *ep0 = mp->ma_table;
 388         register PyDictEntry *ep;
 389
 390         /* Make sure this function doesn't have to handle non-string keys,
 391            including subclasses of str; e.g., one reason to subclass
 392            strings is to override __eq__, and for speed we don't cater to
 393            that here. */
 394         if (!PyString_CheckExact(key)) {
 395 #ifdef SHOW_CONVERSION_COUNTS
 396                 ++converted;
 397 #endif
 398                 mp->ma_lookup = lookdict;
 399                 return lookdict(mp, key, hash);
 400         }
 401         i = hash & mask;
 402         ep = &ep0[i];
 403         if (ep->me_key == NULL || ep->me_key == key)
 404                 return ep;
 405         if (ep->me_key == dummy)
 406                 freeslot = ep;
 407         else {
 408                 if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key))
 409                         return ep;
 410                 freeslot = NULL;
 411         }
 412
 413         /* In the loop, me_key == dummy is by far (factor of 100s) the
 414            least likely outcome, so test for that last. */
 415         for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
 416                 i = (i << 2) + i + perturb + 1;
 417                 ep = &ep0[i & mask];
 418                 if (ep->me_key == NULL)
 419                         return freeslot == NULL ? ep : freeslot;
 420                 if (ep->me_key == key
 421                     || (ep->me_hash == hash
 422                         && ep->me_key != dummy
 423                         && _PyString_Eq(ep->me_key, key)))
 424                         return ep;
 425                 if (ep->me_key == dummy && freeslot == NULL)
 426                         freeslot = ep;
 427         }
 428         assert(0);      /* NOT REACHED */
 429         return 0;
 430 }
 431
 432 /*
 433 Internal routine to insert a new item into the table.
 434 Used both by the internal resize routine and by the public insert routine.
 435 Eats a reference to key and one to value.
 436 Returns -1 if an error occurred, or 0 on success.
 437 */
 438 static int
 439 insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
 440 {
 441         PyObject *old_value;
 442         register PyDictEntry *ep;
 443         typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, long);
 444
 445         assert(mp->ma_lookup != NULL);
 446         ep = mp->ma_lookup(mp, key, hash);
 447         if (ep == NULL) {
 448                 Py_DECREF(key);
 449                 Py_DECREF(value);
 450                 return -1;
 451         }
 452         if (ep->me_value != NULL) {
 453                 old_value = ep->me_value;
 454                 ep->me_value = value;
 455                 Py_DECREF(old_value); /* which **CAN** re-enter */
 456                 Py_DECREF(key);
 457         }
 458         else {
 459                 if (ep->me_key == NULL)
 460                         mp->ma_fill++;
 461                 else {
 462                         assert(ep->me_key == dummy);
 463                         Py_DECREF(dummy);
 464                 }
 465                 ep->me_key = key;
 466                 ep->me_hash = (Py_ssize_t)hash;
 467                 ep->me_value = value;
 468                 mp->ma_used++;
 469         }
 470         return 0;
 471 }
 472
 473 /*
 474 Internal routine used by dictresize() to insert an item which is
 475 known to be absent from the dict.  This routine also assumes that
 476 the dict contains no deleted entries.  Besides the performance benefit,
 477 using insertdict() in dictresize() is dangerous (SF bug #1456209).
 478 Note that no refcounts are changed by this routine; if needed, the caller
 479 is responsible for incref'ing `key` and `value`.
 480 */
 481 static void
 482 insertdict_clean(register PyDictObject *mp, PyObject *key, long hash,
 483                  PyObject *value)
 484 {
 485         register size_t i;
 486         register size_t perturb;
 487         register size_t mask = (size_t)mp->ma_mask;
 488         PyDictEntry *ep0 = mp->ma_table;
 489         register PyDictEntry *ep;
 490
 491         i = hash & mask;
 492         ep = &ep0[i];
 493         for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
 494                 i = (i << 2) + i + perturb + 1;
 495                 ep = &ep0[i & mask];
 496         }
 497         assert(ep->me_value == NULL);
 498         mp->ma_fill++;
 499         ep->me_key = key;
 500         ep->me_hash = (Py_ssize_t)hash;
 501         ep->me_value = value;
 502         mp->ma_used++;
 503 }
 504
 505 /*
 506 Restructure the table by allocating a new table and reinserting all
 507 items again.  When entries have been deleted, the new table may
 508 actually be smaller than the old one.
 509 */
 510 static int
 511 dictresize(PyDictObject *mp, Py_ssize_t minused)
 512 {
 513         Py_ssize_t newsize;
 514         PyDictEntry *oldtable, *newtable, *ep;
 515         Py_ssize_t i;
 516         int is_oldtable_malloced;
 517         PyDictEntry small_copy[PyDict_MINSIZE];
 518
 519         assert(minused >= 0);
 520
 521         /* Find the smallest table size > minused. */
 522         for (newsize = PyDict_MINSIZE;
 523              newsize <= minused && newsize > 0;
 524              newsize <<= 1)
 525                 ;
 526         if (newsize <= 0) {
 527                 PyErr_NoMemory();
 528                 return -1;
 529         }
 530
 531         /* Get space for a new table. */
 532         oldtable = mp->ma_table;
 533         assert(oldtable != NULL);
 534         is_oldtable_malloced = oldtable != mp->ma_smalltable;
 535
 536         if (newsize == PyDict_MINSIZE) {
 537                 /* A large table is shrinking, or we can't get any smaller. */
 538                 newtable = mp->ma_smalltable;
 539                 if (newtable == oldtable) {
 540                         if (mp->ma_fill == mp->ma_used) {
 541                                 /* No dummies, so no point doing anything. */
 542                                 return 0;
 543                         }
 544                         /* We're not going to resize it, but rebuild the
 545                            table anyway to purge old dummy entries.
 546                            Subtle:  This is *necessary* if fill==size,
 547                            as lookdict needs at least one virgin slot to
 548                            terminate failing searches.  If fill < size, it's
 549                            merely desirable, as dummies slow searches. */
 550                         assert(mp->ma_fill > mp->ma_used);
 551                         memcpy(small_copy, oldtable, sizeof(small_copy));
 552                         oldtable = small_copy;
 553                 }
 554         }
 555         else {
 556                 newtable = PyMem_NEW(PyDictEntry, newsize);
 557                 if (newtable == NULL) {
 558                         PyErr_NoMemory();
 559                         return -1;
 560                 }
 561         }
 562
 563         /* Make the dict empty, using the new table. */
 564         assert(newtable != oldtable);
 565         mp->ma_table = newtable;
 566         mp->ma_mask = newsize - 1;
 567         memset(newtable, 0, sizeof(PyDictEntry) * newsize);
 568         mp->ma_used = 0;
 569         i = mp->ma_fill;
 570         mp->ma_fill = 0;
 571
 572         /* Copy the data over; this is refcount-neutral for active entries;
 573            dummy entries aren't copied over, of course */
 574         for (ep = oldtable; i > 0; ep++) {
 575                 if (ep->me_value != NULL) {     /* active entry */
 576                         --i;
 577                         insertdict_clean(mp, ep->me_key, (long)ep->me_hash,
 578                                          ep->me_value);
 579                 }
 580                 else if (ep->me_key != NULL) {  /* dummy entry */
 581                         --i;
 582                         assert(ep->me_key == dummy);
 583                         Py_DECREF(ep->me_key);
 584                 }
 585                 /* else key == value == NULL:  nothing to do */
 586         }
 587
 588         if (is_oldtable_malloced)
 589                 PyMem_DEL(oldtable);
 590         return 0;
 591 }
 592
 593 /* Create a new dictionary pre-sized to hold an estimated number of elements.
 594    Underestimates are okay because the dictionary will resize as necessary.
 595    Overestimates just mean the dictionary will be more sparse than usual.
 596 */
 597
 598 PyObject *
 599 _PyDict_NewPresized(Py_ssize_t minused)
 600 {
 601         PyObject *op = PyDict_New();
 602
 603         if (minused>5 && op != NULL && dictresize((PyDictObject *)op, minused) == -1) {
 604                 Py_DECREF(op);
 605                 return NULL;
 606         }
 607         return op;
 608 }
 609
 610 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
 611  * that may occur (originally dicts supported only string keys, and exceptions
 612  * weren't possible).  So, while the original intent was that a NULL return
 613  * meant the key wasn't present, in reality it can mean that, or that an error
 614  * (suppressed) occurred while computing the key's hash, or that some error
 615  * (suppressed) occurred when comparing keys in the dict's internal probe
 616  * sequence.  A nasty example of the latter is when a Python-coded comparison
 617  * function hits a stack-depth error, which can cause this to return NULL
 618  * even if the key is present.
 619  */
 620 PyObject *
 621 PyDict_GetItem(PyObject *op, PyObject *key)
 622 {
 623         long hash;
 624         PyDictObject *mp = (PyDictObject *)op;
 625         PyDictEntry *ep;
 626         PyThreadState *tstate;
 627         if (!PyDict_Check(op))
 628                 return NULL;
 629         if (!PyString_CheckExact(key) ||
 630             (hash = ((PyStringObject *) key)->ob_shash) == -1)
 631         {
 632                 hash = PyObject_Hash(key);
 633                 if (hash == -1) {
 634                         PyErr_Clear();
 635                         return NULL;
 636                 }
 637         }
 638
 639         /* We can arrive here with a NULL tstate during initialization:
 640            try running "python -Wi" for an example related to string
 641            interning.  Let's just hope that no exception occurs then... */
 642         tstate = _PyThreadState_Current;
 643         if (tstate != NULL && tstate->curexc_type != NULL) {
 644                 /* preserve the existing exception */
 645                 PyObject *err_type, *err_value, *err_tb;
 646                 PyErr_Fetch(&err_type, &err_value, &err_tb);
 647                 ep = (mp->ma_lookup)(mp, key, hash);
 648                 /* ignore errors */
 649                 PyErr_Restore(err_type, err_value, err_tb);
 650                 if (ep == NULL)
 651                         return NULL;
 652         }
 653         else {
 654                 ep = (mp->ma_lookup)(mp, key, hash);
 655                 if (ep == NULL) {
 656                         PyErr_Clear();
 657                         return NULL;
 658                 }
 659         }
 660         return ep->me_value;
 661 }
 662
 663 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
 664  * dictionary if it's merely replacing the value for an existing key.
 665  * This means that it's safe to loop over a dictionary with PyDict_Next()
 666  * and occasionally replace a value -- but you can't insert new keys or
 667  * remove them.
 668  */
 669 int
 670 PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
 671 {
 672         register PyDictObject *mp;
 673         register long hash;
 674         register Py_ssize_t n_used;
 675
 676         if (!PyDict_Check(op)) {
 677                 PyErr_BadInternalCall();
 678                 return -1;
 679         }
 680         assert(key);
 681         assert(value);
 682         mp = (PyDictObject *)op;
 683         if (PyString_CheckExact(key)) {
 684                 hash = ((PyStringObject *)key)->ob_shash;
 685                 if (hash == -1)
 686                         hash = PyObject_Hash(key);
 687         }
 688         else {
 689                 hash = PyObject_Hash(key);
 690                 if (hash == -1)
 691                         return -1;
 692         }
 693         assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
 694         n_used = mp->ma_used;
 695         Py_INCREF(value);
 696         Py_INCREF(key);
 697         if (insertdict(mp, key, hash, value) != 0)
 698                 return -1;
 699         /* If we added a key, we can safely resize.  Otherwise just return!
 700          * If fill >= 2/3 size, adjust size.  Normally, this doubles or
 701          * quaduples the size, but it's also possible for the dict to shrink
 702          * (if ma_fill is much larger than ma_used, meaning a lot of dict
 703          * keys have been * deleted).
 704          *
 705          * Quadrupling the size improves average dictionary sparseness
 706          * (reducing collisions) at the cost of some memory and iteration
 707          * speed (which loops over every possible entry).  It also halves
 708          * the number of expensive resize operations in a growing dictionary.
 709          *
 710          * Very large dictionaries (over 50K items) use doubling instead.
 711          * This may help applications with severe memory constraints.
 712          */
 713         if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
 714                 return 0;
 715         return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
 716 }
 717
 718 int
 719 PyDict_DelItem(PyObject *op, PyObject *key)
 720 {
 721         register PyDictObject *mp;
 722         register long hash;
 723         register PyDictEntry *ep;
 724         PyObject *old_value, *old_key;
 725
 726         if (!PyDict_Check(op)) {
 727                 PyErr_BadInternalCall();
 728                 return -1;
 729         }
 730         assert(key);
 731         if (!PyString_CheckExact(key) ||
 732             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
 733                 hash = PyObject_Hash(key);
 734                 if (hash == -1)
 735                         return -1;
 736         }
 737         mp = (PyDictObject *)op;
 738         ep = (mp->ma_lookup)(mp, key, hash);
 739         if (ep == NULL)
 740                 return -1;
 741         if (ep->me_value == NULL) {
 742                 set_key_error(key);
 743                 return -1;
 744         }
 745         old_key = ep->me_key;
 746         Py_INCREF(dummy);
 747         ep->me_key = dummy;
 748         old_value = ep->me_value;
 749         ep->me_value = NULL;
 750         mp->ma_used--;
 751         Py_DECREF(old_value);
 752         Py_DECREF(old_key);
 753         return 0;
 754 }
 755
 756 void
 757 PyDict_Clear(PyObject *op)
 758 {
 759         PyDictObject *mp;
 760         PyDictEntry *ep, *table;
 761         int table_is_malloced;
 762         Py_ssize_t fill;
 763         PyDictEntry small_copy[PyDict_MINSIZE];
 764 #ifdef Py_DEBUG
 765         Py_ssize_t i, n;
 766 #endif
 767
 768         if (!PyDict_Check(op))
 769                 return;
 770         mp = (PyDictObject *)op;
 771 #ifdef Py_DEBUG
 772         n = mp->ma_mask + 1;
 773         i = 0;
 774 #endif
 775
 776         table = mp->ma_table;
 777         assert(table != NULL);
 778         table_is_malloced = table != mp->ma_smalltable;
 779
 780         /* This is delicate.  During the process of clearing the dict,
 781          * decrefs can cause the dict to mutate.  To avoid fatal confusion
 782          * (voice of experience), we have to make the dict empty before
 783          * clearing the slots, and never refer to anything via mp->xxx while
 784          * clearing.
 785          */
 786         fill = mp->ma_fill;
 787         if (table_is_malloced)
 788                 EMPTY_TO_MINSIZE(mp);
 789
 790         else if (fill > 0) {
 791                 /* It's a small table with something that needs to be cleared.
 792                  * Afraid the only safe way is to copy the dict entries into
 793                  * another small table first.
 794                  */
 795                 memcpy(small_copy, table, sizeof(small_copy));
 796                 table = small_copy;
 797                 EMPTY_TO_MINSIZE(mp);
 798         }
 799         /* else it's a small table that's already empty */
 800
 801         /* Now we can finally clear things.  If C had refcounts, we could
 802          * assert that the refcount on table is 1 now, i.e. that this function
 803          * has unique access to it, so decref side-effects can't alter it.
 804          */
 805         for (ep = table; fill > 0; ++ep) {
 806 #ifdef Py_DEBUG
 807                 assert(i < n);
 808                 ++i;
 809 #endif
 810                 if (ep->me_key) {
 811                         --fill;
 812                         Py_DECREF(ep->me_key);
 813                         Py_XDECREF(ep->me_value);
 814                 }
 815 #ifdef Py_DEBUG
 816                 else
 817                         assert(ep->me_value == NULL);
 818 #endif
 819         }
 820
 821         if (table_is_malloced)
 822                 PyMem_DEL(table);
 823 }
 824
 825 /*
 826  * Iterate over a dict.  Use like so:
 827  *
 828  *     Py_ssize_t i;
 829  *     PyObject *key, *value;
 830  *     i = 0;   # important!  i should not otherwise be changed by you
 831  *     while (PyDict_Next(yourdict, &i, &key, &value)) {
 832  *              Refer to borrowed references in key and value.
 833  *     }
 834  *
 835  * CAUTION:  In general, it isn't safe to use PyDict_Next in a loop that
 836  * mutates the dict.  One exception:  it is safe if the loop merely changes
 837  * the values associated with the keys (but doesn't insert new keys or
 838  * delete keys), via PyDict_SetItem().
 839  */
 840 int
 841 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 842 {
 843         register Py_ssize_t i;
 844         register Py_ssize_t mask;
 845         register PyDictEntry *ep;
 846
 847         if (!PyDict_Check(op))
 848                 return 0;
 849         i = *ppos;
 850         if (i < 0)
 851                 return 0;
 852         ep = ((PyDictObject *)op)->ma_table;
 853         mask = ((PyDictObject *)op)->ma_mask;
 854         while (i <= mask && ep[i].me_value == NULL)
 855                 i++;
 856         *ppos = i+1;
 857         if (i > mask)
 858                 return 0;
 859         if (pkey)
 860                 *pkey = ep[i].me_key;
 861         if (pvalue)
 862                 *pvalue = ep[i].me_value;
 863         return 1;
 864 }
 865
 866 /* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/
 867 int
 868 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash)
 869 {
 870         register Py_ssize_t i;
 871         register Py_ssize_t mask;
 872         register PyDictEntry *ep;
 873
 874         if (!PyDict_Check(op))
 875                 return 0;
 876         i = *ppos;
 877         if (i < 0)
 878                 return 0;
 879         ep = ((PyDictObject *)op)->ma_table;
 880         mask = ((PyDictObject *)op)->ma_mask;
 881         while (i <= mask && ep[i].me_value == NULL)
 882                 i++;
 883         *ppos = i+1;
 884         if (i > mask)
 885                 return 0;
 886         *phash = (long)(ep[i].me_hash);
 887         if (pkey)
 888                 *pkey = ep[i].me_key;
 889         if (pvalue)
 890                 *pvalue = ep[i].me_value;
 891         return 1;
 892 }
 893
 894 /* Methods */
 895
 896 static void
 897 dict_dealloc(register PyDictObject *mp)
 898 {
 899         register PyDictEntry *ep;
 900         Py_ssize_t fill = mp->ma_fill;
 901         PyObject_GC_UnTrack(mp);
 902         Py_TRASHCAN_SAFE_BEGIN(mp)
 903         for (ep = mp->ma_table; fill > 0; ep++) {
 904                 if (ep->me_key) {
 905                         --fill;
 906                         Py_DECREF(ep->me_key);
 907                         Py_XDECREF(ep->me_value);
 908                 }
 909         }
 910         if (mp->ma_table != mp->ma_smalltable)
 911                 PyMem_DEL(mp->ma_table);
 912         if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
 913                 free_list[numfree++] = mp;
 914         else
 915                 Py_TYPE(mp)->tp_free((PyObject *)mp);
 916         Py_TRASHCAN_SAFE_END(mp)
 917 }
 918
 919 static int
 920 dict_print(register PyDictObject *mp, register FILE *fp, register int flags)
 921 {
 922         register Py_ssize_t i;
 923         register Py_ssize_t any;
 924         int status;
 925
 926         status = Py_ReprEnter((PyObject*)mp);
 927         if (status != 0) {
 928                 if (status < 0)
 929                         return status;
 930                 Py_BEGIN_ALLOW_THREADS
 931                 fprintf(fp, "{...}");
 932                 Py_END_ALLOW_THREADS
 933                 return 0;
 934         }
 935
 936         Py_BEGIN_ALLOW_THREADS
 937         fprintf(fp, "{");
 938         Py_END_ALLOW_THREADS
 939         any = 0;
 940         for (i = 0; i <= mp->ma_mask; i++) {
 941                 PyDictEntry *ep = mp->ma_table + i;
 942                 PyObject *pvalue = ep->me_value;
 943                 if (pvalue != NULL) {
 944                         /* Prevent PyObject_Repr from deleting value during
 945                            key format */
 946                         Py_INCREF(pvalue);
 947                         if (any++ > 0) {
 948                                 Py_BEGIN_ALLOW_THREADS
 949                                 fprintf(fp, ", ");
 950                                 Py_END_ALLOW_THREADS
 951                         }
 952                         if (PyObject_Print((PyObject *)ep->me_key, fp, 0)!=0) {
 953                                 Py_DECREF(pvalue);
 954                                 Py_ReprLeave((PyObject*)mp);
 955                                 return -1;
 956                         }
 957                         Py_BEGIN_ALLOW_THREADS
 958                         fprintf(fp, ": ");
 959                         Py_END_ALLOW_THREADS
 960                         if (PyObject_Print(pvalue, fp, 0) != 0) {
 961                                 Py_DECREF(pvalue);
 962                                 Py_ReprLeave((PyObject*)mp);
 963                                 return -1;
 964                         }
 965                         Py_DECREF(pvalue);
 966                 }
 967         }
 968         Py_BEGIN_ALLOW_THREADS
 969         fprintf(fp, "}");
 970         Py_END_ALLOW_THREADS
 971         Py_ReprLeave((PyObject*)mp);
 972         return 0;
 973 }
 974
 975 static PyObject *
 976 dict_repr(PyDictObject *mp)
 977 {
 978         Py_ssize_t i;
 979         PyObject *s, *temp, *colon = NULL;
 980         PyObject *pieces = NULL, *result = NULL;
 981         PyObject *key, *value;
 982
 983         i = Py_ReprEnter((PyObject *)mp);
 984         if (i != 0) {
 985                 return i > 0 ? PyString_FromString("{...}") : NULL;
 986         }
 987
 988         if (mp->ma_used == 0) {
 989                 result = PyString_FromString("{}");
 990                 goto Done;
 991         }
 992
 993         pieces = PyList_New(0);
 994         if (pieces == NULL)
 995                 goto Done;
 996
 997         colon = PyString_FromString(": ");
 998         if (colon == NULL)
 999                 goto Done;
1000
1001         /* Do repr() on each key+value pair, and insert ": " between them.
1002            Note that repr may mutate the dict. */
1003         i = 0;
1004         while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
1005                 int status;
1006                 /* Prevent repr from deleting value during key format. */
1007                 Py_INCREF(value);
1008                 s = PyObject_Repr(key);
1009                 PyString_Concat(&s, colon);
1010                 PyString_ConcatAndDel(&s, PyObject_Repr(value));
1011                 Py_DECREF(value);
1012                 if (s == NULL)
1013                         goto Done;
1014                 status = PyList_Append(pieces, s);
1015                 Py_DECREF(s);  /* append created a new ref */
1016                 if (status < 0)
1017                         goto Done;
1018         }
1019
1020         /* Add "{}" decorations to the first and last items. */
1021         assert(PyList_GET_SIZE(pieces) > 0);
1022         s = PyString_FromString("{");
1023         if (s == NULL)
1024                 goto Done;
1025         temp = PyList_GET_ITEM(pieces, 0);
1026         PyString_ConcatAndDel(&s, temp);
1027         PyList_SET_ITEM(pieces, 0, s);
1028         if (s == NULL)
1029                 goto Done;
1030
1031         s = PyString_FromString("}");
1032         if (s == NULL)
1033                 goto Done;
1034         temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
1035         PyString_ConcatAndDel(&temp, s);
1036         PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
1037         if (temp == NULL)
1038                 goto Done;
1039
1040         /* Paste them all together with ", " between. */
1041         s = PyString_FromString(", ");
1042         if (s == NULL)
1043                 goto Done;
1044         result = _PyString_Join(s, pieces);
1045         Py_DECREF(s);
1046
1047 Done:
1048         Py_XDECREF(pieces);
1049         Py_XDECREF(colon);
1050         Py_ReprLeave((PyObject *)mp);
1051         return result;
1052 }
1053
1054 static Py_ssize_t
1055 dict_length(PyDictObject *mp)
1056 {
1057         return mp->ma_used;
1058 }
1059
1060 static PyObject *
1061 dict_subscript(PyDictObject *mp, register PyObject *key)
1062 {
1063         PyObject *v;
1064         long hash;
1065         PyDictEntry *ep;
1066         assert(mp->ma_table != NULL);
1067         if (!PyString_CheckExact(key) ||
1068             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1069                 hash = PyObject_Hash(key);
1070                 if (hash == -1)
1071                         return NULL;
1072         }
1073         ep = (mp->ma_lookup)(mp, key, hash);
1074         if (ep == NULL)
1075                 return NULL;
1076         v = ep->me_value;
1077         if (v == NULL) {
1078                 if (!PyDict_CheckExact(mp)) {
1079                         /* Look up __missing__ method if we're a subclass. */
1080                         PyObject *missing;
1081                         static PyObject *missing_str = NULL;
1082                         if (missing_str == NULL)
1083                                 missing_str =
1084                                   PyString_InternFromString("__missing__");
1085                         missing = _PyType_Lookup(Py_TYPE(mp), missing_str);
1086                         if (missing != NULL)
1087                                 return PyObject_CallFunctionObjArgs(missing,
1088                                         (PyObject *)mp, key, NULL);
1089                 }
1090                 set_key_error(key);
1091                 return NULL;
1092         }
1093         else
1094                 Py_INCREF(v);
1095         return v;
1096 }
1097
1098 static int
1099 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
1100 {
1101         if (w == NULL)
1102                 return PyDict_DelItem((PyObject *)mp, v);
1103         else
1104                 return PyDict_SetItem((PyObject *)mp, v, w);
1105 }
1106
1107 static PyMappingMethods dict_as_mapping = {
1108         (lenfunc)dict_length, /*mp_length*/
1109         (binaryfunc)dict_subscript, /*mp_subscript*/
1110         (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
1111 };
1112
1113 static PyObject *
1114 dict_keys(register PyDictObject *mp)
1115 {
1116         register PyObject *v;
1117         register Py_ssize_t i, j;
1118         PyDictEntry *ep;
1119         Py_ssize_t mask, n;
1120
1121   again:
1122         n = mp->ma_used;
1123         v = PyList_New(n);
1124         if (v == NULL)
1125                 return NULL;
1126         if (n != mp->ma_used) {
1127                 /* Durnit.  The allocations caused the dict to resize.
1128                  * Just start over, this shouldn't normally happen.
1129                  */
1130                 Py_DECREF(v);
1131                 goto again;
1132         }
1133         ep = mp->ma_table;
1134         mask = mp->ma_mask;
1135         for (i = 0, j = 0; i <= mask; i++) {
1136                 if (ep[i].me_value != NULL) {
1137                         PyObject *key = ep[i].me_key;
1138                         Py_INCREF(key);
1139                         PyList_SET_ITEM(v, j, key);
1140                         j++;
1141                 }
1142         }
1143         assert(j == n);
1144         return v;
1145 }
1146
1147 static PyObject *
1148 dict_values(register PyDictObject *mp)
1149 {
1150         register PyObject *v;
1151         register Py_ssize_t i, j;
1152         PyDictEntry *ep;
1153         Py_ssize_t mask, n;
1154
1155   again:
1156         n = mp->ma_used;
1157         v = PyList_New(n);
1158         if (v == NULL)
1159                 return NULL;
1160         if (n != mp->ma_used) {
1161                 /* Durnit.  The allocations caused the dict to resize.
1162                  * Just start over, this shouldn't normally happen.
1163                  */
1164                 Py_DECREF(v);
1165                 goto again;
1166         }
1167         ep = mp->ma_table;
1168         mask = mp->ma_mask;
1169         for (i = 0, j = 0; i <= mask; i++) {
1170                 if (ep[i].me_value != NULL) {
1171                         PyObject *value = ep[i].me_value;
1172                         Py_INCREF(value);
1173                         PyList_SET_ITEM(v, j, value);
1174                         j++;
1175                 }
1176         }
1177         assert(j == n);
1178         return v;
1179 }
1180
1181 static PyObject *
1182 dict_items(register PyDictObject *mp)
1183 {
1184         register PyObject *v;
1185         register Py_ssize_t i, j, n;
1186         Py_ssize_t mask;
1187         PyObject *item, *key, *value;
1188         PyDictEntry *ep;
1189
1190         /* Preallocate the list of tuples, to avoid allocations during
1191          * the loop over the items, which could trigger GC, which
1192          * could resize the dict. :-(
1193          */
1194   again:
1195         n = mp->ma_used;
1196         v = PyList_New(n);
1197         if (v == NULL)
1198                 return NULL;
1199         for (i = 0; i < n; i++) {
1200                 item = PyTuple_New(2);
1201                 if (item == NULL) {
1202                         Py_DECREF(v);
1203                         return NULL;
1204                 }
1205                 PyList_SET_ITEM(v, i, item);
1206         }
1207         if (n != mp->ma_used) {
1208                 /* Durnit.  The allocations caused the dict to resize.
1209                  * Just start over, this shouldn't normally happen.
1210                  */
1211                 Py_DECREF(v);
1212                 goto again;
1213         }
1214         /* Nothing we do below makes any function calls. */
1215         ep = mp->ma_table;
1216         mask = mp->ma_mask;
1217         for (i = 0, j = 0; i <= mask; i++) {
1218                 if ((value=ep[i].me_value) != NULL) {
1219                         key = ep[i].me_key;
1220                         item = PyList_GET_ITEM(v, j);
1221                         Py_INCREF(key);
1222                         PyTuple_SET_ITEM(item, 0, key);
1223                         Py_INCREF(value);
1224                         PyTuple_SET_ITEM(item, 1, value);
1225                         j++;
1226                 }
1227         }
1228         assert(j == n);
1229         return v;
1230 }
1231
1232 static PyObject *
1233 dict_fromkeys(PyObject *cls, PyObject *args)
1234 {
1235         PyObject *seq;
1236         PyObject *value = Py_None;
1237         PyObject *it;   /* iter(seq) */
1238         PyObject *key;
1239         PyObject *d;
1240         int status;
1241
1242         if (!PyArg_UnpackTuple(args, "fromkeys", 1, 2, &seq, &value))
1243                 return NULL;
1244
1245         d = PyObject_CallObject(cls, NULL);
1246         if (d == NULL)
1247                 return NULL;
1248
1249         if (PyDict_CheckExact(d) && PyDict_CheckExact(seq)) {
1250                 PyDictObject *mp = (PyDictObject *)d;
1251                 PyObject *oldvalue;
1252                 Py_ssize_t pos = 0;
1253                 PyObject *key;
1254                 long hash;
1255
1256                 if (dictresize(mp, Py_SIZE(seq)))
1257                         return NULL;
1258
1259                 while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
1260                         Py_INCREF(key);
1261                         Py_INCREF(value);
1262                         if (insertdict(mp, key, hash, value))
1263                                 return NULL;
1264                 }
1265                 return d;
1266         }
1267
1268         if (PyDict_CheckExact(d) && PyAnySet_CheckExact(seq)) {
1269                 PyDictObject *mp = (PyDictObject *)d;
1270                 Py_ssize_t pos = 0;
1271                 PyObject *key;
1272                 long hash;
1273
1274                 if (dictresize(mp, PySet_GET_SIZE(seq)))
1275                         return NULL;
1276
1277                 while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
1278                         Py_INCREF(key);
1279                         Py_INCREF(value);
1280                         if (insertdict(mp, key, hash, value))
1281                                 return NULL;
1282                 }
1283                 return d;
1284         }
1285
1286         it = PyObject_GetIter(seq);
1287         if (it == NULL){
1288                 Py_DECREF(d);
1289                 return NULL;
1290         }
1291
1292         if (PyDict_CheckExact(d)) {
1293                 while ((key = PyIter_Next(it)) != NULL) {
1294                         status = PyDict_SetItem(d, key, value);
1295                         Py_DECREF(key);
1296                         if (status < 0)
1297                                 goto Fail;
1298                 }
1299         } else {
1300                 while ((key = PyIter_Next(it)) != NULL) {
1301                         status = PyObject_SetItem(d, key, value);
1302                         Py_DECREF(key);
1303                         if (status < 0)
1304                                 goto Fail;
1305                 }
1306         }
1307
1308         if (PyErr_Occurred())
1309                 goto Fail;
1310         Py_DECREF(it);
1311         return d;
1312
1313 Fail:
1314         Py_DECREF(it);
1315         Py_DECREF(d);
1316         return NULL;
1317 }
1318
1319 static int
1320 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
1321 {
1322         PyObject *arg = NULL;
1323         int result = 0;
1324
1325         if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg))
1326                 result = -1;
1327
1328         else if (arg != NULL) {
1329                 if (PyObject_HasAttrString(arg, "keys"))
1330                         result = PyDict_Merge(self, arg, 1);
1331                 else
1332                         result = PyDict_MergeFromSeq2(self, arg, 1);
1333         }
1334         if (result == 0 && kwds != NULL)
1335                 result = PyDict_Merge(self, kwds, 1);
1336         return result;
1337 }
1338
1339 static PyObject *
1340 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
1341 {
1342         if (dict_update_common(self, args, kwds, "update") != -1)
1343                 Py_RETURN_NONE;
1344         return NULL;
1345 }
1346
1347 /* Update unconditionally replaces existing items.
1348    Merge has a 3rd argument 'override'; if set, it acts like Update,
1349    otherwise it leaves existing items unchanged.
1350
1351    PyDict_{Update,Merge} update/merge from a mapping object.
1352
1353    PyDict_MergeFromSeq2 updates/merges from any iterable object
1354    producing iterable objects of length 2.
1355 */
1356
1357 int
1358 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
1359 {
1360         PyObject *it;   /* iter(seq2) */
1361         Py_ssize_t i;   /* index into seq2 of current element */
1362         PyObject *item; /* seq2[i] */
1363         PyObject *fast; /* item as a 2-tuple or 2-list */
1364
1365         assert(d != NULL);
1366         assert(PyDict_Check(d));
1367         assert(seq2 != NULL);
1368
1369         it = PyObject_GetIter(seq2);
1370         if (it == NULL)
1371                 return -1;
1372
1373         for (i = 0; ; ++i) {
1374                 PyObject *key, *value;
1375                 Py_ssize_t n;
1376
1377                 fast = NULL;
1378                 item = PyIter_Next(it);
1379                 if (item == NULL) {
1380                         if (PyErr_Occurred())
1381                                 goto Fail;
1382                         break;
1383                 }
1384
1385                 /* Convert item to sequence, and verify length 2. */
1386                 fast = PySequence_Fast(item, "");
1387                 if (fast == NULL) {
1388                         if (PyErr_ExceptionMatches(PyExc_TypeError))
1389                                 PyErr_Format(PyExc_TypeError,
1390                                         "cannot convert dictionary update "
1391                                         "sequence element #%zd to a sequence",
1392                                         i);
1393                         goto Fail;
1394                 }
1395                 n = PySequence_Fast_GET_SIZE(fast);
1396                 if (n != 2) {
1397                         PyErr_Format(PyExc_ValueError,
1398                                      "dictionary update sequence element #%zd "
1399                                      "has length %zd; 2 is required",
1400                                      i, n);
1401                         goto Fail;
1402                 }
1403
1404                 /* Update/merge with this (key, value) pair. */
1405                 key = PySequence_Fast_GET_ITEM(fast, 0);
1406                 value = PySequence_Fast_GET_ITEM(fast, 1);
1407                 if (override || PyDict_GetItem(d, key) == NULL) {
1408                         int status = PyDict_SetItem(d, key, value);
1409                         if (status < 0)
1410                                 goto Fail;
1411                 }
1412                 Py_DECREF(fast);
1413                 Py_DECREF(item);
1414         }
1415
1416         i = 0;
1417         goto Return;
1418 Fail:
1419         Py_XDECREF(item);
1420         Py_XDECREF(fast);
1421         i = -1;
1422 Return:
1423         Py_DECREF(it);
1424         return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
1425 }
1426
1427 int
1428 PyDict_Update(PyObject *a, PyObject *b)
1429 {
1430         return PyDict_Merge(a, b, 1);
1431 }
1432
1433 int
1434 PyDict_Merge(PyObject *a, PyObject *b, int override)
1435 {
1436         register PyDictObject *mp, *other;
1437         register Py_ssize_t i;
1438         PyDictEntry *entry;
1439
1440         /* We accept for the argument either a concrete dictionary object,
1441          * or an abstract "mapping" object.  For the former, we can do
1442          * things quite efficiently.  For the latter, we only require that
1443          * PyMapping_Keys() and PyObject_GetItem() be supported.
1444          */
1445         if (a == NULL || !PyDict_Check(a) || b == NULL) {
1446                 PyErr_BadInternalCall();
1447                 return -1;
1448         }
1449         mp = (PyDictObject*)a;
1450         if (PyDict_Check(b)) {
1451                 other = (PyDictObject*)b;
1452                 if (other == mp || other->ma_used == 0)
1453                         /* a.update(a) or a.update({}); nothing to do */
1454                         return 0;
1455                 if (mp->ma_used == 0)
1456                         /* Since the target dict is empty, PyDict_GetItem()
1457                          * always returns NULL.  Setting override to 1
1458                          * skips the unnecessary test.
1459                          */
1460                         override = 1;
1461                 /* Do one big resize at the start, rather than
1462                  * incrementally resizing as we insert new items.  Expect
1463                  * that there will be no (or few) overlapping keys.
1464                  */
1465                 if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) {
1466                    if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
1467                            return -1;
1468                 }
1469                 for (i = 0; i <= other->ma_mask; i++) {
1470                         entry = &other->ma_table[i];
1471                         if (entry->me_value != NULL &&
1472                             (override ||
1473                              PyDict_GetItem(a, entry->me_key) == NULL)) {
1474                                 Py_INCREF(entry->me_key);
1475                                 Py_INCREF(entry->me_value);
1476                                 if (insertdict(mp, entry->me_key,
1477                                                (long)entry->me_hash,
1478                                                entry->me_value) != 0)
1479                                         return -1;
1480                         }
1481                 }
1482         }
1483         else {
1484                 /* Do it the generic, slower way */
1485                 PyObject *keys = PyMapping_Keys(b);
1486                 PyObject *iter;
1487                 PyObject *key, *value;
1488                 int status;
1489
1490                 if (keys == NULL)
1491                         /* Docstring says this is equivalent to E.keys() so
1492                          * if E doesn't have a .keys() method we want
1493                          * AttributeError to percolate up.  Might as well
1494                          * do the same for any other error.
1495                          */
1496                         return -1;
1497
1498                 iter = PyObject_GetIter(keys);
1499                 Py_DECREF(keys);
1500                 if (iter == NULL)
1501                         return -1;
1502
1503                 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
1504                         if (!override && PyDict_GetItem(a, key) != NULL) {
1505                                 Py_DECREF(key);
1506                                 continue;
1507                         }
1508                         value = PyObject_GetItem(b, key);
1509                         if (value == NULL) {
1510                                 Py_DECREF(iter);
1511                                 Py_DECREF(key);
1512                                 return -1;
1513                         }
1514                         status = PyDict_SetItem(a, key, value);
1515                         Py_DECREF(key);
1516                         Py_DECREF(value);
1517                         if (status < 0) {
1518                                 Py_DECREF(iter);
1519                                 return -1;
1520                         }
1521                 }
1522                 Py_DECREF(iter);
1523                 if (PyErr_Occurred())
1524                         /* Iterator completed, via error */
1525                         return -1;
1526         }
1527         return 0;
1528 }
1529
1530 static PyObject *
1531 dict_copy(register PyDictObject *mp)
1532 {
1533         return PyDict_Copy((PyObject*)mp);
1534 }
1535
1536 PyObject *
1537 PyDict_Copy(PyObject *o)
1538 {
1539         PyObject *copy;
1540
1541         if (o == NULL || !PyDict_Check(o)) {
1542                 PyErr_BadInternalCall();
1543                 return NULL;
1544         }
1545         copy = PyDict_New();
1546         if (copy == NULL)
1547                 return NULL;
1548         if (PyDict_Merge(copy, o, 1) == 0)
1549                 return copy;
1550         Py_DECREF(copy);
1551         return NULL;
1552 }
1553
1554 Py_ssize_t
1555 PyDict_Size(PyObject *mp)
1556 {
1557         if (mp == NULL || !PyDict_Check(mp)) {
1558                 PyErr_BadInternalCall();
1559                 return -1;
1560         }
1561         return ((PyDictObject *)mp)->ma_used;
1562 }
1563
1564 PyObject *
1565 PyDict_Keys(PyObject *mp)
1566 {
1567         if (mp == NULL || !PyDict_Check(mp)) {
1568                 PyErr_BadInternalCall();
1569                 return NULL;
1570         }
1571         return dict_keys((PyDictObject *)mp);
1572 }
1573
1574 PyObject *
1575 PyDict_Values(PyObject *mp)
1576 {
1577         if (mp == NULL || !PyDict_Check(mp)) {
1578                 PyErr_BadInternalCall();
1579                 return NULL;
1580         }
1581         return dict_values((PyDictObject *)mp);
1582 }
1583
1584 PyObject *
1585 PyDict_Items(PyObject *mp)
1586 {
1587         if (mp == NULL || !PyDict_Check(mp)) {
1588                 PyErr_BadInternalCall();
1589                 return NULL;
1590         }
1591         return dict_items((PyDictObject *)mp);
1592 }
1593
1594 /* Subroutine which returns the smallest key in a for which b's value
1595    is different or absent.  The value is returned too, through the
1596    pval argument.  Both are NULL if no key in a is found for which b's status
1597    differs.  The refcounts on (and only on) non-NULL *pval and function return
1598    values must be decremented by the caller (characterize() increments them
1599    to ensure that mutating comparison and PyDict_GetItem calls can't delete
1600    them before the caller is done looking at them). */
1601
1602 static PyObject *
1603 characterize(PyDictObject *a, PyDictObject *b, PyObject **pval)
1604 {
1605         PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
1606         PyObject *aval = NULL; /* a[akey] */
1607         Py_ssize_t i;
1608         int cmp;
1609
1610         for (i = 0; i <= a->ma_mask; i++) {
1611                 PyObject *thiskey, *thisaval, *thisbval;
1612                 if (a->ma_table[i].me_value == NULL)
1613                         continue;
1614                 thiskey = a->ma_table[i].me_key;
1615                 Py_INCREF(thiskey);  /* keep alive across compares */
1616                 if (akey != NULL) {
1617                         cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
1618                         if (cmp < 0) {
1619                                 Py_DECREF(thiskey);
1620                                 goto Fail;
1621                         }
1622                         if (cmp > 0 ||
1623                             i > a->ma_mask ||
1624                             a->ma_table[i].me_value == NULL)
1625                         {
1626                                 /* Not the *smallest* a key; or maybe it is
1627                                  * but the compare shrunk the dict so we can't
1628                                  * find its associated value anymore; or
1629                                  * maybe it is but the compare deleted the
1630                                  * a[thiskey] entry.
1631                                  */
1632                                 Py_DECREF(thiskey);
1633                                 continue;
1634                         }
1635                 }
1636
1637                 /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
1638                 thisaval = a->ma_table[i].me_value;
1639                 assert(thisaval);
1640                 Py_INCREF(thisaval);   /* keep alive */
1641                 thisbval = PyDict_GetItem((PyObject *)b, thiskey);
1642                 if (thisbval == NULL)
1643                         cmp = 0;
1644                 else {
1645                         /* both dicts have thiskey:  same values? */
1646                         cmp = PyObject_RichCompareBool(
1647                                                 thisaval, thisbval, Py_EQ);
1648                         if (cmp < 0) {
1649                                 Py_DECREF(thiskey);
1650                                 Py_DECREF(thisaval);
1651                                 goto Fail;
1652                         }
1653                 }
1654                 if (cmp == 0) {
1655                         /* New winner. */
1656                         Py_XDECREF(akey);
1657                         Py_XDECREF(aval);
1658                         akey = thiskey;
1659                         aval = thisaval;
1660                 }
1661                 else {
1662                         Py_DECREF(thiskey);
1663                         Py_DECREF(thisaval);
1664                 }
1665         }
1666         *pval = aval;
1667         return akey;
1668
1669 Fail:
1670         Py_XDECREF(akey);
1671         Py_XDECREF(aval);
1672         *pval = NULL;
1673         return NULL;
1674 }
1675
1676 static int
1677 dict_compare(PyDictObject *a, PyDictObject *b)
1678 {
1679         PyObject *adiff, *bdiff, *aval, *bval;
1680         int res;
1681
1682         /* Compare lengths first */
1683         if (a->ma_used < b->ma_used)
1684                 return -1;      /* a is shorter */
1685         else if (a->ma_used > b->ma_used)
1686                 return 1;       /* b is shorter */
1687
1688         /* Same length -- check all keys */
1689         bdiff = bval = NULL;
1690         adiff = characterize(a, b, &aval);
1691         if (adiff == NULL) {
1692                 assert(!aval);
1693                 /* Either an error, or a is a subset with the same length so
1694                  * must be equal.
1695                  */
1696                 res = PyErr_Occurred() ? -1 : 0;
1697                 goto Finished;
1698         }
1699         bdiff = characterize(b, a, &bval);
1700         if (bdiff == NULL && PyErr_Occurred()) {
1701                 assert(!bval);
1702                 res = -1;
1703                 goto Finished;
1704         }
1705         res = 0;
1706         if (bdiff) {
1707                 /* bdiff == NULL "should be" impossible now, but perhaps
1708                  * the last comparison done by the characterize() on a had
1709                  * the side effect of making the dicts equal!
1710                  */
1711                 res = PyObject_Compare(adiff, bdiff);
1712         }
1713         if (res == 0 && bval != NULL)
1714                 res = PyObject_Compare(aval, bval);
1715
1716 Finished:
1717         Py_XDECREF(adiff);
1718         Py_XDECREF(bdiff);
1719         Py_XDECREF(aval);
1720         Py_XDECREF(bval);
1721         return res;
1722 }
1723
1724 /* Return 1 if dicts equal, 0 if not, -1 if error.
1725  * Gets out as soon as any difference is detected.
1726  * Uses only Py_EQ comparison.
1727  */
1728 static int
1729 dict_equal(PyDictObject *a, PyDictObject *b)
1730 {
1731         Py_ssize_t i;
1732
1733         if (a->ma_used != b->ma_used)
1734                 /* can't be equal if # of entries differ */
1735                 return 0;
1736
1737         /* Same # of entries -- check all of 'em.  Exit early on any diff. */
1738         for (i = 0; i <= a->ma_mask; i++) {
1739                 PyObject *aval = a->ma_table[i].me_value;
1740                 if (aval != NULL) {
1741                         int cmp;
1742                         PyObject *bval;
1743                         PyObject *key = a->ma_table[i].me_key;
1744                         /* temporarily bump aval's refcount to ensure it stays
1745                            alive until we're done with it */
1746                         Py_INCREF(aval);
1747                         /* ditto for key */
1748                         Py_INCREF(key);
1749                         bval = PyDict_GetItem((PyObject *)b, key);
1750                         Py_DECREF(key);
1751                         if (bval == NULL) {
1752                                 Py_DECREF(aval);
1753                                 return 0;
1754                         }
1755                         cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
1756                         Py_DECREF(aval);
1757                         if (cmp <= 0)  /* error or not equal */
1758                                 return cmp;
1759                 }
1760         }
1761         return 1;
1762  }
1763
1764 static PyObject *
1765 dict_richcompare(PyObject *v, PyObject *w, int op)
1766 {
1767         int cmp;
1768         PyObject *res;
1769
1770         if (!PyDict_Check(v) || !PyDict_Check(w)) {
1771                 res = Py_NotImplemented;
1772         }
1773         else if (op == Py_EQ || op == Py_NE) {
1774                 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
1775                 if (cmp < 0)
1776                         return NULL;
1777                 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
1778         }
1779         else {
1780                 /* Py3K warning if comparison isn't == or !=  */
1781                 if (PyErr_WarnPy3k("dict inequality comparisons not supported "
1782                                    "in 3.x", 1) < 0) {
1783                         return NULL;
1784                 }
1785                 res = Py_NotImplemented;
1786         }
1787         Py_INCREF(res);
1788         return res;
1789  }
1790
1791 static PyObject *
1792 dict_contains(register PyDictObject *mp, PyObject *key)
1793 {
1794         long hash;
1795         PyDictEntry *ep;
1796
1797         if (!PyString_CheckExact(key) ||
1798             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1799                 hash = PyObject_Hash(key);
1800                 if (hash == -1)
1801                         return NULL;
1802         }
1803         ep = (mp->ma_lookup)(mp, key, hash);
1804         if (ep == NULL)
1805                 return NULL;
1806         return PyBool_FromLong(ep->me_value != NULL);
1807 }
1808
1809 static PyObject *
1810 dict_has_key(register PyDictObject *mp, PyObject *key)
1811 {
1812         if (PyErr_WarnPy3k("dict.has_key() not supported in 3.x; "
1813                            "use the in operator", 1) < 0)
1814                 return NULL;
1815         return dict_contains(mp, key);
1816 }
1817
1818 static PyObject *
1819 dict_get(register PyDictObject *mp, PyObject *args)
1820 {
1821         PyObject *key;
1822         PyObject *failobj = Py_None;
1823         PyObject *val = NULL;
1824         long hash;
1825         PyDictEntry *ep;
1826
1827         if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
1828                 return NULL;
1829
1830         if (!PyString_CheckExact(key) ||
1831             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1832                 hash = PyObject_Hash(key);
1833                 if (hash == -1)
1834                         return NULL;
1835         }
1836         ep = (mp->ma_lookup)(mp, key, hash);
1837         if (ep == NULL)
1838                 return NULL;
1839         val = ep->me_value;
1840         if (val == NULL)
1841                 val = failobj;
1842         Py_INCREF(val);
1843         return val;
1844 }
1845
1846
1847 static PyObject *
1848 dict_setdefault(register PyDictObject *mp, PyObject *args)
1849 {
1850         PyObject *key;
1851         PyObject *failobj = Py_None;
1852         PyObject *val = NULL;
1853         long hash;
1854         PyDictEntry *ep;
1855
1856         if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
1857                 return NULL;
1858
1859         if (!PyString_CheckExact(key) ||
1860             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1861                 hash = PyObject_Hash(key);
1862                 if (hash == -1)
1863                         return NULL;
1864         }
1865         ep = (mp->ma_lookup)(mp, key, hash);
1866         if (ep == NULL)
1867                 return NULL;
1868         val = ep->me_value;
1869         if (val == NULL) {
1870                 val = failobj;
1871                 if (PyDict_SetItem((PyObject*)mp, key, failobj))
1872                         val = NULL;
1873         }
1874         Py_XINCREF(val);
1875         return val;
1876 }
1877
1878
1879 static PyObject *
1880 dict_clear(register PyDictObject *mp)
1881 {
1882         PyDict_Clear((PyObject *)mp);
1883         Py_RETURN_NONE;
1884 }
1885
1886 static PyObject *
1887 dict_pop(PyDictObject *mp, PyObject *args)
1888 {
1889         long hash;
1890         PyDictEntry *ep;
1891         PyObject *old_value, *old_key;
1892         PyObject *key, *deflt = NULL;
1893
1894         if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
1895                 return NULL;
1896         if (mp->ma_used == 0) {
1897                 if (deflt) {
1898                         Py_INCREF(deflt);
1899                         return deflt;
1900                 }
1901                 PyErr_SetString(PyExc_KeyError,
1902                                 "pop(): dictionary is empty");
1903                 return NULL;
1904         }
1905         if (!PyString_CheckExact(key) ||
1906             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1907                 hash = PyObject_Hash(key);
1908                 if (hash == -1)
1909                         return NULL;
1910         }
1911         ep = (mp->ma_lookup)(mp, key, hash);
1912         if (ep == NULL)
1913                 return NULL;
1914         if (ep->me_value == NULL) {
1915                 if (deflt) {
1916                         Py_INCREF(deflt);
1917                         return deflt;
1918                 }
1919                 set_key_error(key);
1920                 return NULL;
1921         }
1922         old_key = ep->me_key;
1923         Py_INCREF(dummy);
1924         ep->me_key = dummy;
1925         old_value = ep->me_value;
1926         ep->me_value = NULL;
1927         mp->ma_used--;
1928         Py_DECREF(old_key);
1929         return old_value;
1930 }
1931
1932 static PyObject *
1933 dict_popitem(PyDictObject *mp)
1934 {
1935         Py_ssize_t i = 0;
1936         PyDictEntry *ep;
1937         PyObject *res;
1938
1939         /* Allocate the result tuple before checking the size.  Believe it
1940          * or not, this allocation could trigger a garbage collection which
1941          * could empty the dict, so if we checked the size first and that
1942          * happened, the result would be an infinite loop (searching for an
1943          * entry that no longer exists).  Note that the usual popitem()
1944          * idiom is "while d: k, v = d.popitem()". so needing to throw the
1945          * tuple away if the dict *is* empty isn't a significant
1946          * inefficiency -- possible, but unlikely in practice.
1947          */
1948         res = PyTuple_New(2);
1949         if (res == NULL)
1950                 return NULL;
1951         if (mp->ma_used == 0) {
1952                 Py_DECREF(res);
1953                 PyErr_SetString(PyExc_KeyError,
1954                                 "popitem(): dictionary is empty");
1955                 return NULL;
1956         }
1957         /* Set ep to "the first" dict entry with a value.  We abuse the hash
1958          * field of slot 0 to hold a search finger:
1959          * If slot 0 has a value, use slot 0.
1960          * Else slot 0 is being used to hold a search finger,
1961          * and we use its hash value as the first index to look.
1962          */
1963         ep = &mp->ma_table[0];
1964         if (ep->me_value == NULL) {
1965                 i = ep->me_hash;
1966                 /* The hash field may be a real hash value, or it may be a
1967                  * legit search finger, or it may be a once-legit search
1968                  * finger that's out of bounds now because it wrapped around
1969                  * or the table shrunk -- simply make sure it's in bounds now.
1970                  */
1971                 if (i > mp->ma_mask || i < 1)
1972                         i = 1;  /* skip slot 0 */
1973                 while ((ep = &mp->ma_table[i])->me_value == NULL) {
1974                         i++;
1975                         if (i > mp->ma_mask)
1976                                 i = 1;
1977                 }
1978         }
1979         PyTuple_SET_ITEM(res, 0, ep->me_key);
1980         PyTuple_SET_ITEM(res, 1, ep->me_value);
1981         Py_INCREF(dummy);
1982         ep->me_key = dummy;
1983         ep->me_value = NULL;
1984         mp->ma_used--;
1985         assert(mp->ma_table[0].me_value == NULL);
1986         mp->ma_table[0].me_hash = i + 1;  /* next place to start */
1987         return res;
1988 }
1989
1990 static int
1991 dict_traverse(PyObject *op, visitproc visit, void *arg)
1992 {
1993         Py_ssize_t i = 0;
1994         PyObject *pk;
1995         PyObject *pv;
1996
1997         while (PyDict_Next(op, &i, &pk, &pv)) {
1998                 Py_VISIT(pk);
1999                 Py_VISIT(pv);
2000         }
2001         return 0;
2002 }
2003
2004 static int
2005 dict_tp_clear(PyObject *op)
2006 {
2007         PyDict_Clear(op);
2008         return 0;
2009 }
2010
2011
2012 extern PyTypeObject PyDictIterKey_Type; /* Forward */
2013 extern PyTypeObject PyDictIterValue_Type; /* Forward */
2014 extern PyTypeObject PyDictIterItem_Type; /* Forward */
2015 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
2016
2017 static PyObject *
2018 dict_iterkeys(PyDictObject *dict)
2019 {
2020         return dictiter_new(dict, &PyDictIterKey_Type);
2021 }
2022
2023 static PyObject *
2024 dict_itervalues(PyDictObject *dict)
2025 {
2026         return dictiter_new(dict, &PyDictIterValue_Type);
2027 }
2028
2029 static PyObject *
2030 dict_iteritems(PyDictObject *dict)
2031 {
2032         return dictiter_new(dict, &PyDictIterItem_Type);
2033 }
2034
2035 static PyObject *
2036 dict_sizeof(PyDictObject *mp)
2037 {
2038         Py_ssize_t res;
2039
2040         res = sizeof(PyDictObject);
2041         if (mp->ma_table != mp->ma_smalltable)
2042                 res = res + (mp->ma_mask + 1) * sizeof(PyDictEntry);
2043         return PyInt_FromSsize_t(res);
2044 }
2045
2046 PyDoc_STRVAR(has_key__doc__,
2047 "D.has_key(k) -> True if D has a key k, else False");
2048
2049 PyDoc_STRVAR(contains__doc__,
2050 "D.__contains__(k) -> True if D has a key k, else False");
2051
2052 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
2053
2054 PyDoc_STRVAR(sizeof__doc__,
2055 "D.__sizeof__() -> size of D in memory, in bytes");
2056
2057 PyDoc_STRVAR(get__doc__,
2058 "D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.");
2059
2060 PyDoc_STRVAR(setdefault_doc__,
2061 "D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D");
2062
2063 PyDoc_STRVAR(pop__doc__,
2064 "D.pop(k[,d]) -> v, remove specified key and return the corresponding value\n\
2065 If key is not found, d is returned if given, otherwise KeyError is raised");
2066
2067 PyDoc_STRVAR(popitem__doc__,
2068 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
2069 2-tuple; but raise KeyError if D is empty");
2070
2071 PyDoc_STRVAR(keys__doc__,
2072 "D.keys() -> list of D's keys");
2073
2074 PyDoc_STRVAR(items__doc__,
2075 "D.items() -> list of D's (key, value) pairs, as 2-tuples");
2076
2077 PyDoc_STRVAR(values__doc__,
2078 "D.values() -> list of D's values");
2079
2080 PyDoc_STRVAR(update__doc__,
2081 "D.update(E, **F) -> None.  Update D from E and F: for k in E: D[k] = E[k]\n\
2082 (if E has keys else: for (k, v) in E: D[k] = v) then: for k in F: D[k] = F[k]");
2083
2084 PyDoc_STRVAR(fromkeys__doc__,
2085 "dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.\n\
2086 v defaults to None.");
2087
2088 PyDoc_STRVAR(clear__doc__,
2089 "D.clear() -> None.  Remove all items from D.");
2090
2091 PyDoc_STRVAR(copy__doc__,
2092 "D.copy() -> a shallow copy of D");
2093
2094 PyDoc_STRVAR(iterkeys__doc__,
2095 "D.iterkeys() -> an iterator over the keys of D");
2096
2097 PyDoc_STRVAR(itervalues__doc__,
2098 "D.itervalues() -> an iterator over the values of D");
2099
2100 PyDoc_STRVAR(iteritems__doc__,
2101 "D.iteritems() -> an iterator over the (key, value) items of D");
2102
2103 static PyMethodDef mapp_methods[] = {
2104         {"__contains__",(PyCFunction)dict_contains,     METH_O | METH_COEXIST,
2105          contains__doc__},
2106         {"__getitem__", (PyCFunction)dict_subscript,    METH_O | METH_COEXIST,
2107          getitem__doc__},
2108         {"__sizeof__",  (PyCFunction)dict_sizeof,       METH_NOARGS,
2109          sizeof__doc__},
2110         {"has_key",     (PyCFunction)dict_has_key,      METH_O,
2111          has_key__doc__},
2112         {"get",         (PyCFunction)dict_get,          METH_VARARGS,
2113          get__doc__},
2114         {"setdefault",  (PyCFunction)dict_setdefault,   METH_VARARGS,
2115          setdefault_doc__},
2116         {"pop",         (PyCFunction)dict_pop,          METH_VARARGS,
2117          pop__doc__},
2118         {"popitem",     (PyCFunction)dict_popitem,      METH_NOARGS,
2119          popitem__doc__},
2120         {"keys",        (PyCFunction)dict_keys,         METH_NOARGS,
2121         keys__doc__},
2122         {"items",       (PyCFunction)dict_items,        METH_NOARGS,
2123          items__doc__},
2124         {"values",      (PyCFunction)dict_values,       METH_NOARGS,
2125          values__doc__},
2126         {"update",      (PyCFunction)dict_update,       METH_VARARGS | METH_KEYWORDS,
2127          update__doc__},
2128         {"fromkeys",    (PyCFunction)dict_fromkeys,     METH_VARARGS | METH_CLASS,
2129          fromkeys__doc__},
2130         {"clear",       (PyCFunction)dict_clear,        METH_NOARGS,
2131          clear__doc__},
2132         {"copy",        (PyCFunction)dict_copy,         METH_NOARGS,
2133          copy__doc__},
2134         {"iterkeys",    (PyCFunction)dict_iterkeys,     METH_NOARGS,
2135          iterkeys__doc__},
2136         {"itervalues",  (PyCFunction)dict_itervalues,   METH_NOARGS,
2137          itervalues__doc__},
2138         {"iteritems",   (PyCFunction)dict_iteritems,    METH_NOARGS,
2139          iteritems__doc__},
2140         {NULL,          NULL}   /* sentinel */
2141 };
2142
2143 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
2144 int
2145 PyDict_Contains(PyObject *op, PyObject *key)
2146 {
2147         long hash;
2148         PyDictObject *mp = (PyDictObject *)op;
2149         PyDictEntry *ep;
2150
2151         if (!PyString_CheckExact(key) ||
2152             (hash = ((PyStringObject *) key)->ob_shash) == -1) {
2153                 hash = PyObject_Hash(key);
2154                 if (hash == -1)
2155                         return -1;
2156         }
2157         ep = (mp->ma_lookup)(mp, key, hash);
2158         return ep == NULL ? -1 : (ep->me_value != NULL);
2159 }
2160
2161 /* Internal version of PyDict_Contains used when the hash value is already known */
2162 int
2163 _PyDict_Contains(PyObject *op, PyObject *key, long hash)
2164 {
2165         PyDictObject *mp = (PyDictObject *)op;
2166         PyDictEntry *ep;
2167
2168         ep = (mp->ma_lookup)(mp, key, hash);
2169         return ep == NULL ? -1 : (ep->me_value != NULL);
2170 }
2171
2172 /* Hack to implement "key in dict" */
2173 static PySequenceMethods dict_as_sequence = {
2174         0,                      /* sq_length */
2175         0,                      /* sq_concat */
2176         0,                      /* sq_repeat */
2177         0,                      /* sq_item */
2178         0,                      /* sq_slice */
2179         0,                      /* sq_ass_item */
2180         0,                      /* sq_ass_slice */
2181         PyDict_Contains,        /* sq_contains */
2182         0,                      /* sq_inplace_concat */
2183         0,                      /* sq_inplace_repeat */
2184 };
2185
2186 static PyObject *
2187 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2188 {
2189         PyObject *self;
2190
2191         assert(type != NULL && type->tp_alloc != NULL);
2192         self = type->tp_alloc(type, 0);
2193         if (self != NULL) {
2194                 PyDictObject *d = (PyDictObject *)self;
2195                 /* It's guaranteed that tp->alloc zeroed out the struct. */
2196                 assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
2197                 INIT_NONZERO_DICT_SLOTS(d);
2198                 d->ma_lookup = lookdict_string;
2199 #ifdef SHOW_CONVERSION_COUNTS
2200                 ++created;
2201 #endif
2202         }
2203         return self;
2204 }
2205
2206 static int
2207 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
2208 {
2209         return dict_update_common(self, args, kwds, "dict");
2210 }
2211
2212 static PyObject *
2213 dict_iter(PyDictObject *dict)
2214 {
2215         return dictiter_new(dict, &PyDictIterKey_Type);
2216 }
2217
2218 PyDoc_STRVAR(dictionary_doc,
2219 "dict() -> new empty dictionary.\n"
2220 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
2221 "    (key, value) pairs.\n"
2222 "dict(seq) -> new dictionary initialized as if via:\n"
2223 "    d = {}\n"
2224 "    for k, v in seq:\n"
2225 "        d[k] = v\n"
2226 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
2227 "    in the keyword argument list.  For example:  dict(one=1, two=2)");
2228
2229 PyTypeObject PyDict_Type = {
2230         PyVarObject_HEAD_INIT(&PyType_Type, 0)
2231         "dict",
2232         sizeof(PyDictObject),
2233         0,
2234         (destructor)dict_dealloc,               /* tp_dealloc */
2235         (printfunc)dict_print,                  /* tp_print */
2236         0,                                      /* tp_getattr */
2237         0,                                      /* tp_setattr */
2238         (cmpfunc)dict_compare,                  /* tp_compare */
2239         (reprfunc)dict_repr,                    /* tp_repr */
2240         0,                                      /* tp_as_number */
2241         &dict_as_sequence,                      /* tp_as_sequence */
2242         &dict_as_mapping,                       /* tp_as_mapping */
2243         (hashfunc)PyObject_HashNotImplemented,  /* tp_hash */
2244         0,                                      /* tp_call */
2245         0,                                      /* tp_str */
2246         PyObject_GenericGetAttr,                /* tp_getattro */
2247         0,                                      /* tp_setattro */
2248         0,                                      /* tp_as_buffer */
2249         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2250                 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS, /* tp_flags */
2251         dictionary_doc,                         /* tp_doc */
2252         dict_traverse,                          /* tp_traverse */
2253         dict_tp_clear,                          /* tp_clear */
2254         dict_richcompare,                       /* tp_richcompare */
2255         0,                                      /* tp_weaklistoffset */
2256         (getiterfunc)dict_iter,                 /* tp_iter */
2257         0,                                      /* tp_iternext */
2258         mapp_methods,                           /* tp_methods */
2259         0,                                      /* tp_members */
2260         0,                                      /* tp_getset */
2261         0,                                      /* tp_base */
2262         0,                                      /* tp_dict */
2263         0,                                      /* tp_descr_get */
2264         0,                                      /* tp_descr_set */
2265         0,                                      /* tp_dictoffset */
2266         dict_init,                              /* tp_init */
2267         PyType_GenericAlloc,                    /* tp_alloc */
2268         dict_new,                               /* tp_new */
2269         PyObject_GC_Del,                        /* tp_free */
2270 };
2271
2272 /* For backward compatibility with old dictionary interface */
2273
2274 PyObject *
2275 PyDict_GetItemString(PyObject *v, const char *key)
2276 {
2277         PyObject *kv, *rv;
2278         kv = PyString_FromString(key);
2279         if (kv == NULL)
2280                 return NULL;
2281         rv = PyDict_GetItem(v, kv);
2282         Py_DECREF(kv);
2283         return rv;
2284 }
2285
2286 int
2287 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
2288 {
2289         PyObject *kv;
2290         int err;
2291         kv = PyString_FromString(key);
2292         if (kv == NULL)
2293                 return -1;
2294         PyString_InternInPlace(&kv); /* XXX Should we really? */
2295         err = PyDict_SetItem(v, kv, item);
2296         Py_DECREF(kv);
2297         return err;
2298 }
2299
2300 int
2301 PyDict_DelItemString(PyObject *v, const char *key)
2302 {
2303         PyObject *kv;
2304         int err;
2305         kv = PyString_FromString(key);
2306         if (kv == NULL)
2307                 return -1;
2308         err = PyDict_DelItem(v, kv);
2309         Py_DECREF(kv);
2310         return err;
2311 }
2312
2313 /* Dictionary iterator types */
2314
2315 typedef struct {
2316         PyObject_HEAD
2317         PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
2318         Py_ssize_t di_used;
2319         Py_ssize_t di_pos;
2320         PyObject* di_result; /* reusable result tuple for iteritems */
2321         Py_ssize_t len;
2322 } dictiterobject;
2323
2324 static PyObject *
2325 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
2326 {
2327         dictiterobject *di;
2328         di = PyObject_New(dictiterobject, itertype);
2329         if (di == NULL)
2330                 return NULL;
2331         Py_INCREF(dict);
2332         di->di_dict = dict;
2333         di->di_used = dict->ma_used;
2334         di->di_pos = 0;
2335         di->len = dict->ma_used;
2336         if (itertype == &PyDictIterItem_Type) {
2337                 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
2338                 if (di->di_result == NULL) {
2339                         Py_DECREF(di);
2340                         return NULL;
2341                 }
2342         }
2343         else
2344                 di->di_result = NULL;
2345         return (PyObject *)di;
2346 }
2347
2348 static void
2349 dictiter_dealloc(dictiterobject *di)
2350 {
2351         Py_XDECREF(di->di_dict);
2352         Py_XDECREF(di->di_result);
2353         PyObject_Del(di);
2354 }
2355
2356 static PyObject *
2357 dictiter_len(dictiterobject *di)
2358 {
2359         Py_ssize_t len = 0;
2360         if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
2361                 len = di->len;
2362         return PyInt_FromSize_t(len);
2363 }
2364
2365 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
2366
2367 static PyMethodDef dictiter_methods[] = {
2368         {"__length_hint__", (PyCFunction)dictiter_len, METH_NOARGS, length_hint_doc},
2369         {NULL,          NULL}           /* sentinel */
2370 };
2371
2372 static PyObject *dictiter_iternextkey(dictiterobject *di)
2373 {
2374         PyObject *key;
2375         register Py_ssize_t i, mask;
2376         register PyDictEntry *ep;
2377         PyDictObject *d = di->di_dict;
2378
2379         if (d == NULL)
2380                 return NULL;
2381         assert (PyDict_Check(d));
2382
2383         if (di->di_used != d->ma_used) {
2384                 PyErr_SetString(PyExc_RuntimeError,
2385                                 "dictionary changed size during iteration");
2386                 di->di_used = -1; /* Make this state sticky */
2387                 return NULL;
2388         }
2389
2390         i = di->di_pos;
2391         if (i < 0)
2392                 goto fail;
2393         ep = d->ma_table;
2394         mask = d->ma_mask;
2395         while (i <= mask && ep[i].me_value == NULL)
2396                 i++;
2397         di->di_pos = i+1;
2398         if (i > mask)
2399                 goto fail;
2400         di->len--;
2401         key = ep[i].me_key;
2402         Py_INCREF(key);
2403         return key;
2404
2405 fail:
2406         Py_DECREF(d);
2407         di->di_dict = NULL;
2408         return NULL;
2409 }
2410
2411 PyTypeObject PyDictIterKey_Type = {
2412         PyVarObject_HEAD_INIT(&PyType_Type, 0)
2413         "dictionary-keyiterator",               /* tp_name */
2414         sizeof(dictiterobject),                 /* tp_basicsize */
2415         0,                                      /* tp_itemsize */
2416         /* methods */
2417         (destructor)dictiter_dealloc,           /* tp_dealloc */
2418         0,                                      /* tp_print */
2419         0,                                      /* tp_getattr */
2420         0,                                      /* tp_setattr */
2421         0,                                      /* tp_compare */
2422         0,                                      /* tp_repr */
2423         0,                                      /* tp_as_number */
2424         0,                                      /* tp_as_sequence */
2425         0,                                      /* tp_as_mapping */
2426         0,                                      /* tp_hash */
2427         0,                                      /* tp_call */
2428         0,                                      /* tp_str */
2429         PyObject_GenericGetAttr,                /* tp_getattro */
2430         0,                                      /* tp_setattro */
2431         0,                                      /* tp_as_buffer */
2432         Py_TPFLAGS_DEFAULT,                     /* tp_flags */
2433         0,                                      /* tp_doc */
2434         0,                                      /* tp_traverse */
2435         0,                                      /* tp_clear */
2436         0,                                      /* tp_richcompare */
2437         0,                                      /* tp_weaklistoffset */
2438         PyObject_SelfIter,                      /* tp_iter */
2439         (iternextfunc)dictiter_iternextkey,     /* tp_iternext */
2440         dictiter_methods,                       /* tp_methods */
2441         0,
2442 };
2443
2444 static PyObject *dictiter_iternextvalue(dictiterobject *di)
2445 {
2446         PyObject *value;
2447         register Py_ssize_t i, mask;
2448         register PyDictEntry *ep;
2449         PyDictObject *d = di->di_dict;
2450
2451         if (d == NULL)
2452                 return NULL;
2453         assert (PyDict_Check(d));
2454
2455         if (di->di_used != d->ma_used) {
2456                 PyErr_SetString(PyExc_RuntimeError,
2457                                 "dictionary changed size during iteration");
2458                 di->di_used = -1; /* Make this state sticky */
2459                 return NULL;
2460         }
2461
2462         i = di->di_pos;
2463         mask = d->ma_mask;
2464         if (i < 0 || i > mask)
2465                 goto fail;
2466         ep = d->ma_table;
2467         while ((value=ep[i].me_value) == NULL) {
2468                 i++;
2469                 if (i > mask)
2470                         goto fail;
2471         }
2472         di->di_pos = i+1;
2473         di->len--;
2474         Py_INCREF(value);
2475         return value;
2476
2477 fail:
2478         Py_DECREF(d);
2479         di->di_dict = NULL;
2480         return NULL;
2481 }
2482
2483 PyTypeObject PyDictIterValue_Type = {
2484         PyVarObject_HEAD_INIT(&PyType_Type, 0)
2485         "dictionary-valueiterator",             /* tp_name */
2486         sizeof(dictiterobject),                 /* tp_basicsize */
2487         0,                                      /* tp_itemsize */
2488         /* methods */
2489         (destructor)dictiter_dealloc,           /* tp_dealloc */
2490         0,                                      /* tp_print */
2491         0,                                      /* tp_getattr */
2492         0,                                      /* tp_setattr */
2493         0,                                      /* tp_compare */
2494         0,                                      /* tp_repr */
2495         0,                                      /* tp_as_number */
2496         0,                                      /* tp_as_sequence */
2497         0,                                      /* tp_as_mapping */
2498         0,                                      /* tp_hash */
2499         0,                                      /* tp_call */
2500         0,                                      /* tp_str */
2501         PyObject_GenericGetAttr,                /* tp_getattro */
2502         0,                                      /* tp_setattro */
2503         0,                                      /* tp_as_buffer */
2504         Py_TPFLAGS_DEFAULT,                     /* tp_flags */
2505         0,                                      /* tp_doc */
2506         0,                                      /* tp_traverse */
2507         0,                                      /* tp_clear */
2508         0,                                      /* tp_richcompare */
2509         0,                                      /* tp_weaklistoffset */
2510         PyObject_SelfIter,                      /* tp_iter */
2511         (iternextfunc)dictiter_iternextvalue,   /* tp_iternext */
2512         dictiter_methods,                       /* tp_methods */
2513         0,
2514 };
2515
2516 static PyObject *dictiter_iternextitem(dictiterobject *di)
2517 {
2518         PyObject *key, *value, *result = di->di_result;
2519         register Py_ssize_t i, mask;
2520         register PyDictEntry *ep;
2521         PyDictObject *d = di->di_dict;
2522
2523         if (d == NULL)
2524                 return NULL;
2525         assert (PyDict_Check(d));
2526
2527         if (di->di_used != d->ma_used) {
2528                 PyErr_SetString(PyExc_RuntimeError,
2529                                 "dictionary changed size during iteration");
2530                 di->di_used = -1; /* Make this state sticky */
2531                 return NULL;
2532         }
2533
2534         i = di->di_pos;
2535         if (i < 0)
2536                 goto fail;
2537         ep = d->ma_table;
2538         mask = d->ma_mask;
2539         while (i <= mask && ep[i].me_value == NULL)
2540                 i++;
2541         di->di_pos = i+1;
2542         if (i > mask)
2543                 goto fail;
2544
2545         if (result->ob_refcnt == 1) {
2546                 Py_INCREF(result);
2547                 Py_DECREF(PyTuple_GET_ITEM(result, 0));
2548                 Py_DECREF(PyTuple_GET_ITEM(result, 1));
2549         } else {
2550                 result = PyTuple_New(2);
2551                 if (result == NULL)
2552                         return NULL;
2553         }
2554         di->len--;
2555         key = ep[i].me_key;
2556         value = ep[i].me_value;
2557         Py_INCREF(key);
2558         Py_INCREF(value);
2559         PyTuple_SET_ITEM(result, 0, key);
2560         PyTuple_SET_ITEM(result, 1, value);
2561         return result;
2562
2563 fail:
2564         Py_DECREF(d);
2565         di->di_dict = NULL;
2566         return NULL;
2567 }
2568
2569 PyTypeObject PyDictIterItem_Type = {
2570         PyVarObject_HEAD_INIT(&PyType_Type, 0)
2571         "dictionary-itemiterator",              /* tp_name */
2572         sizeof(dictiterobject),                 /* tp_basicsize */
2573         0,                                      /* tp_itemsize */
2574         /* methods */
2575         (destructor)dictiter_dealloc,           /* tp_dealloc */
2576         0,                                      /* tp_print */
2577         0,                                      /* tp_getattr */
2578         0,                                      /* tp_setattr */
2579         0,                                      /* tp_compare */
2580         0,                                      /* tp_repr */
2581         0,                                      /* tp_as_number */
2582         0,                                      /* tp_as_sequence */
2583         0,                                      /* tp_as_mapping */
2584         0,                                      /* tp_hash */
2585         0,                                      /* tp_call */
2586         0,                                      /* tp_str */
2587         PyObject_GenericGetAttr,                /* tp_getattro */
2588         0,                                      /* tp_setattro */
2589         0,                                      /* tp_as_buffer */
2590         Py_TPFLAGS_DEFAULT,                     /* tp_flags */
2591         0,                                      /* tp_doc */
2592         0,                                      /* tp_traverse */
2593         0,                                      /* tp_clear */
2594         0,                                      /* tp_richcompare */
2595         0,                                      /* tp_weaklistoffset */
2596         PyObject_SelfIter,                      /* tp_iter */
2597         (iternextfunc)dictiter_iternextitem,    /* tp_iternext */
2598         dictiter_methods,                       /* tp_methods */
2599         0,
2600 };