Fix rechaining of pseudo-resurrected string keys.
[luajit-2.0.git] / src / lj_tab.c
blobf2f3c0b0c02f8319d06036673c7f5331a2d15630
1 /*
2 ** Table handling.
3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4 **
5 ** Major portions taken verbatim or adapted from the Lua interpreter.
6 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7 */
9 #define lj_tab_c
10 #define LUA_CORE
12 #include "lj_obj.h"
13 #include "lj_gc.h"
14 #include "lj_err.h"
15 #include "lj_tab.h"
17 /* -- Object hashing ------------------------------------------------------ */
19 /* Hash values are masked with the table hash mask and used as an index. */
20 static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
22 Node *n = noderef(t->node);
23 return &n[hash & t->hmask];
26 /* String hashes are precomputed when they are interned. */
27 #define hashstr(t, s) hashmask(t, (s)->hash)
29 #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
30 #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
31 #define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
32 #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
34 /* Hash an arbitrary key and return its anchor position in the hash table. */
35 static Node *hashkey(const GCtab *t, cTValue *key)
37 lua_assert(!tvisint(key));
38 if (tvisstr(key))
39 return hashstr(t, strV(key));
40 else if (tvisnum(key))
41 return hashnum(t, key);
42 else if (tvisbool(key))
43 return hashmask(t, boolV(key));
44 else
45 return hashgcref(t, key->gcr);
46 /* Only hash 32 bits of lightuserdata on a 64 bit CPU. Good enough? */
49 /* -- Table creation and destruction -------------------------------------- */
51 /* Create new hash part for table. */
52 static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
54 uint32_t hsize;
55 Node *node;
56 lua_assert(hbits != 0);
57 if (hbits > LJ_MAX_HBITS)
58 lj_err_msg(L, LJ_ERR_TABOV);
59 hsize = 1u << hbits;
60 node = lj_mem_newvec(L, hsize, Node);
61 setmref(node->freetop, &node[hsize]);
62 setmref(t->node, node);
63 t->hmask = hsize-1;
67 ** Q: Why all of these copies of t->hmask, t->node etc. to local variables?
68 ** A: Because alias analysis for C is _really_ tough.
69 ** Even state-of-the-art C compilers won't produce good code without this.
72 /* Clear hash part of table. */
73 static LJ_AINLINE void clearhpart(GCtab *t)
75 uint32_t i, hmask = t->hmask;
76 Node *node = noderef(t->node);
77 lua_assert(t->hmask != 0);
78 for (i = 0; i <= hmask; i++) {
79 Node *n = &node[i];
80 setmref(n->next, NULL);
81 setnilV(&n->key);
82 setnilV(&n->val);
86 /* Clear array part of table. */
87 static LJ_AINLINE void clearapart(GCtab *t)
89 uint32_t i, asize = t->asize;
90 TValue *array = tvref(t->array);
91 for (i = 0; i < asize; i++)
92 setnilV(&array[i]);
95 /* Create a new table. Note: the slots are not initialized (yet). */
96 static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
98 GCtab *t;
99 /* First try to colocate the array part. */
100 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
101 lua_assert((sizeof(GCtab) & 7) == 0);
102 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
103 t->gct = ~LJ_TTAB;
104 t->nomm = (uint8_t)~0;
105 t->colo = (int8_t)asize;
106 setmref(t->array, (TValue *)((char *)t + sizeof(GCtab)));
107 setgcrefnull(t->metatable);
108 t->asize = asize;
109 t->hmask = 0;
110 setmref(t->node, &G(L)->nilnode);
111 } else { /* Otherwise separately allocate the array part. */
112 t = lj_mem_newobj(L, GCtab);
113 t->gct = ~LJ_TTAB;
114 t->nomm = (uint8_t)~0;
115 t->colo = 0;
116 setmref(t->array, NULL);
117 setgcrefnull(t->metatable);
118 t->asize = 0; /* In case the array allocation fails. */
119 t->hmask = 0;
120 setmref(t->node, &G(L)->nilnode);
121 if (asize > 0) {
122 if (asize > LJ_MAX_ASIZE)
123 lj_err_msg(L, LJ_ERR_TABOV);
124 setmref(t->array, lj_mem_newvec(L, asize, TValue));
125 t->asize = asize;
128 if (hbits)
129 newhpart(L, t, hbits);
130 return t;
133 /* Create a new table.
135 ** IMPORTANT NOTE: The API differs from lua_createtable()!
137 ** The array size is non-inclusive. E.g. asize=128 creates array slots
138 ** for 0..127, but not for 128. If you need slots 1..128, pass asize=129
139 ** (slot 0 is wasted in this case).
141 ** The hash size is given in hash bits. hbits=0 means no hash part.
142 ** hbits=1 creates 2 hash slots, hbits=2 creates 4 hash slots and so on.
144 GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
146 GCtab *t = newtab(L, asize, hbits);
147 clearapart(t);
148 if (t->hmask > 0) clearhpart(t);
149 return t;
152 #if LJ_HASJIT
153 GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
155 GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24);
156 clearapart(t);
157 if (t->hmask > 0) clearhpart(t);
158 return t;
160 #endif
162 /* Duplicate a table. */
163 GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
165 GCtab *t;
166 uint32_t asize, hmask;
167 t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
168 lua_assert(kt->asize == t->asize && kt->hmask == t->hmask);
169 t->nomm = 0; /* Keys with metamethod names may be present. */
170 asize = kt->asize;
171 if (asize > 0) {
172 TValue *array = tvref(t->array);
173 TValue *karray = tvref(kt->array);
174 if (asize < 64) { /* An inlined loop beats memcpy for < 512 bytes. */
175 uint32_t i;
176 for (i = 0; i < asize; i++)
177 copyTV(L, &array[i], &karray[i]);
178 } else {
179 memcpy(array, karray, asize*sizeof(TValue));
182 hmask = kt->hmask;
183 if (hmask > 0) {
184 uint32_t i;
185 Node *node = noderef(t->node);
186 Node *knode = noderef(kt->node);
187 ptrdiff_t d = (char *)node - (char *)knode;
188 setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d));
189 for (i = 0; i <= hmask; i++) {
190 Node *kn = &knode[i];
191 Node *n = &node[i];
192 Node *next = nextnode(kn);
193 /* Don't use copyTV here, since it asserts on a copy of a dead key. */
194 n->val = kn->val; n->key = kn->key;
195 setmref(n->next, next == NULL? next : (Node *)((char *)next + d));
198 return t;
201 /* Free a table. */
202 void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
204 if (t->hmask > 0)
205 lj_mem_freevec(g, noderef(t->node), t->hmask+1, Node);
206 if (t->asize > 0 && LJ_MAX_COLOSIZE != 0 && t->colo <= 0)
207 lj_mem_freevec(g, tvref(t->array), t->asize, TValue);
208 if (LJ_MAX_COLOSIZE != 0 && t->colo)
209 lj_mem_free(g, t, sizetabcolo((uint32_t)t->colo & 0x7f));
210 else
211 lj_mem_freet(g, t);
214 /* -- Table resizing ------------------------------------------------------ */
216 /* Resize a table to fit the new array/hash part sizes. */
217 static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
219 Node *oldnode = noderef(t->node);
220 uint32_t oldasize = t->asize;
221 uint32_t oldhmask = t->hmask;
222 if (asize > oldasize) { /* Array part grows? */
223 TValue *array;
224 uint32_t i;
225 if (asize > LJ_MAX_ASIZE)
226 lj_err_msg(L, LJ_ERR_TABOV);
227 if (LJ_MAX_COLOSIZE != 0 && t->colo > 0) {
228 /* A colocated array must be separated and copied. */
229 TValue *oarray = tvref(t->array);
230 array = lj_mem_newvec(L, asize, TValue);
231 t->colo = (int8_t)(t->colo | 0x80); /* Mark as separated (colo < 0). */
232 for (i = 0; i < oldasize; i++)
233 copyTV(L, &array[i], &oarray[i]);
234 } else {
235 array = (TValue *)lj_mem_realloc(L, tvref(t->array),
236 oldasize*sizeof(TValue), asize*sizeof(TValue));
238 setmref(t->array, array);
239 t->asize = asize;
240 for (i = oldasize; i < asize; i++) /* Clear newly allocated slots. */
241 setnilV(&array[i]);
243 /* Create new (empty) hash part. */
244 if (hbits) {
245 newhpart(L, t, hbits);
246 clearhpart(t);
247 } else {
248 global_State *g = G(L);
249 setmref(t->node, &g->nilnode);
250 t->hmask = 0;
252 if (asize < oldasize) { /* Array part shrinks? */
253 TValue *array = tvref(t->array);
254 uint32_t i;
255 t->asize = asize; /* Note: This 'shrinks' even colocated arrays. */
256 for (i = asize; i < oldasize; i++) /* Reinsert old array values. */
257 if (!tvisnil(&array[i]))
258 copyTV(L, lj_tab_setinth(L, t, (int32_t)i), &array[i]);
259 /* Physically shrink only separated arrays. */
260 if (LJ_MAX_COLOSIZE != 0 && t->colo <= 0)
261 setmref(t->array, lj_mem_realloc(L, array,
262 oldasize*sizeof(TValue), asize*sizeof(TValue)));
264 if (oldhmask > 0) { /* Reinsert pairs from old hash part. */
265 global_State *g;
266 uint32_t i;
267 for (i = 0; i <= oldhmask; i++) {
268 Node *n = &oldnode[i];
269 if (!tvisnil(&n->val))
270 copyTV(L, lj_tab_set(L, t, &n->key), &n->val);
272 g = G(L);
273 lj_mem_freevec(g, oldnode, oldhmask+1, Node);
277 static uint32_t countint(cTValue *key, uint32_t *bins)
279 lua_assert(!tvisint(key));
280 if (tvisnum(key)) {
281 lua_Number nk = numV(key);
282 int32_t k = lj_num2int(nk);
283 if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) {
284 bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
285 return 1;
288 return 0;
291 static uint32_t countarray(const GCtab *t, uint32_t *bins)
293 uint32_t na, b, i;
294 if (t->asize == 0) return 0;
295 for (na = i = b = 0; b < LJ_MAX_ABITS; b++) {
296 uint32_t n, top = 2u << b;
297 TValue *array;
298 if (top >= t->asize) {
299 top = t->asize-1;
300 if (i > top)
301 break;
303 array = tvref(t->array);
304 for (n = 0; i <= top; i++)
305 if (!tvisnil(&array[i]))
306 n++;
307 bins[b] += n;
308 na += n;
310 return na;
313 static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
315 uint32_t total, na, i, hmask = t->hmask;
316 Node *node = noderef(t->node);
317 for (total = na = 0, i = 0; i <= hmask; i++) {
318 Node *n = &node[i];
319 if (!tvisnil(&n->val)) {
320 na += countint(&n->key, bins);
321 total++;
324 *narray += na;
325 return total;
328 static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
330 uint32_t b, sum, na = 0, sz = 0, nn = *narray;
331 for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++)
332 if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) {
333 sz = (2u<<b)+1;
334 na = sum;
336 *narray = sz;
337 return na;
340 static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
342 uint32_t bins[LJ_MAX_ABITS];
343 uint32_t total, asize, na, i;
344 for (i = 0; i < LJ_MAX_ABITS; i++) bins[i] = 0;
345 asize = countarray(t, bins);
346 total = 1 + asize;
347 total += counthash(t, bins, &asize);
348 asize += countint(ek, bins);
349 na = bestasize(bins, &asize);
350 total -= na;
351 resizetab(L, t, asize, hsize2hbits(total));
354 #if LJ_HASFFI
355 void lj_tab_rehash(lua_State *L, GCtab *t)
357 rehashtab(L, t, niltv(L));
359 #endif
361 void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
363 resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
366 /* -- Table getters ------------------------------------------------------- */
368 cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
370 TValue k;
371 Node *n;
372 k.n = (lua_Number)key;
373 n = hashnum(t, &k);
374 do {
375 if (tvisnum(&n->key) && n->key.n == k.n)
376 return &n->val;
377 } while ((n = nextnode(n)));
378 return NULL;
381 cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
383 Node *n = hashstr(t, key);
384 do {
385 if (tvisstr(&n->key) && strV(&n->key) == key)
386 return &n->val;
387 } while ((n = nextnode(n)));
388 return NULL;
391 cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
393 if (tvisstr(key)) {
394 cTValue *tv = lj_tab_getstr(t, strV(key));
395 if (tv)
396 return tv;
397 } else if (tvisint(key)) {
398 cTValue *tv = lj_tab_getint(t, intV(key));
399 if (tv)
400 return tv;
401 } else if (tvisnum(key)) {
402 lua_Number nk = numV(key);
403 int32_t k = lj_num2int(nk);
404 if (nk == (lua_Number)k) {
405 cTValue *tv = lj_tab_getint(t, k);
406 if (tv)
407 return tv;
408 } else {
409 goto genlookup; /* Else use the generic lookup. */
411 } else if (!tvisnil(key)) {
412 Node *n;
413 genlookup:
414 n = hashkey(t, key);
415 do {
416 if (lj_obj_equal(&n->key, key))
417 return &n->val;
418 } while ((n = nextnode(n)));
420 return niltv(L);
423 /* -- Table setters ------------------------------------------------------- */
425 /* Insert new key. Use Brent's variation to optimize the chain length. */
426 TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
428 Node *n = hashkey(t, key);
429 if (!tvisnil(&n->val) || t->hmask == 0) {
430 Node *nodebase = noderef(t->node);
431 Node *collide, *freenode = noderef(nodebase->freetop);
432 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
433 do {
434 if (freenode == nodebase) { /* No free node found? */
435 rehashtab(L, t, key); /* Rehash table. */
436 return lj_tab_set(L, t, key); /* Retry key insertion. */
438 } while (!tvisnil(&(--freenode)->key));
439 setmref(nodebase->freetop, freenode);
440 lua_assert(freenode != &G(L)->nilnode);
441 collide = hashkey(t, &n->key);
442 if (collide != n) { /* Colliding node not the main node? */
443 while (noderef(collide->next) != n) /* Find predecessor. */
444 collide = nextnode(collide);
445 setmref(collide->next, freenode); /* Relink chain. */
446 /* Copy colliding node into free node and free main node. */
447 freenode->val = n->val;
448 freenode->key = n->key;
449 freenode->next = n->next;
450 setmref(n->next, NULL);
451 setnilV(&n->val);
452 /* Rechain pseudo-resurrected string keys with colliding hashes. */
453 while (nextnode(freenode)) {
454 Node *nn = nextnode(freenode);
455 if (tvisstr(&nn->key) && !tvisnil(&nn->val) &&
456 hashstr(t, strV(&nn->key)) == n) {
457 freenode->next = nn->next;
458 nn->next = n->next;
459 setmref(n->next, nn);
461 ** Rechaining a resurrected string key creates a new dilemma:
462 ** Another string key may have originally been resurrected via
463 ** _any_ of the previous nodes as a chain anchor. Including
464 ** a node that had to be moved, which makes them unreachable.
465 ** It's not feasible to check for all previous nodes, so rechain
466 ** any string key that's currently in a non-main positions.
468 while ((nn = nextnode(freenode))) {
469 if (tvisstr(&nn->key) && !tvisnil(&nn->val)) {
470 Node *mn = hashstr(t, strV(&nn->key));
471 if (mn != freenode) {
472 freenode->next = nn->next;
473 nn->next = mn->next;
474 setmref(mn->next, nn);
475 } else {
476 freenode = nn;
478 } else {
479 freenode = nn;
482 break;
483 } else {
484 freenode = nn;
487 } else { /* Otherwise use free node. */
488 setmrefr(freenode->next, n->next); /* Insert into chain. */
489 setmref(n->next, freenode);
490 n = freenode;
493 n->key.u64 = key->u64;
494 if (LJ_UNLIKELY(tvismzero(&n->key)))
495 n->key.u64 = 0;
496 lj_gc_anybarriert(L, t);
497 lua_assert(tvisnil(&n->val));
498 return &n->val;
501 TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
503 TValue k;
504 Node *n;
505 k.n = (lua_Number)key;
506 n = hashnum(t, &k);
507 do {
508 if (tvisnum(&n->key) && n->key.n == k.n)
509 return &n->val;
510 } while ((n = nextnode(n)));
511 return lj_tab_newkey(L, t, &k);
514 TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
516 TValue k;
517 Node *n = hashstr(t, key);
518 do {
519 if (tvisstr(&n->key) && strV(&n->key) == key)
520 return &n->val;
521 } while ((n = nextnode(n)));
522 setstrV(L, &k, key);
523 return lj_tab_newkey(L, t, &k);
526 TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
528 Node *n;
529 t->nomm = 0; /* Invalidate negative metamethod cache. */
530 if (tvisstr(key)) {
531 return lj_tab_setstr(L, t, strV(key));
532 } else if (tvisint(key)) {
533 return lj_tab_setint(L, t, intV(key));
534 } else if (tvisnum(key)) {
535 lua_Number nk = numV(key);
536 int32_t k = lj_num2int(nk);
537 if (nk == (lua_Number)k)
538 return lj_tab_setint(L, t, k);
539 if (tvisnan(key))
540 lj_err_msg(L, LJ_ERR_NANIDX);
541 /* Else use the generic lookup. */
542 } else if (tvisnil(key)) {
543 lj_err_msg(L, LJ_ERR_NILIDX);
545 n = hashkey(t, key);
546 do {
547 if (lj_obj_equal(&n->key, key))
548 return &n->val;
549 } while ((n = nextnode(n)));
550 return lj_tab_newkey(L, t, key);
553 /* -- Table traversal ----------------------------------------------------- */
555 /* Get the traversal index of a key. */
556 static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key)
558 TValue tmp;
559 if (tvisint(key)) {
560 int32_t k = intV(key);
561 if ((uint32_t)k < t->asize)
562 return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
563 setnumV(&tmp, (lua_Number)k);
564 key = &tmp;
565 } else if (tvisnum(key)) {
566 lua_Number nk = numV(key);
567 int32_t k = lj_num2int(nk);
568 if ((uint32_t)k < t->asize && nk == (lua_Number)k)
569 return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
571 if (!tvisnil(key)) {
572 Node *n = hashkey(t, key);
573 do {
574 if (lj_obj_equal(&n->key, key))
575 return t->asize + (uint32_t)(n - noderef(t->node));
576 /* Hash key indexes: [t->asize..t->asize+t->nmask] */
577 } while ((n = nextnode(n)));
578 if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */
579 return key->u32.lo - 1;
580 lj_err_msg(L, LJ_ERR_NEXTIDX);
581 return 0; /* unreachable */
583 return ~0u; /* A nil key starts the traversal. */
586 /* Advance to the next step in a table traversal. */
587 int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
589 uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */
590 for (i++; i < t->asize; i++) /* First traverse the array keys. */
591 if (!tvisnil(arrayslot(t, i))) {
592 setintV(key, i);
593 copyTV(L, key+1, arrayslot(t, i));
594 return 1;
596 for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */
597 Node *n = &noderef(t->node)[i];
598 if (!tvisnil(&n->val)) {
599 copyTV(L, key, &n->key);
600 copyTV(L, key+1, &n->val);
601 return 1;
604 return 0; /* End of traversal. */
607 /* -- Table length calculation -------------------------------------------- */
609 static MSize unbound_search(GCtab *t, MSize j)
611 cTValue *tv;
612 MSize i = j; /* i is zero or a present index */
613 j++;
614 /* find `i' and `j' such that i is present and j is not */
615 while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) {
616 i = j;
617 j *= 2;
618 if (j > (MSize)(INT_MAX-2)) { /* overflow? */
619 /* table was built with bad purposes: resort to linear search */
620 i = 1;
621 while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++;
622 return i - 1;
625 /* now do a binary search between them */
626 while (j - i > 1) {
627 MSize m = (i+j)/2;
628 cTValue *tvb = lj_tab_getint(t, (int32_t)m);
629 if (tvb && !tvisnil(tvb)) i = m; else j = m;
631 return i;
635 ** Try to find a boundary in table `t'. A `boundary' is an integer index
636 ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
638 MSize LJ_FASTCALL lj_tab_len(GCtab *t)
640 MSize j = (MSize)t->asize;
641 if (j > 1 && tvisnil(arrayslot(t, j-1))) {
642 MSize i = 1;
643 while (j - i > 1) {
644 MSize m = (i+j)/2;
645 if (tvisnil(arrayslot(t, m-1))) j = m; else i = m;
647 return i-1;
649 if (j) j--;
650 if (t->hmask <= 0)
651 return j;
652 return unbound_search(t, j);