[t] Refactor some namespace pmc tests to use throws_like
[parrot.git] / src / hash.c
blob14320f7d36c5c3b6c7c3e6baecedbbd4524fca7a
1 /*
2 Copyright (C) 2001-2009, Parrot Foundation.
3 $Id$
5 =head1 NAME
7 src/hash.c - Hash table
9 =head1 DESCRIPTION
11 A hashtable contains an array of bucket indexes. Buckets are nodes in a
12 linked list, each containing a C<void *> key and value. During hash
13 creation, the types of key and value as well as appropriate compare and
14 hashing functions can be set.
16 This hash implementation uses just one piece of malloced memory. The
17 C<< hash->bs >> bucket store points to this region.
19 This hash doesn't move during GC, therefore a lot of the old caveats
20 don't apply.
22 =head2 Functions
24 =over 4
26 =cut
30 #include "parrot/parrot.h"
31 #include "pmc/pmc_key.h"
33 /* the number of entries above which it's faster to hash the hashval instead of
34 * looping over the used HashBuckets directly */
35 #define SMALL_HASH_SIZE 4
36 #define INITIAL_BUCKETS 4
38 /* HEADERIZER HFILE: include/parrot/hash.h */
40 /* HEADERIZER BEGIN: static */
41 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
43 PARROT_WARN_UNUSED_RESULT
44 PARROT_PURE_FUNCTION
45 static int cstring_compare(SHIM_INTERP,
46 ARGIN(const char *a),
47 ARGIN(const char *b))
48 __attribute__nonnull__(2)
49 __attribute__nonnull__(3);
51 static void expand_hash(PARROT_INTERP, ARGMOD(Hash *hash))
52 __attribute__nonnull__(1)
53 __attribute__nonnull__(2)
54 FUNC_MODIFIES(*hash);
56 static void hash_freeze(PARROT_INTERP,
57 ARGIN(const Hash * const hash),
58 ARGMOD(visit_info *info))
59 __attribute__nonnull__(1)
60 __attribute__nonnull__(2)
61 __attribute__nonnull__(3)
62 FUNC_MODIFIES(*info);
64 static void hash_thaw(PARROT_INTERP,
65 ARGMOD(Hash *hash),
66 ARGMOD(visit_info *info))
67 __attribute__nonnull__(1)
68 __attribute__nonnull__(2)
69 __attribute__nonnull__(3)
70 FUNC_MODIFIES(*hash)
71 FUNC_MODIFIES(*info);
73 PARROT_WARN_UNUSED_RESULT
74 PARROT_PURE_FUNCTION
75 static size_t key_hash_cstring(SHIM_INTERP,
76 ARGIN(const void *value),
77 size_t seed)
78 __attribute__nonnull__(2);
80 PARROT_WARN_UNUSED_RESULT
81 PARROT_PURE_FUNCTION
82 static size_t key_hash_pointer(SHIM_INTERP,
83 ARGIN(const void *value),
84 size_t seed)
85 __attribute__nonnull__(2);
87 PARROT_WARN_UNUSED_RESULT
88 static size_t key_hash_STRING(PARROT_INTERP,
89 ARGMOD(STRING *s),
90 SHIM(size_t seed))
91 __attribute__nonnull__(1)
92 __attribute__nonnull__(2)
93 FUNC_MODIFIES(*s);
95 static void parrot_mark_hash_both(PARROT_INTERP, ARGIN(Hash *hash))
96 __attribute__nonnull__(1)
97 __attribute__nonnull__(2);
99 static void parrot_mark_hash_keys(PARROT_INTERP, ARGIN(Hash *hash))
100 __attribute__nonnull__(1)
101 __attribute__nonnull__(2);
103 static void parrot_mark_hash_values(PARROT_INTERP, ARGIN(Hash *hash))
104 __attribute__nonnull__(1)
105 __attribute__nonnull__(2);
107 PARROT_WARN_UNUSED_RESULT
108 PARROT_PURE_FUNCTION
109 static int pointer_compare(SHIM_INTERP,
110 ARGIN_NULLOK(const void *a),
111 ARGIN_NULLOK(const void *b));
113 PARROT_WARN_UNUSED_RESULT
114 static int STRING_compare(PARROT_INTERP,
115 ARGIN(const void *search_key),
116 ARGIN_NULLOK(const void *bucket_key))
117 __attribute__nonnull__(1)
118 __attribute__nonnull__(2);
120 #define ASSERT_ARGS_cstring_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = \
121 PARROT_ASSERT_ARG(a) \
122 || PARROT_ASSERT_ARG(b)
123 #define ASSERT_ARGS_expand_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = \
124 PARROT_ASSERT_ARG(interp) \
125 || PARROT_ASSERT_ARG(hash)
126 #define ASSERT_ARGS_hash_freeze __attribute__unused__ int _ASSERT_ARGS_CHECK = \
127 PARROT_ASSERT_ARG(interp) \
128 || PARROT_ASSERT_ARG(hash) \
129 || PARROT_ASSERT_ARG(info)
130 #define ASSERT_ARGS_hash_thaw __attribute__unused__ int _ASSERT_ARGS_CHECK = \
131 PARROT_ASSERT_ARG(interp) \
132 || PARROT_ASSERT_ARG(hash) \
133 || PARROT_ASSERT_ARG(info)
134 #define ASSERT_ARGS_key_hash_cstring __attribute__unused__ int _ASSERT_ARGS_CHECK = \
135 PARROT_ASSERT_ARG(value)
136 #define ASSERT_ARGS_key_hash_pointer __attribute__unused__ int _ASSERT_ARGS_CHECK = \
137 PARROT_ASSERT_ARG(value)
138 #define ASSERT_ARGS_key_hash_STRING __attribute__unused__ int _ASSERT_ARGS_CHECK = \
139 PARROT_ASSERT_ARG(interp) \
140 || PARROT_ASSERT_ARG(s)
141 #define ASSERT_ARGS_parrot_mark_hash_both __attribute__unused__ int _ASSERT_ARGS_CHECK = \
142 PARROT_ASSERT_ARG(interp) \
143 || PARROT_ASSERT_ARG(hash)
144 #define ASSERT_ARGS_parrot_mark_hash_keys __attribute__unused__ int _ASSERT_ARGS_CHECK = \
145 PARROT_ASSERT_ARG(interp) \
146 || PARROT_ASSERT_ARG(hash)
147 #define ASSERT_ARGS_parrot_mark_hash_values __attribute__unused__ int _ASSERT_ARGS_CHECK = \
148 PARROT_ASSERT_ARG(interp) \
149 || PARROT_ASSERT_ARG(hash)
150 #define ASSERT_ARGS_pointer_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = 0
151 #define ASSERT_ARGS_STRING_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = \
152 PARROT_ASSERT_ARG(interp) \
153 || PARROT_ASSERT_ARG(search_key)
154 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
155 /* HEADERIZER END: static */
161 =item C<static size_t key_hash_STRING(PARROT_INTERP, STRING *s, size_t seed)>
163 Returns the hashed value of the key C<value>. See also string.c.
165 =cut
170 PARROT_WARN_UNUSED_RESULT
171 static size_t
172 key_hash_STRING(PARROT_INTERP, ARGMOD(STRING *s), SHIM(size_t seed))
174 ASSERT_ARGS(key_hash_STRING)
176 if (s->hashval)
177 return s->hashval;
179 return Parrot_str_to_hashval(interp, s);
185 =item C<static int STRING_compare(PARROT_INTERP, const void *search_key, const
186 void *bucket_key)>
188 Compares the two strings, returning 0 if they are identical.
190 =cut
194 PARROT_WARN_UNUSED_RESULT
195 static int
196 STRING_compare(PARROT_INTERP, ARGIN(const void *search_key), ARGIN_NULLOK(const void *bucket_key))
198 ASSERT_ARGS(STRING_compare)
199 STRING const *s1 = (STRING const *)search_key;
200 STRING const *s2 = (STRING const *)bucket_key;
202 if (!s2)
203 return 1;
205 if (s1->hashval != s2->hashval)
206 return 1;
208 /* COWed strings */
209 if (Buffer_bufstart(s1) == Buffer_bufstart(s2) && s1->bufused == s2->bufused)
210 return 0;
212 return CHARSET_COMPARE(interp, s1, s2);
218 =item C<static int pointer_compare(PARROT_INTERP, const void *a, const void *b)>
220 Compares the two pointers, returning 0 if they are identical
222 =cut
226 PARROT_WARN_UNUSED_RESULT
227 PARROT_PURE_FUNCTION
228 static int
229 pointer_compare(SHIM_INTERP, ARGIN_NULLOK(const void *a), ARGIN_NULLOK(const void *b))
231 ASSERT_ARGS(pointer_compare)
232 return a != b;
238 =item C<static size_t key_hash_pointer(PARROT_INTERP, const void *value, size_t
239 seed)>
241 Returns a hashvalue for a pointer.
243 =cut
247 PARROT_WARN_UNUSED_RESULT
248 PARROT_PURE_FUNCTION
249 static size_t
250 key_hash_pointer(SHIM_INTERP, ARGIN(const void *value), size_t seed)
252 ASSERT_ARGS(key_hash_pointer)
253 return ((size_t) value) ^ seed;
259 =item C<static size_t key_hash_cstring(PARROT_INTERP, const void *value, size_t
260 seed)>
262 Creates and returns a hash value from a string.
264 Takes an interpreter, a pointer to a string, and a seed value.
265 Returns the hash value.
267 Used by parrot_new_cstring_hash.
269 =cut
273 PARROT_WARN_UNUSED_RESULT
274 PARROT_PURE_FUNCTION
275 static size_t
276 key_hash_cstring(SHIM_INTERP, ARGIN(const void *value), size_t seed)
278 ASSERT_ARGS(key_hash_cstring)
279 const unsigned char * p = (const unsigned char *) value;
280 register size_t h = seed;
282 while (*p) {
283 h += h << 5;
284 h += *p++;
287 return h;
293 =item C<static int cstring_compare(PARROT_INTERP, const char *a, const char *b)>
295 Compares two C strings for equality, returning -1, 0, and 1 if the first string
296 is less than, equal to, or greater than the second, respectively.
298 =cut
302 PARROT_WARN_UNUSED_RESULT
303 PARROT_PURE_FUNCTION
304 static int
305 cstring_compare(SHIM_INTERP, ARGIN(const char *a), ARGIN(const char *b))
307 ASSERT_ARGS(cstring_compare)
308 return strcmp(a, b);
314 =item C<size_t key_hash_int(PARROT_INTERP, const void *value, size_t seed)>
316 Returns a hashed value for an integer key (passed as a void pointer, sadly).
318 =cut
322 PARROT_WARN_UNUSED_RESULT
323 PARROT_PURE_FUNCTION
324 size_t
325 key_hash_int(SHIM_INTERP, ARGIN_NULLOK(const void *value), size_t seed)
327 ASSERT_ARGS(key_hash_int)
328 return (size_t)value ^ seed;
333 =item C<int int_compare(PARROT_INTERP, const void *a, const void *b)>
335 Compares two integers for equality, returning -1, 0, and 1 if the first is less
336 than, equal to, or greater than the second, respectively. Uses void pointers
337 to store the integers, sadly.
339 =cut
343 PARROT_WARN_UNUSED_RESULT
344 PARROT_PURE_FUNCTION
346 int_compare(SHIM_INTERP, ARGIN_NULLOK(const void *a), ARGIN_NULLOK(const void *b))
348 ASSERT_ARGS(int_compare)
349 return a != b;
354 =item C<void parrot_dump_hash(PARROT_INTERP, const Hash *hash)>
356 Prints out the hash in human-readable form, at least once someone implements
357 this.
359 =cut
363 PARROT_EXPORT
364 void
365 parrot_dump_hash(SHIM_INTERP, SHIM(const Hash *hash))
367 ASSERT_ARGS(parrot_dump_hash)
373 =item C<void parrot_mark_hash(PARROT_INTERP, Hash *hash)>
375 Marks the hash and its contents as live. Assumes that key and value are non
376 null in all buckets.
378 =cut
382 PARROT_EXPORT
383 void
384 parrot_mark_hash(PARROT_INTERP, ARGIN(Hash *hash))
386 ASSERT_ARGS(parrot_mark_hash)
387 int mark_key = 0;
388 int mark_value = 0;
390 if (hash->entry_type == (PARROT_DATA_TYPE) enum_hash_string
391 || hash->entry_type == (PARROT_DATA_TYPE) enum_hash_pmc)
392 mark_value = 1;
394 if (hash->key_type == Hash_key_type_STRING
395 || hash->key_type == Hash_key_type_PMC)
396 mark_key = 1;
398 if (mark_key) {
399 if (mark_value)
400 parrot_mark_hash_both(interp, hash);
401 else
402 parrot_mark_hash_keys(interp, hash);
404 else {
405 if (mark_value)
406 parrot_mark_hash_values(interp, hash);
413 =item C<static void parrot_mark_hash_keys(PARROT_INTERP, Hash *hash)>
415 Marks the hash and only its keys as live.
417 =cut
421 static void
422 parrot_mark_hash_keys(PARROT_INTERP, ARGIN(Hash *hash))
424 ASSERT_ARGS(parrot_mark_hash_keys)
425 UINTVAL entries = hash->entries;
426 UINTVAL found = 0;
427 INTVAL i;
429 for (i = hash->mask; i >= 0; --i) {
430 HashBucket *bucket = hash->bi[i];
432 while (bucket) {
433 if (++found > entries)
434 Parrot_ex_throw_from_c_args(interp, NULL, 1,
435 "Detected hash corruption at hash %p entries %d",
436 hash, (int)entries);
438 PARROT_ASSERT(bucket->key);
439 Parrot_gc_mark_PObj_alive(interp, (PObj *)bucket->key);
441 bucket = bucket->next;
449 =item C<static void parrot_mark_hash_values(PARROT_INTERP, Hash *hash)>
451 Marks the hash and only its values as live.
453 =cut
457 static void
458 parrot_mark_hash_values(PARROT_INTERP, ARGIN(Hash *hash))
460 ASSERT_ARGS(parrot_mark_hash_values)
461 const UINTVAL entries = hash->entries;
462 UINTVAL found = 0;
463 INTVAL i;
465 for (i = hash->mask; i >= 0; --i) {
466 HashBucket *bucket = hash->bi[i];
468 while (bucket) {
469 if (++found > entries)
470 Parrot_ex_throw_from_c_args(interp, NULL, 1,
471 "Detected hash corruption at hash %p entries %d",
472 hash, (int)entries);
474 PARROT_ASSERT(bucket->value);
475 Parrot_gc_mark_PObj_alive(interp, (PObj *)bucket->value);
477 bucket = bucket->next;
485 =item C<static void parrot_mark_hash_both(PARROT_INTERP, Hash *hash)>
487 Marks the hash and both its keys and values as live.
489 =cut
493 static void
494 parrot_mark_hash_both(PARROT_INTERP, ARGIN(Hash *hash))
496 ASSERT_ARGS(parrot_mark_hash_both)
497 const UINTVAL entries = hash->entries;
498 UINTVAL found = 0;
499 INTVAL i;
501 for (i = hash->mask; i >= 0; --i) {
502 HashBucket *bucket = hash->bi[i];
504 while (bucket) {
505 if (++found > entries)
506 Parrot_ex_throw_from_c_args(interp, NULL, 1,
507 "Detected hash corruption at hash %p entries %d",
508 hash, (int)entries);
510 PARROT_ASSERT(bucket->key);
511 Parrot_gc_mark_PObj_alive(interp, (PObj *)bucket->key);
513 PARROT_ASSERT(bucket->value);
514 Parrot_gc_mark_PObj_alive(interp, (PObj *)bucket->value);
516 bucket = bucket->next;
524 =item C<static void hash_thaw(PARROT_INTERP, Hash *hash, visit_info *info)>
526 Visits the contents of a hash during freeze/thaw.
528 C<pinfo> is the visit info, (see include/parrot/pmc_freeze.h>).
530 =cut
534 static void
535 hash_thaw(PARROT_INTERP, ARGMOD(Hash *hash), ARGMOD(visit_info *info))
537 ASSERT_ARGS(hash_thaw)
538 IMAGE_IO * const io = info->image_io;
540 /* during thaw, info->extra is the key/value count */
541 const size_t num_entries = (size_t) hash->entries;
542 size_t entry_index;
544 hash->entries = 0;
546 for (entry_index = 0; entry_index < num_entries; ++entry_index) {
547 HashBucket *b;
549 switch (hash->key_type) {
550 case Hash_key_type_STRING:
552 STRING * const s_key = VTABLE_shift_string(interp, io);
553 b = parrot_hash_put(interp, hash, s_key, NULL);
555 break;
556 case Hash_key_type_int:
558 const INTVAL i_key = VTABLE_shift_integer(interp, io);
559 b = parrot_hash_put(interp, hash, (void*)i_key, NULL);
561 break;
562 default:
563 Parrot_ex_throw_from_c_args(interp, NULL, 1,
564 "unimplemented key type");
565 break;
568 switch (hash->entry_type) {
569 case enum_hash_pmc:
571 /* this looks awful, but it avoids type-punning warnings */
572 void **ptr = &b->value;
573 info->thaw_ptr = (PMC **)ptr;
574 (info->visit_pmc_now)(interp, NULL, info);
575 break;
577 case enum_hash_int:
579 const INTVAL i = VTABLE_shift_integer(interp, io);
580 b->value = (void *)i;
581 break;
583 default:
584 Parrot_ex_throw_from_c_args(interp, NULL, 1,
585 "unimplemented value type");
586 break;
594 =item C<static void hash_freeze(PARROT_INTERP, const Hash * const hash,
595 visit_info *info)>
597 Freezes hash into a string.
599 Takes an interpreter, a pointer to the hash, and a pointer to the structure
600 containing the string start location.
602 Use by parrot_hash_visit.
604 =cut
608 static void
609 hash_freeze(PARROT_INTERP, ARGIN(const Hash * const hash), ARGMOD(visit_info *info))
611 ASSERT_ARGS(hash_freeze)
612 IMAGE_IO * const io = info->image_io;
613 size_t i;
615 for (i = 0; i < hash->entries; i++) {
616 HashBucket * const b = hash->bs+i;
618 switch (hash->key_type) {
619 case Hash_key_type_STRING:
620 VTABLE_push_string(interp, io, (STRING *)b->key);
621 break;
622 case Hash_key_type_int:
623 VTABLE_push_integer(interp, io, (INTVAL)b->key);
624 break;
625 default:
626 Parrot_ex_throw_from_c_args(interp, NULL, 1,
627 "unimplemented key type");
628 break;
631 switch (hash->entry_type) {
632 case enum_hash_pmc:
633 (info->visit_pmc_now)(interp, (PMC *)b->value, info);
634 break;
635 case enum_hash_int:
636 VTABLE_push_integer(interp, io, (INTVAL)b->value);
637 break;
638 default:
639 Parrot_ex_throw_from_c_args(interp, NULL, 1,
640 "unimplemented value type");
641 break;
649 =item C<void parrot_hash_visit(PARROT_INTERP, Hash *hash, void *pinfo)>
651 Freezes or thaws a hash as specified. Takes an interpreter, a pointer to the
652 hash, and a pointer to the structure identifying what to do and the location of
653 the string.
655 =cut
659 PARROT_EXPORT
660 void
661 parrot_hash_visit(PARROT_INTERP, ARGMOD(Hash *hash), ARGMOD(void *pinfo))
663 ASSERT_ARGS(parrot_hash_visit)
664 visit_info* const info = (visit_info*) pinfo;
666 switch (info->what) {
667 case VISIT_THAW_NORMAL:
668 case VISIT_THAW_CONSTANTS:
669 hash_thaw(interp, hash, info);
670 break;
671 case VISIT_FREEZE_NORMAL:
672 case VISIT_FREEZE_AT_DESTRUCT:
673 hash_freeze(interp, hash, info);
674 break;
675 default:
676 Parrot_ex_throw_from_c_args(interp, NULL, 1,
677 "unimplemented visit mode");
684 =item C<static void expand_hash(PARROT_INTERP, Hash *hash)>
686 Expands a hash when necessary.
688 For a hashtable of size N, we use C<MAXFULL_PERCENT> % of N as the
689 number of buckets. This way, as soon as we run out of buckets on the
690 free list, we know that it's time to resize the hashtable.
692 Algorithm for expansion: We exactly double the size of the hashtable.
693 Keys are assigned to buckets with the formula
695 bucket_index = hash(key) % parrot_hash_size
697 When doubling the size of the hashtable, we know that every key is either
698 already in the correct bucket, or belongs in the current bucket plus
699 C<parrot_hash_size> (the old C<parrot_hash_size>). In fact, because the
700 hashtable is always a power of two in size, it depends only on the next bit
701 in the hash value, after the ones previously used.
703 We scan through all the buckets in order, moving the buckets that need to be
704 moved. No bucket will be scanned twice, and the cache should be reasonably
705 happy because the hashtable accesses will be two parallel sequential scans.
706 (Of course, this also mucks with the C<< ->next >> pointers, and they'll be
707 all over memory.)
709 =cut
713 static void
714 expand_hash(PARROT_INTERP, ARGMOD(Hash *hash))
716 ASSERT_ARGS(expand_hash)
717 HashBucket **old_bi, **new_bi;
718 HashBucket *bs, *b, *new_mem;
719 HashBucket *old_offset = (HashBucket *)((char *)hash + sizeof (Hash));
721 void * const old_mem = hash->bs;
722 const UINTVAL old_size = hash->mask + 1;
723 const UINTVAL new_size = old_size << 1;
724 const UINTVAL old_nb = N_BUCKETS(old_size);
725 size_t offset, i, new_loc;
728 allocate some less buckets
729 e.g. 3 buckets, 4 pointers:
731 +---+---+---+-+-+-+-+
732 | --> bs | -> bi |
733 +---+---+---+-+-+-+-+
735 | old_mem | hash->bi
738 /* resize mem */
739 if (old_offset != old_mem) {
740 /* This buffer has been reallocated at least once before. */
741 new_mem = (HashBucket *)mem_sys_realloc(old_mem, HASH_ALLOC_SIZE(new_size));
743 else {
744 /* Allocate a new buffer. */
745 new_mem = (HashBucket *)mem_sys_allocate(HASH_ALLOC_SIZE(new_size));
746 memcpy(new_mem, old_mem, HASH_ALLOC_SIZE(old_size));
750 +---+---+---+---+---+---+-+-+-+-+-+-+-+-+
751 | bs | old_bi | new_bi |
752 +---+---+---+---+---+---+-+-+-+-+-+-+-+-+
754 | new_mem | hash->bi
756 bs = new_mem;
757 old_bi = (HashBucket **)(bs + old_nb);
758 new_bi = (HashBucket **)(bs + N_BUCKETS(new_size));
760 /* things can have moved by this offset */
761 offset = (char *)new_mem - (char *)old_mem;
763 /* relocate the bucket index */
764 mem_sys_memmove(new_bi, old_bi, old_size * sizeof (HashBucket *));
766 /* update hash data */
767 hash->bi = new_bi;
768 hash->bs = bs;
769 hash->mask = new_size - 1;
771 /* clear freshly allocated bucket index */
772 memset(new_bi + old_size, 0, sizeof (HashBucket *) * old_size);
775 * reloc pointers - this part would be also needed, if we
776 * allocate hash memory from GC movable memory, and then
777 * also the free_list needs updating (this is empty now,
778 * as expand_hash is only called for that case).
780 if (offset) {
781 for (i = 0; i < old_size; ++i) {
782 HashBucket **next_p = new_bi + i;
783 while (*next_p) {
784 *next_p = (HashBucket *)((char *)*next_p + offset);
785 b = *next_p;
786 next_p = &b->next;
791 /* recalc bucket index */
792 for (i = 0; i < old_size; ++i) {
793 HashBucket **next_p = new_bi + i;
794 while (*next_p) {
795 b = *next_p;
796 /* rehash the bucket */
797 new_loc = (hash->hash_val)(interp, b->key, hash->seed) &
798 (new_size - 1);
800 if (i != new_loc) {
801 *next_p = b->next;
802 b->next = new_bi[new_loc];
803 new_bi[new_loc] = b;
805 else
806 next_p = &b->next;
810 /* add new buckets to free_list in reverse order
811 * lowest bucket is top on free list and will be used first */
812 for (i = 0, b = (HashBucket *)new_bi - 1; i < old_nb; ++i, --b) {
813 b->next = hash->free_list;
814 b->key = b->value = NULL;
815 hash->free_list = b;
822 =item C<Hash* parrot_new_hash(PARROT_INTERP)>
824 Creates a new Parrot STRING hash.
826 =cut
830 PARROT_EXPORT
831 PARROT_CANNOT_RETURN_NULL
832 Hash*
833 parrot_new_hash(PARROT_INTERP)
835 ASSERT_ARGS(parrot_new_hash)
836 return parrot_create_hash(interp,
837 enum_type_PMC,
838 Hash_key_type_STRING,
839 STRING_compare,
840 (hash_hash_key_fn)key_hash_STRING);
846 =item C<Hash* parrot_new_cstring_hash(PARROT_INTERP)>
848 Creates a new C string hash in C<hptr>.
850 =cut
854 PARROT_EXPORT
855 PARROT_CANNOT_RETURN_NULL
856 Hash*
857 parrot_new_cstring_hash(PARROT_INTERP)
859 ASSERT_ARGS(parrot_new_cstring_hash)
860 return parrot_create_hash(interp,
861 enum_type_PMC,
862 Hash_key_type_cstring,
863 (hash_comp_fn)cstring_compare,
864 (hash_hash_key_fn)key_hash_cstring);
870 =item C<Hash * parrot_new_pointer_hash(PARROT_INTERP)>
872 Create and return a new hash with void * keys and values.
874 =cut
878 PARROT_EXPORT
879 PARROT_CANNOT_RETURN_NULL
880 Hash *
881 parrot_new_pointer_hash(PARROT_INTERP)
883 ASSERT_ARGS(parrot_new_pointer_hash)
884 return parrot_create_hash(interp,
885 enum_type_ptr,
886 Hash_key_type_ptr,
887 pointer_compare,
888 key_hash_pointer);
894 =item C<Hash* parrot_new_intval_hash(PARROT_INTERP)>
896 Creates and returns new Hash PMC with INTVAL keys and values. C<flags> can be
897 C<PObj_constant_FLAG> or 0.
899 =cut
904 PARROT_EXPORT
905 PARROT_WARN_UNUSED_RESULT
906 PARROT_CANNOT_RETURN_NULL
907 Hash*
908 parrot_new_intval_hash(PARROT_INTERP)
910 ASSERT_ARGS(parrot_new_intval_hash)
911 return parrot_create_hash(interp,
912 enum_type_INTVAL,
913 Hash_key_type_int,
914 int_compare,
915 key_hash_int);
920 =item C<Hash * parrot_create_hash(PARROT_INTERP, PARROT_DATA_TYPE val_type,
921 Hash_key_type hkey_type, hash_comp_fn compare, hash_hash_key_fn keyhash)>
923 Creates and initializes a hash. Function pointers determine its behaviors.
924 The container passed in is the address of the hash PMC that is using it. The
925 hash and the PMC point to each other.
927 Memory from this function must be freed.
929 =cut
933 PARROT_CANNOT_RETURN_NULL
934 PARROT_WARN_UNUSED_RESULT
935 PARROT_MALLOC
936 Hash *
937 parrot_create_hash(PARROT_INTERP, PARROT_DATA_TYPE val_type, Hash_key_type hkey_type,
938 ARGIN(hash_comp_fn compare), ARGIN(hash_hash_key_fn keyhash))
940 ASSERT_ARGS(parrot_create_hash)
941 HashBucket *bp;
942 void *alloc = mem_sys_allocate(sizeof (Hash) + HASH_ALLOC_SIZE(INITIAL_BUCKETS));
943 Hash * const hash = (Hash*)alloc;
944 size_t i;
946 PARROT_ASSERT(INITIAL_BUCKETS % 4 == 0);
948 hash->compare = compare;
949 hash->hash_val = keyhash;
950 hash->entry_type = val_type;
951 hash->key_type = hkey_type;
952 hash->seed = interp->hash_seed;
953 hash->mask = INITIAL_BUCKETS - 1;
954 hash->entries = 0;
955 hash->container = PMCNULL;
958 * TODO if we have a significant amount of small hashes:
959 * - allocate a bigger hash structure e.g. 128 byte
960 * - use the bucket store and bi inside this structure
961 * - when reallocate copy this part
963 bp = (HashBucket *)((char *)alloc + sizeof (Hash));
964 hash->free_list = NULL;
966 /* fill free_list from hi addresses so that we can use
967 * buckets[i] directly in an OrderedHash, *if* nothing
968 * was deleted */
970 hash->bs = bp;
971 bp += N_BUCKETS(INITIAL_BUCKETS);
972 hash->bi = (HashBucket **)bp;
974 for (i = 0, --bp; i < N_BUCKETS(INITIAL_BUCKETS); ++i, --bp) {
975 bp->next = hash->free_list;
976 bp->key = NULL;
977 bp->value = NULL;
978 hash->free_list = bp;
981 for (i = 0; i < INITIAL_BUCKETS; ++i)
982 hash->bi[i] = NULL;
984 return hash;
990 =item C<void parrot_hash_destroy(PARROT_INTERP, Hash *hash)>
992 Frees the memory allocated to the specified hash and its bucket store. Used by
993 Parrot_chash_destroy.
995 =cut
999 PARROT_EXPORT
1000 void
1001 parrot_hash_destroy(SHIM_INTERP, ARGMOD(Hash *hash))
1003 ASSERT_ARGS(parrot_hash_destroy)
1004 HashBucket *bp = (HashBucket*)((char*)hash + sizeof (Hash));
1005 if (bp != hash->bs)
1006 mem_sys_free(hash->bs);
1007 mem_sys_free(hash);
1013 =item C<void parrot_chash_destroy(PARROT_INTERP, Hash *hash)>
1015 Deletes the specified hash by freeing the memory allocated to all the key-value
1016 pairs, and finally the hash itself.
1018 =cut
1022 void
1023 parrot_chash_destroy(PARROT_INTERP, ARGMOD(Hash *hash))
1025 ASSERT_ARGS(parrot_chash_destroy)
1026 UINTVAL i;
1028 for (i = 0; i <= hash->mask; i++) {
1029 HashBucket *bucket = hash->bi[i];
1030 while (bucket) {
1031 mem_sys_free(bucket->key);
1032 mem_sys_free(bucket->value);
1033 bucket = bucket->next;
1037 parrot_hash_destroy(interp, hash);
1043 =item C<void parrot_chash_destroy_values(PARROT_INTERP, Hash *hash, value_free
1044 func)>
1046 Deletes the specified hash by freeing the memory allocated to all the key-value
1047 pairs, calling the provided callback to free the values, and finally the hash
1048 itself.
1050 The callback returns C<void> and takes a C<void *>.
1052 =cut
1056 void
1057 parrot_chash_destroy_values(PARROT_INTERP, ARGMOD(Hash *hash),
1058 ARGIN(value_free func))
1060 ASSERT_ARGS(parrot_chash_destroy_values)
1061 UINTVAL i;
1063 for (i = 0; i <= hash->mask; i++) {
1064 HashBucket *bucket = hash->bi[i];
1065 while (bucket) {
1066 mem_sys_free(bucket->key);
1067 func(bucket->value);
1068 bucket = bucket->next;
1072 parrot_hash_destroy(interp, hash);
1078 =item C<INTVAL parrot_hash_size(PARROT_INTERP, const Hash *hash)>
1080 Returns the number of used entries in the hash.
1082 =cut
1086 PARROT_EXPORT
1087 PARROT_WARN_UNUSED_RESULT
1088 PARROT_PURE_FUNCTION
1089 INTVAL
1090 parrot_hash_size(SHIM_INTERP, ARGIN(const Hash *hash))
1092 ASSERT_ARGS(parrot_hash_size)
1094 return hash->entries;
1100 =item C<void * parrot_hash_get_idx(PARROT_INTERP, const Hash *hash, PMC *key)>
1102 Finds the next index into the hash's internal storage for the given Key. Used
1103 by iterators. Ugly.
1105 =cut
1109 PARROT_EXPORT
1110 PARROT_WARN_UNUSED_RESULT
1111 PARROT_CAN_RETURN_NULL
1112 void *
1113 parrot_hash_get_idx(PARROT_INTERP, ARGIN(const Hash *hash), ARGMOD(PMC *key))
1115 ASSERT_ARGS(parrot_hash_get_idx)
1116 HashBucket *b;
1117 void *res;
1118 INTVAL i = VTABLE_get_integer(interp, key);
1119 PMC *fake_bi;
1120 BucketIndex bi;
1122 /* idx directly in the bucket store, which is at negative
1123 * address from the data pointer */
1124 /* locate initial */
1125 const INTVAL size = (INTVAL)N_BUCKETS(hash->mask + 1);
1127 GETATTR_Key_next_key(interp, key, fake_bi);
1128 bi = (BucketIndex)fake_bi;
1130 if (bi == INITBucketIndex) {
1131 i = 0;
1132 SETATTR_Key_next_key(interp, key, NULL);
1134 else if (i >= size || i < 0) {
1135 /* NOTE: These instances of SETATTR_Key_int_key can't be VTABLE
1136 * functions because of the "special" way hash iterators work. */
1137 SETATTR_Key_int_key(interp, key, -1);
1138 return NULL;
1141 res = NULL;
1143 for (b = hash->bs + i; i < size ; ++i, ++b) {
1144 /* XXX int keys may be zero - use different iterator */
1145 if (b->key) {
1146 if (!res)
1147 res = b->key;
1149 /* found next key - FIXME hash iter does auto next */
1150 else
1151 break;
1155 if (i >= size)
1156 i = -1;
1158 SETATTR_Key_int_key(interp, key, i);
1160 return res;
1166 =item C<HashBucket * parrot_hash_get_bucket(PARROT_INTERP, const Hash *hash,
1167 const void *key)>
1169 Returns the bucket for C<key>, if found, and NULL otherwise.
1171 =cut
1175 PARROT_EXPORT
1176 PARROT_WARN_UNUSED_RESULT
1177 PARROT_CAN_RETURN_NULL
1178 HashBucket *
1179 parrot_hash_get_bucket(PARROT_INTERP, ARGIN(const Hash *hash), ARGIN_NULLOK(const void *key))
1181 ASSERT_ARGS(parrot_hash_get_bucket)
1183 if (hash->entries <= 0)
1184 return NULL;
1186 /* a very fast search for very small hashes */
1187 if (hash->entries <= SMALL_HASH_SIZE) {
1188 const UINTVAL entries = hash->entries;
1189 UINTVAL i;
1191 for (i = 0; i < entries; i++) {
1192 HashBucket *bucket = hash->bs + i;
1194 /* the hash->compare cost is too high for this fast path */
1195 if (bucket->key == key)
1196 return bucket;
1200 /* if the fast search didn't work, try the normal hashing search */
1202 const UINTVAL hashval = (hash->hash_val)(interp, key, hash->seed);
1203 HashBucket *bucket = hash->bi[hashval & hash->mask];
1205 while (bucket) {
1206 /* key equality is always a match, so it's worth checking */
1207 if (bucket->key == key
1209 /* ... but the slower comparison is more accurate */
1210 || ((hash->compare)(interp, key, bucket->key) == 0))
1211 return bucket;
1212 bucket = bucket->next;
1216 return NULL;
1222 =item C<void * parrot_hash_get(PARROT_INTERP, Hash *hash, const void *key)>
1224 Returns the value keyed by C<key>, or C<NULL> if no bucket is found.
1226 =cut
1230 PARROT_EXPORT
1231 PARROT_WARN_UNUSED_RESULT
1232 PARROT_CAN_RETURN_NULL
1233 void *
1234 parrot_hash_get(PARROT_INTERP, ARGIN(Hash *hash), ARGIN(const void *key))
1236 ASSERT_ARGS(parrot_hash_get)
1237 const HashBucket * const bucket = parrot_hash_get_bucket(interp, hash, key);
1238 return bucket ? bucket->value : NULL;
1244 =item C<INTVAL parrot_hash_exists(PARROT_INTERP, Hash *hash, void *key)>
1246 Returns whether the key exists in the hash.
1248 =cut
1252 PARROT_EXPORT
1253 PARROT_WARN_UNUSED_RESULT
1254 INTVAL
1255 parrot_hash_exists(PARROT_INTERP, ARGIN(Hash *hash), ARGIN(void *key))
1257 ASSERT_ARGS(parrot_hash_exists)
1258 const HashBucket * const bucket = parrot_hash_get_bucket(interp, hash, key);
1259 return bucket ? 1 : 0;
1265 =item C<HashBucket* parrot_hash_put(PARROT_INTERP, Hash *hash, void *key, void
1266 *value)>
1268 Puts the key and value into the hash. Note that C<key> is B<not> copied.
1270 =cut
1274 PARROT_EXPORT
1275 PARROT_IGNORABLE_RESULT
1276 PARROT_CANNOT_RETURN_NULL
1277 HashBucket*
1278 parrot_hash_put(PARROT_INTERP, ARGMOD(Hash *hash),
1279 ARGIN_NULLOK(void *key), ARGIN_NULLOK(void *value))
1281 ASSERT_ARGS(parrot_hash_put)
1282 const UINTVAL hashval = (hash->hash_val)(interp, key, hash->seed);
1283 HashBucket *bucket = hash->bi[hashval & hash->mask];
1285 /* Very complex assert that we'll not put non-constant stuff into constant hash */
1286 PARROT_ASSERT(
1287 PMC_IS_NULL(hash->container)
1288 || !(PObj_constant_TEST(hash->container))
1289 || (
1291 !(hash->key_type == Hash_key_type_STRING)
1292 || PObj_constant_TEST((PObj *)key))
1293 && (
1294 !((hash->entry_type == enum_type_PMC) || (hash->entry_type == enum_type_STRING))
1295 || PObj_constant_TEST((PObj *)value)))
1296 || !"Use non-constant key or value in constant hash");
1298 while (bucket) {
1299 /* store hash_val or not */
1300 if ((hash->compare)(interp, key, bucket->key) == 0)
1301 break;
1302 bucket = bucket->next;
1305 if (bucket) {
1306 if (hash->entry_type == enum_type_PMC && hash->container) {
1307 GC_WRITE_BARRIER_KEY(interp, hash->container,
1308 (PMC *)bucket->value, bucket->key, (PMC *)value, key);
1311 bucket->value = value;
1313 else {
1314 if (hash->entry_type == enum_type_PMC && hash->container) {
1315 GC_WRITE_BARRIER_KEY(interp, hash->container,
1316 NULL, NULL, (PMC *)value, key);
1319 bucket = hash->free_list;
1321 if (!bucket) {
1322 expand_hash(interp, hash);
1323 bucket = hash->free_list;
1326 hash->entries++;
1327 hash->free_list = bucket->next;
1328 bucket->key = key;
1329 bucket->value = value;
1330 bucket->next = hash->bi[hashval & hash->mask];
1331 hash->bi[hashval & hash->mask] = bucket;
1334 return bucket;
1340 =item C<void parrot_hash_delete(PARROT_INTERP, Hash *hash, void *key)>
1342 Deletes the key from the hash.
1344 =cut
1348 PARROT_EXPORT
1349 void
1350 parrot_hash_delete(PARROT_INTERP, ARGMOD(Hash *hash), ARGIN(void *key))
1352 ASSERT_ARGS(parrot_hash_delete)
1353 HashBucket *bucket;
1354 HashBucket *prev = NULL;
1355 const UINTVAL hashval = (hash->hash_val)(interp, key, hash->seed) & hash->mask;
1357 for (bucket = hash->bi[hashval]; bucket; bucket = bucket->next) {
1358 if ((hash->compare)(interp, key, bucket->key) == 0) {
1360 if (prev)
1361 prev->next = bucket->next;
1362 else
1363 hash->bi[hashval] = bucket->next;
1365 hash->entries--;
1366 bucket->next = hash->free_list;
1367 bucket->key = NULL;
1368 hash->free_list = bucket;
1370 return;
1373 prev = bucket;
1380 =item C<void parrot_hash_clone(PARROT_INTERP, const Hash *hash, Hash *dest)>
1382 Clones C<hash> to C<dest>.
1384 =cut
1388 PARROT_EXPORT
1389 void
1390 parrot_hash_clone(PARROT_INTERP, ARGIN(const Hash *hash), ARGOUT(Hash *dest))
1392 ASSERT_ARGS(parrot_hash_clone)
1393 UINTVAL entries = hash->entries;
1394 UINTVAL i;
1396 for (i = 0; i < entries; i++) {
1397 void *valtmp;
1398 HashBucket *b = hash->bs+i;
1399 void * const key = b->key;
1401 switch (hash->entry_type) {
1402 case enum_type_undef:
1403 case enum_type_ptr:
1404 case enum_type_INTVAL:
1405 valtmp = (void *)b->value;
1406 break;
1408 case enum_type_STRING:
1409 valtmp = (void *)Parrot_str_copy(interp, (STRING *)b->value);
1410 break;
1412 case enum_type_PMC:
1413 if (PMC_IS_NULL((PMC *)b->value))
1414 valtmp = (void *)PMCNULL;
1415 else
1416 valtmp = (void *)VTABLE_clone(interp, (PMC*)b->value);
1417 break;
1419 default:
1420 valtmp = NULL; /* avoid warning */
1421 Parrot_ex_throw_from_c_args(interp, NULL, -1,
1422 "hash corruption: type = %d\n", hash->entry_type);
1425 if (key)
1426 parrot_hash_put(interp, dest, key, valtmp);
1432 =back
1434 =head1 SEE ALSO
1436 F<docs/pdds/pdd08_keys.pod>.
1438 =head1 TODO
1440 Future optimizations:
1442 =over 4
1444 =item * Stop reallocating the bucket pool, and instead add chunks on.
1445 (Saves pointer fixups and copying during C<realloc>.)
1447 =item * Hash contraction (don't if it's worth it)
1449 =back
1451 =cut
1457 * Local variables:
1458 * c-file-style: "parrot"
1459 * End:
1460 * vim: expandtab shiftwidth=4: