2 Copyright (C) 2001-2009, Parrot Foundation.
7 src/hash.c - Hash table
11 A hashtable contains an array of bucket indexes. Buckets are nodes in a
12 linked list, each containing a C<void *> key and value. During hash
13 creation, the types of key and value as well as appropriate compare and
14 hashing functions can be set.
16 This hash implementation uses just one piece of malloced memory. The
17 C<< hash->bs >> bucket store points to this region.
19 This hash doesn't move during GC, therefore a lot of the old caveats
30 #include "parrot/parrot.h"
31 #include "pmc/pmc_key.h"
33 /* the number of entries above which it's faster to hash the hashval instead of
34 * looping over the used HashBuckets directly */
35 #define SMALL_HASH_SIZE 4
36 #define INITIAL_BUCKETS 4
38 /* HEADERIZER HFILE: include/parrot/hash.h */
40 /* HEADERIZER BEGIN: static */
41 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
43 PARROT_WARN_UNUSED_RESULT
45 static int cstring_compare(SHIM_INTERP
,
48 __attribute__nonnull__(2)
49 __attribute__nonnull__(3);
51 static void expand_hash(PARROT_INTERP
, ARGMOD(Hash
*hash
))
52 __attribute__nonnull__(1)
53 __attribute__nonnull__(2)
56 static void hash_freeze(PARROT_INTERP
,
57 ARGIN(const Hash
* const hash
),
58 ARGMOD(visit_info
*info
))
59 __attribute__nonnull__(1)
60 __attribute__nonnull__(2)
61 __attribute__nonnull__(3)
64 static void hash_thaw(PARROT_INTERP
,
66 ARGMOD(visit_info
*info
))
67 __attribute__nonnull__(1)
68 __attribute__nonnull__(2)
69 __attribute__nonnull__(3)
73 PARROT_WARN_UNUSED_RESULT
75 static size_t key_hash_cstring(SHIM_INTERP
,
76 ARGIN(const void *value
),
78 __attribute__nonnull__(2);
80 PARROT_WARN_UNUSED_RESULT
82 static size_t key_hash_pointer(SHIM_INTERP
,
83 ARGIN(const void *value
),
85 __attribute__nonnull__(2);
87 PARROT_WARN_UNUSED_RESULT
88 static size_t key_hash_STRING(PARROT_INTERP
,
91 __attribute__nonnull__(1)
92 __attribute__nonnull__(2)
95 static void parrot_mark_hash_both(PARROT_INTERP
, ARGIN(Hash
*hash
))
96 __attribute__nonnull__(1)
97 __attribute__nonnull__(2);
99 static void parrot_mark_hash_keys(PARROT_INTERP
, ARGIN(Hash
*hash
))
100 __attribute__nonnull__(1)
101 __attribute__nonnull__(2);
103 static void parrot_mark_hash_values(PARROT_INTERP
, ARGIN(Hash
*hash
))
104 __attribute__nonnull__(1)
105 __attribute__nonnull__(2);
107 PARROT_WARN_UNUSED_RESULT
109 static int pointer_compare(SHIM_INTERP
,
110 ARGIN_NULLOK(const void *a
),
111 ARGIN_NULLOK(const void *b
));
113 PARROT_WARN_UNUSED_RESULT
114 static int STRING_compare(PARROT_INTERP
,
115 ARGIN(const void *search_key
),
116 ARGIN_NULLOK(const void *bucket_key
))
117 __attribute__nonnull__(1)
118 __attribute__nonnull__(2);
120 #define ASSERT_ARGS_cstring_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = \
121 PARROT_ASSERT_ARG(a) \
122 || PARROT_ASSERT_ARG(b)
123 #define ASSERT_ARGS_expand_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = \
124 PARROT_ASSERT_ARG(interp) \
125 || PARROT_ASSERT_ARG(hash)
126 #define ASSERT_ARGS_hash_freeze __attribute__unused__ int _ASSERT_ARGS_CHECK = \
127 PARROT_ASSERT_ARG(interp) \
128 || PARROT_ASSERT_ARG(hash) \
129 || PARROT_ASSERT_ARG(info)
130 #define ASSERT_ARGS_hash_thaw __attribute__unused__ int _ASSERT_ARGS_CHECK = \
131 PARROT_ASSERT_ARG(interp) \
132 || PARROT_ASSERT_ARG(hash) \
133 || PARROT_ASSERT_ARG(info)
134 #define ASSERT_ARGS_key_hash_cstring __attribute__unused__ int _ASSERT_ARGS_CHECK = \
135 PARROT_ASSERT_ARG(value)
136 #define ASSERT_ARGS_key_hash_pointer __attribute__unused__ int _ASSERT_ARGS_CHECK = \
137 PARROT_ASSERT_ARG(value)
138 #define ASSERT_ARGS_key_hash_STRING __attribute__unused__ int _ASSERT_ARGS_CHECK = \
139 PARROT_ASSERT_ARG(interp) \
140 || PARROT_ASSERT_ARG(s)
141 #define ASSERT_ARGS_parrot_mark_hash_both __attribute__unused__ int _ASSERT_ARGS_CHECK = \
142 PARROT_ASSERT_ARG(interp) \
143 || PARROT_ASSERT_ARG(hash)
144 #define ASSERT_ARGS_parrot_mark_hash_keys __attribute__unused__ int _ASSERT_ARGS_CHECK = \
145 PARROT_ASSERT_ARG(interp) \
146 || PARROT_ASSERT_ARG(hash)
147 #define ASSERT_ARGS_parrot_mark_hash_values __attribute__unused__ int _ASSERT_ARGS_CHECK = \
148 PARROT_ASSERT_ARG(interp) \
149 || PARROT_ASSERT_ARG(hash)
150 #define ASSERT_ARGS_pointer_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = 0
151 #define ASSERT_ARGS_STRING_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = \
152 PARROT_ASSERT_ARG(interp) \
153 || PARROT_ASSERT_ARG(search_key)
154 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
155 /* HEADERIZER END: static */
161 =item C<static size_t key_hash_STRING(PARROT_INTERP, STRING *s, size_t seed)>
163 Returns the hashed value of the key C<value>. See also string.c.
170 PARROT_WARN_UNUSED_RESULT
172 key_hash_STRING(PARROT_INTERP
, ARGMOD(STRING
*s
), SHIM(size_t seed
))
174 ASSERT_ARGS(key_hash_STRING
)
179 return Parrot_str_to_hashval(interp
, s
);
185 =item C<static int STRING_compare(PARROT_INTERP, const void *search_key, const
188 Compares the two strings, returning 0 if they are identical.
194 PARROT_WARN_UNUSED_RESULT
196 STRING_compare(PARROT_INTERP
, ARGIN(const void *search_key
), ARGIN_NULLOK(const void *bucket_key
))
198 ASSERT_ARGS(STRING_compare
)
199 STRING
const *s1
= (STRING
const *)search_key
;
200 STRING
const *s2
= (STRING
const *)bucket_key
;
205 if (s1
->hashval
!= s2
->hashval
)
209 if (s1
->strstart
== s2
->strstart
&& s1
->bufused
== s2
->bufused
)
212 return CHARSET_COMPARE(interp
, s1
, s2
);
218 =item C<static int pointer_compare(PARROT_INTERP, const void *a, const void *b)>
220 Compares the two pointers, returning 0 if they are identical
226 PARROT_WARN_UNUSED_RESULT
229 pointer_compare(SHIM_INTERP
, ARGIN_NULLOK(const void *a
), ARGIN_NULLOK(const void *b
))
231 ASSERT_ARGS(pointer_compare
)
238 =item C<static size_t key_hash_pointer(PARROT_INTERP, const void *value, size_t
241 Returns a hashvalue for a pointer.
247 PARROT_WARN_UNUSED_RESULT
250 key_hash_pointer(SHIM_INTERP
, ARGIN(const void *value
), size_t seed
)
252 ASSERT_ARGS(key_hash_pointer
)
253 return ((size_t) value
) ^ seed
;
259 =item C<static size_t key_hash_cstring(PARROT_INTERP, const void *value, size_t
262 Creates and returns a hash value from a string.
264 Takes an interpreter, a pointer to a string, and a seed value.
265 Returns the hash value.
267 Used by parrot_new_cstring_hash.
273 PARROT_WARN_UNUSED_RESULT
276 key_hash_cstring(SHIM_INTERP
, ARGIN(const void *value
), size_t seed
)
278 ASSERT_ARGS(key_hash_cstring
)
279 const unsigned char * p
= (const unsigned char *) value
;
280 register size_t h
= seed
;
293 =item C<static int cstring_compare(PARROT_INTERP, const char *a, const char *b)>
295 Compares two C strings for equality, returning -1, 0, and 1 if the first string
296 is less than, equal to, or greater than the second, respectively.
302 PARROT_WARN_UNUSED_RESULT
305 cstring_compare(SHIM_INTERP
, ARGIN(const char *a
), ARGIN(const char *b
))
307 ASSERT_ARGS(cstring_compare
)
314 =item C<size_t key_hash_int(PARROT_INTERP, const void *value, size_t seed)>
316 Returns a hashed value for an integer key (passed as a void pointer, sadly).
322 PARROT_WARN_UNUSED_RESULT
325 key_hash_int(SHIM_INTERP
, ARGIN_NULLOK(const void *value
), size_t seed
)
327 ASSERT_ARGS(key_hash_int
)
328 return (size_t)value
^ seed
;
333 =item C<int int_compare(PARROT_INTERP, const void *a, const void *b)>
335 Compares two integers for equality, returning -1, 0, and 1 if the first is less
336 than, equal to, or greater than the second, respectively. Uses void pointers
337 to store the integers, sadly.
343 PARROT_WARN_UNUSED_RESULT
346 int_compare(SHIM_INTERP
, ARGIN_NULLOK(const void *a
), ARGIN_NULLOK(const void *b
))
348 ASSERT_ARGS(int_compare
)
354 =item C<void parrot_dump_hash(PARROT_INTERP, const Hash *hash)>
356 Prints out the hash in human-readable form, at least once someone implements
365 parrot_dump_hash(SHIM_INTERP
, SHIM(const Hash
*hash
))
367 ASSERT_ARGS(parrot_dump_hash
)
373 =item C<void parrot_mark_hash(PARROT_INTERP, Hash *hash)>
375 Marks the hash and its contents as live. Assumes that key and value are non
384 parrot_mark_hash(PARROT_INTERP
, ARGIN(Hash
*hash
))
386 ASSERT_ARGS(parrot_mark_hash
)
390 if (hash
->entry_type
== (PARROT_DATA_TYPE
) enum_hash_string
391 || hash
->entry_type
== (PARROT_DATA_TYPE
) enum_hash_pmc
)
394 if (hash
->key_type
== Hash_key_type_STRING
395 || hash
->key_type
== Hash_key_type_PMC
)
400 parrot_mark_hash_both(interp
, hash
);
402 parrot_mark_hash_keys(interp
, hash
);
406 parrot_mark_hash_values(interp
, hash
);
413 =item C<static void parrot_mark_hash_keys(PARROT_INTERP, Hash *hash)>
415 Marks the hash and only its keys as live.
422 parrot_mark_hash_keys(PARROT_INTERP
, ARGIN(Hash
*hash
))
424 ASSERT_ARGS(parrot_mark_hash_keys
)
425 UINTVAL entries
= hash
->entries
;
429 for (i
= hash
->mask
; i
>= 0; --i
) {
430 HashBucket
*bucket
= hash
->bi
[i
];
433 if (++found
> entries
)
434 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
435 "Detected hash corruption at hash %p entries %d",
438 PARROT_ASSERT(bucket
->key
);
439 Parrot_gc_mark_PObj_alive(interp
, (PObj
*)bucket
->key
);
441 bucket
= bucket
->next
;
449 =item C<static void parrot_mark_hash_values(PARROT_INTERP, Hash *hash)>
451 Marks the hash and only its values as live.
458 parrot_mark_hash_values(PARROT_INTERP
, ARGIN(Hash
*hash
))
460 ASSERT_ARGS(parrot_mark_hash_values
)
461 const UINTVAL entries
= hash
->entries
;
465 for (i
= hash
->mask
; i
>= 0; --i
) {
466 HashBucket
*bucket
= hash
->bi
[i
];
469 if (++found
> entries
)
470 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
471 "Detected hash corruption at hash %p entries %d",
474 PARROT_ASSERT(bucket
->value
);
475 Parrot_gc_mark_PObj_alive(interp
, (PObj
*)bucket
->value
);
477 bucket
= bucket
->next
;
485 =item C<static void parrot_mark_hash_both(PARROT_INTERP, Hash *hash)>
487 Marks the hash and both its keys and values as live.
494 parrot_mark_hash_both(PARROT_INTERP
, ARGIN(Hash
*hash
))
496 ASSERT_ARGS(parrot_mark_hash_both
)
497 const UINTVAL entries
= hash
->entries
;
501 for (i
= hash
->mask
; i
>= 0; --i
) {
502 HashBucket
*bucket
= hash
->bi
[i
];
505 if (++found
> entries
)
506 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
507 "Detected hash corruption at hash %p entries %d",
510 PARROT_ASSERT(bucket
->key
);
511 Parrot_gc_mark_PObj_alive(interp
, (PObj
*)bucket
->key
);
513 PARROT_ASSERT(bucket
->value
);
514 Parrot_gc_mark_PObj_alive(interp
, (PObj
*)bucket
->value
);
516 bucket
= bucket
->next
;
524 =item C<static void hash_thaw(PARROT_INTERP, Hash *hash, visit_info *info)>
526 Visits the contents of a hash during freeze/thaw.
528 C<pinfo> is the visit info, (see include/parrot/pmc_freeze.h>).
535 hash_thaw(PARROT_INTERP
, ARGMOD(Hash
*hash
), ARGMOD(visit_info
*info
))
537 ASSERT_ARGS(hash_thaw
)
538 IMAGE_IO
* const io
= info
->image_io
;
540 /* during thaw, info->extra is the key/value count */
541 const size_t num_entries
= (size_t) hash
->entries
;
546 for (entry_index
= 0; entry_index
< num_entries
; ++entry_index
) {
549 switch (hash
->key_type
) {
550 case Hash_key_type_STRING
:
552 STRING
* const s_key
= VTABLE_shift_string(interp
, io
);
553 b
= parrot_hash_put(interp
, hash
, s_key
, NULL
);
556 case Hash_key_type_int
:
558 const INTVAL i_key
= VTABLE_shift_integer(interp
, io
);
559 b
= parrot_hash_put(interp
, hash
, (void*)i_key
, NULL
);
563 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
564 "unimplemented key type");
568 switch (hash
->entry_type
) {
571 /* this looks awful, but it avoids type-punning warnings */
572 void **ptr
= &b
->value
;
573 info
->thaw_ptr
= (PMC
**)ptr
;
574 (info
->visit_pmc_now
)(interp
, NULL
, info
);
579 const INTVAL i
= VTABLE_shift_integer(interp
, io
);
580 b
->value
= (void *)i
;
584 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
585 "unimplemented value type");
594 =item C<static void hash_freeze(PARROT_INTERP, const Hash * const hash,
597 Freezes hash into a string.
599 Takes an interpreter, a pointer to the hash, and a pointer to the structure
600 containing the string start location.
602 Use by parrot_hash_visit.
609 hash_freeze(PARROT_INTERP
, ARGIN(const Hash
* const hash
), ARGMOD(visit_info
*info
))
611 ASSERT_ARGS(hash_freeze
)
612 IMAGE_IO
* const io
= info
->image_io
;
615 for (i
= 0; i
< hash
->entries
; i
++) {
616 HashBucket
* const b
= hash
->bs
+i
;
618 switch (hash
->key_type
) {
619 case Hash_key_type_STRING
:
620 VTABLE_push_string(interp
, io
, (STRING
*)b
->key
);
622 case Hash_key_type_int
:
623 VTABLE_push_integer(interp
, io
, (INTVAL
)b
->key
);
626 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
627 "unimplemented key type");
631 switch (hash
->entry_type
) {
633 (info
->visit_pmc_now
)(interp
, (PMC
*)b
->value
, info
);
636 VTABLE_push_integer(interp
, io
, (INTVAL
)b
->value
);
639 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
640 "unimplemented value type");
649 =item C<void parrot_hash_visit(PARROT_INTERP, Hash *hash, void *pinfo)>
651 Freezes or thaws a hash as specified. Takes an interpreter, a pointer to the
652 hash, and a pointer to the structure identifying what to do and the location of
661 parrot_hash_visit(PARROT_INTERP
, ARGMOD(Hash
*hash
), ARGMOD(void *pinfo
))
663 ASSERT_ARGS(parrot_hash_visit
)
664 visit_info
* const info
= (visit_info
*) pinfo
;
666 switch (info
->what
) {
667 case VISIT_THAW_NORMAL
:
668 case VISIT_THAW_CONSTANTS
:
669 hash_thaw(interp
, hash
, info
);
671 case VISIT_FREEZE_NORMAL
:
672 case VISIT_FREEZE_AT_DESTRUCT
:
673 hash_freeze(interp
, hash
, info
);
676 Parrot_ex_throw_from_c_args(interp
, NULL
, 1,
677 "unimplemented visit mode");
684 =item C<static void expand_hash(PARROT_INTERP, Hash *hash)>
686 Expands a hash when necessary.
688 For a hashtable of size N, we use C<MAXFULL_PERCENT> % of N as the
689 number of buckets. This way, as soon as we run out of buckets on the
690 free list, we know that it's time to resize the hashtable.
692 Algorithm for expansion: We exactly double the size of the hashtable.
693 Keys are assigned to buckets with the formula
695 bucket_index = hash(key) % parrot_hash_size
697 When doubling the size of the hashtable, we know that every key is either
698 already in the correct bucket, or belongs in the current bucket plus
699 C<parrot_hash_size> (the old C<parrot_hash_size>). In fact, because the
700 hashtable is always a power of two in size, it depends only on the next bit
701 in the hash value, after the ones previously used.
703 We scan through all the buckets in order, moving the buckets that need to be
704 moved. No bucket will be scanned twice, and the cache should be reasonably
705 happy because the hashtable accesses will be two parallel sequential scans.
706 (Of course, this also mucks with the C<< ->next >> pointers, and they'll be
714 expand_hash(PARROT_INTERP
, ARGMOD(Hash
*hash
))
716 ASSERT_ARGS(expand_hash
)
717 HashBucket
**old_bi
, **new_bi
;
718 HashBucket
*bs
, *b
, *new_mem
;
719 HashBucket
*old_offset
= (HashBucket
*)((char *)hash
+ sizeof (Hash
));
721 void * const old_mem
= hash
->bs
;
722 const UINTVAL old_size
= hash
->mask
+ 1;
723 const UINTVAL new_size
= old_size
<< 1;
724 const UINTVAL old_nb
= N_BUCKETS(old_size
);
725 size_t offset
, i
, new_loc
;
728 allocate some less buckets
729 e.g. 3 buckets, 4 pointers:
731 +---+---+---+-+-+-+-+
733 +---+---+---+-+-+-+-+
739 if (old_offset
!= old_mem
) {
740 /* This buffer has been reallocated at least once before. */
741 new_mem
= (HashBucket
*)mem_sys_realloc(old_mem
, HASH_ALLOC_SIZE(new_size
));
744 /* Allocate a new buffer. */
745 new_mem
= (HashBucket
*)mem_sys_allocate(HASH_ALLOC_SIZE(new_size
));
746 memcpy(new_mem
, old_mem
, HASH_ALLOC_SIZE(old_size
));
750 +---+---+---+---+---+---+-+-+-+-+-+-+-+-+
751 | bs | old_bi | new_bi |
752 +---+---+---+---+---+---+-+-+-+-+-+-+-+-+
757 old_bi
= (HashBucket
**)(bs
+ old_nb
);
758 new_bi
= (HashBucket
**)(bs
+ N_BUCKETS(new_size
));
760 /* things can have moved by this offset */
761 offset
= (char *)new_mem
- (char *)old_mem
;
763 /* relocate the bucket index */
764 mem_sys_memmove(new_bi
, old_bi
, old_size
* sizeof (HashBucket
*));
766 /* update hash data */
769 hash
->mask
= new_size
- 1;
771 /* clear freshly allocated bucket index */
772 memset(new_bi
+ old_size
, 0, sizeof (HashBucket
*) * old_size
);
775 * reloc pointers - this part would be also needed, if we
776 * allocate hash memory from GC movable memory, and then
777 * also the free_list needs updating (this is empty now,
778 * as expand_hash is only called for that case).
781 for (i
= 0; i
< old_size
; ++i
) {
782 HashBucket
**next_p
= new_bi
+ i
;
784 *next_p
= (HashBucket
*)((char *)*next_p
+ offset
);
791 /* recalc bucket index */
792 for (i
= 0; i
< old_size
; ++i
) {
793 HashBucket
**next_p
= new_bi
+ i
;
796 /* rehash the bucket */
797 new_loc
= (hash
->hash_val
)(interp
, b
->key
, hash
->seed
) &
802 b
->next
= new_bi
[new_loc
];
810 /* add new buckets to free_list in reverse order
811 * lowest bucket is top on free list and will be used first */
812 for (i
= 0, b
= (HashBucket
*)new_bi
- 1; i
< old_nb
; ++i
, --b
) {
813 b
->next
= hash
->free_list
;
814 b
->key
= b
->value
= NULL
;
822 =item C<Hash* parrot_new_hash(PARROT_INTERP)>
824 Creates a new Parrot STRING hash.
831 PARROT_CANNOT_RETURN_NULL
833 parrot_new_hash(PARROT_INTERP
)
835 ASSERT_ARGS(parrot_new_hash
)
836 return parrot_create_hash(interp
,
838 Hash_key_type_STRING
,
840 (hash_hash_key_fn
)key_hash_STRING
);
846 =item C<Hash* parrot_new_cstring_hash(PARROT_INTERP)>
848 Creates a new C string hash in C<hptr>.
855 PARROT_CANNOT_RETURN_NULL
857 parrot_new_cstring_hash(PARROT_INTERP
)
859 ASSERT_ARGS(parrot_new_cstring_hash
)
860 return parrot_create_hash(interp
,
862 Hash_key_type_cstring
,
863 (hash_comp_fn
)cstring_compare
,
864 (hash_hash_key_fn
)key_hash_cstring
);
870 =item C<Hash * parrot_new_pointer_hash(PARROT_INTERP)>
872 Create and return a new hash with void * keys and values.
879 PARROT_CANNOT_RETURN_NULL
881 parrot_new_pointer_hash(PARROT_INTERP
)
883 ASSERT_ARGS(parrot_new_pointer_hash
)
884 return parrot_create_hash(interp
,
894 =item C<Hash* parrot_new_intval_hash(PARROT_INTERP)>
896 Creates and returns new Hash PMC with INTVAL keys and values. C<flags> can be
897 C<PObj_constant_FLAG> or 0.
905 PARROT_WARN_UNUSED_RESULT
906 PARROT_CANNOT_RETURN_NULL
908 parrot_new_intval_hash(PARROT_INTERP
)
910 ASSERT_ARGS(parrot_new_intval_hash
)
911 return parrot_create_hash(interp
,
920 =item C<Hash * parrot_create_hash(PARROT_INTERP, PARROT_DATA_TYPE val_type,
921 Hash_key_type hkey_type, hash_comp_fn compare, hash_hash_key_fn keyhash)>
923 Creates and initializes a hash. Function pointers determine its behaviors.
924 The container passed in is the address of the hash PMC that is using it. The
925 hash and the PMC point to each other.
927 Memory from this function must be freed.
933 PARROT_CANNOT_RETURN_NULL
934 PARROT_WARN_UNUSED_RESULT
937 parrot_create_hash(PARROT_INTERP
, PARROT_DATA_TYPE val_type
, Hash_key_type hkey_type
,
938 ARGIN(hash_comp_fn compare
), ARGIN(hash_hash_key_fn keyhash
))
940 ASSERT_ARGS(parrot_create_hash
)
942 void *alloc
= mem_sys_allocate(sizeof (Hash
) + HASH_ALLOC_SIZE(INITIAL_BUCKETS
));
943 Hash
* const hash
= (Hash
*)alloc
;
946 PARROT_ASSERT(INITIAL_BUCKETS
% 4 == 0);
948 hash
->compare
= compare
;
949 hash
->hash_val
= keyhash
;
950 hash
->entry_type
= val_type
;
951 hash
->key_type
= hkey_type
;
952 hash
->seed
= interp
->hash_seed
;
953 hash
->mask
= INITIAL_BUCKETS
- 1;
955 hash
->container
= PMCNULL
;
958 * TODO if we have a significant amount of small hashes:
959 * - allocate a bigger hash structure e.g. 128 byte
960 * - use the bucket store and bi inside this structure
961 * - when reallocate copy this part
963 bp
= (HashBucket
*)((char *)alloc
+ sizeof (Hash
));
964 hash
->free_list
= NULL
;
966 /* fill free_list from hi addresses so that we can use
967 * buckets[i] directly in an OrderedHash, *if* nothing
971 bp
+= N_BUCKETS(INITIAL_BUCKETS
);
972 hash
->bi
= (HashBucket
**)bp
;
974 for (i
= 0, --bp
; i
< N_BUCKETS(INITIAL_BUCKETS
); ++i
, --bp
) {
975 bp
->next
= hash
->free_list
;
978 hash
->free_list
= bp
;
981 for (i
= 0; i
< INITIAL_BUCKETS
; ++i
)
990 =item C<void parrot_hash_destroy(PARROT_INTERP, Hash *hash)>
992 Frees the memory allocated to the specified hash and its bucket store. Used by
993 Parrot_chash_destroy.
1001 parrot_hash_destroy(SHIM_INTERP
, ARGMOD(Hash
*hash
))
1003 ASSERT_ARGS(parrot_hash_destroy
)
1004 HashBucket
*bp
= (HashBucket
*)((char*)hash
+ sizeof (Hash
));
1006 mem_sys_free(hash
->bs
);
1013 =item C<void parrot_chash_destroy(PARROT_INTERP, Hash *hash)>
1015 Deletes the specified hash by freeing the memory allocated to all the key-value
1016 pairs, and finally the hash itself.
1023 parrot_chash_destroy(PARROT_INTERP
, ARGMOD(Hash
*hash
))
1025 ASSERT_ARGS(parrot_chash_destroy
)
1028 for (i
= 0; i
<= hash
->mask
; i
++) {
1029 HashBucket
*bucket
= hash
->bi
[i
];
1031 mem_sys_free(bucket
->key
);
1032 mem_sys_free(bucket
->value
);
1033 bucket
= bucket
->next
;
1037 parrot_hash_destroy(interp
, hash
);
1043 =item C<void parrot_chash_destroy_values(PARROT_INTERP, Hash *hash, value_free
1046 Deletes the specified hash by freeing the memory allocated to all the key-value
1047 pairs, calling the provided callback to free the values, and finally the hash
1050 The callback returns C<void> and takes a C<void *>.
1057 parrot_chash_destroy_values(PARROT_INTERP
, ARGMOD(Hash
*hash
),
1058 ARGIN(value_free func
))
1060 ASSERT_ARGS(parrot_chash_destroy_values
)
1063 for (i
= 0; i
<= hash
->mask
; i
++) {
1064 HashBucket
*bucket
= hash
->bi
[i
];
1066 mem_sys_free(bucket
->key
);
1067 func(bucket
->value
);
1068 bucket
= bucket
->next
;
1072 parrot_hash_destroy(interp
, hash
);
1078 =item C<INTVAL parrot_hash_size(PARROT_INTERP, const Hash *hash)>
1080 Returns the number of used entries in the hash.
1087 PARROT_WARN_UNUSED_RESULT
1088 PARROT_PURE_FUNCTION
1090 parrot_hash_size(SHIM_INTERP
, ARGIN(const Hash
*hash
))
1092 ASSERT_ARGS(parrot_hash_size
)
1094 return hash
->entries
;
1100 =item C<void * parrot_hash_get_idx(PARROT_INTERP, const Hash *hash, PMC *key)>
1102 Finds the next index into the hash's internal storage for the given Key. Used
1110 PARROT_WARN_UNUSED_RESULT
1111 PARROT_CAN_RETURN_NULL
1113 parrot_hash_get_idx(PARROT_INTERP
, ARGIN(const Hash
*hash
), ARGMOD(PMC
*key
))
1115 ASSERT_ARGS(parrot_hash_get_idx
)
1118 INTVAL i
= VTABLE_get_integer(interp
, key
);
1122 /* idx directly in the bucket store, which is at negative
1123 * address from the data pointer */
1124 /* locate initial */
1125 const INTVAL size
= (INTVAL
)N_BUCKETS(hash
->mask
+ 1);
1127 GETATTR_Key_next_key(interp
, key
, fake_bi
);
1128 bi
= (BucketIndex
)fake_bi
;
1130 if (bi
== INITBucketIndex
) {
1132 SETATTR_Key_next_key(interp
, key
, NULL
);
1134 else if (i
>= size
|| i
< 0) {
1135 /* NOTE: These instances of SETATTR_Key_int_key can't be VTABLE
1136 * functions because of the "special" way hash iterators work. */
1137 SETATTR_Key_int_key(interp
, key
, -1);
1143 for (b
= hash
->bs
+ i
; i
< size
; ++i
, ++b
) {
1144 /* XXX int keys may be zero - use different iterator */
1149 /* found next key - FIXME hash iter does auto next */
1158 SETATTR_Key_int_key(interp
, key
, i
);
1166 =item C<HashBucket * parrot_hash_get_bucket(PARROT_INTERP, const Hash *hash,
1169 Returns the bucket for C<key>, if found, and NULL otherwise.
1176 PARROT_WARN_UNUSED_RESULT
1177 PARROT_CAN_RETURN_NULL
1179 parrot_hash_get_bucket(PARROT_INTERP
, ARGIN(const Hash
*hash
), ARGIN_NULLOK(const void *key
))
1181 ASSERT_ARGS(parrot_hash_get_bucket
)
1183 if (hash
->entries
<= 0)
1186 /* a very fast search for very small hashes */
1187 if (hash
->entries
<= SMALL_HASH_SIZE
) {
1188 const UINTVAL entries
= hash
->entries
;
1191 for (i
= 0; i
< entries
; i
++) {
1192 HashBucket
*bucket
= hash
->bs
+ i
;
1194 /* the hash->compare cost is too high for this fast path */
1195 if (bucket
->key
== key
)
1200 /* if the fast search didn't work, try the normal hashing search */
1202 const UINTVAL hashval
= (hash
->hash_val
)(interp
, key
, hash
->seed
);
1203 HashBucket
*bucket
= hash
->bi
[hashval
& hash
->mask
];
1206 /* key equality is always a match, so it's worth checking */
1207 if (bucket
->key
== key
1209 /* ... but the slower comparison is more accurate */
1210 || ((hash
->compare
)(interp
, key
, bucket
->key
) == 0))
1212 bucket
= bucket
->next
;
1222 =item C<void * parrot_hash_get(PARROT_INTERP, Hash *hash, const void *key)>
1224 Returns the value keyed by C<key>, or C<NULL> if no bucket is found.
1231 PARROT_WARN_UNUSED_RESULT
1232 PARROT_CAN_RETURN_NULL
1234 parrot_hash_get(PARROT_INTERP
, ARGIN(Hash
*hash
), ARGIN(const void *key
))
1236 ASSERT_ARGS(parrot_hash_get
)
1237 const HashBucket
* const bucket
= parrot_hash_get_bucket(interp
, hash
, key
);
1238 return bucket
? bucket
->value
: NULL
;
1244 =item C<INTVAL parrot_hash_exists(PARROT_INTERP, Hash *hash, void *key)>
1246 Returns whether the key exists in the hash.
1253 PARROT_WARN_UNUSED_RESULT
1255 parrot_hash_exists(PARROT_INTERP
, ARGIN(Hash
*hash
), ARGIN(void *key
))
1257 ASSERT_ARGS(parrot_hash_exists
)
1258 const HashBucket
* const bucket
= parrot_hash_get_bucket(interp
, hash
, key
);
1259 return bucket
? 1 : 0;
1265 =item C<HashBucket* parrot_hash_put(PARROT_INTERP, Hash *hash, void *key, void
1268 Puts the key and value into the hash. Note that C<key> is B<not> copied.
1275 PARROT_IGNORABLE_RESULT
1276 PARROT_CANNOT_RETURN_NULL
1278 parrot_hash_put(PARROT_INTERP
, ARGMOD(Hash
*hash
),
1279 ARGIN_NULLOK(void *key
), ARGIN_NULLOK(void *value
))
1281 ASSERT_ARGS(parrot_hash_put
)
1282 const UINTVAL hashval
= (hash
->hash_val
)(interp
, key
, hash
->seed
);
1283 HashBucket
*bucket
= hash
->bi
[hashval
& hash
->mask
];
1285 /* Very complex assert that we'll not put non-constant stuff into constant hash */
1287 PMC_IS_NULL(hash
->container
)
1288 || !(PObj_constant_TEST(hash
->container
))
1291 !(hash
->key_type
== Hash_key_type_STRING
)
1292 || PObj_constant_TEST((PObj
*)key
))
1294 !((hash
->entry_type
== enum_type_PMC
) || (hash
->entry_type
== enum_type_STRING
))
1295 || PObj_constant_TEST((PObj
*)value
)))
1296 || !"Use non-constant key or value in constant hash");
1299 /* store hash_val or not */
1300 if ((hash
->compare
)(interp
, key
, bucket
->key
) == 0)
1302 bucket
= bucket
->next
;
1306 if (hash
->entry_type
== enum_type_PMC
&& hash
->container
) {
1307 GC_WRITE_BARRIER_KEY(interp
, hash
->container
,
1308 (PMC
*)bucket
->value
, bucket
->key
, (PMC
*)value
, key
);
1311 bucket
->value
= value
;
1314 if (hash
->entry_type
== enum_type_PMC
&& hash
->container
) {
1315 GC_WRITE_BARRIER_KEY(interp
, hash
->container
,
1316 NULL
, NULL
, (PMC
*)value
, key
);
1319 bucket
= hash
->free_list
;
1322 expand_hash(interp
, hash
);
1323 bucket
= hash
->free_list
;
1327 hash
->free_list
= bucket
->next
;
1329 bucket
->value
= value
;
1330 bucket
->next
= hash
->bi
[hashval
& hash
->mask
];
1331 hash
->bi
[hashval
& hash
->mask
] = bucket
;
1340 =item C<void parrot_hash_delete(PARROT_INTERP, Hash *hash, void *key)>
1342 Deletes the key from the hash.
1350 parrot_hash_delete(PARROT_INTERP
, ARGMOD(Hash
*hash
), ARGIN(void *key
))
1352 ASSERT_ARGS(parrot_hash_delete
)
1354 HashBucket
*prev
= NULL
;
1355 const UINTVAL hashval
= (hash
->hash_val
)(interp
, key
, hash
->seed
) & hash
->mask
;
1357 for (bucket
= hash
->bi
[hashval
]; bucket
; bucket
= bucket
->next
) {
1358 if ((hash
->compare
)(interp
, key
, bucket
->key
) == 0) {
1361 prev
->next
= bucket
->next
;
1363 hash
->bi
[hashval
] = bucket
->next
;
1366 bucket
->next
= hash
->free_list
;
1368 hash
->free_list
= bucket
;
1380 =item C<void parrot_hash_clone(PARROT_INTERP, const Hash *hash, Hash *dest)>
1382 Clones C<hash> to C<dest>.
1390 parrot_hash_clone(PARROT_INTERP
, ARGIN(const Hash
*hash
), ARGOUT(Hash
*dest
))
1392 ASSERT_ARGS(parrot_hash_clone
)
1393 UINTVAL entries
= hash
->entries
;
1396 for (i
= 0; i
< entries
; i
++) {
1398 HashBucket
*b
= hash
->bs
+i
;
1399 void * const key
= b
->key
;
1401 switch (hash
->entry_type
) {
1402 case enum_type_undef
:
1404 case enum_type_INTVAL
:
1405 valtmp
= (void *)b
->value
;
1408 case enum_type_STRING
:
1409 valtmp
= (void *)Parrot_str_copy(interp
, (STRING
*)b
->value
);
1413 if (PMC_IS_NULL((PMC
*)b
->value
))
1414 valtmp
= (void *)PMCNULL
;
1416 valtmp
= (void *)VTABLE_clone(interp
, (PMC
*)b
->value
);
1420 valtmp
= NULL
; /* avoid warning */
1421 Parrot_ex_throw_from_c_args(interp
, NULL
, -1,
1422 "hash corruption: type = %d\n", hash
->entry_type
);
1426 parrot_hash_put(interp
, dest
, key
, valtmp
);
1436 F<docs/pdds/pdd08_keys.pod>.
1440 Future optimizations:
1444 =item * Stop reallocating the bucket pool, and instead add chunks on.
1445 (Saves pointer fixups and copying during C<realloc>.)
1447 =item * Hash contraction (don't if it's worth it)
1458 * c-file-style: "parrot"
1460 * vim: expandtab shiftwidth=4: