1 /*-------------------------------------------------------------------------
4 * Support functions for hash access method.
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/hash/hashfunc.c
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
29 #include "access/hash.h"
30 #include "catalog/pg_collation.h"
31 #include "common/hashfn.h"
32 #include "utils/builtins.h"
33 #include "utils/float.h"
34 #include "utils/pg_locale.h"
/*
 * Datatype-specific hash functions.
 *
 * These support both hash indexes and hash joins.
 *
 * NOTE: some of these are also used by catcache operations, without
 * any direct connection to hash indexes.  In addition, the common hash_any
 * routine is used by dynahash tables.
 */
46 /* Note: this is used for both "char" and boolean datatypes */
48 hashchar(PG_FUNCTION_ARGS
)
50 return hash_uint32((int32
) PG_GETARG_CHAR(0));
54 hashcharextended(PG_FUNCTION_ARGS
)
56 return hash_uint32_extended((int32
) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
60 hashint2(PG_FUNCTION_ARGS
)
62 return hash_uint32((int32
) PG_GETARG_INT16(0));
66 hashint2extended(PG_FUNCTION_ARGS
)
68 return hash_uint32_extended((int32
) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
72 hashint4(PG_FUNCTION_ARGS
)
74 return hash_uint32(PG_GETARG_INT32(0));
78 hashint4extended(PG_FUNCTION_ARGS
)
80 return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
84 hashint8(PG_FUNCTION_ARGS
)
87 * The idea here is to produce a hash value compatible with the values
88 * produced by hashint4 and hashint2 for logically equal inputs; this is
89 * necessary to support cross-type hash joins across these input types.
90 * Since all three types are signed, we can xor the high half of the int8
91 * value if the sign is positive, or the complement of the high half when
92 * the sign is negative.
94 int64 val
= PG_GETARG_INT64(0);
95 uint32 lohalf
= (uint32
) val
;
96 uint32 hihalf
= (uint32
) (val
>> 32);
98 lohalf
^= (val
>= 0) ? hihalf
: ~hihalf
;
100 return hash_uint32(lohalf
);
104 hashint8extended(PG_FUNCTION_ARGS
)
106 /* Same approach as hashint8 */
107 int64 val
= PG_GETARG_INT64(0);
108 uint32 lohalf
= (uint32
) val
;
109 uint32 hihalf
= (uint32
) (val
>> 32);
111 lohalf
^= (val
>= 0) ? hihalf
: ~hihalf
;
113 return hash_uint32_extended(lohalf
, PG_GETARG_INT64(1));
117 hashoid(PG_FUNCTION_ARGS
)
119 return hash_uint32((uint32
) PG_GETARG_OID(0));
123 hashoidextended(PG_FUNCTION_ARGS
)
125 return hash_uint32_extended((uint32
) PG_GETARG_OID(0), PG_GETARG_INT64(1));
129 hashenum(PG_FUNCTION_ARGS
)
131 return hash_uint32((uint32
) PG_GETARG_OID(0));
135 hashenumextended(PG_FUNCTION_ARGS
)
137 return hash_uint32_extended((uint32
) PG_GETARG_OID(0), PG_GETARG_INT64(1));
141 hashfloat4(PG_FUNCTION_ARGS
)
143 float4 key
= PG_GETARG_FLOAT4(0);
147 * On IEEE-float machines, minus zero and zero have different bit patterns
148 * but should compare as equal. We must ensure that they have the same
149 * hash value, which is most reliably done this way:
151 if (key
== (float4
) 0)
155 * To support cross-type hashing of float8 and float4, we want to return
156 * the same hash value hashfloat8 would produce for an equal float8 value.
157 * So, widen the value to float8 and hash that. (We must do this rather
158 * than have hashfloat8 try to narrow its value to float4; that could fail
164 * Similarly, NaNs can have different bit patterns but they should all
165 * compare as equal. For backwards-compatibility reasons we force them to
166 * have the hash value of a standard float8 NaN. (You'd think we could
167 * replace key with a float4 NaN and then widen it; but on some old
168 * platforms, that way produces a different bit pattern.)
171 key8
= get_float8_nan();
173 return hash_any((unsigned char *) &key8
, sizeof(key8
));
177 hashfloat4extended(PG_FUNCTION_ARGS
)
179 float4 key
= PG_GETARG_FLOAT4(0);
180 uint64 seed
= PG_GETARG_INT64(1);
183 /* Same approach as hashfloat4 */
184 if (key
== (float4
) 0)
185 PG_RETURN_UINT64(seed
);
188 key8
= get_float8_nan();
190 return hash_any_extended((unsigned char *) &key8
, sizeof(key8
), seed
);
194 hashfloat8(PG_FUNCTION_ARGS
)
196 float8 key
= PG_GETARG_FLOAT8(0);
199 * On IEEE-float machines, minus zero and zero have different bit patterns
200 * but should compare as equal. We must ensure that they have the same
201 * hash value, which is most reliably done this way:
203 if (key
== (float8
) 0)
207 * Similarly, NaNs can have different bit patterns but they should all
208 * compare as equal. For backwards-compatibility reasons we force them to
209 * have the hash value of a standard NaN.
212 key
= get_float8_nan();
214 return hash_any((unsigned char *) &key
, sizeof(key
));
218 hashfloat8extended(PG_FUNCTION_ARGS
)
220 float8 key
= PG_GETARG_FLOAT8(0);
221 uint64 seed
= PG_GETARG_INT64(1);
223 /* Same approach as hashfloat8 */
224 if (key
== (float8
) 0)
225 PG_RETURN_UINT64(seed
);
227 key
= get_float8_nan();
229 return hash_any_extended((unsigned char *) &key
, sizeof(key
), seed
);
233 hashoidvector(PG_FUNCTION_ARGS
)
235 oidvector
*key
= (oidvector
*) PG_GETARG_POINTER(0);
237 return hash_any((unsigned char *) key
->values
, key
->dim1
* sizeof(Oid
));
241 hashoidvectorextended(PG_FUNCTION_ARGS
)
243 oidvector
*key
= (oidvector
*) PG_GETARG_POINTER(0);
245 return hash_any_extended((unsigned char *) key
->values
,
246 key
->dim1
* sizeof(Oid
),
251 hashname(PG_FUNCTION_ARGS
)
253 char *key
= NameStr(*PG_GETARG_NAME(0));
255 return hash_any((unsigned char *) key
, strlen(key
));
259 hashnameextended(PG_FUNCTION_ARGS
)
261 char *key
= NameStr(*PG_GETARG_NAME(0));
263 return hash_any_extended((unsigned char *) key
, strlen(key
),
268 hashtext(PG_FUNCTION_ARGS
)
270 text
*key
= PG_GETARG_TEXT_PP(0);
271 Oid collid
= PG_GET_COLLATION();
272 pg_locale_t mylocale
= 0;
277 (errcode(ERRCODE_INDETERMINATE_COLLATION
),
278 errmsg("could not determine which collation to use for string hashing"),
279 errhint("Use the COLLATE clause to set the collation explicitly.")));
281 if (!lc_collate_is_c(collid
) && collid
!= DEFAULT_COLLATION_OID
)
282 mylocale
= pg_newlocale_from_collation(collid
);
284 if (!mylocale
|| mylocale
->deterministic
)
286 result
= hash_any((unsigned char *) VARDATA_ANY(key
),
287 VARSIZE_ANY_EXHDR(key
));
292 if (mylocale
->provider
== COLLPROVIDER_ICU
)
299 ulen
= icu_to_uchar(&uchar
, VARDATA_ANY(key
), VARSIZE_ANY_EXHDR(key
));
301 bsize
= ucol_getSortKey(mylocale
->info
.icu
.ucol
,
302 uchar
, ulen
, NULL
, 0);
304 ucol_getSortKey(mylocale
->info
.icu
.ucol
,
305 uchar
, ulen
, buf
, bsize
);
307 result
= hash_any(buf
, bsize
);
313 /* shouldn't happen */
314 elog(ERROR
, "unsupported collprovider: %c", mylocale
->provider
);
317 /* Avoid leaking memory for toasted inputs */
318 PG_FREE_IF_COPY(key
, 0);
324 hashtextextended(PG_FUNCTION_ARGS
)
326 text
*key
= PG_GETARG_TEXT_PP(0);
327 Oid collid
= PG_GET_COLLATION();
328 pg_locale_t mylocale
= 0;
333 (errcode(ERRCODE_INDETERMINATE_COLLATION
),
334 errmsg("could not determine which collation to use for string hashing"),
335 errhint("Use the COLLATE clause to set the collation explicitly.")));
337 if (!lc_collate_is_c(collid
) && collid
!= DEFAULT_COLLATION_OID
)
338 mylocale
= pg_newlocale_from_collation(collid
);
340 if (!mylocale
|| mylocale
->deterministic
)
342 result
= hash_any_extended((unsigned char *) VARDATA_ANY(key
),
343 VARSIZE_ANY_EXHDR(key
),
349 if (mylocale
->provider
== COLLPROVIDER_ICU
)
356 ulen
= icu_to_uchar(&uchar
, VARDATA_ANY(key
), VARSIZE_ANY_EXHDR(key
));
358 bsize
= ucol_getSortKey(mylocale
->info
.icu
.ucol
,
359 uchar
, ulen
, NULL
, 0);
361 ucol_getSortKey(mylocale
->info
.icu
.ucol
,
362 uchar
, ulen
, buf
, bsize
);
364 result
= hash_any_extended(buf
, bsize
, PG_GETARG_INT64(1));
370 /* shouldn't happen */
371 elog(ERROR
, "unsupported collprovider: %c", mylocale
->provider
);
374 PG_FREE_IF_COPY(key
, 0);
380 * hashvarlena() can be used for any varlena datatype in which there are
381 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
384 hashvarlena(PG_FUNCTION_ARGS
)
386 struct varlena
*key
= PG_GETARG_VARLENA_PP(0);
389 result
= hash_any((unsigned char *) VARDATA_ANY(key
),
390 VARSIZE_ANY_EXHDR(key
));
392 /* Avoid leaking memory for toasted inputs */
393 PG_FREE_IF_COPY(key
, 0);
399 hashvarlenaextended(PG_FUNCTION_ARGS
)
401 struct varlena
*key
= PG_GETARG_VARLENA_PP(0);
404 result
= hash_any_extended((unsigned char *) VARDATA_ANY(key
),
405 VARSIZE_ANY_EXHDR(key
),
408 PG_FREE_IF_COPY(key
, 0);