Update copyright for 2022
[pgsql.git] / src / backend / access / hash / hashfunc.c
blob0521c69dd57a2d2d3a3a8e359af5afbe85ba96eb
1 /*-------------------------------------------------------------------------
3 * hashfunc.c
4 * Support functions for hash access method.
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/access/hash/hashfunc.c
13 * NOTES
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
27 #include "postgres.h"
29 #include "access/hash.h"
30 #include "catalog/pg_collation.h"
31 #include "common/hashfn.h"
32 #include "utils/builtins.h"
33 #include "utils/float.h"
34 #include "utils/pg_locale.h"
37 * Datatype-specific hash functions.
39 * These support both hash indexes and hash joins.
41 * NOTE: some of these are also used by catcache operations, without
42 * any direct connection to hash indexes. The common hash_any
43 * routine is also used by dynahash tables.
46 /* Note: this is used for both "char" and boolean datatypes */
47 Datum
48 hashchar(PG_FUNCTION_ARGS)
50 return hash_uint32((int32) PG_GETARG_CHAR(0));
53 Datum
54 hashcharextended(PG_FUNCTION_ARGS)
56 return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
59 Datum
60 hashint2(PG_FUNCTION_ARGS)
62 return hash_uint32((int32) PG_GETARG_INT16(0));
65 Datum
66 hashint2extended(PG_FUNCTION_ARGS)
68 return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
71 Datum
72 hashint4(PG_FUNCTION_ARGS)
74 return hash_uint32(PG_GETARG_INT32(0));
77 Datum
78 hashint4extended(PG_FUNCTION_ARGS)
80 return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
83 Datum
84 hashint8(PG_FUNCTION_ARGS)
87 * The idea here is to produce a hash value compatible with the values
88 * produced by hashint4 and hashint2 for logically equal inputs; this is
89 * necessary to support cross-type hash joins across these input types.
90 * Since all three types are signed, we can xor the high half of the int8
91 * value if the sign is positive, or the complement of the high half when
92 * the sign is negative.
94 int64 val = PG_GETARG_INT64(0);
95 uint32 lohalf = (uint32) val;
96 uint32 hihalf = (uint32) (val >> 32);
98 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
100 return hash_uint32(lohalf);
103 Datum
104 hashint8extended(PG_FUNCTION_ARGS)
106 /* Same approach as hashint8 */
107 int64 val = PG_GETARG_INT64(0);
108 uint32 lohalf = (uint32) val;
109 uint32 hihalf = (uint32) (val >> 32);
111 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
113 return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
116 Datum
117 hashoid(PG_FUNCTION_ARGS)
119 return hash_uint32((uint32) PG_GETARG_OID(0));
122 Datum
123 hashoidextended(PG_FUNCTION_ARGS)
125 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
128 Datum
129 hashenum(PG_FUNCTION_ARGS)
131 return hash_uint32((uint32) PG_GETARG_OID(0));
134 Datum
135 hashenumextended(PG_FUNCTION_ARGS)
137 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
140 Datum
141 hashfloat4(PG_FUNCTION_ARGS)
143 float4 key = PG_GETARG_FLOAT4(0);
144 float8 key8;
147 * On IEEE-float machines, minus zero and zero have different bit patterns
148 * but should compare as equal. We must ensure that they have the same
149 * hash value, which is most reliably done this way:
151 if (key == (float4) 0)
152 PG_RETURN_UINT32(0);
155 * To support cross-type hashing of float8 and float4, we want to return
156 * the same hash value hashfloat8 would produce for an equal float8 value.
157 * So, widen the value to float8 and hash that. (We must do this rather
158 * than have hashfloat8 try to narrow its value to float4; that could fail
159 * on overflow.)
161 key8 = key;
164 * Similarly, NaNs can have different bit patterns but they should all
165 * compare as equal. For backwards-compatibility reasons we force them to
166 * have the hash value of a standard float8 NaN. (You'd think we could
167 * replace key with a float4 NaN and then widen it; but on some old
168 * platforms, that way produces a different bit pattern.)
170 if (isnan(key8))
171 key8 = get_float8_nan();
173 return hash_any((unsigned char *) &key8, sizeof(key8));
176 Datum
177 hashfloat4extended(PG_FUNCTION_ARGS)
179 float4 key = PG_GETARG_FLOAT4(0);
180 uint64 seed = PG_GETARG_INT64(1);
181 float8 key8;
183 /* Same approach as hashfloat4 */
184 if (key == (float4) 0)
185 PG_RETURN_UINT64(seed);
186 key8 = key;
187 if (isnan(key8))
188 key8 = get_float8_nan();
190 return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
193 Datum
194 hashfloat8(PG_FUNCTION_ARGS)
196 float8 key = PG_GETARG_FLOAT8(0);
199 * On IEEE-float machines, minus zero and zero have different bit patterns
200 * but should compare as equal. We must ensure that they have the same
201 * hash value, which is most reliably done this way:
203 if (key == (float8) 0)
204 PG_RETURN_UINT32(0);
207 * Similarly, NaNs can have different bit patterns but they should all
208 * compare as equal. For backwards-compatibility reasons we force them to
209 * have the hash value of a standard NaN.
211 if (isnan(key))
212 key = get_float8_nan();
214 return hash_any((unsigned char *) &key, sizeof(key));
217 Datum
218 hashfloat8extended(PG_FUNCTION_ARGS)
220 float8 key = PG_GETARG_FLOAT8(0);
221 uint64 seed = PG_GETARG_INT64(1);
223 /* Same approach as hashfloat8 */
224 if (key == (float8) 0)
225 PG_RETURN_UINT64(seed);
226 if (isnan(key))
227 key = get_float8_nan();
229 return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
232 Datum
233 hashoidvector(PG_FUNCTION_ARGS)
235 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
237 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
240 Datum
241 hashoidvectorextended(PG_FUNCTION_ARGS)
243 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
245 return hash_any_extended((unsigned char *) key->values,
246 key->dim1 * sizeof(Oid),
247 PG_GETARG_INT64(1));
250 Datum
251 hashname(PG_FUNCTION_ARGS)
253 char *key = NameStr(*PG_GETARG_NAME(0));
255 return hash_any((unsigned char *) key, strlen(key));
258 Datum
259 hashnameextended(PG_FUNCTION_ARGS)
261 char *key = NameStr(*PG_GETARG_NAME(0));
263 return hash_any_extended((unsigned char *) key, strlen(key),
264 PG_GETARG_INT64(1));
267 Datum
268 hashtext(PG_FUNCTION_ARGS)
270 text *key = PG_GETARG_TEXT_PP(0);
271 Oid collid = PG_GET_COLLATION();
272 pg_locale_t mylocale = 0;
273 Datum result;
275 if (!collid)
276 ereport(ERROR,
277 (errcode(ERRCODE_INDETERMINATE_COLLATION),
278 errmsg("could not determine which collation to use for string hashing"),
279 errhint("Use the COLLATE clause to set the collation explicitly.")));
281 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
282 mylocale = pg_newlocale_from_collation(collid);
284 if (!mylocale || mylocale->deterministic)
286 result = hash_any((unsigned char *) VARDATA_ANY(key),
287 VARSIZE_ANY_EXHDR(key));
289 else
291 #ifdef USE_ICU
292 if (mylocale->provider == COLLPROVIDER_ICU)
294 int32_t ulen = -1;
295 UChar *uchar = NULL;
296 Size bsize;
297 uint8_t *buf;
299 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
301 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
302 uchar, ulen, NULL, 0);
303 buf = palloc(bsize);
304 ucol_getSortKey(mylocale->info.icu.ucol,
305 uchar, ulen, buf, bsize);
307 result = hash_any(buf, bsize);
309 pfree(buf);
311 else
312 #endif
313 /* shouldn't happen */
314 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
317 /* Avoid leaking memory for toasted inputs */
318 PG_FREE_IF_COPY(key, 0);
320 return result;
323 Datum
324 hashtextextended(PG_FUNCTION_ARGS)
326 text *key = PG_GETARG_TEXT_PP(0);
327 Oid collid = PG_GET_COLLATION();
328 pg_locale_t mylocale = 0;
329 Datum result;
331 if (!collid)
332 ereport(ERROR,
333 (errcode(ERRCODE_INDETERMINATE_COLLATION),
334 errmsg("could not determine which collation to use for string hashing"),
335 errhint("Use the COLLATE clause to set the collation explicitly.")));
337 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
338 mylocale = pg_newlocale_from_collation(collid);
340 if (!mylocale || mylocale->deterministic)
342 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
343 VARSIZE_ANY_EXHDR(key),
344 PG_GETARG_INT64(1));
346 else
348 #ifdef USE_ICU
349 if (mylocale->provider == COLLPROVIDER_ICU)
351 int32_t ulen = -1;
352 UChar *uchar = NULL;
353 Size bsize;
354 uint8_t *buf;
356 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
358 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
359 uchar, ulen, NULL, 0);
360 buf = palloc(bsize);
361 ucol_getSortKey(mylocale->info.icu.ucol,
362 uchar, ulen, buf, bsize);
364 result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
366 pfree(buf);
368 else
369 #endif
370 /* shouldn't happen */
371 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
374 PG_FREE_IF_COPY(key, 0);
376 return result;
380 * hashvarlena() can be used for any varlena datatype in which there are
381 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
383 Datum
384 hashvarlena(PG_FUNCTION_ARGS)
386 struct varlena *key = PG_GETARG_VARLENA_PP(0);
387 Datum result;
389 result = hash_any((unsigned char *) VARDATA_ANY(key),
390 VARSIZE_ANY_EXHDR(key));
392 /* Avoid leaking memory for toasted inputs */
393 PG_FREE_IF_COPY(key, 0);
395 return result;
398 Datum
399 hashvarlenaextended(PG_FUNCTION_ARGS)
401 struct varlena *key = PG_GETARG_VARLENA_PP(0);
402 Datum result;
404 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
405 VARSIZE_ANY_EXHDR(key),
406 PG_GETARG_INT64(1));
408 PG_FREE_IF_COPY(key, 0);
410 return result;