Update copyright for 2022
[pgsql.git] / src / backend / executor / execGrouping.c
blobaf6e9c42d816e6a319f325cc667c1e6c68b47907
1 /*-------------------------------------------------------------------------
3 * execGrouping.c
4 * executor utility routines for grouping, hashing, and aggregation
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/executor/execGrouping.c
13 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include "access/parallel.h"
18 #include "common/hashfn.h"
19 #include "executor/executor.h"
20 #include "miscadmin.h"
21 #include "utils/lsyscache.h"
22 #include "utils/memutils.h"
24 static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
25 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
26 const MinimalTuple tuple);
27 static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
28 TupleTableSlot *slot,
29 bool *isnew, uint32 hash);
32 * Define parameters for tuple hash table code generation. The interface is
33 * *also* declared in execnodes.h (to generate the types, which are externally
34 * visible).
36 #define SH_PREFIX tuplehash
37 #define SH_ELEMENT_TYPE TupleHashEntryData
38 #define SH_KEY_TYPE MinimalTuple
39 #define SH_KEY firstTuple
40 #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
41 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42 #define SH_SCOPE extern
43 #define SH_STORE_HASH
44 #define SH_GET_HASH(tb, a) a->hash
45 #define SH_DEFINE
46 #include "lib/simplehash.h"
49 /*****************************************************************************
50 * Utility routines for grouping tuples together
51 *****************************************************************************/
54 * execTuplesMatchPrepare
55 * Build expression that can be evaluated using ExecQual(), returning
56 * whether an ExprContext's inner/outer tuples are NOT DISTINCT
58 ExprState *
59 execTuplesMatchPrepare(TupleDesc desc,
60 int numCols,
61 const AttrNumber *keyColIdx,
62 const Oid *eqOperators,
63 const Oid *collations,
64 PlanState *parent)
66 Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67 int i;
68 ExprState *expr;
70 if (numCols == 0)
71 return NULL;
73 /* lookup equality functions */
74 for (i = 0; i < numCols; i++)
75 eqFunctions[i] = get_opcode(eqOperators[i]);
77 /* build actual expression */
78 expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79 numCols, keyColIdx, eqFunctions, collations,
80 parent);
82 return expr;
86 * execTuplesHashPrepare
87 * Look up the equality and hashing functions needed for a TupleHashTable.
89 * This is similar to execTuplesMatchPrepare, but we also need to find the
90 * hash functions associated with the equality operators. *eqFunctions and
91 * *hashFunctions receive the palloc'd result arrays.
93 * Note: we expect that the given operators are not cross-type comparisons.
95 void
96 execTuplesHashPrepare(int numCols,
97 const Oid *eqOperators,
98 Oid **eqFuncOids,
99 FmgrInfo **hashFunctions)
101 int i;
103 *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
104 *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
106 for (i = 0; i < numCols; i++)
108 Oid eq_opr = eqOperators[i];
109 Oid eq_function;
110 Oid left_hash_function;
111 Oid right_hash_function;
113 eq_function = get_opcode(eq_opr);
114 if (!get_op_hash_functions(eq_opr,
115 &left_hash_function, &right_hash_function))
116 elog(ERROR, "could not find hash function for hash operator %u",
117 eq_opr);
118 /* We're not supporting cross-type cases here */
119 Assert(left_hash_function == right_hash_function);
120 (*eqFuncOids)[i] = eq_function;
121 fmgr_info(right_hash_function, &(*hashFunctions)[i]);
126 /*****************************************************************************
127 * Utility routines for all-in-memory hash tables
129 * These routines build hash tables for grouping tuples together (eg, for
130 * hash aggregation). There is one entry for each not-distinct set of tuples
131 * presented.
132 *****************************************************************************/
135 * Construct an empty TupleHashTable
137 * numCols, keyColIdx: identify the tuple fields to use as lookup key
138 * eqfunctions: equality comparison functions to use
139 * hashfunctions: datatype-specific hashing functions to use
140 * nbuckets: initial estimate of hashtable size
141 * additionalsize: size of data stored in ->additional
142 * metacxt: memory context for long-lived allocation, but not per-entry data
143 * tablecxt: memory context in which to store table entries
144 * tempcxt: short-lived context for evaluation hash and comparison functions
146 * The function arrays may be made with execTuplesHashPrepare(). Note they
147 * are not cross-type functions, but expect to see the table datatype(s)
148 * on both sides.
150 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151 * storage that will live as long as the hashtable does.
153 TupleHashTable
154 BuildTupleHashTableExt(PlanState *parent,
155 TupleDesc inputDesc,
156 int numCols, AttrNumber *keyColIdx,
157 const Oid *eqfuncoids,
158 FmgrInfo *hashfunctions,
159 Oid *collations,
160 long nbuckets, Size additionalsize,
161 MemoryContext metacxt,
162 MemoryContext tablecxt,
163 MemoryContext tempcxt,
164 bool use_variable_hash_iv)
166 TupleHashTable hashtable;
167 Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
168 Size hash_mem_limit;
169 MemoryContext oldcontext;
170 bool allow_jit;
172 Assert(nbuckets > 0);
174 /* Limit initial table size request to not more than hash_mem */
175 hash_mem_limit = get_hash_memory_limit() / entrysize;
176 if (nbuckets > hash_mem_limit)
177 nbuckets = hash_mem_limit;
179 oldcontext = MemoryContextSwitchTo(metacxt);
181 hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
183 hashtable->numCols = numCols;
184 hashtable->keyColIdx = keyColIdx;
185 hashtable->tab_hash_funcs = hashfunctions;
186 hashtable->tab_collations = collations;
187 hashtable->tablecxt = tablecxt;
188 hashtable->tempcxt = tempcxt;
189 hashtable->entrysize = entrysize;
190 hashtable->tableslot = NULL; /* will be made on first lookup */
191 hashtable->inputslot = NULL;
192 hashtable->in_hash_funcs = NULL;
193 hashtable->cur_eq_func = NULL;
196 * If parallelism is in use, even if the leader backend is performing the
197 * scan itself, we don't want to create the hashtable exactly the same way
198 * in all workers. As hashtables are iterated over in keyspace-order,
199 * doing so in all processes in the same way is likely to lead to
200 * "unbalanced" hashtables when the table size initially is
201 * underestimated.
203 if (use_variable_hash_iv)
204 hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
205 else
206 hashtable->hash_iv = 0;
208 hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
211 * We copy the input tuple descriptor just for safety --- we assume all
212 * input tuples will have equivalent descriptors.
214 hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
215 &TTSOpsMinimalTuple);
218 * If the old reset interface is used (i.e. BuildTupleHashTable, rather
219 * than BuildTupleHashTableExt), allowing JIT would lead to the generated
220 * functions to a) live longer than the query b) be re-generated each time
221 * the table is being reset. Therefore prevent JIT from being used in that
222 * case, by not providing a parent node (which prevents accessing the
223 * JitContext in the EState).
225 allow_jit = metacxt != tablecxt;
227 /* build comparator for all columns */
228 /* XXX: should we support non-minimal tuples for the inputslot? */
229 hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
230 &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
231 numCols,
232 keyColIdx, eqfuncoids, collations,
233 allow_jit ? parent : NULL);
236 * While not pretty, it's ok to not shut down this context, but instead
237 * rely on the containing memory context being reset, as
238 * ExecBuildGroupingEqual() only builds a very simple expression calling
239 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
241 hashtable->exprcontext = CreateStandaloneExprContext();
243 MemoryContextSwitchTo(oldcontext);
245 return hashtable;
249 * BuildTupleHashTable is a backwards-compatibilty wrapper for
250 * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
251 * tablecxt. Note that hashtables created this way cannot be reset leak-free
252 * with ResetTupleHashTable().
254 TupleHashTable
255 BuildTupleHashTable(PlanState *parent,
256 TupleDesc inputDesc,
257 int numCols, AttrNumber *keyColIdx,
258 const Oid *eqfuncoids,
259 FmgrInfo *hashfunctions,
260 Oid *collations,
261 long nbuckets, Size additionalsize,
262 MemoryContext tablecxt,
263 MemoryContext tempcxt,
264 bool use_variable_hash_iv)
266 return BuildTupleHashTableExt(parent,
267 inputDesc,
268 numCols, keyColIdx,
269 eqfuncoids,
270 hashfunctions,
271 collations,
272 nbuckets, additionalsize,
273 tablecxt,
274 tablecxt,
275 tempcxt,
276 use_variable_hash_iv);
280 * Reset contents of the hashtable to be empty, preserving all the non-content
281 * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
282 * also be reset, otherwise there will be leaks.
284 void
285 ResetTupleHashTable(TupleHashTable hashtable)
287 tuplehash_reset(hashtable->hashtab);
291 * Find or create a hashtable entry for the tuple group containing the
292 * given tuple. The tuple must be the same type as the hashtable entries.
294 * If isnew is NULL, we do not create new entries; we return NULL if no
295 * match is found.
297 * If hash is not NULL, we set it to the calculated hash value. This allows
298 * callers access to the hash value even if no entry is returned.
300 * If isnew isn't NULL, then a new entry is created if no existing entry
301 * matches. On return, *isnew is true if the entry is newly created,
302 * false if it existed already. ->additional_data in the new entry has
303 * been zeroed.
305 TupleHashEntry
306 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
307 bool *isnew, uint32 *hash)
309 TupleHashEntry entry;
310 MemoryContext oldContext;
311 uint32 local_hash;
313 /* Need to run the hash functions in short-lived context */
314 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
316 /* set up data needed by hash and match functions */
317 hashtable->inputslot = slot;
318 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
319 hashtable->cur_eq_func = hashtable->tab_eq_func;
321 local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
322 entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
324 if (hash != NULL)
325 *hash = local_hash;
327 Assert(entry == NULL || entry->hash == local_hash);
329 MemoryContextSwitchTo(oldContext);
331 return entry;
335 * Compute the hash value for a tuple
337 uint32
338 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
340 MemoryContext oldContext;
341 uint32 hash;
343 hashtable->inputslot = slot;
344 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
346 /* Need to run the hash functions in short-lived context */
347 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
349 hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
351 MemoryContextSwitchTo(oldContext);
353 return hash;
357 * A variant of LookupTupleHashEntry for callers that have already computed
358 * the hash value.
360 TupleHashEntry
361 LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
362 bool *isnew, uint32 hash)
364 TupleHashEntry entry;
365 MemoryContext oldContext;
367 /* Need to run the hash functions in short-lived context */
368 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
370 /* set up data needed by hash and match functions */
371 hashtable->inputslot = slot;
372 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
373 hashtable->cur_eq_func = hashtable->tab_eq_func;
375 entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
376 Assert(entry == NULL || entry->hash == hash);
378 MemoryContextSwitchTo(oldContext);
380 return entry;
384 * Search for a hashtable entry matching the given tuple. No entry is
385 * created if there's not a match. This is similar to the non-creating
386 * case of LookupTupleHashEntry, except that it supports cross-type
387 * comparisons, in which the given tuple is not of the same type as the
388 * table entries. The caller must provide the hash functions to use for
389 * the input tuple, as well as the equality functions, since these may be
390 * different from the table's internal functions.
392 TupleHashEntry
393 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
394 ExprState *eqcomp,
395 FmgrInfo *hashfunctions)
397 TupleHashEntry entry;
398 MemoryContext oldContext;
399 MinimalTuple key;
401 /* Need to run the hash functions in short-lived context */
402 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
404 /* Set up data needed by hash and match functions */
405 hashtable->inputslot = slot;
406 hashtable->in_hash_funcs = hashfunctions;
407 hashtable->cur_eq_func = eqcomp;
409 /* Search the hash table */
410 key = NULL; /* flag to reference inputslot */
411 entry = tuplehash_lookup(hashtable->hashtab, key);
412 MemoryContextSwitchTo(oldContext);
414 return entry;
418 * If tuple is NULL, use the input slot instead. This convention avoids the
419 * need to materialize virtual input tuples unless they actually need to get
420 * copied into the table.
422 * Also, the caller must select an appropriate memory context for running
423 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
425 static uint32
426 TupleHashTableHash_internal(struct tuplehash_hash *tb,
427 const MinimalTuple tuple)
429 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
430 int numCols = hashtable->numCols;
431 AttrNumber *keyColIdx = hashtable->keyColIdx;
432 uint32 hashkey = hashtable->hash_iv;
433 TupleTableSlot *slot;
434 FmgrInfo *hashfunctions;
435 int i;
437 if (tuple == NULL)
439 /* Process the current input tuple for the table */
440 slot = hashtable->inputslot;
441 hashfunctions = hashtable->in_hash_funcs;
443 else
446 * Process a tuple already stored in the table.
448 * (this case never actually occurs due to the way simplehash.h is
449 * used, as the hash-value is stored in the entries)
451 slot = hashtable->tableslot;
452 ExecStoreMinimalTuple(tuple, slot, false);
453 hashfunctions = hashtable->tab_hash_funcs;
456 for (i = 0; i < numCols; i++)
458 AttrNumber att = keyColIdx[i];
459 Datum attr;
460 bool isNull;
462 /* rotate hashkey left 1 bit at each step */
463 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
465 attr = slot_getattr(slot, att, &isNull);
467 if (!isNull) /* treat nulls as having hash key 0 */
469 uint32 hkey;
471 hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
472 hashtable->tab_collations[i],
473 attr));
474 hashkey ^= hkey;
479 * The way hashes are combined above, among each other and with the IV,
480 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
481 * achieve that, perform a round of hashing of the combined hash -
482 * resulting in near perfect perturbation.
484 return murmurhash32(hashkey);
488 * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
489 * so that we can avoid switching the memory context multiple times for
490 * LookupTupleHashEntry.
492 * NB: This function may or may not change the memory context. Caller is
493 * expected to change it back.
495 static inline TupleHashEntry
496 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
497 bool *isnew, uint32 hash)
499 TupleHashEntryData *entry;
500 bool found;
501 MinimalTuple key;
503 key = NULL; /* flag to reference inputslot */
505 if (isnew)
507 entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
509 if (found)
511 /* found pre-existing entry */
512 *isnew = false;
514 else
516 /* created new entry */
517 *isnew = true;
518 /* zero caller data */
519 entry->additional = NULL;
520 MemoryContextSwitchTo(hashtable->tablecxt);
521 /* Copy the first tuple into the table context */
522 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
525 else
527 entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
530 return entry;
534 * See whether two tuples (presumably of the same hash value) match
536 static int
537 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
539 TupleTableSlot *slot1;
540 TupleTableSlot *slot2;
541 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
542 ExprContext *econtext = hashtable->exprcontext;
545 * We assume that simplehash.h will only ever call us with the first
546 * argument being an actual table entry, and the second argument being
547 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
548 * could be supported too, but is not currently required.
550 Assert(tuple1 != NULL);
551 slot1 = hashtable->tableslot;
552 ExecStoreMinimalTuple(tuple1, slot1, false);
553 Assert(tuple2 == NULL);
554 slot2 = hashtable->inputslot;
556 /* For crosstype comparisons, the inputslot must be first */
557 econtext->ecxt_innertuple = slot2;
558 econtext->ecxt_outertuple = slot1;
559 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);