1 /*-------------------------------------------------------------------------
4 * executor utility routines for grouping, hashing, and aggregation
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/executor/execGrouping.c
13 *-------------------------------------------------------------------------
17 #include "access/parallel.h"
18 #include "common/hashfn.h"
19 #include "executor/executor.h"
20 #include "miscadmin.h"
21 #include "utils/lsyscache.h"
22 #include "utils/memutils.h"
24 static int TupleHashTableMatch(struct tuplehash_hash
*tb
, const MinimalTuple tuple1
, const MinimalTuple tuple2
);
25 static inline uint32
TupleHashTableHash_internal(struct tuplehash_hash
*tb
,
26 const MinimalTuple tuple
);
27 static inline TupleHashEntry
LookupTupleHashEntry_internal(TupleHashTable hashtable
,
29 bool *isnew
, uint32 hash
);
32 * Define parameters for tuple hash table code generation. The interface is
33 * *also* declared in execnodes.h (to generate the types, which are externally
36 #define SH_PREFIX tuplehash
37 #define SH_ELEMENT_TYPE TupleHashEntryData
38 #define SH_KEY_TYPE MinimalTuple
39 #define SH_KEY firstTuple
40 #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
41 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42 #define SH_SCOPE extern
44 #define SH_GET_HASH(tb, a) a->hash
46 #include "lib/simplehash.h"
49 /*****************************************************************************
50 * Utility routines for grouping tuples together
51 *****************************************************************************/
54 * execTuplesMatchPrepare
55 * Build expression that can be evaluated using ExecQual(), returning
56 * whether an ExprContext's inner/outer tuples are NOT DISTINCT
59 execTuplesMatchPrepare(TupleDesc desc
,
61 const AttrNumber
*keyColIdx
,
62 const Oid
*eqOperators
,
63 const Oid
*collations
,
66 Oid
*eqFunctions
= (Oid
*) palloc(numCols
* sizeof(Oid
));
73 /* lookup equality functions */
74 for (i
= 0; i
< numCols
; i
++)
75 eqFunctions
[i
] = get_opcode(eqOperators
[i
]);
77 /* build actual expression */
78 expr
= ExecBuildGroupingEqual(desc
, desc
, NULL
, NULL
,
79 numCols
, keyColIdx
, eqFunctions
, collations
,
86 * execTuplesHashPrepare
87 * Look up the equality and hashing functions needed for a TupleHashTable.
89 * This is similar to execTuplesMatchPrepare, but we also need to find the
90 * hash functions associated with the equality operators. *eqFunctions and
91 * *hashFunctions receive the palloc'd result arrays.
93 * Note: we expect that the given operators are not cross-type comparisons.
96 execTuplesHashPrepare(int numCols
,
97 const Oid
*eqOperators
,
99 FmgrInfo
**hashFunctions
)
103 *eqFuncOids
= (Oid
*) palloc(numCols
* sizeof(Oid
));
104 *hashFunctions
= (FmgrInfo
*) palloc(numCols
* sizeof(FmgrInfo
));
106 for (i
= 0; i
< numCols
; i
++)
108 Oid eq_opr
= eqOperators
[i
];
110 Oid left_hash_function
;
111 Oid right_hash_function
;
113 eq_function
= get_opcode(eq_opr
);
114 if (!get_op_hash_functions(eq_opr
,
115 &left_hash_function
, &right_hash_function
))
116 elog(ERROR
, "could not find hash function for hash operator %u",
118 /* We're not supporting cross-type cases here */
119 Assert(left_hash_function
== right_hash_function
);
120 (*eqFuncOids
)[i
] = eq_function
;
121 fmgr_info(right_hash_function
, &(*hashFunctions
)[i
]);
126 /*****************************************************************************
127 * Utility routines for all-in-memory hash tables
129 * These routines build hash tables for grouping tuples together (eg, for
130 * hash aggregation). There is one entry for each not-distinct set of tuples
132 *****************************************************************************/
135 * Construct an empty TupleHashTable
137 * numCols, keyColIdx: identify the tuple fields to use as lookup key
138 * eqfunctions: equality comparison functions to use
139 * hashfunctions: datatype-specific hashing functions to use
140 * nbuckets: initial estimate of hashtable size
141 * additionalsize: size of data stored in ->additional
142 * metacxt: memory context for long-lived allocation, but not per-entry data
143 * tablecxt: memory context in which to store table entries
144 * tempcxt: short-lived context for evaluation hash and comparison functions
146 * The function arrays may be made with execTuplesHashPrepare(). Note they
147 * are not cross-type functions, but expect to see the table datatype(s)
150 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151 * storage that will live as long as the hashtable does.
154 BuildTupleHashTableExt(PlanState
*parent
,
156 int numCols
, AttrNumber
*keyColIdx
,
157 const Oid
*eqfuncoids
,
158 FmgrInfo
*hashfunctions
,
160 long nbuckets
, Size additionalsize
,
161 MemoryContext metacxt
,
162 MemoryContext tablecxt
,
163 MemoryContext tempcxt
,
164 bool use_variable_hash_iv
)
166 TupleHashTable hashtable
;
167 Size entrysize
= sizeof(TupleHashEntryData
) + additionalsize
;
169 MemoryContext oldcontext
;
172 Assert(nbuckets
> 0);
174 /* Limit initial table size request to not more than hash_mem */
175 hash_mem_limit
= get_hash_memory_limit() / entrysize
;
176 if (nbuckets
> hash_mem_limit
)
177 nbuckets
= hash_mem_limit
;
179 oldcontext
= MemoryContextSwitchTo(metacxt
);
181 hashtable
= (TupleHashTable
) palloc(sizeof(TupleHashTableData
));
183 hashtable
->numCols
= numCols
;
184 hashtable
->keyColIdx
= keyColIdx
;
185 hashtable
->tab_hash_funcs
= hashfunctions
;
186 hashtable
->tab_collations
= collations
;
187 hashtable
->tablecxt
= tablecxt
;
188 hashtable
->tempcxt
= tempcxt
;
189 hashtable
->entrysize
= entrysize
;
190 hashtable
->tableslot
= NULL
; /* will be made on first lookup */
191 hashtable
->inputslot
= NULL
;
192 hashtable
->in_hash_funcs
= NULL
;
193 hashtable
->cur_eq_func
= NULL
;
196 * If parallelism is in use, even if the leader backend is performing the
197 * scan itself, we don't want to create the hashtable exactly the same way
198 * in all workers. As hashtables are iterated over in keyspace-order,
199 * doing so in all processes in the same way is likely to lead to
200 * "unbalanced" hashtables when the table size initially is
203 if (use_variable_hash_iv
)
204 hashtable
->hash_iv
= murmurhash32(ParallelWorkerNumber
);
206 hashtable
->hash_iv
= 0;
208 hashtable
->hashtab
= tuplehash_create(metacxt
, nbuckets
, hashtable
);
211 * We copy the input tuple descriptor just for safety --- we assume all
212 * input tuples will have equivalent descriptors.
214 hashtable
->tableslot
= MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc
),
215 &TTSOpsMinimalTuple
);
218 * If the old reset interface is used (i.e. BuildTupleHashTable, rather
219 * than BuildTupleHashTableExt), allowing JIT would lead to the generated
220 * functions to a) live longer than the query b) be re-generated each time
221 * the table is being reset. Therefore prevent JIT from being used in that
222 * case, by not providing a parent node (which prevents accessing the
223 * JitContext in the EState).
225 allow_jit
= metacxt
!= tablecxt
;
227 /* build comparator for all columns */
228 /* XXX: should we support non-minimal tuples for the inputslot? */
229 hashtable
->tab_eq_func
= ExecBuildGroupingEqual(inputDesc
, inputDesc
,
230 &TTSOpsMinimalTuple
, &TTSOpsMinimalTuple
,
232 keyColIdx
, eqfuncoids
, collations
,
233 allow_jit
? parent
: NULL
);
236 * While not pretty, it's ok to not shut down this context, but instead
237 * rely on the containing memory context being reset, as
238 * ExecBuildGroupingEqual() only builds a very simple expression calling
239 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
241 hashtable
->exprcontext
= CreateStandaloneExprContext();
243 MemoryContextSwitchTo(oldcontext
);
249 * BuildTupleHashTable is a backwards-compatibilty wrapper for
250 * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
251 * tablecxt. Note that hashtables created this way cannot be reset leak-free
252 * with ResetTupleHashTable().
255 BuildTupleHashTable(PlanState
*parent
,
257 int numCols
, AttrNumber
*keyColIdx
,
258 const Oid
*eqfuncoids
,
259 FmgrInfo
*hashfunctions
,
261 long nbuckets
, Size additionalsize
,
262 MemoryContext tablecxt
,
263 MemoryContext tempcxt
,
264 bool use_variable_hash_iv
)
266 return BuildTupleHashTableExt(parent
,
272 nbuckets
, additionalsize
,
276 use_variable_hash_iv
);
280 * Reset contents of the hashtable to be empty, preserving all the non-content
281 * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
282 * also be reset, otherwise there will be leaks.
285 ResetTupleHashTable(TupleHashTable hashtable
)
287 tuplehash_reset(hashtable
->hashtab
);
291 * Find or create a hashtable entry for the tuple group containing the
292 * given tuple. The tuple must be the same type as the hashtable entries.
294 * If isnew is NULL, we do not create new entries; we return NULL if no
297 * If hash is not NULL, we set it to the calculated hash value. This allows
298 * callers access to the hash value even if no entry is returned.
300 * If isnew isn't NULL, then a new entry is created if no existing entry
301 * matches. On return, *isnew is true if the entry is newly created,
302 * false if it existed already. ->additional_data in the new entry has
306 LookupTupleHashEntry(TupleHashTable hashtable
, TupleTableSlot
*slot
,
307 bool *isnew
, uint32
*hash
)
309 TupleHashEntry entry
;
310 MemoryContext oldContext
;
313 /* Need to run the hash functions in short-lived context */
314 oldContext
= MemoryContextSwitchTo(hashtable
->tempcxt
);
316 /* set up data needed by hash and match functions */
317 hashtable
->inputslot
= slot
;
318 hashtable
->in_hash_funcs
= hashtable
->tab_hash_funcs
;
319 hashtable
->cur_eq_func
= hashtable
->tab_eq_func
;
321 local_hash
= TupleHashTableHash_internal(hashtable
->hashtab
, NULL
);
322 entry
= LookupTupleHashEntry_internal(hashtable
, slot
, isnew
, local_hash
);
327 Assert(entry
== NULL
|| entry
->hash
== local_hash
);
329 MemoryContextSwitchTo(oldContext
);
335 * Compute the hash value for a tuple
338 TupleHashTableHash(TupleHashTable hashtable
, TupleTableSlot
*slot
)
340 MemoryContext oldContext
;
343 hashtable
->inputslot
= slot
;
344 hashtable
->in_hash_funcs
= hashtable
->tab_hash_funcs
;
346 /* Need to run the hash functions in short-lived context */
347 oldContext
= MemoryContextSwitchTo(hashtable
->tempcxt
);
349 hash
= TupleHashTableHash_internal(hashtable
->hashtab
, NULL
);
351 MemoryContextSwitchTo(oldContext
);
357 * A variant of LookupTupleHashEntry for callers that have already computed
361 LookupTupleHashEntryHash(TupleHashTable hashtable
, TupleTableSlot
*slot
,
362 bool *isnew
, uint32 hash
)
364 TupleHashEntry entry
;
365 MemoryContext oldContext
;
367 /* Need to run the hash functions in short-lived context */
368 oldContext
= MemoryContextSwitchTo(hashtable
->tempcxt
);
370 /* set up data needed by hash and match functions */
371 hashtable
->inputslot
= slot
;
372 hashtable
->in_hash_funcs
= hashtable
->tab_hash_funcs
;
373 hashtable
->cur_eq_func
= hashtable
->tab_eq_func
;
375 entry
= LookupTupleHashEntry_internal(hashtable
, slot
, isnew
, hash
);
376 Assert(entry
== NULL
|| entry
->hash
== hash
);
378 MemoryContextSwitchTo(oldContext
);
384 * Search for a hashtable entry matching the given tuple. No entry is
385 * created if there's not a match. This is similar to the non-creating
386 * case of LookupTupleHashEntry, except that it supports cross-type
387 * comparisons, in which the given tuple is not of the same type as the
388 * table entries. The caller must provide the hash functions to use for
389 * the input tuple, as well as the equality functions, since these may be
390 * different from the table's internal functions.
393 FindTupleHashEntry(TupleHashTable hashtable
, TupleTableSlot
*slot
,
395 FmgrInfo
*hashfunctions
)
397 TupleHashEntry entry
;
398 MemoryContext oldContext
;
401 /* Need to run the hash functions in short-lived context */
402 oldContext
= MemoryContextSwitchTo(hashtable
->tempcxt
);
404 /* Set up data needed by hash and match functions */
405 hashtable
->inputslot
= slot
;
406 hashtable
->in_hash_funcs
= hashfunctions
;
407 hashtable
->cur_eq_func
= eqcomp
;
409 /* Search the hash table */
410 key
= NULL
; /* flag to reference inputslot */
411 entry
= tuplehash_lookup(hashtable
->hashtab
, key
);
412 MemoryContextSwitchTo(oldContext
);
418 * If tuple is NULL, use the input slot instead. This convention avoids the
419 * need to materialize virtual input tuples unless they actually need to get
420 * copied into the table.
422 * Also, the caller must select an appropriate memory context for running
423 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
426 TupleHashTableHash_internal(struct tuplehash_hash
*tb
,
427 const MinimalTuple tuple
)
429 TupleHashTable hashtable
= (TupleHashTable
) tb
->private_data
;
430 int numCols
= hashtable
->numCols
;
431 AttrNumber
*keyColIdx
= hashtable
->keyColIdx
;
432 uint32 hashkey
= hashtable
->hash_iv
;
433 TupleTableSlot
*slot
;
434 FmgrInfo
*hashfunctions
;
439 /* Process the current input tuple for the table */
440 slot
= hashtable
->inputslot
;
441 hashfunctions
= hashtable
->in_hash_funcs
;
446 * Process a tuple already stored in the table.
448 * (this case never actually occurs due to the way simplehash.h is
449 * used, as the hash-value is stored in the entries)
451 slot
= hashtable
->tableslot
;
452 ExecStoreMinimalTuple(tuple
, slot
, false);
453 hashfunctions
= hashtable
->tab_hash_funcs
;
456 for (i
= 0; i
< numCols
; i
++)
458 AttrNumber att
= keyColIdx
[i
];
462 /* rotate hashkey left 1 bit at each step */
463 hashkey
= (hashkey
<< 1) | ((hashkey
& 0x80000000) ? 1 : 0);
465 attr
= slot_getattr(slot
, att
, &isNull
);
467 if (!isNull
) /* treat nulls as having hash key 0 */
471 hkey
= DatumGetUInt32(FunctionCall1Coll(&hashfunctions
[i
],
472 hashtable
->tab_collations
[i
],
479 * The way hashes are combined above, among each other and with the IV,
480 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
481 * achieve that, perform a round of hashing of the combined hash -
482 * resulting in near perfect perturbation.
484 return murmurhash32(hashkey
);
488 * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
489 * so that we can avoid switching the memory context multiple times for
490 * LookupTupleHashEntry.
492 * NB: This function may or may not change the memory context. Caller is
493 * expected to change it back.
495 static inline TupleHashEntry
496 LookupTupleHashEntry_internal(TupleHashTable hashtable
, TupleTableSlot
*slot
,
497 bool *isnew
, uint32 hash
)
499 TupleHashEntryData
*entry
;
503 key
= NULL
; /* flag to reference inputslot */
507 entry
= tuplehash_insert_hash(hashtable
->hashtab
, key
, hash
, &found
);
511 /* found pre-existing entry */
516 /* created new entry */
518 /* zero caller data */
519 entry
->additional
= NULL
;
520 MemoryContextSwitchTo(hashtable
->tablecxt
);
521 /* Copy the first tuple into the table context */
522 entry
->firstTuple
= ExecCopySlotMinimalTuple(slot
);
527 entry
= tuplehash_lookup_hash(hashtable
->hashtab
, key
, hash
);
534 * See whether two tuples (presumably of the same hash value) match
537 TupleHashTableMatch(struct tuplehash_hash
*tb
, const MinimalTuple tuple1
, const MinimalTuple tuple2
)
539 TupleTableSlot
*slot1
;
540 TupleTableSlot
*slot2
;
541 TupleHashTable hashtable
= (TupleHashTable
) tb
->private_data
;
542 ExprContext
*econtext
= hashtable
->exprcontext
;
545 * We assume that simplehash.h will only ever call us with the first
546 * argument being an actual table entry, and the second argument being
547 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
548 * could be supported too, but is not currently required.
550 Assert(tuple1
!= NULL
);
551 slot1
= hashtable
->tableslot
;
552 ExecStoreMinimalTuple(tuple1
, slot1
, false);
553 Assert(tuple2
== NULL
);
554 slot2
= hashtable
->inputslot
;
556 /* For crosstype comparisons, the inputslot must be first */
557 econtext
->ecxt_innertuple
= slot2
;
558 econtext
->ecxt_outertuple
= slot1
;
559 return !ExecQualAndReset(hashtable
->cur_eq_func
, econtext
);