src/backend/executor/execGrouping.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * execGrouping.c
   4  *        executor utility routines for grouping, hashing, and aggregation
   5  *
   6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/executor/execGrouping.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include "access/parallel.h"
  18 #include "common/hashfn.h"
  19 #include "executor/executor.h"
  20 #include "miscadmin.h"
  21 #include "utils/lsyscache.h"
  22 #include "utils/memutils.h"
  23
  24 static int      TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
  25 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
  26                                                                                                  const MinimalTuple tuple);
  27 static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
  28                                                                                                                    TupleTableSlot *slot,
  29                                                                                                                    bool *isnew, uint32 hash);
  30
  31 /*
  32  * Define parameters for tuple hash table code generation. The interface is
  33  * *also* declared in execnodes.h (to generate the types, which are externally
  34  * visible).
  35  */
  36 #define SH_PREFIX tuplehash
  37 #define SH_ELEMENT_TYPE TupleHashEntryData
  38 #define SH_KEY_TYPE MinimalTuple
  39 #define SH_KEY firstTuple
  40 #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
  41 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
  42 #define SH_SCOPE extern
  43 #define SH_STORE_HASH
  44 #define SH_GET_HASH(tb, a) a->hash
  45 #define SH_DEFINE
  46 #include "lib/simplehash.h"
  47
  48
  49 /*****************************************************************************
  50  *              Utility routines for grouping tuples together
  51  *****************************************************************************/
  52
  53 /*
  54  * execTuplesMatchPrepare
  55  *              Build expression that can be evaluated using ExecQual(), returning
  56  *              whether an ExprContext's inner/outer tuples are NOT DISTINCT
  57  */
  58 ExprState *
  59 execTuplesMatchPrepare(TupleDesc desc,
  60                                            int numCols,
  61                                            const AttrNumber *keyColIdx,
  62                                            const Oid *eqOperators,
  63                                            const Oid *collations,
  64                                            PlanState *parent)
  65 {
  66         Oid                *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
  67         int                     i;
  68         ExprState  *expr;
  69
  70         if (numCols == 0)
  71                 return NULL;
  72
  73         /* lookup equality functions */
  74         for (i = 0; i < numCols; i++)
  75                 eqFunctions[i] = get_opcode(eqOperators[i]);
  76
  77         /* build actual expression */
  78         expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
  79                                                                   numCols, keyColIdx, eqFunctions, collations,
  80                                                                   parent);
  81
  82         return expr;
  83 }
  84
  85 /*
  86  * execTuplesHashPrepare
  87  *              Look up the equality and hashing functions needed for a TupleHashTable.
  88  *
  89  * This is similar to execTuplesMatchPrepare, but we also need to find the
  90  * hash functions associated with the equality operators.  *eqFunctions and
  91  * *hashFunctions receive the palloc'd result arrays.
  92  *
  93  * Note: we expect that the given operators are not cross-type comparisons.
  94  */
  95 void
  96 execTuplesHashPrepare(int numCols,
  97                                           const Oid *eqOperators,
  98                                           Oid **eqFuncOids,
  99                                           FmgrInfo **hashFunctions)
 100 {
 101         int                     i;
 102
 103         *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
 104         *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 105
 106         for (i = 0; i < numCols; i++)
 107         {
 108                 Oid                     eq_opr = eqOperators[i];
 109                 Oid                     eq_function;
 110                 Oid                     left_hash_function;
 111                 Oid                     right_hash_function;
 112
 113                 eq_function = get_opcode(eq_opr);
 114                 if (!get_op_hash_functions(eq_opr,
 115                                                                    &left_hash_function, &right_hash_function))
 116                         elog(ERROR, "could not find hash function for hash operator %u",
 117                                  eq_opr);
 118                 /* We're not supporting cross-type cases here */
 119                 Assert(left_hash_function == right_hash_function);
 120                 (*eqFuncOids)[i] = eq_function;
 121                 fmgr_info(right_hash_function, &(*hashFunctions)[i]);
 122         }
 123 }
 124
 125
 126 /*****************************************************************************
 127  *              Utility routines for all-in-memory hash tables
 128  *
 129  * These routines build hash tables for grouping tuples together (eg, for
 130  * hash aggregation).  There is one entry for each not-distinct set of tuples
 131  * presented.
 132  *****************************************************************************/
 133
 134 /*
 135  * Construct an empty TupleHashTable
 136  *
 137  *      numCols, keyColIdx: identify the tuple fields to use as lookup key
 138  *      eqfunctions: equality comparison functions to use
 139  *      hashfunctions: datatype-specific hashing functions to use
 140  *      nbuckets: initial estimate of hashtable size
 141  *      additionalsize: size of data stored in ->additional
 142  *      metacxt: memory context for long-lived allocation, but not per-entry data
 143  *      tablecxt: memory context in which to store table entries
 144  *      tempcxt: short-lived context for evaluation hash and comparison functions
 145  *
 146  * The function arrays may be made with execTuplesHashPrepare().  Note they
 147  * are not cross-type functions, but expect to see the table datatype(s)
 148  * on both sides.
 149  *
 150  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
 151  * storage that will live as long as the hashtable does.
 152  */
 153 TupleHashTable
 154 BuildTupleHashTableExt(PlanState *parent,
 155                                            TupleDesc inputDesc,
 156                                            int numCols, AttrNumber *keyColIdx,
 157                                            const Oid *eqfuncoids,
 158                                            FmgrInfo *hashfunctions,
 159                                            Oid *collations,
 160                                            long nbuckets, Size additionalsize,
 161                                            MemoryContext metacxt,
 162                                            MemoryContext tablecxt,
 163                                            MemoryContext tempcxt,
 164                                            bool use_variable_hash_iv)
 165 {
 166         TupleHashTable hashtable;
 167         Size            entrysize = sizeof(TupleHashEntryData) + additionalsize;
 168         Size            hash_mem_limit;
 169         MemoryContext oldcontext;
 170         bool            allow_jit;
 171
 172         Assert(nbuckets > 0);
 173
 174         /* Limit initial table size request to not more than hash_mem */
 175         hash_mem_limit = get_hash_memory_limit() / entrysize;
 176         if (nbuckets > hash_mem_limit)
 177                 nbuckets = hash_mem_limit;
 178
 179         oldcontext = MemoryContextSwitchTo(metacxt);
 180
 181         hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
 182
 183         hashtable->numCols = numCols;
 184         hashtable->keyColIdx = keyColIdx;
 185         hashtable->tab_hash_funcs = hashfunctions;
 186         hashtable->tab_collations = collations;
 187         hashtable->tablecxt = tablecxt;
 188         hashtable->tempcxt = tempcxt;
 189         hashtable->entrysize = entrysize;
 190         hashtable->tableslot = NULL;    /* will be made on first lookup */
 191         hashtable->inputslot = NULL;
 192         hashtable->in_hash_funcs = NULL;
 193         hashtable->cur_eq_func = NULL;
 194
 195         /*
 196          * If parallelism is in use, even if the leader backend is performing the
 197          * scan itself, we don't want to create the hashtable exactly the same way
 198          * in all workers. As hashtables are iterated over in keyspace-order,
 199          * doing so in all processes in the same way is likely to lead to
 200          * "unbalanced" hashtables when the table size initially is
 201          * underestimated.
 202          */
 203         if (use_variable_hash_iv)
 204                 hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
 205         else
 206                 hashtable->hash_iv = 0;
 207
 208         hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
 209
 210         /*
 211          * We copy the input tuple descriptor just for safety --- we assume all
 212          * input tuples will have equivalent descriptors.
 213          */
 214         hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
 215                                                                                                         &TTSOpsMinimalTuple);
 216
 217         /*
 218          * If the old reset interface is used (i.e. BuildTupleHashTable, rather
 219          * than BuildTupleHashTableExt), allowing JIT would lead to the generated
 220          * functions to a) live longer than the query b) be re-generated each time
 221          * the table is being reset. Therefore prevent JIT from being used in that
 222          * case, by not providing a parent node (which prevents accessing the
 223          * JitContext in the EState).
 224          */
 225         allow_jit = metacxt != tablecxt;
 226
 227         /* build comparator for all columns */
 228         /* XXX: should we support non-minimal tuples for the inputslot? */
 229         hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
 230                                                                                                         &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
 231                                                                                                         numCols,
 232                                                                                                         keyColIdx, eqfuncoids, collations,
 233                                                                                                         allow_jit ? parent : NULL);
 234
 235         /*
 236          * While not pretty, it's ok to not shut down this context, but instead
 237          * rely on the containing memory context being reset, as
 238          * ExecBuildGroupingEqual() only builds a very simple expression calling
 239          * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
 240          */
 241         hashtable->exprcontext = CreateStandaloneExprContext();
 242
 243         MemoryContextSwitchTo(oldcontext);
 244
 245         return hashtable;
 246 }
 247
 248 /*
 249  * BuildTupleHashTable is a backwards-compatibilty wrapper for
 250  * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
 251  * tablecxt. Note that hashtables created this way cannot be reset leak-free
 252  * with ResetTupleHashTable().
 253  */
 254 TupleHashTable
 255 BuildTupleHashTable(PlanState *parent,
 256                                         TupleDesc inputDesc,
 257                                         int numCols, AttrNumber *keyColIdx,
 258                                         const Oid *eqfuncoids,
 259                                         FmgrInfo *hashfunctions,
 260                                         Oid *collations,
 261                                         long nbuckets, Size additionalsize,
 262                                         MemoryContext tablecxt,
 263                                         MemoryContext tempcxt,
 264                                         bool use_variable_hash_iv)
 265 {
 266         return BuildTupleHashTableExt(parent,
 267                                                                   inputDesc,
 268                                                                   numCols, keyColIdx,
 269                                                                   eqfuncoids,
 270                                                                   hashfunctions,
 271                                                                   collations,
 272                                                                   nbuckets, additionalsize,
 273                                                                   tablecxt,
 274                                                                   tablecxt,
 275                                                                   tempcxt,
 276                                                                   use_variable_hash_iv);
 277 }
 278
 279 /*
 280  * Reset contents of the hashtable to be empty, preserving all the non-content
 281  * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
 282  * also be reset, otherwise there will be leaks.
 283  */
 284 void
 285 ResetTupleHashTable(TupleHashTable hashtable)
 286 {
 287         tuplehash_reset(hashtable->hashtab);
 288 }
 289
 290 /*
 291  * Find or create a hashtable entry for the tuple group containing the
 292  * given tuple.  The tuple must be the same type as the hashtable entries.
 293  *
 294  * If isnew is NULL, we do not create new entries; we return NULL if no
 295  * match is found.
 296  *
 297  * If hash is not NULL, we set it to the calculated hash value. This allows
 298  * callers access to the hash value even if no entry is returned.
 299  *
 300  * If isnew isn't NULL, then a new entry is created if no existing entry
 301  * matches.  On return, *isnew is true if the entry is newly created,
 302  * false if it existed already.  ->additional_data in the new entry has
 303  * been zeroed.
 304  */
 305 TupleHashEntry
 306 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 307                                          bool *isnew, uint32 *hash)
 308 {
 309         TupleHashEntry entry;
 310         MemoryContext oldContext;
 311         uint32          local_hash;
 312
 313         /* Need to run the hash functions in short-lived context */
 314         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 315
 316         /* set up data needed by hash and match functions */
 317         hashtable->inputslot = slot;
 318         hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
 319         hashtable->cur_eq_func = hashtable->tab_eq_func;
 320
 321         local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
 322         entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
 323
 324         if (hash != NULL)
 325                 *hash = local_hash;
 326
 327         Assert(entry == NULL || entry->hash == local_hash);
 328
 329         MemoryContextSwitchTo(oldContext);
 330
 331         return entry;
 332 }
 333
 334 /*
 335  * Compute the hash value for a tuple
 336  */
 337 uint32
 338 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
 339 {
 340         MemoryContext oldContext;
 341         uint32          hash;
 342
 343         hashtable->inputslot = slot;
 344         hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
 345
 346         /* Need to run the hash functions in short-lived context */
 347         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 348
 349         hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
 350
 351         MemoryContextSwitchTo(oldContext);
 352
 353         return hash;
 354 }
 355
 356 /*
 357  * A variant of LookupTupleHashEntry for callers that have already computed
 358  * the hash value.
 359  */
 360 TupleHashEntry
 361 LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
 362                                                  bool *isnew, uint32 hash)
 363 {
 364         TupleHashEntry entry;
 365         MemoryContext oldContext;
 366
 367         /* Need to run the hash functions in short-lived context */
 368         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 369
 370         /* set up data needed by hash and match functions */
 371         hashtable->inputslot = slot;
 372         hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
 373         hashtable->cur_eq_func = hashtable->tab_eq_func;
 374
 375         entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
 376         Assert(entry == NULL || entry->hash == hash);
 377
 378         MemoryContextSwitchTo(oldContext);
 379
 380         return entry;
 381 }
 382
 383 /*
 384  * Search for a hashtable entry matching the given tuple.  No entry is
 385  * created if there's not a match.  This is similar to the non-creating
 386  * case of LookupTupleHashEntry, except that it supports cross-type
 387  * comparisons, in which the given tuple is not of the same type as the
 388  * table entries.  The caller must provide the hash functions to use for
 389  * the input tuple, as well as the equality functions, since these may be
 390  * different from the table's internal functions.
 391  */
 392 TupleHashEntry
 393 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 394                                    ExprState *eqcomp,
 395                                    FmgrInfo *hashfunctions)
 396 {
 397         TupleHashEntry entry;
 398         MemoryContext oldContext;
 399         MinimalTuple key;
 400
 401         /* Need to run the hash functions in short-lived context */
 402         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 403
 404         /* Set up data needed by hash and match functions */
 405         hashtable->inputslot = slot;
 406         hashtable->in_hash_funcs = hashfunctions;
 407         hashtable->cur_eq_func = eqcomp;
 408
 409         /* Search the hash table */
 410         key = NULL;                                     /* flag to reference inputslot */
 411         entry = tuplehash_lookup(hashtable->hashtab, key);
 412         MemoryContextSwitchTo(oldContext);
 413
 414         return entry;
 415 }
 416
 417 /*
 418  * If tuple is NULL, use the input slot instead. This convention avoids the
 419  * need to materialize virtual input tuples unless they actually need to get
 420  * copied into the table.
 421  *
 422  * Also, the caller must select an appropriate memory context for running
 423  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
 424  */
 425 static uint32
 426 TupleHashTableHash_internal(struct tuplehash_hash *tb,
 427                                                         const MinimalTuple tuple)
 428 {
 429         TupleHashTable hashtable = (TupleHashTable) tb->private_data;
 430         int                     numCols = hashtable->numCols;
 431         AttrNumber *keyColIdx = hashtable->keyColIdx;
 432         uint32          hashkey = hashtable->hash_iv;
 433         TupleTableSlot *slot;
 434         FmgrInfo   *hashfunctions;
 435         int                     i;
 436
 437         if (tuple == NULL)
 438         {
 439                 /* Process the current input tuple for the table */
 440                 slot = hashtable->inputslot;
 441                 hashfunctions = hashtable->in_hash_funcs;
 442         }
 443         else
 444         {
 445                 /*
 446                  * Process a tuple already stored in the table.
 447                  *
 448                  * (this case never actually occurs due to the way simplehash.h is
 449                  * used, as the hash-value is stored in the entries)
 450                  */
 451                 slot = hashtable->tableslot;
 452                 ExecStoreMinimalTuple(tuple, slot, false);
 453                 hashfunctions = hashtable->tab_hash_funcs;
 454         }
 455
 456         for (i = 0; i < numCols; i++)
 457         {
 458                 AttrNumber      att = keyColIdx[i];
 459                 Datum           attr;
 460                 bool            isNull;
 461
 462                 /* rotate hashkey left 1 bit at each step */
 463                 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
 464
 465                 attr = slot_getattr(slot, att, &isNull);
 466
 467                 if (!isNull)                    /* treat nulls as having hash key 0 */
 468                 {
 469                         uint32          hkey;
 470
 471                         hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
 472                                                                                                         hashtable->tab_collations[i],
 473                                                                                                         attr));
 474                         hashkey ^= hkey;
 475                 }
 476         }
 477
 478         /*
 479          * The way hashes are combined above, among each other and with the IV,
 480          * doesn't lead to good bit perturbation. As the IV's goal is to lead to
 481          * achieve that, perform a round of hashing of the combined hash -
 482          * resulting in near perfect perturbation.
 483          */
 484         return murmurhash32(hashkey);
 485 }
 486
 487 /*
 488  * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
 489  * so that we can avoid switching the memory context multiple times for
 490  * LookupTupleHashEntry.
 491  *
 492  * NB: This function may or may not change the memory context. Caller is
 493  * expected to change it back.
 494  */
 495 static inline TupleHashEntry
 496 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 497                                                           bool *isnew, uint32 hash)
 498 {
 499         TupleHashEntryData *entry;
 500         bool            found;
 501         MinimalTuple key;
 502
 503         key = NULL;                                     /* flag to reference inputslot */
 504
 505         if (isnew)
 506         {
 507                 entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
 508
 509                 if (found)
 510                 {
 511                         /* found pre-existing entry */
 512                         *isnew = false;
 513                 }
 514                 else
 515                 {
 516                         /* created new entry */
 517                         *isnew = true;
 518                         /* zero caller data */
 519                         entry->additional = NULL;
 520                         MemoryContextSwitchTo(hashtable->tablecxt);
 521                         /* Copy the first tuple into the table context */
 522                         entry->firstTuple = ExecCopySlotMinimalTuple(slot);
 523                 }
 524         }
 525         else
 526         {
 527                 entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
 528         }
 529
 530         return entry;
 531 }
 532
 533 /*
 534  * See whether two tuples (presumably of the same hash value) match
 535  */
 536 static int
 537 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
 538 {
 539         TupleTableSlot *slot1;
 540         TupleTableSlot *slot2;
 541         TupleHashTable hashtable = (TupleHashTable) tb->private_data;
 542         ExprContext *econtext = hashtable->exprcontext;
 543
 544         /*
 545          * We assume that simplehash.h will only ever call us with the first
 546          * argument being an actual table entry, and the second argument being
 547          * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
 548          * could be supported too, but is not currently required.
 549          */
 550         Assert(tuple1 != NULL);
 551         slot1 = hashtable->tableslot;
 552         ExecStoreMinimalTuple(tuple1, slot1, false);
 553         Assert(tuple2 == NULL);
 554         slot2 = hashtable->inputslot;
 555
 556         /* For crosstype comparisons, the inputslot must be first */
 557         econtext->ecxt_innertuple = slot2;
 558         econtext->ecxt_outertuple = slot1;
 559         return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
 560 }