Update copyright for 2022
[pgsql.git] / src / include / nodes / execnodes.h
blob8429a9c55dfc58018ef427e47fe1361b14d83cfa
1 /*-------------------------------------------------------------------------
3 * execnodes.h
4 * definitions for executor state nodes
7 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/nodes/execnodes.h
12 *-------------------------------------------------------------------------
14 #ifndef EXECNODES_H
15 #define EXECNODES_H
17 #include "access/tupconvert.h"
18 #include "executor/instrument.h"
19 #include "fmgr.h"
20 #include "lib/ilist.h"
21 #include "lib/pairingheap.h"
22 #include "nodes/params.h"
23 #include "nodes/plannodes.h"
24 #include "nodes/tidbitmap.h"
25 #include "partitioning/partdefs.h"
26 #include "storage/condition_variable.h"
27 #include "utils/hsearch.h"
28 #include "utils/queryenvironment.h"
29 #include "utils/reltrigger.h"
30 #include "utils/sharedtuplestore.h"
31 #include "utils/snapshot.h"
32 #include "utils/sortsupport.h"
33 #include "utils/tuplesort.h"
34 #include "utils/tuplestore.h"
struct PlanState;				/* forward references in this file */
struct ParallelHashJoinState;
struct ExecRowMark;
struct ExprState;
struct ExprContext;
struct RangeTblEntry;			/* avoid including parsenodes.h here */
struct ExprEvalStep;			/* avoid including execExpr.h everywhere */
struct CopyMultiInsertBuffer;
struct LogicalTapeSet;
47 /* ----------------
48 * ExprState node
50 * ExprState is the top-level node for expression evaluation.
51 * It contains instructions (in ->steps) to evaluate the expression.
52 * ----------------
54 typedef Datum (*ExprStateEvalFunc) (struct ExprState *expression,
55 struct ExprContext *econtext,
56 bool *isNull);
/* Bits in ExprState->flags (see also execExpr.h for private flag bits): */
/* expression is for use with ExecQual() */
#define EEO_FLAG_IS_QUAL					(1 << 0)
62 typedef struct ExprState
64 NodeTag type;
66 uint8 flags; /* bitmask of EEO_FLAG_* bits, see above */
69 * Storage for result value of a scalar expression, or for individual
70 * column results within expressions built by ExecBuildProjectionInfo().
72 #define FIELDNO_EXPRSTATE_RESNULL 2
73 bool resnull;
74 #define FIELDNO_EXPRSTATE_RESVALUE 3
75 Datum resvalue;
78 * If projecting a tuple result, this slot holds the result; else NULL.
80 #define FIELDNO_EXPRSTATE_RESULTSLOT 4
81 TupleTableSlot *resultslot;
84 * Instructions to compute expression's return value.
86 struct ExprEvalStep *steps;
89 * Function that actually evaluates the expression. This can be set to
90 * different values depending on the complexity of the expression.
92 ExprStateEvalFunc evalfunc;
94 /* original expression tree, for debugging only */
95 Expr *expr;
97 /* private state for an evalfunc */
98 void *evalfunc_private;
101 * XXX: following fields only needed during "compilation" (ExecInitExpr);
102 * could be thrown away afterwards.
105 int steps_len; /* number of steps currently */
106 int steps_alloc; /* allocated length of steps array */
108 #define FIELDNO_EXPRSTATE_PARENT 11
109 struct PlanState *parent; /* parent PlanState node, if any */
110 ParamListInfo ext_params; /* for compiling PARAM_EXTERN nodes */
112 Datum *innermost_caseval;
113 bool *innermost_casenull;
115 Datum *innermost_domainval;
116 bool *innermost_domainnull;
117 } ExprState;
120 /* ----------------
121 * IndexInfo information
123 * this struct holds the information needed to construct new index
124 * entries for a particular index. Used for both index_build and
125 * retail creation of index entries.
127 * NumIndexAttrs total number of columns in this index
128 * NumIndexKeyAttrs number of key columns in index
129 * IndexAttrNumbers underlying-rel attribute numbers used as keys
130 * (zeroes indicate expressions). It also contains
131 * info about included columns.
132 * Expressions expr trees for expression entries, or NIL if none
133 * ExpressionsState exec state for expressions, or NIL if none
134 * Predicate partial-index predicate, or NIL if none
135 * PredicateState exec state for predicate, or NIL if none
136 * ExclusionOps Per-column exclusion operators, or NULL if none
137 * ExclusionProcs Underlying function OIDs for ExclusionOps
138 * ExclusionStrats Opclass strategy numbers for ExclusionOps
139 * UniqueOps These are like Exclusion*, but for unique indexes
140 * UniqueProcs
141 * UniqueStrats
142 * Unique is it a unique index?
143 * OpclassOptions opclass-specific options, or NULL if none
144 * ReadyForInserts is it valid for inserts?
145 * Concurrent are we doing a concurrent index build?
146 * BrokenHotChain did we detect any broken HOT chains?
147 * ParallelWorkers # of workers requested (excludes leader)
148 * Am Oid of index AM
149 * AmCache private cache area for index AM
150 * Context memory context holding this IndexInfo
152 * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only
153 * during index build; they're conventionally zeroed otherwise.
154 * ----------------
156 typedef struct IndexInfo
158 NodeTag type;
159 int ii_NumIndexAttrs; /* total number of columns in index */
160 int ii_NumIndexKeyAttrs; /* number of key columns in index */
161 AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS];
162 List *ii_Expressions; /* list of Expr */
163 List *ii_ExpressionsState; /* list of ExprState */
164 List *ii_Predicate; /* list of Expr */
165 ExprState *ii_PredicateState;
166 Oid *ii_ExclusionOps; /* array with one entry per column */
167 Oid *ii_ExclusionProcs; /* array with one entry per column */
168 uint16 *ii_ExclusionStrats; /* array with one entry per column */
169 Oid *ii_UniqueOps; /* array with one entry per column */
170 Oid *ii_UniqueProcs; /* array with one entry per column */
171 uint16 *ii_UniqueStrats; /* array with one entry per column */
172 Datum *ii_OpclassOptions; /* array with one entry per column */
173 bool ii_Unique;
174 bool ii_ReadyForInserts;
175 bool ii_Concurrent;
176 bool ii_BrokenHotChain;
177 int ii_ParallelWorkers;
178 Oid ii_Am;
179 void *ii_AmCache;
180 MemoryContext ii_Context;
181 } IndexInfo;
183 /* ----------------
184 * ExprContext_CB
186 * List of callbacks to be called at ExprContext shutdown.
187 * ----------------
189 typedef void (*ExprContextCallbackFunction) (Datum arg);
191 typedef struct ExprContext_CB
193 struct ExprContext_CB *next;
194 ExprContextCallbackFunction function;
195 Datum arg;
196 } ExprContext_CB;
198 /* ----------------
199 * ExprContext
201 * This class holds the "current context" information
202 * needed to evaluate expressions for doing tuple qualifications
203 * and tuple projections. For example, if an expression refers
204 * to an attribute in the current inner tuple then we need to know
205 * what the current inner tuple is and so we look at the expression
206 * context.
208 * There are two memory contexts associated with an ExprContext:
209 * * ecxt_per_query_memory is a query-lifespan context, typically the same
210 * context the ExprContext node itself is allocated in. This context
211 * can be used for purposes such as storing function call cache info.
212 * * ecxt_per_tuple_memory is a short-term context for expression results.
213 * As the name suggests, it will typically be reset once per tuple,
214 * before we begin to evaluate expressions for that tuple. Each
215 * ExprContext normally has its very own per-tuple memory context.
217 * CurrentMemoryContext should be set to ecxt_per_tuple_memory before
218 * calling ExecEvalExpr() --- see ExecEvalExprSwitchContext().
219 * ----------------
221 typedef struct ExprContext
223 NodeTag type;
225 /* Tuples that Var nodes in expression may refer to */
226 #define FIELDNO_EXPRCONTEXT_SCANTUPLE 1
227 TupleTableSlot *ecxt_scantuple;
228 #define FIELDNO_EXPRCONTEXT_INNERTUPLE 2
229 TupleTableSlot *ecxt_innertuple;
230 #define FIELDNO_EXPRCONTEXT_OUTERTUPLE 3
231 TupleTableSlot *ecxt_outertuple;
233 /* Memory contexts for expression evaluation --- see notes above */
234 MemoryContext ecxt_per_query_memory;
235 MemoryContext ecxt_per_tuple_memory;
237 /* Values to substitute for Param nodes in expression */
238 ParamExecData *ecxt_param_exec_vals; /* for PARAM_EXEC params */
239 ParamListInfo ecxt_param_list_info; /* for other param types */
242 * Values to substitute for Aggref nodes in the expressions of an Agg
243 * node, or for WindowFunc nodes within a WindowAgg node.
245 #define FIELDNO_EXPRCONTEXT_AGGVALUES 8
246 Datum *ecxt_aggvalues; /* precomputed values for aggs/windowfuncs */
247 #define FIELDNO_EXPRCONTEXT_AGGNULLS 9
248 bool *ecxt_aggnulls; /* null flags for aggs/windowfuncs */
250 /* Value to substitute for CaseTestExpr nodes in expression */
251 #define FIELDNO_EXPRCONTEXT_CASEDATUM 10
252 Datum caseValue_datum;
253 #define FIELDNO_EXPRCONTEXT_CASENULL 11
254 bool caseValue_isNull;
256 /* Value to substitute for CoerceToDomainValue nodes in expression */
257 #define FIELDNO_EXPRCONTEXT_DOMAINDATUM 12
258 Datum domainValue_datum;
259 #define FIELDNO_EXPRCONTEXT_DOMAINNULL 13
260 bool domainValue_isNull;
262 /* Link to containing EState (NULL if a standalone ExprContext) */
263 struct EState *ecxt_estate;
265 /* Functions to call back when ExprContext is shut down or rescanned */
266 ExprContext_CB *ecxt_callbacks;
267 } ExprContext;
/*
 * Set-result status used when evaluating functions potentially returning a
 * set.
 */
typedef enum
{
	ExprSingleResult,			/* expression does not return a set */
	ExprMultipleResult,			/* this result is an element of a set */
	ExprEndResult				/* there are no more elements in the set */
} ExprDoneCond;
/*
 * Return modes for functions returning sets. Note values must be chosen
 * as separate bits so that a bitmask can be formed to indicate supported
 * modes. SFRM_Materialize_Random and SFRM_Materialize_Preferred are
 * auxiliary flags about SFRM_Materialize mode, rather than separate modes.
 */
typedef enum
{
	SFRM_ValuePerCall = 0x01,	/* one value returned per call */
	SFRM_Materialize = 0x02,	/* result set instantiated in Tuplestore */
	SFRM_Materialize_Random = 0x04, /* Tuplestore needs randomAccess */
	SFRM_Materialize_Preferred = 0x08	/* caller prefers Tuplestore */
} SetFunctionReturnMode;
295 * When calling a function that might return a set (multiple rows),
296 * a node of this type is passed as fcinfo->resultinfo to allow
297 * return status to be passed back. A function returning set should
298 * raise an error if no such resultinfo is provided.
300 typedef struct ReturnSetInfo
302 NodeTag type;
303 /* values set by caller: */
304 ExprContext *econtext; /* context function is being called in */
305 TupleDesc expectedDesc; /* tuple descriptor expected by caller */
306 int allowedModes; /* bitmask: return modes caller can handle */
307 /* result status from function (but pre-initialized by caller): */
308 SetFunctionReturnMode returnMode; /* actual return mode */
309 ExprDoneCond isDone; /* status for ValuePerCall mode */
310 /* fields filled by function in Materialize return mode: */
311 Tuplestorestate *setResult; /* holds the complete returned tuple set */
312 TupleDesc setDesc; /* actual descriptor for returned tuples */
313 } ReturnSetInfo;
315 /* ----------------
316 * ProjectionInfo node information
318 * This is all the information needed to perform projections ---
319 * that is, form new tuples by evaluation of targetlist expressions.
320 * Nodes which need to do projections create one of these.
322 * The target tuple slot is kept in ProjectionInfo->pi_state.resultslot.
323 * ExecProject() evaluates the tlist, forms a tuple, and stores it
324 * in the given slot. Note that the result will be a "virtual" tuple
325 * unless ExecMaterializeSlot() is then called to force it to be
326 * converted to a physical tuple. The slot must have a tupledesc
327 * that matches the output of the tlist!
328 * ----------------
330 typedef struct ProjectionInfo
332 NodeTag type;
333 /* instructions to evaluate projection */
334 ExprState pi_state;
335 /* expression context in which to evaluate expression */
336 ExprContext *pi_exprContext;
337 } ProjectionInfo;
339 /* ----------------
340 * JunkFilter
342 * This class is used to store information regarding junk attributes.
343 * A junk attribute is an attribute in a tuple that is needed only for
344 * storing intermediate information in the executor, and does not belong
345 * in emitted tuples. For example, when we do an UPDATE query,
346 * the planner adds a "junk" entry to the targetlist so that the tuples
347 * returned to ExecutePlan() contain an extra attribute: the ctid of
348 * the tuple to be updated. This is needed to do the update, but we
349 * don't want the ctid to be part of the stored new tuple! So, we
350 * apply a "junk filter" to remove the junk attributes and form the
351 * real output tuple. The junkfilter code also provides routines to
352 * extract the values of the junk attribute(s) from the input tuple.
354 * targetList: the original target list (including junk attributes).
355 * cleanTupType: the tuple descriptor for the "clean" tuple (with
356 * junk attributes removed).
357 * cleanMap: A map with the correspondence between the non-junk
358 * attribute numbers of the "original" tuple and the
359 * attribute numbers of the "clean" tuple.
360 * resultSlot: tuple slot used to hold cleaned tuple.
361 * ----------------
363 typedef struct JunkFilter
365 NodeTag type;
366 List *jf_targetList;
367 TupleDesc jf_cleanTupType;
368 AttrNumber *jf_cleanMap;
369 TupleTableSlot *jf_resultSlot;
370 } JunkFilter;
373 * OnConflictSetState
375 * Executor state of an ON CONFLICT DO UPDATE operation.
377 typedef struct OnConflictSetState
379 NodeTag type;
381 TupleTableSlot *oc_Existing; /* slot to store existing target tuple in */
382 TupleTableSlot *oc_ProjSlot; /* CONFLICT ... SET ... projection target */
383 ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */
384 ExprState *oc_WhereClause; /* state for the WHERE clause */
385 } OnConflictSetState;
388 * ResultRelInfo
390 * Whenever we update an existing relation, we have to update indexes on the
391 * relation, and perhaps also fire triggers. ResultRelInfo holds all the
392 * information needed about a result relation, including indexes.
394 * Normally, a ResultRelInfo refers to a table that is in the query's range
395 * table; then ri_RangeTableIndex is the RT index and ri_RelationDesc is
396 * just a copy of the relevant es_relations[] entry. However, in some
397 * situations we create ResultRelInfos for relations that are not in the
398 * range table, namely for targets of tuple routing in a partitioned table,
399 * and when firing triggers in tables other than the target tables (See
400 * ExecGetTriggerResultRel). In these situations, ri_RangeTableIndex is 0
401 * and ri_RelationDesc is a separately-opened relcache pointer that needs to
402 * be separately closed.
404 typedef struct ResultRelInfo
406 NodeTag type;
408 /* result relation's range table index, or 0 if not in range table */
409 Index ri_RangeTableIndex;
411 /* relation descriptor for result relation */
412 Relation ri_RelationDesc;
414 /* # of indices existing on result relation */
415 int ri_NumIndices;
417 /* array of relation descriptors for indices */
418 RelationPtr ri_IndexRelationDescs;
420 /* array of key/attr info for indices */
421 IndexInfo **ri_IndexRelationInfo;
424 * For UPDATE/DELETE result relations, the attribute number of the row
425 * identity junk attribute in the source plan's output tuples
427 AttrNumber ri_RowIdAttNo;
429 /* Projection to generate new tuple in an INSERT/UPDATE */
430 ProjectionInfo *ri_projectNew;
431 /* Slot to hold that tuple */
432 TupleTableSlot *ri_newTupleSlot;
433 /* Slot to hold the old tuple being updated */
434 TupleTableSlot *ri_oldTupleSlot;
435 /* Have the projection and the slots above been initialized? */
436 bool ri_projectNewInfoValid;
438 /* triggers to be fired, if any */
439 TriggerDesc *ri_TrigDesc;
441 /* cached lookup info for trigger functions */
442 FmgrInfo *ri_TrigFunctions;
444 /* array of trigger WHEN expr states */
445 ExprState **ri_TrigWhenExprs;
447 /* optional runtime measurements for triggers */
448 Instrumentation *ri_TrigInstrument;
450 /* On-demand created slots for triggers / returning processing */
451 TupleTableSlot *ri_ReturningSlot; /* for trigger output tuples */
452 TupleTableSlot *ri_TrigOldSlot; /* for a trigger's old tuple */
453 TupleTableSlot *ri_TrigNewSlot; /* for a trigger's new tuple */
455 /* FDW callback functions, if foreign table */
456 struct FdwRoutine *ri_FdwRoutine;
458 /* available to save private state of FDW */
459 void *ri_FdwState;
461 /* true when modifying foreign table directly */
462 bool ri_usesFdwDirectModify;
464 /* batch insert stuff */
465 int ri_NumSlots; /* number of slots in the array */
466 int ri_NumSlotsInitialized; /* number of initialized slots */
467 int ri_BatchSize; /* max slots inserted in a single batch */
468 TupleTableSlot **ri_Slots; /* input tuples for batch insert */
469 TupleTableSlot **ri_PlanSlots;
471 /* list of WithCheckOption's to be checked */
472 List *ri_WithCheckOptions;
474 /* list of WithCheckOption expr states */
475 List *ri_WithCheckOptionExprs;
477 /* array of constraint-checking expr states */
478 ExprState **ri_ConstraintExprs;
480 /* array of stored generated columns expr states */
481 ExprState **ri_GeneratedExprs;
483 /* number of stored generated columns we need to compute */
484 int ri_NumGeneratedNeeded;
486 /* list of RETURNING expressions */
487 List *ri_returningList;
489 /* for computing a RETURNING list */
490 ProjectionInfo *ri_projectReturning;
492 /* list of arbiter indexes to use to check conflicts */
493 List *ri_onConflictArbiterIndexes;
495 /* ON CONFLICT evaluation state */
496 OnConflictSetState *ri_onConflict;
498 /* partition check expression state (NULL if not set up yet) */
499 ExprState *ri_PartitionCheckExpr;
502 * Information needed by tuple routing target relations
504 * RootResultRelInfo gives the target relation mentioned in the query, if
505 * it's a partitioned table. It is not set if the target relation
506 * mentioned in the query is an inherited table, nor when tuple routing is
507 * not needed.
509 * RootToPartitionMap and PartitionTupleSlot, initialized by
510 * ExecInitRoutingInfo, are non-NULL if partition has a different tuple
511 * format than the root table.
513 struct ResultRelInfo *ri_RootResultRelInfo;
514 TupleConversionMap *ri_RootToPartitionMap;
515 TupleTableSlot *ri_PartitionTupleSlot;
518 * Map to convert child result relation tuples to the format of the table
519 * actually mentioned in the query (called "root"). Computed only if
520 * needed. A NULL map value indicates that no conversion is needed, so we
521 * must have a separate flag to show if the map has been computed.
523 TupleConversionMap *ri_ChildToRootMap;
524 bool ri_ChildToRootMapValid;
526 /* for use by copyfrom.c when performing multi-inserts */
527 struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer;
528 } ResultRelInfo;
530 /* ----------------
531 * AsyncRequest
533 * State for an asynchronous tuple request.
534 * ----------------
536 typedef struct AsyncRequest
538 struct PlanState *requestor; /* Node that wants a tuple */
539 struct PlanState *requestee; /* Node from which a tuple is wanted */
540 int request_index; /* Scratch space for requestor */
541 bool callback_pending; /* Callback is needed */
542 bool request_complete; /* Request complete, result valid */
543 TupleTableSlot *result; /* Result (NULL or an empty slot if no more
544 * tuples) */
545 } AsyncRequest;
547 /* ----------------
548 * EState information
550 * Working state for an Executor invocation
551 * ----------------
553 typedef struct EState
555 NodeTag type;
557 /* Basic state for all query types: */
558 ScanDirection es_direction; /* current scan direction */
559 Snapshot es_snapshot; /* time qual to use */
560 Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */
561 List *es_range_table; /* List of RangeTblEntry */
562 Index es_range_table_size; /* size of the range table arrays */
563 Relation *es_relations; /* Array of per-range-table-entry Relation
564 * pointers, or NULL if not yet opened */
565 struct ExecRowMark **es_rowmarks; /* Array of per-range-table-entry
566 * ExecRowMarks, or NULL if none */
567 PlannedStmt *es_plannedstmt; /* link to top of plan tree */
568 const char *es_sourceText; /* Source text from QueryDesc */
570 JunkFilter *es_junkFilter; /* top-level junk filter, if any */
572 /* If query can insert/delete tuples, the command ID to mark them with */
573 CommandId es_output_cid;
575 /* Info about target table(s) for insert/update/delete queries: */
576 ResultRelInfo **es_result_relations; /* Array of per-range-table-entry
577 * ResultRelInfo pointers, or NULL
578 * if not a target table */
579 List *es_opened_result_relations; /* List of non-NULL entries in
580 * es_result_relations in no
581 * specific order */
583 PartitionDirectory es_partition_directory; /* for PartitionDesc lookup */
586 * The following list contains ResultRelInfos created by the tuple routing
587 * code for partitions that aren't found in the es_result_relations array.
589 List *es_tuple_routing_result_relations;
591 /* Stuff used for firing triggers: */
592 List *es_trig_target_relations; /* trigger-only ResultRelInfos */
594 /* Parameter info: */
595 ParamListInfo es_param_list_info; /* values of external params */
596 ParamExecData *es_param_exec_vals; /* values of internal params */
598 QueryEnvironment *es_queryEnv; /* query environment */
600 /* Other working state: */
601 MemoryContext es_query_cxt; /* per-query context in which EState lives */
603 List *es_tupleTable; /* List of TupleTableSlots */
605 uint64 es_processed; /* # of tuples processed */
607 int es_top_eflags; /* eflags passed to ExecutorStart */
608 int es_instrument; /* OR of InstrumentOption flags */
609 bool es_finished; /* true when ExecutorFinish is done */
611 List *es_exprcontexts; /* List of ExprContexts within EState */
613 List *es_subplanstates; /* List of PlanState for SubPlans */
615 List *es_auxmodifytables; /* List of secondary ModifyTableStates */
618 * this ExprContext is for per-output-tuple operations, such as constraint
619 * checks and index-value computations. It will be reset for each output
620 * tuple. Note that it will be created only if needed.
622 ExprContext *es_per_tuple_exprcontext;
625 * If not NULL, this is an EPQState's EState. This is a field in EState
626 * both to allow EvalPlanQual aware executor nodes to detect that they
627 * need to perform EPQ related work, and to provide necessary information
628 * to do so.
630 struct EPQState *es_epq_active;
632 bool es_use_parallel_mode; /* can we use parallel workers? */
634 /* The per-query shared memory area to use for parallel execution. */
635 struct dsa_area *es_query_dsa;
638 * JIT information. es_jit_flags indicates whether JIT should be performed
639 * and with which options. es_jit is created on-demand when JITing is
640 * performed.
642 * es_jit_worker_instr is the combined, on demand allocated,
643 * instrumentation from all workers. The leader's instrumentation is kept
644 * separate, and is combined on demand by ExplainPrintJITSummary().
646 int es_jit_flags;
647 struct JitContext *es_jit;
648 struct JitInstrumentation *es_jit_worker_instr;
649 } EState;
653 * ExecRowMark -
654 * runtime representation of FOR [KEY] UPDATE/SHARE clauses
656 * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we will have an
657 * ExecRowMark for each non-target relation in the query (except inheritance
658 * parent RTEs, which can be ignored at runtime). Virtual relations such as
659 * subqueries-in-FROM will have an ExecRowMark with relation == NULL. See
660 * PlanRowMark for details about most of the fields. In addition to fields
661 * directly derived from PlanRowMark, we store an activity flag (to denote
662 * inactive children of inheritance trees), curCtid, which is used by the
663 * WHERE CURRENT OF code, and ermExtra, which is available for use by the plan
664 * node that sources the relation (e.g., for a foreign table the FDW can use
665 * ermExtra to hold information).
667 * EState->es_rowmarks is an array of these structs, indexed by RT index,
668 * with NULLs for irrelevant RT indexes. es_rowmarks itself is NULL if
669 * there are no rowmarks.
671 typedef struct ExecRowMark
673 Relation relation; /* opened and suitably locked relation */
674 Oid relid; /* its OID (or InvalidOid, if subquery) */
675 Index rti; /* its range table index */
676 Index prti; /* parent range table index, if child */
677 Index rowmarkId; /* unique identifier for resjunk columns */
678 RowMarkType markType; /* see enum in nodes/plannodes.h */
679 LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */
680 LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */
681 bool ermActive; /* is this mark relevant for current tuple? */
682 ItemPointerData curCtid; /* ctid of currently locked tuple, if any */
683 void *ermExtra; /* available for use by relation source node */
684 } ExecRowMark;
687 * ExecAuxRowMark -
688 * additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
690 * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
691 * deal with. In addition to a pointer to the related entry in es_rowmarks,
692 * this struct carries the column number(s) of the resjunk columns associated
693 * with the rowmark (see comments for PlanRowMark for more detail).
695 typedef struct ExecAuxRowMark
697 ExecRowMark *rowmark; /* related entry in es_rowmarks */
698 AttrNumber ctidAttNo; /* resno of ctid junk attribute, if any */
699 AttrNumber toidAttNo; /* resno of tableoid junk attribute, if any */
700 AttrNumber wholeAttNo; /* resno of whole-row junk attribute, if any */
701 } ExecAuxRowMark;
704 /* ----------------------------------------------------------------
705 * Tuple Hash Tables
707 * All-in-memory tuple hash tables are used for a number of purposes.
709 * Note: tab_hash_funcs are for the key datatype(s) stored in the table,
710 * and tab_eq_funcs are non-cross-type equality operators for those types.
711 * Normally these are the only functions used, but FindTupleHashEntry()
712 * supports searching a hashtable using cross-data-type hashing. For that,
713 * the caller must supply hash functions for the LHS datatype as well as
714 * the cross-type equality operators to use. in_hash_funcs and cur_eq_func
715 * are set to point to the caller's function arrays while doing such a search.
716 * During LookupTupleHashEntry(), they point to tab_hash_funcs and
717 * tab_eq_func respectively.
718 * ----------------------------------------------------------------
720 typedef struct TupleHashEntryData *TupleHashEntry;
721 typedef struct TupleHashTableData *TupleHashTable;
723 typedef struct TupleHashEntryData
725 MinimalTuple firstTuple; /* copy of first tuple in this group */
726 void *additional; /* user data */
727 uint32 status; /* hash status */
728 uint32 hash; /* hash value (cached) */
729 } TupleHashEntryData;
731 /* define parameters necessary to generate the tuple hash table interface */
732 #define SH_PREFIX tuplehash
733 #define SH_ELEMENT_TYPE TupleHashEntryData
734 #define SH_KEY_TYPE MinimalTuple
735 #define SH_SCOPE extern
736 #define SH_DECLARE
737 #include "lib/simplehash.h"
739 typedef struct TupleHashTableData
741 tuplehash_hash *hashtab; /* underlying hash table */
742 int numCols; /* number of columns in lookup key */
743 AttrNumber *keyColIdx; /* attr numbers of key columns */
744 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
745 ExprState *tab_eq_func; /* comparator for table datatype(s) */
746 Oid *tab_collations; /* collations for hash and comparison */
747 MemoryContext tablecxt; /* memory context containing table */
748 MemoryContext tempcxt; /* context for function evaluations */
749 Size entrysize; /* actual size to make each hash entry */
750 TupleTableSlot *tableslot; /* slot for referencing table entries */
751 /* The following fields are set transiently for each table search: */
752 TupleTableSlot *inputslot; /* current input tuple's slot */
753 FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */
754 ExprState *cur_eq_func; /* comparator for input vs. table */
755 uint32 hash_iv; /* hash-function IV */
756 ExprContext *exprcontext; /* expression context */
757 } TupleHashTableData;
759 typedef tuplehash_iterator TupleHashIterator;
/*
 * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
 * Use ResetTupleHashIterator if the table can be frozen (in this case no
 * explicit scan termination is needed).
 *
 * The htable argument is parenthesized in each expansion so that callers
 * may pass any pointer-valued expression (e.g. "tables + 1"), not just a
 * plain identifier.
 */
#define InitTupleHashIterator(htable, iter) \
	tuplehash_start_iterate((htable)->hashtab, iter)
#define TermTupleHashIterator(iter) \
	((void) 0)
#define ResetTupleHashIterator(htable, iter) \
	InitTupleHashIterator(htable, iter)
#define ScanTupleHashTable(htable, iter) \
	tuplehash_iterate((htable)->hashtab, iter)
/* ----------------------------------------------------------------
 *				 Expression State Nodes
 *
 * Formerly, there was a separate executor expression state node corresponding
 * to each node in a planned expression tree. That's no longer the case; for
 * common expression node types, all the execution info is embedded into
 * step(s) in a single ExprState node. But we still have a few executor state
 * node types for selected expression node types, mostly those in which info
 * has to be shared with other parts of the execution state tree.
 * ----------------------------------------------------------------
 */
788 /* ----------------
789 * WindowFuncExprState node
790 * ----------------
792 typedef struct WindowFuncExprState
794 NodeTag type;
795 WindowFunc *wfunc; /* expression plan node */
796 List *args; /* ExprStates for argument expressions */
797 ExprState *aggfilter; /* FILTER expression */
798 int wfuncno; /* ID number for wfunc within its plan node */
799 } WindowFuncExprState;
802 /* ----------------
803 * SetExprState node
805 * State for evaluating a potentially set-returning expression (like FuncExpr
806 * or OpExpr). In some cases, like some of the expressions in ROWS FROM(...)
807 * the expression might not be a SRF, but nonetheless it uses the same
808 * machinery as SRFs; it will be treated as a SRF returning a single row.
809 * ----------------
/*
 * Per-expression run-time state for a possibly set-returning call: the
 * compiled argument expressions, the fmgr lookup and call data for the
 * target function, and the tuplestore/slot pair used to return rows one at
 * a time when the function hands back a tuplestore.
 */
811 typedef struct SetExprState
813 NodeTag type;
814 Expr *expr; /* expression plan node */
815 List *args; /* ExprStates for argument expressions */
818 * In ROWS FROM, functions can be inlined, removing the FuncExpr normally
819 * inside. In such a case this is the compiled expression (which cannot
820 * return a set), which'll be evaluated using regular ExecEvalExpr().
822 ExprState *elidedFuncState;
825 * Function manager's lookup info for the target function. If func.fn_oid
826 * is InvalidOid, we haven't initialized it yet (nor any of the following
827 * fields, except funcReturnsSet).
829 FmgrInfo func;
832 * For a set-returning function (SRF) that returns a tuplestore, we keep
833 * the tuplestore here and dole out the result rows one at a time. The
834 * slot holds the row currently being returned.
836 Tuplestorestate *funcResultStore;
837 TupleTableSlot *funcResultSlot;
840 * In some cases we need to compute a tuple descriptor for the function's
841 * output. If so, it's stored here.
843 TupleDesc funcResultDesc;
844 bool funcReturnsTuple; /* valid when funcResultDesc isn't NULL */
847 * Remember whether the function is declared to return a set. This is set
848 * by ExecInitExpr, and is valid even before the FmgrInfo is set up.
850 bool funcReturnsSet;
853 * setArgsValid is true when we are evaluating a set-returning function
854 * that uses value-per-call mode and we are in the middle of a call
855 * series; we want to pass the same argument values to the function again
856 * (and again, until it returns ExprEndResult). This indicates that
857 * fcinfo_data already contains valid argument data.
859 bool setArgsValid;
862 * Flag to remember whether we have registered a shutdown callback for
863 * this SetExprState. We do so only if funcResultStore or setArgsValid
864 * has been set at least once (since all the callback is for is to release
865 * the tuplestore or clear setArgsValid).
867 bool shutdown_reg; /* a shutdown callback is registered */
870 * Call parameter structure for the function. This has been initialized
871 * (by InitFunctionCallInfoData) if func.fn_oid is valid. It also saves
872 * argument values between calls, when setArgsValid is true.
874 FunctionCallInfo fcinfo;
875 } SetExprState;
877 /* ----------------
878 * SubPlanState node
879 * ----------------
/*
 * Run-time state for a SubPlan expression: links to the subselect state
 * tree and the parent state tree, the compiled test and argument
 * expressions, and (when the subselect output is hashed) the hash tables
 * plus per-column lookup arrays of length numCols.
 */
881 typedef struct SubPlanState
883 NodeTag type;
884 SubPlan *subplan; /* expression plan node */
885 struct PlanState *planstate; /* subselect plan's state tree */
886 struct PlanState *parent; /* parent plan node's state tree */
887 ExprState *testexpr; /* state of combining expression */
888 List *args; /* states of argument expression(s) */
889 HeapTuple curTuple; /* copy of most recent tuple from subplan */
890 Datum curArray; /* most recent array from ARRAY() subplan */
891 /* these are used when hashing the subselect's output: */
892 TupleDesc descRight; /* subselect desc after projection */
893 ProjectionInfo *projLeft; /* for projecting lefthand exprs */
894 ProjectionInfo *projRight; /* for projecting subselect output */
895 TupleHashTable hashtable; /* hash table for no-nulls subselect rows */
896 TupleHashTable hashnulls; /* hash table for rows with null(s) */
897 bool havehashrows; /* true if hashtable is not empty */
898 bool havenullrows; /* true if hashnulls is not empty */
899 MemoryContext hashtablecxt; /* memory context containing hash tables */
900 MemoryContext hashtempcxt; /* temp memory context for hash tables */
901 ExprContext *innerecontext; /* econtext for computing inner tuples */
902 int numCols; /* number of columns being hashed */
903 /* each of the remaining fields is an array of length numCols: */
904 AttrNumber *keyColIdx; /* control data for hash tables */
905 Oid *tab_eq_funcoids; /* equality func oids for table
906 * datatype(s) */
907 Oid *tab_collations; /* collations for hash and comparison */
908 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
909 FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */
910 FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
911 FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */
912 ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */
913 } SubPlanState;
916 * DomainConstraintState - one item to check during CoerceToDomain
918 * Note: we consider this to be part of an ExprState tree, so we give it
919 * a name following the xxxState convention. But there's no directly
920 * associated plan-tree node.
/* Kinds of constraint item that a DomainConstraintState can describe. */
922 typedef enum DomainConstraintType
924 DOM_CONSTRAINT_NOTNULL, /* not-null constraint item (per its name) */
925 DOM_CONSTRAINT_CHECK /* CHECK constraint item; uses check_expr */
926 } DomainConstraintType;
/*
 * One domain constraint item.  check_expr is documented as applying to
 * CHECK items, and check_exprstate may be NULL; NOTE(review): presumably
 * both are unused for not-null items -- confirm against the code that
 * builds these nodes.
 */
928 typedef struct DomainConstraintState
930 NodeTag type;
931 DomainConstraintType constrainttype; /* constraint type */
932 char *name; /* name of constraint (for error msgs) */
933 Expr *check_expr; /* for CHECK, a boolean expression */
934 ExprState *check_exprstate; /* check_expr's eval state, or NULL */
935 } DomainConstraintState;
938 /* ----------------------------------------------------------------
939 * Executor State Trees
941 * An executing query has a PlanState tree paralleling the Plan tree
942 * that describes the plan.
943 * ----------------------------------------------------------------
946 /* ----------------
947 * ExecProcNodeMtd
949 * This is the method called by ExecProcNode to return the next tuple
950 * from an executor node. It returns NULL, or an empty TupleTableSlot,
951 * if no more tuples are available.
952 * ----------------
/*
 * Function-pointer type for a node's tuple-fetching method: it receives the
 * node as a PlanState pointer and returns a TupleTableSlot pointer.
 */
954 typedef TupleTableSlot *(*ExecProcNodeMtd) (struct PlanState *pstate);
956 /* ----------------
957 * PlanState node
959 * We never actually instantiate any PlanState nodes; this is just the common
960 * abstract superclass for all PlanState-type nodes.
961 * ----------------
/*
 * Fields shared by every executor state node.  Concrete node states embed
 * this struct (directly, or indirectly through ScanState) as their first
 * member, which is what allows the accessor macros in this file to cast any
 * of them to PlanState *.
 */
963 typedef struct PlanState
965 NodeTag type;
967 Plan *plan; /* associated Plan node */
969 EState *state; /* at execution time, states of individual
970 * nodes point to one EState for the whole
971 * top-level plan */
973 ExecProcNodeMtd ExecProcNode; /* function to return next tuple */
974 ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a
975 * wrapper */
977 Instrumentation *instrument; /* Optional runtime stats for this node */
978 WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */
980 /* Per-worker JIT instrumentation */
981 struct SharedJitInstrumentation *worker_jit_instrument;
984 * Common structural data for all Plan types. These links to subsidiary
985 * state trees parallel links in the associated plan tree (except for the
986 * subPlan list, which does not exist in the plan tree).
988 ExprState *qual; /* boolean qual condition */
989 struct PlanState *lefttree; /* input plan tree(s) */
990 struct PlanState *righttree; /* second input; read via innerPlanState() */
992 List *initPlan; /* Init SubPlanState nodes (un-correlated expr
993 * subselects) */
994 List *subPlan; /* SubPlanState nodes in my expressions */
997 * State for management of parameter-change-driven rescanning
999 Bitmapset *chgParam; /* set of IDs of changed Params */
1002 * Other run-time state needed by most if not all node types.
1004 TupleDesc ps_ResultTupleDesc; /* node's return type */
1005 TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
1006 ExprContext *ps_ExprContext; /* node's expression-evaluation context */
1007 ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */
1009 bool async_capable; /* true if node is async-capable */
1012 * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
1013 * it's hard for expression compilation to optimize based on the
1014 * descriptor, without encoding knowledge about all executor nodes.
1016 TupleDesc scandesc;
1019 * Define the slot types for inner, outer and scanslots for expression
1020 * contexts with this state as a parent. If *opsset is set, then
1021 * *opsfixed indicates whether *ops is guaranteed to be the type of slot
1022 * used. That means that every slot in the corresponding
1023 * ExprContext.ecxt_*tuple will point to a slot of that type, while
1024 * evaluating the expression. If *opsfixed is false, but *ops is set,
1025 * that indicates the most likely type of slot.
1027 * The scan* fields are set by ExecInitScanTupleSlot(). If that's not
1028 * called, nodes can initialize the fields themselves.
1030 * If outer/inneropsset is false, the information is inferred on-demand
1031 * using ExecGetResultSlotOps() on ->righttree/lefttree, using the
1032 * corresponding node's resultops* fields.
1034 * The result* fields are automatically set when ExecInitResultSlot is
1035 * used (be it directly or when the slot is created by
1036 * ExecAssignScanProjectionInfo() /
1037 * ExecConditionalAssignProjectionInfo()). If no projection is necessary
1038 * ExecConditionalAssignProjectionInfo() defaults those fields to the scan
1039 * operations.
1041 const TupleTableSlotOps *scanops;
1042 const TupleTableSlotOps *outerops;
1043 const TupleTableSlotOps *innerops;
1044 const TupleTableSlotOps *resultops;
1045 bool scanopsfixed;
1046 bool outeropsfixed;
1047 bool inneropsfixed;
1048 bool resultopsfixed;
1049 bool scanopsset;
1050 bool outeropsset;
1051 bool inneropsset;
1052 bool resultopsset;
1053 } PlanState;
1055 /* ----------------
1056 * these are defined to avoid confusion problems with "left"
1057 * and "right" and "inner" and "outer". The convention is that
1058 * the "left" plan is the "outer" plan and the "right" plan is
1059 * the inner plan, but these make the code more readable.
1060 * ----------------
/*
 * innerPlanState() yields the righttree link and outerPlanState() the
 * lefttree link of the given node, after casting it to PlanState *.  Each
 * macro evaluates its argument exactly once.
 */
1062 #define innerPlanState(node) (((PlanState *)(node))->righttree)
1063 #define outerPlanState(node) (((PlanState *)(node))->lefttree)
/*
 * Macros for inline access to certain instrumentation counters.
 *
 * Each macro adds "delta" to one counter of the node's Instrumentation
 * struct, and does nothing when the node has no instrumentation attached
 * (its instrument pointer is NULL).  "node" may point to any struct that
 * can be cast to PlanState *.
 *
 * "node" is captured in a local variable so that it is evaluated exactly
 * once, which makes an argument with side effects safe; "delta" is
 * evaluated at most once.  The local is named instr_ps_, so "delta" must
 * not refer to a caller variable of that name.
 */
#define InstrCountTuples2(node, delta) \
	do { \
		PlanState  *instr_ps_ = (PlanState *) (node); \
		if (instr_ps_->instrument) \
			instr_ps_->instrument->ntuples2 += (delta); \
	} while (0)
#define InstrCountFiltered1(node, delta) \
	do { \
		PlanState  *instr_ps_ = (PlanState *) (node); \
		if (instr_ps_->instrument) \
			instr_ps_->instrument->nfiltered1 += (delta); \
	} while (0)
#define InstrCountFiltered2(node, delta) \
	do { \
		PlanState  *instr_ps_ = (PlanState *) (node); \
		if (instr_ps_->instrument) \
			instr_ps_->instrument->nfiltered2 += (delta); \
	} while (0)
1083 * EPQState is state for executing an EvalPlanQual recheck on candidate
1084 * tuples, e.g. in ModifyTable or LockRows.
1086 * To execute EPQ a separate EState is created (stored in ->recheckestate),
1087 * which shares some resources, like the rangetable, with the main query's
1088 * EState (stored in ->parentestate). The (sub-)tree of the plan that needs to
1089 * be rechecked (in ->plan), is separately initialized (into
1090 * ->recheckplanstate), but shares plan nodes with the corresponding nodes in
1091 * the main query. The scan nodes in that separate executor tree are changed
1092 * to return only the current tuple of interest for the respective
1093 * table. Those tuples are either provided by the caller (using
1094 * EvalPlanQualSlot), and/or found using the rowmark mechanism (non-locking
1095 * rowmarks by the EPQ machinery itself, locking ones by the caller).
1097 * While the plan to be checked may be changed using EvalPlanQualSetPlan(),
1098 * all such plans need to share the same EState.
/*
 * The fields are grouped by the function that sets them, as the section
 * comments inside the struct note: EvalPlanQualInit(),
 * EvalPlanQualSetPlan(), EvalPlanQualSetSlot() and EvalPlanQualBegin().
 */
1100 typedef struct EPQState
1102 /* Initialized at EvalPlanQualInit() time: */
1104 EState *parentestate; /* main query's EState */
1105 int epqParam; /* ID of Param to force scan node re-eval */
1108 * Tuples to be substituted by scan nodes. They need to set up, before
1109 * calling EvalPlanQual()/EvalPlanQualNext(), into the slot returned by
1110 * EvalPlanQualSlot(scanrelid). The array is indexed by scanrelid - 1.
1112 List *tuple_table; /* tuple table for relsubs_slot */
1113 TupleTableSlot **relsubs_slot; /* array indexed by scanrelid - 1 */
1116 * Initialized by EvalPlanQualInit(), may be changed later with
1117 * EvalPlanQualSetPlan():
1120 Plan *plan; /* plan tree to be executed */
1121 List *arowMarks; /* ExecAuxRowMarks (non-locking only) */
1125 * The original output tuple to be rechecked. Set by
1126 * EvalPlanQualSetSlot(), before EvalPlanQualNext() or EvalPlanQual() may
1127 * be called.
1129 TupleTableSlot *origslot;
1132 /* Initialized or reset by EvalPlanQualBegin(): */
1134 EState *recheckestate; /* EState for EPQ execution, see above */
1137 * Rowmarks that can be fetched on-demand using
1138 * EvalPlanQualFetchRowMark(), indexed by scanrelid - 1. Only non-locking
1139 * rowmarks.
1141 ExecAuxRowMark **relsubs_rowmark;
1144 * True if a relation's EPQ tuple has been fetched for relation, indexed
1145 * by scanrelid - 1.
1147 bool *relsubs_done;
1149 PlanState *recheckplanstate; /* EPQ specific exec nodes, for ->plan */
1150 } EPQState;
1153 /* ----------------
1154 * ResultState information
1155 * ----------------
/*
 * State node for Result.  It embeds PlanState as its first member, so a
 * ResultState pointer can be cast to PlanState *.
 */
1157 typedef struct ResultState
1159 PlanState ps; /* its first field is NodeTag */
1160 ExprState *resconstantqual; /* NOTE(review): presumably the compiled constant qual -- confirm in the Result executor code */
1161 bool rs_done; /* are we done? */
1162 bool rs_checkqual; /* do we need to check the qual? */
1163 } ResultState;
1165 /* ----------------
1166 * ProjectSetState information
1168 * Note: at least one of the "elems" will be a SetExprState; the rest are
1169 * regular ExprStates.
1170 * ----------------
/*
 * State node for ProjectSet.  elems[] is declared as Node ** because its
 * entries are a mix of SetExprState and plain ExprState nodes.
 */
1172 typedef struct ProjectSetState
1174 PlanState ps; /* its first field is NodeTag */
1175 Node **elems; /* array of expression states (SetExprState
               * or ExprState) */
1176 ExprDoneCond *elemdone; /* array of per-SRF is-done states */
1177 int nelems; /* length of elemdone[] array */
1178 bool pending_srf_tuples; /* still evaluating srfs in tlist? */
1179 MemoryContext argcontext; /* context for SRF arguments */
1180 } ProjectSetState;
1182 /* ----------------
1183 * ModifyTableState information
1184 * ----------------
/*
 * Execution state for ModifyTable.  It holds the per-target-relation
 * ResultRelInfo array (mt_nrels entries), the root target relation info,
 * an embedded EPQState for EvalPlanQual rechecks, and the tuple-routing and
 * transition-capture pointers declared at the end of the struct.
 */
1186 typedef struct ModifyTableState
1188 PlanState ps; /* its first field is NodeTag */
1189 CmdType operation; /* INSERT, UPDATE, or DELETE */
1190 bool canSetTag; /* do we set the command tag/es_processed? */
1191 bool mt_done; /* are we done? */
1192 int mt_nrels; /* number of entries in resultRelInfo[] */
1193 ResultRelInfo *resultRelInfo; /* info about target relation(s) */
1196 * Target relation mentioned in the original statement, used to fire
1197 * statement-level triggers and as the root for tuple routing. (This
1198 * might point to one of the resultRelInfo[] entries, but it can also be a
1199 * distinct struct.)
1201 ResultRelInfo *rootResultRelInfo;
1203 EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */
1204 bool fireBSTriggers; /* do we need to fire stmt triggers? */
1207 * These fields are used for inherited UPDATE and DELETE, to track which
1208 * target relation a given tuple is from. If there are a lot of target
1209 * relations, we use a hash table to translate table OIDs to
1210 * resultRelInfo[] indexes; otherwise mt_resultOidHash is NULL.
1212 int mt_resultOidAttno; /* resno of "tableoid" junk attr */
1213 Oid mt_lastResultOid; /* last-seen value of tableoid */
1214 int mt_lastResultIndex; /* corresponding index in resultRelInfo[] */
1215 HTAB *mt_resultOidHash; /* optional hash table to speed lookups */
1218 * Slot for storing tuples in the root partitioned table's rowtype during
1219 * an UPDATE of a partitioned table.
1221 TupleTableSlot *mt_root_tuple_slot;
1223 /* Tuple-routing support info */
1224 struct PartitionTupleRouting *mt_partition_tuple_routing;
1226 /* controls transition table population for specified operation */
1227 struct TransitionCaptureState *mt_transition_capture;
1229 /* controls transition table population for INSERT...ON CONFLICT UPDATE */
1230 struct TransitionCaptureState *mt_oc_transition_capture;
1231 } ModifyTableState;
1233 /* ----------------
1234 * AppendState information
1236 * nplans how many plans are in the array
1237 * whichplan which synchronous plan is being executed (0 .. n-1)
1238 * or a special negative value. See nodeAppend.c.
1239 * prune_state details required to allow partitions to be
1240 * eliminated from the scan, or NULL if not possible.
1241 * valid_subplans for runtime pruning, valid synchronous appendplans
1242 * indexes to scan.
1243 * ----------------
/*
 * Forward declarations.  AppendState needs its typedef before the struct
 * body because the choose_next_subplan member takes an AppendState *.
 */
1246 struct AppendState;
1247 typedef struct AppendState AppendState;
1248 struct ParallelAppendState;
1249 typedef struct ParallelAppendState ParallelAppendState;
1250 struct PartitionPruneState;
1252 struct AppendState
1254 PlanState ps; /* its first field is NodeTag */
1255 PlanState **appendplans; /* array of PlanStates for my inputs */
1256 int as_nplans; /* how many plans are in appendplans[] */
1257 int as_whichplan; /* synchronous plan being executed (0 .. n-1),
                   * or a special negative value */
1258 bool as_begun; /* false means need to initialize */
1259 Bitmapset *as_asyncplans; /* asynchronous plans indexes */
1260 int as_nasyncplans; /* # of asynchronous plans */
1261 AsyncRequest **as_asyncrequests; /* array of AsyncRequests */
1262 TupleTableSlot **as_asyncresults; /* unreturned results of async plans */
1263 int as_nasyncresults; /* # of valid entries in as_asyncresults */
1264 bool as_syncdone; /* true if all synchronous plans done in
1265 * asynchronous mode, else false */
1266 int as_nasyncremain; /* # of remaining asynchronous plans */
1267 Bitmapset *as_needrequest; /* asynchronous plans needing a new request */
1268 struct WaitEventSet *as_eventset; /* WaitEventSet used to configure file
1269 * descriptor wait events */
1270 int as_first_partial_plan; /* Index of 'appendplans' containing
1271 * the first partial plan */
1272 ParallelAppendState *as_pstate; /* parallel coordination info */
1273 Size pstate_len; /* size of parallel coordination info */
1274 struct PartitionPruneState *as_prune_state; /* partition pruning details,
                                              * or NULL if not possible */
1275 Bitmapset *as_valid_subplans; /* for runtime pruning: valid synchronous
                               * appendplans indexes to scan */
1276 Bitmapset *as_valid_asyncplans; /* valid asynchronous plans indexes */
1277 bool (*choose_next_subplan) (AppendState *); /* NOTE(review): presumably
                                               * picks the next subplan to
                                               * run -- confirm in
                                               * nodeAppend.c */
1280 /* ----------------
1281 * MergeAppendState information
1283 * nplans how many plans are in the array
1284 * nkeys number of sort key columns
1285 * sortkeys sort keys in SortSupport representation
1286 * slots current output tuple of each subplan
1287 * heap heap of active tuples
1288 * initialized true if we have fetched first tuple from each subplan
1289 * prune_state details required to allow partitions to be
1290 * eliminated from the scan, or NULL if not possible.
1291 * valid_subplans for runtime pruning, valid mergeplans indexes to
1292 * scan.
1293 * ----------------
/*
 * State node for MergeAppend: the input PlanStates, their current output
 * slots, the sort keys, and a binary heap of slot indices.
 */
1295 typedef struct MergeAppendState
1297 PlanState ps; /* its first field is NodeTag */
1298 PlanState **mergeplans; /* array of PlanStates for my inputs */
1299 int ms_nplans; /* how many plans are in mergeplans[] */
1300 int ms_nkeys; /* number of sort key columns */
1301 SortSupport ms_sortkeys; /* array of length ms_nkeys */
1302 TupleTableSlot **ms_slots; /* array of length ms_nplans */
1303 struct binaryheap *ms_heap; /* binary heap of slot indices */
1304 bool ms_initialized; /* are subplans started? */
1305 struct PartitionPruneState *ms_prune_state; /* partition pruning details,
                                              * or NULL if not possible */
1306 Bitmapset *ms_valid_subplans; /* for runtime pruning: valid mergeplans
                               * indexes to scan */
1307 } MergeAppendState;
1309 /* ----------------
1310 * RecursiveUnionState information
1312 * RecursiveUnionState is used for performing a recursive union.
1314 * recursing T when we're done scanning the non-recursive term
1315 * intermediate_empty T if intermediate_table is currently empty
1316 * working_table working table (to be scanned by recursive term)
1317 * intermediate_table current recursive output (next generation of WT)
1318 * ----------------
/*
 * State node for a recursive union.  The trailing group of fields (from
 * eqfuncoids onward) is unused in the UNION ALL case.
 */
1320 typedef struct RecursiveUnionState
1322 PlanState ps; /* its first field is NodeTag */
1323 bool recursing; /* true once the non-recursive term is scanned */
1324 bool intermediate_empty; /* true if intermediate_table is empty */
1325 Tuplestorestate *working_table; /* scanned by the recursive term */
1326 Tuplestorestate *intermediate_table; /* current recursive output (next
                                       * generation of working table) */
1327 /* Remaining fields are unused in UNION ALL case */
1328 Oid *eqfuncoids; /* per-grouping-field equality fns */
1329 FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
1330 MemoryContext tempContext; /* short-term context for comparisons */
1331 TupleHashTable hashtable; /* hash table for tuples already seen */
1332 MemoryContext tableContext; /* memory context containing hash table */
1333 } RecursiveUnionState;
1335 /* ----------------
1336 * BitmapAndState information
1337 * ----------------
/* State node for BitmapAnd: nplans input PlanStates held in bitmapplans[]. */
1339 typedef struct BitmapAndState
1341 PlanState ps; /* its first field is NodeTag */
1342 PlanState **bitmapplans; /* array of PlanStates for my inputs */
1343 int nplans; /* number of input plans */
1344 } BitmapAndState;
1346 /* ----------------
1347 * BitmapOrState information
1348 * ----------------
/* State node for BitmapOr: nplans input PlanStates held in bitmapplans[]. */
1350 typedef struct BitmapOrState
1352 PlanState ps; /* its first field is NodeTag */
1353 PlanState **bitmapplans; /* array of PlanStates for my inputs */
1354 int nplans; /* number of input plans */
1355 } BitmapOrState;
1357 /* ----------------------------------------------------------------
1358 * Scan State Information
1359 * ----------------------------------------------------------------
1362 /* ----------------
1363 * ScanState information
1365 * ScanState extends PlanState for node types that represent
1366 * scans of an underlying relation. It can also be used for nodes
1367 * that scan the output of an underlying plan node --- in that case,
1368 * only ScanTupleSlot is actually useful, and it refers to the tuple
1369 * retrieved from the subplan.
1371 * currentRelation relation being scanned (NULL if none)
1372 * currentScanDesc current scan descriptor for scan (NULL if none)
1373 * ScanTupleSlot pointer to slot in tuple table holding scan tuple
1374 * ----------------
/*
 * Common base for scan-type state nodes; the concrete scan states in this
 * file embed it as their first member.
 */
1376 typedef struct ScanState
1378 PlanState ps; /* its first field is NodeTag */
1379 Relation ss_currentRelation; /* relation being scanned (NULL if none) */
1380 struct TableScanDescData *ss_currentScanDesc; /* current scan descriptor
                                                * (NULL if none) */
1381 TupleTableSlot *ss_ScanTupleSlot; /* slot in tuple table holding the scan
                                    * tuple */
1382 } ScanState;
1384 /* ----------------
1385 * SeqScanState information
1386 * ----------------
/* State node for SeqScan: ScanState plus the parallel scan descriptor size. */
1388 typedef struct SeqScanState
1390 ScanState ss; /* its first field is NodeTag */
1391 Size pscan_len; /* size of parallel heap scan descriptor */
1392 } SeqScanState;
1394 /* ----------------
1395 * SampleScanState information
1396 * ----------------
/*
 * State node for SampleScan: compiled TABLESAMPLE argument expressions, the
 * tablesample method descriptor with its private state, and progress flags
 * for the scan.
 */
1398 typedef struct SampleScanState
1400 ScanState ss; /* its first field is NodeTag */
1401 List *args; /* expr states for TABLESAMPLE params */
1402 ExprState *repeatable; /* expr state for REPEATABLE expr */
1403 /* use struct pointer to avoid including tsmapi.h here */
1404 struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */
1405 void *tsm_state; /* tablesample method can keep state here */
1406 bool use_bulkread; /* use bulkread buffer access strategy? */
1407 bool use_pagemode; /* use page-at-a-time visibility checking? */
1408 bool begun; /* false means need to call BeginSampleScan */
1409 uint32 seed; /* random seed */
1410 int64 donetuples; /* number of tuples already returned */
1411 bool haveblock; /* has a block for sampling been determined */
1412 bool done; /* exhausted all tuples? */
1413 } SampleScanState;
1416 * These structs store information about index quals that don't have simple
1417 * constant right-hand sides. See comments for ExecIndexBuildScanKeys()
1418 * for discussion.
/*
 * One index qual whose comparison value is obtained by evaluating key_expr;
 * the resulting value is put into scan_key.
 */
1420 typedef struct
1422 struct ScanKeyData *scan_key; /* scankey to put value into */
1423 ExprState *key_expr; /* expr to evaluate to get value */
1424 bool key_toastable; /* is expr's result a toastable datatype? */
1425 } IndexRuntimeKeyInfo;
/*
 * One index qual whose values come from an array produced by array_expr.
 * elem_values[] and elem_nulls[] hold num_elems entries each, and next_elem
 * is the next array element to use.
 */
1427 typedef struct
1429 struct ScanKeyData *scan_key; /* scankey to put value into */
1430 ExprState *array_expr; /* expr to evaluate to get array value */
1431 int next_elem; /* next array element to use */
1432 int num_elems; /* number of elems in current array value */
1433 Datum *elem_values; /* array of num_elems Datums */
1434 bool *elem_nulls; /* array of num_elems is-null flags */
1435 } IndexArrayKeyInfo;
1437 /* ----------------
1438 * IndexScanState information
1440 * indexqualorig execution state for indexqualorig expressions
1441 * indexorderbyorig execution state for indexorderbyorig expressions
1442 * ScanKeys Skey structures for index quals
1443 * NumScanKeys number of ScanKeys
1444 * OrderByKeys Skey structures for index ordering operators
1445 * NumOrderByKeys number of OrderByKeys
1446 * RuntimeKeys info about Skeys that must be evaluated at runtime
1447 * NumRuntimeKeys number of RuntimeKeys
1448 * RuntimeKeysReady true if runtime Skeys have been computed
1449 * RuntimeContext expr context for evaling runtime Skeys
1450 * RelationDesc index relation descriptor
1451 * ScanDesc index scan descriptor
1453 * ReorderQueue tuples that need reordering due to re-check
1454 * ReachedEnd have we fetched all tuples from index already?
1455 * OrderByValues values of ORDER BY exprs of last fetched tuple
1456 * OrderByNulls null flags for OrderByValues
1457 * SortSupport for reordering ORDER BY exprs
1458 * OrderByTypByVals is the datatype of order by expression pass-by-value?
1459 * OrderByTypLens typlens of the datatypes of order by expressions
1460 * PscanLen size of parallel index scan descriptor
1461 * ----------------
/*
 * State node for IndexScan: scan keys and runtime keys for the index quals
 * and ordering operators, the index relation and scan descriptors, and the
 * fields needed to re-check ORDER BY expression ordering.
 */
1463 typedef struct IndexScanState
1465 ScanState ss; /* its first field is NodeTag */
1466 ExprState *indexqualorig; /* exec state for indexqualorig expressions */
1467 List *indexorderbyorig; /* exec state for indexorderbyorig expressions */
1468 struct ScanKeyData *iss_ScanKeys; /* Skey structures for index quals */
1469 int iss_NumScanKeys; /* number of ScanKeys */
1470 struct ScanKeyData *iss_OrderByKeys; /* Skeys for index ordering operators */
1471 int iss_NumOrderByKeys; /* number of OrderByKeys */
1472 IndexRuntimeKeyInfo *iss_RuntimeKeys; /* Skeys that must be evaluated at
                                        * runtime */
1473 int iss_NumRuntimeKeys; /* number of RuntimeKeys */
1474 bool iss_RuntimeKeysReady; /* true if runtime Skeys have been computed */
1475 ExprContext *iss_RuntimeContext; /* expr context for evaling runtime Skeys */
1476 Relation iss_RelationDesc; /* index relation descriptor */
1477 struct IndexScanDescData *iss_ScanDesc; /* index scan descriptor */
1479 /* These are needed for re-checking ORDER BY expr ordering */
1480 pairingheap *iss_ReorderQueue; /* tuples that need reordering due to
                                 * re-check */
1481 bool iss_ReachedEnd; /* have we fetched all tuples from index already? */
1482 Datum *iss_OrderByValues; /* ORDER BY expr values of last fetched tuple */
1483 bool *iss_OrderByNulls; /* null flags for OrderByValues */
1484 SortSupport iss_SortSupport; /* for reordering ORDER BY exprs */
1485 bool *iss_OrderByTypByVals; /* is each order by datatype pass-by-value? */
1486 int16 *iss_OrderByTypLens; /* typlens of the order by datatypes */
1487 Size iss_PscanLen; /* size of parallel index scan descriptor */
1488 } IndexScanState;
1490 /* ----------------
1491 * IndexOnlyScanState information
1493 * recheckqual execution state for recheckqual expressions
1494 * ScanKeys Skey structures for index quals
1495 * NumScanKeys number of ScanKeys
1496 * OrderByKeys Skey structures for index ordering operators
1497 * NumOrderByKeys number of OrderByKeys
1498 * RuntimeKeys info about Skeys that must be evaluated at runtime
1499 * NumRuntimeKeys number of RuntimeKeys
1500 * RuntimeKeysReady true if runtime Skeys have been computed
1501 * RuntimeContext expr context for evaling runtime Skeys
1502 * RelationDesc index relation descriptor
1503 * ScanDesc index scan descriptor
1504 * TableSlot slot for holding tuples fetched from the table
1505 * VMBuffer buffer in use for visibility map testing, if any
1506 * PscanLen size of parallel index-only scan descriptor
1507 * ----------------
/*
 * State node for IndexOnlyScan: scan keys and runtime keys, the index
 * relation and scan descriptors, a slot for tuples fetched from the table,
 * and the buffer used for visibility map testing.
 */
1509 typedef struct IndexOnlyScanState
1511 ScanState ss; /* its first field is NodeTag */
1512 ExprState *recheckqual; /* exec state for recheckqual expressions */
1513 struct ScanKeyData *ioss_ScanKeys; /* Skey structures for index quals */
1514 int ioss_NumScanKeys; /* number of ScanKeys */
1515 struct ScanKeyData *ioss_OrderByKeys; /* Skeys for index ordering operators */
1516 int ioss_NumOrderByKeys; /* number of OrderByKeys */
1517 IndexRuntimeKeyInfo *ioss_RuntimeKeys; /* Skeys that must be evaluated at
                                         * runtime */
1518 int ioss_NumRuntimeKeys; /* number of RuntimeKeys */
1519 bool ioss_RuntimeKeysReady; /* true if runtime Skeys have been computed */
1520 ExprContext *ioss_RuntimeContext; /* expr context for evaling runtime Skeys */
1521 Relation ioss_RelationDesc; /* index relation descriptor */
1522 struct IndexScanDescData *ioss_ScanDesc; /* index scan descriptor */
1523 TupleTableSlot *ioss_TableSlot; /* slot for tuples fetched from the table */
1524 Buffer ioss_VMBuffer; /* buffer in use for visibility map testing, if any */
1525 Size ioss_PscanLen; /* size of parallel index-only scan descriptor */
1526 } IndexOnlyScanState;
1528 /* ----------------
1529 * BitmapIndexScanState information
1531 * result bitmap to return output into, or NULL
1532 * ScanKeys Skey structures for index quals
1533 * NumScanKeys number of ScanKeys
1534 * RuntimeKeys info about Skeys that must be evaluated at runtime
1535 * NumRuntimeKeys number of RuntimeKeys
1536 * ArrayKeys info about Skeys that come from ScalarArrayOpExprs
1537 * NumArrayKeys number of ArrayKeys
1538 * RuntimeKeysReady true if runtime Skeys have been computed
1539 * RuntimeContext expr context for evaling runtime Skeys
1540 * RelationDesc index relation descriptor
1541 * ScanDesc index scan descriptor
1542 * ----------------
/*
 * State node for BitmapIndexScan: the bitmap to return output into, scan
 * keys, runtime keys and array keys, and the index relation and scan
 * descriptors.
 */
1544 typedef struct BitmapIndexScanState
1546 ScanState ss; /* its first field is NodeTag */
1547 TIDBitmap *biss_result; /* bitmap to return output into, or NULL */
1548 struct ScanKeyData *biss_ScanKeys; /* Skey structures for index quals */
1549 int biss_NumScanKeys; /* number of ScanKeys */
1550 IndexRuntimeKeyInfo *biss_RuntimeKeys; /* Skeys that must be evaluated at
                                         * runtime */
1551 int biss_NumRuntimeKeys; /* number of RuntimeKeys */
1552 IndexArrayKeyInfo *biss_ArrayKeys; /* Skeys that come from
                                     * ScalarArrayOpExprs */
1553 int biss_NumArrayKeys; /* number of ArrayKeys */
1554 bool biss_RuntimeKeysReady; /* true if runtime Skeys have been computed */
1555 ExprContext *biss_RuntimeContext; /* expr context for evaling runtime Skeys */
1556 Relation biss_RelationDesc; /* index relation descriptor */
1557 struct IndexScanDescData *biss_ScanDesc; /* index scan descriptor */
1558 } BitmapIndexScanState;
1560 /* ----------------
1561 * SharedBitmapState information
1563 * BM_INITIAL TIDBitmap creation is not yet started, so first worker
1564 * to see this state will set the state to BM_INPROGRESS
1565 * and that process will be responsible for creating
1566 * TIDBitmap.
1567 * BM_INPROGRESS TIDBitmap creation is in progress; workers need to
1568 * sleep until it's finished.
1569 * BM_FINISHED TIDBitmap creation is done, so now all workers can
1570 * proceed to iterate over TIDBitmap.
1571 * ----------------
/* Progress of shared TIDBitmap creation, as seen by cooperating workers. */
1573 typedef enum
1575 BM_INITIAL, /* creation not yet started; first worker to see this
              * moves the state to BM_INPROGRESS and builds the bitmap */
1576 BM_INPROGRESS, /* creation in progress; other workers must sleep */
1577 BM_FINISHED /* creation done; all workers may iterate over the bitmap */
1578 } SharedBitmapState;
1580 /* ----------------
1581 * ParallelBitmapHeapState information
1582 * tbmiterator iterator for scanning current pages
1583 * prefetch_iterator iterator for prefetching ahead of current page
1584 * mutex mutual exclusion for the prefetching variable
1585 * and state
1586 * prefetch_pages # pages prefetch iterator is ahead of current
1587 * prefetch_target current target prefetch distance
1588 * state current state of the TIDBitmap
1589 * cv conditional wait variable
1590 * phs_snapshot_data snapshot data shared to workers
1591 * ----------------
/*
 * Shared state for a parallel bitmap heap scan.  The struct ends in a
 * flexible array member, so phs_snapshot_data must stay the last field.
 */
1593 typedef struct ParallelBitmapHeapState
1595 dsa_pointer tbmiterator; /* iterator for scanning current pages */
1596 dsa_pointer prefetch_iterator; /* iterator for prefetching ahead of
                                 * current page */
1597 slock_t mutex; /* mutual exclusion for the prefetching variable and
                 * state */
1598 int prefetch_pages; /* # pages prefetch iterator is ahead of current */
1599 int prefetch_target; /* current target prefetch distance */
1600 SharedBitmapState state; /* current state of the TIDBitmap */
1601 ConditionVariable cv; /* conditional wait variable */
1602 char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; /* snapshot data shared to
                                                 * workers */
1603 } ParallelBitmapHeapState;
1605 /* ----------------
1606 * BitmapHeapScanState information
1608 * bitmapqualorig execution state for bitmapqualorig expressions
1609 * tbm bitmap obtained from child index scan(s)
1610 * tbmiterator iterator for scanning current pages
1611 * tbmres current-page data
1612 * can_skip_fetch can we potentially skip tuple fetches in this scan?
1613 * return_empty_tuples number of empty tuples to return
1614 * vmbuffer buffer for visibility-map lookups
1615 * pvmbuffer ditto, for prefetched pages
1616 * exact_pages total number of exact pages retrieved
1617 * lossy_pages total number of lossy pages retrieved
1618 * prefetch_iterator iterator for prefetching ahead of current page
1619 * prefetch_pages # pages prefetch iterator is ahead of current
1620 * prefetch_target current target prefetch distance
1621 * prefetch_maximum maximum value for prefetch_target
1622 * pscan_len size of the shared memory for parallel bitmap
1623 * initialized is node ready to iterate?
1624 * shared_tbmiterator shared iterator
1625 * shared_prefetch_iterator shared iterator for prefetching
1626 * pstate shared state for parallel bitmap scan
1627 * ----------------
/*
 * State node for BitmapHeapScan: the bitmap and its iterators, prefetch
 * bookkeeping, visibility-map buffers, page counters, and the shared
 * iterators and shared state used by a parallel bitmap scan.
 */
1629 typedef struct BitmapHeapScanState
1631 ScanState ss; /* its first field is NodeTag */
1632 ExprState *bitmapqualorig; /* exec state for bitmapqualorig expressions */
1633 TIDBitmap *tbm; /* bitmap obtained from child index scan(s) */
1634 TBMIterator *tbmiterator; /* iterator for scanning current pages */
1635 TBMIterateResult *tbmres; /* current-page data */
1636 bool can_skip_fetch; /* can we potentially skip tuple fetches? */
1637 int return_empty_tuples; /* number of empty tuples to return */
1638 Buffer vmbuffer; /* buffer for visibility-map lookups */
1639 Buffer pvmbuffer; /* ditto, for prefetched pages */
1640 long exact_pages; /* total number of exact pages retrieved */
1641 long lossy_pages; /* total number of lossy pages retrieved */
1642 TBMIterator *prefetch_iterator; /* iterator for prefetching ahead of
                                  * current page */
1643 int prefetch_pages; /* # pages prefetch iterator is ahead of current */
1644 int prefetch_target; /* current target prefetch distance */
1645 int prefetch_maximum; /* maximum value for prefetch_target */
1646 Size pscan_len; /* size of the shared memory for parallel bitmap */
1647 bool initialized; /* is node ready to iterate? */
1648 TBMSharedIterator *shared_tbmiterator; /* shared iterator */
1649 TBMSharedIterator *shared_prefetch_iterator; /* shared iterator for
                                               * prefetching */
1650 ParallelBitmapHeapState *pstate; /* shared state for parallel bitmap scan */
1651 } BitmapHeapScanState;
1653 /* ----------------
1654 * TidScanState information
1656 * tidexprs list of TidExpr structs (see nodeTidscan.c)
1657 * isCurrentOf scan has a CurrentOfExpr qual
1658 * NumTids number of tids in this scan
1659 * TidPtr index of currently fetched tid
1660 * TidList evaluated item pointers (array of size NumTids)
1661 * htup currently-fetched tuple, if any
1662 * ----------------
/*
 * State node for TidScan: the TID expressions, the evaluated item pointer
 * array with the current position in it, and the currently fetched tuple.
 */
1664 typedef struct TidScanState
1666 ScanState ss; /* its first field is NodeTag */
1667 List *tss_tidexprs; /* list of TidExpr structs (see nodeTidscan.c) */
1668 bool tss_isCurrentOf; /* scan has a CurrentOfExpr qual */
1669 int tss_NumTids; /* number of tids in this scan */
1670 int tss_TidPtr; /* index of currently fetched tid */
1671 ItemPointerData *tss_TidList; /* evaluated item pointers (array of size
                                * NumTids) */
1672 HeapTupleData tss_htup; /* currently-fetched tuple, if any */
1673 } TidScanState;
1675 /* ----------------
1676 * TidRangeScanState information
1678 * trss_tidexprs list of TidOpExpr structs (see nodeTidrangescan.c)
1679 * trss_mintid the lowest TID in the scan range
1680 * trss_maxtid the highest TID in the scan range
1681 * trss_inScan is a scan currently in progress?
1682 * ----------------
/*
 * State node for TidRangeScan: the TID range expressions, the lowest and
 * highest TID of the range, and whether a scan is in progress.
 */
1684 typedef struct TidRangeScanState
1686 ScanState ss; /* its first field is NodeTag */
1687 List *trss_tidexprs; /* list of TidOpExpr structs (see
                       * nodeTidrangescan.c) */
1688 ItemPointerData trss_mintid; /* the lowest TID in the scan range */
1689 ItemPointerData trss_maxtid; /* the highest TID in the scan range */
1690 bool trss_inScan; /* is a scan currently in progress? */
1691 } TidRangeScanState;
1693 /* ----------------
1694 * SubqueryScanState information
1696 * SubqueryScanState is used for scanning a sub-query in the range table.
1697 * ScanTupleSlot references the current output tuple of the sub-query.
1698 * ----------------
/*
 * State node for scanning a sub-query in the range table.  The current
 * output tuple of the sub-query is referenced through ss.ss_ScanTupleSlot.
 */
1700 typedef struct SubqueryScanState
1702 ScanState ss; /* its first field is NodeTag */
1703 PlanState *subplan; /* NOTE(review): presumably the state tree of the
                      * sub-query being scanned -- inferred from name and
                      * type, confirm */
1704 } SubqueryScanState;
1706 /* ----------------
1707 * FunctionScanState information
1709 * Function nodes are used to scan the results of a
1710 * function appearing in FROM (typically a function returning set).
1712 * eflags node's capability flags
1713 * ordinality is this scan WITH ORDINALITY?
1714 * simple true if we have 1 function and no ordinality
1715 * ordinal current ordinal column value
1716 * nfuncs number of functions being executed
1717 * funcstates per-function execution states (private in
1718 * nodeFunctionscan.c)
1719 * argcontext memory context to evaluate function arguments in
1720 * ----------------
1722 struct FunctionScanPerFuncState;
1724 typedef struct FunctionScanState
1726 ScanState ss; /* its first field is NodeTag */
1727 int eflags;
1728 bool ordinality;
1729 bool simple;
1730 int64 ordinal;
1731 int nfuncs;
1732 struct FunctionScanPerFuncState *funcstates; /* array of length nfuncs */
1733 MemoryContext argcontext;
1734 } FunctionScanState;
1736 /* ----------------
1737 * ValuesScanState information
1739 * ValuesScan nodes are used to scan the results of a VALUES list
1741 * rowcontext per-expression-list context
1742 * exprlists array of expression lists being evaluated
1743 * exprstatelists array of expression state lists, for SubPlans only
1744 * array_len size of above arrays
1745 * curr_idx current array index (0-based)
1747 * Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection
1748 * expressions attached to the node. We create a second ExprContext,
1749 * rowcontext, in which to build the executor expression state for each
1750 * Values sublist. Resetting this context lets us get rid of expression
1751 * state for each row, avoiding major memory leakage over a long values list.
1752 * However, that doesn't work for sublists containing SubPlans, because a
1753 * SubPlan has to be connected up to the outer plan tree to work properly.
1754 * Therefore, for only those sublists containing SubPlans, we do expression
1755 * state construction at executor start, and store those pointers in
1756 * exprstatelists[]. NULL entries in that array correspond to simple
1757 * subexpressions that are handled as described above.
1758 * ----------------
1760 typedef struct ValuesScanState
1762 ScanState ss; /* its first field is NodeTag */
1763 ExprContext *rowcontext;
1764 List **exprlists;
1765 List **exprstatelists;
1766 int array_len;
1767 int curr_idx;
1768 } ValuesScanState;
1770 /* ----------------
1771 * TableFuncScanState node
1773 * Used in table-expression functions like XMLTABLE.
1774 * ----------------
1776 typedef struct TableFuncScanState
1778 ScanState ss; /* its first field is NodeTag */
1779 ExprState *docexpr; /* state for document expression */
1780 ExprState *rowexpr; /* state for row-generating expression */
1781 List *colexprs; /* state for column-generating expression */
1782 List *coldefexprs; /* state for column default expressions */
1783 List *ns_names; /* same as TableFunc.ns_names */
1784 List *ns_uris; /* list of states of namespace URI exprs */
1785 Bitmapset *notnulls; /* nullability flag for each output column */
1786 void *opaque; /* table builder private space */
1787 const struct TableFuncRoutine *routine; /* table builder methods */
1788 FmgrInfo *in_functions; /* input function for each column */
1789 Oid *typioparams; /* typioparam for each column */
1790 int64 ordinal; /* row number to be output next */
1791 MemoryContext perTableCxt; /* per-table context */
1792 Tuplestorestate *tupstore; /* output tuple store */
1793 } TableFuncScanState;
1795 /* ----------------
1796 * CteScanState information
1798 * CteScan nodes are used to scan a CommonTableExpr query.
1800 * Multiple CteScan nodes can read out from the same CTE query. We use
1801 * a tuplestore to hold rows that have been read from the CTE query but
1802 * not yet consumed by all readers.
1803 * ----------------
1805 typedef struct CteScanState
1807 ScanState ss; /* its first field is NodeTag */
1808 int eflags; /* capability flags to pass to tuplestore */
1809 int readptr; /* index of my tuplestore read pointer */
1810 PlanState *cteplanstate; /* PlanState for the CTE query itself */
1811 /* Link to the "leader" CteScanState (possibly this same node) */
1812 struct CteScanState *leader;
1813 /* The remaining fields are only valid in the "leader" CteScanState */
1814 Tuplestorestate *cte_table; /* rows already read from the CTE query */
1815 bool eof_cte; /* reached end of CTE query? */
1816 } CteScanState;
1818 /* ----------------
1819 * NamedTuplestoreScanState information
1821 * NamedTuplestoreScan nodes are used to scan a Tuplestore created and
1822 * named prior to execution of the query. An example is a transition
1823 * table for an AFTER trigger.
1825 * Multiple NamedTuplestoreScan nodes can read out from the same Tuplestore.
1826 * ----------------
1828 typedef struct NamedTuplestoreScanState
1830 ScanState ss; /* its first field is NodeTag */
1831 int readptr; /* index of my tuplestore read pointer */
1832 TupleDesc tupdesc; /* format of the tuples in the tuplestore */
1833 Tuplestorestate *relation; /* the rows */
1834 } NamedTuplestoreScanState;
1836 /* ----------------
1837 * WorkTableScanState information
1839 * WorkTableScan nodes are used to scan the work table created by
1840 * a RecursiveUnion node. We locate the RecursiveUnion node
1841 * during executor startup.
1842 * ----------------
1844 typedef struct WorkTableScanState
1846 ScanState ss; /* its first field is NodeTag */
1847 RecursiveUnionState *rustate;
1848 } WorkTableScanState;
1850 /* ----------------
1851 * ForeignScanState information
1853 * ForeignScan nodes are used to scan foreign-data tables.
1854 * ----------------
1856 typedef struct ForeignScanState
1858 ScanState ss; /* its first field is NodeTag */
1859 ExprState *fdw_recheck_quals; /* original quals not in ss.ps.qual */
1860 Size pscan_len; /* size of parallel coordination information */
1861 ResultRelInfo *resultRelInfo; /* result rel info, if UPDATE or DELETE */
1862 /* use struct pointer to avoid including fdwapi.h here */
1863 struct FdwRoutine *fdwroutine;
1864 void *fdw_state; /* foreign-data wrapper can keep state here */
1865 } ForeignScanState;
1867 /* ----------------
1868 * CustomScanState information
1870 * CustomScan nodes are used to execute custom code within executor.
1872 * Core code must avoid assuming that the CustomScanState is only as large as
1873 * the structure declared here; providers are allowed to make it the first
1874 * element in a larger structure, and typically would need to do so. The
1875 * struct is actually allocated by the CreateCustomScanState method associated
1876 * with the plan node. Any additional fields can be initialized there, or in
1877 * the BeginCustomScan method.
1878 * ----------------
1880 struct CustomExecMethods;
1882 typedef struct CustomScanState
1884 ScanState ss;
1885 uint32 flags; /* mask of CUSTOMPATH_* flags, see
1886 * nodes/extensible.h */
1887 List *custom_ps; /* list of child PlanState nodes, if any */
1888 Size pscan_len; /* size of parallel coordination information */
1889 const struct CustomExecMethods *methods;
1890 } CustomScanState;
1892 /* ----------------------------------------------------------------
1893 * Join State Information
1894 * ----------------------------------------------------------------
1897 /* ----------------
1898 * JoinState information
1900 * Superclass for state nodes of join plans.
1901 * ----------------
1903 typedef struct JoinState
1905 PlanState ps;
1906 JoinType jointype;
1907 bool single_match; /* True if we should skip to next outer tuple
1908 * after finding one inner match */
1909 ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */
1910 } JoinState;
1912 /* ----------------
1913 * NestLoopState information
1915 * NeedNewOuter true if need new outer tuple on next call
1916 * MatchedOuter true if found a join match for current outer tuple
1917 * NullInnerTupleSlot prepared null tuple for left outer joins
1918 * ----------------
1920 typedef struct NestLoopState
1922 JoinState js; /* its first field is NodeTag */
1923 bool nl_NeedNewOuter;
1924 bool nl_MatchedOuter;
1925 TupleTableSlot *nl_NullInnerTupleSlot;
1926 } NestLoopState;
1928 /* ----------------
1929 * MergeJoinState information
1931 * NumClauses number of mergejoinable join clauses
1932 * Clauses info for each mergejoinable clause
1933 * JoinState current state of ExecMergeJoin state machine
1934 * SkipMarkRestore true if we may skip Mark and Restore operations
1935 * ExtraMarks true to issue extra Mark operations on inner scan
1936 * ConstFalseJoin true if we have a constant-false joinqual
1937 * FillOuter true if should emit unjoined outer tuples anyway
1938 * FillInner true if should emit unjoined inner tuples anyway
1939 * MatchedOuter true if found a join match for current outer tuple
1940 * MatchedInner true if found a join match for current inner tuple
1941 * OuterTupleSlot slot in tuple table for cur outer tuple
1942 * InnerTupleSlot slot in tuple table for cur inner tuple
1943 * MarkedTupleSlot slot in tuple table for marked tuple
1944 * NullOuterTupleSlot prepared null tuple for right outer joins
1945 * NullInnerTupleSlot prepared null tuple for left outer joins
1946 * OuterEContext workspace for computing outer tuple's join values
1947 * InnerEContext workspace for computing inner tuple's join values
1948 * ----------------
1950 /* private in nodeMergejoin.c: */
1951 typedef struct MergeJoinClauseData *MergeJoinClause;
1953 typedef struct MergeJoinState
1955 JoinState js; /* its first field is NodeTag */
1956 int mj_NumClauses;
1957 MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */
1958 int mj_JoinState;
1959 bool mj_SkipMarkRestore;
1960 bool mj_ExtraMarks;
1961 bool mj_ConstFalseJoin;
1962 bool mj_FillOuter;
1963 bool mj_FillInner;
1964 bool mj_MatchedOuter;
1965 bool mj_MatchedInner;
1966 TupleTableSlot *mj_OuterTupleSlot;
1967 TupleTableSlot *mj_InnerTupleSlot;
1968 TupleTableSlot *mj_MarkedTupleSlot;
1969 TupleTableSlot *mj_NullOuterTupleSlot;
1970 TupleTableSlot *mj_NullInnerTupleSlot;
1971 ExprContext *mj_OuterEContext;
1972 ExprContext *mj_InnerEContext;
1973 } MergeJoinState;
1975 /* ----------------
1976 * HashJoinState information
1978 * hashclauses original form of the hashjoin condition
1979 * hj_OuterHashKeys the outer hash keys in the hashjoin condition
1980 * hj_HashOperators the join operators in the hashjoin condition
1981 * hj_HashTable hash table for the hashjoin
1982 * (NULL if table not built yet)
1983 * hj_CurHashValue hash value for current outer tuple
1984 * hj_CurBucketNo regular bucket# for current outer tuple
1985 * hj_CurSkewBucketNo skew bucket# for current outer tuple
1986 * hj_CurTuple last inner tuple matched to current outer
1987 * tuple, or NULL if starting search
1988 * (hj_CurXXX variables are undefined if
1989 * OuterTupleSlot is empty!)
1990 * hj_OuterTupleSlot tuple slot for outer tuples
1991 * hj_HashTupleSlot tuple slot for inner (hashed) tuples
1992 * hj_NullOuterTupleSlot prepared null tuple for right/full outer joins
1993 * hj_NullInnerTupleSlot prepared null tuple for left/full outer joins
1994 * hj_FirstOuterTupleSlot first tuple retrieved from outer plan
1995 * hj_JoinState current state of ExecHashJoin state machine
1996 * hj_MatchedOuter true if found a join match for current outer
1997 * hj_OuterNotEmpty true if outer relation known not empty
1998 * ----------------
2001 /* these structs are defined in executor/hashjoin.h: */
2002 typedef struct HashJoinTupleData *HashJoinTuple;
2003 typedef struct HashJoinTableData *HashJoinTable;
2005 typedef struct HashJoinState
2007 JoinState js; /* its first field is NodeTag */
2008 ExprState *hashclauses;
2009 List *hj_OuterHashKeys; /* list of ExprState nodes */
2010 List *hj_HashOperators; /* list of operator OIDs */
2011 List *hj_Collations;
2012 HashJoinTable hj_HashTable;
2013 uint32 hj_CurHashValue;
2014 int hj_CurBucketNo;
2015 int hj_CurSkewBucketNo;
2016 HashJoinTuple hj_CurTuple;
2017 TupleTableSlot *hj_OuterTupleSlot;
2018 TupleTableSlot *hj_HashTupleSlot;
2019 TupleTableSlot *hj_NullOuterTupleSlot;
2020 TupleTableSlot *hj_NullInnerTupleSlot;
2021 TupleTableSlot *hj_FirstOuterTupleSlot;
2022 int hj_JoinState;
2023 bool hj_MatchedOuter;
2024 bool hj_OuterNotEmpty;
2025 } HashJoinState;
2028 /* ----------------------------------------------------------------
2029 * Materialization State Information
2030 * ----------------------------------------------------------------
2033 /* ----------------
2034 * MaterialState information
2036 * materialize nodes are used to materialize the results
2037 * of a subplan into a temporary file.
2039 * ss.ss_ScanTupleSlot refers to output of underlying plan.
2040 * ----------------
2042 typedef struct MaterialState
2044 ScanState ss; /* its first field is NodeTag */
2045 int eflags; /* capability flags to pass to tuplestore */
2046 bool eof_underlying; /* reached end of underlying plan? */
2047 Tuplestorestate *tuplestorestate;
2048 } MaterialState;
2050 struct MemoizeEntry;
2051 struct MemoizeTuple;
2052 struct MemoizeKey;
2054 typedef struct MemoizeInstrumentation
2056 uint64 cache_hits; /* number of rescans where we've found the
2057 * scan parameter values to be cached */
2058 uint64 cache_misses; /* number of rescans where we've not found the
2059 * scan parameter values to be cached. */
2060 uint64 cache_evictions; /* number of cache entries removed due to
2061 * the need to free memory */
2062 uint64 cache_overflows; /* number of times we've had to bypass the
2063 * cache when filling it due to not being
2064 * able to free enough space to store the
2065 * current scan's tuples. */
2066 uint64 mem_peak; /* peak memory usage in bytes */
2067 } MemoizeInstrumentation;
2069 /* ----------------
2070 * Shared memory container for per-worker memoize information
2071 * ----------------
2073 typedef struct SharedMemoizeInfo
2075 int num_workers;
2076 MemoizeInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
2077 } SharedMemoizeInfo;
2079 /* ----------------
2080 * MemoizeState information
2082 * memoize nodes are used to cache recent and commonly seen results from
2083 * a parameterized scan.
2084 * ----------------
2086 typedef struct MemoizeState
2088 ScanState ss; /* its first field is NodeTag */
2089 int mstatus; /* value of ExecMemoize state machine */
2090 int nkeys; /* number of cache keys */
2091 struct memoize_hash *hashtable; /* hash table for cache entries */
2092 TupleDesc hashkeydesc; /* tuple descriptor for cache keys */
2093 TupleTableSlot *tableslot; /* min tuple slot for existing cache entries */
2094 TupleTableSlot *probeslot; /* virtual slot used for hash lookups */
2095 ExprState *cache_eq_expr; /* Compare exec params to hash key */
2096 ExprState **param_exprs; /* exprs containing the parameters to this
2097 * node */
2098 FmgrInfo *hashfunctions; /* lookup data for hash funcs nkeys in size */
2099 Oid *collations; /* collation for comparisons nkeys in size */
2100 uint64 mem_used; /* bytes of memory used by cache */
2101 uint64 mem_limit; /* memory limit in bytes for the cache */
2102 MemoryContext tableContext; /* memory context to store cache data */
2103 dlist_head lru_list; /* least recently used entry list */
2104 struct MemoizeTuple *last_tuple; /* Used to point to the last tuple
2105 * returned during a cache hit and the
2106 * tuple we last stored when
2107 * populating the cache. */
2108 struct MemoizeEntry *entry; /* the entry that 'last_tuple' belongs to or
2109 * NULL if 'last_tuple' is NULL. */
2110 bool singlerow; /* true if the cache entry is to be marked as
2111 * complete after caching the first tuple. */
2112 bool binary_mode; /* true when cache key should be compared bit
2113 * by bit, false when using hash equality ops */
2114 MemoizeInstrumentation stats; /* execution statistics */
2115 SharedMemoizeInfo *shared_info; /* statistics for parallel workers */
2116 Bitmapset *keyparamids; /* Param->paramids of expressions belonging to
2117 * param_exprs */
2118 } MemoizeState;
2120 /* ----------------
2121 * When performing sorting by multiple keys, it's possible that the input
2122 * dataset is already sorted on a prefix of those keys. We call these
2123 * "presorted keys".
2124 * PresortedKeyData represents information about one such key.
2125 * ----------------
2127 typedef struct PresortedKeyData
2129 FmgrInfo flinfo; /* comparison function info */
2130 FunctionCallInfo fcinfo; /* comparison function call info */
2131 OffsetNumber attno; /* attribute number in tuple */
2132 } PresortedKeyData;
2134 /* ----------------
2135 * Shared memory container for per-worker sort information
2136 * ----------------
2138 typedef struct SharedSortInfo
2140 int num_workers;
2141 TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
2142 } SharedSortInfo;
2144 /* ----------------
2145 * SortState information
2146 * ----------------
2148 typedef struct SortState
2150 ScanState ss; /* its first field is NodeTag */
2151 bool randomAccess; /* need random access to sort output? */
2152 bool bounded; /* is the result set bounded? */
2153 int64 bound; /* if bounded, how many tuples are needed */
2154 bool sort_Done; /* sort completed yet? */
2155 bool bounded_Done; /* value of bounded we did the sort with */
2156 int64 bound_Done; /* value of bound we did the sort with */
2157 void *tuplesortstate; /* private state of tuplesort.c */
2158 bool am_worker; /* are we a worker? */
2159 bool datumSort; /* Datum sort instead of tuple sort? */
2160 SharedSortInfo *shared_info; /* one entry per worker */
2161 } SortState;
2163 /* ----------------
2164 * Instrumentation information for IncrementalSort
2165 * ----------------
2167 typedef struct IncrementalSortGroupInfo
2169 int64 groupCount;
2170 int64 maxDiskSpaceUsed;
2171 int64 totalDiskSpaceUsed;
2172 int64 maxMemorySpaceUsed;
2173 int64 totalMemorySpaceUsed;
2174 bits32 sortMethods; /* bitmask of TuplesortMethod */
2175 } IncrementalSortGroupInfo;
2177 typedef struct IncrementalSortInfo
2179 IncrementalSortGroupInfo fullsortGroupInfo;
2180 IncrementalSortGroupInfo prefixsortGroupInfo;
2181 } IncrementalSortInfo;
2183 /* ----------------
2184 * Shared memory container for per-worker incremental sort information
2185 * ----------------
2187 typedef struct SharedIncrementalSortInfo
2189 int num_workers;
2190 IncrementalSortInfo sinfo[FLEXIBLE_ARRAY_MEMBER];
2191 } SharedIncrementalSortInfo;
/* ----------------
 *   IncrementalSortState information
 * ----------------
 */

/* Values stored in IncrementalSortState.execution_status */
typedef enum
{
    INCSORT_LOADFULLSORT,
    INCSORT_LOADPREFIXSORT,
    INCSORT_READFULLSORT,
    INCSORT_READPREFIXSORT,
} IncrementalSortExecutionStatus;
2205 typedef struct IncrementalSortState
2207 ScanState ss; /* its first field is NodeTag */
2208 bool bounded; /* is the result set bounded? */
2209 int64 bound; /* if bounded, how many tuples are needed */
2210 bool outerNodeDone; /* finished fetching tuples from outer node */
2211 int64 bound_Done; /* value of bound we did the sort with */
2212 IncrementalSortExecutionStatus execution_status;
2213 int64 n_fullsort_remaining;
2214 Tuplesortstate *fullsort_state; /* private state of tuplesort.c */
2215 Tuplesortstate *prefixsort_state; /* private state of tuplesort.c */
2216 /* the keys by which the input path is already sorted */
2217 PresortedKeyData *presorted_keys;
2219 IncrementalSortInfo incsort_info;
2221 /* slot for pivot tuple defining values of presorted keys within group */
2222 TupleTableSlot *group_pivot;
2223 TupleTableSlot *transfer_tuple;
2224 bool am_worker; /* are we a worker? */
2225 SharedIncrementalSortInfo *shared_info; /* one entry per worker */
2226 } IncrementalSortState;
2228 /* ---------------------
2229 * GroupState information
2230 * ---------------------
2232 typedef struct GroupState
2234 ScanState ss; /* its first field is NodeTag */
2235 ExprState *eqfunction; /* equality function */
2236 bool grp_done; /* indicates completion of Group scan */
2237 } GroupState;
2239 /* ---------------------
2240 * per-worker aggregate information
2241 * ---------------------
2243 typedef struct AggregateInstrumentation
2245 Size hash_mem_peak; /* peak hash table memory usage */
2246 uint64 hash_disk_used; /* kB of disk space used */
2247 int hash_batches_used; /* batches used during entire execution */
2248 } AggregateInstrumentation;
2250 /* ----------------
2251 * Shared memory container for per-worker aggregate information
2252 * ----------------
2254 typedef struct SharedAggInfo
2256 int num_workers;
2257 AggregateInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
2258 } SharedAggInfo;
2260 /* ---------------------
2261 * AggState information
2263 * ss.ss_ScanTupleSlot refers to output of underlying plan.
2265 * Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and
2266 * ecxt_aggnulls arrays, which hold the computed agg values for the current
2267 * input group during evaluation of an Agg node's output tuple(s). We
2268 * create a second ExprContext, tmpcontext, in which to evaluate input
2269 * expressions and run the aggregate transition functions.
2270 * ---------------------
2272 /* these structs are private in nodeAgg.c: */
2273 typedef struct AggStatePerAggData *AggStatePerAgg;
2274 typedef struct AggStatePerTransData *AggStatePerTrans;
2275 typedef struct AggStatePerGroupData *AggStatePerGroup;
2276 typedef struct AggStatePerPhaseData *AggStatePerPhase;
2277 typedef struct AggStatePerHashData *AggStatePerHash;
2279 typedef struct AggState
2281 ScanState ss; /* its first field is NodeTag */
2282 List *aggs; /* all Aggref nodes in targetlist & quals */
2283 int numaggs; /* length of list (could be zero!) */
2284 int numtrans; /* number of pertrans items */
2285 AggStrategy aggstrategy; /* strategy mode */
2286 AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
2287 AggStatePerPhase phase; /* pointer to current phase data */
2288 int numphases; /* number of phases (including phase 0) */
2289 int current_phase; /* current phase number */
2290 AggStatePerAgg peragg; /* per-Aggref information */
2291 AggStatePerTrans pertrans; /* per-Trans state information */
2292 ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */
2293 ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
2294 ExprContext *tmpcontext; /* econtext for input expressions */
2295 #define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
2296 ExprContext *curaggcontext; /* currently active aggcontext */
2297 AggStatePerAgg curperagg; /* currently active aggregate, if any */
2298 #define FIELDNO_AGGSTATE_CURPERTRANS 16
2299 AggStatePerTrans curpertrans; /* currently active trans state, if any */
2300 bool input_done; /* indicates end of input */
2301 bool agg_done; /* indicates completion of Agg scan */
2302 int projected_set; /* The last projected grouping set */
2303 #define FIELDNO_AGGSTATE_CURRENT_SET 20
2304 int current_set; /* The current grouping set being evaluated */
2305 Bitmapset *grouped_cols; /* grouped cols in current projection */
2306 List *all_grouped_cols; /* list of all grouped cols in DESC order */
2307 Bitmapset *colnos_needed; /* all columns needed from the outer plan */
2308 int max_colno_needed; /* highest colno needed from outer plan */
2309 bool all_cols_needed; /* are all cols from outer plan needed? */
2310 /* These fields are for grouping set phase data */
2311 int maxsets; /* The max number of sets in any phase */
2312 AggStatePerPhase phases; /* array of all phases */
2313 Tuplesortstate *sort_in; /* sorted input to phases > 1 */
2314 Tuplesortstate *sort_out; /* input is copied here for next phase */
2315 TupleTableSlot *sort_slot; /* slot for sort results */
2316 /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
2317 AggStatePerGroup *pergroups; /* grouping set indexed array of per-group
2318 * pointers */
2319 HeapTuple grp_firstTuple; /* copy of first tuple of current group */
2320 /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
2321 bool table_filled; /* hash table filled yet? */
2322 int num_hashes;
2323 MemoryContext hash_metacxt; /* memory for hash table itself */
2324 struct LogicalTapeSet *hash_tapeset; /* tape set for hash spill tapes */
2325 struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set,
2326 * exists only during first pass */
2327 TupleTableSlot *hash_spill_rslot; /* for reading spill files */
2328 TupleTableSlot *hash_spill_wslot; /* for writing spill files */
2329 List *hash_batches; /* hash batches remaining to be processed */
2330 bool hash_ever_spilled; /* ever spilled during this execution? */
2331 bool hash_spill_mode; /* we hit a limit during the current batch
2332 * and we must not create new groups */
2333 Size hash_mem_limit; /* limit before spilling hash table */
2334 uint64 hash_ngroups_limit; /* limit before spilling hash table */
2335 int hash_planned_partitions; /* number of partitions planned
2336 * for first pass */
2337 double hashentrysize; /* estimate revised during execution */
2338 Size hash_mem_peak; /* peak hash table memory usage */
2339 uint64 hash_ngroups_current; /* number of groups currently in
2340 * memory in all hash tables */
2341 uint64 hash_disk_used; /* kB of disk space used */
2342 int hash_batches_used; /* batches used during entire execution */
2344 AggStatePerHash perhash; /* array of per-hashtable data */
2345 AggStatePerGroup *hash_pergroup; /* grouping set indexed array of
2346 * per-group pointers */
2348 /* support for evaluation of agg input expressions: */
2349 #define FIELDNO_AGGSTATE_ALL_PERGROUPS 53
2350 AggStatePerGroup *all_pergroups; /* array of first ->pergroups, than
2351 * ->hash_pergroup */
2352 ProjectionInfo *combinedproj; /* projection machinery */
2353 SharedAggInfo *shared_info; /* one entry per worker */
2354 } AggState;
2356 /* ----------------
2357 * WindowAggState information
2358 * ----------------
2360 /* these structs are private in nodeWindowAgg.c: */
2361 typedef struct WindowStatePerFuncData *WindowStatePerFunc;
2362 typedef struct WindowStatePerAggData *WindowStatePerAgg;
2364 typedef struct WindowAggState
2366 ScanState ss; /* its first field is NodeTag */
2368 /* these fields are filled in by ExecInitExpr: */
2369 List *funcs; /* all WindowFunc nodes in targetlist */
2370 int numfuncs; /* total number of window functions */
2371 int numaggs; /* number that are plain aggregates */
2373 WindowStatePerFunc perfunc; /* per-window-function information */
2374 WindowStatePerAgg peragg; /* per-plain-aggregate information */
2375 ExprState *partEqfunction; /* equality funcs for partition columns */
2376 ExprState *ordEqfunction; /* equality funcs for ordering columns */
2377 Tuplestorestate *buffer; /* stores rows of current partition */
2378 int current_ptr; /* read pointer # for current row */
2379 int framehead_ptr; /* read pointer # for frame head, if used */
2380 int frametail_ptr; /* read pointer # for frame tail, if used */
2381 int grouptail_ptr; /* read pointer # for group tail, if used */
2382 int64 spooled_rows; /* total # of rows in buffer */
2383 int64 currentpos; /* position of current row in partition */
2384 int64 frameheadpos; /* current frame head position */
2385 int64 frametailpos; /* current frame tail position (frame end+1) */
2386 /* use struct pointer to avoid including windowapi.h here */
2387 struct WindowObjectData *agg_winobj; /* winobj for aggregate fetches */
2388 int64 aggregatedbase; /* start row for current aggregates */
2389 int64 aggregatedupto; /* rows before this one are aggregated */
2391 int frameOptions; /* frame_clause options, see WindowDef */
2392 ExprState *startOffset; /* expression for starting bound offset */
2393 ExprState *endOffset; /* expression for ending bound offset */
2394 Datum startOffsetValue; /* result of startOffset evaluation */
2395 Datum endOffsetValue; /* result of endOffset evaluation */
2397 /* these fields are used with RANGE offset PRECEDING/FOLLOWING: */
2398 FmgrInfo startInRangeFunc; /* in_range function for startOffset */
2399 FmgrInfo endInRangeFunc; /* in_range function for endOffset */
2400 Oid inRangeColl; /* collation for in_range tests */
2401 bool inRangeAsc; /* use ASC sort order for in_range tests? */
2402 bool inRangeNullsFirst; /* nulls sort first for in_range tests? */
2404 /* these fields are used in GROUPS mode: */
2405 int64 currentgroup; /* peer group # of current row in partition */
2406 int64 frameheadgroup; /* peer group # of frame head row */
2407 int64 frametailgroup; /* peer group # of frame tail row */
2408 int64 groupheadpos; /* current row's peer group head position */
2409 int64 grouptailpos; /* " " " " tail position (group end+1) */
2411 MemoryContext partcontext; /* context for partition-lifespan data */
2412 MemoryContext aggcontext; /* shared context for aggregate working data */
2413 MemoryContext curaggcontext; /* current aggregate's working data */
2414 ExprContext *tmpcontext; /* short-term evaluation context */
2416 bool all_first; /* true if the scan is starting */
2417 bool all_done; /* true if the scan is finished */
2418 bool partition_spooled; /* true if all tuples in current partition
2419 * have been spooled into tuplestore */
2420 bool more_partitions; /* true if there's more partitions after
2421 * this one */
2422 bool framehead_valid; /* true if frameheadpos is known up to
2423 * date for current row */
2424 bool frametail_valid; /* true if frametailpos is known up to
2425 * date for current row */
2426 bool grouptail_valid; /* true if grouptailpos is known up to
2427 * date for current row */
2429 TupleTableSlot *first_part_slot; /* first tuple of current or next
2430 * partition */
2431 TupleTableSlot *framehead_slot; /* first tuple of current frame */
2432 TupleTableSlot *frametail_slot; /* first tuple after current frame */
2434 /* temporary slots for tuples fetched back from tuplestore */
2435 TupleTableSlot *agg_row_slot;
2436 TupleTableSlot *temp_slot_1;
2437 TupleTableSlot *temp_slot_2;
2438 } WindowAggState;
2440 /* ----------------
2441 * UniqueState information
2443 * Unique nodes are used "on top of" sort nodes to discard
2444 * duplicate tuples returned from the sort phase. Basically
2445 * all it does is compare the current tuple from the subplan
2446 * with the previously fetched tuple (stored in its result slot).
2447 * If the two are identical in all interesting fields, then
2448 * we just fetch another tuple from the sort and try again.
2449 * ----------------
2451 typedef struct UniqueState
2453 PlanState ps; /* its first field is NodeTag */
2454 ExprState *eqfunction; /* tuple equality qual */
2455 } UniqueState;
2457 /* ----------------
2458 * GatherState information
2460 * Gather nodes launch 1 or more parallel workers, run a subplan
2461 * in those workers, and collect the results.
2462 * ----------------
2464 typedef struct GatherState
2466 PlanState ps; /* its first field is NodeTag */
2467 bool initialized; /* workers launched? */
2468 bool need_to_scan_locally; /* need to read from local plan? */
2469 int64 tuples_needed; /* tuple bound, see ExecSetTupleBound */
2470 /* these fields are set up once: */
2471 TupleTableSlot *funnel_slot;
2472 struct ParallelExecutorInfo *pei;
2473 /* all remaining fields are reinitialized during a rescan: */
2474 int nworkers_launched; /* original number of workers */
2475 int nreaders; /* number of still-active workers */
2476 int nextreader; /* next one to try to read from */
2477 struct TupleQueueReader **reader; /* array with nreaders active entries */
2478 } GatherState;
2480 /* ----------------
2481 * GatherMergeState information
2483 * Gather merge nodes launch 1 or more parallel workers, run a
2484 * subplan which produces sorted output in each worker, and then
2485 * merge the results into a single sorted stream.
2486 * ----------------
2488 struct GMReaderTupleBuffer; /* private in nodeGatherMerge.c */
2490 typedef struct GatherMergeState
2492 PlanState ps; /* its first field is NodeTag */
2493 bool initialized; /* workers launched? */
2494 bool gm_initialized; /* gather_merge_init() done? */
2495 bool need_to_scan_locally; /* need to read from local plan? */
2496 int64 tuples_needed; /* tuple bound, see ExecSetTupleBound */
2497 /* these fields are set up once: */
2498 TupleDesc tupDesc; /* descriptor for subplan result tuples */
2499 int gm_nkeys; /* number of sort columns */
2500 SortSupport gm_sortkeys; /* array of length gm_nkeys */
2501 struct ParallelExecutorInfo *pei;
2502 /* all remaining fields are reinitialized during a rescan */
2503 /* (but the arrays are not reallocated, just cleared) */
2504 int nworkers_launched; /* original number of workers */
2505 int nreaders; /* number of active workers */
2506 TupleTableSlot **gm_slots; /* array with nreaders+1 entries */
2507 struct TupleQueueReader **reader; /* array with nreaders active entries */
2508 struct GMReaderTupleBuffer *gm_tuple_buffers; /* nreaders tuple buffers */
2509 struct binaryheap *gm_heap; /* binary heap of slot indices */
2510 } GatherMergeState;
2512 /* ----------------
2513 * Values displayed by EXPLAIN ANALYZE
2514 * ----------------
2516 typedef struct HashInstrumentation
2518 int nbuckets; /* number of buckets at end of execution */
2519 int nbuckets_original; /* planned number of buckets */
2520 int nbatch; /* number of batches at end of execution */
2521 int nbatch_original; /* planned number of batches */
2522 Size space_peak; /* peak memory usage in bytes */
2523 } HashInstrumentation;
2525 /* ----------------
2526 * Shared memory container for per-worker hash information
2527 * ----------------
2529 typedef struct SharedHashInfo
2531 int num_workers;
2532 HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER];
2533 } SharedHashInfo;
2535 /* ----------------
2536 * HashState information
2537 * ----------------
2539 typedef struct HashState
2541 PlanState ps; /* its first field is NodeTag */
2542 HashJoinTable hashtable; /* hash table for the hashjoin */
2543 List *hashkeys; /* list of ExprState nodes */
2546 * In a parallelized hash join, the leader retains a pointer to the
2547 * shared-memory stats area in its shared_info field, and then copies the
2548 * shared-memory info back to local storage before DSM shutdown. The
2549 * shared_info field remains NULL in workers, or in non-parallel joins.
2551 SharedHashInfo *shared_info;
2554 * If we are collecting hash stats, this points to an initially-zeroed
2555 * collection area, which could be either local storage or in shared
2556 * memory; either way it's for just one process.
2558 HashInstrumentation *hinstrument;
2560 /* Parallel hash state. */
2561 struct ParallelHashJoinState *parallel_state;
2562 } HashState;
2564 /* ----------------
2565 * SetOpState information
2567 * Even in "sorted" mode, SetOp nodes are more complex than a simple
2568 * Unique, since we have to count how many duplicates to return. But
2569 * we also support hashing, so this is really more like a cut-down
2570 * form of Agg.
2571 * ----------------
2573 /* this struct is private in nodeSetOp.c: */
2574 typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
2576 typedef struct SetOpState
2578 PlanState ps; /* its first field is NodeTag */
2579 ExprState *eqfunction; /* equality comparator */
2580 Oid *eqfuncoids; /* per-grouping-field equality fns */
2581 FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
2582 bool setop_done; /* indicates completion of output scan */
2583 long numOutput; /* number of dups left to output */
2584 /* these fields are used in SETOP_SORTED mode: */
2585 SetOpStatePerGroup pergroup; /* per-group working state */
2586 HeapTuple grp_firstTuple; /* copy of first tuple of current group */
2587 /* these fields are used in SETOP_HASHED mode: */
2588 TupleHashTable hashtable; /* hash table with one entry per group */
2589 MemoryContext tableContext; /* memory context containing hash table */
2590 bool table_filled; /* hash table filled yet? */
2591 TupleHashIterator hashiter; /* for iterating through hash table */
2592 } SetOpState;
2594 /* ----------------
2595 * LockRowsState information
2597 * LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking.
2598 * ----------------
2600 typedef struct LockRowsState
2602 PlanState ps; /* its first field is NodeTag */
2603 List *lr_arowMarks; /* List of ExecAuxRowMarks */
2604 EPQState lr_epqstate; /* for evaluating EvalPlanQual rechecks */
2605 } LockRowsState;
/* ----------------
 *	 LimitState information
 *
 *		Limit nodes are used to enforce LIMIT/OFFSET clauses.
 *		They just select the desired subrange of their subplan's output.
 *
 * offset is the number of initial tuples to skip (0 does nothing).
 * count is the number of tuples to return after skipping the offset tuples.
 * If no limit count was specified, count is undefined and noCount is true.
 * When lstate == LIMIT_INITIAL, offset/count/noCount haven't been set yet.
 * ----------------
 */
typedef enum
{
	LIMIT_INITIAL,				/* initial state for LIMIT node */
	LIMIT_RESCAN,				/* rescan after recomputing parameters */
	LIMIT_EMPTY,				/* there are no returnable rows */
	LIMIT_INWINDOW,				/* have returned a row in the window */
	LIMIT_WINDOWEND_TIES,		/* have returned a tied row */
	LIMIT_SUBPLANEOF,			/* at EOF of subplan (within window) */
	LIMIT_WINDOWEND,			/* stepped off end of window */
	LIMIT_WINDOWSTART			/* stepped off beginning of window */
} LimitStateCond;
2631 typedef struct LimitState
2633 PlanState ps; /* its first field is NodeTag */
2634 ExprState *limitOffset; /* OFFSET parameter, or NULL if none */
2635 ExprState *limitCount; /* COUNT parameter, or NULL if none */
2636 LimitOption limitOption; /* limit specification type */
2637 int64 offset; /* current OFFSET value */
2638 int64 count; /* current COUNT, if any */
2639 bool noCount; /* if true, ignore count */
2640 LimitStateCond lstate; /* state machine status, as above */
2641 int64 position; /* 1-based index of last tuple returned */
2642 TupleTableSlot *subSlot; /* tuple last obtained from subplan */
2643 ExprState *eqfunction; /* tuple equality qual in case of WITH TIES
2644 * option */
2645 TupleTableSlot *last_slot; /* slot for evaluation of ties */
2646 } LimitState;
2648 #endif /* EXECNODES_H */