Optimize creation of slots for FDW bulk inserts
[pgsql.git] / src/backend/executor/nodeModifyTable.c
blob: 88c479c6da33a07ec57f0d2627e304fbf8a960e8
/*-------------------------------------------------------------------------
 *
 * nodeModifyTable.c
 *	  routines to handle ModifyTable nodes.
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeModifyTable.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *		ExecInitModifyTable - initialize the ModifyTable node
 *		ExecModifyTable - retrieve the next tuple from the node
 *		ExecEndModifyTable - shut down the ModifyTable node
 *		ExecReScanModifyTable - rescan the ModifyTable node
 *
 *	 NOTES
 *		The ModifyTable node receives input from its outerPlan, which is
 *		the data to insert for INSERT cases, or the changed columns' new
 *		values plus row-locating info for UPDATE cases, or just the
 *		row-locating info for DELETE cases.
 *
 *		If the query specifies RETURNING, then the ModifyTable returns a
 *		RETURNING tuple after completing each row insert, update, or delete.
 *		It must be called again to continue the operation.  Without RETURNING,
 *		we just loop within the node until all the work is done, then
 *		return NULL.  This avoids useless call/return overhead.
 */
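/*
 * For example, "INSERT INTO t VALUES (1), (2) RETURNING *" hands back one
 * RETURNING tuple per ExecModifyTable call, while the same statement
 * without RETURNING does all of its work in a single call and returns NULL.
 */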
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "utils/rel.h"


typedef struct MTTargetRelLookup
{
	Oid			relationOid;	/* hash key, must be first */
	int			relationIndex;	/* rel's index in resultRelInfo[] array */
} MTTargetRelLookup;
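/*
 * (MTTargetRelLookup entries form a hash table keyed by result relation
 * OID; code elsewhere in this file, outside this excerpt, uses it to
 * translate a relation's OID into its index in the resultRelInfo[] array.)
 */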
static void ExecBatchInsert(ModifyTableState *mtstate,
							ResultRelInfo *resultRelInfo,
							TupleTableSlot **slots,
							TupleTableSlot **planSlots,
							int numSlots,
							EState *estate,
							bool canSetTag);
static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
								 ResultRelInfo *resultRelInfo,
								 ItemPointer conflictTid,
								 TupleTableSlot *planSlot,
								 TupleTableSlot *excludedSlot,
								 EState *estate,
								 bool canSetTag,
								 TupleTableSlot **returning);
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
											   EState *estate,
											   PartitionTupleRouting *proute,
											   ResultRelInfo *targetRelInfo,
											   TupleTableSlot *slot,
											   ResultRelInfo **partRelInfo);

/*
 * Verify that the tuples to be produced by INSERT match the
 * target relation's rowtype
 *
 * We do this to guard against stale plans.  If plan invalidation is
 * functioning properly then we should never get a failure here, but better
 * safe than sorry.  Note that this is called after we have obtained lock
 * on the target rel, so the rowtype can't change underneath us.
 *
 * The plan output is represented by its targetlist, because that makes
 * handling the dropped-column case easier.
 *
 * We used to use this for UPDATE as well, but now the equivalent checks
 * are done in ExecBuildUpdateProjection.
 */
static void
ExecCheckPlanOutput(Relation resultRel, List *targetList)
{
	TupleDesc	resultDesc = RelationGetDescr(resultRel);
	int			attno = 0;
	ListCell   *lc;

	foreach(lc, targetList)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);
		Form_pg_attribute attr;

		Assert(!tle->resjunk);	/* caller removed junk items already */

		if (attno >= resultDesc->natts)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table row type and query-specified row type do not match"),
					 errdetail("Query has too many columns.")));
		attr = TupleDescAttr(resultDesc, attno);
		attno++;

		if (!attr->attisdropped)
		{
			/* Normal case: demand type match */
			if (exprType((Node *) tle->expr) != attr->atttypid)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
								   format_type_be(attr->atttypid),
								   attno,
								   format_type_be(exprType((Node *) tle->expr)))));
		}
		else
		{
			/*
			 * For a dropped column, we can't check atttypid (it's likely 0).
			 * In any case the planner has most likely inserted an INT4 null.
			 * What we insist on is just *some* NULL constant.
			 */
			if (!IsA(tle->expr, Const) ||
				!((Const *) tle->expr)->constisnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
								   attno)));
		}
	}
	if (attno != resultDesc->natts)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("table row type and query-specified row type do not match"),
				 errdetail("Query has too few columns.")));
}
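/*
 * (As an illustration of the stale-plan hazard checked above: a cached
 * plan that somehow survived a DROP COLUMN on the target table would now
 * supply a non-NULL value at the dropped column's position, and the
 * dropped-column check above would reject it.)
 */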
/*
 * ExecProcessReturning --- evaluate a RETURNING list
 *
 * resultRelInfo: current result rel
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top subplan node
 *
 * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
 * scan tuple.
 *
 * Returns a slot holding the result tuple
 */
static TupleTableSlot *
ExecProcessReturning(ResultRelInfo *resultRelInfo,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot)
{
	ProjectionInfo *projectReturning = resultRelInfo->ri_projectReturning;
	ExprContext *econtext = projectReturning->pi_exprContext;

	/* Make tuple and any needed join variables available to ExecProject */
	if (tupleSlot)
		econtext->ecxt_scantuple = tupleSlot;
	econtext->ecxt_outertuple = planSlot;

	/*
	 * RETURNING expressions might reference the tableoid column, so
	 * reinitialize tts_tableOid before evaluating them.
	 */
	econtext->ecxt_scantuple->tts_tableOid =
		RelationGetRelid(resultRelInfo->ri_RelationDesc);

	/* Compute the RETURNING expressions */
	return ExecProject(projectReturning);
}
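/*
 * (In the projection just computed, ecxt_scantuple supplies the target
 * table's columns while ecxt_outertuple supplies values computed by the
 * subplan, so RETURNING expressions can reference both.)
 */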
/*
 * ExecCheckTupleVisible -- verify tuple is visible
 *
 * It would not be consistent with guarantees of the higher isolation levels to
 * proceed with avoiding insertion (taking speculative insertion's alternative
 * path) on the basis of another tuple that is not visible to the MVCC
 * snapshot.  Check for the need to raise a serialization failure, and do so as
 * necessary.
 */
static void
ExecCheckTupleVisible(EState *estate,
					  Relation rel,
					  TupleTableSlot *slot)
{
	if (!IsolationUsesXactSnapshot())
		return;

	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
	{
		Datum		xminDatum;
		TransactionId xmin;
		bool		isnull;

		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
		Assert(!isnull);
		xmin = DatumGetTransactionId(xminDatum);

		/*
		 * We should not raise a serialization failure if the conflict is
		 * against a tuple inserted by our own transaction, even if it's not
		 * visible to our snapshot.  (This would happen, for example, if
		 * conflicting keys are proposed for insertion in a single command.)
		 */
		if (!TransactionIdIsCurrentTransactionId(xmin))
			ereport(ERROR,
					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
					 errmsg("could not serialize access due to concurrent update")));
	}
}

/*
 * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
 */
static void
ExecCheckTIDVisible(EState *estate,
					ResultRelInfo *relinfo,
					ItemPointer tid,
					TupleTableSlot *tempSlot)
{
	Relation	rel = relinfo->ri_RelationDesc;

	/* Redundantly check isolation level */
	if (!IsolationUsesXactSnapshot())
		return;

	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
	ExecCheckTupleVisible(estate, rel, tempSlot);
	ExecClearTuple(tempSlot);
}

/*
 * Compute stored generated columns for a tuple
 */
void
ExecComputeStoredGenerated(ResultRelInfo *resultRelInfo,
						   EState *estate, TupleTableSlot *slot,
						   CmdType cmdtype)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	int			natts = tupdesc->natts;
	MemoryContext oldContext;
	Datum	   *values;
	bool	   *nulls;

	Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);

	/*
	 * If first time through for this result relation, build expression
	 * nodetrees for rel's stored generation expressions.  Keep them in the
	 * per-query memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_GeneratedExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);

		resultRelInfo->ri_GeneratedExprs =
			(ExprState **) palloc(natts * sizeof(ExprState *));
		resultRelInfo->ri_NumGeneratedNeeded = 0;

		for (int i = 0; i < natts; i++)
		{
			if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
			{
				Expr	   *expr;

				/*
				 * If it's an update and the current column was not marked as
				 * being updated, then we can skip the computation.  But if
				 * there is a BEFORE ROW UPDATE trigger, we cannot skip,
				 * because the trigger might affect additional columns.
				 */
				if (cmdtype == CMD_UPDATE &&
					!(rel->trigdesc && rel->trigdesc->trig_update_before_row) &&
					!bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
								   ExecGetExtraUpdatedCols(resultRelInfo, estate)))
				{
					resultRelInfo->ri_GeneratedExprs[i] = NULL;
					continue;
				}

				expr = (Expr *) build_column_default(rel, i + 1);
				if (expr == NULL)
					elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
						 i + 1, RelationGetRelationName(rel));

				resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
				resultRelInfo->ri_NumGeneratedNeeded++;
			}
		}

		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * If no generated columns have been affected by this change, then skip
	 * the rest.
	 */
	if (resultRelInfo->ri_NumGeneratedNeeded == 0)
		return;

	oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	values = palloc(sizeof(*values) * natts);
	nulls = palloc(sizeof(*nulls) * natts);

	slot_getallattrs(slot);
	memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);

	for (int i = 0; i < natts; i++)
	{
		Form_pg_attribute attr = TupleDescAttr(tupdesc, i);

		if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED &&
			resultRelInfo->ri_GeneratedExprs[i])
		{
			ExprContext *econtext;
			Datum		val;
			bool		isnull;

			econtext = GetPerTupleExprContext(estate);
			econtext->ecxt_scantuple = slot;

			val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);

			/*
			 * We must make a copy of val as we have no guarantees about where
			 * memory for a pass-by-reference Datum is located.
			 */
			if (!isnull)
				val = datumCopy(val, attr->attbyval, attr->attlen);

			values[i] = val;
			nulls[i] = isnull;
		}
		else
		{
			if (!nulls[i])
				values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
		}
	}
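	/*
	 * Rebuild the slot from the computed values and materialize it, so that
	 * the result no longer depends on the per-tuple memory context holding
	 * the values[]/nulls[] arrays.
	 */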
	ExecClearTuple(slot);
	memcpy(slot->tts_values, values, sizeof(*values) * natts);
	memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
	ExecStoreVirtualTuple(slot);
	ExecMaterializeSlot(slot);

	MemoryContextSwitchTo(oldContext);
}

/*
 * ExecInitInsertProjection
 *		Do one-time initialization of projection data for INSERT tuples.
 *
 * INSERT queries may need a projection to filter out junk attrs in the tlist.
 *
 * This is also a convenient place to verify that the
 * output of an INSERT matches the target table.
 */
static void
ExecInitInsertProjection(ModifyTableState *mtstate,
						 ResultRelInfo *resultRelInfo)
{
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	Plan	   *subplan = outerPlan(node);
	EState	   *estate = mtstate->ps.state;
	List	   *insertTargetList = NIL;
	bool		need_projection = false;
	ListCell   *l;

	/* Extract non-junk columns of the subplan's result tlist. */
	foreach(l, subplan->targetlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(l);

		if (!tle->resjunk)
			insertTargetList = lappend(insertTargetList, tle);
		else
			need_projection = true;
	}

	/*
	 * The junk-free list must produce a tuple suitable for the result
	 * relation.
	 */
	ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, insertTargetList);

	/* We'll need a slot matching the table's format. */
	resultRelInfo->ri_newTupleSlot =
		table_slot_create(resultRelInfo->ri_RelationDesc,
						  &estate->es_tupleTable);

	/* Build ProjectionInfo if needed (it probably isn't). */
	if (need_projection)
	{
		TupleDesc	relDesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);

		/* need an expression context to do the projection */
		if (mtstate->ps.ps_ExprContext == NULL)
			ExecAssignExprContext(estate, &mtstate->ps);

		resultRelInfo->ri_projectNew =
			ExecBuildProjectionInfo(insertTargetList,
									mtstate->ps.ps_ExprContext,
									resultRelInfo->ri_newTupleSlot,
									&mtstate->ps,
									relDesc);
	}

	resultRelInfo->ri_projectNewInfoValid = true;
}

/*
 * ExecInitUpdateProjection
 *		Do one-time initialization of projection data for UPDATE tuples.
 *
 * UPDATE always needs a projection, because (1) there's always some junk
 * attrs, and (2) we may need to merge values of not-updated columns from
 * the old tuple into the final tuple.  In UPDATE, the tuple arriving from
 * the subplan contains only new values for the changed columns, plus row
 * identity info in the junk attrs.
 *
 * This is "one-time" for any given result rel, but we might touch more than
 * one result rel in the course of an inherited UPDATE, and each one needs
 * its own projection due to possible column order variation.
 *
 * This is also a convenient place to verify that the output of an UPDATE
 * matches the target table (ExecBuildUpdateProjection does that).
 */
static void
ExecInitUpdateProjection(ModifyTableState *mtstate,
						 ResultRelInfo *resultRelInfo)
{
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	Plan	   *subplan = outerPlan(node);
	EState	   *estate = mtstate->ps.state;
	TupleDesc	relDesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
	int			whichrel;
	List	   *updateColnos;

	/*
	 * Usually, mt_lastResultIndex matches the target rel.  If it happens not
	 * to, we can get the index the hard way with an integer division.
	 */
	whichrel = mtstate->mt_lastResultIndex;
	if (resultRelInfo != mtstate->resultRelInfo + whichrel)
	{
		whichrel = resultRelInfo - mtstate->resultRelInfo;
		Assert(whichrel >= 0 && whichrel < mtstate->mt_nrels);
	}

	updateColnos = (List *) list_nth(node->updateColnosLists, whichrel);

	/*
	 * For UPDATE, we use the old tuple to fill up missing values in the tuple
	 * produced by the subplan to get the new tuple.  We need two slots, both
	 * matching the table's desired format.
	 */
	resultRelInfo->ri_oldTupleSlot =
		table_slot_create(resultRelInfo->ri_RelationDesc,
						  &estate->es_tupleTable);
	resultRelInfo->ri_newTupleSlot =
		table_slot_create(resultRelInfo->ri_RelationDesc,
						  &estate->es_tupleTable);

	/* need an expression context to do the projection */
	if (mtstate->ps.ps_ExprContext == NULL)
		ExecAssignExprContext(estate, &mtstate->ps);

	resultRelInfo->ri_projectNew =
		ExecBuildUpdateProjection(subplan->targetlist,
								  false,	/* subplan did the evaluation */
								  updateColnos,
								  relDesc,
								  mtstate->ps.ps_ExprContext,
								  resultRelInfo->ri_newTupleSlot,
								  &mtstate->ps);

	resultRelInfo->ri_projectNewInfoValid = true;
}

/*
 * ExecGetInsertNewTuple
 *		This prepares a "new" tuple ready to be inserted into given result
 *		relation, by removing any junk columns of the plan's output tuple
 *		and (if necessary) coercing the tuple to the right tuple format.
 */
static TupleTableSlot *
ExecGetInsertNewTuple(ResultRelInfo *relinfo,
					  TupleTableSlot *planSlot)
{
	ProjectionInfo *newProj = relinfo->ri_projectNew;
	ExprContext *econtext;

	/*
	 * If there's no projection to be done, just make sure the slot is of the
	 * right type for the target rel.  If the planSlot is the right type we
	 * can use it as-is, else copy the data into ri_newTupleSlot.
	 */
	if (newProj == NULL)
	{
		if (relinfo->ri_newTupleSlot->tts_ops != planSlot->tts_ops)
		{
			ExecCopySlot(relinfo->ri_newTupleSlot, planSlot);
			return relinfo->ri_newTupleSlot;
		}
		else
			return planSlot;
	}

	/*
	 * Else project; since the projection output slot is ri_newTupleSlot, this
	 * will also fix any slot-type problem.
	 *
	 * Note: currently, this is dead code, because INSERT cases don't receive
	 * any junk columns so there's never a projection to be done.
	 */
	econtext = newProj->pi_exprContext;
	econtext->ecxt_outertuple = planSlot;
	return ExecProject(newProj);
}

/*
 * ExecGetUpdateNewTuple
 *		This prepares a "new" tuple by combining an UPDATE subplan's output
 *		tuple (which contains values of changed columns) with unchanged
 *		columns taken from the old tuple.
 *
 * The subplan tuple might also contain junk columns, which are ignored.
 * Note that the projection also ensures we have a slot of the right type.
 */
TupleTableSlot *
ExecGetUpdateNewTuple(ResultRelInfo *relinfo,
					  TupleTableSlot *planSlot,
					  TupleTableSlot *oldSlot)
{
	ProjectionInfo *newProj = relinfo->ri_projectNew;
	ExprContext *econtext;

	/* Use a few extra Asserts to protect against outside callers */
	Assert(relinfo->ri_projectNewInfoValid);
	Assert(planSlot != NULL && !TTS_EMPTY(planSlot));
	Assert(oldSlot != NULL && !TTS_EMPTY(oldSlot));

	econtext = newProj->pi_exprContext;
	econtext->ecxt_outertuple = planSlot;
	econtext->ecxt_scantuple = oldSlot;
	return ExecProject(newProj);
}
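/*
 * (Unlike ExecGetInsertNewTuple, there is no projection-free fast path
 * here: ExecInitUpdateProjection always builds a projection, since the
 * subplan's output must be merged with the old tuple's unchanged columns.)
 */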

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		For INSERT, we have to insert the tuple into the target relation
 *		(or partition thereof) and insert appropriate tuples into the index
 *		relations.
 *
 *		slot contains the new tuple value to be stored.
 *		planSlot is the output of the ModifyTable's subplan; we use it
 *		to access "junk" columns that are not going to be stored.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 *
 *		This may change the currently active tuple conversion map in
 *		mtstate->mt_transition_capture, so the callers must take care to
 *		save the previous value to avoid losing track of it.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
		   ResultRelInfo *resultRelInfo,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   EState *estate,
		   bool canSetTag)
{
	Relation	resultRelationDesc;
	List	   *recheckIndexes = NIL;
	TupleTableSlot *result = NULL;
	TransitionCaptureState *ar_insert_trig_tcs;
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	OnConflictAction onconflict = node->onConflictAction;
	PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
	MemoryContext oldContext;

	/*
	 * If the input result relation is a partitioned table, find the leaf
	 * partition to insert the tuple into.
	 */
	if (proute)
	{
		ResultRelInfo *partRelInfo;

		slot = ExecPrepareTupleRouting(mtstate, estate, proute,
									   resultRelInfo, slot,
									   &partRelInfo);
		resultRelInfo = partRelInfo;
	}

	ExecMaterializeSlot(slot);

	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/*
	 * Open the table's indexes, if we have not done so already, so that we
	 * can add new index entries for the inserted tuple.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		resultRelInfo->ri_IndexRelationDescs == NULL)
		ExecOpenIndices(resultRelInfo, onconflict != ONCONFLICT_NONE);

	/*
	 * BEFORE ROW INSERT Triggers.
	 *
	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
	 * violations before firing these triggers, because they can change the
	 * values to insert.  Also, they can run arbitrary user-defined code with
	 * side-effects that we can't cancel by just not inserting the tuple.
	 */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
	{
		if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
			return NULL;		/* "do nothing" */
	}

	/* INSTEAD OF ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
	{
		if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
			return NULL;		/* "do nothing" */
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * GENERATED expressions might reference the tableoid column, so
		 * (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
									   CMD_INSERT);

		/*
		 * If the FDW supports batching, and batching is requested, accumulate
		 * rows and insert them in batches.  Otherwise use the per-row inserts.
		 */
		if (resultRelInfo->ri_BatchSize > 1)
		{
			/*
			 * If a certain number of tuples have already been accumulated, or
			 * a tuple has come for a different relation than that for the
			 * accumulated tuples, perform the batch insert
			 */
			if (resultRelInfo->ri_NumSlots == resultRelInfo->ri_BatchSize)
			{
				ExecBatchInsert(mtstate, resultRelInfo,
								resultRelInfo->ri_Slots,
								resultRelInfo->ri_PlanSlots,
								resultRelInfo->ri_NumSlots,
								estate, canSetTag);
				resultRelInfo->ri_NumSlots = 0;
			}
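			/*
			 * (Each result relation accumulates rows in its own ri_Slots
			 * array, and any rows still queued when the plan finishes are
			 * flushed from ExecModifyTable, outside this excerpt.)
			 */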

			oldContext = MemoryContextSwitchTo(estate->es_query_cxt);

			if (resultRelInfo->ri_Slots == NULL)
			{
				resultRelInfo->ri_Slots = palloc(sizeof(TupleTableSlot *) *
												 resultRelInfo->ri_BatchSize);
				resultRelInfo->ri_PlanSlots = palloc(sizeof(TupleTableSlot *) *
													 resultRelInfo->ri_BatchSize);
			}

			/*
			 * Initialize the batch slots.  We don't know how many slots will
			 * be needed, so we initialize them as the batch grows, and we
			 * keep them across batches.  To mitigate an inefficiency in how
			 * resource owner handles objects with many references (as with
			 * many slots all referencing the same tuple descriptor) we copy
			 * the tuple descriptor for each slot.
			 */
			if (resultRelInfo->ri_NumSlots >= resultRelInfo->ri_NumSlotsInitialized)
			{
				TupleDesc	tdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
				TupleDesc	plan_tdesc = CreateTupleDescCopy(planSlot->tts_tupleDescriptor);

				resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] =
					MakeSingleTupleTableSlot(tdesc, slot->tts_ops);

				resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] =
					MakeSingleTupleTableSlot(plan_tdesc, planSlot->tts_ops);

				/* remember how many batch slots we initialized */
				resultRelInfo->ri_NumSlotsInitialized++;
			}

			/*
			 * Copy the current tuples into the batch slots on every pass,
			 * not only when a slot is first initialized: the slots are kept
			 * across batches, so a previously initialized slot still holds
			 * the data of an already-flushed row.  (Note also that the plan
			 * slot gets a copy of its own tuple descriptor, which can differ
			 * from the target table's.)
			 */
			ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots],
						 slot);
			ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
						 planSlot);

			resultRelInfo->ri_NumSlots++;

			MemoryContextSwitchTo(oldContext);
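			/*
			 * The row is now only queued; the ExecForeignBatchInsert call
			 * happens when the batch is flushed, so there is no RETURNING
			 * tuple to hand back for this row.
			 */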
			return NULL;
		}

		/*
		 * insert into foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.  (This covers the case where the FDW replaced the slot.)
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
	}
	else
	{
		WCOKind		wco_kind;

		/*
		 * Constraints and GENERATED expressions might reference the tableoid
		 * column, so (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
									   CMD_INSERT);

		/*
		 * Check any RLS WITH CHECK policies.
		 *
		 * Normally we should check INSERT policies.  But if the insert is the
		 * result of a partition key update that moved the tuple to a new
		 * partition, we should instead check UPDATE policies, because we are
		 * executing policies defined on the target table, and not those
		 * defined on the child partitions.
		 */
		wco_kind = (mtstate->operation == CMD_UPDATE) ?
			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;

		/*
		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
		 * we are looking for at this point.
		 */
		if (resultRelInfo->ri_WithCheckOptions != NIL)
			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);

		/*
		 * Check the constraints of the tuple.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * Also check the tuple against the partition constraint, if there is
		 * one; except that if we got here via tuple-routing, we don't need to
		 * do so unless there's a BR trigger defined on the partition.
		 */
		if (resultRelationDesc->rd_rel->relispartition &&
			(resultRelInfo->ri_RootResultRelInfo == NULL ||
			 (resultRelInfo->ri_TrigDesc &&
			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
			ExecPartitionCheck(resultRelInfo, slot, estate, true);

		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
		{
			/* Perform a speculative insertion. */
			uint32		specToken;
			ItemPointerData conflictTid;
			bool		specConflict;
			List	   *arbiterIndexes;

			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;

			/*
			 * Do a non-conclusive check for conflicts first.
			 *
			 * We're not holding any locks yet, so this doesn't guarantee that
			 * the later insert won't conflict.  But it avoids leaving behind
			 * a lot of canceled speculative insertions, if you run a lot of
			 * INSERT ON CONFLICT statements that do conflict.
			 *
			 * We loop back here if we find a conflict below, either during
			 * the pre-check, or when we re-check after inserting the tuple
			 * speculatively.
			 */
	vlock:
			specConflict = false;
			if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate,
										   &conflictTid, arbiterIndexes))
			{
				/* committed conflict tuple found */
				if (onconflict == ONCONFLICT_UPDATE)
				{
					/*
					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
					 * part.  Be prepared to retry if the UPDATE fails because
					 * of another concurrent UPDATE/DELETE to the conflict
					 * tuple.
					 */
					TupleTableSlot *returning = NULL;

					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
											 &conflictTid, planSlot, slot,
											 estate, canSetTag, &returning))
					{
						InstrCountTuples2(&mtstate->ps, 1);
						return returning;
					}
					else
						goto vlock;
				}
				else
				{
					/*
					 * In case of ON CONFLICT DO NOTHING, do nothing.  However,
					 * verify that the tuple is visible to the executor's MVCC
					 * snapshot at higher isolation levels.
					 *
					 * Using ExecGetReturningSlot() to store the tuple for the
					 * recheck isn't that pretty, but we can't trivially use
					 * the input slot, because it might not be of a compatible
					 * type.  As there's no conflicting usage of
					 * ExecGetReturningSlot() in the DO NOTHING case...
					 */
					Assert(onconflict == ONCONFLICT_NOTHING);
					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
										ExecGetReturningSlot(estate, resultRelInfo));
					InstrCountTuples2(&mtstate->ps, 1);
					return NULL;
				}
			}

			/*
			 * Before we start insertion proper, acquire our "speculative
			 * insertion lock".  Others can use that to wait for us to decide
			 * if we're going to go ahead with the insertion, instead of
			 * waiting for the whole transaction to complete.
			 */
			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());

			/* insert the tuple, with the speculative token */
			table_tuple_insert_speculative(resultRelationDesc, slot,
										   estate->es_output_cid,
										   0,
										   NULL,
										   specToken);

			/* insert index entries for tuple */
			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
												   slot, estate, false, true,
												   &specConflict,
												   arbiterIndexes);

			/* adjust the tuple's state accordingly */
			table_tuple_complete_speculative(resultRelationDesc, slot,
											 specToken, !specConflict);
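			/*
			 * (If specConflict was set while inserting the index entries,
			 * the call above killed the speculatively-inserted tuple;
			 * otherwise it has been promoted to a normal, visible insertion.)
			 */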

			/*
			 * Wake up anyone waiting for our decision.  They will re-check
			 * the tuple, see that it's no longer speculative, and wait on our
			 * XID as if this was a regularly inserted tuple all along.  Or if
			 * we killed the tuple, they will see it's dead, and proceed as if
			 * the tuple never existed.
			 */
			SpeculativeInsertionLockRelease(GetCurrentTransactionId());

			/*
			 * If there was a conflict, start from the beginning.  We'll do
			 * the pre-check again, which will now find the conflicting tuple
			 * (unless it aborts before we get there).
			 */
			if (specConflict)
			{
				list_free(recheckIndexes);
				goto vlock;
			}

			/* Since there was no insertion conflict, we're done */
		}
		else
		{
			/* insert the tuple normally */
			table_tuple_insert(resultRelationDesc, slot,
							   estate->es_output_cid,
							   0, NULL);

			/* insert index entries for tuple */
			if (resultRelInfo->ri_NumIndices > 0)
				recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
													   slot, estate, false,
													   false, NULL, NIL);
		}
	}

	if (canSetTag)
		(estate->es_processed)++;

	/*
	 * If this insert is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition NEW TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_insert_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_new_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
							 NULL,
							 slot,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the NEW TABLE row, so make sure any AR
		 * INSERT trigger fired below doesn't capture it again.
		 */
		ar_insert_trig_tcs = NULL;
	}

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
						 ar_insert_trig_tcs);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually inserting the
	 * record into the heap and all indexes.
	 *
	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
	 * tuple will never be seen, if it violates the WITH CHECK OPTION.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		result = ExecProcessReturning(resultRelInfo, slot, planSlot);

	return result;
}

/* ----------------------------------------------------------------
 *		ExecBatchInsert
 *
 *		Insert multiple tuples in an efficient way.
 *		Currently, this handles inserting into a foreign table without a
 *		RETURNING clause.
 * ----------------------------------------------------------------
 */
static void
ExecBatchInsert(ModifyTableState *mtstate,
				ResultRelInfo *resultRelInfo,
				TupleTableSlot **slots,
				TupleTableSlot **planSlots,
				int numSlots,
				EState *estate,
				bool canSetTag)
{
	int			i;
	int			numInserted = numSlots;
	TupleTableSlot *slot = NULL;
	TupleTableSlot **rslots;

	/*
	 * insert into foreign table: let the FDW do it
	 */
	rslots = resultRelInfo->ri_FdwRoutine->ExecForeignBatchInsert(estate,
																  resultRelInfo,
																  slots,
																  planSlots,
																  &numInserted);
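	/*
	 * (numInserted is an in/out argument: the FDW reports back how many rows
	 * it actually inserted, which may be fewer than numSlots, and rslots
	 * holds the corresponding returned rows.)
	 */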

	for (i = 0; i < numInserted; i++)
	{
		slot = rslots[i];

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

		/* AFTER ROW INSERT Triggers */
		ExecARInsertTriggers(estate, resultRelInfo, slot, NIL,
							 mtstate->mt_transition_capture);

		/*
		 * Check any WITH CHECK OPTION constraints from parent views.  See the
		 * comment in ExecInsert.
		 */
		if (resultRelInfo->ri_WithCheckOptions != NIL)
			ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
	}

	if (canSetTag && numInserted > 0)
		estate->es_processed += numInserted;
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *
 *		When deleting from a table, tupleid identifies the tuple to
 *		delete and oldtuple is NULL.  When deleting from a view,
 *		oldtuple is passed to the INSTEAD OF triggers and identifies
 *		what to delete, and tupleid is invalid.  When deleting from a
 *		foreign table, tupleid is invalid; the FDW has to figure out
 *		which row to delete using data from the planSlot.  oldtuple is
 *		passed to foreign table triggers; it is NULL when the foreign
 *		table has no relevant triggers.  We use tupleDeleted to indicate
 *		whether the tuple is actually deleted; callers can use it to
 *		decide whether to continue the operation.  When this DELETE is a
 *		part of an UPDATE of partition-key, then the slot returned by
 *		EvalPlanQual() is passed back using output parameter epqslot.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecDelete(ModifyTableState *mtstate,
		   ResultRelInfo *resultRelInfo,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool processReturning,
		   bool canSetTag,
		   bool changingPart,
		   bool *tupleDeleted,
		   TupleTableSlot **epqreturnslot)
{
	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
	TM_Result	result;
	TM_FailureData tmfd;
	TupleTableSlot *slot = NULL;
	TransitionCaptureState *ar_delete_trig_tcs;

	if (tupleDeleted)
		*tupleDeleted = false;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
										tupleid, oldtuple, epqreturnslot);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}

	/* INSTEAD OF ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
	{
		bool		dodelete;

		Assert(oldtuple != NULL);
		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * delete from foreign table: let the FDW do it
		 *
		 * We offer the returning slot as a place to store RETURNING data,
		 * although the FDW can return some other slot if it wants.
		 */
		slot = ExecGetReturningSlot(estate, resultRelInfo);
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * RETURNING expressions might reference the tableoid column, so
		 * (re)initialize tts_tableOid before evaluating them.
		 */
		if (TTS_EMPTY(slot))
			ExecStoreAllNullTuple(slot);

		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		/*
		 * delete the tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be deleted is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
ldelete:;
		result = table_tuple_delete(resultRelationDesc, tupleid,
									estate->es_output_cid,
									estate->es_snapshot,
									estate->es_crosscheck_snapshot,
									true /* wait for commit */ ,
									&tmfd,
									changingPart);

		switch (result)
		{
			case TM_SelfModified:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join DELETE
				 * where multiple tuples join to the same target tuple.  This
				 * is somewhat questionable, but Postgres has always allowed
				 * it: we just ignore additional deletion attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the deletion, but it is equally unsafe to
				 * proceed.  We don't want to discard the original DELETE
				 * while keeping the triggered actions based on its deletion;
				 * and it would be no better to allow the original DELETE
				 * while discarding updates that it triggered.  The row update
				 * carries some information that might be important according
				 * to business rules; so throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the DELETE and then return NULL to cancel
				 * the outer delete.
				 */
				if (tmfd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already deleted by self; nothing to do */
				return NULL;

			case TM_Ok:
				break;

			case TM_Updated:
				{
					TupleTableSlot *inputslot;
					TupleTableSlot *epqslot;

					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));

					/*
					 * Already know that we're going to need to do EPQ, so
					 * fetch tuple directly into the right slot.
					 */
					EvalPlanQualBegin(epqstate);
					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
												 resultRelInfo->ri_RangeTableIndex);

					result = table_tuple_lock(resultRelationDesc, tupleid,
											  estate->es_snapshot,
											  inputslot, estate->es_output_cid,
											  LockTupleExclusive, LockWaitBlock,
											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
											  &tmfd);

					switch (result)
					{
						case TM_Ok:
							Assert(tmfd.traversed);
							epqslot = EvalPlanQual(epqstate,
												   resultRelationDesc,
												   resultRelInfo->ri_RangeTableIndex,
												   inputslot);
							if (TupIsNull(epqslot))
								/* Tuple not passing quals anymore, exiting... */
								return NULL;

							/*
							 * If requested, skip delete and pass back the
							 * updated row.
							 */
							if (epqreturnslot)
							{
								*epqreturnslot = epqslot;
								return NULL;
							}
							else
								goto ldelete;

						case TM_SelfModified:

							/*
							 * This can be reached when following an update
							 * chain from a tuple updated by another session,
							 * reaching a tuple that was already updated in
							 * this transaction.  If previously updated by this
							 * command, ignore the delete, otherwise error
							 * out.
							 *
							 * See also TM_SelfModified response to
							 * table_tuple_delete() above.
							 */
							if (tmfd.cmax != estate->es_output_cid)
								ereport(ERROR,
										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
							return NULL;

						case TM_Deleted:
							/* tuple already deleted; nothing to do */
							return NULL;

						default:

							/*
							 * TM_Invisible should be impossible because we're
							 * waiting for updated row versions, and would
							 * already have errored out if the first version
							 * is invisible.
							 *
							 * TM_Updated should be impossible, because we're
							 * locking the latest version via
							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
							 */
							elog(ERROR, "unexpected table_tuple_lock status: %u",
								 result);
							return NULL;
					}

					Assert(false);
					break;
				}

			case TM_Deleted:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent delete")));
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized table_tuple_delete status: %u",
					 result);
				return NULL;
		}

		/*
		 * Note: Normally one would think that we have to delete index tuples
		 * associated with the heap tuple now...
		 *
		 * ... but in POSTGRES, we have no need to do this because VACUUM will
		 * take care of it later.  We can't delete index tuples immediately
		 * anyway, since the tuple is still visible to other transactions.
		 */
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* Tell caller that the delete actually happened. */
	if (tupleDeleted)
		*tupleDeleted = true;

	/*
	 * If this delete is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition OLD TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_delete_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_old_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo,
							 tupleid,
							 oldtuple,
							 NULL,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the OLD TABLE row, so make sure any AR
		 * DELETE trigger fired below doesn't capture it again.
		 */
		ar_delete_trig_tcs = NULL;
	}

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
						 ar_delete_trig_tcs);

	/* Process RETURNING if present and if requested */
	if (processReturning && resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.  We can use the trigger tuple slot.
		 */
		TupleTableSlot *rslot;

		if (resultRelInfo->ri_FdwRoutine)
		{
			/* FDW must have provided a slot containing the deleted row */
			Assert(!TupIsNull(slot));
		}
		else
		{
			slot = ExecGetReturningSlot(estate, resultRelInfo);
			if (oldtuple != NULL)
			{
				ExecForceStoreHeapTuple(oldtuple, slot, false);
			}
			else
			{
				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
												   SnapshotAny, slot))
					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
			}
		}

		rslot = ExecProcessReturning(resultRelInfo, slot, planSlot);

		/*
		 * Before releasing the target tuple again, make sure rslot has a
		 * local copy of any pass-by-reference values.
		 */
		ExecMaterializeSlot(rslot);

		ExecClearTuple(slot);

		return rslot;
	}

	return NULL;
}

/*
 * ExecCrossPartitionUpdate --- Move an updated tuple to another partition.
 *
 * This works by first deleting the old tuple from the current partition,
 * followed by inserting the new tuple into the root parent table, that is,
 * mtstate->rootResultRelInfo.  It will be re-routed from there to the
 * correct partition.
 *
 * Returns true if the tuple has been successfully moved, or if it's found
 * that the tuple was concurrently deleted so there's nothing more to do
 * for the caller.
 *
 * False is returned if the tuple we're trying to move is found to have been
 * concurrently updated.  In that case, the caller must check if the updated
 * tuple that's returned in *retry_slot still needs to be re-routed, and call
 * this function again or perform a regular update accordingly.
 */
static bool
ExecCrossPartitionUpdate(ModifyTableState *mtstate,
						 ResultRelInfo *resultRelInfo,
						 ItemPointer tupleid, HeapTuple oldtuple,
						 TupleTableSlot *slot, TupleTableSlot *planSlot,
						 EPQState *epqstate, bool canSetTag,
						 TupleTableSlot **retry_slot,
						 TupleTableSlot **inserted_tuple)
{
	EState	   *estate = mtstate->ps.state;
	TupleConversionMap *tupconv_map;
	bool		tuple_deleted;
	TupleTableSlot *epqslot = NULL;

	*inserted_tuple = NULL;
	*retry_slot = NULL;

	/*
	 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the original row
	 * to migrate to a different partition.  Maybe this can be implemented
	 * some day, but it seems a fringe feature with little redeeming value.
	 */
	if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("invalid ON UPDATE specification"),
				 errdetail("The result tuple would appear in a different partition than the original tuple.")));

	/*
	 * When an UPDATE is run directly on a leaf partition, simply fail with a
	 * partition constraint violation error.
	 */
	if (resultRelInfo == mtstate->rootResultRelInfo)
		ExecPartitionCheckEmitError(resultRelInfo, slot, estate);

	/* Initialize tuple routing info if not already done. */
	if (mtstate->mt_partition_tuple_routing == NULL)
	{
		Relation	rootRel = mtstate->rootResultRelInfo->ri_RelationDesc;
		MemoryContext oldcxt;

		/* Things built here have to last for the query duration. */
		oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);

		mtstate->mt_partition_tuple_routing =
			ExecSetupPartitionTupleRouting(estate, rootRel);

		/*
		 * Before a partition's tuple can be re-routed, it must first be
		 * converted to the root's format, so we'll need a slot for storing
		 * such tuples.
		 */
		Assert(mtstate->mt_root_tuple_slot == NULL);
		mtstate->mt_root_tuple_slot = table_slot_create(rootRel, NULL);

		MemoryContextSwitchTo(oldcxt);
	}

	/*
	 * Row movement, part 1.  Delete the tuple, but skip RETURNING processing.
	 * We want to return rows from INSERT.
	 */
	ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, planSlot,
			   epqstate, estate,
			   false,			/* processReturning */
			   false,			/* canSetTag */
			   true,			/* changingPart */
			   &tuple_deleted, &epqslot);

	/*
	 * If for some reason the DELETE didn't happen (e.g. a trigger prevented
	 * it, or it was already deleted by self, or it was concurrently deleted
	 * by another transaction), then we should skip the insert as well;
	 * otherwise, an UPDATE could cause an increase in the total number of
	 * rows across all partitions, which is clearly wrong.
	 *
	 * For a normal UPDATE, the case where the tuple has been the subject of a
	 * concurrent UPDATE or DELETE would be handled by the EvalPlanQual
	 * machinery, but for an UPDATE that we've translated into a DELETE from
	 * this partition and an INSERT into some other partition, that's not
	 * available, because CTID chains can't span relation boundaries.  We
	 * mimic the semantics to a limited extent by skipping the INSERT if the
	 * DELETE fails to find a tuple.  This ensures that two concurrent
	 * attempts to UPDATE the same tuple at the same time can't turn one tuple
	 * into two, and that an UPDATE of a just-deleted tuple can't resurrect
	 * it.
	 */
	if (!tuple_deleted)
	{
		/*
		 * epqslot will be typically NULL.  But when ExecDelete() finds that
		 * another transaction has concurrently updated the same row, it
		 * re-fetches the row, skips the delete, and epqslot is set to the
		 * re-fetched tuple slot.  In that case, we need to do all the checks
		 * again.
		 */
		if (TupIsNull(epqslot))
			return true;
		else
		{
			/* Fetch the most recent version of old tuple. */
			TupleTableSlot *oldSlot;

			/* ... but first, make sure ri_oldTupleSlot is initialized. */
			if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
				ExecInitUpdateProjection(mtstate, resultRelInfo);
			oldSlot = resultRelInfo->ri_oldTupleSlot;
			if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
											   tupleid,
											   SnapshotAny,
											   oldSlot))
				elog(ERROR, "failed to fetch tuple being updated");
			*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
												oldSlot);
			return false;
		}
	}

	/*
	 * resultRelInfo is one of the per-relation resultRelInfos.  So we should
	 * convert the tuple into root's tuple descriptor if needed, since
	 * ExecInsert() starts the search from root.
	 */
	tupconv_map = ExecGetChildToRootMap(resultRelInfo);
	if (tupconv_map != NULL)
		slot = execute_attr_map_slot(tupconv_map->attrMap,
									 slot,
									 mtstate->mt_root_tuple_slot);

	/* Tuple routing starts from the root table. */
	*inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot,
								 planSlot, estate, canSetTag);

	/*
	 * Reset the transition state that may possibly have been written by
	 * INSERT.
	 */
	if (mtstate->mt_transition_capture)
		mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;

	/* We're done moving. */
	return true;
}

/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted.  This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database.
 *
 *		When updating a table, tupleid identifies the tuple to
 *		update and oldtuple is NULL.  When updating a view, oldtuple
 *		is passed to the INSTEAD OF triggers and identifies what to
 *		update, and tupleid is invalid.  When updating a foreign table,
 *		tupleid is invalid; the FDW has to figure out which row to
 *		update using data from the planSlot.  oldtuple is passed to
 *		foreign table triggers; it is NULL when the foreign table has
 *		no relevant triggers.
 *
 *		slot contains the new tuple value to be stored.
 *		planSlot is the output of the ModifyTable's subplan; we use it
 *		to access values from other input tables (for RETURNING),
 *		row-ID junk columns, etc.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecUpdate(ModifyTableState *mtstate,
		   ResultRelInfo *resultRelInfo,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool canSetTag)
{
	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
	TM_Result	result;
	TM_FailureData tmfd;
	List	   *recheckIndexes = NIL;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	ExecMaterializeSlot(slot);

	/*
	 * Open the table's indexes, if we have not done so already, so that we
	 * can add new index entries for the updated tuple.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		resultRelInfo->ri_IndexRelationDescs == NULL)
		ExecOpenIndices(resultRelInfo, false);

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_before_row)
	{
		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
								  tupleid, oldtuple, slot))
			return NULL;		/* "do nothing" */
	}

	/* INSTEAD OF ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
	{
		if (!ExecIRUpdateTriggers(estate, resultRelInfo,
								  oldtuple, slot))
			return NULL;		/* "do nothing" */
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * GENERATED expressions might reference the tableoid column, so
		 * (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
									   CMD_UPDATE);

		/*
		 * update in foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.  (This covers the case where the FDW replaced the slot.)
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		LockTupleMode lockmode;
		bool		partition_constraint_failed;
		bool		update_indexes;

		/*
		 * Constraints and GENERATED expressions might reference the tableoid
		 * column, so (re-)initialize tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
									   CMD_UPDATE);

		/*
		 * Check any RLS UPDATE WITH CHECK policies
		 *
		 * If we generate a new candidate tuple after EvalPlanQual testing, we
		 * must loop back here and recheck any RLS policies and constraints.
		 * (We don't need to redo triggers, however.  If there are any BEFORE
		 * triggers then trigger.c will have done table_tuple_lock to lock the
		 * correct tuple, so there's no need to do them again.)
		 */
lreplace:;

		/* ensure slot is independent, consider e.g. EPQ */
		ExecMaterializeSlot(slot);
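		/*
		 * (The slot may still reference memory it does not own, e.g. an
		 * EvalPlanQual slot if we looped back here; materializing gives it a
		 * private copy first.)
		 */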
1714 * If partition constraint fails, this row might get moved to another
1715 * partition, in which case we should check the RLS CHECK policy just
1716 * before inserting into the new partition, rather than doing it here.
1717 * This is because a trigger on that partition might again change the
1718 * row. So skip the WCO checks if the partition constraint fails.
1720 partition_constraint_failed =
1721 resultRelationDesc->rd_rel->relispartition &&
1722 !ExecPartitionCheck(resultRelInfo, slot, estate, false);
1724 if (!partition_constraint_failed &&
1725 resultRelInfo->ri_WithCheckOptions != NIL)
1728 * ExecWithCheckOptions() will skip any WCOs which are not of the
1729 * kind we are looking for at this point.
1731 ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
1732 resultRelInfo, slot, estate);
1736 * If a partition check failed, try to move the row into the right
1737 * partition.
1739 if (partition_constraint_failed)
1741 TupleTableSlot *inserted_tuple,
1742 *retry_slot;
1743 bool retry;
1746 * ExecCrossPartitionUpdate will first DELETE the row from the
1747 * partition it's currently in and then insert it back into the
1748 * root table, which will re-route it to the correct partition.
1749 * The first part may have to be repeated if it is detected that
1750 * the tuple we're trying to move has been concurrently updated.
1752 retry = !ExecCrossPartitionUpdate(mtstate, resultRelInfo, tupleid,
1753 oldtuple, slot, planSlot,
1754 epqstate, canSetTag,
1755 &retry_slot, &inserted_tuple);
1756 if (retry)
1758 slot = retry_slot;
1759 goto lreplace;
1762 return inserted_tuple;
1766 * Check the constraints of the tuple. We've already checked the
1767 * partition constraint above; however, we must still ensure the tuple
1768 * passes all other constraints, so we will call ExecConstraints() and
1769 * have it validate all remaining checks.
1771 if (resultRelationDesc->rd_att->constr)
1772 ExecConstraints(resultRelInfo, slot, estate);
1775 * replace the heap tuple
1777 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
1778 * that the row to be updated is visible to that snapshot, and throw a
1779 * can't-serialize error if not. This is a special-case behavior
1780 * needed for referential integrity updates in transaction-snapshot
1781 * mode transactions.
1783 result = table_tuple_update(resultRelationDesc, tupleid, slot,
1784 estate->es_output_cid,
1785 estate->es_snapshot,
1786 estate->es_crosscheck_snapshot,
1787 true /* wait for commit */ ,
1788 &tmfd, &lockmode, &update_indexes);
1790 switch (result)
1792 case TM_SelfModified:
1795 * The target tuple was already updated or deleted by the
1796 * current command, or by a later command in the current
1797 * transaction. The former case is possible in a join UPDATE
1798 * where multiple tuples join to the same target tuple. This
1799 * is pretty questionable, but Postgres has always allowed it:
1800 * we just execute the first update action and ignore
1801 * additional update attempts.
1803 * The latter case arises if the tuple is modified by a
1804 * command in a BEFORE trigger, or perhaps by a command in a
1805 * volatile function used in the query. In such situations we
1806 * should not ignore the update, but it is equally unsafe to
1807 * proceed. We don't want to discard the original UPDATE
1808 * while keeping the triggered actions based on it; and we
1809 * have no principled way to merge this update with the
1810 * previous ones. So throwing an error is the only safe
1811 * course.
1813 * If a trigger actually intends this type of interaction, it
1814 * can re-execute the UPDATE (assuming it can figure out how)
1815 * and then return NULL to cancel the outer update.
1817 if (tmfd.cmax != estate->es_output_cid)
1818 ereport(ERROR,
1819 (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1820 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1821 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1823 /* Else, already updated by self; nothing to do */
1824 return NULL;
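/*
 * Illustrative example, not part of the original source: one way to
 * reach the error above is a BEFORE UPDATE trigger that modifies other
 * rows targeted by the same statement ("t", "bump_other" hypothetical;
 * the pg_trigger_depth() guard prevents recursive cascading):
 *
 *   CREATE FUNCTION bump_other() RETURNS trigger
 *   LANGUAGE plpgsql AS $$
 *   BEGIN
 *       IF pg_trigger_depth() = 1 THEN
 *           UPDATE t SET n = n + 1 WHERE id <> NEW.id;
 *       END IF;
 *       RETURN NEW;
 *   END $$;
 *   CREATE TRIGGER t_bu BEFORE UPDATE ON t
 *       FOR EACH ROW EXECUTE FUNCTION bump_other();
 *
 *   UPDATE t SET n = n + 1;   -- errors with the message above
 */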
1826 case TM_Ok:
1827 break;
1829 case TM_Updated:
1831 TupleTableSlot *inputslot;
1832 TupleTableSlot *epqslot;
1833 TupleTableSlot *oldSlot;
1835 if (IsolationUsesXactSnapshot())
1836 ereport(ERROR,
1837 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1838 errmsg("could not serialize access due to concurrent update")));
1841 * Already know that we're going to need to do EPQ, so
1842 * fetch tuple directly into the right slot.
1844 inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
1845 resultRelInfo->ri_RangeTableIndex);
1847 result = table_tuple_lock(resultRelationDesc, tupleid,
1848 estate->es_snapshot,
1849 inputslot, estate->es_output_cid,
1850 lockmode, LockWaitBlock,
1851 TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
1852 &tmfd);
1854 switch (result)
1856 case TM_Ok:
1857 Assert(tmfd.traversed);
1859 epqslot = EvalPlanQual(epqstate,
1860 resultRelationDesc,
1861 resultRelInfo->ri_RangeTableIndex,
1862 inputslot);
1863 if (TupIsNull(epqslot))
1864 /* Tuple not passing quals anymore, exiting... */
1865 return NULL;
1867 /* Make sure ri_oldTupleSlot is initialized. */
1868 if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
1869 ExecInitUpdateProjection(mtstate, resultRelInfo);
1871 /* Fetch the most recent version of old tuple. */
1872 oldSlot = resultRelInfo->ri_oldTupleSlot;
1873 if (!table_tuple_fetch_row_version(resultRelationDesc,
1874 tupleid,
1875 SnapshotAny,
1876 oldSlot))
1877 elog(ERROR, "failed to fetch tuple being updated");
1878 slot = ExecGetUpdateNewTuple(resultRelInfo,
1879 epqslot, oldSlot);
1880 goto lreplace;
1882 case TM_Deleted:
1883 /* tuple already deleted; nothing to do */
1884 return NULL;
1886 case TM_SelfModified:
1889 * This can be reached when following an update
1890 * chain from a tuple updated by another session,
1891 * reaching a tuple that was already updated in
1892 * this transaction. If previously modified by
1893 * this command, ignore the redundant update,
1894 * otherwise error out.
1896 * See also TM_SelfModified response to
1897 * table_tuple_update() above.
1899 if (tmfd.cmax != estate->es_output_cid)
1900 ereport(ERROR,
1901 (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1902 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1903 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1904 return NULL;
1906 default:
1907 /* see table_tuple_lock call in ExecDelete() */
1908 elog(ERROR, "unexpected table_tuple_lock status: %u",
1909 result);
1910 return NULL;
1914 break;
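/*
 * Illustrative example, not part of the original source: the TM_Updated
 * path above is the normal READ COMMITTED race ("t" hypothetical):
 *
 *   -- session A                    -- session B
 *   BEGIN;
 *   UPDATE t SET n = n + 1
 *   WHERE id = 1;
 *                                   UPDATE t SET n = n + 1
 *                                   WHERE id = 1;   -- blocks
 *   COMMIT;
 *                                   -- B locks the committed row
 *                                   -- version, rechecks its quals via
 *                                   -- EvalPlanQual, and applies its
 *                                   -- update on top
 */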
1916 case TM_Deleted:
1917 if (IsolationUsesXactSnapshot())
1918 ereport(ERROR,
1919 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1920 errmsg("could not serialize access due to concurrent delete")));
1921 /* tuple already deleted; nothing to do */
1922 return NULL;
1924 default:
1925 elog(ERROR, "unrecognized table_tuple_update status: %u",
1926 result);
1927 return NULL;
1930 /* insert index entries for tuple if necessary */
1931 if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
1932 recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
1933 slot, estate, true, false,
1934 NULL, NIL);
1937 if (canSetTag)
1938 (estate->es_processed)++;
1940 /* AFTER ROW UPDATE Triggers */
1941 ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
1942 recheckIndexes,
1943 mtstate->operation == CMD_INSERT ?
1944 mtstate->mt_oc_transition_capture :
1945 mtstate->mt_transition_capture);
1947 list_free(recheckIndexes);
1950 * Check any WITH CHECK OPTION constraints from parent views. We are
1951 * required to do this after testing all constraints and uniqueness
1952 * violations per the SQL spec, so we do it after actually updating the
1953 * record in the heap and all indexes.
1955 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
1956 * are looking for at this point.
1958 if (resultRelInfo->ri_WithCheckOptions != NIL)
1959 ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
1961 /* Process RETURNING if present */
1962 if (resultRelInfo->ri_projectReturning)
1963 return ExecProcessReturning(resultRelInfo, slot, planSlot);
1965 return NULL;
1969 * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
1971 * Try to lock the tuple for update as part of speculative insertion.
1972 * If a qual originating from ON CONFLICT DO UPDATE is satisfied,
1973 * update the row (but lock it in any case, even if it does not
1974 * satisfy estate's snapshot).
1976 * Returns true if we're done (with or without an update), or false if
1977 * the caller must retry the INSERT from scratch.
1979 static bool
1980 ExecOnConflictUpdate(ModifyTableState *mtstate,
1981 ResultRelInfo *resultRelInfo,
1982 ItemPointer conflictTid,
1983 TupleTableSlot *planSlot,
1984 TupleTableSlot *excludedSlot,
1985 EState *estate,
1986 bool canSetTag,
1987 TupleTableSlot **returning)
1989 ExprContext *econtext = mtstate->ps.ps_ExprContext;
1990 Relation relation = resultRelInfo->ri_RelationDesc;
1991 ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
1992 TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
1993 TM_FailureData tmfd;
1994 LockTupleMode lockmode;
1995 TM_Result test;
1996 Datum xminDatum;
1997 TransactionId xmin;
1998 bool isnull;
2000 /* Determine lock mode to use */
2001 lockmode = ExecUpdateLockMode(estate, resultRelInfo);
2004 * Lock tuple for update. Don't follow updates when tuple cannot be
2005 * locked without doing so. A row locking conflict here means our
2006 * previous conclusion that the tuple is conclusively committed is not
2007 * true anymore.
2009 test = table_tuple_lock(relation, conflictTid,
2010 estate->es_snapshot,
2011 existing, estate->es_output_cid,
2012 lockmode, LockWaitBlock, 0,
2013 &tmfd);
2014 switch (test)
2016 case TM_Ok:
2017 /* success! */
2018 break;
2020 case TM_Invisible:
2023 * This can occur when a just-inserted tuple is updated again in
2024 * the same command, e.g. because multiple rows with the same
2025 * conflicting key values are inserted.
2027 * This is somewhat similar to the ExecUpdate() TM_SelfModified
2028 * case. We do not want to proceed because it would lead to the
2029 * same row being updated a second time in some unspecified order,
2030 * and in contrast to plain UPDATEs there's no historical behavior
2031 * to break.
2033 * It is the user's responsibility to prevent this situation from
2034 * occurring. These problems are why SQL:2003 similarly specifies
2035 * that for SQL MERGE, an exception must be raised in the event of
2036 * an attempt to update the same row twice.
2038 xminDatum = slot_getsysattr(existing,
2039 MinTransactionIdAttributeNumber,
2040 &isnull);
2041 Assert(!isnull);
2042 xmin = DatumGetTransactionId(xminDatum);
2044 if (TransactionIdIsCurrentTransactionId(xmin))
2045 ereport(ERROR,
2046 (errcode(ERRCODE_CARDINALITY_VIOLATION),
2047 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
2048 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
2050 /* This shouldn't happen */
2051 elog(ERROR, "attempted to lock invisible tuple");
2052 break;
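/*
 * Illustrative example, not part of the original source: the
 * cardinality-violation error above is reached when a single command
 * proposes two rows that conflict on the same key ("t" hypothetical,
 * unique on id):
 *
 *   INSERT INTO t VALUES (1, 'a'), (1, 'b')
 *   ON CONFLICT (id) DO UPDATE SET v = EXCLUDED.v;
 *   -- ERROR:  ON CONFLICT DO UPDATE command cannot affect row a
 *   -- second time
 */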
2054 case TM_SelfModified:
2057 * This state should never be reached: since a dirty snapshot is
2058 * used to find conflicting tuples, speculative insertion would not
2059 * have identified this row as a conflict.
2061 elog(ERROR, "unexpected self-updated tuple");
2062 break;
2064 case TM_Updated:
2065 if (IsolationUsesXactSnapshot())
2066 ereport(ERROR,
2067 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2068 errmsg("could not serialize access due to concurrent update")));
2071 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
2072 * a partitioned table, we shouldn't reach a case where the tuple
2073 * to be locked has been moved to another partition by a concurrent
2074 * update of the partition key.
2076 Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
2079 * Tell caller to try again from the very start.
2081 * It does not make sense to use the usual EvalPlanQual() style
2082 * loop here, as the new version of the row might not conflict
2083 * anymore, or the conflicting tuple has actually been deleted.
2085 ExecClearTuple(existing);
2086 return false;
2088 case TM_Deleted:
2089 if (IsolationUsesXactSnapshot())
2090 ereport(ERROR,
2091 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2092 errmsg("could not serialize access due to concurrent delete")));
2094 /* see TM_Updated case */
2095 Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
2096 ExecClearTuple(existing);
2097 return false;
2099 default:
2100 elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
2103 /* Success, the tuple is locked. */
2106 * Verify that the tuple is visible to our MVCC snapshot if the current
2107 * isolation level mandates that.
2109 * It's not sufficient to rely on the check within ExecUpdate(), as e.g.
2110 * the ON CONFLICT ... WHERE clause may prevent us from reaching it.
2112 * This means we only ever continue when a new command in the current
2113 * transaction could see the row, even though in READ COMMITTED mode the
2114 * tuple will not be visible according to the current statement's
2115 * snapshot. This is in line with the way UPDATE deals with newer tuple
2116 * versions.
2118 ExecCheckTupleVisible(estate, relation, existing);
2121 * Make tuple and any needed join variables available to ExecQual and
2122 * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while
2123 * the target's existing tuple is installed in the scantuple. EXCLUDED
2124 * has been made to reference INNER_VAR in setrefs.c, but there is no
2125 * other redirection.
2127 econtext->ecxt_scantuple = existing;
2128 econtext->ecxt_innertuple = excludedSlot;
2129 econtext->ecxt_outertuple = NULL;
2131 if (!ExecQual(onConflictSetWhere, econtext))
2133 ExecClearTuple(existing); /* see return below */
2134 InstrCountFiltered1(&mtstate->ps, 1);
2135 return true; /* done with the tuple */
2138 if (resultRelInfo->ri_WithCheckOptions != NIL)
2141 * Check target's existing tuple against UPDATE-applicable USING
2142 * security barrier quals (if any), enforced here as RLS checks/WCOs.
2144 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
2145 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
2146 * but that's almost the extent of its special handling for ON
2147 * CONFLICT DO UPDATE.
2149 * The rewriter will also have associated UPDATE applicable straight
2150 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
2151 * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
2152 * kinds, so there is no danger of spurious over-enforcement in the
2153 * INSERT or UPDATE path.
2155 ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
2156 existing,
2157 mtstate->ps.state);
2160 /* Project the new tuple version */
2161 ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
2164 * Note that it is possible that the target tuple has been modified in
2165 * this session, after the above table_tuple_lock. We choose to not error
2166 * out in that case, in line with ExecUpdate's treatment of similar cases.
2167 * This can happen if an UPDATE is triggered from within ExecQual(),
2168 * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
2169 * wCTE in the ON CONFLICT's SET.
2172 /* Execute UPDATE with projection */
2173 *returning = ExecUpdate(mtstate, resultRelInfo, conflictTid, NULL,
2174 resultRelInfo->ri_onConflict->oc_ProjSlot,
2175 planSlot,
2176 &mtstate->mt_epqstate, mtstate->ps.state,
2177 canSetTag);
2180 * Clear out the existing tuple, as there might not be another conflict
2181 * among the next input rows. We don't want to hold resources till the
2182 * end of the query.
2184 ExecClearTuple(existing);
2185 return true;
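/*
 * Illustrative example, not part of the original source: the
 * onConflictSetWhere test above implements DO UPDATE's optional WHERE
 * clause; conflicting rows whose existing version fails it are locked
 * but left unmodified ("t" hypothetical, unique on id):
 *
 *   INSERT INTO t VALUES (1, 5)
 *   ON CONFLICT (id) DO UPDATE SET n = EXCLUDED.n
 *   WHERE t.n < EXCLUDED.n;
 */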
2190 * Process BEFORE EACH STATEMENT triggers
2192 static void
2193 fireBSTriggers(ModifyTableState *node)
2195 ModifyTable *plan = (ModifyTable *) node->ps.plan;
2196 ResultRelInfo *resultRelInfo = node->rootResultRelInfo;
2198 switch (node->operation)
2200 case CMD_INSERT:
2201 ExecBSInsertTriggers(node->ps.state, resultRelInfo);
2202 if (plan->onConflictAction == ONCONFLICT_UPDATE)
2203 ExecBSUpdateTriggers(node->ps.state,
2204 resultRelInfo);
2205 break;
2206 case CMD_UPDATE:
2207 ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
2208 break;
2209 case CMD_DELETE:
2210 ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
2211 break;
2212 default:
2213 elog(ERROR, "unknown operation");
2214 break;
2219 * Process AFTER EACH STATEMENT triggers
2221 static void
2222 fireASTriggers(ModifyTableState *node)
2224 ModifyTable *plan = (ModifyTable *) node->ps.plan;
2225 ResultRelInfo *resultRelInfo = node->rootResultRelInfo;
2227 switch (node->operation)
2229 case CMD_INSERT:
2230 if (plan->onConflictAction == ONCONFLICT_UPDATE)
2231 ExecASUpdateTriggers(node->ps.state,
2232 resultRelInfo,
2233 node->mt_oc_transition_capture);
2234 ExecASInsertTriggers(node->ps.state, resultRelInfo,
2235 node->mt_transition_capture);
2236 break;
2237 case CMD_UPDATE:
2238 ExecASUpdateTriggers(node->ps.state, resultRelInfo,
2239 node->mt_transition_capture);
2240 break;
2241 case CMD_DELETE:
2242 ExecASDeleteTriggers(node->ps.state, resultRelInfo,
2243 node->mt_transition_capture);
2244 break;
2245 default:
2246 elog(ERROR, "unknown operation");
2247 break;
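/*
 * Illustrative example, not part of the original source: as coded
 * above, INSERT ... ON CONFLICT DO UPDATE fires statement-level
 * triggers for both INSERT and UPDATE, whether or not any row is
 * actually updated ("t", "log_stmt" hypothetical):
 *
 *   CREATE TRIGGER t_is AFTER INSERT ON t
 *       FOR EACH STATEMENT EXECUTE FUNCTION log_stmt();
 *   CREATE TRIGGER t_us AFTER UPDATE ON t
 *       FOR EACH STATEMENT EXECUTE FUNCTION log_stmt();
 *
 *   INSERT INTO t VALUES (1, 0)
 *       ON CONFLICT (id) DO UPDATE SET n = 0;   -- fires both
 */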
2252 * Set up the state needed for collecting transition tuples for AFTER
2253 * triggers.
2255 static void
2256 ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
2258 ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
2259 ResultRelInfo *targetRelInfo = mtstate->rootResultRelInfo;
2261 /* Check for transition tables on the directly targeted relation. */
2262 mtstate->mt_transition_capture =
2263 MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
2264 RelationGetRelid(targetRelInfo->ri_RelationDesc),
2265 mtstate->operation);
2266 if (plan->operation == CMD_INSERT &&
2267 plan->onConflictAction == ONCONFLICT_UPDATE)
2268 mtstate->mt_oc_transition_capture =
2269 MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
2270 RelationGetRelid(targetRelInfo->ri_RelationDesc),
2271 CMD_UPDATE);
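/*
 * Illustrative example, not part of the original source: transition
 * capture state is needed only when a trigger requests transition
 * tables, e.g. ("t" and audit() hypothetical):
 *
 *   CREATE TRIGGER t_audit AFTER UPDATE ON t
 *       REFERENCING OLD TABLE AS old_rows NEW TABLE AS new_rows
 *       FOR EACH STATEMENT EXECUTE FUNCTION audit();
 *
 * For plain triggers MakeTransitionCaptureState returns NULL and no
 * tuples are captured.
 */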
2275 * ExecPrepareTupleRouting --- prepare for routing one tuple
2277 * Determine the partition in which the tuple in slot is to be inserted,
2278 * and return its ResultRelInfo in *partRelInfo. The return value is
2279 * a slot holding the tuple of the partition rowtype.
2281 * This also sets the transition table information in mtstate based on the
2282 * selected partition.
2284 static TupleTableSlot *
2285 ExecPrepareTupleRouting(ModifyTableState *mtstate,
2286 EState *estate,
2287 PartitionTupleRouting *proute,
2288 ResultRelInfo *targetRelInfo,
2289 TupleTableSlot *slot,
2290 ResultRelInfo **partRelInfo)
2292 ResultRelInfo *partrel;
2293 TupleConversionMap *map;
2296 * Look up the target partition's ResultRelInfo. If ExecFindPartition does
2297 * not find a valid partition for the tuple in 'slot' then an error is
2298 * raised. An error may also be raised if the found partition is not a
2299 * valid target for INSERTs. This is required since a partitioned table
2300 * UPDATE to another partition becomes a DELETE+INSERT.
2302 partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
2305 * If we're capturing transition tuples, we might need to convert from the
2306 * partition rowtype to the root partitioned table's rowtype. But if there
2307 * are no BEFORE triggers on the partition that could change the tuple, we
2308 * can just remember the original unconverted tuple to avoid a needless
2309 * round trip conversion.
2311 if (mtstate->mt_transition_capture != NULL)
2313 bool has_before_insert_row_trig;
2315 has_before_insert_row_trig = (partrel->ri_TrigDesc &&
2316 partrel->ri_TrigDesc->trig_insert_before_row);
2318 mtstate->mt_transition_capture->tcs_original_insert_tuple =
2319 !has_before_insert_row_trig ? slot : NULL;
2323 * Convert the tuple, if necessary.
2325 map = partrel->ri_RootToPartitionMap;
2326 if (map != NULL)
2328 TupleTableSlot *new_slot = partrel->ri_PartitionTupleSlot;
2330 slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
2333 *partRelInfo = partrel;
2334 return slot;
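/*
 * Illustrative example, not part of the original source:
 * ri_RootToPartitionMap is non-NULL when a partition's physical rowtype
 * differs from the root's, e.g. after attaching a table whose columns
 * are in a different order ("p", "p1" hypothetical):
 *
 *   CREATE TABLE p (a int, b text) PARTITION BY LIST (a);
 *   CREATE TABLE p1 (b text, a int);
 *   ALTER TABLE p ATTACH PARTITION p1 FOR VALUES IN (1);
 *
 * INSERTs routed to p1 then pass through execute_attr_map_slot above.
 */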
2337 /* ----------------------------------------------------------------
2338 * ExecModifyTable
2340 * Perform table modifications as required, and return RETURNING results
2341 * if needed.
2342 * ----------------------------------------------------------------
2344 static TupleTableSlot *
2345 ExecModifyTable(PlanState *pstate)
2347 ModifyTableState *node = castNode(ModifyTableState, pstate);
2348 EState *estate = node->ps.state;
2349 CmdType operation = node->operation;
2350 ResultRelInfo *resultRelInfo;
2351 PlanState *subplanstate;
2352 TupleTableSlot *slot;
2353 TupleTableSlot *planSlot;
2354 TupleTableSlot *oldSlot;
2355 ItemPointer tupleid;
2356 ItemPointerData tuple_ctid;
2357 HeapTupleData oldtupdata;
2358 HeapTuple oldtuple;
2359 PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
2360 List *relinfos = NIL;
2361 ListCell *lc;
2363 CHECK_FOR_INTERRUPTS();
2366 * This should NOT get called during EvalPlanQual; we should have passed a
2367 * subplan tree to EvalPlanQual, instead. Use a runtime test not just
2368 * Assert because this condition is easy to miss in testing. (Note:
2369 * although ModifyTable should not get executed within an EvalPlanQual
2370 * operation, we do have to allow it to be initialized and shut down in
2371 * case it is within a CTE subplan. Hence this test must be here, not in
2372 * ExecInitModifyTable.)
2374 if (estate->es_epq_active != NULL)
2375 elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
2378 * If we've already completed processing, don't try to do more. We need
2379 * this test because ExecPostprocessPlan might call us an extra time, and
2380 * our subplan's nodes aren't necessarily robust against being called
2381 * extra times.
2383 if (node->mt_done)
2384 return NULL;
2387 * On first call, fire BEFORE STATEMENT triggers before proceeding.
2389 if (node->fireBSTriggers)
2391 fireBSTriggers(node);
2392 node->fireBSTriggers = false;
2395 /* Preload local variables */
2396 resultRelInfo = node->resultRelInfo + node->mt_lastResultIndex;
2397 subplanstate = outerPlanState(node);
2400 * Fetch rows from subplan, and execute the required table modification
2401 * for each row.
2403 for (;;)
2406 * Reset the per-output-tuple exprcontext. This is needed because
2407 * triggers expect to use that context as workspace. It's a bit ugly
2408 * to do this below the top level of the plan, however. We might need
2409 * to rethink this later.
2411 ResetPerTupleExprContext(estate);
2414 * Reset per-tuple memory context used for processing on conflict and
2415 * returning clauses, to free any expression evaluation storage
2416 * allocated in the previous cycle.
2418 if (pstate->ps_ExprContext)
2419 ResetExprContext(pstate->ps_ExprContext);
2421 planSlot = ExecProcNode(subplanstate);
2423 /* No more tuples to process? */
2424 if (TupIsNull(planSlot))
2425 break;
2428 * When there are multiple result relations, each tuple contains a
2429 * junk column that gives the OID of the rel from which it came.
2430 * Extract it and select the correct result relation.
2432 if (AttributeNumberIsValid(node->mt_resultOidAttno))
2434 Datum datum;
2435 bool isNull;
2436 Oid resultoid;
2438 datum = ExecGetJunkAttribute(planSlot, node->mt_resultOidAttno,
2439 &isNull);
2440 if (isNull)
2441 elog(ERROR, "tableoid is NULL");
2442 resultoid = DatumGetObjectId(datum);
2444 /* If it's not the same as last time, we need to locate the rel */
2445 if (resultoid != node->mt_lastResultOid)
2446 resultRelInfo = ExecLookupResultRelByOid(node, resultoid,
2447 false, true);
2451 * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
2452 * here is compute the RETURNING expressions.
2454 if (resultRelInfo->ri_usesFdwDirectModify)
2456 Assert(resultRelInfo->ri_projectReturning);
2459 * A scan slot containing the data that was actually inserted,
2460 * updated or deleted has already been made available to
2461 * ExecProcessReturning by IterateDirectModify, so no need to
2462 * provide it here.
2464 slot = ExecProcessReturning(resultRelInfo, NULL, planSlot);
2466 return slot;
2469 EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
2470 slot = planSlot;
2472 tupleid = NULL;
2473 oldtuple = NULL;
2476 * For UPDATE/DELETE, fetch the row identity info for the tuple to be
2477 * updated/deleted. For a heap relation, that's a TID; otherwise we
2478 * may have a wholerow junk attr that carries the old tuple in toto.
2479 * Keep this in step with the part of ExecInitModifyTable that sets up
2480 * ri_RowIdAttNo.
2482 if (operation == CMD_UPDATE || operation == CMD_DELETE)
2484 char relkind;
2485 Datum datum;
2486 bool isNull;
2488 relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2489 if (relkind == RELKIND_RELATION ||
2490 relkind == RELKIND_MATVIEW ||
2491 relkind == RELKIND_PARTITIONED_TABLE)
2493 /* ri_RowIdAttNo refers to a ctid attribute */
2494 Assert(AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo));
2495 datum = ExecGetJunkAttribute(slot,
2496 resultRelInfo->ri_RowIdAttNo,
2497 &isNull);
2498 /* shouldn't ever get a null result... */
2499 if (isNull)
2500 elog(ERROR, "ctid is NULL");
2502 tupleid = (ItemPointer) DatumGetPointer(datum);
2503 tuple_ctid = *tupleid; /* be sure we don't free ctid!! */
2504 tupleid = &tuple_ctid;
2508 * Use the wholerow attribute, when available, to reconstruct the
2509 * old relation tuple. The old tuple serves one or both of two
2510 * purposes: 1) it serves as the OLD tuple for row triggers, 2) it
2511 * provides values for any unchanged columns for the NEW tuple of
2512 * an UPDATE, because the subplan does not produce all the columns
2513 * of the target table.
2515 * Note that the wholerow attribute does not carry system columns,
2516 * so foreign table triggers miss seeing those, except that we
2517 * know enough here to set t_tableOid. Quite separately from
2518 * this, the FDW may fetch its own junk attrs to identify the row.
2520 * Other relevant relkinds, currently limited to views, always
2521 * have a wholerow attribute.
2523 else if (AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2525 datum = ExecGetJunkAttribute(slot,
2526 resultRelInfo->ri_RowIdAttNo,
2527 &isNull);
2528 /* shouldn't ever get a null result... */
2529 if (isNull)
2530 elog(ERROR, "wholerow is NULL");
2532 oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
2533 oldtupdata.t_len =
2534 HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
2535 ItemPointerSetInvalid(&(oldtupdata.t_self));
2536 /* Historically, view triggers see invalid t_tableOid. */
2537 oldtupdata.t_tableOid =
2538 (relkind == RELKIND_VIEW) ? InvalidOid :
2539 RelationGetRelid(resultRelInfo->ri_RelationDesc);
2541 oldtuple = &oldtupdata;
2543 else
2545 /* Only foreign tables are allowed to omit a row-ID attr */
2546 Assert(relkind == RELKIND_FOREIGN_TABLE);
2550 switch (operation)
2552 case CMD_INSERT:
2553 /* Initialize projection info if first time for this table */
2554 if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
2555 ExecInitInsertProjection(node, resultRelInfo);
2556 slot = ExecGetInsertNewTuple(resultRelInfo, planSlot);
2557 slot = ExecInsert(node, resultRelInfo, slot, planSlot,
2558 estate, node->canSetTag);
2559 break;
2560 case CMD_UPDATE:
2561 /* Initialize projection info if first time for this table */
2562 if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
2563 ExecInitUpdateProjection(node, resultRelInfo);
2566 * Make the new tuple by combining plan's output tuple with
2567 * the old tuple being updated.
2569 oldSlot = resultRelInfo->ri_oldTupleSlot;
2570 if (oldtuple != NULL)
2572 /* Use the wholerow junk attr as the old tuple. */
2573 ExecForceStoreHeapTuple(oldtuple, oldSlot, false);
2575 else
2577 /* Fetch the most recent version of old tuple. */
2578 Relation relation = resultRelInfo->ri_RelationDesc;
2580 Assert(tupleid != NULL);
2581 if (!table_tuple_fetch_row_version(relation, tupleid,
2582 SnapshotAny,
2583 oldSlot))
2584 elog(ERROR, "failed to fetch tuple being updated");
2586 slot = ExecGetUpdateNewTuple(resultRelInfo, planSlot,
2587 oldSlot);
2589 /* Now apply the update. */
2590 slot = ExecUpdate(node, resultRelInfo, tupleid, oldtuple, slot,
2591 planSlot, &node->mt_epqstate, estate,
2592 node->canSetTag);
2593 break;
2594 case CMD_DELETE:
2595 slot = ExecDelete(node, resultRelInfo, tupleid, oldtuple,
2596 planSlot, &node->mt_epqstate, estate,
2597 true, /* processReturning */
2598 node->canSetTag,
2599 false, /* changingPart */
2600 NULL, NULL);
2601 break;
2602 default:
2603 elog(ERROR, "unknown operation");
2604 break;
2608 * If we got a RETURNING result, return it to caller. We'll continue
2609 * the work on next call.
2611 if (slot)
2612 return slot;
2616 * Insert any remaining buffered tuples for batch insert.
2618 if (proute)
2619 relinfos = estate->es_tuple_routing_result_relations;
2620 else
2621 relinfos = estate->es_opened_result_relations;
2623 foreach(lc, relinfos)
2625 resultRelInfo = lfirst(lc);
2626 if (resultRelInfo->ri_NumSlots > 0)
2627 ExecBatchInsert(node, resultRelInfo,
2628 resultRelInfo->ri_Slots,
2629 resultRelInfo->ri_PlanSlots,
2630 resultRelInfo->ri_NumSlots,
2631 estate, node->canSetTag);
2635 * We're done, but fire AFTER STATEMENT triggers before exiting.
2637 fireASTriggers(node);
2639 node->mt_done = true;
2641 return NULL;
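/*
 * Illustrative example, not part of the original source: the flush
 * above is what completes an FDW batch insert such as the following,
 * where "loopback" and "ft" are hypothetical postgres_fdw objects:
 *
 *   ALTER SERVER loopback OPTIONS (ADD batch_size '10');
 *   INSERT INTO ft SELECT i, i FROM generate_series(1, 1000) i;
 *
 * Rows are accumulated in ri_Slots and sent to the remote server in
 * groups of up to batch_size, with any leftovers flushed here before
 * the AFTER STATEMENT triggers fire.
 */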
2645 * ExecLookupResultRelByOid
2646 * If the table with the given OID is among the result relations to be
2647 * updated by the given ModifyTable node, return its ResultRelInfo.
2649 * If not found, return NULL if missing_ok, else raise error.
2651 * If update_cache is true, then upon successful lookup, update the node's
2652 * one-element cache. ONLY ExecModifyTable may pass true for this.
2654 ResultRelInfo *
2655 ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid,
2656 bool missing_ok, bool update_cache)
2658 if (node->mt_resultOidHash)
2660 /* Use the pre-built hash table to locate the rel */
2661 MTTargetRelLookup *mtlookup;
2663 mtlookup = (MTTargetRelLookup *)
2664 hash_search(node->mt_resultOidHash, &resultoid, HASH_FIND, NULL);
2665 if (mtlookup)
2667 if (update_cache)
2669 node->mt_lastResultOid = resultoid;
2670 node->mt_lastResultIndex = mtlookup->relationIndex;
2672 return node->resultRelInfo + mtlookup->relationIndex;
2675 else
2677 /* With few target rels, just search the ResultRelInfo array */
2678 for (int ndx = 0; ndx < node->mt_nrels; ndx++)
2680 ResultRelInfo *rInfo = node->resultRelInfo + ndx;
2682 if (RelationGetRelid(rInfo->ri_RelationDesc) == resultoid)
2684 if (update_cache)
2686 node->mt_lastResultOid = resultoid;
2687 node->mt_lastResultIndex = ndx;
2689 return rInfo;
2694 if (!missing_ok)
2695 elog(ERROR, "incorrect result relation OID %u", resultoid);
2696 return NULL;
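/*
 * Illustrative usage, not part of the original source: ExecModifyTable
 * resolves each input row's junk "tableoid" roughly as
 *
 *   resultRelInfo = ExecLookupResultRelByOid(node, resultoid,
 *                                            false, true);
 *
 * and, because update_cache is true, consecutive rows from the same
 * result relation are satisfied by the one-element cache without
 * repeating the lookup.
 */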
2699 /* ----------------------------------------------------------------
2700 * ExecInitModifyTable
2701 * ----------------------------------------------------------------
2703 ModifyTableState *
2704 ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
2706 ModifyTableState *mtstate;
2707 Plan *subplan = outerPlan(node);
2708 CmdType operation = node->operation;
2709 int nrels = list_length(node->resultRelations);
2710 ResultRelInfo *resultRelInfo;
2711 List *arowmarks;
2712 ListCell *l;
2713 int i;
2714 Relation rel;
2716 /* check for unsupported flags */
2717 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
2720 * create state structure
2722 mtstate = makeNode(ModifyTableState);
2723 mtstate->ps.plan = (Plan *) node;
2724 mtstate->ps.state = estate;
2725 mtstate->ps.ExecProcNode = ExecModifyTable;
2727 mtstate->operation = operation;
2728 mtstate->canSetTag = node->canSetTag;
2729 mtstate->mt_done = false;
2731 mtstate->mt_nrels = nrels;
2732 mtstate->resultRelInfo = (ResultRelInfo *)
2733 palloc(nrels * sizeof(ResultRelInfo));
2735 /*----------
2736 * Resolve the target relation. This is the same as:
2738 * - the relation for which we will fire FOR STATEMENT triggers,
2739 * - the relation into whose tuple format all captured transition tuples
2740 * must be converted, and
2741 * - the root partitioned table used for tuple routing.
2743 * If it's a partitioned table, the root partitioned table doesn't
2744 * appear elsewhere in the plan and its RT index is given explicitly in
2745 * node->rootRelation. Otherwise (i.e. table inheritance) the target
2746 * relation is the first relation in the node->resultRelations list.
2747 *----------
2749 if (node->rootRelation > 0)
2751 mtstate->rootResultRelInfo = makeNode(ResultRelInfo);
2752 ExecInitResultRelation(estate, mtstate->rootResultRelInfo,
2753 node->rootRelation);
2755 else
2757 mtstate->rootResultRelInfo = mtstate->resultRelInfo;
2758 ExecInitResultRelation(estate, mtstate->resultRelInfo,
2759 linitial_int(node->resultRelations));
2762 /* set up epqstate with dummy subplan data for the moment */
2763 EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
2764 mtstate->fireBSTriggers = true;
2767 * Build state for collecting transition tuples. This requires having a
2768 * valid trigger query context, so skip it in explain-only mode.
2770 if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
2771 ExecSetupTransitionCaptureState(mtstate, estate);
2774 * Open all the result relations and initialize the ResultRelInfo structs.
2775 * (But the root relation was initialized above, if it's part of the array.)
2776 * We must do this before initializing the subplan, because direct-modify
2777 * FDWs expect their ResultRelInfos to be available.
2779 resultRelInfo = mtstate->resultRelInfo;
2780 i = 0;
2781 foreach(l, node->resultRelations)
2783 Index resultRelation = lfirst_int(l);
2785 if (resultRelInfo != mtstate->rootResultRelInfo)
2787 ExecInitResultRelation(estate, resultRelInfo, resultRelation);
2790 * For child result relations, store the root result relation
2791 * pointer. We do so for the convenience of places that want to
2792 * look at the query's original target relation but don't have the
2793 * mtstate handy.
2795 resultRelInfo->ri_RootResultRelInfo = mtstate->rootResultRelInfo;
2798 /* Initialize the usesFdwDirectModify flag */
2799 resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
2800 node->fdwDirectModifyPlans);
2803 * Verify result relation is a valid target for the current operation
2805 CheckValidResultRel(resultRelInfo, operation);
2807 resultRelInfo++;
2808 i++;
2812 * Now we may initialize the subplan.
2814 outerPlanState(mtstate) = ExecInitNode(subplan, estate, eflags);
2817 * Do additional per-result-relation initialization.
2819 for (i = 0; i < nrels; i++)
2821 resultRelInfo = &mtstate->resultRelInfo[i];
2823 /* Let FDWs init themselves for foreign-table result rels */
2824 if (!resultRelInfo->ri_usesFdwDirectModify &&
2825 resultRelInfo->ri_FdwRoutine != NULL &&
2826 resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
2828 List *fdw_private = (List *) list_nth(node->fdwPrivLists, i);
2830 resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
2831 resultRelInfo,
2832 fdw_private,
2834 eflags);
2838 * For UPDATE/DELETE, find the appropriate junk attr now, either a
2839 * 'ctid' or 'wholerow' attribute depending on relkind. For foreign
2840 * tables, the FDW might have created additional junk attr(s), but
2841 * those are no concern of ours.
2843 if (operation == CMD_UPDATE || operation == CMD_DELETE)
2845 char relkind;
2847 relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2848 if (relkind == RELKIND_RELATION ||
2849 relkind == RELKIND_MATVIEW ||
2850 relkind == RELKIND_PARTITIONED_TABLE)
2852 resultRelInfo->ri_RowIdAttNo =
2853 ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
2854 if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2855 elog(ERROR, "could not find junk ctid column");
2857 else if (relkind == RELKIND_FOREIGN_TABLE)
2860 * When there is a row-level trigger, there should be a
2861 * wholerow attribute. We also require it to be present in
2862 * UPDATE, so we can get the values of unchanged columns.
2864 resultRelInfo->ri_RowIdAttNo =
2865 ExecFindJunkAttributeInTlist(subplan->targetlist,
2866 "wholerow");
2867 if (mtstate->operation == CMD_UPDATE &&
2868 !AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2869 elog(ERROR, "could not find junk wholerow column");
2871 else
2873 /* Other valid target relkinds must provide wholerow */
2874 resultRelInfo->ri_RowIdAttNo =
2875 ExecFindJunkAttributeInTlist(subplan->targetlist,
2876 "wholerow");
2877 if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2878 elog(ERROR, "could not find junk wholerow column");
2884 * If this is an inherited update/delete, there will be a junk attribute
2885 * named "tableoid" present in the subplan's targetlist. It will be used
2886 * to identify the result relation for a given tuple to be
2887 * updated/deleted.
2889 mtstate->mt_resultOidAttno =
2890 ExecFindJunkAttributeInTlist(subplan->targetlist, "tableoid");
2891 Assert(AttributeNumberIsValid(mtstate->mt_resultOidAttno) || nrels == 1);
2892 mtstate->mt_lastResultOid = InvalidOid; /* force lookup at first tuple */
2893 mtstate->mt_lastResultIndex = 0; /* must be zero if no such attr */
2895 /* Get the root target relation */
2896 rel = mtstate->rootResultRelInfo->ri_RelationDesc;
2899 * Build state for tuple routing if it's a partitioned INSERT. An UPDATE
2900 * might need this too, but only if it actually moves tuples between
2901 * partitions; in that case setup is done by ExecCrossPartitionUpdate.
2903 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
2904 operation == CMD_INSERT)
2905 mtstate->mt_partition_tuple_routing =
2906 ExecSetupPartitionTupleRouting(estate, rel);
2909 * Initialize any WITH CHECK OPTION constraints if needed.
2911 resultRelInfo = mtstate->resultRelInfo;
2912 foreach(l, node->withCheckOptionLists)
2914 List *wcoList = (List *) lfirst(l);
2915 List *wcoExprs = NIL;
2916 ListCell *ll;
2918 foreach(ll, wcoList)
2920 WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
2921 ExprState *wcoExpr = ExecInitQual((List *) wco->qual,
2922 &mtstate->ps);
2924 wcoExprs = lappend(wcoExprs, wcoExpr);
2927 resultRelInfo->ri_WithCheckOptions = wcoList;
2928 resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
2929 resultRelInfo++;
2933 * Initialize RETURNING projections if needed.
2935 if (node->returningLists)
2937 TupleTableSlot *slot;
2938 ExprContext *econtext;
2941 * Initialize result tuple slot and assign its rowtype using the first
2942 * RETURNING list. We assume the rest will look the same.
2944 mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);
2946 /* Set up a slot for the output of the RETURNING projection(s) */
2947 ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
2948 slot = mtstate->ps.ps_ResultTupleSlot;
2950 /* Need an econtext too */
2951 if (mtstate->ps.ps_ExprContext == NULL)
2952 ExecAssignExprContext(estate, &mtstate->ps);
2953 econtext = mtstate->ps.ps_ExprContext;
2956 * Build a projection for each result rel.
2958 resultRelInfo = mtstate->resultRelInfo;
2959 foreach(l, node->returningLists)
2961 List *rlist = (List *) lfirst(l);
2963 resultRelInfo->ri_returningList = rlist;
2964 resultRelInfo->ri_projectReturning =
2965 ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
2966 resultRelInfo->ri_RelationDesc->rd_att);
2967 resultRelInfo++;
2970 else
2973 * We still must construct a dummy result tuple type, because InitPlan
2974 * expects one (maybe should change that?).
2976 mtstate->ps.plan->targetlist = NIL;
2977 ExecInitResultTypeTL(&mtstate->ps);
2979 mtstate->ps.ps_ExprContext = NULL;
2982 /* Set the list of arbiter indexes if needed for ON CONFLICT */
2983 resultRelInfo = mtstate->resultRelInfo;
2984 if (node->onConflictAction != ONCONFLICT_NONE)
2986 /* insert may only have one relation, inheritance is not expanded */
2987 Assert(nrels == 1);
2988 resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
2992 * If needed, initialize the target list, projection and qual for ON
2993 * CONFLICT DO UPDATE.
2995 if (node->onConflictAction == ONCONFLICT_UPDATE)
2997 OnConflictSetState *onconfl = makeNode(OnConflictSetState);
2998 ExprContext *econtext;
2999 TupleDesc relationDesc;
3001 /* already exists if created by RETURNING processing above */
3002 if (mtstate->ps.ps_ExprContext == NULL)
3003 ExecAssignExprContext(estate, &mtstate->ps);
3005 econtext = mtstate->ps.ps_ExprContext;
3006 relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
3008 /* create state for DO UPDATE SET operation */
3009 resultRelInfo->ri_onConflict = onconfl;
3011 /* initialize slot for the existing tuple */
3012 onconfl->oc_Existing =
3013 table_slot_create(resultRelInfo->ri_RelationDesc,
3014 &mtstate->ps.state->es_tupleTable);
3017 * Create the tuple slot for the UPDATE SET projection. We want a slot
3018 * of the table's type here, because the slot will be used to insert
3019 * into the table, and for RETURNING processing, which may access
3020 * system attributes.
3022 onconfl->oc_ProjSlot =
3023 table_slot_create(resultRelInfo->ri_RelationDesc,
3024 &mtstate->ps.state->es_tupleTable);
3026 /* build UPDATE SET projection state */
3027 onconfl->oc_ProjInfo =
3028 ExecBuildUpdateProjection(node->onConflictSet,
3029 true,
3030 node->onConflictCols,
3031 relationDesc,
3032 econtext,
3033 onconfl->oc_ProjSlot,
3034 &mtstate->ps);
3036 /* initialize state to evaluate the WHERE clause, if any */
3037 if (node->onConflictWhere)
3039 ExprState *qualexpr;
3041 qualexpr = ExecInitQual((List *) node->onConflictWhere,
3042 &mtstate->ps);
3043 onconfl->oc_WhereClause = qualexpr;
3048 * If we have any secondary relations in an UPDATE or DELETE, they need to
3049 * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
3050 * EvalPlanQual mechanism needs to be told about them. Locate the
3051 * relevant ExecRowMarks.
3053 arowmarks = NIL;
3054 foreach(l, node->rowMarks)
3056 PlanRowMark *rc = lfirst_node(PlanRowMark, l);
3057 ExecRowMark *erm;
3058 ExecAuxRowMark *aerm;
3060 /* ignore "parent" rowmarks; they are irrelevant at runtime */
3061 if (rc->isParent)
3062 continue;
3064 /* Find ExecRowMark and build ExecAuxRowMark */
3065 erm = ExecFindRowMark(estate, rc->rti, false);
3066 aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
3067 arowmarks = lappend(arowmarks, aerm);
3070 EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, arowmarks);
3073 * If there are a lot of result relations, use a hash table to speed the
3074 * lookups. If there are not a lot, a simple linear search is faster.
3076 * It's not clear where the threshold is, but try 64 for starters. In a
3077 * debugging build, use a small threshold so that we get some test
3078 * coverage of both code paths.
3080 #ifdef USE_ASSERT_CHECKING
3081 #define MT_NRELS_HASH 4
3082 #else
3083 #define MT_NRELS_HASH 64
3084 #endif
3085 if (nrels >= MT_NRELS_HASH)
3087 HASHCTL hash_ctl;
3089 hash_ctl.keysize = sizeof(Oid);
3090 hash_ctl.entrysize = sizeof(MTTargetRelLookup);
3091 hash_ctl.hcxt = CurrentMemoryContext;
3092 mtstate->mt_resultOidHash =
3093 hash_create("ModifyTable target hash",
3094 nrels, &hash_ctl,
3095 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
3096 for (i = 0; i < nrels; i++)
3098 Oid hashkey;
3099 MTTargetRelLookup *mtlookup;
3100 bool found;
3102 resultRelInfo = &mtstate->resultRelInfo[i];
3103 hashkey = RelationGetRelid(resultRelInfo->ri_RelationDesc);
3104 mtlookup = (MTTargetRelLookup *)
3105 hash_search(mtstate->mt_resultOidHash, &hashkey,
3106 HASH_ENTER, &found);
3107 Assert(!found);
3108 mtlookup->relationIndex = i;
3111 else
3112 mtstate->mt_resultOidHash = NULL;
3115 * Determine whether the FDW supports batch insert, and determine the
3116 * batch size (an FDW may support batching, but it may be disabled for
3117 * the particular server or table).
3119 * We only do this for INSERT, so that for UPDATE/DELETE the batch size
3120 * remains set to 0.
3122 if (operation == CMD_INSERT)
3124 /* insert may only have one relation, inheritance is not expanded */
3125 Assert(nrels == 1);
3126 resultRelInfo = mtstate->resultRelInfo;
3127 if (!resultRelInfo->ri_usesFdwDirectModify &&
3128 resultRelInfo->ri_FdwRoutine != NULL &&
3129 resultRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
3130 resultRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
3132 resultRelInfo->ri_BatchSize =
3133 resultRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(resultRelInfo);
3134 Assert(resultRelInfo->ri_BatchSize >= 1);
3136 else
3137 resultRelInfo->ri_BatchSize = 1;
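/*
 * Illustrative sketch, not part of the original source: an FDW opts in
 * to batching by providing both callbacks tested above, e.g.
 *
 *   static int
 *   example_GetForeignModifyBatchSize(ResultRelInfo *resultRelInfo)
 *   {
 *       return 100;
 *   }
 *
 * The returned value must be at least 1; returning 1 effectively
 * disables batching for the relation.  postgres_fdw derives its value
 * from the batch_size option on the foreign server or table.
 */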
3141 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
3142 * to estate->es_auxmodifytables so that it will be run to completion by
3143 * ExecPostprocessPlan. (It'd actually work fine to add the primary
3144 * ModifyTable node too, but there's no need.) Note the use of lcons not
3145 * lappend: we need later-initialized ModifyTable nodes to be shut down
3146 * before earlier ones. This ensures that we don't throw away RETURNING
3147 * rows that need to be seen by a later CTE subplan.
3149 if (!mtstate->canSetTag)
3150 estate->es_auxmodifytables = lcons(mtstate,
3151 estate->es_auxmodifytables);
3153 return mtstate;
3156 /* ----------------------------------------------------------------
3157 * ExecEndModifyTable
3159 * Shuts down the plan.
3161 * Returns nothing of interest.
3162 * ----------------------------------------------------------------
3164 void
3165 ExecEndModifyTable(ModifyTableState *node)
3167 int i;
3170 * Allow any FDWs to shut down
3172 for (i = 0; i < node->mt_nrels; i++)
3174 int j;
3175 ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
3177 if (!resultRelInfo->ri_usesFdwDirectModify &&
3178 resultRelInfo->ri_FdwRoutine != NULL &&
3179 resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
3180 resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
3181 resultRelInfo);
3184 * Clean up the initialized batch slots. This only matters for FDWs with
3185 * batching, but the other cases will have ri_NumSlotsInitialized == 0.
3187 for (j = 0; j < resultRelInfo->ri_NumSlotsInitialized; j++)
3189 ExecDropSingleTupleTableSlot(resultRelInfo->ri_Slots[j]);
3190 ExecDropSingleTupleTableSlot(resultRelInfo->ri_PlanSlots[j]);
3195 * Close all the partitioned tables, leaf partitions, and their indices
3196 * and release the slot used for tuple routing, if set.
3198 if (node->mt_partition_tuple_routing)
3200 ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
3202 if (node->mt_root_tuple_slot)
3203 ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
3207 * Free the exprcontext
3209 ExecFreeExprContext(&node->ps);
3212 * clean out the tuple table
3214 if (node->ps.ps_ResultTupleSlot)
3215 ExecClearTuple(node->ps.ps_ResultTupleSlot);
3218 * Terminate EPQ execution if active
3220 EvalPlanQualEnd(&node->mt_epqstate);
3223 * shut down subplan
3225 ExecEndNode(outerPlanState(node));
3228 void
3229 ExecReScanModifyTable(ModifyTableState *node)
3232 * Currently, we don't need to support rescan on ModifyTable nodes. The
3233 * semantics of that would be a bit debatable anyway.
3235 elog(ERROR, "ExecReScanModifyTable is not implemented");