/* -------------------------------------------------------------------------
 *
 * decode.c
 *		This module decodes WAL records read using xlogreader.h's APIs for the
 *		purpose of logical decoding by passing information to the
 *		reorderbuffer module (containing the actual changes) and to the
 *		snapbuild module to build a fitting catalog snapshot (to be able to
 *		properly decode the changes in the reorderbuffer).
 *
 * NOTE:
 *		This basically tries to handle all low level xlog stuff for
 *		reorderbuffer.c and snapbuild.c. There's some minor leakage where a
 *		specific record's struct is used to pass data along, but those just
 *		happen to contain the right amount of data in a convenient
 *		format. There isn't and shouldn't be much intelligence about the
 *		contents of records in here except turning them into a more usable
 *		format.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/replication/logical/decode.c
 *
 * -------------------------------------------------------------------------
 */
29 #include "access/heapam.h"
30 #include "access/heapam_xlog.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "access/xlog_internal.h"
34 #include "access/xlogreader.h"
35 #include "access/xlogrecord.h"
36 #include "access/xlogutils.h"
37 #include "catalog/pg_control.h"
38 #include "replication/decode.h"
39 #include "replication/logical.h"
40 #include "replication/message.h"
41 #include "replication/origin.h"
42 #include "replication/reorderbuffer.h"
43 #include "replication/snapbuild.h"
44 #include "storage/standby.h"
46 typedef struct XLogRecordBuffer
50 XLogReaderState
*record
;
54 static void DecodeXLogOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
55 static void DecodeHeapOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
56 static void DecodeHeap2Op(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
57 static void DecodeXactOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
58 static void DecodeStandbyOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
59 static void DecodeLogicalMsgOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
61 /* individual record(group)'s handlers */
62 static void DecodeInsert(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
63 static void DecodeUpdate(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
64 static void DecodeDelete(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
65 static void DecodeTruncate(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
66 static void DecodeMultiInsert(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
67 static void DecodeSpecConfirm(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
);
69 static void DecodeCommit(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
70 xl_xact_parsed_commit
*parsed
, TransactionId xid
,
72 static void DecodeAbort(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
73 xl_xact_parsed_abort
*parsed
, TransactionId xid
,
75 static void DecodePrepare(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
76 xl_xact_parsed_prepare
*parsed
);
79 /* common function to decode tuples */
80 static void DecodeXLogTuple(char *data
, Size len
, ReorderBufferTupleBuf
*tup
);
82 /* helper functions for decoding transactions */
83 static inline bool FilterPrepare(LogicalDecodingContext
*ctx
,
84 TransactionId xid
, const char *gid
);
85 static bool DecodeTXNNeedSkip(LogicalDecodingContext
*ctx
,
86 XLogRecordBuffer
*buf
, Oid dbId
,
87 RepOriginId origin_id
);
90 * Take every XLogReadRecord()ed record and perform the actions required to
91 * decode it using the output plugin already setup in the logical decoding
94 * NB: Note that every record's xid needs to be processed by reorderbuffer
95 * (xids contained in the content of records are not relevant for this rule).
96 * That means that for records which'd otherwise not go through the
97 * reorderbuffer ReorderBufferProcessXid() has to be called. We don't want to
98 * call ReorderBufferProcessXid for each record type by default, because
99 * e.g. empty xacts can be handled more efficiently if there's no previous
102 * We also support the ability to fast forward thru records, skipping some
103 * record types completely - see individual record types for details.
106 LogicalDecodingProcessRecord(LogicalDecodingContext
*ctx
, XLogReaderState
*record
)
108 XLogRecordBuffer buf
;
111 buf
.origptr
= ctx
->reader
->ReadRecPtr
;
112 buf
.endptr
= ctx
->reader
->EndRecPtr
;
115 txid
= XLogRecGetTopXid(record
);
118 * If the top-level xid is valid, we need to assign the subxact to the
119 * top-level xact. We need to do this for all records, hence we do it
122 if (TransactionIdIsValid(txid
))
124 ReorderBufferAssignChild(ctx
->reorder
,
126 record
->decoded_record
->xl_xid
,
130 /* cast so we get a warning when new rmgrs are added */
131 switch ((RmgrId
) XLogRecGetRmid(record
))
134 * Rmgrs we care about for logical decoding. Add new rmgrs in
135 * rmgrlist.h's order.
138 DecodeXLogOp(ctx
, &buf
);
142 DecodeXactOp(ctx
, &buf
);
146 DecodeStandbyOp(ctx
, &buf
);
150 DecodeHeap2Op(ctx
, &buf
);
154 DecodeHeapOp(ctx
, &buf
);
157 case RM_LOGICALMSG_ID
:
158 DecodeLogicalMsgOp(ctx
, &buf
);
162 * Rmgrs irrelevant for logical decoding; they describe stuff not
163 * represented in logical decoding. Add new rmgrs in rmgrlist.h's
170 case RM_MULTIXACT_ID
:
179 case RM_COMMIT_TS_ID
:
180 case RM_REPLORIGIN_ID
:
182 /* just deal with xid, and done */
183 ReorderBufferProcessXid(ctx
->reorder
, XLogRecGetXid(record
),
187 elog(ERROR
, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds
) XLogRecGetRmid(buf
.record
));
192 * Handle rmgr XLOG_ID records for DecodeRecordIntoReorderBuffer().
195 DecodeXLogOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
197 SnapBuild
*builder
= ctx
->snapshot_builder
;
198 uint8 info
= XLogRecGetInfo(buf
->record
) & ~XLR_INFO_MASK
;
200 ReorderBufferProcessXid(ctx
->reorder
, XLogRecGetXid(buf
->record
),
205 /* this is also used in END_OF_RECOVERY checkpoints */
206 case XLOG_CHECKPOINT_SHUTDOWN
:
207 case XLOG_END_OF_RECOVERY
:
208 SnapBuildSerializationPoint(builder
, buf
->origptr
);
211 case XLOG_CHECKPOINT_ONLINE
:
214 * a RUNNING_XACTS record will have been logged near to this, we
215 * can restart from there.
221 case XLOG_BACKUP_END
:
222 case XLOG_PARAMETER_CHANGE
:
223 case XLOG_RESTORE_POINT
:
224 case XLOG_FPW_CHANGE
:
225 case XLOG_FPI_FOR_HINT
:
227 case XLOG_OVERWRITE_CONTRECORD
:
230 elog(ERROR
, "unexpected RM_XLOG_ID record type: %u", info
);
235 * Handle rmgr XACT_ID records for DecodeRecordIntoReorderBuffer().
238 DecodeXactOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
240 SnapBuild
*builder
= ctx
->snapshot_builder
;
241 ReorderBuffer
*reorder
= ctx
->reorder
;
242 XLogReaderState
*r
= buf
->record
;
243 uint8 info
= XLogRecGetInfo(r
) & XLOG_XACT_OPMASK
;
246 * If the snapshot isn't yet fully built, we cannot decode anything, so
249 if (SnapBuildCurrentState(builder
) < SNAPBUILD_FULL_SNAPSHOT
)
254 case XLOG_XACT_COMMIT
:
255 case XLOG_XACT_COMMIT_PREPARED
:
257 xl_xact_commit
*xlrec
;
258 xl_xact_parsed_commit parsed
;
260 bool two_phase
= false;
262 xlrec
= (xl_xact_commit
*) XLogRecGetData(r
);
263 ParseCommitRecord(XLogRecGetInfo(buf
->record
), xlrec
, &parsed
);
265 if (!TransactionIdIsValid(parsed
.twophase_xid
))
266 xid
= XLogRecGetXid(r
);
268 xid
= parsed
.twophase_xid
;
271 * We would like to process the transaction in a two-phase
272 * manner iff output plugin supports two-phase commits and
273 * doesn't filter the transaction at prepare time.
275 if (info
== XLOG_XACT_COMMIT_PREPARED
)
276 two_phase
= !(FilterPrepare(ctx
, xid
,
277 parsed
.twophase_gid
));
279 DecodeCommit(ctx
, buf
, &parsed
, xid
, two_phase
);
282 case XLOG_XACT_ABORT
:
283 case XLOG_XACT_ABORT_PREPARED
:
285 xl_xact_abort
*xlrec
;
286 xl_xact_parsed_abort parsed
;
288 bool two_phase
= false;
290 xlrec
= (xl_xact_abort
*) XLogRecGetData(r
);
291 ParseAbortRecord(XLogRecGetInfo(buf
->record
), xlrec
, &parsed
);
293 if (!TransactionIdIsValid(parsed
.twophase_xid
))
294 xid
= XLogRecGetXid(r
);
296 xid
= parsed
.twophase_xid
;
299 * We would like to process the transaction in a two-phase
300 * manner iff output plugin supports two-phase commits and
301 * doesn't filter the transaction at prepare time.
303 if (info
== XLOG_XACT_ABORT_PREPARED
)
304 two_phase
= !(FilterPrepare(ctx
, xid
,
305 parsed
.twophase_gid
));
307 DecodeAbort(ctx
, buf
, &parsed
, xid
, two_phase
);
310 case XLOG_XACT_ASSIGNMENT
:
313 * We assign subxact to the toplevel xact while processing each
314 * record if required. So, we don't need to do anything here. See
315 * LogicalDecodingProcessRecord.
318 case XLOG_XACT_INVALIDATIONS
:
321 xl_xact_invals
*invals
;
323 xid
= XLogRecGetXid(r
);
324 invals
= (xl_xact_invals
*) XLogRecGetData(r
);
327 * Execute the invalidations for xid-less transactions,
328 * otherwise, accumulate them so that they can be processed at
331 if (TransactionIdIsValid(xid
))
333 if (!ctx
->fast_forward
)
334 ReorderBufferAddInvalidations(reorder
, xid
,
338 ReorderBufferXidSetCatalogChanges(ctx
->reorder
, xid
,
341 else if ((!ctx
->fast_forward
))
342 ReorderBufferImmediateInvalidation(ctx
->reorder
,
347 case XLOG_XACT_PREPARE
:
349 xl_xact_parsed_prepare parsed
;
350 xl_xact_prepare
*xlrec
;
353 xlrec
= (xl_xact_prepare
*) XLogRecGetData(r
);
354 ParsePrepareRecord(XLogRecGetInfo(buf
->record
),
358 * We would like to process the transaction in a two-phase
359 * manner iff output plugin supports two-phase commits and
360 * doesn't filter the transaction at prepare time.
362 if (FilterPrepare(ctx
, parsed
.twophase_xid
,
363 parsed
.twophase_gid
))
365 ReorderBufferProcessXid(reorder
, parsed
.twophase_xid
,
371 * Note that if the prepared transaction has locked [user]
372 * catalog tables exclusively then decoding prepare can block
373 * till the main transaction is committed because it needs to
374 * lock the catalog tables.
376 * XXX Now, this can even lead to a deadlock if the prepare
377 * transaction is waiting to get it logically replicated for
378 * distributed 2PC. This can be avoided by disallowing
379 * preparing transactions that have locked [user] catalog
380 * tables exclusively but as of now, we ask users not to do
383 DecodePrepare(ctx
, buf
, &parsed
);
387 elog(ERROR
, "unexpected RM_XACT_ID record type: %u", info
);
392 * Handle rmgr STANDBY_ID records for DecodeRecordIntoReorderBuffer().
395 DecodeStandbyOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
397 SnapBuild
*builder
= ctx
->snapshot_builder
;
398 XLogReaderState
*r
= buf
->record
;
399 uint8 info
= XLogRecGetInfo(r
) & ~XLR_INFO_MASK
;
401 ReorderBufferProcessXid(ctx
->reorder
, XLogRecGetXid(r
), buf
->origptr
);
405 case XLOG_RUNNING_XACTS
:
407 xl_running_xacts
*running
= (xl_running_xacts
*) XLogRecGetData(r
);
409 SnapBuildProcessRunningXacts(builder
, buf
->origptr
, running
);
412 * Abort all transactions that we keep track of, that are
413 * older than the record's oldestRunningXid. This is the most
414 * convenient spot for doing so since, in contrast to shutdown
415 * or end-of-recovery checkpoints, we have information about
416 * all running transactions which includes prepared ones,
417 * while shutdown checkpoints just know that no non-prepared
418 * transactions are in progress.
420 ReorderBufferAbortOld(ctx
->reorder
, running
->oldestRunningXid
);
423 case XLOG_STANDBY_LOCK
:
425 case XLOG_INVALIDATIONS
:
428 * We are processing the invalidations at the command level via
429 * XLOG_XACT_INVALIDATIONS. So we don't need to do anything here.
433 elog(ERROR
, "unexpected RM_STANDBY_ID record type: %u", info
);
438 * Handle rmgr HEAP2_ID records for DecodeRecordIntoReorderBuffer().
441 DecodeHeap2Op(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
443 uint8 info
= XLogRecGetInfo(buf
->record
) & XLOG_HEAP_OPMASK
;
444 TransactionId xid
= XLogRecGetXid(buf
->record
);
445 SnapBuild
*builder
= ctx
->snapshot_builder
;
447 ReorderBufferProcessXid(ctx
->reorder
, xid
, buf
->origptr
);
450 * If we don't have snapshot or we are just fast-forwarding, there is no
451 * point in decoding changes.
453 if (SnapBuildCurrentState(builder
) < SNAPBUILD_FULL_SNAPSHOT
||
459 case XLOG_HEAP2_MULTI_INSERT
:
460 if (!ctx
->fast_forward
&&
461 SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
462 DecodeMultiInsert(ctx
, buf
);
464 case XLOG_HEAP2_NEW_CID
:
466 xl_heap_new_cid
*xlrec
;
468 xlrec
= (xl_heap_new_cid
*) XLogRecGetData(buf
->record
);
469 SnapBuildProcessNewCid(builder
, xid
, buf
->origptr
, xlrec
);
473 case XLOG_HEAP2_REWRITE
:
476 * Although these records only exist to serve the needs of logical
477 * decoding, all the work happens as part of crash or archive
478 * recovery, so we don't need to do anything here.
483 * Everything else here is just low level physical stuff we're not
486 case XLOG_HEAP2_FREEZE_PAGE
:
487 case XLOG_HEAP2_PRUNE
:
488 case XLOG_HEAP2_VACUUM
:
489 case XLOG_HEAP2_VISIBLE
:
490 case XLOG_HEAP2_LOCK_UPDATED
:
493 elog(ERROR
, "unexpected RM_HEAP2_ID record type: %u", info
);
498 * Handle rmgr HEAP_ID records for DecodeRecordIntoReorderBuffer().
501 DecodeHeapOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
503 uint8 info
= XLogRecGetInfo(buf
->record
) & XLOG_HEAP_OPMASK
;
504 TransactionId xid
= XLogRecGetXid(buf
->record
);
505 SnapBuild
*builder
= ctx
->snapshot_builder
;
507 ReorderBufferProcessXid(ctx
->reorder
, xid
, buf
->origptr
);
510 * If we don't have snapshot or we are just fast-forwarding, there is no
511 * point in decoding data changes.
513 if (SnapBuildCurrentState(builder
) < SNAPBUILD_FULL_SNAPSHOT
||
519 case XLOG_HEAP_INSERT
:
520 if (SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
521 DecodeInsert(ctx
, buf
);
525 * Treat HOT update as normal updates. There is no useful
526 * information in the fact that we could make it a HOT update
527 * locally and the WAL layout is compatible.
529 case XLOG_HEAP_HOT_UPDATE
:
530 case XLOG_HEAP_UPDATE
:
531 if (SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
532 DecodeUpdate(ctx
, buf
);
535 case XLOG_HEAP_DELETE
:
536 if (SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
537 DecodeDelete(ctx
, buf
);
540 case XLOG_HEAP_TRUNCATE
:
541 if (SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
542 DecodeTruncate(ctx
, buf
);
545 case XLOG_HEAP_INPLACE
:
548 * Inplace updates are only ever performed on catalog tuples and
549 * can, per definition, not change tuple visibility. Since we
550 * don't decode catalog tuples, we're not interested in the
553 * In-place updates can be used either by XID-bearing transactions
554 * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
555 * transactions (e.g. VACUUM). In the former case, the commit
556 * record will include cache invalidations, so we mark the
557 * transaction as catalog modifying here. Currently that's
558 * redundant because the commit will do that as well, but once we
559 * support decoding in-progress relations, this will be important.
561 if (!TransactionIdIsValid(xid
))
564 SnapBuildProcessChange(builder
, xid
, buf
->origptr
);
565 ReorderBufferXidSetCatalogChanges(ctx
->reorder
, xid
, buf
->origptr
);
568 case XLOG_HEAP_CONFIRM
:
569 if (SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
570 DecodeSpecConfirm(ctx
, buf
);
574 /* we don't care about row level locks for now */
578 elog(ERROR
, "unexpected RM_HEAP_ID record type: %u", info
);
584 * Ask output plugin whether we want to skip this PREPARE and send
585 * this transaction as a regular commit later.
588 FilterPrepare(LogicalDecodingContext
*ctx
, TransactionId xid
,
592 * Skip if decoding of two-phase transactions at PREPARE time is not
593 * enabled. In that case, all two-phase transactions are considered
594 * filtered out and will be applied as regular transactions at COMMIT
601 * The filter_prepare callback is optional. When not supplied, all
602 * prepared transactions should go through.
604 if (ctx
->callbacks
.filter_prepare_cb
== NULL
)
607 return filter_prepare_cb_wrapper(ctx
, xid
, gid
);
611 FilterByOrigin(LogicalDecodingContext
*ctx
, RepOriginId origin_id
)
613 if (ctx
->callbacks
.filter_by_origin_cb
== NULL
)
616 return filter_by_origin_cb_wrapper(ctx
, origin_id
);
620 * Handle rmgr LOGICALMSG_ID records for DecodeRecordIntoReorderBuffer().
623 DecodeLogicalMsgOp(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
625 SnapBuild
*builder
= ctx
->snapshot_builder
;
626 XLogReaderState
*r
= buf
->record
;
627 TransactionId xid
= XLogRecGetXid(r
);
628 uint8 info
= XLogRecGetInfo(r
) & ~XLR_INFO_MASK
;
629 RepOriginId origin_id
= XLogRecGetOrigin(r
);
631 xl_logical_message
*message
;
633 if (info
!= XLOG_LOGICAL_MESSAGE
)
634 elog(ERROR
, "unexpected RM_LOGICALMSG_ID record type: %u", info
);
636 ReorderBufferProcessXid(ctx
->reorder
, XLogRecGetXid(r
), buf
->origptr
);
639 * If we don't have snapshot or we are just fast-forwarding, there is no
640 * point in decoding messages.
642 if (SnapBuildCurrentState(builder
) < SNAPBUILD_FULL_SNAPSHOT
||
646 message
= (xl_logical_message
*) XLogRecGetData(r
);
648 if (message
->dbId
!= ctx
->slot
->data
.database
||
649 FilterByOrigin(ctx
, origin_id
))
652 if (message
->transactional
&&
653 !SnapBuildProcessChange(builder
, xid
, buf
->origptr
))
655 else if (!message
->transactional
&&
656 (SnapBuildCurrentState(builder
) != SNAPBUILD_CONSISTENT
||
657 SnapBuildXactNeedsSkip(builder
, buf
->origptr
)))
660 snapshot
= SnapBuildGetOrBuildSnapshot(builder
, xid
);
661 ReorderBufferQueueMessage(ctx
->reorder
, xid
, snapshot
, buf
->endptr
,
662 message
->transactional
,
663 message
->message
, /* first part of message is
665 message
->message_size
,
666 message
->message
+ message
->prefix_size
);
670 * Consolidated commit record handling between the different form of commit
673 * 'two_phase' indicates that caller wants to process the transaction in two
674 * phases, first process prepare if not already done and then process
678 DecodeCommit(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
679 xl_xact_parsed_commit
*parsed
, TransactionId xid
,
682 XLogRecPtr origin_lsn
= InvalidXLogRecPtr
;
683 TimestampTz commit_time
= parsed
->xact_time
;
684 RepOriginId origin_id
= XLogRecGetOrigin(buf
->record
);
687 if (parsed
->xinfo
& XACT_XINFO_HAS_ORIGIN
)
689 origin_lsn
= parsed
->origin_lsn
;
690 commit_time
= parsed
->origin_timestamp
;
693 SnapBuildCommitTxn(ctx
->snapshot_builder
, buf
->origptr
, xid
,
694 parsed
->nsubxacts
, parsed
->subxacts
);
697 * Check whether we are interested in this specific transaction, and tell
698 * the reorderbuffer to forget the content of the (sub-)transactions
701 * We can't just use ReorderBufferAbort() here, because we need to execute
702 * the transaction's invalidations. This currently won't be needed if
703 * we're just skipping over the transaction because currently we only do
704 * so during startup, to get to the first transaction the client needs. As
705 * we have reset the catalog caches before starting to read WAL, and we
706 * haven't yet touched any catalogs, there can't be anything to invalidate.
707 * But if we're "forgetting" this commit because it happened in another
708 * database, the invalidations might be important, because they could be
709 * for shared catalogs and we might have loaded data into the relevant
713 if (DecodeTXNNeedSkip(ctx
, buf
, parsed
->dbId
, origin_id
))
715 for (i
= 0; i
< parsed
->nsubxacts
; i
++)
717 ReorderBufferForget(ctx
->reorder
, parsed
->subxacts
[i
], buf
->origptr
);
719 ReorderBufferForget(ctx
->reorder
, xid
, buf
->origptr
);
724 /* tell the reorderbuffer about the surviving subtransactions */
725 for (i
= 0; i
< parsed
->nsubxacts
; i
++)
727 ReorderBufferCommitChild(ctx
->reorder
, xid
, parsed
->subxacts
[i
],
728 buf
->origptr
, buf
->endptr
);
732 * Send the final commit record if the transaction data is already
733 * decoded, otherwise, process the entire transaction.
737 ReorderBufferFinishPrepared(ctx
->reorder
, xid
, buf
->origptr
, buf
->endptr
,
738 SnapBuildGetTwoPhaseAt(ctx
->snapshot_builder
),
739 commit_time
, origin_id
, origin_lsn
,
740 parsed
->twophase_gid
, true);
744 ReorderBufferCommit(ctx
->reorder
, xid
, buf
->origptr
, buf
->endptr
,
745 commit_time
, origin_id
, origin_lsn
);
749 * Update the decoding stats at transaction prepare/commit/abort.
750 * Additionally we send the stats when we spill or stream the changes to
751 * avoid losing them in case the decoding is interrupted. It is not clear
752 * that sending more or less frequently than this would be better.
754 UpdateDecodingStats(ctx
);
758 * Decode PREPARE record. Similar logic as in DecodeCommit.
760 * Note that we don't skip prepare even if have detected concurrent abort
761 * because it is quite possible that we had already sent some changes before we
762 * detect abort in which case we need to abort those changes in the subscriber.
763 * To abort such changes, we do send the prepare and then the rollback prepared
764 * which is what happened on the publisher-side as well. Now, we can invent a
765 * new abort API wherein in such cases we send abort and skip sending prepared
766 * and rollback prepared but then it is not that straightforward because we
767 * might have streamed this transaction by that time in which case it is
768 * handled when the rollback is encountered. It is not impossible to optimize
769 * the concurrent abort case but it can introduce design complexity w.r.t
770 * handling different cases so leaving it for now as it doesn't seem worth it.
773 DecodePrepare(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
774 xl_xact_parsed_prepare
*parsed
)
776 SnapBuild
*builder
= ctx
->snapshot_builder
;
777 XLogRecPtr origin_lsn
= parsed
->origin_lsn
;
778 TimestampTz prepare_time
= parsed
->xact_time
;
779 XLogRecPtr origin_id
= XLogRecGetOrigin(buf
->record
);
781 TransactionId xid
= parsed
->twophase_xid
;
783 if (parsed
->origin_timestamp
!= 0)
784 prepare_time
= parsed
->origin_timestamp
;
787 * Remember the prepare info for a txn so that it can be used later in
788 * commit prepared if required. See ReorderBufferFinishPrepared.
790 if (!ReorderBufferRememberPrepareInfo(ctx
->reorder
, xid
, buf
->origptr
,
791 buf
->endptr
, prepare_time
, origin_id
,
795 /* We can't start streaming unless a consistent state is reached. */
796 if (SnapBuildCurrentState(builder
) < SNAPBUILD_CONSISTENT
)
798 ReorderBufferSkipPrepare(ctx
->reorder
, xid
);
803 * Check whether we need to process this transaction. See
804 * DecodeTXNNeedSkip for the reasons why we sometimes want to skip the
807 * We can't call ReorderBufferForget as we did in DecodeCommit as the txn
808 * hasn't yet been committed, removing this txn before a commit might
809 * result in the computation of an incorrect restart_lsn. See
810 * SnapBuildProcessRunningXacts. But we need to process cache
811 * invalidations if there are any for the reasons mentioned in
814 if (DecodeTXNNeedSkip(ctx
, buf
, parsed
->dbId
, origin_id
))
816 ReorderBufferSkipPrepare(ctx
->reorder
, xid
);
817 ReorderBufferInvalidate(ctx
->reorder
, xid
, buf
->origptr
);
821 /* Tell the reorderbuffer about the surviving subtransactions. */
822 for (i
= 0; i
< parsed
->nsubxacts
; i
++)
824 ReorderBufferCommitChild(ctx
->reorder
, xid
, parsed
->subxacts
[i
],
825 buf
->origptr
, buf
->endptr
);
828 /* replay actions of all transaction + subtransactions in order */
829 ReorderBufferPrepare(ctx
->reorder
, xid
, parsed
->twophase_gid
);
832 * Update the decoding stats at transaction prepare/commit/abort.
833 * Additionally we send the stats when we spill or stream the changes to
834 * avoid losing them in case the decoding is interrupted. It is not clear
835 * that sending more or less frequently than this would be better.
837 UpdateDecodingStats(ctx
);
842 * Get the data from the various forms of abort records and pass it on to
843 * snapbuild.c and reorderbuffer.c.
845 * 'two_phase' indicates to finish prepared transaction.
848 DecodeAbort(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
849 xl_xact_parsed_abort
*parsed
, TransactionId xid
,
853 XLogRecPtr origin_lsn
= InvalidXLogRecPtr
;
854 TimestampTz abort_time
= parsed
->xact_time
;
855 XLogRecPtr origin_id
= XLogRecGetOrigin(buf
->record
);
858 if (parsed
->xinfo
& XACT_XINFO_HAS_ORIGIN
)
860 origin_lsn
= parsed
->origin_lsn
;
861 abort_time
= parsed
->origin_timestamp
;
865 * Check whether we need to process this transaction. See
866 * DecodeTXNNeedSkip for the reasons why we sometimes want to skip the
869 skip_xact
= DecodeTXNNeedSkip(ctx
, buf
, parsed
->dbId
, origin_id
);
872 * Send the final rollback record for a prepared transaction unless we
873 * need to skip it. For non-two-phase xacts, simply forget the xact.
875 if (two_phase
&& !skip_xact
)
877 ReorderBufferFinishPrepared(ctx
->reorder
, xid
, buf
->origptr
, buf
->endptr
,
879 abort_time
, origin_id
, origin_lsn
,
880 parsed
->twophase_gid
, false);
884 for (i
= 0; i
< parsed
->nsubxacts
; i
++)
886 ReorderBufferAbort(ctx
->reorder
, parsed
->subxacts
[i
],
887 buf
->record
->EndRecPtr
);
890 ReorderBufferAbort(ctx
->reorder
, xid
, buf
->record
->EndRecPtr
);
893 /* update the decoding stats */
894 UpdateDecodingStats(ctx
);
898 * Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
900 * Deletes can contain the new tuple.
903 DecodeInsert(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
908 XLogReaderState
*r
= buf
->record
;
909 xl_heap_insert
*xlrec
;
910 ReorderBufferChange
*change
;
911 RelFileNode target_node
;
913 xlrec
= (xl_heap_insert
*) XLogRecGetData(r
);
916 * Ignore insert records without new tuples (this does happen when
917 * raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).
919 if (!(xlrec
->flags
& XLH_INSERT_CONTAINS_NEW_TUPLE
))
922 /* only interested in our database */
923 XLogRecGetBlockTag(r
, 0, &target_node
, NULL
, NULL
);
924 if (target_node
.dbNode
!= ctx
->slot
->data
.database
)
927 /* output plugin doesn't look for this origin, no need to queue */
928 if (FilterByOrigin(ctx
, XLogRecGetOrigin(r
)))
931 change
= ReorderBufferGetChange(ctx
->reorder
);
932 if (!(xlrec
->flags
& XLH_INSERT_IS_SPECULATIVE
))
933 change
->action
= REORDER_BUFFER_CHANGE_INSERT
;
935 change
->action
= REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT
;
936 change
->origin_id
= XLogRecGetOrigin(r
);
938 memcpy(&change
->data
.tp
.relnode
, &target_node
, sizeof(RelFileNode
));
940 tupledata
= XLogRecGetBlockData(r
, 0, &datalen
);
941 tuplelen
= datalen
- SizeOfHeapHeader
;
943 change
->data
.tp
.newtuple
=
944 ReorderBufferGetTupleBuf(ctx
->reorder
, tuplelen
);
946 DecodeXLogTuple(tupledata
, datalen
, change
->data
.tp
.newtuple
);
948 change
->data
.tp
.clear_toast_afterwards
= true;
950 ReorderBufferQueueChange(ctx
->reorder
, XLogRecGetXid(r
), buf
->origptr
,
952 xlrec
->flags
& XLH_INSERT_ON_TOAST_RELATION
);
956 * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
957 * in the record, from wal into proper tuplebufs.
959 * Updates can possibly contain a new tuple and the old primary key.
962 DecodeUpdate(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
964 XLogReaderState
*r
= buf
->record
;
965 xl_heap_update
*xlrec
;
966 ReorderBufferChange
*change
;
968 RelFileNode target_node
;
970 xlrec
= (xl_heap_update
*) XLogRecGetData(r
);
972 /* only interested in our database */
973 XLogRecGetBlockTag(r
, 0, &target_node
, NULL
, NULL
);
974 if (target_node
.dbNode
!= ctx
->slot
->data
.database
)
977 /* output plugin doesn't look for this origin, no need to queue */
978 if (FilterByOrigin(ctx
, XLogRecGetOrigin(r
)))
981 change
= ReorderBufferGetChange(ctx
->reorder
);
982 change
->action
= REORDER_BUFFER_CHANGE_UPDATE
;
983 change
->origin_id
= XLogRecGetOrigin(r
);
984 memcpy(&change
->data
.tp
.relnode
, &target_node
, sizeof(RelFileNode
));
986 if (xlrec
->flags
& XLH_UPDATE_CONTAINS_NEW_TUPLE
)
991 data
= XLogRecGetBlockData(r
, 0, &datalen
);
993 tuplelen
= datalen
- SizeOfHeapHeader
;
995 change
->data
.tp
.newtuple
=
996 ReorderBufferGetTupleBuf(ctx
->reorder
, tuplelen
);
998 DecodeXLogTuple(data
, datalen
, change
->data
.tp
.newtuple
);
1001 if (xlrec
->flags
& XLH_UPDATE_CONTAINS_OLD
)
1006 /* caution, remaining data in record is not aligned */
1007 data
= XLogRecGetData(r
) + SizeOfHeapUpdate
;
1008 datalen
= XLogRecGetDataLen(r
) - SizeOfHeapUpdate
;
1009 tuplelen
= datalen
- SizeOfHeapHeader
;
1011 change
->data
.tp
.oldtuple
=
1012 ReorderBufferGetTupleBuf(ctx
->reorder
, tuplelen
);
1014 DecodeXLogTuple(data
, datalen
, change
->data
.tp
.oldtuple
);
1017 change
->data
.tp
.clear_toast_afterwards
= true;
1019 ReorderBufferQueueChange(ctx
->reorder
, XLogRecGetXid(r
), buf
->origptr
,
1024 * Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.
1026 * Deletes can possibly contain the old primary key.
1029 DecodeDelete(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
1031 XLogReaderState
*r
= buf
->record
;
1032 xl_heap_delete
*xlrec
;
1033 ReorderBufferChange
*change
;
1034 RelFileNode target_node
;
1036 xlrec
= (xl_heap_delete
*) XLogRecGetData(r
);
1038 /* only interested in our database */
1039 XLogRecGetBlockTag(r
, 0, &target_node
, NULL
, NULL
);
1040 if (target_node
.dbNode
!= ctx
->slot
->data
.database
)
1043 /* output plugin doesn't look for this origin, no need to queue */
1044 if (FilterByOrigin(ctx
, XLogRecGetOrigin(r
)))
1047 change
= ReorderBufferGetChange(ctx
->reorder
);
1049 if (xlrec
->flags
& XLH_DELETE_IS_SUPER
)
1050 change
->action
= REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT
;
1052 change
->action
= REORDER_BUFFER_CHANGE_DELETE
;
1054 change
->origin_id
= XLogRecGetOrigin(r
);
1056 memcpy(&change
->data
.tp
.relnode
, &target_node
, sizeof(RelFileNode
));
1058 /* old primary key stored */
1059 if (xlrec
->flags
& XLH_DELETE_CONTAINS_OLD
)
1061 Size datalen
= XLogRecGetDataLen(r
) - SizeOfHeapDelete
;
1062 Size tuplelen
= datalen
- SizeOfHeapHeader
;
1064 Assert(XLogRecGetDataLen(r
) > (SizeOfHeapDelete
+ SizeOfHeapHeader
));
1066 change
->data
.tp
.oldtuple
=
1067 ReorderBufferGetTupleBuf(ctx
->reorder
, tuplelen
);
1069 DecodeXLogTuple((char *) xlrec
+ SizeOfHeapDelete
,
1070 datalen
, change
->data
.tp
.oldtuple
);
1073 change
->data
.tp
.clear_toast_afterwards
= true;
1075 ReorderBufferQueueChange(ctx
->reorder
, XLogRecGetXid(r
), buf
->origptr
,
1080 * Parse XLOG_HEAP_TRUNCATE from wal
1083 DecodeTruncate(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
1085 XLogReaderState
*r
= buf
->record
;
1086 xl_heap_truncate
*xlrec
;
1087 ReorderBufferChange
*change
;
1089 xlrec
= (xl_heap_truncate
*) XLogRecGetData(r
);
1091 /* only interested in our database */
1092 if (xlrec
->dbId
!= ctx
->slot
->data
.database
)
1095 /* output plugin doesn't look for this origin, no need to queue */
1096 if (FilterByOrigin(ctx
, XLogRecGetOrigin(r
)))
1099 change
= ReorderBufferGetChange(ctx
->reorder
);
1100 change
->action
= REORDER_BUFFER_CHANGE_TRUNCATE
;
1101 change
->origin_id
= XLogRecGetOrigin(r
);
1102 if (xlrec
->flags
& XLH_TRUNCATE_CASCADE
)
1103 change
->data
.truncate
.cascade
= true;
1104 if (xlrec
->flags
& XLH_TRUNCATE_RESTART_SEQS
)
1105 change
->data
.truncate
.restart_seqs
= true;
1106 change
->data
.truncate
.nrelids
= xlrec
->nrelids
;
1107 change
->data
.truncate
.relids
= ReorderBufferGetRelids(ctx
->reorder
,
1109 memcpy(change
->data
.truncate
.relids
, xlrec
->relids
,
1110 xlrec
->nrelids
* sizeof(Oid
));
1111 ReorderBufferQueueChange(ctx
->reorder
, XLogRecGetXid(r
),
1112 buf
->origptr
, change
, false);
1116 * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.
1118 * Currently MULTI_INSERT will always contain the full tuples.
1121 DecodeMultiInsert(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
1123 XLogReaderState
*r
= buf
->record
;
1124 xl_heap_multi_insert
*xlrec
;
1131 xlrec
= (xl_heap_multi_insert
*) XLogRecGetData(r
);
1134 * Ignore insert records without new tuples. This happens when a
1135 * multi_insert is done on a catalog or on a non-persistent relation.
1137 if (!(xlrec
->flags
& XLH_INSERT_CONTAINS_NEW_TUPLE
))
1140 /* only interested in our database */
1141 XLogRecGetBlockTag(r
, 0, &rnode
, NULL
, NULL
);
1142 if (rnode
.dbNode
!= ctx
->slot
->data
.database
)
1145 /* output plugin doesn't look for this origin, no need to queue */
1146 if (FilterByOrigin(ctx
, XLogRecGetOrigin(r
)))
1150 * We know that this multi_insert isn't for a catalog, so the block should
1151 * always have data even if a full-page write of it is taken.
1153 tupledata
= XLogRecGetBlockData(r
, 0, &tuplelen
);
1154 Assert(tupledata
!= NULL
);
1157 for (i
= 0; i
< xlrec
->ntuples
; i
++)
1159 ReorderBufferChange
*change
;
1160 xl_multi_insert_tuple
*xlhdr
;
1162 ReorderBufferTupleBuf
*tuple
;
1163 HeapTupleHeader header
;
1165 change
= ReorderBufferGetChange(ctx
->reorder
);
1166 change
->action
= REORDER_BUFFER_CHANGE_INSERT
;
1167 change
->origin_id
= XLogRecGetOrigin(r
);
1169 memcpy(&change
->data
.tp
.relnode
, &rnode
, sizeof(RelFileNode
));
1171 xlhdr
= (xl_multi_insert_tuple
*) SHORTALIGN(data
);
1172 data
= ((char *) xlhdr
) + SizeOfMultiInsertTuple
;
1173 datalen
= xlhdr
->datalen
;
1175 change
->data
.tp
.newtuple
=
1176 ReorderBufferGetTupleBuf(ctx
->reorder
, datalen
);
1178 tuple
= change
->data
.tp
.newtuple
;
1179 header
= tuple
->tuple
.t_data
;
1181 /* not a disk based tuple */
1182 ItemPointerSetInvalid(&tuple
->tuple
.t_self
);
1185 * We can only figure this out after reassembling the transactions.
1187 tuple
->tuple
.t_tableOid
= InvalidOid
;
1189 tuple
->tuple
.t_len
= datalen
+ SizeofHeapTupleHeader
;
1191 memset(header
, 0, SizeofHeapTupleHeader
);
1193 memcpy((char *) tuple
->tuple
.t_data
+ SizeofHeapTupleHeader
,
1196 header
->t_infomask
= xlhdr
->t_infomask
;
1197 header
->t_infomask2
= xlhdr
->t_infomask2
;
1198 header
->t_hoff
= xlhdr
->t_hoff
;
1201 * Reset toast reassembly state only after the last row in the last
1202 * xl_multi_insert_tuple record emitted by one heap_multi_insert()
1205 if (xlrec
->flags
& XLH_INSERT_LAST_IN_MULTI
&&
1206 (i
+ 1) == xlrec
->ntuples
)
1207 change
->data
.tp
.clear_toast_afterwards
= true;
1209 change
->data
.tp
.clear_toast_afterwards
= false;
1211 ReorderBufferQueueChange(ctx
->reorder
, XLogRecGetXid(r
),
1212 buf
->origptr
, change
, false);
1214 /* move to the next xl_multi_insert_tuple entry */
1217 Assert(data
== tupledata
+ tuplelen
);
1221 * Parse XLOG_HEAP_CONFIRM from wal into a confirmation change.
1223 * This is pretty trivial, all the state essentially already setup by the
1224 * speculative insertion.
1227 DecodeSpecConfirm(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
)
1229 XLogReaderState
*r
= buf
->record
;
1230 ReorderBufferChange
*change
;
1231 RelFileNode target_node
;
1233 /* only interested in our database */
1234 XLogRecGetBlockTag(r
, 0, &target_node
, NULL
, NULL
);
1235 if (target_node
.dbNode
!= ctx
->slot
->data
.database
)
1238 /* output plugin doesn't look for this origin, no need to queue */
1239 if (FilterByOrigin(ctx
, XLogRecGetOrigin(r
)))
1242 change
= ReorderBufferGetChange(ctx
->reorder
);
1243 change
->action
= REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM
;
1244 change
->origin_id
= XLogRecGetOrigin(r
);
1246 memcpy(&change
->data
.tp
.relnode
, &target_node
, sizeof(RelFileNode
));
1248 change
->data
.tp
.clear_toast_afterwards
= true;
1250 ReorderBufferQueueChange(ctx
->reorder
, XLogRecGetXid(r
), buf
->origptr
,
1256 * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
1257 * (but not by heap_multi_insert) into a tuplebuf.
1259 * The size 'len' and the pointer 'data' in the record need to be
1260 * computed outside as they are record specific.
1263 DecodeXLogTuple(char *data
, Size len
, ReorderBufferTupleBuf
*tuple
)
1265 xl_heap_header xlhdr
;
1266 int datalen
= len
- SizeOfHeapHeader
;
1267 HeapTupleHeader header
;
1269 Assert(datalen
>= 0);
1271 tuple
->tuple
.t_len
= datalen
+ SizeofHeapTupleHeader
;
1272 header
= tuple
->tuple
.t_data
;
1274 /* not a disk based tuple */
1275 ItemPointerSetInvalid(&tuple
->tuple
.t_self
);
1277 /* we can only figure this out after reassembling the transactions */
1278 tuple
->tuple
.t_tableOid
= InvalidOid
;
1280 /* data is not stored aligned, copy to aligned storage */
1281 memcpy((char *) &xlhdr
,
1285 memset(header
, 0, SizeofHeapTupleHeader
);
1287 memcpy(((char *) tuple
->tuple
.t_data
) + SizeofHeapTupleHeader
,
1288 data
+ SizeOfHeapHeader
,
1291 header
->t_infomask
= xlhdr
.t_infomask
;
1292 header
->t_infomask2
= xlhdr
.t_infomask2
;
1293 header
->t_hoff
= xlhdr
.t_hoff
;
1297 * Check whether we are interested in this specific transaction.
1299 * There can be several reasons we might not be interested in this
1301 * 1) We might not be interested in decoding transactions up to this
1302 * LSN. This can happen because we previously decoded it and now just
1303 * are restarting or if we haven't assembled a consistent snapshot yet.
1304 * 2) The transaction happened in another database.
1305 * 3) The output plugin is not interested in the origin.
1306 * 4) We are doing fast-forwarding
1309 DecodeTXNNeedSkip(LogicalDecodingContext
*ctx
, XLogRecordBuffer
*buf
,
1310 Oid txn_dbid
, RepOriginId origin_id
)
1312 return (SnapBuildXactNeedsSkip(ctx
->snapshot_builder
, buf
->origptr
) ||
1313 (txn_dbid
!= InvalidOid
&& txn_dbid
!= ctx
->slot
->data
.database
) ||
1314 ctx
->fast_forward
|| FilterByOrigin(ctx
, origin_id
));