From cbfbda78413a5b2f4807e029407dcc98a0e63162 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 27 Jun 2024 21:06:32 +0300 Subject: [PATCH] Fix MVCC bug with prepared xact with subxacts on standby We did not recover the subtransaction IDs of prepared transactions when starting a hot standby from a shutdown checkpoint. As a result, such subtransactions were considered as aborted, rather than in-progress. That would lead to hint bits being set incorrectly, and the subtransactions suddenly becoming visible to old snapshots when the prepared transaction was committed. To fix, update pg_subtrans with prepared transactions' subxids when starting hot standby from a shutdown checkpoint. The snapshots taken from that state need to be marked as "suboverflowed", so that we also check the pg_subtrans. Backport to all supported versions. Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi --- src/backend/access/transam/twophase.c | 7 +++--- src/backend/access/transam/xlog.c | 14 ++++++----- src/backend/storage/ipc/procarray.c | 18 +++++++++++--- src/backend/storage/ipc/standby.c | 6 ++--- src/include/storage/standby.h | 10 +++++++- src/test/recovery/t/009_twophase.pl | 46 +++++++++++++++++++++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 7 files changed, 85 insertions(+), 17 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index bf451d42ff..9a8257fcaf 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -2035,9 +2035,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) * This is never called at the end of recovery - we use * RecoverPreparedTransactions() at that point. * - * The lack of calls to SubTransSetParent() calls here is by design; - * those calls are made by RecoverPreparedTransactions() at the end of recovery - * for those xacts that need this. 
+ * This updates pg_subtrans, so that any subtransactions will be correctly + * seen as in-progress in snapshots taken during recovery. */ void StandbyRecoverPreparedTransactions(void) @@ -2057,7 +2056,7 @@ StandbyRecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, - gxact->ondisk, false, false); + gxact->ondisk, true, false); if (buf != NULL) pfree(buf); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8dcdf5a764..a69337f2d4 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5777,6 +5777,9 @@ StartupXLOG(void) RunningTransactionsData running; TransactionId latestCompletedXid; + /* Update pg_subtrans entries for any prepared transactions */ + StandbyRecoverPreparedTransactions(); + /* * Construct a RunningTransactions snapshot representing a * shut down server, with only prepared transactions still @@ -5785,7 +5788,7 @@ StartupXLOG(void) */ running.xcnt = nxids; running.subxcnt = 0; - running.subxid_overflow = false; + running.subxid_status = SUBXIDS_IN_SUBTRANS; running.nextXid = XidFromFullTransactionId(checkPoint.nextXid); running.oldestRunningXid = oldestActiveXID; latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid); @@ -5795,8 +5798,6 @@ StartupXLOG(void) running.xids = xids; ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(); } } @@ -8244,6 +8245,9 @@ xlog_redo(XLogReaderState *record) oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); + /* Update pg_subtrans entries for any prepared transactions */ + StandbyRecoverPreparedTransactions(); + /* * Construct a RunningTransactions snapshot representing a shut * down server, with only prepared transactions still alive. 
We're @@ -8252,7 +8256,7 @@ xlog_redo(XLogReaderState *record) */ running.xcnt = nxids; running.subxcnt = 0; - running.subxid_overflow = false; + running.subxid_status = SUBXIDS_IN_SUBTRANS; running.nextXid = XidFromFullTransactionId(checkPoint.nextXid); running.oldestRunningXid = oldestActiveXID; latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid); @@ -8262,8 +8266,6 @@ xlog_redo(XLogReaderState *record) running.xids = xids; ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(); } /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index d5165aa0d9..387b4a405b 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1106,7 +1106,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) * If the snapshot isn't overflowed or if its empty we can reset our * pending state and use this snapshot instead. */ - if (!running->subxid_overflow || running->xcnt == 0) + if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0) { /* * If we have already collected known assigned xids, we need to @@ -1258,7 +1258,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) * missing, so conservatively assume the last one is latestObservedXid. * ---------- */ - if (running->subxid_overflow) + if (running->subxid_status == SUBXIDS_MISSING) { standbyState = STANDBY_SNAPSHOT_PENDING; @@ -1270,6 +1270,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) standbyState = STANDBY_SNAPSHOT_READY; standbySnapshotPendingXmin = InvalidTransactionId; + + /* + * If the 'xids' array didn't include all subtransactions, we have to + * mark any snapshots taken as overflowed. 
+ */ + if (running->subxid_status == SUBXIDS_IN_SUBTRANS) + procArray->lastOverflowedXid = latestObservedXid; + else + { + Assert(running->subxid_status == SUBXIDS_IN_ARRAY); + procArray->lastOverflowedXid = InvalidTransactionId; + } } /* @@ -2833,7 +2845,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->xcnt = count - subcount; CurrentRunningXacts->subxcnt = subcount; - CurrentRunningXacts->subxid_overflow = suboverflowed; + CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY; CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid); CurrentRunningXacts->oldestRunningXid = oldestRunningXid; CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid; diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 87b04e51b3..872679ca44 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -1184,7 +1184,7 @@ standby_redo(XLogReaderState *record) running.xcnt = xlrec->xcnt; running.subxcnt = xlrec->subxcnt; - running.subxid_overflow = xlrec->subxid_overflow; + running.subxid_status = xlrec->subxid_overflow ? 
SUBXIDS_MISSING : SUBXIDS_IN_ARRAY; running.nextXid = xlrec->nextXid; running.latestCompletedXid = xlrec->latestCompletedXid; running.oldestRunningXid = xlrec->oldestRunningXid; @@ -1349,7 +1349,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) xlrec.xcnt = CurrRunningXacts->xcnt; xlrec.subxcnt = CurrRunningXacts->subxcnt; - xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow; + xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY); xlrec.nextXid = CurrRunningXacts->nextXid; xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid; xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; @@ -1366,7 +1366,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); - if (CurrRunningXacts->subxid_overflow) + if (xlrec.subxid_overflow) elog(DEBUG2, "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 0fc0804e26..cce0bc521e 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -75,11 +75,19 @@ extern void StandbyReleaseOldLocks(TransactionId oldxid); * almost immediately see the data we need to begin executing queries. 
*/ +typedef enum +{ + SUBXIDS_IN_ARRAY, /* xids array includes all running subxids */ + SUBXIDS_MISSING, /* snapshot overflowed, subxids are missing */ + SUBXIDS_IN_SUBTRANS, /* subxids are not included in 'xids', but + * pg_subtrans is fully up-to-date */ +} subxids_array_status; + typedef struct RunningTransactionsData { int xcnt; /* # of xact ids in xids[] */ int subxcnt; /* # of subxact ids in xids[] */ - bool subxid_overflow; /* snapshot overflowed, subxids missing */ + subxids_array_status subxid_status; TransactionId nextXid; /* xid from TransamVariables->nextXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ TransactionId oldestDatabaseRunningXid; /* same as above, but within the diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 701f9cc20f..21a65cd298 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -313,6 +313,52 @@ $cur_standby->start; $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'"); ############################################################################### +# Check visibility of prepared transactions in standby after a restart while +# primary is down. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text); + BEGIN; + INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_standby_mvcc'; + "); +$cur_primary->stop; +$cur_standby->restart; + +# Acquire a snapshot in standby, before we commit the prepared transaction +my $standby_session = $cur_standby->background_psql('postgres', on_error_die => 1); +$standby_session->query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ"); +$psql_out = $standby_session->query_safe( + "SELECT count(*) FROM t_009_tbl_standby_mvcc"); +is($psql_out, '0', + "Prepared transaction not visible in standby before commit"); + +# Commit the transaction in primary +$cur_primary->start; +$cur_primary->psql('postgres', " +SET synchronous_commit='remote_apply'; -- To ensure the standby is caught up +COMMIT PREPARED 'xact_009_standby_mvcc'; +"); + +# Still not visible to the old snapshot +$psql_out = $standby_session->query_safe( + "SELECT count(*) FROM t_009_tbl_standby_mvcc"); +is($psql_out, '0', + "Committed prepared transaction not visible to old snapshot in standby"); + +# Is visible to a new snapshot +$standby_session->query_safe("COMMIT"); +$psql_out = $standby_session->query_safe( + "SELECT count(*) FROM t_009_tbl_standby_mvcc"); +is($psql_out, '2', + "Committed prepared transaction is visible to new snapshot in standby"); +$standby_session->quit; + +############################################################################### # Check for a lock conflict between prepared transaction with DDL inside and # replay of XLOG_STANDBY_LOCK wal record. 
############################################################################### diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 61ad417cde..d90982466c 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3931,6 +3931,7 @@ string substitute_actual_parameters_context substitute_actual_srf_parameters_context substitute_phv_relids_context +subxids_array_status symbol tablespaceinfo td_entry -- 2.11.4.GIT