contrib/amcheck/verify_heapam.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * verify_heapam.c
   4  *        Functions to check postgresql heap relations for corruption
   5  *
   6  * Copyright (c) 2016-2022, PostgreSQL Global Development Group
   7  *
   8  *        contrib/amcheck/verify_heapam.c
   9  *-------------------------------------------------------------------------
  10  */
  11 #include "postgres.h"
  12
  13 #include "access/detoast.h"
  14 #include "access/genam.h"
  15 #include "access/heapam.h"
  16 #include "access/heaptoast.h"
  17 #include "access/multixact.h"
  18 #include "access/toast_internals.h"
  19 #include "access/visibilitymap.h"
  20 #include "catalog/pg_am.h"
  21 #include "funcapi.h"
  22 #include "miscadmin.h"
  23 #include "storage/bufmgr.h"
  24 #include "storage/procarray.h"
  25 #include "utils/builtins.h"
  26 #include "utils/fmgroids.h"
  27
/* Declare verify_heapam as a version-1 calling convention function. */
PG_FUNCTION_INFO_V1(verify_heapam);

/*
 * The number of columns in tuples returned by verify_heapam.  This must match
 * the columns set up by verify_heapam_tupdesc (blkno, offnum, attnum, msg).
 */
#define HEAPCHECK_RELATION_COLS 4

/* The largest valid toast va_rawsize */
#define VARLENA_SIZE_LIMIT 0x3FFFFFFF
  35
  36 /*
  37  * Despite the name, we use this for reporting problems with both XIDs and
  38  * MXIDs.
  39  */
  40 typedef enum XidBoundsViolation
  41 {
  42         XID_INVALID,
  43         XID_IN_FUTURE,
  44         XID_PRECEDES_CLUSTERMIN,
  45         XID_PRECEDES_RELMIN,
  46         XID_BOUNDS_OK
  47 } XidBoundsViolation;
  48
  49 typedef enum XidCommitStatus
  50 {
  51         XID_COMMITTED,
  52         XID_IS_CURRENT_XID,
  53         XID_IN_PROGRESS,
  54         XID_ABORTED
  55 } XidCommitStatus;
  56
  57 typedef enum SkipPages
  58 {
  59         SKIP_PAGES_ALL_FROZEN,
  60         SKIP_PAGES_ALL_VISIBLE,
  61         SKIP_PAGES_NONE
  62 } SkipPages;
  63
/*
 * Struct holding information about a toasted attribute sufficient to both
 * check the toasted attribute and, if found to be corrupt, to report where it
 * was encountered in the main table.
 *
 * verify_heapam() collects these in HeapCheckContext.toasted_attributes and
 * passes each one to check_toasted_attribute() after it has released the
 * heap page's buffer; report_toast_corruption() uses blkno, offnum and attnum
 * as the reported location.
 */
typedef struct ToastedAttribute
{
	/* NOTE(review): presumably a copy of the attribute's external toast pointer */
	struct varatt_external toast_pointer;
	BlockNumber blkno;			/* block in main table */
	OffsetNumber offnum;		/* offset in main table */
	AttrNumber	attnum;			/* attribute in main table */
} ToastedAttribute;
  76
/*
 * Struct holding the running context information during
 * a lifetime of a verify_heapam execution.
 *
 * verify_heapam() zeroes the whole struct before use, so any field it does
 * not set explicitly starts out as zero/NULL/false.
 */
typedef struct HeapCheckContext
{
	/*
	 * Cached copies of values from ShmemVariableCache and computed values
	 * from them.
	 */
	FullTransactionId next_fxid;	/* ShmemVariableCache->nextXid */
	TransactionId next_xid;		/* 32-bit version of next_fxid */
	TransactionId oldest_xid;	/* ShmemVariableCache->oldestXid; replaced by
								 * relfrozenxid in verify_heapam() when that
								 * is a normal XID */
	FullTransactionId oldest_fxid;	/* 64-bit version of oldest_xid, computed
									 * relative to next_fxid */
	TransactionId safe_xmin;	/* this XID and newer ones can't become
								 * all-visible while we're running; taken from
								 * the xmin of our transaction snapshot */

	/*
	 * Cached copy of value from MultiXactState
	 */
	MultiXactId next_mxact;		/* MultiXactState->nextMXact */
	MultiXactId oldest_mxact;	/* MultiXactState->oldestMultiXactId */

	/*
	 * Cached copies of the most recently checked xid and its status.
	 * cached_xid starts out as InvalidTransactionId.
	 */
	TransactionId cached_xid;
	XidCommitStatus cached_status;

	/* Values concerning the heap relation being checked */
	Relation	rel;
	TransactionId relfrozenxid; /* rel->rd_rel->relfrozenxid */
	FullTransactionId relfrozenfxid;	/* 64-bit version of relfrozenxid */
	/* NOTE(review): holds rel->rd_rel->relminmxid, yet is declared TransactionId */
	TransactionId relminmxid;
	Relation	toast_rel;		/* NULL if there is no toast relation or toast
								 * checking was not requested */
	Relation   *toast_indexes;	/* as opened by toast_open_indexes(), or NULL */
	Relation	valid_toast_index;	/* the entry of toast_indexes selected by
									 * toast_open_indexes()'s return value */
	int			num_toast_indexes;

	/* Values for iterating over pages in the relation */
	BlockNumber blkno;
	BufferAccessStrategy bstrategy;
	Buffer		buffer;
	Page		page;

	/* Values for iterating over tuples within a page */
	OffsetNumber offnum;
	ItemId		itemid;
	uint16		lp_len;			/* ItemIdGetLength(itemid) */
	uint16		lp_off;			/* ItemIdGetOffset(itemid) */
	HeapTupleHeader tuphdr;
	int			natts;			/* HeapTupleHeaderGetNatts(tuphdr) */

	/* Values for iterating over attributes within the tuple */
	uint32		offset;			/* offset in tuple data */
	AttrNumber	attnum;			/* -1 when no individual attribute is being
								 * examined, so that corruption reports show
								 * attnum as NULL */

	/* True if tuple's xmax makes it eligible for pruning */
	bool		tuple_could_be_pruned;

	/*
	 * List of ToastedAttribute structs for toasted attributes which are not
	 * eligible for pruning and should be checked
	 */
	List	   *toasted_attributes;

	/*
	 * Whether verify_heapam has yet encountered any corrupt tuples.  Set by
	 * report_corruption() and report_toast_corruption().
	 */
	bool		is_corrupt;

	/*
	 * The descriptor and tuplestore for verify_heapam's result tuples; both
	 * are created in the per-query memory context.
	 */
	TupleDesc	tupdesc;
	Tuplestorestate *tupstore;
} HeapCheckContext;
 151
/* Internal implementation */

/* Checks of tuples, their attributes, and associated toast data */
static void check_tuple(HeapCheckContext *ctx);
static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
							  ToastedAttribute *ta, int32 *expected_chunk_seq,
							  uint32 extsize);

static bool check_tuple_attribute(HeapCheckContext *ctx);
static void check_toasted_attribute(HeapCheckContext *ctx,
									ToastedAttribute *ta);

static bool check_tuple_header(HeapCheckContext *ctx);
static bool check_tuple_visibility(HeapCheckContext *ctx);

/* Corruption reporting; both reporting functions pfree their msg argument */
static void report_corruption(HeapCheckContext *ctx, char *msg);
static void report_toast_corruption(HeapCheckContext *ctx,
									ToastedAttribute *ta, char *msg);
static TupleDesc verify_heapam_tupdesc(void);

/* Transaction ID and multixact ID helpers */
static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid,
														const HeapCheckContext *ctx);
static void update_cached_xid_range(HeapCheckContext *ctx);
static void update_cached_mxid_range(HeapCheckContext *ctx);
static XidBoundsViolation check_mxid_in_range(MultiXactId mxid,
											  HeapCheckContext *ctx);
static XidBoundsViolation check_mxid_valid_in_rel(MultiXactId mxid,
												  HeapCheckContext *ctx);
static XidBoundsViolation get_xid_status(TransactionId xid,
										 HeapCheckContext *ctx,
										 XidCommitStatus *status);
 180
 181 /*
 182  * Scan and report corruption in heap pages, optionally reconciling toasted
 183  * attributes with entries in the associated toast table.  Intended to be
 184  * called from SQL with the following parameters:
 185  *
 186  *   relation:
 187  *     The Oid of the heap relation to be checked.
 188  *
 189  *   on_error_stop:
 190  *     Whether to stop at the end of the first page for which errors are
 191  *     detected.  Note that multiple rows may be returned.
 192  *
 193  *   check_toast:
 194  *     Whether to check each toasted attribute against the toast table to
 195  *     verify that it can be found there.
 196  *
 197  *   skip:
 198  *     What kinds of pages in the heap relation should be skipped.  Valid
 199  *     options are "all-visible", "all-frozen", and "none".
 200  *
 201  * Returns to the SQL caller a set of tuples, each containing the location
 202  * and a description of a corruption found in the heap.
 203  *
 204  * This code goes to some trouble to avoid crashing the server even if the
 205  * table pages are badly corrupted, but it's probably not perfect. If
 206  * check_toast is true, we'll use regular index lookups to try to fetch TOAST
 207  * tuples, which can certainly cause crashes if the right kind of corruption
 208  * exists in the toast table or index. No matter what parameters you pass,
 209  * we can't protect against crashes that might occur trying to look up the
 210  * commit status of transaction IDs (though we avoid trying to do such lookups
 211  * for transaction IDs that can't legally appear in the table).
 212  */
 213 Datum
 214 verify_heapam(PG_FUNCTION_ARGS)
 215 {
 216         ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
 217         MemoryContext old_context;
 218         bool            random_access;
 219         HeapCheckContext ctx;
 220         Buffer          vmbuffer = InvalidBuffer;
 221         Oid                     relid;
 222         bool            on_error_stop;
 223         bool            check_toast;
 224         SkipPages       skip_option = SKIP_PAGES_NONE;
 225         BlockNumber first_block;
 226         BlockNumber last_block;
 227         BlockNumber nblocks;
 228         const char *skip;
 229
 230         /* Check to see if caller supports us returning a tuplestore */
 231         if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
 232                 ereport(ERROR,
 233                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 234                                  errmsg("set-valued function called in context that cannot accept a set")));
 235         if (!(rsinfo->allowedModes & SFRM_Materialize))
 236                 ereport(ERROR,
 237                                 (errcode(ERRCODE_SYNTAX_ERROR),
 238                                  errmsg("materialize mode required, but it is not allowed in this context")));
 239
 240         /* Check supplied arguments */
 241         if (PG_ARGISNULL(0))
 242                 ereport(ERROR,
 243                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 244                                  errmsg("relation cannot be null")));
 245         relid = PG_GETARG_OID(0);
 246
 247         if (PG_ARGISNULL(1))
 248                 ereport(ERROR,
 249                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 250                                  errmsg("on_error_stop cannot be null")));
 251         on_error_stop = PG_GETARG_BOOL(1);
 252
 253         if (PG_ARGISNULL(2))
 254                 ereport(ERROR,
 255                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 256                                  errmsg("check_toast cannot be null")));
 257         check_toast = PG_GETARG_BOOL(2);
 258
 259         if (PG_ARGISNULL(3))
 260                 ereport(ERROR,
 261                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 262                                  errmsg("skip cannot be null")));
 263         skip = text_to_cstring(PG_GETARG_TEXT_PP(3));
 264         if (pg_strcasecmp(skip, "all-visible") == 0)
 265                 skip_option = SKIP_PAGES_ALL_VISIBLE;
 266         else if (pg_strcasecmp(skip, "all-frozen") == 0)
 267                 skip_option = SKIP_PAGES_ALL_FROZEN;
 268         else if (pg_strcasecmp(skip, "none") == 0)
 269                 skip_option = SKIP_PAGES_NONE;
 270         else
 271                 ereport(ERROR,
 272                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 273                                  errmsg("invalid skip option"),
 274                                  errhint("Valid skip options are \"all-visible\", \"all-frozen\", and \"none\".")));
 275
 276         memset(&ctx, 0, sizeof(HeapCheckContext));
 277         ctx.cached_xid = InvalidTransactionId;
 278         ctx.toasted_attributes = NIL;
 279
 280         /*
 281          * Any xmin newer than the xmin of our snapshot can't become all-visible
 282          * while we're running.
 283          */
 284         ctx.safe_xmin = GetTransactionSnapshot()->xmin;
 285
 286         /*
 287          * If we report corruption when not examining some individual attribute,
 288          * we need attnum to be reported as NULL.  Set that up before any
 289          * corruption reporting might happen.
 290          */
 291         ctx.attnum = -1;
 292
 293         /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
 294         old_context = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
 295         random_access = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
 296         ctx.tupdesc = verify_heapam_tupdesc();
 297         ctx.tupstore = tuplestore_begin_heap(random_access, false, work_mem);
 298         rsinfo->returnMode = SFRM_Materialize;
 299         rsinfo->setResult = ctx.tupstore;
 300         rsinfo->setDesc = ctx.tupdesc;
 301         MemoryContextSwitchTo(old_context);
 302
 303         /* Open relation, check relkind and access method */
 304         ctx.rel = relation_open(relid, AccessShareLock);
 305
 306         /*
 307          * Check that a relation's relkind and access method are both supported.
 308          */
 309         if (!RELKIND_HAS_TABLE_AM(ctx.rel->rd_rel->relkind) &&
 310                 ctx.rel->rd_rel->relkind != RELKIND_SEQUENCE)
 311                 ereport(ERROR,
 312                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
 313                                  errmsg("cannot check relation \"%s\"",
 314                                                 RelationGetRelationName(ctx.rel)),
 315                                  errdetail_relkind_not_supported(ctx.rel->rd_rel->relkind)));
 316
 317         /*
 318          * Sequences always use heap AM, but they don't show that in the catalogs.
 319          * Other relkinds might be using a different AM, so check.
 320          */
 321         if (ctx.rel->rd_rel->relkind != RELKIND_SEQUENCE &&
 322                 ctx.rel->rd_rel->relam != HEAP_TABLE_AM_OID)
 323                 ereport(ERROR,
 324                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 325                                  errmsg("only heap AM is supported")));
 326
 327         /*
 328          * Early exit for unlogged relations during recovery.  These will have no
 329          * relation fork, so there won't be anything to check.  We behave as if
 330          * the relation is empty.
 331          */
 332         if (ctx.rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
 333                 RecoveryInProgress())
 334         {
 335                 ereport(DEBUG1,
 336                                 (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
 337                                  errmsg("cannot verify unlogged relation \"%s\" during recovery, skipping",
 338                                                 RelationGetRelationName(ctx.rel))));
 339                 relation_close(ctx.rel, AccessShareLock);
 340                 PG_RETURN_NULL();
 341         }
 342
 343         /* Early exit if the relation is empty */
 344         nblocks = RelationGetNumberOfBlocks(ctx.rel);
 345         if (!nblocks)
 346         {
 347                 relation_close(ctx.rel, AccessShareLock);
 348                 PG_RETURN_NULL();
 349         }
 350
 351         ctx.bstrategy = GetAccessStrategy(BAS_BULKREAD);
 352         ctx.buffer = InvalidBuffer;
 353         ctx.page = NULL;
 354
 355         /* Validate block numbers, or handle nulls. */
 356         if (PG_ARGISNULL(4))
 357                 first_block = 0;
 358         else
 359         {
 360                 int64           fb = PG_GETARG_INT64(4);
 361
 362                 if (fb < 0 || fb >= nblocks)
 363                         ereport(ERROR,
 364                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 365                                          errmsg("starting block number must be between 0 and %u",
 366                                                         nblocks - 1)));
 367                 first_block = (BlockNumber) fb;
 368         }
 369         if (PG_ARGISNULL(5))
 370                 last_block = nblocks - 1;
 371         else
 372         {
 373                 int64           lb = PG_GETARG_INT64(5);
 374
 375                 if (lb < 0 || lb >= nblocks)
 376                         ereport(ERROR,
 377                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 378                                          errmsg("ending block number must be between 0 and %u",
 379                                                         nblocks - 1)));
 380                 last_block = (BlockNumber) lb;
 381         }
 382
 383         /* Optionally open the toast relation, if any. */
 384         if (ctx.rel->rd_rel->reltoastrelid && check_toast)
 385         {
 386                 int                     offset;
 387
 388                 /* Main relation has associated toast relation */
 389                 ctx.toast_rel = table_open(ctx.rel->rd_rel->reltoastrelid,
 390                                                                    AccessShareLock);
 391                 offset = toast_open_indexes(ctx.toast_rel,
 392                                                                         AccessShareLock,
 393                                                                         &(ctx.toast_indexes),
 394                                                                         &(ctx.num_toast_indexes));
 395                 ctx.valid_toast_index = ctx.toast_indexes[offset];
 396         }
 397         else
 398         {
 399                 /*
 400                  * Main relation has no associated toast relation, or we're
 401                  * intentionally skipping it.
 402                  */
 403                 ctx.toast_rel = NULL;
 404                 ctx.toast_indexes = NULL;
 405                 ctx.num_toast_indexes = 0;
 406         }
 407
 408         update_cached_xid_range(&ctx);
 409         update_cached_mxid_range(&ctx);
 410         ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid;
 411         ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx);
 412         ctx.relminmxid = ctx.rel->rd_rel->relminmxid;
 413
 414         if (TransactionIdIsNormal(ctx.relfrozenxid))
 415                 ctx.oldest_xid = ctx.relfrozenxid;
 416
 417         for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
 418         {
 419                 OffsetNumber maxoff;
 420
 421                 CHECK_FOR_INTERRUPTS();
 422
 423                 /* Optionally skip over all-frozen or all-visible blocks */
 424                 if (skip_option != SKIP_PAGES_NONE)
 425                 {
 426                         int32           mapbits;
 427
 428                         mapbits = (int32) visibilitymap_get_status(ctx.rel, ctx.blkno,
 429                                                                                                            &vmbuffer);
 430                         if (skip_option == SKIP_PAGES_ALL_FROZEN)
 431                         {
 432                                 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
 433                                         continue;
 434                         }
 435
 436                         if (skip_option == SKIP_PAGES_ALL_VISIBLE)
 437                         {
 438                                 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
 439                                         continue;
 440                         }
 441                 }
 442
 443                 /* Read and lock the next page. */
 444                 ctx.buffer = ReadBufferExtended(ctx.rel, MAIN_FORKNUM, ctx.blkno,
 445                                                                                 RBM_NORMAL, ctx.bstrategy);
 446                 LockBuffer(ctx.buffer, BUFFER_LOCK_SHARE);
 447                 ctx.page = BufferGetPage(ctx.buffer);
 448
 449                 /* Perform tuple checks */
 450                 maxoff = PageGetMaxOffsetNumber(ctx.page);
 451                 for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
 452                          ctx.offnum = OffsetNumberNext(ctx.offnum))
 453                 {
 454                         ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
 455
 456                         /* Skip over unused/dead line pointers */
 457                         if (!ItemIdIsUsed(ctx.itemid) || ItemIdIsDead(ctx.itemid))
 458                                 continue;
 459
 460                         /*
 461                          * If this line pointer has been redirected, check that it
 462                          * redirects to a valid offset within the line pointer array
 463                          */
 464                         if (ItemIdIsRedirected(ctx.itemid))
 465                         {
 466                                 OffsetNumber rdoffnum = ItemIdGetRedirect(ctx.itemid);
 467                                 ItemId          rditem;
 468
 469                                 if (rdoffnum < FirstOffsetNumber)
 470                                 {
 471                                         report_corruption(&ctx,
 472                                                                           psprintf("line pointer redirection to item at offset %u precedes minimum offset %u",
 473                                                                                            (unsigned) rdoffnum,
 474                                                                                            (unsigned) FirstOffsetNumber));
 475                                         continue;
 476                                 }
 477                                 if (rdoffnum > maxoff)
 478                                 {
 479                                         report_corruption(&ctx,
 480                                                                           psprintf("line pointer redirection to item at offset %u exceeds maximum offset %u",
 481                                                                                            (unsigned) rdoffnum,
 482                                                                                            (unsigned) maxoff));
 483                                         continue;
 484                                 }
 485                                 rditem = PageGetItemId(ctx.page, rdoffnum);
 486                                 if (!ItemIdIsUsed(rditem))
 487                                         report_corruption(&ctx,
 488                                                                           psprintf("line pointer redirection to unused item at offset %u",
 489                                                                                            (unsigned) rdoffnum));
 490                                 continue;
 491                         }
 492
 493                         /* Sanity-check the line pointer's offset and length values */
 494                         ctx.lp_len = ItemIdGetLength(ctx.itemid);
 495                         ctx.lp_off = ItemIdGetOffset(ctx.itemid);
 496
 497                         if (ctx.lp_off != MAXALIGN(ctx.lp_off))
 498                         {
 499                                 report_corruption(&ctx,
 500                                                                   psprintf("line pointer to page offset %u is not maximally aligned",
 501                                                                                    ctx.lp_off));
 502                                 continue;
 503                         }
 504                         if (ctx.lp_len < MAXALIGN(SizeofHeapTupleHeader))
 505                         {
 506                                 report_corruption(&ctx,
 507                                                                   psprintf("line pointer length %u is less than the minimum tuple header size %u",
 508                                                                                    ctx.lp_len,
 509                                                                                    (unsigned) MAXALIGN(SizeofHeapTupleHeader)));
 510                                 continue;
 511                         }
 512                         if (ctx.lp_off + ctx.lp_len > BLCKSZ)
 513                         {
 514                                 report_corruption(&ctx,
 515                                                                   psprintf("line pointer to page offset %u with length %u ends beyond maximum page offset %u",
 516                                                                                    ctx.lp_off,
 517                                                                                    ctx.lp_len,
 518                                                                                    (unsigned) BLCKSZ));
 519                                 continue;
 520                         }
 521
 522                         /* It should be safe to examine the tuple's header, at least */
 523                         ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
 524                         ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
 525
 526                         /* Ok, ready to check this next tuple */
 527                         check_tuple(&ctx);
 528                 }
 529
 530                 /* clean up */
 531                 UnlockReleaseBuffer(ctx.buffer);
 532
 533                 /*
 534                  * Check any toast pointers from the page whose lock we just released
 535                  */
 536                 if (ctx.toasted_attributes != NIL)
 537                 {
 538                         ListCell   *cell;
 539
 540                         foreach(cell, ctx.toasted_attributes)
 541                                 check_toasted_attribute(&ctx, lfirst(cell));
 542                         list_free_deep(ctx.toasted_attributes);
 543                         ctx.toasted_attributes = NIL;
 544                 }
 545
 546                 if (on_error_stop && ctx.is_corrupt)
 547                         break;
 548         }
 549
 550         if (vmbuffer != InvalidBuffer)
 551                 ReleaseBuffer(vmbuffer);
 552
 553         /* Close the associated toast table and indexes, if any. */
 554         if (ctx.toast_indexes)
 555                 toast_close_indexes(ctx.toast_indexes, ctx.num_toast_indexes,
 556                                                         AccessShareLock);
 557         if (ctx.toast_rel)
 558                 table_close(ctx.toast_rel, AccessShareLock);
 559
 560         /* Close the main relation */
 561         relation_close(ctx.rel, AccessShareLock);
 562
 563         PG_RETURN_NULL();
 564 }
 565
 566 /*
 567  * Shared internal implementation for report_corruption and
 568  * report_toast_corruption.
 569  */
 570 static void
 571 report_corruption_internal(Tuplestorestate *tupstore, TupleDesc tupdesc,
 572                                                    BlockNumber blkno, OffsetNumber offnum,
 573                                                    AttrNumber attnum, char *msg)
 574 {
 575         Datum           values[HEAPCHECK_RELATION_COLS];
 576         bool            nulls[HEAPCHECK_RELATION_COLS];
 577         HeapTuple       tuple;
 578
 579         MemSet(values, 0, sizeof(values));
 580         MemSet(nulls, 0, sizeof(nulls));
 581         values[0] = Int64GetDatum(blkno);
 582         values[1] = Int32GetDatum(offnum);
 583         values[2] = Int32GetDatum(attnum);
 584         nulls[2] = (attnum < 0);
 585         values[3] = CStringGetTextDatum(msg);
 586
 587         /*
 588          * In principle, there is nothing to prevent a scan over a large, highly
 589          * corrupted table from using work_mem worth of memory building up the
 590          * tuplestore.  That's ok, but if we also leak the msg argument memory
 591          * until the end of the query, we could exceed work_mem by more than a
 592          * trivial amount.  Therefore, free the msg argument each time we are
 593          * called rather than waiting for our current memory context to be freed.
 594          */
 595         pfree(msg);
 596
 597         tuple = heap_form_tuple(tupdesc, values, nulls);
 598         tuplestore_puttuple(tupstore, tuple);
 599 }
 600
 601 /*
 602  * Record a single corruption found in the main table.  The values in ctx should
 603  * indicate the location of the corruption, and the msg argument should contain
 604  * a human-readable description of the corruption.
 605  *
 606  * The msg argument is pfree'd by this function.
 607  */
 608 static void
 609 report_corruption(HeapCheckContext *ctx, char *msg)
 610 {
 611         report_corruption_internal(ctx->tupstore, ctx->tupdesc, ctx->blkno,
 612                                                            ctx->offnum, ctx->attnum, msg);
 613         ctx->is_corrupt = true;
 614 }
 615
 616 /*
 617  * Record corruption found in the toast table.  The values in ta should
 618  * indicate the location in the main table where the toast pointer was
 619  * encountered, and the msg argument should contain a human-readable
 620  * description of the toast table corruption.
 621  *
 622  * As above, the msg argument is pfree'd by this function.
 623  */
 624 static void
 625 report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta,
 626                                                 char *msg)
 627 {
 628         report_corruption_internal(ctx->tupstore, ctx->tupdesc, ta->blkno,
 629                                                            ta->offnum, ta->attnum, msg);
 630         ctx->is_corrupt = true;
 631 }
 632
 633 /*
 634  * Construct the TupleDesc used to report messages about corruptions found
 635  * while scanning the heap.
 636  */
 637 static TupleDesc
 638 verify_heapam_tupdesc(void)
 639 {
 640         TupleDesc       tupdesc;
 641         AttrNumber      a = 0;
 642
 643         tupdesc = CreateTemplateTupleDesc(HEAPCHECK_RELATION_COLS);
 644         TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
 645         TupleDescInitEntry(tupdesc, ++a, "offnum", INT4OID, -1, 0);
 646         TupleDescInitEntry(tupdesc, ++a, "attnum", INT4OID, -1, 0);
 647         TupleDescInitEntry(tupdesc, ++a, "msg", TEXTOID, -1, 0);
 648         Assert(a == HEAPCHECK_RELATION_COLS);
 649
 650         return BlessTupleDesc(tupdesc);
 651 }
 652
 653 /*
 654  * Check for tuple header corruption.
 655  *
 656  * Some kinds of corruption make it unsafe to check the tuple attributes, for
 657  * example when the line pointer refers to a range of bytes outside the page.
 658  * In such cases, we return false (not checkable) after recording appropriate
 659  * corruption messages.
 660  *
 661  * Some other kinds of tuple header corruption confuse the question of where
 662  * the tuple attributes begin, or how long the nulls bitmap is, etc., making it
 663  * unreasonable to attempt to check attributes, even if all candidate answers
 664  * to those questions would not result in reading past the end of the line
 665  * pointer or page.  In such cases, like above, we record corruption messages
 666  * about the header and then return false.
 667  *
 668  * Other kinds of tuple header corruption do not bear on the question of
 669  * whether the tuple attributes can be checked, so we record corruption
 670  * messages for them but we do not return false merely because we detected
 671  * them.
 672  *
 673  * Returns whether the tuple is sufficiently sensible to undergo visibility and
 674  * attribute checks.
 675  */
 676 static bool
 677 check_tuple_header(HeapCheckContext *ctx)
 678 {
 679         HeapTupleHeader tuphdr = ctx->tuphdr;
 680         uint16          infomask = tuphdr->t_infomask;
 681         bool            result = true;
 682         unsigned        expected_hoff;
 683
 684         if (ctx->tuphdr->t_hoff > ctx->lp_len)
 685         {
 686                 report_corruption(ctx,
 687                                                   psprintf("data begins at offset %u beyond the tuple length %u",
 688                                                                    ctx->tuphdr->t_hoff, ctx->lp_len));
 689                 result = false;
 690         }
 691
 692         if ((ctx->tuphdr->t_infomask & HEAP_XMAX_COMMITTED) &&
 693                 (ctx->tuphdr->t_infomask & HEAP_XMAX_IS_MULTI))
 694         {
 695                 report_corruption(ctx,
 696                                                   pstrdup("multixact should not be marked committed"));
 697
 698                 /*
 699                  * This condition is clearly wrong, but it's not enough to justify
 700                  * skipping further checks, because we don't rely on this to determine
 701                  * whether the tuple is visible or to interpret other relevant header
 702                  * fields.
 703                  */
 704         }
 705
 706         if (infomask & HEAP_HASNULL)
 707                 expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
 708         else
 709                 expected_hoff = MAXALIGN(SizeofHeapTupleHeader);
 710         if (ctx->tuphdr->t_hoff != expected_hoff)
 711         {
 712                 if ((infomask & HEAP_HASNULL) && ctx->natts == 1)
 713                         report_corruption(ctx,
 714                                                           psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, has nulls)",
 715                                                                            expected_hoff, ctx->tuphdr->t_hoff));
 716                 else if ((infomask & HEAP_HASNULL))
 717                         report_corruption(ctx,
 718                                                           psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, has nulls)",
 719                                                                            expected_hoff, ctx->tuphdr->t_hoff, ctx->natts));
 720                 else if (ctx->natts == 1)
 721                         report_corruption(ctx,
 722                                                           psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, no nulls)",
 723                                                                            expected_hoff, ctx->tuphdr->t_hoff));
 724                 else
 725                         report_corruption(ctx,
 726                                                           psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, no nulls)",
 727                                                                            expected_hoff, ctx->tuphdr->t_hoff, ctx->natts));
 728                 result = false;
 729         }
 730
 731         return result;
 732 }
 733
 734 /*
 735  * Checks tuple visibility so we know which further checks are safe to
 736  * perform.
 737  *
 738  * If a tuple could have been inserted by a transaction that also added a
 739  * column to the table, but which ultimately did not commit, or which has not
 740  * yet committed, then the table's current TupleDesc might differ from the one
 741  * used to construct this tuple, so we must not check it.
 742  *
 743  * As a special case, if our own transaction inserted the tuple, even if we
 744  * added a column to the table, our TupleDesc should match.  We could check the
 745  * tuple, but choose not to do so.
 746  *
 747  * If a tuple has been updated or deleted, we can still read the old tuple for
 748  * corruption checking purposes, as long as we are careful about concurrent
 749  * vacuums.  The main table tuple itself cannot be vacuumed away because we
 750  * hold a buffer lock on the page, but if the deleting transaction is older
 751  * than our transaction snapshot's xmin, then vacuum could remove the toast at
 752  * any time, so we must not try to follow TOAST pointers.
 753  *
 754  * If xmin or xmax values are older than can be checked against clog, or appear
 755  * to be in the future (possibly due to wrap-around), then we cannot make a
 756  * determination about the visibility of the tuple, so we skip further checks.
 757  *
 758  * Returns true if the tuple itself should be checked, false otherwise.  Sets
 759  * ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
 760  * TOAST tuples -- are eligible for pruning.
 761  */
 762 static bool
 763 check_tuple_visibility(HeapCheckContext *ctx)
 764 {
 765         TransactionId xmin;
 766         TransactionId xvac;
 767         TransactionId xmax;
 768         XidCommitStatus xmin_status;
 769         XidCommitStatus xvac_status;
 770         XidCommitStatus xmax_status;
 771         HeapTupleHeader tuphdr = ctx->tuphdr;
 772
 773         ctx->tuple_could_be_pruned = true;      /* have not yet proven otherwise */
 774
 775         /* If xmin is normal, it should be within valid range */
 776         xmin = HeapTupleHeaderGetXmin(tuphdr);
 777         switch (get_xid_status(xmin, ctx, &xmin_status))
 778         {
 779                 case XID_INVALID:
 780                 case XID_BOUNDS_OK:
 781                         break;
 782                 case XID_IN_FUTURE:
 783                         report_corruption(ctx,
 784                                                           psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u",
 785                                                                            xmin,
 786                                                                            EpochFromFullTransactionId(ctx->next_fxid),
 787                                                                            XidFromFullTransactionId(ctx->next_fxid)));
 788                         return false;
 789                 case XID_PRECEDES_CLUSTERMIN:
 790                         report_corruption(ctx,
 791                                                           psprintf("xmin %u precedes oldest valid transaction ID %u:%u",
 792                                                                            xmin,
 793                                                                            EpochFromFullTransactionId(ctx->oldest_fxid),
 794                                                                            XidFromFullTransactionId(ctx->oldest_fxid)));
 795                         return false;
 796                 case XID_PRECEDES_RELMIN:
 797                         report_corruption(ctx,
 798                                                           psprintf("xmin %u precedes relation freeze threshold %u:%u",
 799                                                                            xmin,
 800                                                                            EpochFromFullTransactionId(ctx->relfrozenfxid),
 801                                                                            XidFromFullTransactionId(ctx->relfrozenfxid)));
 802                         return false;
 803         }
 804
 805         /*
 806          * Has inserting transaction committed?
 807          */
 808         if (!HeapTupleHeaderXminCommitted(tuphdr))
 809         {
 810                 if (HeapTupleHeaderXminInvalid(tuphdr))
 811                         return false;           /* inserter aborted, don't check */
 812                 /* Used by pre-9.0 binary upgrades */
 813                 else if (tuphdr->t_infomask & HEAP_MOVED_OFF)
 814                 {
 815                         xvac = HeapTupleHeaderGetXvac(tuphdr);
 816
 817                         switch (get_xid_status(xvac, ctx, &xvac_status))
 818                         {
 819                                 case XID_INVALID:
 820                                         report_corruption(ctx,
 821                                                                           pstrdup("old-style VACUUM FULL transaction ID for moved off tuple is invalid"));
 822                                         return false;
 823                                 case XID_IN_FUTURE:
 824                                         report_corruption(ctx,
 825                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple equals or exceeds next valid transaction ID %u:%u",
 826                                                                                            xvac,
 827                                                                                            EpochFromFullTransactionId(ctx->next_fxid),
 828                                                                                            XidFromFullTransactionId(ctx->next_fxid)));
 829                                         return false;
 830                                 case XID_PRECEDES_RELMIN:
 831                                         report_corruption(ctx,
 832                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes relation freeze threshold %u:%u",
 833                                                                                            xvac,
 834                                                                                            EpochFromFullTransactionId(ctx->relfrozenfxid),
 835                                                                                            XidFromFullTransactionId(ctx->relfrozenfxid)));
 836                                         return false;
 837                                 case XID_PRECEDES_CLUSTERMIN:
 838                                         report_corruption(ctx,
 839                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes oldest valid transaction ID %u:%u",
 840                                                                                            xvac,
 841                                                                                            EpochFromFullTransactionId(ctx->oldest_fxid),
 842                                                                                            XidFromFullTransactionId(ctx->oldest_fxid)));
 843                                         return false;
 844                                 case XID_BOUNDS_OK:
 845                                         break;
 846                         }
 847
 848                         switch (xvac_status)
 849                         {
 850                                 case XID_IS_CURRENT_XID:
 851                                         report_corruption(ctx,
 852                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple matches our current transaction ID",
 853                                                                                            xvac));
 854                                         return false;
 855                                 case XID_IN_PROGRESS:
 856                                         report_corruption(ctx,
 857                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple appears to be in progress",
 858                                                                                            xvac));
 859                                         return false;
 860
 861                                 case XID_COMMITTED:
 862
 863                                         /*
 864                                          * The tuple is dead, because the xvac transaction moved
 865                                          * it off and committed. It's checkable, but also
 866                                          * prunable.
 867                                          */
 868                                         return true;
 869
 870                                 case XID_ABORTED:
 871
 872                                         /*
 873                                          * The original xmin must have committed, because the xvac
 874                                          * transaction tried to move it later. Since xvac is
 875                                          * aborted, whether it's still alive now depends on the
 876                                          * status of xmax.
 877                                          */
 878                                         break;
 879                         }
 880                 }
 881                 /* Used by pre-9.0 binary upgrades */
 882                 else if (tuphdr->t_infomask & HEAP_MOVED_IN)
 883                 {
 884                         xvac = HeapTupleHeaderGetXvac(tuphdr);
 885
 886                         switch (get_xid_status(xvac, ctx, &xvac_status))
 887                         {
 888                                 case XID_INVALID:
 889                                         report_corruption(ctx,
 890                                                                           pstrdup("old-style VACUUM FULL transaction ID for moved in tuple is invalid"));
 891                                         return false;
 892                                 case XID_IN_FUTURE:
 893                                         report_corruption(ctx,
 894                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple equals or exceeds next valid transaction ID %u:%u",
 895                                                                                            xvac,
 896                                                                                            EpochFromFullTransactionId(ctx->next_fxid),
 897                                                                                            XidFromFullTransactionId(ctx->next_fxid)));
 898                                         return false;
 899                                 case XID_PRECEDES_RELMIN:
 900                                         report_corruption(ctx,
 901                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes relation freeze threshold %u:%u",
 902                                                                                            xvac,
 903                                                                                            EpochFromFullTransactionId(ctx->relfrozenfxid),
 904                                                                                            XidFromFullTransactionId(ctx->relfrozenfxid)));
 905                                         return false;
 906                                 case XID_PRECEDES_CLUSTERMIN:
 907                                         report_corruption(ctx,
 908                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes oldest valid transaction ID %u:%u",
 909                                                                                            xvac,
 910                                                                                            EpochFromFullTransactionId(ctx->oldest_fxid),
 911                                                                                            XidFromFullTransactionId(ctx->oldest_fxid)));
 912                                         return false;
 913                                 case XID_BOUNDS_OK:
 914                                         break;
 915                         }
 916
 917                         switch (xvac_status)
 918                         {
 919                                 case XID_IS_CURRENT_XID:
 920                                         report_corruption(ctx,
 921                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple matches our current transaction ID",
 922                                                                                            xvac));
 923                                         return false;
 924                                 case XID_IN_PROGRESS:
 925                                         report_corruption(ctx,
 926                                                                           psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple appears to be in progress",
 927                                                                                            xvac));
 928                                         return false;
 929
 930                                 case XID_COMMITTED:
 931
 932                                         /*
 933                                          * The original xmin must have committed, because the xvac
 934                                          * transaction moved it later. Whether it's still alive
 935                                          * now depends on the status of xmax.
 936                                          */
 937                                         break;
 938
 939                                 case XID_ABORTED:
 940
 941                                         /*
 942                                          * The tuple is dead, because the xvac transaction moved
 943                                          * it off and committed. It's checkable, but also
 944                                          * prunable.
 945                                          */
 946                                         return true;
 947                         }
 948                 }
 949                 else if (xmin_status != XID_COMMITTED)
 950                 {
 951                         /*
 952                          * Inserting transaction is not in progress, and not committed, so
 953                          * it might have changed the TupleDesc in ways we don't know
 954                          * about. Thus, don't try to check the tuple structure.
 955                          *
 956                          * If xmin_status happens to be XID_IS_CURRENT_XID, then in theory
 957                          * any such DDL changes ought to be visible to us, so perhaps we
 958                          * could check anyway in that case. But, for now, let's be
 959                          * conservative and treat this like any other uncommitted insert.
 960                          */
 961                         return false;
 962                 }
 963         }
 964
 965         /*
 966          * Okay, the inserter committed, so it was good at some point.  Now what
 967          * about the deleting transaction?
 968          */
 969
 970         if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)
 971         {
 972                 /*
 973                  * xmax is a multixact, so sanity-check the MXID. Note that we do this
 974                  * prior to checking for HEAP_XMAX_INVALID or
 975                  * HEAP_XMAX_IS_LOCKED_ONLY. This might therefore complain about
 976                  * things that wouldn't actually be a problem during a normal scan,
 977                  * but eventually we're going to have to freeze, and that process will
 978                  * ignore hint bits.
 979                  *
 980                  * Even if the MXID is out of range, we still know that the original
 981                  * insert committed, so we can check the tuple itself. However, we
 982                  * can't rule out the possibility that this tuple is dead, so don't
 983                  * clear ctx->tuple_could_be_pruned. Possibly we should go ahead and
 984                  * clear that flag anyway if HEAP_XMAX_INVALID is set or if
 985                  * HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of
 986                  * avoiding possibly-bogus complaints about missing TOAST entries.
 987                  */
 988                 xmax = HeapTupleHeaderGetRawXmax(tuphdr);
 989                 switch (check_mxid_valid_in_rel(xmax, ctx))
 990                 {
 991                         case XID_INVALID:
 992                                 report_corruption(ctx,
 993                                                                   pstrdup("multitransaction ID is invalid"));
 994                                 return true;
 995                         case XID_PRECEDES_RELMIN:
 996                                 report_corruption(ctx,
 997                                                                   psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u",
 998                                                                                    xmax, ctx->relminmxid));
 999                                 return true;
1000                         case XID_PRECEDES_CLUSTERMIN:
1001                                 report_corruption(ctx,
1002                                                                   psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u",
1003                                                                                    xmax, ctx->oldest_mxact));
1004                                 return true;
1005                         case XID_IN_FUTURE:
1006                                 report_corruption(ctx,
1007                                                                   psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u",
1008                                                                                    xmax,
1009                                                                                    ctx->next_mxact));
1010                                 return true;
1011                         case XID_BOUNDS_OK:
1012                                 break;
1013                 }
1014         }
1015
1016         if (tuphdr->t_infomask & HEAP_XMAX_INVALID)
1017         {
1018                 /*
1019                  * This tuple is live.  A concurrently running transaction could
1020                  * delete it before we get around to checking the toast, but any such
1021                  * running transaction is surely not less than our safe_xmin, so the
1022                  * toast cannot be vacuumed out from under us.
1023                  */
1024                 ctx->tuple_could_be_pruned = false;
1025                 return true;
1026         }
1027
1028         if (HEAP_XMAX_IS_LOCKED_ONLY(tuphdr->t_infomask))
1029         {
1030                 /*
1031                  * "Deleting" xact really only locked it, so the tuple is live in any
1032                  * case.  As above, a concurrently running transaction could delete
1033                  * it, but it cannot be vacuumed out from under us.
1034                  */
1035                 ctx->tuple_could_be_pruned = false;
1036                 return true;
1037         }
1038
1039         if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)
1040         {
1041                 /*
1042                  * We already checked above that this multixact is within limits for
1043                  * this table.  Now check the update xid from this multixact.
1044                  */
1045                 xmax = HeapTupleGetUpdateXid(tuphdr);
1046                 switch (get_xid_status(xmax, ctx, &xmax_status))
1047                 {
1048                         case XID_INVALID:
1049                                 /* not LOCKED_ONLY, so it has to have an xmax */
1050                                 report_corruption(ctx,
1051                                                                   pstrdup("update xid is invalid"));
1052                                 return true;
1053                         case XID_IN_FUTURE:
1054                                 report_corruption(ctx,
1055                                                                   psprintf("update xid %u equals or exceeds next valid transaction ID %u:%u",
1056                                                                                    xmax,
1057                                                                                    EpochFromFullTransactionId(ctx->next_fxid),
1058                                                                                    XidFromFullTransactionId(ctx->next_fxid)));
1059                                 return true;
1060                         case XID_PRECEDES_RELMIN:
1061                                 report_corruption(ctx,
1062                                                                   psprintf("update xid %u precedes relation freeze threshold %u:%u",
1063                                                                                    xmax,
1064                                                                                    EpochFromFullTransactionId(ctx->relfrozenfxid),
1065                                                                                    XidFromFullTransactionId(ctx->relfrozenfxid)));
1066                                 return true;
1067                         case XID_PRECEDES_CLUSTERMIN:
1068                                 report_corruption(ctx,
1069                                                                   psprintf("update xid %u precedes oldest valid transaction ID %u:%u",
1070                                                                                    xmax,
1071                                                                                    EpochFromFullTransactionId(ctx->oldest_fxid),
1072                                                                                    XidFromFullTransactionId(ctx->oldest_fxid)));
1073                                 return true;
1074                         case XID_BOUNDS_OK:
1075                                 break;
1076                 }
1077
1078                 switch (xmax_status)
1079                 {
1080                         case XID_IS_CURRENT_XID:
1081                         case XID_IN_PROGRESS:
1082
1083                                 /*
1084                                  * The delete is in progress, so it cannot be visible to our
1085                                  * snapshot.
1086                                  */
1087                                 ctx->tuple_could_be_pruned = false;
1088                                 break;
1089                         case XID_COMMITTED:
1090
1091                                 /*
1092                                  * The delete committed.  Whether the toast can be vacuumed
1093                                  * away depends on how old the deleting transaction is.
1094                                  */
1095                                 ctx->tuple_could_be_pruned = TransactionIdPrecedes(xmax,
1096                                                                                                                                    ctx->safe_xmin);
1097                                 break;
1098                         case XID_ABORTED:
1099
1100                                 /*
1101                                  * The delete aborted or crashed.  The tuple is still live.
1102                                  */
1103                                 ctx->tuple_could_be_pruned = false;
1104                                 break;
1105                 }
1106
1107                 /* Tuple itself is checkable even if it's dead. */
1108                 return true;
1109         }
1110
1111         /* xmax is an XID, not a MXID. Sanity check it. */
1112         xmax = HeapTupleHeaderGetRawXmax(tuphdr);
1113         switch (get_xid_status(xmax, ctx, &xmax_status))
1114         {
1115                 case XID_IN_FUTURE:
1116                         report_corruption(ctx,
1117                                                           psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
1118                                                                            xmax,
1119                                                                            EpochFromFullTransactionId(ctx->next_fxid),
1120                                                                            XidFromFullTransactionId(ctx->next_fxid)));
1121                         return false;           /* corrupt */
1122                 case XID_PRECEDES_RELMIN:
1123                         report_corruption(ctx,
1124                                                           psprintf("xmax %u precedes relation freeze threshold %u:%u",
1125                                                                            xmax,
1126                                                                            EpochFromFullTransactionId(ctx->relfrozenfxid),
1127                                                                            XidFromFullTransactionId(ctx->relfrozenfxid)));
1128                         return false;           /* corrupt */
1129                 case XID_PRECEDES_CLUSTERMIN:
1130                         report_corruption(ctx,
1131                                                           psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
1132                                                                            xmax,
1133                                                                            EpochFromFullTransactionId(ctx->oldest_fxid),
1134                                                                            XidFromFullTransactionId(ctx->oldest_fxid)));
1135                         return false;           /* corrupt */
1136                 case XID_BOUNDS_OK:
1137                 case XID_INVALID:
1138                         break;
1139         }
1140
1141         /*
1142          * Whether the toast can be vacuumed away depends on how old the deleting
1143          * transaction is.
1144          */
1145         switch (xmax_status)
1146         {
1147                 case XID_IS_CURRENT_XID:
1148                 case XID_IN_PROGRESS:
1149
1150                         /*
1151                          * The delete is in progress, so it cannot be visible to our
1152                          * snapshot.
1153                          */
1154                         ctx->tuple_could_be_pruned = false;
1155                         break;
1156
1157                 case XID_COMMITTED:
1158
1159                         /*
1160                          * The delete committed.  Whether the toast can be vacuumed away
1161                          * depends on how old the deleting transaction is.
1162                          */
1163                         ctx->tuple_could_be_pruned = TransactionIdPrecedes(xmax,
1164                                                                                                                            ctx->safe_xmin);
1165                         break;
1166
1167                 case XID_ABORTED:
1168
1169                         /*
1170                          * The delete aborted or crashed.  The tuple is still live.
1171                          */
1172                         ctx->tuple_could_be_pruned = false;
1173                         break;
1174         }
1175
1176         /* Tuple itself is checkable even if it's dead. */
1177         return true;
1178 }
1179
1180
1181 /*
1182  * Check the current toast tuple against the state tracked in ctx, recording
1183  * any corruption found in ctx->tupstore.
1184  *
1185  * This is not equivalent to running verify_heapam on the toast table itself,
1186  * and is not hardened against corruption of the toast table.  Rather, when
1187  * validating a toasted attribute in the main table, the sequence of toast
1188  * tuples that store the toasted value are retrieved and checked in order, with
1189  * each toast tuple being checked against where we are in the sequence, as well
1190  * as each toast tuple having its varlena structure sanity checked.
1191  *
1192  * On entry, *expected_chunk_seq should be the chunk_seq value that we expect
1193  * to find in toasttup. On exit, it will be updated to the value the next call
1194  * to this function should expect to see.
1195  */
1196 static void
1197 check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
1198                                   ToastedAttribute *ta, int32 *expected_chunk_seq,
1199                                   uint32 extsize)
1200 {
1201         int32           chunk_seq;
1202         int32           last_chunk_seq = (extsize - 1) / TOAST_MAX_CHUNK_SIZE;
1203         Pointer         chunk;
1204         bool            isnull;
1205         int32           chunksize;
1206         int32           expected_size;
1207
1208         /* Sanity-check the sequence number. */
1209         chunk_seq = DatumGetInt32(fastgetattr(toasttup, 2,
1210                                                                                   ctx->toast_rel->rd_att, &isnull));
1211         if (isnull)
1212         {
1213                 report_toast_corruption(ctx, ta,
1214                                                                 psprintf("toast value %u has toast chunk with null sequence number",
1215                                                                                  ta->toast_pointer.va_valueid));
1216                 return;
1217         }
1218         if (chunk_seq != *expected_chunk_seq)
1219         {
1220                 /* Either the TOAST index is corrupt, or we don't have all chunks. */
1221                 report_toast_corruption(ctx, ta,
1222                                                                 psprintf("toast value %u index scan returned chunk %d when expecting chunk %d",
1223                                                                                  ta->toast_pointer.va_valueid,
1224                                                                                  chunk_seq, *expected_chunk_seq));
1225         }
1226         *expected_chunk_seq = chunk_seq + 1;
1227
1228         /* Sanity-check the chunk data. */
1229         chunk = DatumGetPointer(fastgetattr(toasttup, 3,
1230                                                                                 ctx->toast_rel->rd_att, &isnull));
1231         if (isnull)
1232         {
1233                 report_toast_corruption(ctx, ta,
1234                                                                 psprintf("toast value %u chunk %d has null data",
1235                                                                                  ta->toast_pointer.va_valueid,
1236                                                                                  chunk_seq));
1237                 return;
1238         }
1239         if (!VARATT_IS_EXTENDED(chunk))
1240                 chunksize = VARSIZE(chunk) - VARHDRSZ;
1241         else if (VARATT_IS_SHORT(chunk))
1242         {
1243                 /*
1244                  * could happen due to heap_form_tuple doing its thing
1245                  */
1246                 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1247         }
1248         else
1249         {
1250                 /* should never happen */
1251                 uint32          header = ((varattrib_4b *) chunk)->va_4byte.va_header;
1252
1253                 report_toast_corruption(ctx, ta,
1254                                                                 psprintf("toast value %u chunk %d has invalid varlena header %0x",
1255                                                                                  ta->toast_pointer.va_valueid,
1256                                                                                  chunk_seq, header));
1257                 return;
1258         }
1259
1260         /*
1261          * Some checks on the data we've found
1262          */
1263         if (chunk_seq > last_chunk_seq)
1264         {
1265                 report_toast_corruption(ctx, ta,
1266                                                                 psprintf("toast value %u chunk %d follows last expected chunk %d",
1267                                                                                  ta->toast_pointer.va_valueid,
1268                                                                                  chunk_seq, last_chunk_seq));
1269                 return;
1270         }
1271
1272         expected_size = chunk_seq < last_chunk_seq ? TOAST_MAX_CHUNK_SIZE
1273                 : extsize - (last_chunk_seq * TOAST_MAX_CHUNK_SIZE);
1274
1275         if (chunksize != expected_size)
1276                 report_toast_corruption(ctx, ta,
1277                                                                 psprintf("toast value %u chunk %d has size %u, but expected size %u",
1278                                                                                  ta->toast_pointer.va_valueid,
1279                                                                                  chunk_seq, chunksize, expected_size));
1280 }
1281
1282 /*
1283  * Check the current attribute as tracked in ctx, recording any corruption
1284  * found in ctx->tupstore.
1285  *
1286  * This function follows the logic performed by heap_deform_tuple(), and in the
1287  * case of a toasted value, optionally stores the toast pointer so later it can
1288  * be checked following the logic of detoast_external_attr(), checking for any
1289  * conditions that would result in either of those functions Asserting or
1290  * crashing the backend.  The checks performed by Asserts present in those two
1291  * functions are also performed here and in check_toasted_attribute.  In cases
1292  * where those two functions are a bit cavalier in their assumptions about data
1293  * being correct, we perform additional checks not present in either of those
1294  * two functions.  Where some condition is checked in both of those functions,
1295  * we perform it here twice, as we parallel the logical flow of those two
1296  * functions.  The presence of duplicate checks seems a reasonable price to pay
1297  * for keeping this code tightly coupled with the code it protects.
1298  *
1299  * Returns true if the tuple attribute is sane enough for processing to
1300  * continue on to the next attribute, false otherwise.
1301  */
1302 static bool
1303 check_tuple_attribute(HeapCheckContext *ctx)
1304 {
1305         Datum           attdatum;
1306         struct varlena *attr;
1307         char       *tp;                         /* pointer to the tuple data */
1308         uint16          infomask;
1309         Form_pg_attribute thisatt;
1310         struct varatt_external toast_pointer;
1311
1312         infomask = ctx->tuphdr->t_infomask;
1313         thisatt = TupleDescAttr(RelationGetDescr(ctx->rel), ctx->attnum);
1314
1315         tp = (char *) ctx->tuphdr + ctx->tuphdr->t_hoff;
1316
1317         if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
1318         {
1319                 report_corruption(ctx,
1320                                                   psprintf("attribute with length %u starts at offset %u beyond total tuple length %u",
1321                                                                    thisatt->attlen,
1322                                                                    ctx->tuphdr->t_hoff + ctx->offset,
1323                                                                    ctx->lp_len));
1324                 return false;
1325         }
1326
1327         /* Skip null values */
1328         if (infomask & HEAP_HASNULL && att_isnull(ctx->attnum, ctx->tuphdr->t_bits))
1329                 return true;
1330
1331         /* Skip non-varlena values, but update offset first */
1332         if (thisatt->attlen != -1)
1333         {
1334                 ctx->offset = att_align_nominal(ctx->offset, thisatt->attalign);
1335                 ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
1336                                                                                         tp + ctx->offset);
1337                 if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
1338                 {
1339                         report_corruption(ctx,
1340                                                           psprintf("attribute with length %u ends at offset %u beyond total tuple length %u",
1341                                                                            thisatt->attlen,
1342                                                                            ctx->tuphdr->t_hoff + ctx->offset,
1343                                                                            ctx->lp_len));
1344                         return false;
1345                 }
1346                 return true;
1347         }
1348
1349         /* Ok, we're looking at a varlena attribute. */
1350         ctx->offset = att_align_pointer(ctx->offset, thisatt->attalign, -1,
1351                                                                         tp + ctx->offset);
1352
1353         /* Get the (possibly corrupt) varlena datum */
1354         attdatum = fetchatt(thisatt, tp + ctx->offset);
1355
1356         /*
1357          * We have the datum, but we cannot decode it carelessly, as it may still
1358          * be corrupt.
1359          */
1360
1361         /*
1362          * Check that VARTAG_SIZE won't hit a TrapMacro on a corrupt va_tag before
1363          * risking a call into att_addlength_pointer
1364          */
1365         if (VARATT_IS_EXTERNAL(tp + ctx->offset))
1366         {
1367                 uint8           va_tag = VARTAG_EXTERNAL(tp + ctx->offset);
1368
1369                 if (va_tag != VARTAG_ONDISK)
1370                 {
1371                         report_corruption(ctx,
1372                                                           psprintf("toasted attribute has unexpected TOAST tag %u",
1373                                                                            va_tag));
1374                         /* We can't know where the next attribute begins */
1375                         return false;
1376                 }
1377         }
1378
1379         /* Ok, should be safe now */
1380         ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
1381                                                                                 tp + ctx->offset);
1382
1383         if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
1384         {
1385                 report_corruption(ctx,
1386                                                   psprintf("attribute with length %u ends at offset %u beyond total tuple length %u",
1387                                                                    thisatt->attlen,
1388                                                                    ctx->tuphdr->t_hoff + ctx->offset,
1389                                                                    ctx->lp_len));
1390
1391                 return false;
1392         }
1393
1394         /*
1395          * heap_deform_tuple would be done with this attribute at this point,
1396          * having stored it in values[], and would continue to the next attribute.
1397          * We go further, because we need to check if the toast datum is corrupt.
1398          */
1399
1400         attr = (struct varlena *) DatumGetPointer(attdatum);
1401
1402         /*
1403          * Now we follow the logic of detoast_external_attr(), with the same
1404          * caveats about being paranoid about corruption.
1405          */
1406
1407         /* Skip values that are not external */
1408         if (!VARATT_IS_EXTERNAL(attr))
1409                 return true;
1410
1411         /* It is external, and we're looking at a page on disk */
1412
1413         /*
1414          * Must copy attr into toast_pointer for alignment considerations
1415          */
1416         VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1417
1418         /* Toasted attributes too large to be untoasted should never be stored */
1419         if (toast_pointer.va_rawsize > VARLENA_SIZE_LIMIT)
1420                 report_corruption(ctx,
1421                                                   psprintf("toast value %u rawsize %d exceeds limit %d",
1422                                                                    toast_pointer.va_valueid,
1423                                                                    toast_pointer.va_rawsize,
1424                                                                    VARLENA_SIZE_LIMIT));
1425
1426         if (VARATT_IS_COMPRESSED(&toast_pointer))
1427         {
1428                 ToastCompressionId cmid;
1429                 bool            valid = false;
1430
1431                 /* Compression should never expand the attribute */
1432                 if (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) > toast_pointer.va_rawsize - VARHDRSZ)
1433                         report_corruption(ctx,
1434                                                           psprintf("toast value %u external size %u exceeds maximum expected for rawsize %d",
1435                                                                            toast_pointer.va_valueid,
1436                                                                            VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer),
1437                                                                            toast_pointer.va_rawsize));
1438
1439                 /* Compressed attributes should have a valid compression method */
1440                 cmid = TOAST_COMPRESS_METHOD(&toast_pointer);
1441                 switch (cmid)
1442                 {
1443                         /* List of all valid compression method IDs */
1444                         case TOAST_PGLZ_COMPRESSION_ID:
1445                         case TOAST_LZ4_COMPRESSION_ID:
1446                                 valid = true;
1447                                 break;
1448
1449                         /* Recognized but invalid compression method ID */
1450                         case TOAST_INVALID_COMPRESSION_ID:
1451                                 break;
1452
1453                         /* Intentionally no default here */
1454                 }
1455                 if (!valid)
1456                         report_corruption(ctx,
1457                                                           psprintf("toast value %u has invalid compression method id %d",
1458                                                                            toast_pointer.va_valueid, cmid));
1459         }
1460
1461         /* The tuple header better claim to contain toasted values */
1462         if (!(infomask & HEAP_HASEXTERNAL))
1463         {
1464                 report_corruption(ctx,
1465                                                   psprintf("toast value %u is external but tuple header flag HEAP_HASEXTERNAL not set",
1466                                                                    toast_pointer.va_valueid));
1467                 return true;
1468         }
1469
1470         /* The relation better have a toast table */
1471         if (!ctx->rel->rd_rel->reltoastrelid)
1472         {
1473                 report_corruption(ctx,
1474                                                   psprintf("toast value %u is external but relation has no toast relation",
1475                                                                    toast_pointer.va_valueid));
1476                 return true;
1477         }
1478
1479         /* If we were told to skip toast checking, then we're done. */
1480         if (ctx->toast_rel == NULL)
1481                 return true;
1482
1483         /*
1484          * If this tuple is eligible to be pruned, we cannot check the toast.
1485          * Otherwise, we push a copy of the toast tuple so we can check it after
1486          * releasing the main table buffer lock.
1487          */
1488         if (!ctx->tuple_could_be_pruned)
1489         {
1490                 ToastedAttribute *ta;
1491
1492                 ta = (ToastedAttribute *) palloc0(sizeof(ToastedAttribute));
1493
1494                 VARATT_EXTERNAL_GET_POINTER(ta->toast_pointer, attr);
1495                 ta->blkno = ctx->blkno;
1496                 ta->offnum = ctx->offnum;
1497                 ta->attnum = ctx->attnum;
1498                 ctx->toasted_attributes = lappend(ctx->toasted_attributes, ta);
1499         }
1500
1501         return true;
1502 }
1503
1504 /*
1505  * For each attribute collected in ctx->toasted_attributes, look up the value
1506  * in the toast table and perform checks on it.  This function should only be
1507  * called on toast pointers which cannot be vacuumed away during our
1508  * processing.
1509  */
1510 static void
1511 check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
1512 {
1513         SnapshotData SnapshotToast;
1514         ScanKeyData toastkey;
1515         SysScanDesc toastscan;
1516         bool            found_toasttup;
1517         HeapTuple       toasttup;
1518         uint32          extsize;
1519         int32           expected_chunk_seq = 0;
1520         int32           last_chunk_seq;
1521
1522         extsize = VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer);
1523         last_chunk_seq = (extsize - 1) / TOAST_MAX_CHUNK_SIZE;
1524
1525         /*
1526          * Setup a scan key to find chunks in toast table with matching va_valueid
1527          */
1528         ScanKeyInit(&toastkey,
1529                                 (AttrNumber) 1,
1530                                 BTEqualStrategyNumber, F_OIDEQ,
1531                                 ObjectIdGetDatum(ta->toast_pointer.va_valueid));
1532
1533         /*
1534          * Check if any chunks for this toasted object exist in the toast table,
1535          * accessible via the index.
1536          */
1537         init_toast_snapshot(&SnapshotToast);
1538         toastscan = systable_beginscan_ordered(ctx->toast_rel,
1539                                                                                    ctx->valid_toast_index,
1540                                                                                    &SnapshotToast, 1,
1541                                                                                    &toastkey);
1542         found_toasttup = false;
1543         while ((toasttup =
1544                         systable_getnext_ordered(toastscan,
1545                                                                          ForwardScanDirection)) != NULL)
1546         {
1547                 found_toasttup = true;
1548                 check_toast_tuple(toasttup, ctx, ta, &expected_chunk_seq, extsize);
1549         }
1550         systable_endscan_ordered(toastscan);
1551
1552         if (!found_toasttup)
1553                 report_toast_corruption(ctx, ta,
1554                                                                 psprintf("toast value %u not found in toast table",
1555                                                                                  ta->toast_pointer.va_valueid));
1556         else if (expected_chunk_seq <= last_chunk_seq)
1557                 report_toast_corruption(ctx, ta,
1558                                                                 psprintf("toast value %u was expected to end at chunk %d, but ended while expecting chunk %d",
1559                                                                                  ta->toast_pointer.va_valueid,
1560                                                                                  last_chunk_seq, expected_chunk_seq));
1561 }
1562
1563 /*
1564  * Check the current tuple as tracked in ctx, recording any corruption found in
1565  * ctx->tupstore.
1566  */
1567 static void
1568 check_tuple(HeapCheckContext *ctx)
1569 {
1570         /*
1571          * Check various forms of tuple header corruption, and if the header is
1572          * too corrupt, do not continue with other checks.
1573          */
1574         if (!check_tuple_header(ctx))
1575                 return;
1576
1577         /*
1578          * Check tuple visibility.  If the inserting transaction aborted, we
1579          * cannot assume our relation description matches the tuple structure, and
1580          * therefore cannot check it.
1581          */
1582         if (!check_tuple_visibility(ctx))
1583                 return;
1584
1585         /*
1586          * The tuple is visible, so it must be compatible with the current version
1587          * of the relation descriptor. It might have fewer columns than are
1588          * present in the relation descriptor, but it cannot have more.
1589          */
1590         if (RelationGetDescr(ctx->rel)->natts < ctx->natts)
1591         {
1592                 report_corruption(ctx,
1593                                                   psprintf("number of attributes %u exceeds maximum expected for table %u",
1594                                                                    ctx->natts,
1595                                                                    RelationGetDescr(ctx->rel)->natts));
1596                 return;
1597         }
1598
1599         /*
1600          * Check each attribute unless we hit corruption that confuses what to do
1601          * next, at which point we abort further attribute checks for this tuple.
1602          * Note that we don't abort for all types of corruption, only for those
1603          * types where we don't know how to continue.  We also don't abort the
1604          * checking of toasted attributes collected from the tuple prior to
1605          * aborting.  Those will still be checked later along with other toasted
1606          * attributes collected from the page.
1607          */
1608         ctx->offset = 0;
1609         for (ctx->attnum = 0; ctx->attnum < ctx->natts; ctx->attnum++)
1610                 if (!check_tuple_attribute(ctx))
1611                         break;                          /* cannot continue */
1612
1613         /* revert attnum to -1 until we again examine individual attributes */
1614         ctx->attnum = -1;
1615 }
1616
1617 /*
1618  * Convert a TransactionId into a FullTransactionId using our cached values of
1619  * the valid transaction ID range.  It is the caller's responsibility to have
1620  * already updated the cached values, if necessary.
1621  */
1622 static FullTransactionId
1623 FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
1624 {
1625         uint32          epoch;
1626
1627         if (!TransactionIdIsNormal(xid))
1628                 return FullTransactionIdFromEpochAndXid(0, xid);
1629         epoch = EpochFromFullTransactionId(ctx->next_fxid);
1630         if (xid > ctx->next_xid)
1631                 epoch--;
1632         return FullTransactionIdFromEpochAndXid(epoch, xid);
1633 }
1634
1635 /*
1636  * Update our cached range of valid transaction IDs.
1637  */
1638 static void
1639 update_cached_xid_range(HeapCheckContext *ctx)
1640 {
1641         /* Make cached copies */
1642         LWLockAcquire(XidGenLock, LW_SHARED);
1643         ctx->next_fxid = ShmemVariableCache->nextXid;
1644         ctx->oldest_xid = ShmemVariableCache->oldestXid;
1645         LWLockRelease(XidGenLock);
1646
1647         /* And compute alternate versions of the same */
1648         ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
1649         ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
1650 }
1651
1652 /*
1653  * Update our cached range of valid multitransaction IDs.
1654  */
1655 static void
1656 update_cached_mxid_range(HeapCheckContext *ctx)
1657 {
1658         ReadMultiXactIdRange(&ctx->oldest_mxact, &ctx->next_mxact);
1659 }
1660
1661 /*
1662  * Return whether the given FullTransactionId is within our cached valid
1663  * transaction ID range.
1664  */
1665 static inline bool
1666 fxid_in_cached_range(FullTransactionId fxid, const HeapCheckContext *ctx)
1667 {
1668         return (FullTransactionIdPrecedesOrEquals(ctx->oldest_fxid, fxid) &&
1669                         FullTransactionIdPrecedes(fxid, ctx->next_fxid));
1670 }
1671
1672 /*
1673  * Checks whether a multitransaction ID is in the cached valid range, returning
1674  * the nature of the range violation, if any.
1675  */
1676 static XidBoundsViolation
1677 check_mxid_in_range(MultiXactId mxid, HeapCheckContext *ctx)
1678 {
1679         if (!TransactionIdIsValid(mxid))
1680                 return XID_INVALID;
1681         if (MultiXactIdPrecedes(mxid, ctx->relminmxid))
1682                 return XID_PRECEDES_RELMIN;
1683         if (MultiXactIdPrecedes(mxid, ctx->oldest_mxact))
1684                 return XID_PRECEDES_CLUSTERMIN;
1685         if (MultiXactIdPrecedesOrEquals(ctx->next_mxact, mxid))
1686                 return XID_IN_FUTURE;
1687         return XID_BOUNDS_OK;
1688 }
1689
1690 /*
1691  * Checks whether the given mxid is valid to appear in the heap being checked,
1692  * returning the nature of the range violation, if any.
1693  *
1694  * This function attempts to return quickly by caching the known valid mxid
1695  * range in ctx.  Callers should already have performed the initial setup of
1696  * the cache prior to the first call to this function.
1697  */
1698 static XidBoundsViolation
1699 check_mxid_valid_in_rel(MultiXactId mxid, HeapCheckContext *ctx)
1700 {
1701         XidBoundsViolation result;
1702
1703         result = check_mxid_in_range(mxid, ctx);
1704         if (result == XID_BOUNDS_OK)
1705                 return XID_BOUNDS_OK;
1706
1707         /* The range may have advanced.  Recheck. */
1708         update_cached_mxid_range(ctx);
1709         return check_mxid_in_range(mxid, ctx);
1710 }
1711
1712 /*
1713  * Checks whether the given transaction ID is (or was recently) valid to appear
1714  * in the heap being checked, or whether it is too old or too new to appear in
1715  * the relation, returning information about the nature of the bounds violation.
1716  *
1717  * We cache the range of valid transaction IDs.  If xid is in that range, we
1718  * conclude that it is valid, even though concurrent changes to the table might
1719  * invalidate it under certain corrupt conditions.  (For example, if the table
1720  * contains corrupt all-frozen bits, a concurrent vacuum might skip the page(s)
1721  * containing the xid and then truncate clog and advance the relfrozenxid
1722  * beyond xid.) Reporting the xid as valid under such conditions seems
1723  * acceptable, since if we had checked it earlier in our scan it would have
1724  * truly been valid at that time.
1725  *
1726  * If the status argument is not NULL, and if and only if the transaction ID
1727  * appears to be valid in this relation, the status argument will be set with
1728  * the commit status of the transaction ID.
1729  */
1730 static XidBoundsViolation
1731 get_xid_status(TransactionId xid, HeapCheckContext *ctx,
1732                            XidCommitStatus *status)
1733 {
1734         FullTransactionId fxid;
1735         FullTransactionId clog_horizon;
1736
1737         /* Quick check for special xids */
1738         if (!TransactionIdIsValid(xid))
1739                 return XID_INVALID;
1740         else if (xid == BootstrapTransactionId || xid == FrozenTransactionId)
1741         {
1742                 if (status != NULL)
1743                         *status = XID_COMMITTED;
1744                 return XID_BOUNDS_OK;
1745         }
1746
1747         /* Check if the xid is within bounds */
1748         fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
1749         if (!fxid_in_cached_range(fxid, ctx))
1750         {
1751                 /*
1752                  * We may have been checking against stale values.  Update the cached
1753                  * range to be sure, and since we relied on the cached range when we
1754                  * performed the full xid conversion, reconvert.
1755                  */
1756                 update_cached_xid_range(ctx);
1757                 fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
1758         }
1759
1760         if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid))
1761                 return XID_IN_FUTURE;
1762         if (FullTransactionIdPrecedes(fxid, ctx->oldest_fxid))
1763                 return XID_PRECEDES_CLUSTERMIN;
1764         if (FullTransactionIdPrecedes(fxid, ctx->relfrozenfxid))
1765                 return XID_PRECEDES_RELMIN;
1766
1767         /* Early return if the caller does not request clog checking */
1768         if (status == NULL)
1769                 return XID_BOUNDS_OK;
1770
1771         /* Early return if we just checked this xid in a prior call */
1772         if (xid == ctx->cached_xid)
1773         {
1774                 *status = ctx->cached_status;
1775                 return XID_BOUNDS_OK;
1776         }
1777
1778         *status = XID_COMMITTED;
1779         LWLockAcquire(XactTruncationLock, LW_SHARED);
1780         clog_horizon =
1781                 FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid,
1782                                                                            ctx);
1783         if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid))
1784         {
1785                 if (TransactionIdIsCurrentTransactionId(xid))
1786                         *status = XID_IS_CURRENT_XID;
1787                 else if (TransactionIdIsInProgress(xid))
1788                         *status = XID_IN_PROGRESS;
1789                 else if (TransactionIdDidCommit(xid))
1790                         *status = XID_COMMITTED;
1791                 else
1792                         *status = XID_ABORTED;
1793         }
1794         LWLockRelease(XactTruncationLock);
1795         ctx->cached_xid = xid;
1796         ctx->cached_status = *status;
1797         return XID_BOUNDS_OK;
1798 }