src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/commit_ts.h"
  27 #include "access/genam.h"
  28 #include "access/heapam.h"
  29 #include "access/htup_details.h"
  30 #include "access/multixact.h"
  31 #include "access/tableam.h"
  32 #include "access/transam.h"
  33 #include "access/xact.h"
  34 #include "catalog/indexing.h"
  35 #include "catalog/namespace.h"
  36 #include "catalog/pg_database.h"
  37 #include "catalog/pg_inherits.h"
  38 #include "catalog/pg_namespace.h"
  39 #include "commands/cluster.h"
  40 #include "commands/defrem.h"
  41 #include "commands/vacuum.h"
  42 #include "miscadmin.h"
  43 #include "nodes/makefuncs.h"
  44 #include "pgstat.h"
  45 #include "postmaster/autovacuum.h"
  46 #include "storage/bufmgr.h"
  47 #include "storage/lmgr.h"
  48 #include "storage/proc.h"
  49 #include "storage/procarray.h"
  50 #include "utils/acl.h"
  51 #include "utils/fmgroids.h"
  52 #include "utils/guc.h"
  53 #include "utils/memutils.h"
  54 #include "utils/snapmgr.h"
  55 #include "utils/syscache.h"
  56
  57
  58 /*
  59  * GUC parameters
  60  */
  61 int                     vacuum_freeze_min_age;
  62 int                     vacuum_freeze_table_age;
  63 int                     vacuum_multixact_freeze_min_age;
  64 int                     vacuum_multixact_freeze_table_age;
  65
  66
  67 /* A few variables that don't seem worth passing around as parameters */
  68 static MemoryContext vac_context = NULL;
  69 static BufferAccessStrategy vac_strategy;
  70
  71
  72 /* non-export function prototypes */
  73 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
  74 static List *get_all_vacuum_rels(int options);
  75 static void vac_truncate_clog(TransactionId frozenXID,
  76                                                           MultiXactId minMulti,
  77                                                           TransactionId lastSaneFrozenXid,
  78                                                           MultiXactId lastSaneMinMulti);
  79 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
  80 static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def);
  81
  82 /*
  83  * Primary entry point for manual VACUUM and ANALYZE commands
  84  *
  85  * This is mainly a preparation wrapper for the real operations that will
  86  * happen in vacuum().
  87  */
  88 void
  89 ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
  90 {
  91         VacuumParams params;
  92         bool            verbose = false;
  93         bool            skip_locked = false;
  94         bool            analyze = false;
  95         bool            freeze = false;
  96         bool            full = false;
  97         bool            disable_page_skipping = false;
  98         ListCell   *lc;
  99
 100         /* Set default value */
 101         params.index_cleanup = VACOPT_TERNARY_DEFAULT;
 102         params.truncate = VACOPT_TERNARY_DEFAULT;
 103
 104         /* Parse options list */
 105         foreach(lc, vacstmt->options)
 106         {
 107                 DefElem    *opt = (DefElem *) lfirst(lc);
 108
 109                 /* Parse common options for VACUUM and ANALYZE */
 110                 if (strcmp(opt->defname, "verbose") == 0)
 111                         verbose = defGetBoolean(opt);
 112                 else if (strcmp(opt->defname, "skip_locked") == 0)
 113                         skip_locked = defGetBoolean(opt);
 114                 else if (!vacstmt->is_vacuumcmd)
 115                         ereport(ERROR,
 116                                         (errcode(ERRCODE_SYNTAX_ERROR),
 117                                          errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
 118                                          parser_errposition(pstate, opt->location)));
 119
 120                 /* Parse options available on VACUUM */
 121                 else if (strcmp(opt->defname, "analyze") == 0)
 122                         analyze = defGetBoolean(opt);
 123                 else if (strcmp(opt->defname, "freeze") == 0)
 124                         freeze = defGetBoolean(opt);
 125                 else if (strcmp(opt->defname, "full") == 0)
 126                         full = defGetBoolean(opt);
 127                 else if (strcmp(opt->defname, "disable_page_skipping") == 0)
 128                         disable_page_skipping = defGetBoolean(opt);
 129                 else if (strcmp(opt->defname, "index_cleanup") == 0)
 130                         params.index_cleanup = get_vacopt_ternary_value(opt);
 131                 else if (strcmp(opt->defname, "truncate") == 0)
 132                         params.truncate = get_vacopt_ternary_value(opt);
 133                 else
 134                         ereport(ERROR,
 135                                         (errcode(ERRCODE_SYNTAX_ERROR),
 136                                          errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
 137                                          parser_errposition(pstate, opt->location)));
 138         }
 139
 140         /* Set vacuum options */
 141         params.options =
 142                 (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
 143                 (verbose ? VACOPT_VERBOSE : 0) |
 144                 (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
 145                 (analyze ? VACOPT_ANALYZE : 0) |
 146                 (freeze ? VACOPT_FREEZE : 0) |
 147                 (full ? VACOPT_FULL : 0) |
 148                 (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
 149
 150         /* sanity checks on options */
 151         Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
 152         Assert((params.options & VACOPT_VACUUM) ||
 153                    !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
 154         Assert(!(params.options & VACOPT_SKIPTOAST));
 155
 156         /*
 157          * Make sure VACOPT_ANALYZE is specified if any column lists are present.
 158          */
 159         if (!(params.options & VACOPT_ANALYZE))
 160         {
 161                 ListCell   *lc;
 162
 163                 foreach(lc, vacstmt->rels)
 164                 {
 165                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
 166
 167                         if (vrel->va_cols != NIL)
 168                                 ereport(ERROR,
 169                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 170                                                  errmsg("ANALYZE option must be specified when a column list is provided")));
 171                 }
 172         }
 173
 174         /*
 175          * All freeze ages are zero if the FREEZE option is given; otherwise pass
 176          * them as -1 which means to use the default values.
 177          */
 178         if (params.options & VACOPT_FREEZE)
 179         {
 180                 params.freeze_min_age = 0;
 181                 params.freeze_table_age = 0;
 182                 params.multixact_freeze_min_age = 0;
 183                 params.multixact_freeze_table_age = 0;
 184         }
 185         else
 186         {
 187                 params.freeze_min_age = -1;
 188                 params.freeze_table_age = -1;
 189                 params.multixact_freeze_min_age = -1;
 190                 params.multixact_freeze_table_age = -1;
 191         }
 192
 193         /* user-invoked vacuum is never "for wraparound" */
 194         params.is_wraparound = false;
 195
 196         /* user-invoked vacuum never uses this parameter */
 197         params.log_min_duration = -1;
 198
 199         /* Now go through the common routine */
 200         vacuum(vacstmt->rels, &params, NULL, isTopLevel);
 201 }
 202
 203 /*
 204  * Internal entry point for VACUUM and ANALYZE commands.
 205  *
 206  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
 207  * we process all relevant tables in the database.  For each VacuumRelation,
 208  * if a valid OID is supplied, the table with that OID is what to process;
 209  * otherwise, the VacuumRelation's RangeVar indicates what to process.
 210  *
 211  * params contains a set of parameters that can be used to customize the
 212  * behavior.
 213  *
 214  * bstrategy is normally given as NULL, but in autovacuum it can be passed
 215  * in to use the same buffer strategy object across multiple vacuum() calls.
 216  *
 217  * isTopLevel should be passed down from ProcessUtility.
 218  *
 219  * It is the caller's responsibility that all parameters are allocated in a
 220  * memory context that will not disappear at transaction commit.
 221  */
 222 void
 223 vacuum(List *relations, VacuumParams *params,
 224            BufferAccessStrategy bstrategy, bool isTopLevel)
 225 {
 226         static bool in_vacuum = false;
 227
 228         const char *stmttype;
 229         volatile bool in_outer_xact,
 230                                 use_own_xacts;
 231
 232         Assert(params != NULL);
 233
 234         stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 235
 236         /*
 237          * We cannot run VACUUM inside a user transaction block; if we were inside
 238          * a transaction, then our commit- and start-transaction-command calls
 239          * would not have the intended effect!  There are numerous other subtle
 240          * dependencies on this, too.
 241          *
 242          * ANALYZE (without VACUUM) can run either way.
 243          */
 244         if (params->options & VACOPT_VACUUM)
 245         {
 246                 PreventInTransactionBlock(isTopLevel, stmttype);
 247                 in_outer_xact = false;
 248         }
 249         else
 250                 in_outer_xact = IsInTransactionBlock(isTopLevel);
 251
 252         /*
 253          * Due to static variables vac_context, anl_context and vac_strategy,
 254          * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
 255          * calls a hostile index expression that itself calls ANALYZE.
 256          */
 257         if (in_vacuum)
 258                 ereport(ERROR,
 259                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 260                                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
 261                                                 stmttype)));
 262
 263         /*
 264          * Sanity check DISABLE_PAGE_SKIPPING option.
 265          */
 266         if ((params->options & VACOPT_FULL) != 0 &&
 267                 (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
 268                 ereport(ERROR,
 269                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 270                                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
 271
 272         /*
 273          * Send info about dead objects to the statistics collector, unless we are
 274          * in autovacuum --- autovacuum.c does this for itself.
 275          */
 276         if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 277                 pgstat_vacuum_stat();
 278
 279         /*
 280          * Create special memory context for cross-transaction storage.
 281          *
 282          * Since it is a child of PortalContext, it will go away eventually even
 283          * if we suffer an error; there's no need for special abort cleanup logic.
 284          */
 285         vac_context = AllocSetContextCreate(PortalContext,
 286                                                                                 "Vacuum",
 287                                                                                 ALLOCSET_DEFAULT_SIZES);
 288
 289         /*
 290          * If caller didn't give us a buffer strategy object, make one in the
 291          * cross-transaction memory context.
 292          */
 293         if (bstrategy == NULL)
 294         {
 295                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 296
 297                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 298                 MemoryContextSwitchTo(old_context);
 299         }
 300         vac_strategy = bstrategy;
 301
 302         /*
 303          * Build list of relation(s) to process, putting any new data in
 304          * vac_context for safekeeping.
 305          */
 306         if (relations != NIL)
 307         {
 308                 List       *newrels = NIL;
 309                 ListCell   *lc;
 310
 311                 foreach(lc, relations)
 312                 {
 313                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
 314                         List       *sublist;
 315                         MemoryContext old_context;
 316
 317                         sublist = expand_vacuum_rel(vrel, params->options);
 318                         old_context = MemoryContextSwitchTo(vac_context);
 319                         newrels = list_concat(newrels, sublist);
 320                         MemoryContextSwitchTo(old_context);
 321                 }
 322                 relations = newrels;
 323         }
 324         else
 325                 relations = get_all_vacuum_rels(params->options);
 326
 327         /*
 328          * Decide whether we need to start/commit our own transactions.
 329          *
 330          * For VACUUM (with or without ANALYZE): always do so, so that we can
 331          * release locks as soon as possible.  (We could possibly use the outer
 332          * transaction for a one-table VACUUM, but handling TOAST tables would be
 333          * problematic.)
 334          *
 335          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 336          * start/commit our own transactions.  Also, there's no need to do so if
 337          * only processing one relation.  For multiple relations when not within a
 338          * transaction block, and also in an autovacuum worker, use own
 339          * transactions so we can release locks sooner.
 340          */
 341         if (params->options & VACOPT_VACUUM)
 342                 use_own_xacts = true;
 343         else
 344         {
 345                 Assert(params->options & VACOPT_ANALYZE);
 346                 if (IsAutoVacuumWorkerProcess())
 347                         use_own_xacts = true;
 348                 else if (in_outer_xact)
 349                         use_own_xacts = false;
 350                 else if (list_length(relations) > 1)
 351                         use_own_xacts = true;
 352                 else
 353                         use_own_xacts = false;
 354         }
 355
 356         /*
 357          * vacuum_rel expects to be entered with no transaction active; it will
 358          * start and commit its own transaction.  But we are called by an SQL
 359          * command, and so we are executing inside a transaction already. We
 360          * commit the transaction started in PostgresMain() here, and start
 361          * another one before exiting to match the commit waiting for us back in
 362          * PostgresMain().
 363          */
 364         if (use_own_xacts)
 365         {
 366                 Assert(!in_outer_xact);
 367
 368                 /* ActiveSnapshot is not set by autovacuum */
 369                 if (ActiveSnapshotSet())
 370                         PopActiveSnapshot();
 371
 372                 /* matches the StartTransaction in PostgresMain() */
 373                 CommitTransactionCommand();
 374         }
 375
 376         /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
 377         PG_TRY();
 378         {
 379                 ListCell   *cur;
 380
 381                 in_vacuum = true;
 382                 VacuumCostActive = (VacuumCostDelay > 0);
 383                 VacuumCostBalance = 0;
 384                 VacuumPageHit = 0;
 385                 VacuumPageMiss = 0;
 386                 VacuumPageDirty = 0;
 387
 388                 /*
 389                  * Loop to process each selected relation.
 390                  */
 391                 foreach(cur, relations)
 392                 {
 393                         VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
 394
 395                         if (params->options & VACOPT_VACUUM)
 396                         {
 397                                 if (!vacuum_rel(vrel->oid, vrel->relation, params))
 398                                         continue;
 399                         }
 400
 401                         if (params->options & VACOPT_ANALYZE)
 402                         {
 403                                 /*
 404                                  * If using separate xacts, start one for analyze. Otherwise,
 405                                  * we can use the outer transaction.
 406                                  */
 407                                 if (use_own_xacts)
 408                                 {
 409                                         StartTransactionCommand();
 410                                         /* functions in indexes may want a snapshot set */
 411                                         PushActiveSnapshot(GetTransactionSnapshot());
 412                                 }
 413
 414                                 analyze_rel(vrel->oid, vrel->relation, params,
 415                                                         vrel->va_cols, in_outer_xact, vac_strategy);
 416
 417                                 if (use_own_xacts)
 418                                 {
 419                                         PopActiveSnapshot();
 420                                         CommitTransactionCommand();
 421                                 }
 422                                 else
 423                                 {
 424                                         /*
 425                                          * If we're not using separate xacts, better separate the
 426                                          * ANALYZE actions with CCIs.  This avoids trouble if user
 427                                          * says "ANALYZE t, t".
 428                                          */
 429                                         CommandCounterIncrement();
 430                                 }
 431                         }
 432                 }
 433         }
 434         PG_CATCH();
 435         {
 436                 in_vacuum = false;
 437                 VacuumCostActive = false;
 438                 PG_RE_THROW();
 439         }
 440         PG_END_TRY();
 441
 442         in_vacuum = false;
 443         VacuumCostActive = false;
 444
 445         /*
 446          * Finish up processing.
 447          */
 448         if (use_own_xacts)
 449         {
 450                 /* here, we are not in a transaction */
 451
 452                 /*
 453                  * This matches the CommitTransaction waiting for us in
 454                  * PostgresMain().
 455                  */
 456                 StartTransactionCommand();
 457         }
 458
 459         if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 460         {
 461                 /*
 462                  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
 463                  * (autovacuum.c does this for itself.)
 464                  */
 465                 vac_update_datfrozenxid();
 466         }
 467
 468         /*
 469          * Clean up working storage --- note we must do this after
 470          * StartTransactionCommand, else we might be trying to delete the active
 471          * context!
 472          */
 473         MemoryContextDelete(vac_context);
 474         vac_context = NULL;
 475 }
 476
 477 /*
 478  * Check if a given relation can be safely vacuumed or analyzed.  If the
 479  * user is not the relation owner, issue a WARNING log message and return
 480  * false to let the caller decide what to do with this relation.  This
 481  * routine is used to decide if a relation can be processed for VACUUM or
 482  * ANALYZE.
 483  */
 484 bool
 485 vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options)
 486 {
 487         char       *relname;
 488
 489         Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
 490
 491         /*
 492          * Check permissions.
 493          *
 494          * We allow the user to vacuum or analyze a table if he is superuser, the
 495          * table owner, or the database owner (but in the latter case, only if
 496          * it's not a shared relation).  pg_class_ownercheck includes the
 497          * superuser case.
 498          *
 499          * Note we choose to treat permissions failure as a WARNING and keep
 500          * trying to vacuum or analyze the rest of the DB --- is this appropriate?
 501          */
 502         if (pg_class_ownercheck(relid, GetUserId()) ||
 503                 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
 504                 return true;
 505
 506         relname = NameStr(reltuple->relname);
 507
 508         if ((options & VACOPT_VACUUM) != 0)
 509         {
 510                 if (reltuple->relisshared)
 511                         ereport(WARNING,
 512                                         (errmsg("skipping \"%s\" --- only superuser can vacuum it",
 513                                                         relname)));
 514                 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
 515                         ereport(WARNING,
 516                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
 517                                                         relname)));
 518                 else
 519                         ereport(WARNING,
 520                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
 521                                                         relname)));
 522
 523                 /*
 524                  * For VACUUM ANALYZE, both logs could show up, but just generate
 525                  * information for VACUUM as that would be the first one to be
 526                  * processed.
 527                  */
 528                 return false;
 529         }
 530
 531         if ((options & VACOPT_ANALYZE) != 0)
 532         {
 533                 if (reltuple->relisshared)
 534                         ereport(WARNING,
 535                                         (errmsg("skipping \"%s\" --- only superuser can analyze it",
 536                                                         relname)));
 537                 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
 538                         ereport(WARNING,
 539                                         (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
 540                                                         relname)));
 541                 else
 542                         ereport(WARNING,
 543                                         (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
 544                                                         relname)));
 545         }
 546
 547         return false;
 548 }
 549
 550
 551 /*
 552  * vacuum_open_relation
 553  *
 554  * This routine is used for attempting to open and lock a relation which
 555  * is going to be vacuumed or analyzed.  If the relation cannot be opened
 556  * or locked, a log is emitted if possible.
 557  */
 558 Relation
 559 vacuum_open_relation(Oid relid, RangeVar *relation, int options,
 560                                          bool verbose, LOCKMODE lmode)
 561 {
 562         Relation        onerel;
 563         bool            rel_lock = true;
 564         int                     elevel;
 565
 566         Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
 567
 568         /*
 569          * Open the relation and get the appropriate lock on it.
 570          *
 571          * There's a race condition here: the relation may have gone away since
 572          * the last time we saw it.  If so, we don't need to vacuum or analyze it.
 573          *
 574          * If we've been asked not to wait for the relation lock, acquire it first
 575          * in non-blocking mode, before calling try_relation_open().
 576          */
 577         if (!(options & VACOPT_SKIP_LOCKED))
 578                 onerel = try_relation_open(relid, lmode);
 579         else if (ConditionalLockRelationOid(relid, lmode))
 580                 onerel = try_relation_open(relid, NoLock);
 581         else
 582         {
 583                 onerel = NULL;
 584                 rel_lock = false;
 585         }
 586
 587         /* if relation is opened, leave */
 588         if (onerel)
 589                 return onerel;
 590
 591         /*
 592          * Relation could not be opened, hence generate if possible a log
 593          * informing on the situation.
 594          *
 595          * If the RangeVar is not defined, we do not have enough information to
 596          * provide a meaningful log statement.  Chances are that the caller has
 597          * intentionally not provided this information so that this logging is
 598          * skipped, anyway.
 599          */
 600         if (relation == NULL)
 601                 return NULL;
 602
 603         /*
 604          * Determine the log level.
 605          *
 606          * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
 607          * statements in the permission checks; otherwise, only log if the caller
 608          * so requested.
 609          */
 610         if (!IsAutoVacuumWorkerProcess())
 611                 elevel = WARNING;
 612         else if (verbose)
 613                 elevel = LOG;
 614         else
 615                 return NULL;
 616
 617         if ((options & VACOPT_VACUUM) != 0)
 618         {
 619                 if (!rel_lock)
 620                         ereport(elevel,
 621                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 622                                          errmsg("skipping vacuum of \"%s\" --- lock not available",
 623                                                         relation->relname)));
 624                 else
 625                         ereport(elevel,
 626                                         (errcode(ERRCODE_UNDEFINED_TABLE),
 627                                          errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
 628                                                         relation->relname)));
 629
 630                 /*
 631                  * For VACUUM ANALYZE, both logs could show up, but just generate
 632                  * information for VACUUM as that would be the first one to be
 633                  * processed.
 634                  */
 635                 return NULL;
 636         }
 637
 638         if ((options & VACOPT_ANALYZE) != 0)
 639         {
 640                 if (!rel_lock)
 641                         ereport(elevel,
 642                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 643                                          errmsg("skipping analyze of \"%s\" --- lock not available",
 644                                                         relation->relname)));
 645                 else
 646                         ereport(elevel,
 647                                         (errcode(ERRCODE_UNDEFINED_TABLE),
 648                                          errmsg("skipping analyze of \"%s\" --- relation no longer exists",
 649                                                         relation->relname)));
 650         }
 651
 652         return NULL;
 653 }
 654
 655
 656 /*
 657  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
 658  * and optionally add VacuumRelations for partitions of the table.
 659  *
 660  * If a VacuumRelation does not have an OID supplied and is a partitioned
 661  * table, an extra entry will be added to the output for each partition.
 662  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
 663  * it does not want us to expand partitioned tables.
 664  *
 665  * We take care not to modify the input data structure, but instead build
 666  * new VacuumRelation(s) to return.  (But note that they will reference
 667  * unmodified parts of the input, eg column lists.)  New data structures
 668  * are made in vac_context.
 669  */
 670 static List *
 671 expand_vacuum_rel(VacuumRelation *vrel, int options)
 672 {
 673         List       *vacrels = NIL;
 674         MemoryContext oldcontext;
 675
 676         /* If caller supplied OID, there's nothing we need do here. */
 677         if (OidIsValid(vrel->oid))
 678         {
 679                 oldcontext = MemoryContextSwitchTo(vac_context);
 680                 vacrels = lappend(vacrels, vrel);
 681                 MemoryContextSwitchTo(oldcontext);
 682         }
 683         else
 684         {
 685                 /* Process a specific relation, and possibly partitions thereof */
 686                 Oid                     relid;
 687                 HeapTuple       tuple;
 688                 Form_pg_class classForm;
 689                 bool            include_parts;
 690                 int                     rvr_opts;
 691
 692                 /*
 693                  * Since autovacuum workers supply OIDs when calling vacuum(), no
 694                  * autovacuum worker should reach this code.
 695                  */
 696                 Assert(!IsAutoVacuumWorkerProcess());
 697
 698                 /*
 699                  * We transiently take AccessShareLock to protect the syscache lookup
 700                  * below, as well as find_all_inheritors's expectation that the caller
 701                  * holds some lock on the starting relation.
 702                  */
 703                 rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
 704                 relid = RangeVarGetRelidExtended(vrel->relation,
 705                                                                                  AccessShareLock,
 706                                                                                  rvr_opts,
 707                                                                                  NULL, NULL);
 708
 709                 /*
 710                  * If the lock is unavailable, emit the same log statement that
 711                  * vacuum_rel() and analyze_rel() would.
 712                  */
 713                 if (!OidIsValid(relid))
 714                 {
 715                         if (options & VACOPT_VACUUM)
 716                                 ereport(WARNING,
 717                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 718                                                  errmsg("skipping vacuum of \"%s\" --- lock not available",
 719                                                                 vrel->relation->relname)));
 720                         else
 721                                 ereport(WARNING,
 722                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 723                                                  errmsg("skipping analyze of \"%s\" --- lock not available",
 724                                                                 vrel->relation->relname)));
 725                         return vacrels;
 726                 }
 727
 728                 /*
 729                  * To check whether the relation is a partitioned table and its
 730                  * ownership, fetch its syscache entry.
 731                  */
 732                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
 733                 if (!HeapTupleIsValid(tuple))
 734                         elog(ERROR, "cache lookup failed for relation %u", relid);
 735                 classForm = (Form_pg_class) GETSTRUCT(tuple);
 736
 737                 /*
 738                  * Make a returnable VacuumRelation for this rel if user is a proper
 739                  * owner.
 740                  */
 741                 if (vacuum_is_relation_owner(relid, classForm, options))
 742                 {
 743                         oldcontext = MemoryContextSwitchTo(vac_context);
 744                         vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
 745                                                                                                                   relid,
 746                                                                                                                   vrel->va_cols));
 747                         MemoryContextSwitchTo(oldcontext);
 748                 }
 749
 750
 751                 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
 752                 ReleaseSysCache(tuple);
 753
 754                 /*
 755                  * If it is, make relation list entries for its partitions.  Note that
 756                  * the list returned by find_all_inheritors() includes the passed-in
 757                  * OID, so we have to skip that.  There's no point in taking locks on
 758                  * the individual partitions yet, and doing so would just add
 759                  * unnecessary deadlock risk.  For this last reason we do not check
 760                  * yet the ownership of the partitions, which get added to the list to
 761                  * process.  Ownership will be checked later on anyway.
 762                  */
 763                 if (include_parts)
 764                 {
 765                         List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
 766                         ListCell   *part_lc;
 767
 768                         foreach(part_lc, part_oids)
 769                         {
 770                                 Oid                     part_oid = lfirst_oid(part_lc);
 771
 772                                 if (part_oid == relid)
 773                                         continue;       /* ignore original table */
 774
 775                                 /*
 776                                  * We omit a RangeVar since it wouldn't be appropriate to
 777                                  * complain about failure to open one of these relations
 778                                  * later.
 779                                  */
 780                                 oldcontext = MemoryContextSwitchTo(vac_context);
 781                                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
 782                                                                                                                           part_oid,
 783                                                                                                                           vrel->va_cols));
 784                                 MemoryContextSwitchTo(oldcontext);
 785                         }
 786                 }
 787
 788                 /*
 789                  * Release lock again.  This means that by the time we actually try to
 790                  * process the table, it might be gone or renamed.  In the former case
 791                  * we'll silently ignore it; in the latter case we'll process it
 792                  * anyway, but we must beware that the RangeVar doesn't necessarily
 793                  * identify it anymore.  This isn't ideal, perhaps, but there's little
 794                  * practical alternative, since we're typically going to commit this
 795                  * transaction and begin a new one between now and then.  Moreover,
 796                  * holding locks on multiple relations would create significant risk
 797                  * of deadlock.
 798                  */
 799                 UnlockRelationOid(relid, AccessShareLock);
 800         }
 801
 802         return vacrels;
 803 }
 804
 805 /*
 806  * Construct a list of VacuumRelations for all vacuumable rels in
 807  * the current database.  The list is built in vac_context.
 808  */
 809 static List *
 810 get_all_vacuum_rels(int options)
 811 {
 812         List       *vacrels = NIL;
 813         Relation        pgclass;
 814         TableScanDesc scan;
 815         HeapTuple       tuple;
 816
 817         pgclass = table_open(RelationRelationId, AccessShareLock);
 818
 819         scan = table_beginscan_catalog(pgclass, 0, NULL);
 820
 821         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 822         {
 823                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 824                 MemoryContext oldcontext;
 825                 Oid                     relid = classForm->oid;
 826
 827                 /* check permissions of relation */
 828                 if (!vacuum_is_relation_owner(relid, classForm, options))
 829                         continue;
 830
 831                 /*
 832                  * We include partitioned tables here; depending on which operation is
 833                  * to be performed, caller will decide whether to process or ignore
 834                  * them.
 835                  */
 836                 if (classForm->relkind != RELKIND_RELATION &&
 837                         classForm->relkind != RELKIND_MATVIEW &&
 838                         classForm->relkind != RELKIND_PARTITIONED_TABLE)
 839                         continue;
 840
 841                 /*
 842                  * Build VacuumRelation(s) specifying the table OIDs to be processed.
 843                  * We omit a RangeVar since it wouldn't be appropriate to complain
 844                  * about failure to open one of these relations later.
 845                  */
 846                 oldcontext = MemoryContextSwitchTo(vac_context);
 847                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
 848                                                                                                           relid,
 849                                                                                                           NIL));
 850                 MemoryContextSwitchTo(oldcontext);
 851         }
 852
 853         table_endscan(scan);
 854         table_close(pgclass, AccessShareLock);
 855
 856         return vacrels;
 857 }
 858
 859 /*
 860  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 861  *
 862  * The output parameters are:
 863  * - oldestXmin is the cutoff value used to distinguish whether tuples are
 864  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 865  * - freezeLimit is the Xid below which all Xids are replaced by
 866  *       FrozenTransactionId during vacuum.
 867  * - xidFullScanLimit (computed from table_freeze_age parameter)
 868  *       represents a minimum Xid value; a table whose relfrozenxid is older than
 869  *       this will have a full-table vacuum applied to it, to freeze tuples across
 870  *       the whole table.  Vacuuming a table younger than this value can use a
 871  *       partial scan.
 872  * - multiXactCutoff is the value below which all MultiXactIds are removed from
 873  *       Xmax.
 874  * - mxactFullScanLimit is a value against which a table's relminmxid value is
 875  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
 876  *
 877  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 878  * not interested.
 879  */
 880 void
 881 vacuum_set_xid_limits(Relation rel,
 882                                           int freeze_min_age,
 883                                           int freeze_table_age,
 884                                           int multixact_freeze_min_age,
 885                                           int multixact_freeze_table_age,
 886                                           TransactionId *oldestXmin,
 887                                           TransactionId *freezeLimit,
 888                                           TransactionId *xidFullScanLimit,
 889                                           MultiXactId *multiXactCutoff,
 890                                           MultiXactId *mxactFullScanLimit)
 891 {
 892         int                     freezemin;
 893         int                     mxid_freezemin;
 894         int                     effective_multixact_freeze_max_age;
 895         TransactionId limit;
 896         TransactionId safeLimit;
 897         MultiXactId oldestMxact;
 898         MultiXactId mxactLimit;
 899         MultiXactId safeMxactLimit;
 900
 901         /*
 902          * We can always ignore processes running lazy vacuum.  This is because we
 903          * use these values only for deciding which tuples we must keep in the
 904          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
 905          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 906          * a full vacuum, but keep in mind that only one vacuum process can be
 907          * working on a particular table at any time, and that each vacuum is
 908          * always an independent transaction.
 909          */
 910         *oldestXmin =
 911                 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
 912
 913         Assert(TransactionIdIsNormal(*oldestXmin));
 914
 915         /*
 916          * Determine the minimum freeze age to use: as specified by the caller, or
 917          * vacuum_freeze_min_age, but in any case not more than half
 918          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 919          * wraparound won't occur too frequently.
 920          */
 921         freezemin = freeze_min_age;
 922         if (freezemin < 0)
 923                 freezemin = vacuum_freeze_min_age;
 924         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 925         Assert(freezemin >= 0);
 926
 927         /*
 928          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 929          */
 930         limit = *oldestXmin - freezemin;
 931         if (!TransactionIdIsNormal(limit))
 932                 limit = FirstNormalTransactionId;
 933
 934         /*
 935          * If oldestXmin is very far back (in practice, more than
 936          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 937          * freeze age of zero.
 938          */
 939         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 940         if (!TransactionIdIsNormal(safeLimit))
 941                 safeLimit = FirstNormalTransactionId;
 942
 943         if (TransactionIdPrecedes(limit, safeLimit))
 944         {
 945                 ereport(WARNING,
 946                                 (errmsg("oldest xmin is far in the past"),
 947                                  errhint("Close open transactions soon to avoid wraparound problems.\n"
 948                                                  "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 949                 limit = *oldestXmin;
 950         }
 951
 952         *freezeLimit = limit;
 953
 954         /*
 955          * Compute the multixact age for which freezing is urgent.  This is
 956          * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 957          * short of multixact member space.
 958          */
 959         effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 960
 961         /*
 962          * Determine the minimum multixact freeze age to use: as specified by
 963          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
 964          * than half effective_multixact_freeze_max_age, so that autovacuums to
 965          * prevent MultiXact wraparound won't occur too frequently.
 966          */
 967         mxid_freezemin = multixact_freeze_min_age;
 968         if (mxid_freezemin < 0)
 969                 mxid_freezemin = vacuum_multixact_freeze_min_age;
 970         mxid_freezemin = Min(mxid_freezemin,
 971                                                  effective_multixact_freeze_max_age / 2);
 972         Assert(mxid_freezemin >= 0);
 973
 974         /* compute the cutoff multi, being careful to generate a valid value */
 975         oldestMxact = GetOldestMultiXactId();
 976         mxactLimit = oldestMxact - mxid_freezemin;
 977         if (mxactLimit < FirstMultiXactId)
 978                 mxactLimit = FirstMultiXactId;
 979
 980         safeMxactLimit =
 981                 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
 982         if (safeMxactLimit < FirstMultiXactId)
 983                 safeMxactLimit = FirstMultiXactId;
 984
 985         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
 986         {
 987                 ereport(WARNING,
 988                                 (errmsg("oldest multixact is far in the past"),
 989                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 990                 /* Use the safe limit, unless an older mxact is still running */
 991                 if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
 992                         mxactLimit = oldestMxact;
 993                 else
 994                         mxactLimit = safeMxactLimit;
 995         }
 996
 997         *multiXactCutoff = mxactLimit;
 998
 999         if (xidFullScanLimit != NULL)
1000         {
1001                 int                     freezetable;
1002
1003                 Assert(mxactFullScanLimit != NULL);
1004
1005                 /*
1006                  * Determine the table freeze age to use: as specified by the caller,
1007                  * or vacuum_freeze_table_age, but in any case not more than
1008                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1009                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
1010                  * before anti-wraparound autovacuum is launched.
1011                  */
1012                 freezetable = freeze_table_age;
1013                 if (freezetable < 0)
1014                         freezetable = vacuum_freeze_table_age;
1015                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
1016                 Assert(freezetable >= 0);
1017
1018                 /*
1019                  * Compute XID limit causing a full-table vacuum, being careful not to
1020                  * generate a "permanent" XID.
1021                  */
1022                 limit = ReadNewTransactionId() - freezetable;
1023                 if (!TransactionIdIsNormal(limit))
1024                         limit = FirstNormalTransactionId;
1025
1026                 *xidFullScanLimit = limit;
1027
1028                 /*
1029                  * Similar to the above, determine the table freeze age to use for
1030                  * multixacts: as specified by the caller, or
1031                  * vacuum_multixact_freeze_table_age, but in any case not more than
1032                  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
1033                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
1034                  * freeze multixacts before anti-wraparound autovacuum is launched.
1035                  */
1036                 freezetable = multixact_freeze_table_age;
1037                 if (freezetable < 0)
1038                         freezetable = vacuum_multixact_freeze_table_age;
1039                 freezetable = Min(freezetable,
1040                                                   effective_multixact_freeze_max_age * 0.95);
1041                 Assert(freezetable >= 0);
1042
1043                 /*
1044                  * Compute MultiXact limit causing a full-table vacuum, being careful
1045                  * to generate a valid MultiXact value.
1046                  */
1047                 mxactLimit = ReadNextMultiXactId() - freezetable;
1048                 if (mxactLimit < FirstMultiXactId)
1049                         mxactLimit = FirstMultiXactId;
1050
1051                 *mxactFullScanLimit = mxactLimit;
1052         }
1053         else
1054         {
1055                 Assert(mxactFullScanLimit == NULL);
1056         }
1057 }
1058
1059 /*
1060  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1061  *
1062  *              If we scanned the whole relation then we should just use the count of
1063  *              live tuples seen; but if we did not, we should not blindly extrapolate
1064  *              from that number, since VACUUM may have scanned a quite nonrandom
1065  *              subset of the table.  When we have only partial information, we take
1066  *              the old value of pg_class.reltuples as a measurement of the
1067  *              tuple density in the unscanned pages.
1068  *
1069  *              Note: scanned_tuples should count only *live* tuples, since
1070  *              pg_class.reltuples is defined that way.
1071  */
1072 double
1073 vac_estimate_reltuples(Relation relation,
1074                                            BlockNumber total_pages,
1075                                            BlockNumber scanned_pages,
1076                                            double scanned_tuples)
1077 {
1078         BlockNumber old_rel_pages = relation->rd_rel->relpages;
1079         double          old_rel_tuples = relation->rd_rel->reltuples;
1080         double          old_density;
1081         double          unscanned_pages;
1082         double          total_tuples;
1083
1084         /* If we did scan the whole table, just use the count as-is */
1085         if (scanned_pages >= total_pages)
1086                 return scanned_tuples;
1087
1088         /*
1089          * If scanned_pages is zero but total_pages isn't, keep the existing value
1090          * of reltuples.  (Note: callers should avoid updating the pg_class
1091          * statistics in this situation, since no new information has been
1092          * provided.)
1093          */
1094         if (scanned_pages == 0)
1095                 return old_rel_tuples;
1096
1097         /*
1098          * If old value of relpages is zero, old density is indeterminate; we
1099          * can't do much except scale up scanned_tuples to match total_pages.
1100          */
1101         if (old_rel_pages == 0)
1102                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1103
1104         /*
1105          * Okay, we've covered the corner cases.  The normal calculation is to
1106          * convert the old measurement to a density (tuples per page), then
1107          * estimate the number of tuples in the unscanned pages using that figure,
1108          * and finally add on the number of tuples in the scanned pages.
1109          */
1110         old_density = old_rel_tuples / old_rel_pages;
1111         unscanned_pages = (double) total_pages - (double) scanned_pages;
1112         total_tuples = old_density * unscanned_pages + scanned_tuples;
1113         return floor(total_tuples + 0.5);
1114 }
1115
1116
1117 /*
1118  *      vac_update_relstats() -- update statistics for one relation
1119  *
1120  *              Update the whole-relation statistics that are kept in its pg_class
1121  *              row.  There are additional stats that will be updated if we are
1122  *              doing ANALYZE, but we always update these stats.  This routine works
1123  *              for both index and heap relation entries in pg_class.
1124  *
1125  *              We violate transaction semantics here by overwriting the rel's
1126  *              existing pg_class tuple with the new values.  This is reasonably
1127  *              safe as long as we're sure that the new values are correct whether or
1128  *              not this transaction commits.  The reason for doing this is that if
1129  *              we updated these tuples in the usual way, vacuuming pg_class itself
1130  *              wouldn't work very well --- by the time we got done with a vacuum
1131  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
1132  *              course, this only works for fixed-size not-null columns, but these are.
1133  *
1134  *              Another reason for doing it this way is that when we are in a lazy
1135  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1136  *              Somebody vacuuming pg_class might think they could delete a tuple
1137  *              marked with xmin = our xid.
1138  *
1139  *              In addition to fundamentally nontransactional statistics such as
1140  *              relpages and relallvisible, we try to maintain certain lazily-updated
1141  *              DDL flags such as relhasindex, by clearing them if no longer correct.
1142  *              It's safe to do this in VACUUM, which can't run in parallel with
1143  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1144  *              However, it's *not* safe to do it in an ANALYZE that's within an
1145  *              outer transaction, because for example the current transaction might
1146  *              have dropped the last index; then we'd think relhasindex should be
1147  *              cleared, but if the transaction later rolls back this would be wrong.
1148  *              So we refrain from updating the DDL flags if we're inside an outer
1149  *              transaction.  This is OK since postponing the flag maintenance is
1150  *              always allowable.
1151  *
1152  *              Note: num_tuples should count only *live* tuples, since
1153  *              pg_class.reltuples is defined that way.
1154  *
1155  *              This routine is shared by VACUUM and ANALYZE.
1156  */
1157 void
1158 vac_update_relstats(Relation relation,
1159                                         BlockNumber num_pages, double num_tuples,
1160                                         BlockNumber num_all_visible_pages,
1161                                         bool hasindex, TransactionId frozenxid,
1162                                         MultiXactId minmulti,
1163                                         bool in_outer_xact)
1164 {
1165         Oid                     relid = RelationGetRelid(relation);
1166         Relation        rd;
1167         HeapTuple       ctup;
1168         Form_pg_class pgcform;
1169         bool            dirty;
1170
1171         rd = table_open(RelationRelationId, RowExclusiveLock);
1172
1173         /* Fetch a copy of the tuple to scribble on */
1174         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1175         if (!HeapTupleIsValid(ctup))
1176                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1177                          relid);
1178         pgcform = (Form_pg_class) GETSTRUCT(ctup);
1179
1180         /* Apply statistical updates, if any, to copied tuple */
1181
1182         dirty = false;
1183         if (pgcform->relpages != (int32) num_pages)
1184         {
1185                 pgcform->relpages = (int32) num_pages;
1186                 dirty = true;
1187         }
1188         if (pgcform->reltuples != (float4) num_tuples)
1189         {
1190                 pgcform->reltuples = (float4) num_tuples;
1191                 dirty = true;
1192         }
1193         if (pgcform->relallvisible != (int32) num_all_visible_pages)
1194         {
1195                 pgcform->relallvisible = (int32) num_all_visible_pages;
1196                 dirty = true;
1197         }
1198
1199         /* Apply DDL updates, but not inside an outer transaction (see above) */
1200
1201         if (!in_outer_xact)
1202         {
1203                 /*
1204                  * If we didn't find any indexes, reset relhasindex.
1205                  */
1206                 if (pgcform->relhasindex && !hasindex)
1207                 {
1208                         pgcform->relhasindex = false;
1209                         dirty = true;
1210                 }
1211
1212                 /* We also clear relhasrules and relhastriggers if needed */
1213                 if (pgcform->relhasrules && relation->rd_rules == NULL)
1214                 {
1215                         pgcform->relhasrules = false;
1216                         dirty = true;
1217                 }
1218                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
1219                 {
1220                         pgcform->relhastriggers = false;
1221                         dirty = true;
1222                 }
1223         }
1224
1225         /*
1226          * Update relfrozenxid, unless caller passed InvalidTransactionId
1227          * indicating it has no new data.
1228          *
1229          * Ordinarily, we don't let relfrozenxid go backwards: if things are
1230          * working correctly, the only way the new frozenxid could be older would
1231          * be if a previous VACUUM was done with a tighter freeze_min_age, in
1232          * which case we don't want to forget the work it already did.  However,
1233          * if the stored relfrozenxid is "in the future", then it must be corrupt
1234          * and it seems best to overwrite it with the cutoff we used this time.
1235          * This should match vac_update_datfrozenxid() concerning what we consider
1236          * to be "in the future".
1237          */
1238         if (TransactionIdIsNormal(frozenxid) &&
1239                 pgcform->relfrozenxid != frozenxid &&
1240                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1241                  TransactionIdPrecedes(ReadNewTransactionId(),
1242                                                            pgcform->relfrozenxid)))
1243         {
1244                 pgcform->relfrozenxid = frozenxid;
1245                 dirty = true;
1246         }
1247
1248         /* Similarly for relminmxid */
1249         if (MultiXactIdIsValid(minmulti) &&
1250                 pgcform->relminmxid != minmulti &&
1251                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1252                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1253         {
1254                 pgcform->relminmxid = minmulti;
1255                 dirty = true;
1256         }
1257
1258         /* If anything changed, write out the tuple. */
1259         if (dirty)
1260                 heap_inplace_update(rd, ctup);
1261
1262         table_close(rd, RowExclusiveLock);
1263 }
1264
1265
1266 /*
1267  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1268  *
1269  *              Update pg_database's datfrozenxid entry for our database to be the
1270  *              minimum of the pg_class.relfrozenxid values.
1271  *
1272  *              Similarly, update our datminmxid to be the minimum of the
1273  *              pg_class.relminmxid values.
1274  *
1275  *              If we are able to advance either pg_database value, also try to
1276  *              truncate pg_xact and pg_multixact.
1277  *
1278  *              We violate transaction semantics here by overwriting the database's
1279  *              existing pg_database tuple with the new values.  This is reasonably
1280  *              safe since the new values are correct whether or not this transaction
1281  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
1282  *              behind after a VACUUM.
1283  */
1284 void
1285 vac_update_datfrozenxid(void)
1286 {
1287         HeapTuple       tuple;
1288         Form_pg_database dbform;
1289         Relation        relation;
1290         SysScanDesc scan;
1291         HeapTuple       classTup;
1292         TransactionId newFrozenXid;
1293         MultiXactId newMinMulti;
1294         TransactionId lastSaneFrozenXid;
1295         MultiXactId lastSaneMinMulti;
1296         bool            bogus = false;
1297         bool            dirty = false;
1298         ScanKeyData key[1];
1299
1300         /*
1301          * Restrict this task to one backend per database.  This avoids race
1302          * conditions that would move datfrozenxid or datminmxid backward.  It
1303          * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1304          * datfrozenxid passed to an earlier vac_truncate_clog() call.
1305          */
1306         LockDatabaseFrozenIds(ExclusiveLock);
1307
1308         /*
1309          * Initialize the "min" calculation with GetOldestXmin, which is a
1310          * reasonable approximation to the minimum relfrozenxid for not-yet-
1311          * committed pg_class entries for new tables; see AddNewRelationTuple().
1312          * So we cannot produce a wrong minimum by starting with this.
1313          */
1314         newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1315
1316         /*
1317          * Similarly, initialize the MultiXact "min" with the value that would be
1318          * used on pg_class for new tables.  See AddNewRelationTuple().
1319          */
1320         newMinMulti = GetOldestMultiXactId();
1321
1322         /*
1323          * Identify the latest relfrozenxid and relminmxid values that we could
1324          * validly see during the scan.  These are conservative values, but it's
1325          * not really worth trying to be more exact.
1326          */
1327         lastSaneFrozenXid = ReadNewTransactionId();
1328         lastSaneMinMulti = ReadNextMultiXactId();
1329
1330         /*
1331          * We must seqscan pg_class to find the minimum Xid, because there is no
1332          * index that can help us here.
1333          *
1334          * See vac_truncate_clog() for the race condition to prevent.
1335          */
1336         relation = table_open(RelationRelationId, AccessShareLock);
1337
1338         scan = systable_beginscan(relation, InvalidOid, false,
1339                                                           NULL, 0, NULL);
1340
1341         while ((classTup = systable_getnext(scan)) != NULL)
1342         {
1343                 volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1344                 TransactionId relfrozenxid = classForm->relfrozenxid;
1345                 TransactionId relminmxid = classForm->relminmxid;
1346
1347                 /*
1348                  * Only consider relations able to hold unfrozen XIDs (anything else
1349                  * should have InvalidTransactionId in relfrozenxid anyway).
1350                  */
1351                 if (classForm->relkind != RELKIND_RELATION &&
1352                         classForm->relkind != RELKIND_MATVIEW &&
1353                         classForm->relkind != RELKIND_TOASTVALUE)
1354                 {
1355                         Assert(!TransactionIdIsValid(relfrozenxid));
1356                         Assert(!MultiXactIdIsValid(relminmxid));
1357                         continue;
1358                 }
1359
1360                 /*
1361                  * Some table AMs might not need per-relation xid / multixid horizons.
1362                  * It therefore seems reasonable to allow relfrozenxid and relminmxid
1363                  * to not be set (i.e. set to their respective Invalid*Id)
1364                  * independently. Thus validate and compute horizon for each only if
1365                  * set.
1366                  *
1367                  * If things are working properly, no relation should have a
1368                  * relfrozenxid or relminmxid that is "in the future".  However, such
1369                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1370                  * see any entries that are "in the future", chicken out and don't do
1371                  * anything.  This ensures we won't truncate clog & multixact SLRUs
1372                  * before those relations have been scanned and cleaned up.
1373                  */
1374
1375                 if (TransactionIdIsValid(relfrozenxid))
1376                 {
1377                         Assert(TransactionIdIsNormal(relfrozenxid));
1378
1379                         /* check for values in the future */
1380                         if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1381                         {
1382                                 bogus = true;
1383                                 break;
1384                         }
1385
1386                         /* determine new horizon */
1387                         if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1388                                 newFrozenXid = relfrozenxid;
1389                 }
1390
1391                 if (MultiXactIdIsValid(relminmxid))
1392                 {
1393                         /* check for values in the future */
1394                         if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1395                         {
1396                                 bogus = true;
1397                                 break;
1398                         }
1399
1400                         /* determine new horizon */
1401                         if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1402                                 newMinMulti = relminmxid;
1403                 }
1404         }
1405
1406         /* we're done with pg_class */
1407         systable_endscan(scan);
1408         table_close(relation, AccessShareLock);
1409
1410         /* chicken out if bogus data found */
1411         if (bogus)
1412                 return;
1413
1414         Assert(TransactionIdIsNormal(newFrozenXid));
1415         Assert(MultiXactIdIsValid(newMinMulti));
1416
1417         /* Now fetch the pg_database tuple we need to update. */
1418         relation = table_open(DatabaseRelationId, RowExclusiveLock);
1419
1420         /*
1421          * Get the pg_database tuple to scribble on.  Note that this does not
1422          * directly rely on the syscache to avoid issues with flattened toast
1423          * values for the in-place update.
1424          */
1425         ScanKeyInit(&key[0],
1426                                 Anum_pg_database_oid,
1427                                 BTEqualStrategyNumber, F_OIDEQ,
1428                                 ObjectIdGetDatum(MyDatabaseId));
1429
1430         scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1431                                                           NULL, 1, key);
1432         tuple = systable_getnext(scan);
1433         tuple = heap_copytuple(tuple);
1434         systable_endscan(scan);
1435
1436         if (!HeapTupleIsValid(tuple))
1437                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1438
1439         dbform = (Form_pg_database) GETSTRUCT(tuple);
1440
1441         /*
1442          * As in vac_update_relstats(), we ordinarily don't want to let
1443          * datfrozenxid go backward; but if it's "in the future" then it must be
1444          * corrupt and it seems best to overwrite it.
1445          */
1446         if (dbform->datfrozenxid != newFrozenXid &&
1447                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1448                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1449         {
1450                 dbform->datfrozenxid = newFrozenXid;
1451                 dirty = true;
1452         }
1453         else
1454                 newFrozenXid = dbform->datfrozenxid;
1455
1456         /* Ditto for datminmxid */
1457         if (dbform->datminmxid != newMinMulti &&
1458                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1459                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1460         {
1461                 dbform->datminmxid = newMinMulti;
1462                 dirty = true;
1463         }
1464         else
1465                 newMinMulti = dbform->datminmxid;
1466
1467         if (dirty)
1468                 heap_inplace_update(relation, tuple);
1469
1470         heap_freetuple(tuple);
1471         table_close(relation, RowExclusiveLock);
1472
1473         /*
1474          * If we were able to advance datfrozenxid or datminmxid, see if we can
1475          * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1476          * XID-wrap-limit info is stale, since this action will update that too.
1477          */
1478         if (dirty || ForceTransactionIdLimitUpdate())
1479                 vac_truncate_clog(newFrozenXid, newMinMulti,
1480                                                   lastSaneFrozenXid, lastSaneMinMulti);
1481 }
1482
1483
1484 /*
1485  *      vac_truncate_clog() -- attempt to truncate the commit log
1486  *
1487  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
1488  *              and use it to truncate the transaction commit log (pg_xact).
1489  *              Also update the XID wrap limit info maintained by varsup.c.
1490  *              Likewise for datminmxid.
1491  *
1492  *              The passed frozenXID and minMulti are the updated values for my own
1493  *              pg_database entry. They're used to initialize the "min" calculations.
1494  *              The caller also passes the "last sane" XID and MXID, since it has
1495  *              those at hand already.
1496  *
1497  *              This routine is only invoked when we've managed to change our
1498  *              DB's datfrozenxid/datminmxid values, or we found that the shared
1499  *              XID-wrap-limit info is stale.
1500  */
1501 static void
1502 vac_truncate_clog(TransactionId frozenXID,
1503                                   MultiXactId minMulti,
1504                                   TransactionId lastSaneFrozenXid,
1505                                   MultiXactId lastSaneMinMulti)
1506 {
1507         TransactionId nextXID = ReadNewTransactionId();
1508         Relation        relation;
1509         TableScanDesc scan;
1510         HeapTuple       tuple;
1511         Oid                     oldestxid_datoid;
1512         Oid                     minmulti_datoid;
1513         bool            bogus = false;
1514         bool            frozenAlreadyWrapped = false;
1515
1516         /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1517         LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1518
1519         /* init oldest datoids to sync with my frozenXID/minMulti values */
1520         oldestxid_datoid = MyDatabaseId;
1521         minmulti_datoid = MyDatabaseId;
1522
1523         /*
1524          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1525          *
1526          * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1527          * the values could change while we look at them.  Fetch each one just
1528          * once to ensure sane behavior of the comparison logic.  (Here, as in
1529          * many other places, we assume that fetching or updating an XID in shared
1530          * storage is atomic.)
1531          *
1532          * Note: we need not worry about a race condition with new entries being
1533          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1534          * existing DB's datfrozenxid, and that source DB cannot be ours because
1535          * of the interlock against copying a DB containing an active backend.
1536          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1537          * concurrently modify the datfrozenxid's of different databases, the
1538          * worst possible outcome is that pg_xact is not truncated as aggressively
1539          * as it could be.
1540          */
1541         relation = table_open(DatabaseRelationId, AccessShareLock);
1542
1543         scan = table_beginscan_catalog(relation, 0, NULL);
1544
1545         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1546         {
1547                 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1548                 TransactionId datfrozenxid = dbform->datfrozenxid;
1549                 TransactionId datminmxid = dbform->datminmxid;
1550
1551                 Assert(TransactionIdIsNormal(datfrozenxid));
1552                 Assert(MultiXactIdIsValid(datminmxid));
1553
1554                 /*
1555                  * If database is in the process of getting dropped, or has been
1556                  * interrupted while doing so, no connections to it are possible
1557                  * anymore. Therefore we don't need to take it into account here.
1558                  * Which is good, because it can't be processed by autovacuum either.
1559                  */
1560                 if (database_is_invalid_form((Form_pg_database) dbform))
1561                 {
1562                         elog(DEBUG2,
1563                                  "skipping invalid database \"%s\" while computing relfrozenxid",
1564                                  NameStr(dbform->datname));
1565                         continue;
1566                 }
1567
1568                 /*
1569                  * If things are working properly, no database should have a
1570                  * datfrozenxid or datminmxid that is "in the future".  However, such
1571                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1572                  * see any entries that are "in the future", chicken out and don't do
1573                  * anything.  This ensures we won't truncate clog before those
1574                  * databases have been scanned and cleaned up.  (We will issue the
1575                  * "already wrapped" warning if appropriate, though.)
1576                  */
1577                 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1578                         MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1579                         bogus = true;
1580
1581                 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1582                         frozenAlreadyWrapped = true;
1583                 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1584                 {
1585                         frozenXID = datfrozenxid;
1586                         oldestxid_datoid = dbform->oid;
1587                 }
1588
1589                 if (MultiXactIdPrecedes(datminmxid, minMulti))
1590                 {
1591                         minMulti = datminmxid;
1592                         minmulti_datoid = dbform->oid;
1593                 }
1594         }
1595
1596         table_endscan(scan);
1597
1598         table_close(relation, AccessShareLock);
1599
1600         /*
1601          * Do not truncate CLOG if we seem to have suffered wraparound already;
1602          * the computed minimum XID might be bogus.  This case should now be
1603          * impossible due to the defenses in GetNewTransactionId, but we keep the
1604          * test anyway.
1605          */
1606         if (frozenAlreadyWrapped)
1607         {
1608                 ereport(WARNING,
1609                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1610                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1611                 LWLockRelease(WrapLimitsVacuumLock);
1612                 return;
1613         }
1614
1615         /* chicken out if data is bogus in any other way */
1616         if (bogus)
1617         {
1618                 LWLockRelease(WrapLimitsVacuumLock);
1619                 return;
1620         }
1621
1622         /*
1623          * Advance the oldest value for commit timestamps before truncating, so
1624          * that if a user requests a timestamp for a transaction we're truncating
1625          * away right after this point, they get NULL instead of an ugly "file not
1626          * found" error from slru.c.  This doesn't matter for xact/multixact
1627          * because they are not subject to arbitrary lookups from users.
1628          */
1629         AdvanceOldestCommitTsXid(frozenXID);
1630
1631         /*
1632          * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1633          */
1634         TruncateCLOG(frozenXID, oldestxid_datoid);
1635         TruncateCommitTs(frozenXID);
1636         TruncateMultiXact(minMulti, minmulti_datoid);
1637
1638         /*
1639          * Update the wrap limit for GetNewTransactionId and creation of new
1640          * MultiXactIds.  Note: these functions will also signal the postmaster
1641          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1642          * signalling twice?
1643          */
1644         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1645         SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1646
1647         LWLockRelease(WrapLimitsVacuumLock);
1648 }
1649
1650
1651 /*
1652  *      vacuum_rel() -- vacuum one heap relation
1653  *
1654  *              relid identifies the relation to vacuum.  If relation is supplied,
1655  *              use the name therein for reporting any failure to open/lock the rel;
1656  *              do not use it once we've successfully opened the rel, since it might
1657  *              be stale.
1658  *
1659  *              Returns true if it's okay to proceed with a requested ANALYZE
1660  *              operation on this table.
1661  *
1662  *              Doing one heap at a time incurs extra overhead, since we need to
1663  *              check that the heap exists again just before we vacuum it.  The
1664  *              reason that we do this is so that vacuuming can be spread across
1665  *              many small transactions.  Otherwise, two-phase locking would require
1666  *              us to lock the entire database during one pass of the vacuum cleaner.
1667  *
1668  *              At entry and exit, we are not inside a transaction.
1669  */
1670 static bool
1671 vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1672 {
1673         LOCKMODE        lmode;
1674         Relation        onerel;
1675         LockRelId       onerelid;
1676         Oid                     toast_relid;
1677         Oid                     save_userid;
1678         int                     save_sec_context;
1679         int                     save_nestlevel;
1680
1681         Assert(params != NULL);
1682
1683         /* Begin a transaction for vacuuming this relation */
1684         StartTransactionCommand();
1685
1686         /*
1687          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1688          * ensures that RecentGlobalXmin is kept truly recent.
1689          */
1690         PushActiveSnapshot(GetTransactionSnapshot());
1691
1692         if (!(params->options & VACOPT_FULL))
1693         {
1694                 /*
1695                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1696                  * other concurrent VACUUMs know that they can ignore this one while
1697                  * determining their OldestXmin.  (The reason we don't set it during a
1698                  * full VACUUM is exactly that we may have to run user-defined
1699                  * functions for functional indexes, and we want to make sure that if
1700                  * they use the snapshot set above, any tuples it requires can't get
1701                  * removed from other tables.  An index function that depends on the
1702                  * contents of other tables is arguably broken, but we won't break it
1703                  * here by violating transaction semantics.)
1704                  *
1705                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1706                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1707                  * in an emergency.
1708                  *
1709                  * Note: these flags remain set until CommitTransaction or
1710                  * AbortTransaction.  We don't want to clear them until we reset
1711                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1712                  * which is probably Not Good.
1713                  */
1714                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1715                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1716                 if (params->is_wraparound)
1717                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1718                 LWLockRelease(ProcArrayLock);
1719         }
1720
1721         /*
1722          * Check for user-requested abort.  Note we want this to be inside a
1723          * transaction, so xact.c doesn't issue useless WARNING.
1724          */
1725         CHECK_FOR_INTERRUPTS();
1726
1727         /*
1728          * Determine the type of lock we want --- hard exclusive lock for a FULL
1729          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1730          * way, we can be sure that no other backend is vacuuming the same table.
1731          */
1732         lmode = (params->options & VACOPT_FULL) ?
1733                 AccessExclusiveLock : ShareUpdateExclusiveLock;
1734
1735         /* open the relation and get the appropriate lock on it */
1736         onerel = vacuum_open_relation(relid, relation, params->options,
1737                                                                   params->log_min_duration >= 0, lmode);
1738
1739         /* leave if relation could not be opened or locked */
1740         if (!onerel)
1741         {
1742                 PopActiveSnapshot();
1743                 CommitTransactionCommand();
1744                 return false;
1745         }
1746
1747         /*
1748          * Check if relation needs to be skipped based on ownership.  This check
1749          * happens also when building the relation list to vacuum for a manual
1750          * operation, and needs to be done additionally here as VACUUM could
1751          * happen across multiple transactions where relation ownership could have
1752          * changed in-between.  Make sure to only generate logs for VACUUM in this
1753          * case.
1754          */
1755         if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
1756                                                                   onerel->rd_rel,
1757                                                                   params->options & VACOPT_VACUUM))
1758         {
1759                 relation_close(onerel, lmode);
1760                 PopActiveSnapshot();
1761                 CommitTransactionCommand();
1762                 return false;
1763         }
1764
1765         /*
1766          * Check that it's of a vacuumable relkind.
1767          */
1768         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1769                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1770                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1771                 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1772         {
1773                 ereport(WARNING,
1774                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1775                                                 RelationGetRelationName(onerel))));
1776                 relation_close(onerel, lmode);
1777                 PopActiveSnapshot();
1778                 CommitTransactionCommand();
1779                 return false;
1780         }
1781
1782         /*
1783          * Silently ignore tables that are temp tables of other backends ---
1784          * trying to vacuum these will lead to great unhappiness, since their
1785          * contents are probably not up-to-date on disk.  (We don't throw a
1786          * warning here; it would just lead to chatter during a database-wide
1787          * VACUUM.)
1788          */
1789         if (RELATION_IS_OTHER_TEMP(onerel))
1790         {
1791                 relation_close(onerel, lmode);
1792                 PopActiveSnapshot();
1793                 CommitTransactionCommand();
1794                 return false;
1795         }
1796
1797         /*
1798          * Silently ignore partitioned tables as there is no work to be done.  The
1799          * useful work is on their child partitions, which have been queued up for
1800          * us separately.
1801          */
1802         if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1803         {
1804                 relation_close(onerel, lmode);
1805                 PopActiveSnapshot();
1806                 CommitTransactionCommand();
1807                 /* It's OK to proceed with ANALYZE on this table */
1808                 return true;
1809         }
1810
1811         /*
1812          * Get a session-level lock too. This will protect our access to the
1813          * relation across multiple transactions, so that we can vacuum the
1814          * relation's TOAST table (if any) secure in the knowledge that no one is
1815          * deleting the parent relation.
1816          *
1817          * NOTE: this cannot block, even if someone else is waiting for access,
1818          * because the lock manager knows that both lock requests are from the
1819          * same process.
1820          */
1821         onerelid = onerel->rd_lockInfo.lockRelId;
1822         LockRelationIdForSession(&onerelid, lmode);
1823
1824         /* Set index cleanup option based on reloptions if not yet */
1825         if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
1826         {
1827                 if (onerel->rd_options == NULL ||
1828                         ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup)
1829                         params->index_cleanup = VACOPT_TERNARY_ENABLED;
1830                 else
1831                         params->index_cleanup = VACOPT_TERNARY_DISABLED;
1832         }
1833
1834         /* Set truncate option based on reloptions if not yet */
1835         if (params->truncate == VACOPT_TERNARY_DEFAULT)
1836         {
1837                 if (onerel->rd_options == NULL ||
1838                         ((StdRdOptions *) onerel->rd_options)->vacuum_truncate)
1839                         params->truncate = VACOPT_TERNARY_ENABLED;
1840                 else
1841                         params->truncate = VACOPT_TERNARY_DISABLED;
1842         }
1843
1844         /*
1845          * Remember the relation's TOAST relation for later, if the caller asked
1846          * us to process it.  In VACUUM FULL, though, the toast table is
1847          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1848          */
1849         if (!(params->options & VACOPT_SKIPTOAST) && !(params->options & VACOPT_FULL))
1850                 toast_relid = onerel->rd_rel->reltoastrelid;
1851         else
1852                 toast_relid = InvalidOid;
1853
1854         /*
1855          * Switch to the table owner's userid, so that any index functions are run
1856          * as that user.  Also lock down security-restricted operations and
1857          * arrange to make GUC variable changes local to this command. (This is
1858          * unnecessary, but harmless, for lazy VACUUM.)
1859          */
1860         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1861         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1862                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1863         save_nestlevel = NewGUCNestLevel();
1864
1865         /*
1866          * Do the actual work --- either FULL or "lazy" vacuum
1867          */
1868         if (params->options & VACOPT_FULL)
1869         {
1870                 int                     cluster_options = 0;
1871
1872                 /* close relation before vacuuming, but hold lock until commit */
1873                 relation_close(onerel, NoLock);
1874                 onerel = NULL;
1875
1876                 if ((params->options & VACOPT_VERBOSE) != 0)
1877                         cluster_options |= CLUOPT_VERBOSE;
1878
1879                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1880                 cluster_rel(relid, InvalidOid, cluster_options);
1881         }
1882         else
1883                 table_relation_vacuum(onerel, params, vac_strategy);
1884
1885         /* Roll back any GUC changes executed by index functions */
1886         AtEOXact_GUC(false, save_nestlevel);
1887
1888         /* Restore userid and security context */
1889         SetUserIdAndSecContext(save_userid, save_sec_context);
1890
1891         /* all done with this class, but hold lock until commit */
1892         if (onerel)
1893                 relation_close(onerel, NoLock);
1894
1895         /*
1896          * Complete the transaction and free all temporary memory used.
1897          */
1898         PopActiveSnapshot();
1899         CommitTransactionCommand();
1900
1901         /*
1902          * If the relation has a secondary toast rel, vacuum that too while we
1903          * still hold the session lock on the master table.  Note however that
1904          * "analyze" will not get done on the toast table.  This is good, because
1905          * the toaster always uses hardcoded index access and statistics are
1906          * totally unimportant for toast relations.
1907          */
1908         if (toast_relid != InvalidOid)
1909                 vacuum_rel(toast_relid, NULL, params);
1910
1911         /*
1912          * Now release the session-level lock on the master table.
1913          */
1914         UnlockRelationIdForSession(&onerelid, lmode);
1915
1916         /* Report that we really did it. */
1917         return true;
1918 }
1919
1920
1921 /*
1922  * Open all the vacuumable indexes of the given relation, obtaining the
1923  * specified kind of lock on each.  Return an array of Relation pointers for
1924  * the indexes into *Irel, and the number of indexes into *nindexes.
1925  *
1926  * We consider an index vacuumable if it is marked insertable (indisready).
1927  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1928  * execution, and what we have is too corrupt to be processable.  We will
1929  * vacuum even if the index isn't indisvalid; this is important because in a
1930  * unique index, uniqueness checks will be performed anyway and had better not
1931  * hit dangling index pointers.
1932  */
1933 void
1934 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1935                                  int *nindexes, Relation **Irel)
1936 {
1937         List       *indexoidlist;
1938         ListCell   *indexoidscan;
1939         int                     i;
1940
1941         Assert(lockmode != NoLock);
1942
1943         indexoidlist = RelationGetIndexList(relation);
1944
1945         /* allocate enough memory for all indexes */
1946         i = list_length(indexoidlist);
1947
1948         if (i > 0)
1949                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1950         else
1951                 *Irel = NULL;
1952
1953         /* collect just the ready indexes */
1954         i = 0;
1955         foreach(indexoidscan, indexoidlist)
1956         {
1957                 Oid                     indexoid = lfirst_oid(indexoidscan);
1958                 Relation        indrel;
1959
1960                 indrel = index_open(indexoid, lockmode);
1961                 if (indrel->rd_index->indisready)
1962                         (*Irel)[i++] = indrel;
1963                 else
1964                         index_close(indrel, lockmode);
1965         }
1966
1967         *nindexes = i;
1968
1969         list_free(indexoidlist);
1970 }
1971
1972 /*
1973  * Release the resources acquired by vac_open_indexes.  Optionally release
1974  * the locks (say NoLock to keep 'em).
1975  */
1976 void
1977 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1978 {
1979         if (Irel == NULL)
1980                 return;
1981
1982         while (nindexes--)
1983         {
1984                 Relation        ind = Irel[nindexes];
1985
1986                 index_close(ind, lockmode);
1987         }
1988         pfree(Irel);
1989 }
1990
1991 /*
1992  * vacuum_delay_point --- check for interrupts and cost-based delay.
1993  *
1994  * This should be called in each major loop of VACUUM processing,
1995  * typically once per page processed.
1996  */
1997 void
1998 vacuum_delay_point(void)
1999 {
2000         /* Always check for interrupts */
2001         CHECK_FOR_INTERRUPTS();
2002
2003         /* Nap if appropriate */
2004         if (VacuumCostActive && !InterruptPending &&
2005                 VacuumCostBalance >= VacuumCostLimit)
2006         {
2007                 double          msec;
2008
2009                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
2010                 if (msec > VacuumCostDelay * 4)
2011                         msec = VacuumCostDelay * 4;
2012
2013                 pg_usleep((long) (msec * 1000));
2014
2015                 VacuumCostBalance = 0;
2016
2017                 /* update balance values for workers */
2018                 AutoVacuumUpdateDelay();
2019
2020                 /* Might have gotten an interrupt while sleeping */
2021                 CHECK_FOR_INTERRUPTS();
2022         }
2023 }
2024
2025 /*
2026  * A wrapper function of defGetBoolean().
2027  *
2028  * This function returns VACOPT_TERNARY_ENABLED and VACOPT_TERNARY_DISABLED
2029  * instead of true and false.
2030  */
2031 static VacOptTernaryValue
2032 get_vacopt_ternary_value(DefElem *def)
2033 {
2034         return defGetBoolean(def) ? VACOPT_TERNARY_ENABLED : VACOPT_TERNARY_DISABLED;
2035 }