Close race condition between datfrozen and relfrozen updates.
[pgsql.git] / src / backend / commands / vacuum.c
blob9eca8a88dab15f24b78fbb8961b004fa6d3b7865
1 /*-------------------------------------------------------------------------
3 * vacuum.c
4 * The postgres vacuum cleaner.
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9 * in cluster.c.
12 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
16 * IDENTIFICATION
17 * src/backend/commands/vacuum.c
19 *-------------------------------------------------------------------------
21 #include "postgres.h"
23 #include <math.h>
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/tableam.h"
32 #include "access/transam.h"
33 #include "access/xact.h"
34 #include "catalog/indexing.h"
35 #include "catalog/namespace.h"
36 #include "catalog/pg_database.h"
37 #include "catalog/pg_inherits.h"
38 #include "catalog/pg_namespace.h"
39 #include "commands/cluster.h"
40 #include "commands/defrem.h"
41 #include "commands/vacuum.h"
42 #include "miscadmin.h"
43 #include "nodes/makefuncs.h"
44 #include "pgstat.h"
45 #include "postmaster/autovacuum.h"
46 #include "storage/bufmgr.h"
47 #include "storage/lmgr.h"
48 #include "storage/proc.h"
49 #include "storage/procarray.h"
50 #include "utils/acl.h"
51 #include "utils/fmgroids.h"
52 #include "utils/guc.h"
53 #include "utils/memutils.h"
54 #include "utils/snapmgr.h"
55 #include "utils/syscache.h"
59 * GUC parameters
61 int vacuum_freeze_min_age;
62 int vacuum_freeze_table_age;
63 int vacuum_multixact_freeze_min_age;
64 int vacuum_multixact_freeze_table_age;
67 /* A few variables that don't seem worth passing around as parameters */
68 static MemoryContext vac_context = NULL;
69 static BufferAccessStrategy vac_strategy;
72 /* non-export function prototypes */
73 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
74 static List *get_all_vacuum_rels(int options);
75 static void vac_truncate_clog(TransactionId frozenXID,
76 MultiXactId minMulti,
77 TransactionId lastSaneFrozenXid,
78 MultiXactId lastSaneMinMulti);
79 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
80 static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def);
83 * Primary entry point for manual VACUUM and ANALYZE commands
85 * This is mainly a preparation wrapper for the real operations that will
86 * happen in vacuum().
88 void
89 ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
91 VacuumParams params;
92 bool verbose = false;
93 bool skip_locked = false;
94 bool analyze = false;
95 bool freeze = false;
96 bool full = false;
97 bool disable_page_skipping = false;
98 ListCell *lc;
100 /* Set default value */
101 params.index_cleanup = VACOPT_TERNARY_DEFAULT;
102 params.truncate = VACOPT_TERNARY_DEFAULT;
104 /* Parse options list */
105 foreach(lc, vacstmt->options)
107 DefElem *opt = (DefElem *) lfirst(lc);
109 /* Parse common options for VACUUM and ANALYZE */
110 if (strcmp(opt->defname, "verbose") == 0)
111 verbose = defGetBoolean(opt);
112 else if (strcmp(opt->defname, "skip_locked") == 0)
113 skip_locked = defGetBoolean(opt);
114 else if (!vacstmt->is_vacuumcmd)
115 ereport(ERROR,
116 (errcode(ERRCODE_SYNTAX_ERROR),
117 errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
118 parser_errposition(pstate, opt->location)));
120 /* Parse options available on VACUUM */
121 else if (strcmp(opt->defname, "analyze") == 0)
122 analyze = defGetBoolean(opt);
123 else if (strcmp(opt->defname, "freeze") == 0)
124 freeze = defGetBoolean(opt);
125 else if (strcmp(opt->defname, "full") == 0)
126 full = defGetBoolean(opt);
127 else if (strcmp(opt->defname, "disable_page_skipping") == 0)
128 disable_page_skipping = defGetBoolean(opt);
129 else if (strcmp(opt->defname, "index_cleanup") == 0)
130 params.index_cleanup = get_vacopt_ternary_value(opt);
131 else if (strcmp(opt->defname, "truncate") == 0)
132 params.truncate = get_vacopt_ternary_value(opt);
133 else
134 ereport(ERROR,
135 (errcode(ERRCODE_SYNTAX_ERROR),
136 errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
137 parser_errposition(pstate, opt->location)));
140 /* Set vacuum options */
141 params.options =
142 (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
143 (verbose ? VACOPT_VERBOSE : 0) |
144 (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
145 (analyze ? VACOPT_ANALYZE : 0) |
146 (freeze ? VACOPT_FREEZE : 0) |
147 (full ? VACOPT_FULL : 0) |
148 (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0);
150 /* sanity checks on options */
151 Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
152 Assert((params.options & VACOPT_VACUUM) ||
153 !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
154 Assert(!(params.options & VACOPT_SKIPTOAST));
157 * Make sure VACOPT_ANALYZE is specified if any column lists are present.
159 if (!(params.options & VACOPT_ANALYZE))
161 ListCell *lc;
163 foreach(lc, vacstmt->rels)
165 VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
167 if (vrel->va_cols != NIL)
168 ereport(ERROR,
169 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
170 errmsg("ANALYZE option must be specified when a column list is provided")));
175 * All freeze ages are zero if the FREEZE option is given; otherwise pass
176 * them as -1 which means to use the default values.
178 if (params.options & VACOPT_FREEZE)
180 params.freeze_min_age = 0;
181 params.freeze_table_age = 0;
182 params.multixact_freeze_min_age = 0;
183 params.multixact_freeze_table_age = 0;
185 else
187 params.freeze_min_age = -1;
188 params.freeze_table_age = -1;
189 params.multixact_freeze_min_age = -1;
190 params.multixact_freeze_table_age = -1;
193 /* user-invoked vacuum is never "for wraparound" */
194 params.is_wraparound = false;
196 /* user-invoked vacuum never uses this parameter */
197 params.log_min_duration = -1;
199 /* Now go through the common routine */
200 vacuum(vacstmt->rels, &params, NULL, isTopLevel);
204 * Internal entry point for VACUUM and ANALYZE commands.
206 * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
207 * we process all relevant tables in the database. For each VacuumRelation,
208 * if a valid OID is supplied, the table with that OID is what to process;
209 * otherwise, the VacuumRelation's RangeVar indicates what to process.
211 * params contains a set of parameters that can be used to customize the
212 * behavior.
214 * bstrategy is normally given as NULL, but in autovacuum it can be passed
215 * in to use the same buffer strategy object across multiple vacuum() calls.
217 * isTopLevel should be passed down from ProcessUtility.
219 * It is the caller's responsibility that all parameters are allocated in a
220 * memory context that will not disappear at transaction commit.
222 void
223 vacuum(List *relations, VacuumParams *params,
224 BufferAccessStrategy bstrategy, bool isTopLevel)
226 static bool in_vacuum = false;
228 const char *stmttype;
229 volatile bool in_outer_xact,
230 use_own_xacts;
232 Assert(params != NULL);
234 stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
237 * We cannot run VACUUM inside a user transaction block; if we were inside
238 * a transaction, then our commit- and start-transaction-command calls
239 * would not have the intended effect! There are numerous other subtle
240 * dependencies on this, too.
242 * ANALYZE (without VACUUM) can run either way.
244 if (params->options & VACOPT_VACUUM)
246 PreventInTransactionBlock(isTopLevel, stmttype);
247 in_outer_xact = false;
249 else
250 in_outer_xact = IsInTransactionBlock(isTopLevel);
253 * Due to static variables vac_context, anl_context and vac_strategy,
254 * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
255 * calls a hostile index expression that itself calls ANALYZE.
257 if (in_vacuum)
258 ereport(ERROR,
259 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
260 errmsg("%s cannot be executed from VACUUM or ANALYZE",
261 stmttype)));
264 * Sanity check DISABLE_PAGE_SKIPPING option.
266 if ((params->options & VACOPT_FULL) != 0 &&
267 (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
268 ereport(ERROR,
269 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
270 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
273 * Send info about dead objects to the statistics collector, unless we are
274 * in autovacuum --- autovacuum.c does this for itself.
276 if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
277 pgstat_vacuum_stat();
280 * Create special memory context for cross-transaction storage.
282 * Since it is a child of PortalContext, it will go away eventually even
283 * if we suffer an error; there's no need for special abort cleanup logic.
285 vac_context = AllocSetContextCreate(PortalContext,
286 "Vacuum",
287 ALLOCSET_DEFAULT_SIZES);
290 * If caller didn't give us a buffer strategy object, make one in the
291 * cross-transaction memory context.
293 if (bstrategy == NULL)
295 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
297 bstrategy = GetAccessStrategy(BAS_VACUUM);
298 MemoryContextSwitchTo(old_context);
300 vac_strategy = bstrategy;
303 * Build list of relation(s) to process, putting any new data in
304 * vac_context for safekeeping.
306 if (relations != NIL)
308 List *newrels = NIL;
309 ListCell *lc;
311 foreach(lc, relations)
313 VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
314 List *sublist;
315 MemoryContext old_context;
317 sublist = expand_vacuum_rel(vrel, params->options);
318 old_context = MemoryContextSwitchTo(vac_context);
319 newrels = list_concat(newrels, sublist);
320 MemoryContextSwitchTo(old_context);
322 relations = newrels;
324 else
325 relations = get_all_vacuum_rels(params->options);
328 * Decide whether we need to start/commit our own transactions.
330 * For VACUUM (with or without ANALYZE): always do so, so that we can
331 * release locks as soon as possible. (We could possibly use the outer
332 * transaction for a one-table VACUUM, but handling TOAST tables would be
333 * problematic.)
335 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
336 * start/commit our own transactions. Also, there's no need to do so if
337 * only processing one relation. For multiple relations when not within a
338 * transaction block, and also in an autovacuum worker, use own
339 * transactions so we can release locks sooner.
341 if (params->options & VACOPT_VACUUM)
342 use_own_xacts = true;
343 else
345 Assert(params->options & VACOPT_ANALYZE);
346 if (IsAutoVacuumWorkerProcess())
347 use_own_xacts = true;
348 else if (in_outer_xact)
349 use_own_xacts = false;
350 else if (list_length(relations) > 1)
351 use_own_xacts = true;
352 else
353 use_own_xacts = false;
357 * vacuum_rel expects to be entered with no transaction active; it will
358 * start and commit its own transaction. But we are called by an SQL
359 * command, and so we are executing inside a transaction already. We
360 * commit the transaction started in PostgresMain() here, and start
361 * another one before exiting to match the commit waiting for us back in
362 * PostgresMain().
364 if (use_own_xacts)
366 Assert(!in_outer_xact);
368 /* ActiveSnapshot is not set by autovacuum */
369 if (ActiveSnapshotSet())
370 PopActiveSnapshot();
372 /* matches the StartTransaction in PostgresMain() */
373 CommitTransactionCommand();
376 /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
377 PG_TRY();
379 ListCell *cur;
381 in_vacuum = true;
382 VacuumCostActive = (VacuumCostDelay > 0);
383 VacuumCostBalance = 0;
384 VacuumPageHit = 0;
385 VacuumPageMiss = 0;
386 VacuumPageDirty = 0;
389 * Loop to process each selected relation.
391 foreach(cur, relations)
393 VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
395 if (params->options & VACOPT_VACUUM)
397 if (!vacuum_rel(vrel->oid, vrel->relation, params))
398 continue;
401 if (params->options & VACOPT_ANALYZE)
404 * If using separate xacts, start one for analyze. Otherwise,
405 * we can use the outer transaction.
407 if (use_own_xacts)
409 StartTransactionCommand();
410 /* functions in indexes may want a snapshot set */
411 PushActiveSnapshot(GetTransactionSnapshot());
414 analyze_rel(vrel->oid, vrel->relation, params,
415 vrel->va_cols, in_outer_xact, vac_strategy);
417 if (use_own_xacts)
419 PopActiveSnapshot();
420 CommitTransactionCommand();
422 else
425 * If we're not using separate xacts, better separate the
426 * ANALYZE actions with CCIs. This avoids trouble if user
427 * says "ANALYZE t, t".
429 CommandCounterIncrement();
434 PG_CATCH();
436 in_vacuum = false;
437 VacuumCostActive = false;
438 PG_RE_THROW();
440 PG_END_TRY();
442 in_vacuum = false;
443 VacuumCostActive = false;
446 * Finish up processing.
448 if (use_own_xacts)
450 /* here, we are not in a transaction */
453 * This matches the CommitTransaction waiting for us in
454 * PostgresMain().
456 StartTransactionCommand();
459 if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
462 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
463 * (autovacuum.c does this for itself.)
465 vac_update_datfrozenxid();
469 * Clean up working storage --- note we must do this after
470 * StartTransactionCommand, else we might be trying to delete the active
471 * context!
473 MemoryContextDelete(vac_context);
474 vac_context = NULL;
478 * Check if a given relation can be safely vacuumed or analyzed. If the
479 * user is not the relation owner, issue a WARNING log message and return
480 * false to let the caller decide what to do with this relation. This
481 * routine is used to decide if a relation can be processed for VACUUM or
482 * ANALYZE.
484 bool
485 vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options)
487 char *relname;
489 Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
492 * Check permissions.
494 * We allow the user to vacuum or analyze a table if he is superuser, the
495 * table owner, or the database owner (but in the latter case, only if
496 * it's not a shared relation). pg_class_ownercheck includes the
497 * superuser case.
499 * Note we choose to treat permissions failure as a WARNING and keep
500 * trying to vacuum or analyze the rest of the DB --- is this appropriate?
502 if (pg_class_ownercheck(relid, GetUserId()) ||
503 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
504 return true;
506 relname = NameStr(reltuple->relname);
508 if ((options & VACOPT_VACUUM) != 0)
510 if (reltuple->relisshared)
511 ereport(WARNING,
512 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
513 relname)));
514 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
515 ereport(WARNING,
516 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
517 relname)));
518 else
519 ereport(WARNING,
520 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
521 relname)));
524 * For VACUUM ANALYZE, both logs could show up, but just generate
525 * information for VACUUM as that would be the first one to be
526 * processed.
528 return false;
531 if ((options & VACOPT_ANALYZE) != 0)
533 if (reltuple->relisshared)
534 ereport(WARNING,
535 (errmsg("skipping \"%s\" --- only superuser can analyze it",
536 relname)));
537 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
538 ereport(WARNING,
539 (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
540 relname)));
541 else
542 ereport(WARNING,
543 (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
544 relname)));
547 return false;
552 * vacuum_open_relation
554 * This routine is used for attempting to open and lock a relation which
555 * is going to be vacuumed or analyzed. If the relation cannot be opened
556 * or locked, a log is emitted if possible.
558 Relation
559 vacuum_open_relation(Oid relid, RangeVar *relation, int options,
560 bool verbose, LOCKMODE lmode)
562 Relation onerel;
563 bool rel_lock = true;
564 int elevel;
566 Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
569 * Open the relation and get the appropriate lock on it.
571 * There's a race condition here: the relation may have gone away since
572 * the last time we saw it. If so, we don't need to vacuum or analyze it.
574 * If we've been asked not to wait for the relation lock, acquire it first
575 * in non-blocking mode, before calling try_relation_open().
577 if (!(options & VACOPT_SKIP_LOCKED))
578 onerel = try_relation_open(relid, lmode);
579 else if (ConditionalLockRelationOid(relid, lmode))
580 onerel = try_relation_open(relid, NoLock);
581 else
583 onerel = NULL;
584 rel_lock = false;
587 /* if relation is opened, leave */
588 if (onerel)
589 return onerel;
592 * Relation could not be opened, hence generate if possible a log
593 * informing on the situation.
595 * If the RangeVar is not defined, we do not have enough information to
596 * provide a meaningful log statement. Chances are that the caller has
597 * intentionally not provided this information so that this logging is
598 * skipped, anyway.
600 if (relation == NULL)
601 return NULL;
604 * Determine the log level.
606 * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
607 * statements in the permission checks; otherwise, only log if the caller
608 * so requested.
610 if (!IsAutoVacuumWorkerProcess())
611 elevel = WARNING;
612 else if (verbose)
613 elevel = LOG;
614 else
615 return NULL;
617 if ((options & VACOPT_VACUUM) != 0)
619 if (!rel_lock)
620 ereport(elevel,
621 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
622 errmsg("skipping vacuum of \"%s\" --- lock not available",
623 relation->relname)));
624 else
625 ereport(elevel,
626 (errcode(ERRCODE_UNDEFINED_TABLE),
627 errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
628 relation->relname)));
631 * For VACUUM ANALYZE, both logs could show up, but just generate
632 * information for VACUUM as that would be the first one to be
633 * processed.
635 return NULL;
638 if ((options & VACOPT_ANALYZE) != 0)
640 if (!rel_lock)
641 ereport(elevel,
642 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
643 errmsg("skipping analyze of \"%s\" --- lock not available",
644 relation->relname)));
645 else
646 ereport(elevel,
647 (errcode(ERRCODE_UNDEFINED_TABLE),
648 errmsg("skipping analyze of \"%s\" --- relation no longer exists",
649 relation->relname)));
652 return NULL;
657 * Given a VacuumRelation, fill in the table OID if it wasn't specified,
658 * and optionally add VacuumRelations for partitions of the table.
660 * If a VacuumRelation does not have an OID supplied and is a partitioned
661 * table, an extra entry will be added to the output for each partition.
662 * Presently, only autovacuum supplies OIDs when calling vacuum(), and
663 * it does not want us to expand partitioned tables.
665 * We take care not to modify the input data structure, but instead build
666 * new VacuumRelation(s) to return. (But note that they will reference
667 * unmodified parts of the input, eg column lists.) New data structures
668 * are made in vac_context.
670 static List *
671 expand_vacuum_rel(VacuumRelation *vrel, int options)
673 List *vacrels = NIL;
674 MemoryContext oldcontext;
676 /* If caller supplied OID, there's nothing we need do here. */
677 if (OidIsValid(vrel->oid))
679 oldcontext = MemoryContextSwitchTo(vac_context);
680 vacrels = lappend(vacrels, vrel);
681 MemoryContextSwitchTo(oldcontext);
683 else
685 /* Process a specific relation, and possibly partitions thereof */
686 Oid relid;
687 HeapTuple tuple;
688 Form_pg_class classForm;
689 bool include_parts;
690 int rvr_opts;
693 * Since autovacuum workers supply OIDs when calling vacuum(), no
694 * autovacuum worker should reach this code.
696 Assert(!IsAutoVacuumWorkerProcess());
699 * We transiently take AccessShareLock to protect the syscache lookup
700 * below, as well as find_all_inheritors's expectation that the caller
701 * holds some lock on the starting relation.
703 rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
704 relid = RangeVarGetRelidExtended(vrel->relation,
705 AccessShareLock,
706 rvr_opts,
707 NULL, NULL);
710 * If the lock is unavailable, emit the same log statement that
711 * vacuum_rel() and analyze_rel() would.
713 if (!OidIsValid(relid))
715 if (options & VACOPT_VACUUM)
716 ereport(WARNING,
717 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
718 errmsg("skipping vacuum of \"%s\" --- lock not available",
719 vrel->relation->relname)));
720 else
721 ereport(WARNING,
722 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
723 errmsg("skipping analyze of \"%s\" --- lock not available",
724 vrel->relation->relname)));
725 return vacrels;
729 * To check whether the relation is a partitioned table and its
730 * ownership, fetch its syscache entry.
732 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
733 if (!HeapTupleIsValid(tuple))
734 elog(ERROR, "cache lookup failed for relation %u", relid);
735 classForm = (Form_pg_class) GETSTRUCT(tuple);
738 * Make a returnable VacuumRelation for this rel if user is a proper
739 * owner.
741 if (vacuum_is_relation_owner(relid, classForm, options))
743 oldcontext = MemoryContextSwitchTo(vac_context);
744 vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
745 relid,
746 vrel->va_cols));
747 MemoryContextSwitchTo(oldcontext);
751 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
752 ReleaseSysCache(tuple);
755 * If it is, make relation list entries for its partitions. Note that
756 * the list returned by find_all_inheritors() includes the passed-in
757 * OID, so we have to skip that. There's no point in taking locks on
758 * the individual partitions yet, and doing so would just add
759 * unnecessary deadlock risk. For this last reason we do not check
760 * yet the ownership of the partitions, which get added to the list to
761 * process. Ownership will be checked later on anyway.
763 if (include_parts)
765 List *part_oids = find_all_inheritors(relid, NoLock, NULL);
766 ListCell *part_lc;
768 foreach(part_lc, part_oids)
770 Oid part_oid = lfirst_oid(part_lc);
772 if (part_oid == relid)
773 continue; /* ignore original table */
776 * We omit a RangeVar since it wouldn't be appropriate to
777 * complain about failure to open one of these relations
778 * later.
780 oldcontext = MemoryContextSwitchTo(vac_context);
781 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
782 part_oid,
783 vrel->va_cols));
784 MemoryContextSwitchTo(oldcontext);
789 * Release lock again. This means that by the time we actually try to
790 * process the table, it might be gone or renamed. In the former case
791 * we'll silently ignore it; in the latter case we'll process it
792 * anyway, but we must beware that the RangeVar doesn't necessarily
793 * identify it anymore. This isn't ideal, perhaps, but there's little
794 * practical alternative, since we're typically going to commit this
795 * transaction and begin a new one between now and then. Moreover,
796 * holding locks on multiple relations would create significant risk
797 * of deadlock.
799 UnlockRelationOid(relid, AccessShareLock);
802 return vacrels;
806 * Construct a list of VacuumRelations for all vacuumable rels in
807 * the current database. The list is built in vac_context.
809 static List *
810 get_all_vacuum_rels(int options)
812 List *vacrels = NIL;
813 Relation pgclass;
814 TableScanDesc scan;
815 HeapTuple tuple;
817 pgclass = table_open(RelationRelationId, AccessShareLock);
819 scan = table_beginscan_catalog(pgclass, 0, NULL);
821 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
823 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
824 MemoryContext oldcontext;
825 Oid relid = classForm->oid;
827 /* check permissions of relation */
828 if (!vacuum_is_relation_owner(relid, classForm, options))
829 continue;
832 * We include partitioned tables here; depending on which operation is
833 * to be performed, caller will decide whether to process or ignore
834 * them.
836 if (classForm->relkind != RELKIND_RELATION &&
837 classForm->relkind != RELKIND_MATVIEW &&
838 classForm->relkind != RELKIND_PARTITIONED_TABLE)
839 continue;
842 * Build VacuumRelation(s) specifying the table OIDs to be processed.
843 * We omit a RangeVar since it wouldn't be appropriate to complain
844 * about failure to open one of these relations later.
846 oldcontext = MemoryContextSwitchTo(vac_context);
847 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
848 relid,
849 NIL));
850 MemoryContextSwitchTo(oldcontext);
853 table_endscan(scan);
854 table_close(pgclass, AccessShareLock);
856 return vacrels;
860 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
862 * The output parameters are:
863 * - oldestXmin is the cutoff value used to distinguish whether tuples are
864 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
865 * - freezeLimit is the Xid below which all Xids are replaced by
866 * FrozenTransactionId during vacuum.
867 * - xidFullScanLimit (computed from table_freeze_age parameter)
868 * represents a minimum Xid value; a table whose relfrozenxid is older than
869 * this will have a full-table vacuum applied to it, to freeze tuples across
870 * the whole table. Vacuuming a table younger than this value can use a
871 * partial scan.
872 * - multiXactCutoff is the value below which all MultiXactIds are removed from
873 * Xmax.
874 * - mxactFullScanLimit is a value against which a table's relminmxid value is
875 * compared to produce a full-table vacuum, as with xidFullScanLimit.
877 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
878 * not interested.
880 void
881 vacuum_set_xid_limits(Relation rel,
882 int freeze_min_age,
883 int freeze_table_age,
884 int multixact_freeze_min_age,
885 int multixact_freeze_table_age,
886 TransactionId *oldestXmin,
887 TransactionId *freezeLimit,
888 TransactionId *xidFullScanLimit,
889 MultiXactId *multiXactCutoff,
890 MultiXactId *mxactFullScanLimit)
892 int freezemin;
893 int mxid_freezemin;
894 int effective_multixact_freeze_max_age;
895 TransactionId limit;
896 TransactionId safeLimit;
897 MultiXactId oldestMxact;
898 MultiXactId mxactLimit;
899 MultiXactId safeMxactLimit;
902 * We can always ignore processes running lazy vacuum. This is because we
903 * use these values only for deciding which tuples we must keep in the
904 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
905 * ignore it. In theory it could be problematic to ignore lazy vacuums in
906 * a full vacuum, but keep in mind that only one vacuum process can be
907 * working on a particular table at any time, and that each vacuum is
908 * always an independent transaction.
910 *oldestXmin =
911 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
913 Assert(TransactionIdIsNormal(*oldestXmin));
916 * Determine the minimum freeze age to use: as specified by the caller, or
917 * vacuum_freeze_min_age, but in any case not more than half
918 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
919 * wraparound won't occur too frequently.
921 freezemin = freeze_min_age;
922 if (freezemin < 0)
923 freezemin = vacuum_freeze_min_age;
924 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
925 Assert(freezemin >= 0);
928 * Compute the cutoff XID, being careful not to generate a "permanent" XID
930 limit = *oldestXmin - freezemin;
931 if (!TransactionIdIsNormal(limit))
932 limit = FirstNormalTransactionId;
935 * If oldestXmin is very far back (in practice, more than
936 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
937 * freeze age of zero.
939 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
940 if (!TransactionIdIsNormal(safeLimit))
941 safeLimit = FirstNormalTransactionId;
943 if (TransactionIdPrecedes(limit, safeLimit))
945 ereport(WARNING,
946 (errmsg("oldest xmin is far in the past"),
947 errhint("Close open transactions soon to avoid wraparound problems.\n"
948 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
949 limit = *oldestXmin;
952 *freezeLimit = limit;
955 * Compute the multixact age for which freezing is urgent. This is
956 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
957 * short of multixact member space.
959 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
962 * Determine the minimum multixact freeze age to use: as specified by
963 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
964 * than half effective_multixact_freeze_max_age, so that autovacuums to
965 * prevent MultiXact wraparound won't occur too frequently.
967 mxid_freezemin = multixact_freeze_min_age;
968 if (mxid_freezemin < 0)
969 mxid_freezemin = vacuum_multixact_freeze_min_age;
970 mxid_freezemin = Min(mxid_freezemin,
971 effective_multixact_freeze_max_age / 2);
972 Assert(mxid_freezemin >= 0);
974 /* compute the cutoff multi, being careful to generate a valid value */
975 oldestMxact = GetOldestMultiXactId();
976 mxactLimit = oldestMxact - mxid_freezemin;
977 if (mxactLimit < FirstMultiXactId)
978 mxactLimit = FirstMultiXactId;
980 safeMxactLimit =
981 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
982 if (safeMxactLimit < FirstMultiXactId)
983 safeMxactLimit = FirstMultiXactId;
985 if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
987 ereport(WARNING,
988 (errmsg("oldest multixact is far in the past"),
989 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
990 /* Use the safe limit, unless an older mxact is still running */
991 if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
992 mxactLimit = oldestMxact;
993 else
994 mxactLimit = safeMxactLimit;
997 *multiXactCutoff = mxactLimit;
999 if (xidFullScanLimit != NULL)
1001 int freezetable;
1003 Assert(mxactFullScanLimit != NULL);
1006 * Determine the table freeze age to use: as specified by the caller,
1007 * or vacuum_freeze_table_age, but in any case not more than
1008 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1009 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
1010 * before anti-wraparound autovacuum is launched.
1012 freezetable = freeze_table_age;
1013 if (freezetable < 0)
1014 freezetable = vacuum_freeze_table_age;
1015 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
1016 Assert(freezetable >= 0);
1019 * Compute XID limit causing a full-table vacuum, being careful not to
1020 * generate a "permanent" XID.
1022 limit = ReadNewTransactionId() - freezetable;
1023 if (!TransactionIdIsNormal(limit))
1024 limit = FirstNormalTransactionId;
1026 *xidFullScanLimit = limit;
1029 * Similar to the above, determine the table freeze age to use for
1030 * multixacts: as specified by the caller, or
1031 * vacuum_multixact_freeze_table_age, but in any case not more than
1032 * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
1033 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
1034 * freeze multixacts before anti-wraparound autovacuum is launched.
1036 freezetable = multixact_freeze_table_age;
1037 if (freezetable < 0)
1038 freezetable = vacuum_multixact_freeze_table_age;
1039 freezetable = Min(freezetable,
1040 effective_multixact_freeze_max_age * 0.95);
1041 Assert(freezetable >= 0);
1044 * Compute MultiXact limit causing a full-table vacuum, being careful
1045 * to generate a valid MultiXact value.
1047 mxactLimit = ReadNextMultiXactId() - freezetable;
1048 if (mxactLimit < FirstMultiXactId)
1049 mxactLimit = FirstMultiXactId;
1051 *mxactFullScanLimit = mxactLimit;
1053 else
1055 Assert(mxactFullScanLimit == NULL);
1060 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1062 * If we scanned the whole relation then we should just use the count of
1063 * live tuples seen; but if we did not, we should not blindly extrapolate
1064 * from that number, since VACUUM may have scanned a quite nonrandom
1065 * subset of the table. When we have only partial information, we take
1066 * the old value of pg_class.reltuples as a measurement of the
1067 * tuple density in the unscanned pages.
1069 * Note: scanned_tuples should count only *live* tuples, since
1070 * pg_class.reltuples is defined that way.
1072 double
1073 vac_estimate_reltuples(Relation relation,
1074 BlockNumber total_pages,
1075 BlockNumber scanned_pages,
1076 double scanned_tuples)
1078 BlockNumber old_rel_pages = relation->rd_rel->relpages;
1079 double old_rel_tuples = relation->rd_rel->reltuples;
1080 double old_density;
1081 double unscanned_pages;
1082 double total_tuples;
1084 /* If we did scan the whole table, just use the count as-is */
1085 if (scanned_pages >= total_pages)
1086 return scanned_tuples;
1089 * If scanned_pages is zero but total_pages isn't, keep the existing value
1090 * of reltuples. (Note: callers should avoid updating the pg_class
1091 * statistics in this situation, since no new information has been
1092 * provided.)
1094 if (scanned_pages == 0)
1095 return old_rel_tuples;
1098 * If old value of relpages is zero, old density is indeterminate; we
1099 * can't do much except scale up scanned_tuples to match total_pages.
1101 if (old_rel_pages == 0)
1102 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1105 * Okay, we've covered the corner cases. The normal calculation is to
1106 * convert the old measurement to a density (tuples per page), then
1107 * estimate the number of tuples in the unscanned pages using that figure,
1108 * and finally add on the number of tuples in the scanned pages.
1110 old_density = old_rel_tuples / old_rel_pages;
1111 unscanned_pages = (double) total_pages - (double) scanned_pages;
1112 total_tuples = old_density * unscanned_pages + scanned_tuples;
1113 return floor(total_tuples + 0.5);
1118 * vac_update_relstats() -- update statistics for one relation
1120 * Update the whole-relation statistics that are kept in its pg_class
1121 * row. There are additional stats that will be updated if we are
1122 * doing ANALYZE, but we always update these stats. This routine works
1123 * for both index and heap relation entries in pg_class.
1125 * We violate transaction semantics here by overwriting the rel's
1126 * existing pg_class tuple with the new values. This is reasonably
1127 * safe as long as we're sure that the new values are correct whether or
1128 * not this transaction commits. The reason for doing this is that if
1129 * we updated these tuples in the usual way, vacuuming pg_class itself
1130 * wouldn't work very well --- by the time we got done with a vacuum
1131 * cycle, most of the tuples in pg_class would've been obsoleted. Of
1132 * course, this only works for fixed-size not-null columns, but these are.
1134 * Another reason for doing it this way is that when we are in a lazy
1135 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1136 * Somebody vacuuming pg_class might think they could delete a tuple
1137 * marked with xmin = our xid.
1139 * In addition to fundamentally nontransactional statistics such as
1140 * relpages and relallvisible, we try to maintain certain lazily-updated
1141 * DDL flags such as relhasindex, by clearing them if no longer correct.
1142 * It's safe to do this in VACUUM, which can't run in parallel with
1143 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1144 * However, it's *not* safe to do it in an ANALYZE that's within an
1145 * outer transaction, because for example the current transaction might
1146 * have dropped the last index; then we'd think relhasindex should be
1147 * cleared, but if the transaction later rolls back this would be wrong.
1148 * So we refrain from updating the DDL flags if we're inside an outer
1149 * transaction. This is OK since postponing the flag maintenance is
1150 * always allowable.
1152 * Note: num_tuples should count only *live* tuples, since
1153 * pg_class.reltuples is defined that way.
1155 * This routine is shared by VACUUM and ANALYZE.
1157 void
1158 vac_update_relstats(Relation relation,
1159 BlockNumber num_pages, double num_tuples,
1160 BlockNumber num_all_visible_pages,
1161 bool hasindex, TransactionId frozenxid,
1162 MultiXactId minmulti,
1163 bool in_outer_xact)
1165 Oid relid = RelationGetRelid(relation);
1166 Relation rd;
1167 HeapTuple ctup;
1168 Form_pg_class pgcform;
1169 bool dirty;
1171 rd = table_open(RelationRelationId, RowExclusiveLock);
1173 /* Fetch a copy of the tuple to scribble on */
1174 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1175 if (!HeapTupleIsValid(ctup))
1176 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1177 relid);
1178 pgcform = (Form_pg_class) GETSTRUCT(ctup);
1180 /* Apply statistical updates, if any, to copied tuple */
1182 dirty = false;
1183 if (pgcform->relpages != (int32) num_pages)
1185 pgcform->relpages = (int32) num_pages;
1186 dirty = true;
1188 if (pgcform->reltuples != (float4) num_tuples)
1190 pgcform->reltuples = (float4) num_tuples;
1191 dirty = true;
1193 if (pgcform->relallvisible != (int32) num_all_visible_pages)
1195 pgcform->relallvisible = (int32) num_all_visible_pages;
1196 dirty = true;
1199 /* Apply DDL updates, but not inside an outer transaction (see above) */
1201 if (!in_outer_xact)
1204 * If we didn't find any indexes, reset relhasindex.
1206 if (pgcform->relhasindex && !hasindex)
1208 pgcform->relhasindex = false;
1209 dirty = true;
1212 /* We also clear relhasrules and relhastriggers if needed */
1213 if (pgcform->relhasrules && relation->rd_rules == NULL)
1215 pgcform->relhasrules = false;
1216 dirty = true;
1218 if (pgcform->relhastriggers && relation->trigdesc == NULL)
1220 pgcform->relhastriggers = false;
1221 dirty = true;
1226 * Update relfrozenxid, unless caller passed InvalidTransactionId
1227 * indicating it has no new data.
1229 * Ordinarily, we don't let relfrozenxid go backwards: if things are
1230 * working correctly, the only way the new frozenxid could be older would
1231 * be if a previous VACUUM was done with a tighter freeze_min_age, in
1232 * which case we don't want to forget the work it already did. However,
1233 * if the stored relfrozenxid is "in the future", then it must be corrupt
1234 * and it seems best to overwrite it with the cutoff we used this time.
1235 * This should match vac_update_datfrozenxid() concerning what we consider
1236 * to be "in the future".
1238 if (TransactionIdIsNormal(frozenxid) &&
1239 pgcform->relfrozenxid != frozenxid &&
1240 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1241 TransactionIdPrecedes(ReadNewTransactionId(),
1242 pgcform->relfrozenxid)))
1244 pgcform->relfrozenxid = frozenxid;
1245 dirty = true;
1248 /* Similarly for relminmxid */
1249 if (MultiXactIdIsValid(minmulti) &&
1250 pgcform->relminmxid != minmulti &&
1251 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1252 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1254 pgcform->relminmxid = minmulti;
1255 dirty = true;
1258 /* If anything changed, write out the tuple. */
1259 if (dirty)
1260 heap_inplace_update(rd, ctup);
1262 table_close(rd, RowExclusiveLock);
1267 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1269 * Update pg_database's datfrozenxid entry for our database to be the
1270 * minimum of the pg_class.relfrozenxid values.
1272 * Similarly, update our datminmxid to be the minimum of the
1273 * pg_class.relminmxid values.
1275 * If we are able to advance either pg_database value, also try to
1276 * truncate pg_xact and pg_multixact.
1278 * We violate transaction semantics here by overwriting the database's
1279 * existing pg_database tuple with the new values. This is reasonably
1280 * safe since the new values are correct whether or not this transaction
1281 * commits. As with vac_update_relstats, this avoids leaving dead tuples
1282 * behind after a VACUUM.
1284 void
1285 vac_update_datfrozenxid(void)
1287 HeapTuple tuple;
1288 Form_pg_database dbform;
1289 Relation relation;
1290 SysScanDesc scan;
1291 HeapTuple classTup;
1292 TransactionId newFrozenXid;
1293 MultiXactId newMinMulti;
1294 TransactionId lastSaneFrozenXid;
1295 MultiXactId lastSaneMinMulti;
1296 bool bogus = false;
1297 bool dirty = false;
1298 ScanKeyData key[1];
1301 * Restrict this task to one backend per database. This avoids race
1302 * conditions that would move datfrozenxid or datminmxid backward. It
1303 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1304 * datfrozenxid passed to an earlier vac_truncate_clog() call.
1306 LockDatabaseFrozenIds(ExclusiveLock);
1309 * Initialize the "min" calculation with GetOldestXmin, which is a
1310 * reasonable approximation to the minimum relfrozenxid for not-yet-
1311 * committed pg_class entries for new tables; see AddNewRelationTuple().
1312 * So we cannot produce a wrong minimum by starting with this.
1314 newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1317 * Similarly, initialize the MultiXact "min" with the value that would be
1318 * used on pg_class for new tables. See AddNewRelationTuple().
1320 newMinMulti = GetOldestMultiXactId();
1323 * Identify the latest relfrozenxid and relminmxid values that we could
1324 * validly see during the scan. These are conservative values, but it's
1325 * not really worth trying to be more exact.
1327 lastSaneFrozenXid = ReadNewTransactionId();
1328 lastSaneMinMulti = ReadNextMultiXactId();
1331 * We must seqscan pg_class to find the minimum Xid, because there is no
1332 * index that can help us here.
1334 * See vac_truncate_clog() for the race condition to prevent.
1336 relation = table_open(RelationRelationId, AccessShareLock);
1338 scan = systable_beginscan(relation, InvalidOid, false,
1339 NULL, 0, NULL);
1341 while ((classTup = systable_getnext(scan)) != NULL)
1343 volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1344 TransactionId relfrozenxid = classForm->relfrozenxid;
1345 TransactionId relminmxid = classForm->relminmxid;
1348 * Only consider relations able to hold unfrozen XIDs (anything else
1349 * should have InvalidTransactionId in relfrozenxid anyway).
1351 if (classForm->relkind != RELKIND_RELATION &&
1352 classForm->relkind != RELKIND_MATVIEW &&
1353 classForm->relkind != RELKIND_TOASTVALUE)
1355 Assert(!TransactionIdIsValid(relfrozenxid));
1356 Assert(!MultiXactIdIsValid(relminmxid));
1357 continue;
1361 * Some table AMs might not need per-relation xid / multixid horizons.
1362 * It therefore seems reasonable to allow relfrozenxid and relminmxid
1363 * to not be set (i.e. set to their respective Invalid*Id)
1364 * independently. Thus validate and compute horizon for each only if
1365 * set.
1367 * If things are working properly, no relation should have a
1368 * relfrozenxid or relminmxid that is "in the future". However, such
1369 * cases have been known to arise due to bugs in pg_upgrade. If we
1370 * see any entries that are "in the future", chicken out and don't do
1371 * anything. This ensures we won't truncate clog & multixact SLRUs
1372 * before those relations have been scanned and cleaned up.
1375 if (TransactionIdIsValid(relfrozenxid))
1377 Assert(TransactionIdIsNormal(relfrozenxid));
1379 /* check for values in the future */
1380 if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1382 bogus = true;
1383 break;
1386 /* determine new horizon */
1387 if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1388 newFrozenXid = relfrozenxid;
1391 if (MultiXactIdIsValid(relminmxid))
1393 /* check for values in the future */
1394 if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1396 bogus = true;
1397 break;
1400 /* determine new horizon */
1401 if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1402 newMinMulti = relminmxid;
1406 /* we're done with pg_class */
1407 systable_endscan(scan);
1408 table_close(relation, AccessShareLock);
1410 /* chicken out if bogus data found */
1411 if (bogus)
1412 return;
1414 Assert(TransactionIdIsNormal(newFrozenXid));
1415 Assert(MultiXactIdIsValid(newMinMulti));
1417 /* Now fetch the pg_database tuple we need to update. */
1418 relation = table_open(DatabaseRelationId, RowExclusiveLock);
1421 * Get the pg_database tuple to scribble on. Note that this does not
1422 * directly rely on the syscache to avoid issues with flattened toast
1423 * values for the in-place update.
1425 ScanKeyInit(&key[0],
1426 Anum_pg_database_oid,
1427 BTEqualStrategyNumber, F_OIDEQ,
1428 ObjectIdGetDatum(MyDatabaseId));
1430 scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1431 NULL, 1, key);
1432 tuple = systable_getnext(scan);
1433 tuple = heap_copytuple(tuple);
1434 systable_endscan(scan);
1436 if (!HeapTupleIsValid(tuple))
1437 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1439 dbform = (Form_pg_database) GETSTRUCT(tuple);
1442 * As in vac_update_relstats(), we ordinarily don't want to let
1443 * datfrozenxid go backward; but if it's "in the future" then it must be
1444 * corrupt and it seems best to overwrite it.
1446 if (dbform->datfrozenxid != newFrozenXid &&
1447 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1448 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1450 dbform->datfrozenxid = newFrozenXid;
1451 dirty = true;
1453 else
1454 newFrozenXid = dbform->datfrozenxid;
1456 /* Ditto for datminmxid */
1457 if (dbform->datminmxid != newMinMulti &&
1458 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1459 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1461 dbform->datminmxid = newMinMulti;
1462 dirty = true;
1464 else
1465 newMinMulti = dbform->datminmxid;
1467 if (dirty)
1468 heap_inplace_update(relation, tuple);
1470 heap_freetuple(tuple);
1471 table_close(relation, RowExclusiveLock);
1474 * If we were able to advance datfrozenxid or datminmxid, see if we can
1475 * truncate pg_xact and/or pg_multixact. Also do it if the shared
1476 * XID-wrap-limit info is stale, since this action will update that too.
1478 if (dirty || ForceTransactionIdLimitUpdate())
1479 vac_truncate_clog(newFrozenXid, newMinMulti,
1480 lastSaneFrozenXid, lastSaneMinMulti);
1485 * vac_truncate_clog() -- attempt to truncate the commit log
1487 * Scan pg_database to determine the system-wide oldest datfrozenxid,
1488 * and use it to truncate the transaction commit log (pg_xact).
1489 * Also update the XID wrap limit info maintained by varsup.c.
1490 * Likewise for datminmxid.
1492 * The passed frozenXID and minMulti are the updated values for my own
1493 * pg_database entry. They're used to initialize the "min" calculations.
1494 * The caller also passes the "last sane" XID and MXID, since it has
1495 * those at hand already.
1497 * This routine is only invoked when we've managed to change our
1498 * DB's datfrozenxid/datminmxid values, or we found that the shared
1499 * XID-wrap-limit info is stale.
1501 static void
1502 vac_truncate_clog(TransactionId frozenXID,
1503 MultiXactId minMulti,
1504 TransactionId lastSaneFrozenXid,
1505 MultiXactId lastSaneMinMulti)
1507 TransactionId nextXID = ReadNewTransactionId();
1508 Relation relation;
1509 TableScanDesc scan;
1510 HeapTuple tuple;
1511 Oid oldestxid_datoid;
1512 Oid minmulti_datoid;
1513 bool bogus = false;
1514 bool frozenAlreadyWrapped = false;
1516 /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1517 LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1519 /* init oldest datoids to sync with my frozenXID/minMulti values */
1520 oldestxid_datoid = MyDatabaseId;
1521 minmulti_datoid = MyDatabaseId;
1524 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1526 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1527 * the values could change while we look at them. Fetch each one just
1528 * once to ensure sane behavior of the comparison logic. (Here, as in
1529 * many other places, we assume that fetching or updating an XID in shared
1530 * storage is atomic.)
1532 * Note: we need not worry about a race condition with new entries being
1533 * inserted by CREATE DATABASE. Any such entry will have a copy of some
1534 * existing DB's datfrozenxid, and that source DB cannot be ours because
1535 * of the interlock against copying a DB containing an active backend.
1536 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1537 * concurrently modify the datfrozenxid's of different databases, the
1538 * worst possible outcome is that pg_xact is not truncated as aggressively
1539 * as it could be.
1541 relation = table_open(DatabaseRelationId, AccessShareLock);
1543 scan = table_beginscan_catalog(relation, 0, NULL);
1545 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1547 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1548 TransactionId datfrozenxid = dbform->datfrozenxid;
1549 TransactionId datminmxid = dbform->datminmxid;
1551 Assert(TransactionIdIsNormal(datfrozenxid));
1552 Assert(MultiXactIdIsValid(datminmxid));
1555 * If database is in the process of getting dropped, or has been
1556 * interrupted while doing so, no connections to it are possible
1557 * anymore. Therefore we don't need to take it into account here.
1558 * Which is good, because it can't be processed by autovacuum either.
1560 if (database_is_invalid_form((Form_pg_database) dbform))
1562 elog(DEBUG2,
1563 "skipping invalid database \"%s\" while computing relfrozenxid",
1564 NameStr(dbform->datname));
1565 continue;
1569 * If things are working properly, no database should have a
1570 * datfrozenxid or datminmxid that is "in the future". However, such
1571 * cases have been known to arise due to bugs in pg_upgrade. If we
1572 * see any entries that are "in the future", chicken out and don't do
1573 * anything. This ensures we won't truncate clog before those
1574 * databases have been scanned and cleaned up. (We will issue the
1575 * "already wrapped" warning if appropriate, though.)
1577 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1578 MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1579 bogus = true;
1581 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1582 frozenAlreadyWrapped = true;
1583 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1585 frozenXID = datfrozenxid;
1586 oldestxid_datoid = dbform->oid;
1589 if (MultiXactIdPrecedes(datminmxid, minMulti))
1591 minMulti = datminmxid;
1592 minmulti_datoid = dbform->oid;
1596 table_endscan(scan);
1598 table_close(relation, AccessShareLock);
1601 * Do not truncate CLOG if we seem to have suffered wraparound already;
1602 * the computed minimum XID might be bogus. This case should now be
1603 * impossible due to the defenses in GetNewTransactionId, but we keep the
1604 * test anyway.
1606 if (frozenAlreadyWrapped)
1608 ereport(WARNING,
1609 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1610 errdetail("You might have already suffered transaction-wraparound data loss.")));
1611 LWLockRelease(WrapLimitsVacuumLock);
1612 return;
1615 /* chicken out if data is bogus in any other way */
1616 if (bogus)
1618 LWLockRelease(WrapLimitsVacuumLock);
1619 return;
1623 * Advance the oldest value for commit timestamps before truncating, so
1624 * that if a user requests a timestamp for a transaction we're truncating
1625 * away right after this point, they get NULL instead of an ugly "file not
1626 * found" error from slru.c. This doesn't matter for xact/multixact
1627 * because they are not subject to arbitrary lookups from users.
1629 AdvanceOldestCommitTsXid(frozenXID);
1632 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1634 TruncateCLOG(frozenXID, oldestxid_datoid);
1635 TruncateCommitTs(frozenXID);
1636 TruncateMultiXact(minMulti, minmulti_datoid);
1639 * Update the wrap limit for GetNewTransactionId and creation of new
1640 * MultiXactIds. Note: these functions will also signal the postmaster
1641 * for an(other) autovac cycle if needed. XXX should we avoid possibly
1642 * signalling twice?
1644 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1645 SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1647 LWLockRelease(WrapLimitsVacuumLock);
1652 * vacuum_rel() -- vacuum one heap relation
1654 * relid identifies the relation to vacuum. If relation is supplied,
1655 * use the name therein for reporting any failure to open/lock the rel;
1656 * do not use it once we've successfully opened the rel, since it might
1657 * be stale.
1659 * Returns true if it's okay to proceed with a requested ANALYZE
1660 * operation on this table.
1662 * Doing one heap at a time incurs extra overhead, since we need to
1663 * check that the heap exists again just before we vacuum it. The
1664 * reason that we do this is so that vacuuming can be spread across
1665 * many small transactions. Otherwise, two-phase locking would require
1666 * us to lock the entire database during one pass of the vacuum cleaner.
1668 * At entry and exit, we are not inside a transaction.
1670 static bool
1671 vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1673 LOCKMODE lmode;
1674 Relation onerel;
1675 LockRelId onerelid;
1676 Oid toast_relid;
1677 Oid save_userid;
1678 int save_sec_context;
1679 int save_nestlevel;
1681 Assert(params != NULL);
1683 /* Begin a transaction for vacuuming this relation */
1684 StartTransactionCommand();
1687 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1688 * ensures that RecentGlobalXmin is kept truly recent.
1690 PushActiveSnapshot(GetTransactionSnapshot());
1692 if (!(params->options & VACOPT_FULL))
1695 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1696 * other concurrent VACUUMs know that they can ignore this one while
1697 * determining their OldestXmin. (The reason we don't set it during a
1698 * full VACUUM is exactly that we may have to run user-defined
1699 * functions for functional indexes, and we want to make sure that if
1700 * they use the snapshot set above, any tuples it requires can't get
1701 * removed from other tables. An index function that depends on the
1702 * contents of other tables is arguably broken, but we won't break it
1703 * here by violating transaction semantics.)
1705 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1706 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1707 * in an emergency.
1709 * Note: these flags remain set until CommitTransaction or
1710 * AbortTransaction. We don't want to clear them until we reset
1711 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1712 * which is probably Not Good.
1714 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1715 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1716 if (params->is_wraparound)
1717 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1718 LWLockRelease(ProcArrayLock);
1722 * Check for user-requested abort. Note we want this to be inside a
1723 * transaction, so xact.c doesn't issue useless WARNING.
1725 CHECK_FOR_INTERRUPTS();
1728 * Determine the type of lock we want --- hard exclusive lock for a FULL
1729 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1730 * way, we can be sure that no other backend is vacuuming the same table.
1732 lmode = (params->options & VACOPT_FULL) ?
1733 AccessExclusiveLock : ShareUpdateExclusiveLock;
1735 /* open the relation and get the appropriate lock on it */
1736 onerel = vacuum_open_relation(relid, relation, params->options,
1737 params->log_min_duration >= 0, lmode);
1739 /* leave if relation could not be opened or locked */
1740 if (!onerel)
1742 PopActiveSnapshot();
1743 CommitTransactionCommand();
1744 return false;
1748 * Check if relation needs to be skipped based on ownership. This check
1749 * happens also when building the relation list to vacuum for a manual
1750 * operation, and needs to be done additionally here as VACUUM could
1751 * happen across multiple transactions where relation ownership could have
1752 * changed in-between. Make sure to only generate logs for VACUUM in this
1753 * case.
1755 if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
1756 onerel->rd_rel,
1757 params->options & VACOPT_VACUUM))
1759 relation_close(onerel, lmode);
1760 PopActiveSnapshot();
1761 CommitTransactionCommand();
1762 return false;
1766 * Check that it's of a vacuumable relkind.
1768 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1769 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1770 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1771 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1773 ereport(WARNING,
1774 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1775 RelationGetRelationName(onerel))));
1776 relation_close(onerel, lmode);
1777 PopActiveSnapshot();
1778 CommitTransactionCommand();
1779 return false;
1783 * Silently ignore tables that are temp tables of other backends ---
1784 * trying to vacuum these will lead to great unhappiness, since their
1785 * contents are probably not up-to-date on disk. (We don't throw a
1786 * warning here; it would just lead to chatter during a database-wide
1787 * VACUUM.)
1789 if (RELATION_IS_OTHER_TEMP(onerel))
1791 relation_close(onerel, lmode);
1792 PopActiveSnapshot();
1793 CommitTransactionCommand();
1794 return false;
1798 * Silently ignore partitioned tables as there is no work to be done. The
1799 * useful work is on their child partitions, which have been queued up for
1800 * us separately.
1802 if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1804 relation_close(onerel, lmode);
1805 PopActiveSnapshot();
1806 CommitTransactionCommand();
1807 /* It's OK to proceed with ANALYZE on this table */
1808 return true;
1812 * Get a session-level lock too. This will protect our access to the
1813 * relation across multiple transactions, so that we can vacuum the
1814 * relation's TOAST table (if any) secure in the knowledge that no one is
1815 * deleting the parent relation.
1817 * NOTE: this cannot block, even if someone else is waiting for access,
1818 * because the lock manager knows that both lock requests are from the
1819 * same process.
1821 onerelid = onerel->rd_lockInfo.lockRelId;
1822 LockRelationIdForSession(&onerelid, lmode);
1824 /* Set index cleanup option based on reloptions if not yet */
1825 if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
1827 if (onerel->rd_options == NULL ||
1828 ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup)
1829 params->index_cleanup = VACOPT_TERNARY_ENABLED;
1830 else
1831 params->index_cleanup = VACOPT_TERNARY_DISABLED;
1834 /* Set truncate option based on reloptions if not yet */
1835 if (params->truncate == VACOPT_TERNARY_DEFAULT)
1837 if (onerel->rd_options == NULL ||
1838 ((StdRdOptions *) onerel->rd_options)->vacuum_truncate)
1839 params->truncate = VACOPT_TERNARY_ENABLED;
1840 else
1841 params->truncate = VACOPT_TERNARY_DISABLED;
1845 * Remember the relation's TOAST relation for later, if the caller asked
1846 * us to process it. In VACUUM FULL, though, the toast table is
1847 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1849 if (!(params->options & VACOPT_SKIPTOAST) && !(params->options & VACOPT_FULL))
1850 toast_relid = onerel->rd_rel->reltoastrelid;
1851 else
1852 toast_relid = InvalidOid;
1855 * Switch to the table owner's userid, so that any index functions are run
1856 * as that user. Also lock down security-restricted operations and
1857 * arrange to make GUC variable changes local to this command. (This is
1858 * unnecessary, but harmless, for lazy VACUUM.)
1860 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1861 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1862 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1863 save_nestlevel = NewGUCNestLevel();
1866 * Do the actual work --- either FULL or "lazy" vacuum
1868 if (params->options & VACOPT_FULL)
1870 int cluster_options = 0;
1872 /* close relation before vacuuming, but hold lock until commit */
1873 relation_close(onerel, NoLock);
1874 onerel = NULL;
1876 if ((params->options & VACOPT_VERBOSE) != 0)
1877 cluster_options |= CLUOPT_VERBOSE;
1879 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1880 cluster_rel(relid, InvalidOid, cluster_options);
1882 else
1883 table_relation_vacuum(onerel, params, vac_strategy);
1885 /* Roll back any GUC changes executed by index functions */
1886 AtEOXact_GUC(false, save_nestlevel);
1888 /* Restore userid and security context */
1889 SetUserIdAndSecContext(save_userid, save_sec_context);
1891 /* all done with this class, but hold lock until commit */
1892 if (onerel)
1893 relation_close(onerel, NoLock);
1896 * Complete the transaction and free all temporary memory used.
1898 PopActiveSnapshot();
1899 CommitTransactionCommand();
1902 * If the relation has a secondary toast rel, vacuum that too while we
1903 * still hold the session lock on the master table. Note however that
1904 * "analyze" will not get done on the toast table. This is good, because
1905 * the toaster always uses hardcoded index access and statistics are
1906 * totally unimportant for toast relations.
1908 if (toast_relid != InvalidOid)
1909 vacuum_rel(toast_relid, NULL, params);
1912 * Now release the session-level lock on the master table.
1914 UnlockRelationIdForSession(&onerelid, lmode);
1916 /* Report that we really did it. */
1917 return true;
1922 * Open all the vacuumable indexes of the given relation, obtaining the
1923 * specified kind of lock on each. Return an array of Relation pointers for
1924 * the indexes into *Irel, and the number of indexes into *nindexes.
1926 * We consider an index vacuumable if it is marked insertable (indisready).
1927 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1928 * execution, and what we have is too corrupt to be processable. We will
1929 * vacuum even if the index isn't indisvalid; this is important because in a
1930 * unique index, uniqueness checks will be performed anyway and had better not
1931 * hit dangling index pointers.
1933 void
1934 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1935 int *nindexes, Relation **Irel)
1937 List *indexoidlist;
1938 ListCell *indexoidscan;
1939 int i;
1941 Assert(lockmode != NoLock);
1943 indexoidlist = RelationGetIndexList(relation);
1945 /* allocate enough memory for all indexes */
1946 i = list_length(indexoidlist);
1948 if (i > 0)
1949 *Irel = (Relation *) palloc(i * sizeof(Relation));
1950 else
1951 *Irel = NULL;
1953 /* collect just the ready indexes */
1954 i = 0;
1955 foreach(indexoidscan, indexoidlist)
1957 Oid indexoid = lfirst_oid(indexoidscan);
1958 Relation indrel;
1960 indrel = index_open(indexoid, lockmode);
1961 if (indrel->rd_index->indisready)
1962 (*Irel)[i++] = indrel;
1963 else
1964 index_close(indrel, lockmode);
1967 *nindexes = i;
1969 list_free(indexoidlist);
1973 * Release the resources acquired by vac_open_indexes. Optionally release
1974 * the locks (say NoLock to keep 'em).
1976 void
1977 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1979 if (Irel == NULL)
1980 return;
1982 while (nindexes--)
1984 Relation ind = Irel[nindexes];
1986 index_close(ind, lockmode);
1988 pfree(Irel);
1992 * vacuum_delay_point --- check for interrupts and cost-based delay.
1994 * This should be called in each major loop of VACUUM processing,
1995 * typically once per page processed.
1997 void
1998 vacuum_delay_point(void)
2000 /* Always check for interrupts */
2001 CHECK_FOR_INTERRUPTS();
2003 /* Nap if appropriate */
2004 if (VacuumCostActive && !InterruptPending &&
2005 VacuumCostBalance >= VacuumCostLimit)
2007 double msec;
2009 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
2010 if (msec > VacuumCostDelay * 4)
2011 msec = VacuumCostDelay * 4;
2013 pg_usleep((long) (msec * 1000));
2015 VacuumCostBalance = 0;
2017 /* update balance values for workers */
2018 AutoVacuumUpdateDelay();
2020 /* Might have gotten an interrupt while sleeping */
2021 CHECK_FOR_INTERRUPTS();
2026 * A wrapper function of defGetBoolean().
2028 * This function returns VACOPT_TERNARY_ENABLED and VACOPT_TERNARY_DISABLED
2029 * instead of true and false.
2031 static VacOptTernaryValue
2032 get_vacopt_ternary_value(DefElem *def)
2034 return defGetBoolean(def) ? VACOPT_TERNARY_ENABLED : VACOPT_TERNARY_DISABLED;