1 /*-------------------------------------------------------------------------
5 * PostgreSQL Integrated Autovacuum Daemon
7 * The autovacuum system is structured in two different kinds of processes: the
8 * autovacuum launcher and the autovacuum worker. The launcher is an
9 * always-running process, started by the postmaster when the autovacuum GUC
10 * parameter is set. The launcher schedules autovacuum workers to be started
11 * when appropriate. The workers are the processes which execute the actual
12 * vacuuming; they connect to a database as determined in the launcher, and
13 * once connected they examine the catalogs to select the tables to vacuum.
15 * The autovacuum launcher cannot start the worker processes by itself,
16 * because doing so would cause robustness issues (namely, failure to shut
17 * them down on exceptional conditions, and also, since the launcher is
18 * connected to shared memory and is thus subject to corruption there, it is
19 * not as robust as the postmaster). So it leaves that task to the postmaster.
21 * There is an autovacuum shared memory area, where the launcher stores
22 * information about the database it wants vacuumed. When it wants a new
23 * worker to start, it sets a flag in shared memory and sends a signal to the
24 * postmaster. The postmaster then knows nothing more than that it must start a worker;
25 * so it forks a new child, which turns into a worker. This new process
26 * connects to shared memory, and there it can inspect the information that the
27 * launcher has set up.
29 * If the fork() call fails in the postmaster, it sets a flag in the shared
30 * memory area, and sends a signal to the launcher. The launcher, upon
31 * noticing the flag, can try starting the worker again by resending the
32 * signal. Note that the failure can only be transient (fork failure due to
33 * high load, memory pressure, too many processes, etc); more permanent
34 * problems, like failure to connect to a database, are detected later in the
35 * worker and dealt with just by having the worker exit normally. The launcher
36 * will launch a new worker again later, per schedule.
38 * When the worker is done vacuuming it sends SIGUSR1 to the launcher. The
39 * launcher then wakes up and is able to launch another worker, if the schedule
40 * is so tight that a new worker is needed immediately. At this time the
41 * launcher can also balance the settings for the various remaining workers'
42 * cost-based vacuum delay feature.
44 * Note that there can be more than one worker in a database concurrently.
45 * They will store the table they are currently vacuuming in shared memory, so
46 * that other workers avoid being blocked waiting for the vacuum lock for that
47 * table. They will also reload the pgstats data just before vacuuming each
48 * table, to avoid vacuuming a table that was just finished being vacuumed by
49 * another worker and thus is no longer noted in shared memory. However,
50 * there is a window (caused by pgstat delay) in which a worker may choose a
51 * table that was already vacuumed; this is a bug in the current design.
53 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
54 * Portions Copyright (c) 1994, Regents of the University of California
60 *-------------------------------------------------------------------------
65 #include <sys/types.h>
70 #include "access/genam.h"
71 #include "access/heapam.h"
72 #include "access/transam.h"
73 #include "access/xact.h"
74 #include "catalog/indexing.h"
75 #include "catalog/namespace.h"
76 #include "catalog/pg_autovacuum.h"
77 #include "catalog/pg_database.h"
78 #include "commands/dbcommands.h"
79 #include "commands/vacuum.h"
80 #include "libpq/hba.h"
81 #include "libpq/pqsignal.h"
82 #include "miscadmin.h"
84 #include "postmaster/autovacuum.h"
85 #include "postmaster/fork_process.h"
86 #include "postmaster/postmaster.h"
87 #include "storage/fd.h"
88 #include "storage/ipc.h"
89 #include "storage/pmsignal.h"
90 #include "storage/proc.h"
91 #include "storage/procarray.h"
92 #include "storage/sinval.h"
93 #include "tcop/tcopprot.h"
94 #include "utils/flatfiles.h"
95 #include "utils/fmgroids.h"
96 #include "utils/lsyscache.h"
97 #include "utils/memutils.h"
98 #include "utils/ps_status.h"
99 #include "utils/syscache.h"
105 bool autovacuum_start_daemon
= false;
106 int autovacuum_max_workers
;
107 int autovacuum_naptime
;
108 int autovacuum_vac_thresh
;
109 double autovacuum_vac_scale
;
110 int autovacuum_anl_thresh
;
111 double autovacuum_anl_scale
;
112 int autovacuum_freeze_max_age
;
114 int autovacuum_vac_cost_delay
;
115 int autovacuum_vac_cost_limit
;
117 int Log_autovacuum_min_duration
= -1;
119 /* how long to keep pgstat data in the launcher, in milliseconds */
120 #define STATS_READ_DELAY 1000
123 /* Flags to tell if we are in an autovacuum process */
124 static bool am_autovacuum_launcher
= false;
125 static bool am_autovacuum_worker
= false;
127 /* Flags set by signal handlers */
128 static volatile sig_atomic_t got_SIGHUP
= false;
129 static volatile sig_atomic_t got_SIGUSR1
= false;
130 static volatile sig_atomic_t got_SIGTERM
= false;
132 /* Comparison point for determining whether freeze_max_age is exceeded */
133 static TransactionId recentXid
;
135 /* Default freeze_min_age to use for autovacuum (varies by database) */
136 static int default_freeze_min_age
;
138 /* Memory context for long-lived data */
139 static MemoryContext AutovacMemCxt
;
141 /* struct to keep track of databases in launcher */
142 typedef struct avl_dbase
144 Oid adl_datid
; /* hash key -- must be first */
145 TimestampTz adl_next_worker
;
149 /* struct to keep track of databases in worker */
150 typedef struct avw_dbase
154 TransactionId adw_frozenxid
;
155 PgStat_StatDBEntry
*adw_entry
;
158 /* struct to keep track of tables to vacuum and/or analyze, in 1st pass */
159 typedef struct av_relation
165 /* struct to keep track of tables to vacuum and/or analyze, after rechecking */
166 typedef struct autovac_table
172 int at_freeze_min_age
;
173 int at_vacuum_cost_delay
;
174 int at_vacuum_cost_limit
;
178 * This struct holds information about a single worker's whereabouts. We keep
179 * an array of these in shared memory, sized according to
180 * autovacuum_max_workers.
182 * wi_links entry into free list or running list
183 * wi_dboid OID of the database this worker is supposed to work on
184 * wi_tableoid OID of the table currently being vacuumed
185 * wi_workerpid PID of the running worker, 0 if not yet started
186 * wi_launchtime Time at which this worker was launched
187 * wi_cost_* Vacuum cost-based delay parameters current in this worker
189 * All fields are protected by AutovacuumLock, except for wi_tableoid which is
190 * protected by AutovacuumScheduleLock (which is read-only for everyone except
191 * that worker itself).
194 typedef struct WorkerInfoData
200 TimestampTz wi_launchtime
;
203 int wi_cost_limit_base
;
206 typedef struct WorkerInfoData
*WorkerInfo
;
209 * Possible signals received by the launcher from remote processes. These are
210 * stored atomically in shared memory so that other processes can set them
215 AutoVacForkFailed
, /* failed trying to start a worker */
216 AutoVacRebalance
, /* rebalance the cost limits */
217 AutoVacNumSignals
= AutoVacRebalance
/* must be last */
221 * The main autovacuum shmem struct. On shared memory we store this main
222 * struct and the array of WorkerInfo structs. This struct keeps:
224 * av_signal set by other processes to indicate various conditions
225 * av_launcherpid the PID of the autovacuum launcher
226 * av_freeWorkers the WorkerInfo freelist
227 * av_runningWorkers the WorkerInfo non-free queue
228 * av_startingWorker pointer to WorkerInfo currently being started (cleared by
229 * the worker itself as soon as it's up and running)
231 * This struct is protected by AutovacuumLock, except for av_signal and parts
232 * of the worker list (see above).
237 sig_atomic_t av_signal
[AutoVacNumSignals
];
238 pid_t av_launcherpid
;
239 SHMEM_OFFSET av_freeWorkers
;
240 SHM_QUEUE av_runningWorkers
;
241 SHMEM_OFFSET av_startingWorker
;
242 } AutoVacuumShmemStruct
;
244 static AutoVacuumShmemStruct
*AutoVacuumShmem
;
246 /* the database list in the launcher, and the context that contains it */
247 static Dllist
*DatabaseList
= NULL
;
248 static MemoryContext DatabaseListCxt
= NULL
;
250 /* Pointer to my own WorkerInfo, valid on each worker */
251 static WorkerInfo MyWorkerInfo
= NULL
;
253 /* PID of launcher, valid only in worker while shutting down */
254 int AutovacuumLauncherPid
= 0;
257 static pid_t
avlauncher_forkexec(void);
258 static pid_t
avworker_forkexec(void);
260 NON_EXEC_STATIC
void AutoVacWorkerMain(int argc
, char *argv
[]);
261 NON_EXEC_STATIC
void AutoVacLauncherMain(int argc
, char *argv
[]);
263 static Oid
do_start_worker(void);
264 static void launcher_determine_sleep(bool canlaunch
, bool recursing
,
265 struct timeval
*nap
);
266 static void launch_worker(TimestampTz now
);
267 static List
*get_database_list(void);
268 static void rebuild_database_list(Oid newdb
);
269 static int db_comparator(const void *a
, const void *b
);
270 static void autovac_balance_cost(void);
272 static void do_autovacuum(void);
273 static void FreeWorkerInfo(int code
, Datum arg
);
275 static void relation_check_autovac(Oid relid
, Form_pg_class classForm
,
276 Form_pg_autovacuum avForm
, PgStat_StatTabEntry
*tabentry
,
277 List
**table_oids
, List
**table_toast_list
,
279 static autovac_table
*table_recheck_autovac(Oid relid
);
280 static void relation_needs_vacanalyze(Oid relid
, Form_pg_autovacuum avForm
,
281 Form_pg_class classForm
,
282 PgStat_StatTabEntry
*tabentry
, bool *dovacuum
,
285 static void autovacuum_do_vac_analyze(Oid relid
, bool dovacuum
,
286 bool doanalyze
, int freeze_min_age
,
287 BufferAccessStrategy bstrategy
);
288 static HeapTuple
get_pg_autovacuum_tuple_relid(Relation avRel
, Oid relid
);
289 static PgStat_StatTabEntry
*get_pgstat_tabentry_relid(Oid relid
, bool isshared
,
290 PgStat_StatDBEntry
*shared
,
291 PgStat_StatDBEntry
*dbentry
);
292 static void autovac_report_activity(VacuumStmt
*vacstmt
, Oid relid
);
293 static void avl_sighup_handler(SIGNAL_ARGS
);
294 static void avl_sigusr1_handler(SIGNAL_ARGS
);
295 static void avl_sigterm_handler(SIGNAL_ARGS
);
296 static void avl_quickdie(SIGNAL_ARGS
);
297 static void autovac_refresh_stats(void);
301 /********************************************************************
302 * AUTOVACUUM LAUNCHER CODE
303 ********************************************************************/
307 * forkexec routine for the autovacuum launcher process.
309 * Format up the arglist, then fork and exec.
312 avlauncher_forkexec(void)
317 av
[ac
++] = "postgres";
318 av
[ac
++] = "--forkavlauncher";
319 av
[ac
++] = NULL
; /* filled in by postmaster_forkexec */
322 Assert(ac
< lengthof(av
));
324 return postmaster_forkexec(ac
, av
);
328 * We need this set from the outside, before InitProcess is called
331 AutovacuumLauncherIAm(void)
333 am_autovacuum_launcher
= true;
338 * Main entry point for autovacuum launcher process, to be called from the
342 StartAutoVacLauncher(void)
347 switch ((AutoVacPID
= avlauncher_forkexec()))
349 switch ((AutoVacPID
= fork_process()))
354 (errmsg("could not fork autovacuum process: %m")));
359 /* in postmaster child ... */
360 /* Close the postmaster's sockets */
361 ClosePostmasterPorts(false);
363 /* Lose the postmaster's on-exit routines */
366 AutoVacLauncherMain(0, NULL
);
370 return (int) AutoVacPID
;
373 /* shouldn't get here */
378 * Main loop for the autovacuum launcher process.
381 AutoVacLauncherMain(int argc
, char *argv
[])
383 sigjmp_buf local_sigjmp_buf
;
385 /* we are a postmaster subprocess now */
386 IsUnderPostmaster
= true;
387 am_autovacuum_launcher
= true;
389 /* reset MyProcPid */
390 MyProcPid
= getpid();
392 /* record Start Time for logging */
393 MyStartTime
= time(NULL
);
395 /* Identify myself via ps */
396 init_ps_display("autovacuum launcher process", "", "", "");
398 SetProcessingMode(InitProcessing
);
401 * If possible, make this process a group leader, so that the postmaster
402 * can signal any child processes too. (autovacuum probably never has
403 * any child processes, but for consistency we make all postmaster
404 * child processes do this.)
408 elog(FATAL
, "setsid() failed: %m");
412 * Set up signal handlers. Since this is an auxiliary process, it has
413 * particular signal requirements -- no deadlock checker or sinval
414 * catchup, for example.
416 pqsignal(SIGHUP
, avl_sighup_handler
);
418 pqsignal(SIGINT
, SIG_IGN
);
419 pqsignal(SIGTERM
, avl_sigterm_handler
);
420 pqsignal(SIGQUIT
, avl_quickdie
);
421 pqsignal(SIGALRM
, SIG_IGN
);
423 pqsignal(SIGPIPE
, SIG_IGN
);
424 pqsignal(SIGUSR1
, avl_sigusr1_handler
);
425 /* We don't listen for async notifies */
426 pqsignal(SIGUSR2
, SIG_IGN
);
427 pqsignal(SIGFPE
, FloatExceptionHandler
);
428 pqsignal(SIGCHLD
, SIG_DFL
);
430 /* Early initialization */
434 * Create a per-backend PGPROC struct in shared memory, except in the
435 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
436 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
437 * had to do some stuff with LWLocks).
440 InitAuxiliaryProcess();
444 * Create a memory context that we will do all our work in. We do this so
445 * that we can reset the context during error recovery and thereby avoid
446 * possible memory leaks.
448 AutovacMemCxt
= AllocSetContextCreate(TopMemoryContext
,
449 "Autovacuum Launcher",
450 ALLOCSET_DEFAULT_MINSIZE
,
451 ALLOCSET_DEFAULT_INITSIZE
,
452 ALLOCSET_DEFAULT_MAXSIZE
);
453 MemoryContextSwitchTo(AutovacMemCxt
);
457 * If an exception is encountered, processing resumes here.
459 * This code is heavily based on bgwriter.c, q.v.
461 if (sigsetjmp(local_sigjmp_buf
, 1) != 0)
463 /* since not using PG_TRY, must reset error stack by hand */
464 error_context_stack
= NULL
;
466 /* Prevents interrupts while cleaning up */
469 /* Report the error to the server log */
473 * These operations are really just a minimal subset of
474 * AbortTransaction(). We don't have very many resources to worry
475 * about, but we do have LWLocks.
479 AtEOXact_HashTables(false);
482 * Now return to normal top-level context and clear ErrorContext for
485 MemoryContextSwitchTo(AutovacMemCxt
);
488 /* Flush any leaked data in the top-level context */
489 MemoryContextResetAndDeleteChildren(AutovacMemCxt
);
491 /* don't leave dangling pointers to freed memory */
492 DatabaseListCxt
= NULL
;
496 * Make sure pgstat also considers our stat data as gone. Note: we
497 * mustn't use autovac_refresh_stats here.
499 pgstat_clear_snapshot();
501 /* Now we can allow interrupts again */
505 * Sleep at least 1 second after any error. We don't want to be
506 * filling the error logs as fast as we can.
511 /* We can now handle ereport(ERROR) */
512 PG_exception_stack
= &local_sigjmp_buf
;
515 (errmsg("autovacuum launcher started")));
517 /* must unblock signals before calling rebuild_database_list */
518 PG_SETMASK(&UnBlockSig
);
520 /* in emergency mode, just start a worker and go away */
521 if (!AutoVacuumingActive())
524 proc_exit(0); /* done */
527 AutoVacuumShmem
->av_launcherpid
= MyProcPid
;
530 * Create the initial database list. The invariant we want this list to
531 * keep is that it's ordered by decreasing next_time. As soon as an entry
532 * is updated to a higher time, it will be moved to the front (which is
533 * correct because the only operation is to add autovacuum_naptime to the
534 * entry, and time always increases).
536 rebuild_database_list(InvalidOid
);
541 TimestampTz current_time
= 0;
546 * Emergency bailout if postmaster has died. This is to avoid the
547 * necessity for manual cleanup of all postmaster children.
549 if (!PostmasterIsAlive(true))
552 launcher_determine_sleep(AutoVacuumShmem
->av_freeWorkers
!=
553 INVALID_OFFSET
, false, &nap
);
556 * Sleep for a while according to schedule.
558 * On some platforms, signals won't interrupt the sleep. To ensure we
559 * respond reasonably promptly when someone signals us, break down the
560 * sleep into 1-second increments, and check for interrupts after each
563 while (nap
.tv_sec
> 0 || nap
.tv_usec
> 0)
574 sleeptime
= nap
.tv_usec
;
577 pg_usleep(sleeptime
);
580 * Emergency bailout if postmaster has died. This is to avoid the
581 * necessity for manual cleanup of all postmaster children.
583 if (!PostmasterIsAlive(true))
586 if (got_SIGTERM
|| got_SIGHUP
|| got_SIGUSR1
)
590 /* the normal shutdown case */
597 ProcessConfigFile(PGC_SIGHUP
);
599 /* shutdown requested in config file */
600 if (!AutoVacuumingActive())
603 /* rebalance in case the default cost parameters changed */
604 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
605 autovac_balance_cost();
606 LWLockRelease(AutovacuumLock
);
608 /* rebuild the list in case the naptime changed */
609 rebuild_database_list(InvalidOid
);
613 * a worker finished, or postmaster signalled failure to start a
620 /* rebalance cost limits, if needed */
621 if (AutoVacuumShmem
->av_signal
[AutoVacRebalance
])
623 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
624 AutoVacuumShmem
->av_signal
[AutoVacRebalance
] = false;
625 autovac_balance_cost();
626 LWLockRelease(AutovacuumLock
);
629 if (AutoVacuumShmem
->av_signal
[AutoVacForkFailed
])
632 * If the postmaster failed to start a new worker, we sleep
633 * for a little while and resend the signal. The new worker's
634 * state is still in memory, so this is sufficient. After
635 * that, we restart the main loop.
637 * XXX should we put a limit to the number of times we retry?
638 * I don't think it makes much sense, because a future start
639 * of a worker will continue to fail in the same way.
641 AutoVacuumShmem
->av_signal
[AutoVacForkFailed
] = false;
642 pg_usleep(100000L); /* 100ms */
643 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER
);
649 * There are some conditions that we need to check before trying to
650 * start a worker. First, we need to make sure that there is a
651 * worker slot available. Second, we need to make sure that no other
652 * worker failed while starting up.
655 current_time
= GetCurrentTimestamp();
656 LWLockAcquire(AutovacuumLock
, LW_SHARED
);
658 can_launch
= (AutoVacuumShmem
->av_freeWorkers
!= INVALID_OFFSET
);
660 if (AutoVacuumShmem
->av_startingWorker
!= INVALID_OFFSET
)
664 WorkerInfo worker
= (WorkerInfo
) MAKE_PTR(AutoVacuumShmem
->av_startingWorker
);
667 * We can't launch another worker when another one is still
668 * starting up (or failed while doing so), so just sleep for a bit
669 * more; that worker will wake us up again as soon as it's ready.
670 * We will only wait autovacuum_naptime seconds (up to a maximum of
671 * 60 seconds) for this to happen however. Note that failure to
672 * connect to a particular database is not a problem here, because
673 * the worker removes itself from the startingWorker pointer before
674 * trying to connect. Problems detected by the postmaster (like
675 * fork() failure) are also reported and handled differently. The
676 * only problems that may cause this code to fire are errors in the
677 * earlier sections of AutoVacWorkerMain, before the worker removes
678 * the WorkerInfo from the startingWorker pointer.
680 waittime
= Min(autovacuum_naptime
, 60) * 1000;
681 if (TimestampDifferenceExceeds(worker
->wi_launchtime
, current_time
,
684 LWLockRelease(AutovacuumLock
);
685 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
687 * No other process can put a worker in starting mode, so if
688 * startingWorker is still set (not INVALID_OFFSET) after exchanging our lock,
689 * we assume it's the same one we saw above (so we don't
690 * recheck the launch time).
692 if (AutoVacuumShmem
->av_startingWorker
!= INVALID_OFFSET
)
694 worker
= (WorkerInfo
) MAKE_PTR(AutoVacuumShmem
->av_startingWorker
);
695 worker
->wi_dboid
= InvalidOid
;
696 worker
->wi_tableoid
= InvalidOid
;
697 worker
->wi_workerpid
= 0;
698 worker
->wi_launchtime
= 0;
699 worker
->wi_links
.next
= AutoVacuumShmem
->av_freeWorkers
;
700 AutoVacuumShmem
->av_freeWorkers
= MAKE_OFFSET(worker
);
701 AutoVacuumShmem
->av_startingWorker
= INVALID_OFFSET
;
702 elog(WARNING
, "worker took too long to start; cancelled");
708 LWLockRelease(AutovacuumLock
); /* either shared or exclusive */
710 /* if we can't do anything, just go back to sleep */
714 /* We're OK to start a new worker */
716 elem
= DLGetTail(DatabaseList
);
719 avl_dbase
*avdb
= DLE_VAL(elem
);
722 * launch a worker if next_worker is right now or it is in the past
724 if (TimestampDifferenceExceeds(avdb
->adl_next_worker
,
726 launch_worker(current_time
);
731 * Special case when the list is empty: start a worker right away.
732 * This covers the initial case, when no database is in pgstats
733 * (thus the list is empty). Note that the constraints in
734 * launcher_determine_sleep keep us from starting workers too
735 * quickly (at most once every autovacuum_naptime when the list is
738 launch_worker(current_time
);
742 /* Normal exit from the autovac launcher is here */
744 (errmsg("autovacuum launcher shutting down")));
745 AutoVacuumShmem
->av_launcherpid
= 0;
747 proc_exit(0); /* done */
751 * Determine the time to sleep, based on the database list.
753 * The "canlaunch" parameter indicates whether we can start a worker right now;
754 * it is false, for example, when all the workers are busy. If it is false, we will
755 * cause a long sleep, which will be interrupted when a worker exits.
758 launcher_determine_sleep(bool canlaunch
, bool recursing
, struct timeval
*nap
)
763 * We sleep until the next scheduled vacuum. We trust that when the
764 * database list was built, care was taken so that no entries have times in
765 * the past; if the first entry has too close a next_worker value, or a
766 * time in the past, we will sleep a small nominal time.
770 nap
->tv_sec
= autovacuum_naptime
;
773 else if ((elem
= DLGetTail(DatabaseList
)) != NULL
)
775 avl_dbase
*avdb
= DLE_VAL(elem
);
776 TimestampTz current_time
= GetCurrentTimestamp();
777 TimestampTz next_wakeup
;
781 next_wakeup
= avdb
->adl_next_worker
;
782 TimestampDifference(current_time
, next_wakeup
, &secs
, &usecs
);
785 nap
->tv_usec
= usecs
;
789 /* list is empty, sleep for whole autovacuum_naptime seconds */
790 nap
->tv_sec
= autovacuum_naptime
;
795 * If the result is exactly zero, it means a database had an entry with
796 * time in the past. Rebuild the list so that the databases are evenly
797 * distributed again, and recalculate the time to sleep. This can happen
798 * if there are more tables needing vacuum than workers, and they all take
799 * longer to vacuum than autovacuum_naptime.
801 * We only recurse once. rebuild_database_list should always return times
802 * in the future, but it seems best not to trust too much on that.
804 if (nap
->tv_sec
== 0 && nap
->tv_usec
== 0 && !recursing
)
806 rebuild_database_list(InvalidOid
);
807 launcher_determine_sleep(canlaunch
, true, nap
);
811 /* 100ms is the smallest time we'll allow the launcher to sleep */
812 if (nap
->tv_sec
<= 0 && nap
->tv_usec
<= 100000)
815 nap
->tv_usec
= 100000; /* 100 ms */
820 * Build an updated DatabaseList. It must only contain databases that appear
821 * in pgstats, and must be sorted by next_worker from highest to lowest,
822 * distributed regularly across the next autovacuum_naptime interval.
824 * Receives the Oid of the database that made this list be generated (we call
825 * this the "new" database, because when the database was already present on
826 * the list, we expect that this function is not called at all). The
827 * preexisting list, if any, will be used to preserve the order of the
828 * databases in the autovacuum_naptime period. The new database is put at the
829 * end of the interval. The actual values are not saved, which should not be
833 rebuild_database_list(Oid newdb
)
837 MemoryContext newcxt
;
838 MemoryContext oldcxt
;
839 MemoryContext tmpcxt
;
845 /* use fresh stats */
846 autovac_refresh_stats();
848 newcxt
= AllocSetContextCreate(AutovacMemCxt
,
850 ALLOCSET_DEFAULT_MINSIZE
,
851 ALLOCSET_DEFAULT_INITSIZE
,
852 ALLOCSET_DEFAULT_MAXSIZE
);
853 tmpcxt
= AllocSetContextCreate(newcxt
,
855 ALLOCSET_DEFAULT_MINSIZE
,
856 ALLOCSET_DEFAULT_INITSIZE
,
857 ALLOCSET_DEFAULT_MAXSIZE
);
858 oldcxt
= MemoryContextSwitchTo(tmpcxt
);
861 * Implementing this is not as simple as it sounds, because we need to put
862 * the new database at the end of the list; next the databases that were
863 * already on the list, and finally (at the tail of the list) all the other
864 * databases that are not on the existing list.
866 * To do this, we build an empty hash table of scored databases. We will
867 * start with the lowest score (zero) for the new database, then increasing
868 * scores for the databases in the existing list, in order, and lastly
869 * increasing scores for all databases gotten via get_database_list() that
870 * are not already on the hash.
872 * Then we will put all the hash elements into an array, sort the array by
873 * score, and finally put the array elements into the new doubly linked
876 hctl
.keysize
= sizeof(Oid
);
877 hctl
.entrysize
= sizeof(avl_dbase
);
878 hctl
.hash
= oid_hash
;
880 dbhash
= hash_create("db hash", 20, &hctl
, /* magic number here FIXME */
881 HASH_ELEM
| HASH_FUNCTION
| HASH_CONTEXT
);
883 /* start by inserting the new database */
885 if (OidIsValid(newdb
))
888 PgStat_StatDBEntry
*entry
;
890 /* only consider this database if it has a pgstat entry */
891 entry
= pgstat_fetch_stat_dbentry(newdb
);
894 /* we assume it isn't found because the hash was just created */
895 db
= hash_search(dbhash
, &newdb
, HASH_ENTER
, NULL
);
897 /* hash_search already filled in the key */
898 db
->adl_score
= score
++;
899 /* next_worker is filled in later */
903 /* Now insert the databases from the existing list */
904 if (DatabaseList
!= NULL
)
908 elem
= DLGetHead(DatabaseList
);
911 avl_dbase
*avdb
= DLE_VAL(elem
);
914 PgStat_StatDBEntry
*entry
;
916 elem
= DLGetSucc(elem
);
919 * skip databases with no stat entries -- in particular, this
920 * gets rid of dropped databases
922 entry
= pgstat_fetch_stat_dbentry(avdb
->adl_datid
);
926 db
= hash_search(dbhash
, &(avdb
->adl_datid
), HASH_ENTER
, &found
);
930 /* hash_search already filled in the key */
931 db
->adl_score
= score
++;
932 /* next_worker is filled in later */
937 /* finally, insert all qualifying databases not previously inserted */
938 dblist
= get_database_list();
939 foreach(cell
, dblist
)
941 avw_dbase
*avdb
= lfirst(cell
);
944 PgStat_StatDBEntry
*entry
;
946 /* only consider databases with a pgstat entry */
947 entry
= pgstat_fetch_stat_dbentry(avdb
->adw_datid
);
951 db
= hash_search(dbhash
, &(avdb
->adw_datid
), HASH_ENTER
, &found
);
952 /* only update the score if the database was not already on the hash */
955 /* hash_search already filled in the key */
956 db
->adl_score
= score
++;
957 /* next_worker is filled in later */
962 /* from here on, the allocated memory belongs to the new list */
963 MemoryContextSwitchTo(newcxt
);
964 DatabaseList
= DLNewList();
968 TimestampTz current_time
;
969 int millis_increment
;
975 /* put all the hash elements into an array */
976 dbary
= palloc(nelems
* sizeof(avl_dbase
));
979 hash_seq_init(&seq
, dbhash
);
980 while ((db
= hash_seq_search(&seq
)) != NULL
)
981 memcpy(&(dbary
[i
++]), db
, sizeof(avl_dbase
));
984 qsort(dbary
, nelems
, sizeof(avl_dbase
), db_comparator
);
986 /* this is the time interval between databases in the schedule */
987 millis_increment
= 1000.0 * autovacuum_naptime
/ nelems
;
988 current_time
= GetCurrentTimestamp();
991 * move the elements from the array into the dllist, setting the
992 * next_worker while walking the array
994 for (i
= 0; i
< nelems
; i
++)
996 avl_dbase
*db
= &(dbary
[i
]);
999 current_time
= TimestampTzPlusMilliseconds(current_time
,
1001 db
->adl_next_worker
= current_time
;
1003 elem
= DLNewElem(db
);
1004 /* later elements should go closer to the head of the list */
1005 DLAddHead(DatabaseList
, elem
);
1009 /* all done, clean up memory */
1010 if (DatabaseListCxt
!= NULL
)
1011 MemoryContextDelete(DatabaseListCxt
);
1012 MemoryContextDelete(tmpcxt
);
1013 DatabaseListCxt
= newcxt
;
1014 MemoryContextSwitchTo(oldcxt
);
1017 /* qsort comparator for avl_dbase, using adl_score */
1019 db_comparator(const void *a
, const void *b
)
1021 if (((avl_dbase
*) a
)->adl_score
== ((avl_dbase
*) b
)->adl_score
)
1024 return (((avl_dbase
*) a
)->adl_score
< ((avl_dbase
*) b
)->adl_score
) ? 1 : -1;
1030 * Bare-bones procedure for starting an autovacuum worker from the launcher.
1031 * It determines what database to work on, sets up shared memory stuff and
1032 * signals postmaster to start the worker. It fails gracefully if invoked when
1033 * all autovacuum_max_workers workers are already active.
1035 * Return value is the OID of the database that the worker is going to process,
1036 * or InvalidOid if no worker was actually started.
1039 do_start_worker(void)
1043 TransactionId xidForceLimit
;
1046 TimestampTz current_time
;
1047 bool skipit
= false;
1048 Oid retval
= InvalidOid
;
1049 MemoryContext tmpcxt
,
1052 /* return quickly when there are no free workers */
1053 LWLockAcquire(AutovacuumLock
, LW_SHARED
);
1054 if (AutoVacuumShmem
->av_freeWorkers
== INVALID_OFFSET
)
1056 LWLockRelease(AutovacuumLock
);
1059 LWLockRelease(AutovacuumLock
);
1062 * Create and switch to a temporary context to avoid leaking the memory
1063 * allocated for the database list.
1065 tmpcxt
= AllocSetContextCreate(CurrentMemoryContext
,
1066 "Start worker tmp cxt",
1067 ALLOCSET_DEFAULT_MINSIZE
,
1068 ALLOCSET_DEFAULT_INITSIZE
,
1069 ALLOCSET_DEFAULT_MAXSIZE
);
1070 oldcxt
= MemoryContextSwitchTo(tmpcxt
);
1072 /* use fresh stats */
1073 autovac_refresh_stats();
1075 /* Get a list of databases */
1076 dblist
= get_database_list();
1079 * Determine the oldest datfrozenxid/relfrozenxid that we will allow
1080 * to pass without forcing a vacuum. (This limit can be tightened for
1081 * particular tables, but not loosened.)
1083 recentXid
= ReadNewTransactionId();
1084 xidForceLimit
= recentXid
- autovacuum_freeze_max_age
;
1085 /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
1086 if (xidForceLimit
< FirstNormalTransactionId
)
1087 xidForceLimit
-= FirstNormalTransactionId
;
1090 * Choose a database to connect to. We pick the database that was least
1091 * recently auto-vacuumed, or one that needs vacuuming to prevent Xid
1092 * wraparound-related data loss. If any db at risk of wraparound is
1093 * found, we pick the one with oldest datfrozenxid, independently of
1096 * Note that a database with no stats entry is not considered, except for
1097 * Xid wraparound purposes. The theory is that if no one has ever
1098 * connected to it since the stats were last initialized, it doesn't need
1101 * XXX This could be improved if we had more info about whether it needs
1102 * vacuuming before connecting to it. Perhaps look through the pgstats
1103 * data for the database's tables? One idea is to keep track of the
1104 * number of new and dead tuples per database in pgstats. However it
1105 * isn't clear how to construct a metric that measures that and not cause
1106 * starvation for less busy databases.
1109 for_xid_wrap
= false;
1110 current_time
= GetCurrentTimestamp();
1111 foreach(cell
, dblist
)
1113 avw_dbase
*tmp
= lfirst(cell
);
1116 /* Check to see if this one is at risk of wraparound */
1117 if (TransactionIdPrecedes(tmp
->adw_frozenxid
, xidForceLimit
))
1120 TransactionIdPrecedes(tmp
->adw_frozenxid
, avdb
->adw_frozenxid
))
1122 for_xid_wrap
= true;
1125 else if (for_xid_wrap
)
1126 continue; /* ignore not-at-risk DBs */
1128 /* Find pgstat entry if any */
1129 tmp
->adw_entry
= pgstat_fetch_stat_dbentry(tmp
->adw_datid
);
1132 * Skip a database with no pgstat entry; it means it hasn't seen any
1135 if (!tmp
->adw_entry
)
1139 * Also, skip a database that appears on the database list as having
1140 * been processed recently (less than autovacuum_naptime seconds ago).
1141 * We do this so that we don't select a database which we just
1142 * selected, but that pgstat hasn't gotten around to updating the last
1143 * autovacuum time yet.
1146 elem
= DatabaseList
? DLGetTail(DatabaseList
) : NULL
;
1148 while (elem
!= NULL
)
1150 avl_dbase
*dbp
= DLE_VAL(elem
);
1152 if (dbp
->adl_datid
== tmp
->adw_datid
)
1155 * Skip this database if its next_worker value falls between
1156 * the current time and the current time plus naptime.
1158 if (!TimestampDifferenceExceeds(dbp
->adl_next_worker
,
1160 !TimestampDifferenceExceeds(current_time
,
1161 dbp
->adl_next_worker
,
1162 autovacuum_naptime
* 1000))
1167 elem
= DLGetPred(elem
);
1173 * Remember the db with oldest autovac time. (If we are here,
1174 * both tmp->adw_entry and avdb->adw_entry must be non-null.)
1177 tmp
->adw_entry
->last_autovac_time
< avdb
->adw_entry
->last_autovac_time
)
1181 /* Found a database -- process it */
1185 SHMEM_OFFSET sworker
;
1187 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
1190 * Get a worker entry from the freelist. We checked above, so there
1191 * really should be a free slot -- complain very loudly if there isn't.
1193 sworker
= AutoVacuumShmem
->av_freeWorkers
;
1194 if (sworker
== INVALID_OFFSET
)
1195 elog(FATAL
, "no free worker found");
1197 worker
= (WorkerInfo
) MAKE_PTR(sworker
);
1198 AutoVacuumShmem
->av_freeWorkers
= worker
->wi_links
.next
;
1200 worker
->wi_dboid
= avdb
->adw_datid
;
1201 worker
->wi_workerpid
= 0;
1202 worker
->wi_launchtime
= GetCurrentTimestamp();
1204 AutoVacuumShmem
->av_startingWorker
= sworker
;
1206 LWLockRelease(AutovacuumLock
);
1208 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER
);
1210 retval
= avdb
->adw_datid
;
1215 * If we skipped all databases on the list, rebuild it, because it
1216 * probably contains a dropped database.
1218 rebuild_database_list(InvalidOid
);
1221 MemoryContextSwitchTo(oldcxt
);
1222 MemoryContextDelete(tmpcxt
);
1230 * Wrapper for starting a worker from the launcher. Besides actually starting
1231 * it, update the database list to reflect the next time that another one will
1232 * need to be started on the selected database. The actual database choice is
1233 * left to do_start_worker.
1235 * This routine is also expected to insert an entry into the database list if
1236 * the selected database was previously absent from the list. It returns the
1237 * new database list.
1240 launch_worker(TimestampTz now
)
1245 dbid
= do_start_worker();
1246 if (OidIsValid(dbid
))
1249 * Walk the database list and update the corresponding entry. If the
1250 * database is not on the list, we'll recreate the list.
1252 elem
= (DatabaseList
== NULL
) ? NULL
: DLGetHead(DatabaseList
);
1253 while (elem
!= NULL
)
1255 avl_dbase
*avdb
= DLE_VAL(elem
);
1257 if (avdb
->adl_datid
== dbid
)
1260 * add autovacuum_naptime seconds to the current time, and use
1261 * that as the new "next_worker" field for this database.
1263 avdb
->adl_next_worker
=
1264 TimestampTzPlusMilliseconds(now
, autovacuum_naptime
* 1000);
1266 DLMoveToFront(elem
);
1269 elem
= DLGetSucc(elem
);
1273 * If the database was not present in the database list, we rebuild the
1274 * list. It's possible that the database does not get into the list
1275 * anyway, for example if it's a database that doesn't have a pgstat
1276 * entry, but this is not a problem because we don't want to schedule
1277 * workers regularly into those in any case.
1280 rebuild_database_list(dbid
);
1285 * Called from postmaster to signal a failure to fork a process to become
1286 * a worker. The postmaster should kill(SIGUSR1) the launcher shortly
1287 * after calling this function.
1290 AutoVacWorkerFailed(void)
1292 AutoVacuumShmem
->av_signal
[AutoVacForkFailed
] = true;
1295 /* SIGHUP: set flag to re-read config file at next convenient time */
1297 avl_sighup_handler(SIGNAL_ARGS
)
1302 /* SIGUSR1: a worker is up and running, or just finished */
1304 avl_sigusr1_handler(SIGNAL_ARGS
)
1309 /* SIGTERM: time to die */
1311 avl_sigterm_handler(SIGNAL_ARGS
)
1317 * avl_quickdie occurs when signalled SIGQUIT from postmaster.
1319 * Some backend has bought the farm, so we need to stop what we're doing
1323 avl_quickdie(SIGNAL_ARGS
)
1325 PG_SETMASK(&BlockSig
);
1328 * DO NOT proc_exit() -- we're here because shared memory may be
1329 * corrupted, so we don't want to try to clean up our transaction. Just
1330 * nail the windows shut and get out of town.
1332 * Note we do exit(2) not exit(0). This is to force the postmaster into a
1333 * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
1334 * backend. This is necessary precisely because we don't clean up our
1335 * shared memory state.
1341 /********************************************************************
1342 * AUTOVACUUM WORKER CODE
1343 ********************************************************************/
1347 * forkexec routines for the autovacuum worker.
1349 * Format up the arglist, then fork and exec.
1352 avworker_forkexec(void)
1357 av
[ac
++] = "postgres";
1358 av
[ac
++] = "--forkavworker";
1359 av
[ac
++] = NULL
; /* filled in by postmaster_forkexec */
1362 Assert(ac
< lengthof(av
));
1364 return postmaster_forkexec(ac
, av
);
1368 * We need this set from the outside, before InitProcess is called
1371 AutovacuumWorkerIAm(void)
1373 am_autovacuum_worker
= true;
1378 * Main entry point for autovacuum worker process.
1380 * This code is heavily based on pgarch.c, q.v.
1383 StartAutoVacWorker(void)
1388 switch ((worker_pid
= avworker_forkexec()))
1390 switch ((worker_pid
= fork_process()))
1395 (errmsg("could not fork autovacuum process: %m")));
1398 #ifndef EXEC_BACKEND
1400 /* in postmaster child ... */
1401 /* Close the postmaster's sockets */
1402 ClosePostmasterPorts(false);
1404 /* Lose the postmaster's on-exit routines */
1407 AutoVacWorkerMain(0, NULL
);
1411 return (int) worker_pid
;
1414 /* shouldn't get here */
1421 NON_EXEC_STATIC
void
1422 AutoVacWorkerMain(int argc
, char *argv
[])
1424 sigjmp_buf local_sigjmp_buf
;
1427 /* we are a postmaster subprocess now */
1428 IsUnderPostmaster
= true;
1429 am_autovacuum_worker
= true;
1431 /* reset MyProcPid */
1432 MyProcPid
= getpid();
1434 /* record Start Time for logging */
1435 MyStartTime
= time(NULL
);
1437 /* Identify myself via ps */
1438 init_ps_display("autovacuum worker process", "", "", "");
1440 SetProcessingMode(InitProcessing
);
1443 * If possible, make this process a group leader, so that the postmaster
1444 * can signal any child processes too. (autovacuum probably never has
1445 * any child processes, but for consistency we make all postmaster
1446 * child processes do this.)
1450 elog(FATAL
, "setsid() failed: %m");
1454 * Set up signal handlers. We operate on databases much like a regular
1455 * backend, so we use the same signal handling. See equivalent code in
1458 * Currently, we don't pay attention to postgresql.conf changes that
1459 * happen during a single daemon iteration, so we can ignore SIGHUP.
1461 pqsignal(SIGHUP
, SIG_IGN
);
1464 * SIGINT is used to signal cancelling the current table's vacuum;
1465 * SIGTERM means abort and exit cleanly, and SIGQUIT means abandon ship.
1467 pqsignal(SIGINT
, StatementCancelHandler
);
1468 pqsignal(SIGTERM
, die
);
1469 pqsignal(SIGQUIT
, quickdie
);
1470 pqsignal(SIGALRM
, handle_sig_alarm
);
1472 pqsignal(SIGPIPE
, SIG_IGN
);
1473 pqsignal(SIGUSR1
, CatchupInterruptHandler
);
1474 /* We don't listen for async notifies */
1475 pqsignal(SIGUSR2
, SIG_IGN
);
1476 pqsignal(SIGFPE
, FloatExceptionHandler
);
1477 pqsignal(SIGCHLD
, SIG_DFL
);
1479 /* Early initialization */
1483 * Create a per-backend PGPROC struct in shared memory, except in the
1484 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
1485 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
1486 * had to do some stuff with LWLocks).
1488 #ifndef EXEC_BACKEND
1493 * If an exception is encountered, processing resumes here.
1495 * See notes in postgres.c about the design of this coding.
1497 if (sigsetjmp(local_sigjmp_buf
, 1) != 0)
1499 /* Prevents interrupts while cleaning up */
1502 /* Report the error to the server log */
1506 * We can now go away. Note that because we called InitProcess, a
1507 * callback was registered to do ProcKill, which will clean up
1513 /* We can now handle ereport(ERROR) */
1514 PG_exception_stack
= &local_sigjmp_buf
;
1516 PG_SETMASK(&UnBlockSig
);
1519 * Force zero_damaged_pages OFF in the autovac process, even if it is set
1520 * in postgresql.conf. We don't really want such a dangerous option being
1521 * applied non-interactively.
1523 SetConfigOption("zero_damaged_pages", "false", PGC_SUSET
, PGC_S_OVERRIDE
);
1526 * Force statement_timeout to zero to keep a timeout setting from
1527 * preventing regular maintenance from being executed.
1529 SetConfigOption("statement_timeout", "0", PGC_SUSET
, PGC_S_OVERRIDE
);
1532 * Get the info about the database we're going to work on.
1534 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
1537 * beware of startingWorker being INVALID; this should normally not happen,
1538 * but if a worker fails after forking and before this, the launcher might
1539 * have decided to remove it from the queue and start again.
1541 if (AutoVacuumShmem
->av_startingWorker
!= INVALID_OFFSET
)
1543 MyWorkerInfo
= (WorkerInfo
) MAKE_PTR(AutoVacuumShmem
->av_startingWorker
);
1544 dbid
= MyWorkerInfo
->wi_dboid
;
1545 MyWorkerInfo
->wi_workerpid
= MyProcPid
;
1547 /* insert into the running list */
1548 SHMQueueInsertBefore(&AutoVacuumShmem
->av_runningWorkers
,
1549 &MyWorkerInfo
->wi_links
);
1552 * remove from the "starting" pointer, so that the launcher can start
1553 * a new worker if required
1555 AutoVacuumShmem
->av_startingWorker
= INVALID_OFFSET
;
1556 LWLockRelease(AutovacuumLock
);
1558 on_shmem_exit(FreeWorkerInfo
, 0);
1560 /* wake up the launcher */
1561 if (AutoVacuumShmem
->av_launcherpid
!= 0)
1562 kill(AutoVacuumShmem
->av_launcherpid
, SIGUSR1
);
1566 /* no worker entry for me, go away */
1567 elog(WARNING
, "autovacuum worker started without a worker entry");
1569 LWLockRelease(AutovacuumLock
);
1572 if (OidIsValid(dbid
))
1577 * Report autovac startup to the stats collector. We deliberately do
1578 * this before InitPostgres, so that the last_autovac_time will get
1579 * updated even if the connection attempt fails. This is to prevent
1580 * autovac from getting "stuck" repeatedly selecting an unopenable
1581 * database, rather than making any progress on stuff it can connect
1584 pgstat_report_autovac(dbid
);
1587 * Connect to the selected database
1589 * Note: if we have selected a just-deleted database (due to using
1590 * stale stats info), we'll fail and exit here.
1592 InitPostgres(NULL
, dbid
, NULL
, &dbname
);
1593 SetProcessingMode(NormalProcessing
);
1594 set_ps_display(dbname
, false);
1596 (errmsg("autovacuum: processing database \"%s\"", dbname
)));
1598 /* And do an appropriate amount of work */
1599 recentXid
= ReadNewTransactionId();
1604 * The launcher will be notified of my death in ProcKill, *if* we managed
1605 * to get a worker slot at all
1608 /* All done, go away */
1613 * Return a WorkerInfo to the free list
1616 FreeWorkerInfo(int code
, Datum arg
)
1618 if (MyWorkerInfo
!= NULL
)
1620 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
1623 * Wake the launcher up so that he can launch a new worker immediately
1624 * if required. We only save the launcher's PID in local memory here;
1625 * the actual signal will be sent when the PGPROC is recycled. Note
1626 * that we always do this, so that the launcher can rebalance the cost
1627 * limit setting of the remaining workers.
1629 * We somewhat ignore the risk that the launcher changes its PID
1630 * between our reading it and the actual kill; we expect ProcKill to be
1631 * called shortly after us, and we assume that PIDs are not reused too
1632 * quickly after a process exits.
1634 AutovacuumLauncherPid
= AutoVacuumShmem
->av_launcherpid
;
1636 SHMQueueDelete(&MyWorkerInfo
->wi_links
);
1637 MyWorkerInfo
->wi_links
.next
= AutoVacuumShmem
->av_freeWorkers
;
1638 MyWorkerInfo
->wi_dboid
= InvalidOid
;
1639 MyWorkerInfo
->wi_tableoid
= InvalidOid
;
1640 MyWorkerInfo
->wi_workerpid
= 0;
1641 MyWorkerInfo
->wi_launchtime
= 0;
1642 MyWorkerInfo
->wi_cost_delay
= 0;
1643 MyWorkerInfo
->wi_cost_limit
= 0;
1644 MyWorkerInfo
->wi_cost_limit_base
= 0;
1645 AutoVacuumShmem
->av_freeWorkers
= MAKE_OFFSET(MyWorkerInfo
);
1646 /* not mine anymore */
1647 MyWorkerInfo
= NULL
;
1650 * now that we're inactive, cause a rebalancing of the surviving
1653 AutoVacuumShmem
->av_signal
[AutoVacRebalance
] = true;
1654 LWLockRelease(AutovacuumLock
);
1659 * Update the cost-based delay parameters, so that multiple workers each
1660 * consume a fraction of the total available I/O.
1663 AutoVacuumUpdateDelay(void)
1667 VacuumCostDelay
= MyWorkerInfo
->wi_cost_delay
;
1668 VacuumCostLimit
= MyWorkerInfo
->wi_cost_limit
;
1673 * autovac_balance_cost
1674 * Recalculate the cost limit setting for each active worker.
1676 * Caller must hold the AutovacuumLock in exclusive mode.
1679 autovac_balance_cost(void)
1683 * note: in cost_limit, zero also means use value from elsewhere, because
1684 * zero is not a valid value.
1686 int vac_cost_limit
= (autovacuum_vac_cost_limit
> 0 ?
1687 autovacuum_vac_cost_limit
: VacuumCostLimit
);
1688 int vac_cost_delay
= (autovacuum_vac_cost_delay
>= 0 ?
1689 autovacuum_vac_cost_delay
: VacuumCostDelay
);
1693 /* not set? nothing to do */
1694 if (vac_cost_limit
<= 0 || vac_cost_delay
<= 0)
1697 /* calculate the total base cost limit of active workers */
1699 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1700 &AutoVacuumShmem
->av_runningWorkers
,
1701 offsetof(WorkerInfoData
, wi_links
));
1704 if (worker
->wi_workerpid
!= 0 &&
1705 worker
->wi_cost_limit_base
> 0 && worker
->wi_cost_delay
> 0)
1707 (double) worker
->wi_cost_limit_base
/ worker
->wi_cost_delay
;
1709 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1711 offsetof(WorkerInfoData
, wi_links
));
1713 /* there are no cost limits -- nothing to do */
1714 if (cost_total
<= 0)
1718 * Adjust the cost limit of each active worker to balance the total of
1719 * cost limits to autovacuum_vacuum_cost_limit.
1721 cost_avail
= (double) vac_cost_limit
/ vac_cost_delay
;
1722 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1723 &AutoVacuumShmem
->av_runningWorkers
,
1724 offsetof(WorkerInfoData
, wi_links
));
1727 if (worker
->wi_workerpid
!= 0 &&
1728 worker
->wi_cost_limit_base
> 0 && worker
->wi_cost_delay
> 0)
1731 (cost_avail
* worker
->wi_cost_limit_base
/ cost_total
);
1734 * We put a lower bound of 1 to the cost_limit, to avoid division-
1735 * by-zero in the vacuum code.
1737 worker
->wi_cost_limit
= Max(Min(limit
, worker
->wi_cost_limit_base
), 1);
1739 elog(DEBUG2
, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
1740 worker
->wi_workerpid
, worker
->wi_dboid
,
1741 worker
->wi_tableoid
, worker
->wi_cost_limit
, worker
->wi_cost_delay
);
1744 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1746 offsetof(WorkerInfoData
, wi_links
));
1753 * Return a list of all databases. Note we cannot use pg_database,
1754 * because we aren't connected; we use the flat database file.
1757 get_database_list(void)
1761 char thisname
[NAMEDATALEN
];
1765 TransactionId db_frozenxid
;
1767 filename
= database_getflatfilename();
1768 db_file
= AllocateFile(filename
, "r");
1769 if (db_file
== NULL
)
1771 (errcode_for_file_access(),
1772 errmsg("could not open file \"%s\": %m", filename
)));
1774 while (read_pg_database_line(db_file
, thisname
, &db_id
,
1775 &db_tablespace
, &db_frozenxid
))
1779 avdb
= (avw_dbase
*) palloc(sizeof(avw_dbase
));
1781 avdb
->adw_datid
= db_id
;
1782 avdb
->adw_name
= pstrdup(thisname
);
1783 avdb
->adw_frozenxid
= db_frozenxid
;
1784 /* this gets set later: */
1785 avdb
->adw_entry
= NULL
;
1787 dblist
= lappend(dblist
, avdb
);
1797 * Process a database table-by-table
1799 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
1800 * order not to ignore shutdown commands for too long.
1808 HeapScanDesc relScan
;
1809 Form_pg_database dbForm
;
1810 List
*table_oids
= NIL
;
1811 List
*toast_oids
= NIL
;
1812 List
*table_toast_list
= NIL
;
1813 ListCell
* volatile cell
;
1814 PgStat_StatDBEntry
*shared
;
1815 PgStat_StatDBEntry
*dbentry
;
1816 BufferAccessStrategy bstrategy
;
1819 * StartTransactionCommand and CommitTransactionCommand will automatically
1820 * switch to other contexts. We need this one to keep the list of
1821 * relations to vacuum/analyze across transactions.
1823 AutovacMemCxt
= AllocSetContextCreate(TopMemoryContext
,
1825 ALLOCSET_DEFAULT_MINSIZE
,
1826 ALLOCSET_DEFAULT_INITSIZE
,
1827 ALLOCSET_DEFAULT_MAXSIZE
);
1828 MemoryContextSwitchTo(AutovacMemCxt
);
1831 * may be NULL if we couldn't find an entry (only happens if we
1832 * are forcing a vacuum for anti-wrap purposes).
1834 dbentry
= pgstat_fetch_stat_dbentry(MyDatabaseId
);
1836 /* Start a transaction so our commands have one to play into. */
1837 StartTransactionCommand();
1839 /* functions in indexes may want a snapshot set */
1840 ActiveSnapshot
= CopySnapshot(GetTransactionSnapshot());
1843 * Clean up any dead statistics collector entries for this DB. We always
1844 * want to do this exactly once per DB-processing cycle, even if we find
1845 * nothing worth vacuuming in the database.
1847 pgstat_vacuum_tabstat();
1850 * Find the pg_database entry and select the default freeze_min_age.
1851 * We use zero in template and nonconnectable databases,
1852 * else the system-wide default.
1854 tuple
= SearchSysCache(DATABASEOID
,
1855 ObjectIdGetDatum(MyDatabaseId
),
1857 if (!HeapTupleIsValid(tuple
))
1858 elog(ERROR
, "cache lookup failed for database %u", MyDatabaseId
);
1859 dbForm
= (Form_pg_database
) GETSTRUCT(tuple
);
1861 if (dbForm
->datistemplate
|| !dbForm
->datallowconn
)
1862 default_freeze_min_age
= 0;
1864 default_freeze_min_age
= vacuum_freeze_min_age
;
1866 ReleaseSysCache(tuple
);
1868 /* StartTransactionCommand switched memory contexts; switch back */
1869 MemoryContextSwitchTo(AutovacMemCxt
);
1871 /* The database hash where pgstat keeps shared relations */
1872 shared
= pgstat_fetch_stat_dbentry(InvalidOid
);
1874 classRel
= heap_open(RelationRelationId
, AccessShareLock
);
1875 avRel
= heap_open(AutovacuumRelationId
, AccessShareLock
);
1878 * Scan pg_class and determine which tables to vacuum.
1880 * The stats subsystem collects stats for toast tables independently of
1881 * the stats for their parent tables. We need to check those stats since
1882 * in cases with short, wide tables there might be proportionally much
1883 * more activity in the toast table than in its parent.
1885 * Since we can only issue VACUUM against the parent table, we need to
1886 * transpose a decision to vacuum a toast table into a decision to vacuum
1887 * its parent. There's no point in considering ANALYZE on a toast table,
1888 * either. To support this, we keep a list of OIDs of toast tables that
1889 * need vacuuming alongside the list of regular tables. Regular tables
1890 * will be entered into the table list even if they appear not to need
1891 * vacuuming; we go back and re-mark them after finding all the vacuumable
1894 relScan
= heap_beginscan(classRel
, SnapshotNow
, 0, NULL
);
1896 while ((tuple
= heap_getnext(relScan
, ForwardScanDirection
)) != NULL
)
1898 Form_pg_class classForm
= (Form_pg_class
) GETSTRUCT(tuple
);
1899 Form_pg_autovacuum avForm
= NULL
;
1900 PgStat_StatTabEntry
*tabentry
;
1904 /* Consider only regular and toast tables. */
1905 if (classForm
->relkind
!= RELKIND_RELATION
&&
1906 classForm
->relkind
!= RELKIND_TOASTVALUE
)
1910 * Skip temp tables (i.e. those in temp namespaces). We cannot safely
1911 * process other backends' temp tables.
1913 if (isAnyTempNamespace(classForm
->relnamespace
))
1916 relid
= HeapTupleGetOid(tuple
);
1918 /* Fetch the pg_autovacuum tuple for the relation, if any */
1919 avTup
= get_pg_autovacuum_tuple_relid(avRel
, relid
);
1920 if (HeapTupleIsValid(avTup
))
1921 avForm
= (Form_pg_autovacuum
) GETSTRUCT(avTup
);
1923 /* Fetch the pgstat entry for this table */
1924 tabentry
= get_pgstat_tabentry_relid(relid
, classForm
->relisshared
,
1927 relation_check_autovac(relid
, classForm
, avForm
, tabentry
,
1928 &table_oids
, &table_toast_list
, &toast_oids
);
1930 if (HeapTupleIsValid(avTup
))
1931 heap_freetuple(avTup
);
1934 heap_endscan(relScan
);
1935 heap_close(avRel
, AccessShareLock
);
1936 heap_close(classRel
, AccessShareLock
);
1939 * Add to the list of tables to vacuum, the OIDs of the tables that
1940 * correspond to the saved OIDs of toast tables needing vacuum.
1942 foreach(cell
, toast_oids
)
1944 Oid toastoid
= lfirst_oid(cell
);
1947 foreach(cell2
, table_toast_list
)
1949 av_relation
*ar
= lfirst(cell2
);
1951 if (ar
->ar_toastrelid
== toastoid
)
1953 table_oids
= lappend_oid(table_oids
, ar
->ar_relid
);
1959 list_free_deep(table_toast_list
);
1960 table_toast_list
= NIL
;
1961 list_free(toast_oids
);
1965 * Create a buffer access strategy object for VACUUM to use. We want
1966 * to use the same one across all the vacuum operations we perform,
1967 * since the point is for VACUUM not to blow out the shared cache.
1969 bstrategy
= GetAccessStrategy(BAS_VACUUM
);
1972 * create a memory context to act as fake PortalContext, so that the
1973 * contexts created in the vacuum code are cleaned up for each table.
1975 PortalContext
= AllocSetContextCreate(AutovacMemCxt
,
1976 "Autovacuum Portal",
1977 ALLOCSET_DEFAULT_INITSIZE
,
1978 ALLOCSET_DEFAULT_MINSIZE
,
1979 ALLOCSET_DEFAULT_MAXSIZE
);
1982 * Perform operations on collected tables.
1984 foreach(cell
, table_oids
)
1986 Oid relid
= lfirst_oid(cell
);
1991 CHECK_FOR_INTERRUPTS();
1994 * hold schedule lock from here until we're sure that this table
1995 * still needs vacuuming. We also need the AutovacuumLock to walk
1996 * the worker array, but we'll let go of that one quickly.
1998 LWLockAcquire(AutovacuumScheduleLock
, LW_EXCLUSIVE
);
1999 LWLockAcquire(AutovacuumLock
, LW_SHARED
);
2002 * Check whether the table is being vacuumed concurrently by another
2006 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
2007 &AutoVacuumShmem
->av_runningWorkers
,
2008 offsetof(WorkerInfoData
, wi_links
));
2012 if (worker
== MyWorkerInfo
)
2015 /* ignore workers in other databases */
2016 if (worker
->wi_dboid
!= MyDatabaseId
)
2019 if (worker
->wi_tableoid
== relid
)
2026 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
2028 offsetof(WorkerInfoData
, wi_links
));
2030 LWLockRelease(AutovacuumLock
);
2033 LWLockRelease(AutovacuumScheduleLock
);
2038 * Check whether pgstat data still says we need to vacuum this table.
2039 * It could have changed if something else processed the table while we
2042 * FIXME we ignore the possibility that the table was finished being
2043 * vacuumed in the last 500ms (PGSTAT_STAT_INTERVAL). This is a bug.
2045 MemoryContextSwitchTo(AutovacMemCxt
);
2046 tab
= table_recheck_autovac(relid
);
2049 /* someone else vacuumed the table */
2050 LWLockRelease(AutovacuumScheduleLock
);
2055 * Ok, good to go. Store the table in shared memory before releasing
2056 * the lock so that other workers don't vacuum it concurrently.
2058 MyWorkerInfo
->wi_tableoid
= relid
;
2059 LWLockRelease(AutovacuumScheduleLock
);
2061 /* Set the initial vacuum cost parameters for this table */
2062 VacuumCostDelay
= tab
->at_vacuum_cost_delay
;
2063 VacuumCostLimit
= tab
->at_vacuum_cost_limit
;
2066 * Advertise my cost delay parameters for the balancing algorithm, and
2069 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
2070 MyWorkerInfo
->wi_cost_delay
= tab
->at_vacuum_cost_delay
;
2071 MyWorkerInfo
->wi_cost_limit
= tab
->at_vacuum_cost_limit
;
2072 MyWorkerInfo
->wi_cost_limit_base
= tab
->at_vacuum_cost_limit
;
2073 autovac_balance_cost();
2074 LWLockRelease(AutovacuumLock
);
2076 /* clean up memory before each iteration */
2077 MemoryContextResetAndDeleteChildren(PortalContext
);
2080 * We will abort vacuuming the current table if we are interrupted, and
2081 * continue with the next one in schedule; but if anything else
2082 * happens, we will do our usual error handling which is to cause the
2083 * worker process to exit.
2088 MemoryContextSwitchTo(TopTransactionContext
);
2089 autovacuum_do_vac_analyze(tab
->at_relid
,
2092 tab
->at_freeze_min_age
,
2099 MemoryContextSwitchTo(TopTransactionContext
);
2100 errdata
= CopyErrorData();
2103 * If we errored out due to a cancel request, abort and restart the
2104 * transaction and go to the next table. Otherwise rethrow the
2105 * error so that the outermost handler deals with it.
2107 if (errdata
->sqlerrcode
== ERRCODE_QUERY_CANCELED
)
2110 elog(LOG
, "cancelling autovacuum of table \"%s.%s.%s\"",
2111 get_database_name(MyDatabaseId
),
2112 get_namespace_name(get_rel_namespace(tab
->at_relid
)),
2113 get_rel_name(tab
->at_relid
));
2115 AbortOutOfAnyTransaction();
2117 MemoryContextResetAndDeleteChildren(PortalContext
);
2119 /* restart our transaction for the following operations */
2120 StartTransactionCommand();
2121 RESUME_INTERRUPTS();
2133 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
2134 * We only need to do this once, not after each table.
2136 vac_update_datfrozenxid();
2138 /* Finally close out the last transaction. */
2139 CommitTransactionCommand();
2143 * Returns a copy of the pg_autovacuum tuple for the given relid, or NULL if
2144 * there isn't any. avRel is pg_autovacuum, already open and suitably locked.
2147 get_pg_autovacuum_tuple_relid(Relation avRel
, Oid relid
)
2149 ScanKeyData entry
[1];
2153 ScanKeyInit(&entry
[0],
2154 Anum_pg_autovacuum_vacrelid
,
2155 BTEqualStrategyNumber
, F_OIDEQ
,
2156 ObjectIdGetDatum(relid
));
2158 avScan
= systable_beginscan(avRel
, AutovacuumRelidIndexId
, true,
2159 SnapshotNow
, 1, entry
);
2161 avTup
= systable_getnext(avScan
);
2163 if (HeapTupleIsValid(avTup
))
2164 avTup
= heap_copytuple(avTup
);
2166 systable_endscan(avScan
);
2172 * get_pgstat_tabentry_relid
2174 * Fetch the pgstat entry of a table, either local to a database or shared.
2176 static PgStat_StatTabEntry
*
2177 get_pgstat_tabentry_relid(Oid relid
, bool isshared
, PgStat_StatDBEntry
*shared
,
2178 PgStat_StatDBEntry
*dbentry
)
2180 PgStat_StatTabEntry
*tabentry
= NULL
;
2184 if (PointerIsValid(shared
))
2185 tabentry
= hash_search(shared
->tables
, &relid
,
2188 else if (PointerIsValid(dbentry
))
2189 tabentry
= hash_search(dbentry
->tables
, &relid
,
2196 * relation_check_autovac
2198 * For a given relation (either a plain table or TOAST table), check whether it
2199 * needs vacuum or analyze.
2201 * Plain tables that need either are added to table_oids.  TOAST tables
2202 * that need vacuum are added to toast_oids.  Plain tables that don't need
2203 * either but which have a TOAST table are added, as a struct, to
2204 * table_toast_list. The latter is to allow appending the OIDs of the plain
2205 * tables whose TOAST table needs vacuuming into the plain tables list, which
2206 * allows us to substantially reduce the number of "rechecks" that we need to
2210 relation_check_autovac(Oid relid
, Form_pg_class classForm
,
2211 Form_pg_autovacuum avForm
, PgStat_StatTabEntry
*tabentry
,
2212 List
**table_oids
, List
**table_toast_list
,
2218 relation_needs_vacanalyze(relid
, avForm
, classForm
, tabentry
,
2219 &dovacuum
, &doanalyze
);
2221 if (classForm
->relkind
== RELKIND_TOASTVALUE
)
2224 *toast_oids
= lappend_oid(*toast_oids
, relid
);
2228 Assert(classForm
->relkind
== RELKIND_RELATION
);
2230 if (dovacuum
|| doanalyze
)
2231 *table_oids
= lappend_oid(*table_oids
, relid
);
2232 else if (OidIsValid(classForm
->reltoastrelid
))
2234 av_relation
*rel
= palloc(sizeof(av_relation
));
2236 rel
->ar_relid
= relid
;
2237 rel
->ar_toastrelid
= classForm
->reltoastrelid
;
2239 *table_toast_list
= lappend(*table_toast_list
, rel
);
2245 * table_recheck_autovac
2247 * Recheck whether a plain table still needs vacuum or analyze; be it because
2248 * it does directly, or because its TOAST table does. Return value is a valid
2249 * autovac_table pointer if it does, NULL otherwise.
2251 static autovac_table
*
2252 table_recheck_autovac(Oid relid
)
2254 Form_pg_autovacuum avForm
= NULL
;
2255 Form_pg_class classForm
;
2261 autovac_table
*tab
= NULL
;
2262 PgStat_StatTabEntry
*tabentry
;
2264 PgStat_StatDBEntry
*shared
;
2265 PgStat_StatDBEntry
*dbentry
;
2267 /* use fresh stats */
2268 autovac_refresh_stats();
2270 shared
= pgstat_fetch_stat_dbentry(InvalidOid
);
2271 dbentry
= pgstat_fetch_stat_dbentry(MyDatabaseId
);
2273 /* fetch a copy of the relation's pg_class tuple from the syscache */
2274 classTup
= SearchSysCacheCopy(RELOID
,
2275 ObjectIdGetDatum(relid
),
2277 if (!HeapTupleIsValid(classTup
))
2279 classForm
= (Form_pg_class
) GETSTRUCT(classTup
);
2281 /* fetch the pg_autovacuum entry, if any */
2282 avRel
= heap_open(AutovacuumRelationId
, AccessShareLock
);
2283 avTup
= get_pg_autovacuum_tuple_relid(avRel
, relid
);
2284 if (HeapTupleIsValid(avTup
))
2285 avForm
= (Form_pg_autovacuum
) GETSTRUCT(avTup
);
2287 /* fetch the pgstat table entry */
2288 tabentry
= get_pgstat_tabentry_relid(relid
, classForm
->relisshared
,
2291 relation_needs_vacanalyze(relid
, avForm
, classForm
, tabentry
,
2292 &dovacuum
, &doanalyze
);
2294 /* OK, it needs vacuum by itself */
2297 /* it doesn't need vacuum, but what about its TOAST table? */
2298 else if (OidIsValid(classForm
->reltoastrelid
))
2300 Oid toastrelid
= classForm
->reltoastrelid
;
2301 HeapTuple toastClassTup
;
2303 toastClassTup
= SearchSysCacheCopy(RELOID
,
2304 ObjectIdGetDatum(toastrelid
),
2306 if (HeapTupleIsValid(toastClassTup
))
2308 bool toast_dovacuum
;
2309 bool toast_doanalyze
;
2310 Form_pg_class toastClassForm
;
2311 PgStat_StatTabEntry
*toasttabentry
;
2313 toastClassForm
= (Form_pg_class
) GETSTRUCT(toastClassTup
);
2314 toasttabentry
= get_pgstat_tabentry_relid(toastrelid
,
2315 toastClassForm
->relisshared
,
2318 /* note we use the pg_autovacuum entry for the main table */
2319 relation_needs_vacanalyze(toastrelid
, avForm
, toastClassForm
,
2320 toasttabentry
, &toast_dovacuum
,
2322 /* we only consider VACUUM for toast tables */
2329 heap_freetuple(toastClassTup
);
2343 * Calculate the vacuum cost parameters and the minimum freeze age. If
2344 * there is a tuple in pg_autovacuum, use it; else, use the GUC
2345 * defaults. Note that the fields may contain "-1" (or indeed any
2346 * negative value), which means use the GUC defaults for each setting.
2347 * In cost_limit, the value 0 also means to use the value from
2352 vac_cost_limit
= (avForm
->vac_cost_limit
> 0) ?
2353 avForm
->vac_cost_limit
:
2354 ((autovacuum_vac_cost_limit
> 0) ?
2355 autovacuum_vac_cost_limit
: VacuumCostLimit
);
2357 vac_cost_delay
= (avForm
->vac_cost_delay
>= 0) ?
2358 avForm
->vac_cost_delay
:
2359 ((autovacuum_vac_cost_delay
>= 0) ?
2360 autovacuum_vac_cost_delay
: VacuumCostDelay
);
2362 freeze_min_age
= (avForm
->freeze_min_age
>= 0) ?
2363 avForm
->freeze_min_age
: default_freeze_min_age
;
2367 vac_cost_limit
= (autovacuum_vac_cost_limit
> 0) ?
2368 autovacuum_vac_cost_limit
: VacuumCostLimit
;
2370 vac_cost_delay
= (autovacuum_vac_cost_delay
>= 0) ?
2371 autovacuum_vac_cost_delay
: VacuumCostDelay
;
2373 freeze_min_age
= default_freeze_min_age
;
2376 tab
= palloc(sizeof(autovac_table
));
2377 tab
->at_relid
= relid
;
2378 tab
->at_dovacuum
= dovacuum
;
2379 tab
->at_doanalyze
= doanalyze
;
2380 tab
->at_freeze_min_age
= freeze_min_age
;
2381 tab
->at_vacuum_cost_limit
= vac_cost_limit
;
2382 tab
->at_vacuum_cost_delay
= vac_cost_delay
;
2385 heap_close(avRel
, AccessShareLock
);
2386 if (HeapTupleIsValid(avTup
))
2387 heap_freetuple(avTup
);
2388 heap_freetuple(classTup
);
2394 * relation_needs_vacanalyze
2396 * Check whether a relation needs to be vacuumed or analyzed; return each into
2397 * "dovacuum" and "doanalyze", respectively. avForm and tabentry can be NULL,
2398 * classForm shouldn't.
2400 * A table needs to be vacuumed if the number of dead tuples exceeds a
2401 * threshold. This threshold is calculated as
2403 * threshold = vac_base_thresh + vac_scale_factor * reltuples
2405 * For analyze, the analysis done is that the number of tuples inserted,
2406 * deleted and updated since the last analyze exceeds a threshold calculated
2407 * in the same fashion as above. Note that the collector actually stores
2408 * the number of tuples (both live and dead) that there were as of the last
2409 * analyze. This is asymmetric to the VACUUM case.
2411 * We also force vacuum if the table's relfrozenxid is more than freeze_max_age
2412 * transactions back.
2414 * A table whose pg_autovacuum.enabled value is false, is automatically
2415 * skipped (unless we have to vacuum it due to freeze_max_age). Thus
2416 * autovacuum can be disabled for specific tables. Also, when the stats
2417 * collector does not have data about a table, it will be skipped.
2419 * A table whose vac_base_thresh value is <0 takes the base value from the
2420 * autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor
2421 * value <0 is substituted with the value of
2422 * autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze.
/*
 * Decide whether relation "relid" should be vacuumed and/or analyzed and
 * store the two verdicts in *dovacuum and *doanalyze.
 *
 * relid     - OID of the relation; asserted to be a valid OID.
 * avForm    - per-table pg_autovacuum settings. It is tested for non-NULL
 *             before its "enabled" flag is read.
 * classForm - pg_class row of the relation; asserted non-NULL. Supplies
 *             reltuples, relfrozenxid and relname.
 * tabentry  - statistics entry for the relation, tested with
 *             PointerIsValid before any counter is read.
 *
 * Both thresholds have the form base_thresh + scale_factor * reltuples.
 * The vacuum threshold is compared with n_dead_tuples, the analyze
 * threshold with n_live_tuples + n_dead_tuples - last_anl_tuples.
 * force_vacuum, derived from relfrozenxid and freeze_max_age, is OR-ed into
 * the vacuum verdict and is the only input to it on the path that assigns
 * *dovacuum = force_vacuum directly.
 */
2425 relation_needs_vacanalyze(Oid relid
,
2426 Form_pg_autovacuum avForm
,
2427 Form_pg_class classForm
,
2428 PgStat_StatTabEntry
*tabentry
,
2429 /* output params below */
2434 float4 reltuples
; /* pg_class.reltuples */
2435 /* constants from pg_autovacuum or GUC variables */
2436 int vac_base_thresh
,
2438 float4 vac_scale_factor
,
2440 /* thresholds calculated from above constants */
2443 /* number of vacuum (resp. analyze) tuples at this time */
2446 /* freeze parameters */
2448 TransactionId xidForceLimit
;
2450 AssertArg(classForm
!= NULL
);
2451 AssertArg(OidIsValid(relid
));
2454 * Determine vacuum/analyze equation parameters. If there is a tuple in
2455 * pg_autovacuum, use it; else, use the GUC defaults. Note that the fields
2456 * may contain "-1" (or indeed any negative value), which means use the GUC
2457 * defaults for each setting.
/* Per-table group: each non-negative pg_autovacuum field wins over its GUC. */
2461 vac_scale_factor
= (avForm
->vac_scale_factor
>= 0) ?
2462 avForm
->vac_scale_factor
: autovacuum_vac_scale
;
2463 vac_base_thresh
= (avForm
->vac_base_thresh
>= 0) ?
2464 avForm
->vac_base_thresh
: autovacuum_vac_thresh
;
2466 anl_scale_factor
= (avForm
->anl_scale_factor
>= 0) ?
2467 avForm
->anl_scale_factor
: autovacuum_anl_scale
;
2468 anl_base_thresh
= (avForm
->anl_base_thresh
>= 0) ?
2469 avForm
->anl_base_thresh
: autovacuum_anl_thresh
;
/* Min() means a per-table freeze_max_age can lower the GUC limit but never raise it. */
2471 freeze_max_age
= (avForm
->freeze_max_age
>= 0) ?
2472 Min(avForm
->freeze_max_age
, autovacuum_freeze_max_age
) :
2473 autovacuum_freeze_max_age
;
/* Alternative group: plain GUC values, with no pg_autovacuum field consulted. */
2477 vac_scale_factor
= autovacuum_vac_scale
;
2478 vac_base_thresh
= autovacuum_vac_thresh
;
2480 anl_scale_factor
= autovacuum_anl_scale
;
2481 anl_base_thresh
= autovacuum_anl_thresh
;
2483 freeze_max_age
= autovacuum_freeze_max_age
;
2486 /* Force vacuum if table is at risk of wraparound */
2487 xidForceLimit
= recentXid
- freeze_max_age
;
/*
 * NOTE(review): presumably TransactionId arithmetic wraps, so a result below
 * FirstNormalTransactionId is pushed further back to skip the reserved XIDs
 * -- confirm against the TransactionId definition.
 */
2488 if (xidForceLimit
< FirstNormalTransactionId
)
2489 xidForceLimit
-= FirstNormalTransactionId
;
2490 force_vacuum
= (TransactionIdIsNormal(classForm
->relfrozenxid
) &&
2491 TransactionIdPrecedes(classForm
->relfrozenxid
,
2494 /* User disabled it in pg_autovacuum? (But ignore if at risk) */
2495 if (avForm
&& !avForm
->enabled
&& !force_vacuum
)
2502 if (PointerIsValid(tabentry
))
2504 reltuples
= classForm
->reltuples
;
2505 vactuples
= tabentry
->n_dead_tuples
;
/* Current live + dead tuples minus the count stored as last_anl_tuples. */
2506 anltuples
= tabentry
->n_live_tuples
+ tabentry
->n_dead_tuples
-
2507 tabentry
->last_anl_tuples
;
2509 vacthresh
= (float4
) vac_base_thresh
+ vac_scale_factor
* reltuples
;
2510 anlthresh
= (float4
) anl_base_thresh
+ anl_scale_factor
* reltuples
;
2513 * Note that we don't need to take special consideration for stat
2514 * reset, because if that happens, the last vacuum and analyze counts
2515 * will be reset too.
2517 elog(DEBUG3
, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
2518 NameStr(classForm
->relname
),
2519 vactuples
, vacthresh
, anltuples
, anlthresh
);
2521 /* Determine if this table needs vacuum or analyze. */
2522 *dovacuum
= force_vacuum
|| (vactuples
> vacthresh
);
2523 *doanalyze
= (anltuples
> anlthresh
);
2528 * Skip a table not found in stat hash, unless we have to force
2529 * vacuum for anti-wrap purposes. If it's not acted upon, there's
2530 * no need to vacuum it.
2532 *dovacuum
= force_vacuum
;
2536 /* ANALYZE refuses to work with pg_statistics */
2537 if (relid
== StatisticRelationId
)
2542 * autovacuum_do_vac_analyze
2543 * Vacuum and/or analyze the specified table
/*
 * Build a VacuumStmt for one relation and hand it to vacuum().
 *
 * relid     - OID of the target relation; passed to vacuum() as a
 *             one-element OID list and to autovac_report_activity().
 * dovacuum  - copied into vacstmt.vacuum.
 * doanalyze - copied into vacstmt.analyze.
 * bstrategy - buffer access strategy forwarded unchanged to vacuum().
 *
 * freeze_min_age is also copied into the statement; its declaration is on a
 * line not included in this excerpt.
 *
 * The statement is zeroed first, then filled with full = false,
 * verbose = false, relation = NULL and va_cols = NIL. The memory context is
 * switched to AutovacMemCxt before the statement is set up and restored
 * only after vacuum() returns.
 */
2546 autovacuum_do_vac_analyze(Oid relid
, bool dovacuum
, bool doanalyze
,
2548 BufferAccessStrategy bstrategy
)
2551 MemoryContext old_cxt
;
2553 MemSet(&vacstmt
, 0, sizeof(vacstmt
));
2556 * The list must survive transaction boundaries, so make sure we create it
2557 * in a long-lived context
2559 old_cxt
= MemoryContextSwitchTo(AutovacMemCxt
);
2561 /* Set up command parameters */
2562 vacstmt
.type
= T_VacuumStmt
;
2563 vacstmt
.vacuum
= dovacuum
;
2564 vacstmt
.full
= false;
2565 vacstmt
.analyze
= doanalyze
;
2566 vacstmt
.freeze_min_age
= freeze_min_age
;
2567 vacstmt
.verbose
= false;
2568 vacstmt
.relation
= NULL
; /* not used since we pass a relids list */
2569 vacstmt
.va_cols
= NIL
;
2571 /* Let pgstat know what we're doing */
2572 autovac_report_activity(&vacstmt
, relid
);
2574 vacuum(&vacstmt
, list_make1_oid(relid
), bstrategy
, true);
2575 MemoryContextSwitchTo(old_cxt
);
2579 * autovac_report_activity
2580 * Report to pgstat what autovacuum is doing
2582 * We send a SQL string corresponding to what the user would see if the
2583 * equivalent command was to be issued manually.
2585 * Note we assume that we are going to report the next command as soon as we're
2586 * done with the current one, and exit right after the last one, so we don't
2587 * bother to report "<IDLE>" or some such.
/*
 * Compose a command-like activity string for the statement and publish it
 * through pgstat_report_activity().
 *
 * vacstmt - statement being run; its vacuum and analyze flags select the
 *           text written by the first snprintf() calls (the format-string
 *           lines of those calls are not included in this excerpt).
 * relid   - OID of the relation; its name and namespace name are looked up
 *           and appended as " nspname.relname" only when both lookups
 *           return non-NULL.
 *
 * The buffer holds MAX_AUTOVAC_ACTIV_LEN = NAMEDATALEN * 2 + 32 bytes, and
 * the append step passes the remaining space (size minus current length)
 * to snprintf(). SetCurrentStatementStartTimestamp() is called just before
 * the string is reported.
 */
2590 autovac_report_activity(VacuumStmt
*vacstmt
, Oid relid
)
2592 char *relname
= get_rel_name(relid
);
2593 char *nspname
= get_namespace_name(get_rel_namespace(relid
));
2594 #define MAX_AUTOVAC_ACTIV_LEN (NAMEDATALEN * 2 + 32)
2595 char activity
[MAX_AUTOVAC_ACTIV_LEN
];
2597 /* Report the command and possible options */
2598 if (vacstmt
->vacuum
)
2599 snprintf(activity
, MAX_AUTOVAC_ACTIV_LEN
,
2601 vacstmt
->analyze
? " ANALYZE" : "");
2603 snprintf(activity
, MAX_AUTOVAC_ACTIV_LEN
,
2607 * Report the qualified name of the relation.
2609 * Paranoia is appropriate here in case relation was recently dropped
2610 * --- the lsyscache routines we just invoked will return NULL rather
2613 if (relname
&& nspname
)
2615 int len
= strlen(activity
);
2617 snprintf(activity
+ len
, MAX_AUTOVAC_ACTIV_LEN
- len
,
2618 " %s.%s", nspname
, relname
);
2621 /* Set statement_timestamp() to current time for pg_stat_activity */
2622 SetCurrentStatementStartTimestamp();
2624 pgstat_report_activity(activity
);
2628 * AutoVacuumingActive
2629 * Check GUC vars and report whether the autovacuum process should be running.
/*
 * Takes no arguments. The visible test is true when either
 * autovacuum_start_daemon or pgstat_track_counts is off; the statements
 * selected by the test, including the returns, are not included in this
 * excerpt.
 */
2633 AutoVacuumingActive(void)
2635 if (!autovacuum_start_daemon
|| !pgstat_track_counts
)
2642 * This is called at postmaster initialization.
2644 * All we do here is annoy the user if he got it wrong.
/*
 * Configuration sanity check: when autovacuum_start_daemon is on but
 * pgstat_track_counts is off, a message is emitted telling the user that
 * autovacuum was not started, with a hint to enable "track_counts".
 * NOTE(review): the function signature and the report call/severity are on
 * lines not included in this excerpt -- confirm both against the full file.
 */
2649 if (autovacuum_start_daemon
&& !pgstat_track_counts
)
2651 (errmsg("autovacuum not started because of misconfiguration"),
2652 errhint("Enable the \"track_counts\" option.")));
2656 * IsAutoVacuum functions
2657 * Return whether this is either a launcher autovacuum process or a worker process.
/* Accessor: returns the current value of the am_autovacuum_launcher flag. */
2661 IsAutoVacuumLauncherProcess(void)
2663 return am_autovacuum_launcher
;
/* Accessor: returns the current value of the am_autovacuum_worker flag. */
2667 IsAutoVacuumWorkerProcess(void)
2669 return am_autovacuum_worker
;
2674 * AutoVacuumShmemSize
2675 * Compute space needed for autovacuum-related shared memory
/*
 * Size computation for the autovacuum shared memory area:
 *   MAXALIGN(sizeof(AutoVacuumShmemStruct))
 *     + autovacuum_max_workers * sizeof(WorkerInfoData)
 * with the product and the sum formed through mul_size() and add_size().
 * The MAXALIGN padding matches AutoVacuumShmemInit(), which places the
 * WorkerInfoData array at that same aligned offset past the fixed struct.
 */
2678 AutoVacuumShmemSize(void)
2683 * Need the fixed struct and the array of WorkerInfoData.
2685 size
= sizeof(AutoVacuumShmemStruct
);
2686 size
= MAXALIGN(size
);
2687 size
= add_size(size
, mul_size(autovacuum_max_workers
,
2688 sizeof(WorkerInfoData
)));
2693 * AutoVacuumShmemInit
2694 * Allocate and initialize autovacuum-related shared memory
/*
 * Obtain the "AutoVacuum Data" shared memory struct, sized by
 * AutoVacuumShmemSize(), and set up its bookkeeping fields.
 *
 * A NULL result from ShmemInitStruct() is reported with
 * ERRCODE_OUT_OF_MEMORY. The visible initialization sets av_launcherpid to
 * 0, av_freeWorkers and av_startingWorker to INVALID_OFFSET, and
 * initializes the av_runningWorkers queue. Every WorkerInfoData slot,
 * located at MAXALIGN(sizeof(AutoVacuumShmemStruct)) bytes past the start
 * of the area, is then linked into the free list.
 * NOTE(review): the lines showing which statements the IsUnderPostmaster
 * test guards are not included in this excerpt -- confirm the scope.
 */
2697 AutoVacuumShmemInit(void)
2701 AutoVacuumShmem
= (AutoVacuumShmemStruct
*)
2702 ShmemInitStruct("AutoVacuum Data",
2703 AutoVacuumShmemSize(),
2705 if (AutoVacuumShmem
== NULL
)
2707 (errcode(ERRCODE_OUT_OF_MEMORY
),
2708 errmsg("not enough shared memory for autovacuum")));
2710 if (!IsUnderPostmaster
)
2717 AutoVacuumShmem
->av_launcherpid
= 0;
2718 AutoVacuumShmem
->av_freeWorkers
= INVALID_OFFSET
;
2719 SHMQueueInit(&AutoVacuumShmem
->av_runningWorkers
);
2720 AutoVacuumShmem
->av_startingWorker
= INVALID_OFFSET
;
2722 worker
= (WorkerInfo
) ((char *) AutoVacuumShmem
+
2723 MAXALIGN(sizeof(AutoVacuumShmemStruct
)));
2725 /* initialize the WorkerInfo free list */
/*
 * Each slot is pushed onto the head of the list: its next link takes the
 * old head and the head becomes this slot, so the last array element ends
 * up first in the free list.
 */
2726 for (i
= 0; i
< autovacuum_max_workers
; i
++)
2728 worker
[i
].wi_links
.next
= AutoVacuumShmem
->av_freeWorkers
;
2729 AutoVacuumShmem
->av_freeWorkers
= MAKE_OFFSET(&worker
[i
]);
2737 * autovac_refresh_stats
2738 * Refresh pgstats data for an autovacuum process
2740 * Cause the next pgstats read operation to obtain fresh data, but throttle
2741 * such refreshing in the autovacuum launcher. This is mostly to avoid
2742 * rereading the pgstats files too many times in quick succession when there
2743 * are many databases.
2745 * Note: we avoid throttling in the autovac worker, as it would be
2746 * counterproductive in the recheck logic.
/*
 * Calls pgstat_clear_snapshot(), with extra bookkeeping in the launcher.
 *
 * When IsAutoVacuumLauncherProcess() is true, the current timestamp is
 * compared with a function-static last_read (initially 0) through
 * TimestampDifferenceExceeds(); last_read is then updated to the current
 * time.
 * NOTE(review): the interval argument and the statement taken when the
 * difference is not exceeded are on lines not included in this excerpt;
 * presumably that path returns without clearing the snapshot -- confirm.
 */
2749 autovac_refresh_stats(void)
2751 if (IsAutoVacuumLauncherProcess())
2753 static TimestampTz last_read
= 0;
2754 TimestampTz current_time
;
2756 current_time
= GetCurrentTimestamp();
2758 if (!TimestampDifferenceExceeds(last_read
, current_time
,
2762 last_read
= current_time
;
2765 pgstat_clear_snapshot();