1 /*-------------------------------------------------------------------------
5 * PostgreSQL WAL archiver
7 * All functions relating to archiver are included here
9 * - All functions executed by archiver process
11 * - archiver is forked from postmaster, and the two
12 * processes then communicate using signals. All functions
13 * executed by postmaster are included in this file.
15 * Initial author: Simon Riggs simon@2ndquadrant.com
17 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
18 * Portions Copyright (c) 1994, Regents of the University of California
24 *-------------------------------------------------------------------------
35 #include "access/xlog_internal.h"
36 #include "libpq/pqsignal.h"
37 #include "miscadmin.h"
38 #include "postmaster/fork_process.h"
39 #include "postmaster/pgarch.h"
40 #include "postmaster/postmaster.h"
41 #include "storage/fd.h"
42 #include "storage/ipc.h"
43 #include "storage/pg_shmem.h"
44 #include "storage/pmsignal.h"
45 #include "utils/guc.h"
46 #include "utils/ps_status.h"
/*
 * Archiver tuning constants.
 *
 * PGARCH_AUTOWAKE_INTERVAL  - compared in the main loop against the time()
 *		difference since the last copy pass, so the unit is seconds.
 * PGARCH_RESTART_INTERVAL   - minimum number of seconds between two attempts
 *		to launch the archiver (checked against last_pgarch_start_time).
 * MIN_XFN_CHARS / MAX_XFN_CHARS - inclusive bounds on the length of a status
 *		file name once its 6-character suffix has been removed.
 * VALID_XFN_CHARS - passed to strspn() as the accept set, so it is a set of
 *		permitted characters and not a pattern; the repeated letters are
 *		harmless.
 * NUM_ARCHIVE_RETRIES - number of failed attempts on one file after which
 *		the copy loop gives up for now.
 */
53 #define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the
54 * archive status directory; in
56 #define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
57 * failed archiver; in seconds. */
60 * Archiver control info.
62 * We expect that archivable files within pg_xlog will have names between
63 * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
64 * appearing in VALID_XFN_CHARS. The status files in archive_status have
65 * corresponding names with ".ready" or ".done" appended.
68 #define MIN_XFN_CHARS 16
69 #define MAX_XFN_CHARS 40
70 #define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"
72 #define NUM_ARCHIVE_RETRIES 3
/*
 * File-local state.
 *
 * last_pgarch_start_time - time() value stored just before the most recent
 *		fork attempt; read by the launcher to throttle restarts.
 * got_SIGHUP, wakened - sig_atomic_t flags polled by the main loop wait
 *		condition.  They are declared volatile because they are meant to be
 *		written from signal handlers.
 *		NOTE(review): the handler bodies are not visible in this listing;
 *		presumably ArchSigHupHandler sets got_SIGHUP and pgarch_waken sets
 *		wakened - confirm.
 */
79 static time_t last_pgarch_start_time
;
82 * Flags set by interrupt handlers for later service in the main loop.
84 static volatile sig_atomic_t got_SIGHUP
= false;
85 static volatile sig_atomic_t wakened
= false;
88 * Local function forward declarations
/*
 * Forward declarations of the routines defined later in this file.
 *
 * Everything is file-local (static) except PgArchiverMain, whose linkage is
 * controlled by the NON_EXEC_STATIC macro defined outside this file.  The
 * three routines taking SIGNAL_ARGS are installed as signal handlers in
 * PgArchiverMain.
 * NOTE(review): pgarch_forkexec is presumably declared only under
 * EXEC_BACKEND (its definition is followed by an endif for that symbol);
 * the guard lines are not visible here - confirm.
 */
92 static pid_t
pgarch_forkexec(void);
95 NON_EXEC_STATIC
void PgArchiverMain(int argc
, char *argv
[]);
96 static void pgarch_exit(SIGNAL_ARGS
);
97 static void ArchSigHupHandler(SIGNAL_ARGS
);
98 static void pgarch_waken(SIGNAL_ARGS
);
99 static void pgarch_MainLoop(void);
100 static void pgarch_ArchiverCopyLoop(void);
101 static bool pgarch_archiveXlog(char *xlog
);
102 static bool pgarch_readyXlog(char *xlog
);
103 static void pgarch_archiveDone(char *xlog
);
106 /* ------------------------------------------------------------
107 * Public functions called from postmaster follow
108 * ------------------------------------------------------------
/*
 * Postmaster-side launcher for the archiver process.
 *
 * NOTE(review): the function header, local declarations, case labels and
 * early-return statements are not visible in this listing; the name is
 * presumably pgarch_start - confirm against postmaster/pgarch.h.
 *
 * What the visible code establishes:
 *	- Nothing is launched unless XLogArchivingActive() is true.
 *	- Launches are throttled: the attempt is skipped while the elapsed time
 *	  since last_pgarch_start_time is below PGARCH_RESTART_INTERVAL.  Both
 *	  sides are cast to unsigned int, so a clock that moved backwards shows
 *	  up as a very large elapsed time and does not block the restart.
 *	- last_pgarch_start_time is updated before the fork, so a failed fork
 *	  also counts as an attempt for throttling purposes.
 *	- The child is created through pgarch_forkexec() or fork_process();
 *	  two alternative switch headers are present, presumably selected by an
 *	  EXEC_BACKEND conditional whose directive lines are missing.
 *	- In the fork_process() child: postmaster sockets are closed, shared
 *	  memory is detached, and PgArchiverMain(0, NULL) is entered.
 *	- The child PID is returned to the caller narrowed to int.
 */
114 * Called from postmaster at startup or after an existing archiver
115 * died. Attempt to fire up a fresh archiver process.
117 * Returns PID of child process, or 0 if fail.
119 * Note: if fail, we will be called again from the postmaster main loop.
128 * Do nothing if no archiver needed
130 if (!XLogArchivingActive())
134 * Do nothing if too soon since last archiver start. This is a safety
135 * valve to protect against continuous respawn attempts if the archiver is
136 * dying immediately at launch. Note that since we will be re-called from
137 * the postmaster main loop, we will get another chance later.
139 curtime
= time(NULL
);
140 if ((unsigned int) (curtime
- last_pgarch_start_time
) <
141 (unsigned int) PGARCH_RESTART_INTERVAL
)
143 last_pgarch_start_time
= curtime
;
146 switch ((pgArchPid
= pgarch_forkexec()))
148 switch ((pgArchPid
= fork_process()))
153 (errmsg("could not fork archiver: %m")));
158 /* in postmaster child ... */
159 /* Close the postmaster's sockets */
160 ClosePostmasterPorts(false);
162 /* Lose the postmaster's on-exit routines */
165 /* Drop our connection to postmaster's shared memory, as well */
166 PGSharedMemoryDetach();
168 PgArchiverMain(0, NULL
);
173 return (int) pgArchPid
;
176 /* shouldn't get here */
180 /* ------------------------------------------------------------
181 * Local functions called by archiver follow
182 * ------------------------------------------------------------
/*
 * pgarch_forkexec: build the argument vector for a re-executed archiver
 * and hand it to postmaster_forkexec().
 *
 * The vector holds, in order: the program name "postgres", the switch
 * "--forkarch", and one NULL placeholder slot that postmaster_forkexec is
 * expected to fill in.  The Assert checks that the number of slots used
 * stays below the capacity of av.  The value returned by
 * postmaster_forkexec(ac, av) is passed straight back to the caller (the
 * forward declaration gives the return type as pid_t).
 *
 * NOTE(review): the declarations of av and ac are not visible in this
 * listing, and the opening EXEC_BACKEND conditional is missing; only the
 * closing endif is shown.
 */
189 * pgarch_forkexec() -
191 * Format up the arglist for, then fork and exec, archive process
194 pgarch_forkexec(void)
199 av
[ac
++] = "postgres";
201 av
[ac
++] = "--forkarch";
203 av
[ac
++] = NULL
; /* filled in by postmaster_forkexec */
206 Assert(ac
< lengthof(av
));
208 return postmaster_forkexec(ac
, av
);
210 #endif /* EXEC_BACKEND */
/*
 * PgArchiverMain: entry point executed inside the archiver process.
 *
 * argc and argv are accepted but not referenced by any visible line.
 *
 * Visible setup steps, in order:
 *	1. Mark the process as a postmaster child (IsUnderPostmaster), refresh
 *	   MyProcPid from getpid(), and record MyStartTime.
 *	2. A failing setsid() is reported at FATAL level.
 *	3. Install signal dispositions:
 *		SIGHUP  -> ArchSigHupHandler
 *		SIGQUIT -> pgarch_exit
 *		SIGUSR1 -> pgarch_waken
 *		SIGINT, SIGTERM, SIGALRM, SIGPIPE, SIGUSR2 -> ignored
 *		SIGCHLD, SIGTTIN, SIGTTOU, SIGCONT, SIGWINCH -> default action
 *	   then unblock signals with PG_SETMASK(&UnBlockSig).
 *	4. Set the ps display to "archiver process".
 *
 * NOTE(review): SIGCHLD is presumably left at its default so that system()
 * in pgarch_archiveXlog can wait for the archive command - confirm.
 * NOTE(review): the setsid() call itself, any conditional around it, and
 * whatever follows init_ps_display (presumably the call into
 * pgarch_MainLoop and the process exit) are not visible in this listing.
 */
216 * The argc/argv parameters are valid only in EXEC_BACKEND case. However,
217 * since we don't use 'em, it hardly matters...
220 PgArchiverMain(int argc
, char *argv
[])
222 IsUnderPostmaster
= true; /* we are a postmaster subprocess now */
224 MyProcPid
= getpid(); /* reset MyProcPid */
226 MyStartTime
= time(NULL
); /* record Start Time for logging */
229 * If possible, make this process a group leader, so that the postmaster
230 * can signal any child processes too.
234 elog(FATAL
, "setsid() failed: %m");
238 * Ignore all signals usually bound to some action in the postmaster,
239 * except for SIGHUP, SIGUSR1 and SIGQUIT.
241 pqsignal(SIGHUP
, ArchSigHupHandler
);
242 pqsignal(SIGINT
, SIG_IGN
);
243 pqsignal(SIGTERM
, SIG_IGN
);
244 pqsignal(SIGQUIT
, pgarch_exit
);
245 pqsignal(SIGALRM
, SIG_IGN
);
246 pqsignal(SIGPIPE
, SIG_IGN
);
247 pqsignal(SIGUSR1
, pgarch_waken
);
248 pqsignal(SIGUSR2
, SIG_IGN
);
249 pqsignal(SIGCHLD
, SIG_DFL
);
250 pqsignal(SIGTTIN
, SIG_DFL
);
251 pqsignal(SIGTTOU
, SIG_DFL
);
252 pqsignal(SIGCONT
, SIG_DFL
);
253 pqsignal(SIGWINCH
, SIG_DFL
);
254 PG_SETMASK(&UnBlockSig
);
257 * Identify myself via ps
259 init_ps_display("archiver process", "", "", "");
/*
 * pgarch_exit: handler installed for SIGQUIT by PgArchiverMain.
 *
 * According to the comment text below, the handler leaves immediately and
 * does not try to finish pending archive copies.
 * NOTE(review): the statement that actually terminates the process is not
 * visible in this listing - confirm the exit call and status used.
 */
266 /* SIGQUIT signal handler for archiver process */
268 pgarch_exit(SIGNAL_ARGS
)
271 * For now, we just nail the doors shut and get out of town. It might
272 * seem cleaner to finish up any pending archive copies, but there's a
273 * nontrivial risk that init will kill us partway through.
/*
 * ArchSigHupHandler: handler installed for SIGHUP by PgArchiverMain.
 *
 * NOTE(review): the body is not visible in this listing; presumably it only
 * sets got_SIGHUP so that the main loop re-reads the configuration on its
 * next pass - confirm.
 */
278 /* SIGHUP: set flag to re-read config file at next convenient time */
280 ArchSigHupHandler(SIGNAL_ARGS
)
/*
 * pgarch_waken: handler installed for SIGUSR1 by PgArchiverMain.
 *
 * NOTE(review): the body is not visible in this listing; presumably it only
 * sets the wakened flag that the main loop wait condition tests - confirm.
 */
285 /* SIGUSR1 signal handler for archiver process */
287 pgarch_waken(SIGNAL_ARGS
)
/*
 * pgarch_MainLoop: service loop of the archiver process.
 *
 * Shape of the visible code:
 *	- The outer construct is a do/while that repeats for as long as
 *	  PostmasterIsAlive(true) holds.
 *	- A configuration reload is done with ProcessConfigFile(PGC_SIGHUP).
 *	- One pass of pgarch_ArchiverCopyLoop() is run and its completion time
 *	  is stored in last_copy_time.
 *	- The process then waits while neither wakened nor got_SIGHUP is set.
 *	  Inside the wait, the elapsed time since last_copy_time is compared
 *	  with PGARCH_AUTOWAKE_INTERVAL; both sides are cast to unsigned int.
 *	- last_copy_time starts at 0, so before the first copy pass the elapsed
 *	  time computed from it is far above the autowake interval.
 *
 * NOTE(review): the conditions guarding the reload and the copy pass, the
 * statements that clear the two flags, the sleep call and the statement
 * taken when the interval has elapsed are not visible in this listing.
 */
295 * Main loop for archiver
298 pgarch_MainLoop(void)
300 time_t last_copy_time
= 0;
303 * We run the copy loop immediately upon entry, in case there are
304 * unarchived files left over from a previous database run (or maybe the
305 * archiver died unexpectedly). After that we wait for a signal or
306 * timeout before doing more.
312 /* Check for config update */
316 ProcessConfigFile(PGC_SIGHUP
);
319 /* Do what we're here for */
323 pgarch_ArchiverCopyLoop();
324 last_copy_time
= time(NULL
);
328 * There shouldn't be anything for the archiver to do except to wait
329 * for a signal ... however, the archiver exists to protect our data,
330 * so she wakes up occasionally to allow herself to be proactive.
332 * On some platforms, signals won't interrupt the sleep. To ensure we
333 * respond reasonably promptly when someone signals us, break down the
334 * sleep into 1-second increments, and check for interrupts after each
337 while (!(wakened
|| got_SIGHUP
))
342 curtime
= time(NULL
);
343 if ((unsigned int) (curtime
- last_copy_time
) >=
344 (unsigned int) PGARCH_AUTOWAKE_INTERVAL
)
347 } while (PostmasterIsAlive(true));
/*
 * pgarch_ArchiverCopyLoop: archive every segment currently marked ready.
 *
 * Visible behavior:
 *	- xlog is sized MAX_XFN_CHARS + 1, matching the longest base name that
 *	  pgarch_readyXlog accepts plus the terminator.
 *	- When XLogArchiveCommandSet() is false a message is reported and, per
 *	  the inline comment, nothing more can be done in this pass.
 *	- The outer loop asks pgarch_readyXlog for the next file until it
 *	  returns false.
 *	- For each file there is an inner retry loop: processing is abandoned
 *	  when the postmaster is no longer alive; on a successful
 *	  pgarch_archiveXlog the file is marked done and the retry loop is
 *	  left; otherwise the failure counter is incremented.
 *	- Once the counter reaches NUM_ARCHIVE_RETRIES a message is reported
 *	  and the whole function returns, leaving the remaining files for a
 *	  later pass.  Below that limit the loop sleeps 1000000 microseconds
 *	  before trying the same file again.
 *
 * NOTE(review): the declaration and reset of the failure counter, the
 * report levels, and the statement taken when the postmaster has died are
 * not visible in this listing.
 */
351 * pgarch_ArchiverCopyLoop
353 * Archives all outstanding xlogs then returns
356 pgarch_ArchiverCopyLoop(void)
358 char xlog
[MAX_XFN_CHARS
+ 1];
360 if (!XLogArchiveCommandSet())
363 (errmsg("archive_mode enabled, yet archive_command is not set")));
364 /* can't do anything if no command ... */
369 * loop through all xlogs with archive_status of .ready and archive
370 * them...mostly we expect this to be a single file, though it is possible
371 * some backend will add files onto the list of those that need archiving
372 * while we are still copying earlier archives
374 while (pgarch_readyXlog(xlog
))
380 /* Abandon processing if we notice our postmaster has died */
381 if (!PostmasterIsAlive(true))
384 if (pgarch_archiveXlog(xlog
))
387 pgarch_archiveDone(xlog
);
388 break; /* out of inner retry loop */
392 if (++failures
>= NUM_ARCHIVE_RETRIES
)
395 (errmsg("transaction log file \"%s\" could not be archived: too many failures",
397 return; /* give up archiving for now */
399 pg_usleep(1000000L); /* wait a bit before retrying */
/*
 * pgarch_archiveXlog: run the configured archive command for one segment.
 *
 * xlog is the bare file name; pathname is built as XLOGDIR "/" xlog.
 *
 * The command line is produced by scanning XLogArchiveCommand one character
 * at a time into xlogarchcmd:
 *	%p  is replaced by pathname, then converted with make_native_path
 *	%f  is replaced by xlog
 *	%%  becomes a single percent sign
 *	any other use of percent is copied through unchanged
 * endp points at the last byte of xlogarchcmd, so the strlcpy bound
 * endp - dp always leaves room for the terminator.
 *
 * The finished command is logged and executed with system().  On failure
 * the report level depends on how the command ended: FATAL when the wait
 * status shows death by signal or an exit status above 128, LOG otherwise.
 * A success message naming the file is logged when the command succeeds.
 * The forward declaration gives the return type as bool.
 *
 * NOTE(review): xlog and pathname are inserted into a shell command without
 * quoting.  Names produced by pgarch_readyXlog are restricted to
 * VALID_XFN_CHARS, which contains no shell metacharacters; other callers
 * would need to guarantee the same.
 * NOTE(review): the declarations of dp, endp, sp and rc, the percent-escape
 * dispatch itself, the test applied to rc and the return statements are not
 * visible in this listing.
 */
408 * Invokes system(3) to copy one archive file to wherever it should go
410 * Returns true if successful
413 pgarch_archiveXlog(char *xlog
)
415 char xlogarchcmd
[MAXPGPATH
];
416 char pathname
[MAXPGPATH
];
422 snprintf(pathname
, MAXPGPATH
, XLOGDIR
"/%s", xlog
);
425 * construct the command to be executed
428 endp
= xlogarchcmd
+ MAXPGPATH
- 1;
431 for (sp
= XLogArchiveCommand
; *sp
; sp
++)
438 /* %p: relative path of source file */
440 strlcpy(dp
, pathname
, endp
- dp
);
441 make_native_path(dp
);
445 /* %f: filename of source file */
447 strlcpy(dp
, xlog
, endp
- dp
);
451 /* convert %% to a single % */
457 /* otherwise treat the % as not special */
472 (errmsg_internal("executing archive command \"%s\"",
474 rc
= system(xlogarchcmd
);
478 * If either the shell itself, or a called command, died on a signal,
479 * abort the archiver. We do this because system() ignores SIGINT and
480 * SIGQUIT while waiting; so a signal is very likely something that
481 * should have interrupted us too. If we overreact it's no big deal,
482 * the postmaster will just start the archiver again.
484 * Per the Single Unix Spec, shells report exit status > 128 when
485 * a called command died on a signal.
487 bool signaled
= WIFSIGNALED(rc
) || WEXITSTATUS(rc
) > 128;
489 ereport(signaled
? FATAL
: LOG
,
490 (errmsg("archive command \"%s\" failed: return code %d",
496 (errmsg("archived transaction log file \"%s\"", xlog
)));
/*
 * pgarch_readyXlog: find the next segment to archive.
 *
 * The directory XLOGDIR/archive_status is scanned and, among the entries
 * that qualify, the one whose name compares lowest under strcmp is kept in
 * newxlog.  An entry qualifies when:
 *	- its length minus the 6-character suffix lies between MIN_XFN_CHARS
 *	  and MAX_XFN_CHARS inclusive,
 *	- at least that many leading characters belong to VALID_XFN_CHARS, and
 *	- the name ends in .ready.
 * After the scan the suffix is cut off and the base name is copied into
 * xlog with strcpy.  The caller must therefore supply a buffer of at least
 * MAX_XFN_CHARS + 1 bytes.
 *
 * newxlog is sized MAX_XFN_CHARS + 6 + 1: longest base name, suffix and
 * terminator, which is the longest string the length test lets through.
 *
 * The strspn result (size_t) is compared with basenamelen (int).  This is
 * safe only because the lower-bound test on basenamelen comes first in the
 * short-circuit chain, so the value is known to be positive at that point.
 *
 * NOTE(review): the declarations of rldir and rlde, the handling of a
 * failed AllocateDir, the first-match bookkeeping, the directory close and
 * the return statements are not visible in this listing.
 */
504 * Return name of the oldest xlog file that has not yet been archived.
505 * No notification is set that file archiving is now in progress, so
506 * this would need to be extended if multiple concurrent archival
507 * tasks were created. If a failure occurs, we will completely
508 * re-copy the file at the next available opportunity.
510 * It is important that we return the oldest, so that we archive xlogs
511 * in order that they were written, for two reasons:
512 * 1) to maintain the sequential chain of xlogs required for recovery
513 * 2) because the oldest ones will sooner become candidates for
514 * recycling at time of checkpoint
516 * NOTE: the "oldest" comparison will presently consider all segments of
517 * a timeline with a smaller ID to be older than all segments of a timeline
518 * with a larger ID; the net result being that past timelines are given
519 * higher priority for archiving. This seems okay, or at least not
520 * obviously worth changing.
523 pgarch_readyXlog(char *xlog
)
526 * open xlog status directory and read through list of xlogs that have the
527 * .ready suffix, looking for earliest file. It is possible to optimise
528 * this code, though only a single file is expected on the vast majority
531 char XLogArchiveStatusDir
[MAXPGPATH
];
532 char newxlog
[MAX_XFN_CHARS
+ 6 + 1];
537 snprintf(XLogArchiveStatusDir
, MAXPGPATH
, XLOGDIR
"/archive_status");
538 rldir
= AllocateDir(XLogArchiveStatusDir
);
541 (errcode_for_file_access(),
542 errmsg("could not open archive status directory \"%s\": %m",
543 XLogArchiveStatusDir
)));
545 while ((rlde
= ReadDir(rldir
, XLogArchiveStatusDir
)) != NULL
)
/* 6 is the length of the .ready suffix; the result is negative for names shorter than that */
547 int basenamelen
= (int) strlen(rlde
->d_name
) - 6;
549 if (basenamelen
>= MIN_XFN_CHARS
&&
550 basenamelen
<= MAX_XFN_CHARS
&&
551 strspn(rlde
->d_name
, VALID_XFN_CHARS
) >= basenamelen
&&
552 strcmp(rlde
->d_name
+ basenamelen
, ".ready") == 0)
556 strcpy(newxlog
, rlde
->d_name
);
561 if (strcmp(rlde
->d_name
, newxlog
) < 0)
562 strcpy(newxlog
, rlde
->d_name
);
570 /* truncate off the .ready */
571 newxlog
[strlen(newxlog
) - 6] = '\0';
572 strcpy(xlog
, newxlog
);
/*
 * pgarch_archiveDone: mark one segment as archived.
 *
 * StatusFilePath builds the two status file paths for xlog, one with the
 * .ready suffix and one with the .done suffix, and the first is renamed to
 * the second.  A failed rename is only reported (with the errno-derived
 * error code and both paths); the forward declaration gives the return type
 * as void, so the caller is not told.  The .ready file then stays in place
 * and will still qualify in the next pgarch_readyXlog scan.
 *
 * NOTE(review): the report level used for the rename failure is not visible
 * in this listing.
 */
580 * Emit notification that an xlog file has been successfully archived.
581 * We do this by renaming the status file from NNN.ready to NNN.done.
582 * Eventually, a checkpoint process will notice this and delete both the
583 * NNN.done file and the xlog file itself.
586 pgarch_archiveDone(char *xlog
)
588 char rlogready
[MAXPGPATH
];
589 char rlogdone
[MAXPGPATH
];
591 StatusFilePath(rlogready
, xlog
, ".ready");
592 StatusFilePath(rlogdone
, xlog
, ".done");
593 if (rename(rlogready
, rlogdone
) < 0)
595 (errcode_for_file_access(),
596 errmsg("could not rename file \"%s\" to \"%s\": %m",
597 rlogready
, rlogdone
)));