Add comment about GNUWin32's cp not having the file system problem.
[PostgreSQL.git] / contrib / pg_standby / pg_standby.c
blob1d4e291d3fdfb68df9b7f1adbee1b7f8ebd0e62e
/*
 * $PostgreSQL$
 *
 * pg_standby.c
 *
 * Production-ready example of how to create a Warm Standby
 * database server using continuous archiving as a
 * replication mechanism
 *
 * We separate the parameters for archive and nextWALfile
 * so that we can check the archive exists, even if the
 * WAL file doesn't (yet).
 *
 * This program will be executed once in full for each file
 * requested by the warm standby server.
 *
 * It is designed to cater to a variety of needs, as well as
 * providing a customizable section.
 *
 * Original author:		Simon Riggs  simon@2ndquadrant.com
 * Current maintainer:	Simon Riggs
 */
24 #include "postgres_fe.h"
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <signal.h>
31 #ifdef WIN32
32 int getopt(int argc, char *const argv[], const char *optstring);
33 #else
34 #include <sys/time.h>
35 #include <unistd.h>
37 #ifdef HAVE_GETOPT_H
38 #include <getopt.h>
39 #endif
40 #endif /* ! WIN32 */
42 extern char *optarg;
43 extern int optind;
45 /* Options and defaults */
46 int sleeptime = 5; /* amount of time to sleep between file checks */
47 int waittime = -1; /* how long we have been waiting, -1 no wait
48 * yet */
49 int maxwaittime = 0; /* how long are we prepared to wait for? */
50 int keepfiles = 0; /* number of WAL files to keep, 0 keep all */
51 int maxretries = 3; /* number of retries on restore command */
52 bool debug = false; /* are we debugging? */
53 bool triggered = false; /* have we been triggered? */
54 bool need_cleanup = false; /* do we need to remove files from
55 * archive? */
57 static volatile sig_atomic_t signaled = false;
59 char *archiveLocation; /* where to find the archive? */
60 char *triggerPath; /* where to find the trigger file? */
61 char *xlogFilePath; /* where we are going to restore to */
62 char *nextWALFileName; /* the file we need to get from archive */
63 char *restartWALFileName; /* the file from which we can restart restore */
64 char *priorWALFileName; /* the file we need to get from archive */
65 char WALFilePath[MAXPGPATH]; /* the file path including archive */
66 char restoreCommand[MAXPGPATH]; /* run this to restore */
67 char exclusiveCleanupFileName[MAXPGPATH]; /* the file we need to
68 * get from archive */
70 #define RESTORE_COMMAND_COPY 0
71 #define RESTORE_COMMAND_LINK 1
72 int restoreCommandType;
74 #define XLOG_DATA 0
75 #define XLOG_HISTORY 1
76 #define XLOG_BACKUP_LABEL 2
77 int nextWALFileType;
79 #define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
80 snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)
82 struct stat stat_buf;
/* =====================================================================
 *
 *		  Customizable section
 *
 * =====================================================================
 *
 *	Currently, this section assumes that the Archive is a locally
 *	accessible directory. If you want to make other assumptions,
 *	such as using a vendor-specific archive and access API, these
 *	routines are the ones you'll need to change. You're
 *	encouraged to submit any changes to pgsql-patches@postgresql.org
 *	or personally to the current maintainer. Those changes may be
 *	folded in to later versions of this program.
 */
/* Length of a WAL segment file name: three 8-digit hexadecimal fields */
#define XLOG_DATA_FNAME_LEN		24

/*
 * Reworked from access/xlog_internal.h
 *
 * Format the segment file name for (tli, log, seg) into fname, which must
 * have room for XLOG_DATA_FNAME_LEN + 1 bytes.
 */
#define XLogFileName(fname, tli, log, seg)	\
	snprintf((fname), XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", (tli), (log), (seg))
105 * Initialize allows customized commands into the warm standby program.
107 * As an example, and probably the common case, we use either
108 * cp/ln commands on *nix, or copy/move command on Windows.
111 static void
112 CustomizableInitialize(void)
114 #ifdef WIN32
115 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
116 switch (restoreCommandType)
118 case RESTORE_COMMAND_LINK:
119 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
120 break;
121 case RESTORE_COMMAND_COPY:
122 default:
123 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
124 break;
126 #else
127 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
128 switch (restoreCommandType)
130 case RESTORE_COMMAND_LINK:
131 #if HAVE_WORKING_LINK
132 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
133 break;
134 #endif
135 case RESTORE_COMMAND_COPY:
136 default:
137 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
138 break;
140 #endif
143 * This code assumes that archiveLocation is a directory You may wish to
144 * add code to check for tape libraries, etc.. So, since it is a
145 * directory, we use stat to test if its accessible
147 if (stat(archiveLocation, &stat_buf) != 0)
149 fprintf(stderr, "pg_standby: archiveLocation \"%s\" does not exist\n", archiveLocation);
150 fflush(stderr);
151 exit(2);
156 * CustomizableNextWALFileReady()
158 * Is the requested file ready yet?
160 static bool
161 CustomizableNextWALFileReady()
163 if (stat(WALFilePath, &stat_buf) == 0)
166 * If its a backup file, return immediately If its a regular file
167 * return only if its the right size already
169 if (strlen(nextWALFileName) > 24 &&
170 strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
171 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"),
172 ".backup") == 0)
174 nextWALFileType = XLOG_BACKUP_LABEL;
175 return true;
177 else if (stat_buf.st_size == XLOG_SEG_SIZE)
179 #ifdef WIN32
182 * Windows reports that the file has the right number of bytes
183 * even though the file is still being copied and cannot be opened
184 * by pg_standby yet. So we wait for sleeptime secs before
185 * attempting to restore. If that is not enough, we will rely on
186 * the retry/holdoff mechanism. GNUWin32's cp does not have
187 * this problem.
189 pg_usleep(sleeptime * 1000000L);
190 #endif
191 nextWALFileType = XLOG_DATA;
192 return true;
196 * If still too small, wait until it is the correct size
198 if (stat_buf.st_size > XLOG_SEG_SIZE)
200 if (debug)
202 fprintf(stderr, "file size greater than expected\n");
203 fflush(stderr);
205 exit(3);
209 return false;
212 #define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE )
214 static void
215 CustomizableCleanupPriorWALFiles(void)
218 * Work out name of prior file from current filename
220 if (nextWALFileType == XLOG_DATA)
222 int rc;
223 DIR *xldir;
224 struct dirent *xlde;
227 * Assume its OK to keep failing. The failure situation may change
228 * over time, so we'd rather keep going on the main processing than
229 * fail because we couldnt clean up yet.
231 if ((xldir = opendir(archiveLocation)) != NULL)
233 while ((xlde = readdir(xldir)) != NULL)
236 * We ignore the timeline part of the XLOG segment identifiers
237 * in deciding whether a segment is still needed. This
238 * ensures that we won't prematurely remove a segment from a
239 * parent timeline. We could probably be a little more
240 * proactive about removing segments of non-parent timelines,
241 * but that would be a whole lot more complicated.
243 * We use the alphanumeric sorting property of the filenames
244 * to decide which ones are earlier than the
245 * exclusiveCleanupFileName file. Note that this means files
246 * are not removed in the order they were originally written,
247 * in case this worries you.
249 if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
250 strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
251 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
253 #ifdef WIN32
254 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
255 #else
256 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
257 #endif
259 if (debug)
260 fprintf(stderr, "\nremoving \"%s\"", WALFilePath);
262 rc = unlink(WALFilePath);
263 if (rc != 0)
265 fprintf(stderr, "\npg_standby: ERROR failed to remove \"%s\": %s",
266 WALFilePath, strerror(errno));
267 break;
271 if (debug)
272 fprintf(stderr, "\n");
274 else
275 fprintf(stderr, "pg_standby: archiveLocation \"%s\" open error\n", archiveLocation);
277 closedir(xldir);
278 fflush(stderr);
/* =====================================================================
 *		  End of Customizable section
 * =====================================================================
 */
288 * SetWALFileNameForCleanup()
290 * Set the earliest WAL filename that we want to keep on the archive
291 * and decide whether we need_cleanup
293 static bool
294 SetWALFileNameForCleanup(void)
296 uint32 tli = 1,
297 log = 0,
298 seg = 0;
299 uint32 log_diff = 0,
300 seg_diff = 0;
301 bool cleanup = false;
303 if (restartWALFileName)
306 * Don't do cleanup if the restartWALFileName provided
307 * is later than the xlog file requested. This is an error
308 * and we must not remove these files from archive.
309 * This shouldn't happen, but better safe than sorry.
311 if (strcmp(restartWALFileName, nextWALFileName) > 0)
312 return false;
314 strcpy(exclusiveCleanupFileName, restartWALFileName);
315 return true;
318 if (keepfiles > 0)
320 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
321 if (tli > 0 && log >= 0 && seg > 0)
323 log_diff = keepfiles / MaxSegmentsPerLogFile;
324 seg_diff = keepfiles % MaxSegmentsPerLogFile;
325 if (seg_diff > seg)
327 log_diff++;
328 seg = MaxSegmentsPerLogFile - (seg_diff - seg);
330 else
331 seg -= seg_diff;
333 if (log >= log_diff)
335 log -= log_diff;
336 cleanup = true;
338 else
340 log = 0;
341 seg = 0;
346 XLogFileName(exclusiveCleanupFileName, tli, log, seg);
348 return cleanup;
352 * CheckForExternalTrigger()
354 * Is there a trigger file?
356 static bool
357 CheckForExternalTrigger(void)
359 int rc;
362 * Look for a trigger file, if that option has been selected
364 * We use stat() here because triggerPath is always a file rather than
365 * potentially being in an archive
367 if (triggerPath && stat(triggerPath, &stat_buf) == 0)
369 fprintf(stderr, "trigger file found\n");
370 fflush(stderr);
373 * If trigger file found, we *must* delete it. Here's why: When
374 * recovery completes, we will be asked again for the same file from
375 * the archive using pg_standby so must remove trigger file so we can
376 * reload file again and come up correctly.
378 rc = unlink(triggerPath);
379 if (rc != 0)
381 fprintf(stderr, "\n ERROR: could not remove \"%s\": %s", triggerPath, strerror(errno));
382 fflush(stderr);
383 exit(rc);
385 return true;
388 return false;
392 * RestoreWALFileForRecovery()
394 * Perform the action required to restore the file from archive
396 static bool
397 RestoreWALFileForRecovery(void)
399 int rc = 0;
400 int numretries = 0;
402 if (debug)
404 fprintf(stderr, "\nrunning restore :");
405 fflush(stderr);
408 while (numretries < maxretries)
410 rc = system(restoreCommand);
411 if (rc == 0)
413 if (debug)
415 fprintf(stderr, " OK");
416 fflush(stderr);
418 return true;
420 pg_usleep(numretries++ * sleeptime * 1000000L);
424 * Allow caller to add additional info
426 if (debug)
427 fprintf(stderr, "not restored : ");
428 return false;
/*
 * usage()
 *
 *	  Print the command-line synopsis and option summary to stderr.
 */
static void
usage(void)
{
	fprintf(stderr, "\npg_standby allows Warm Standby servers to be configured\n");
	fprintf(stderr, "Usage:\n");
	fprintf(stderr, " pg_standby [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n");
	fprintf(stderr, " note space between ARCHIVELOCATION and NEXTWALFILE\n");
	fprintf(stderr, "with main intended use as a restore_command in the recovery.conf\n");
	fprintf(stderr, " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n");
	fprintf(stderr, "e.g. restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n");
	fprintf(stderr, "\nOptions:\n");
	fprintf(stderr, " -c copies file from archive (default)\n");
	fprintf(stderr, " -d generate lots of debugging output (testing only)\n");
	fprintf(stderr, " -k NUMFILESTOKEEP if RESTARTWALFILE not used, removes files prior to limit (0 keeps all)\n");
	fprintf(stderr, " -l links into archive (leaves file in archive)\n");
	fprintf(stderr, " -r MAXRETRIES max number of times to retry, with progressive wait (default=3)\n");
	fprintf(stderr, " -s SLEEPTIME seconds to wait between file checks (min=1, max=60, default=5)\n");
	fprintf(stderr, " -t TRIGGERFILE defines a trigger file to initiate failover (no default)\n");
	fprintf(stderr, " -w MAXWAITTIME max seconds to wait for a file (0=no limit)(default=0)\n");
	fflush(stderr);
}
453 static void
454 sighandler(int sig)
456 signaled = true;
459 /*------------ MAIN ----------------------------------------*/
461 main(int argc, char **argv)
463 int c;
465 (void) signal(SIGINT, sighandler);
466 (void) signal(SIGQUIT, sighandler);
468 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
470 switch (c)
472 case 'c': /* Use copy */
473 restoreCommandType = RESTORE_COMMAND_COPY;
474 break;
475 case 'd': /* Debug mode */
476 debug = true;
477 break;
478 case 'k': /* keepfiles */
479 keepfiles = atoi(optarg);
480 if (keepfiles < 0)
482 fprintf(stderr, "usage: pg_standby -k keepfiles must be >= 0\n");
483 usage();
484 exit(2);
486 break;
487 case 'l': /* Use link */
488 restoreCommandType = RESTORE_COMMAND_LINK;
489 break;
490 case 'r': /* Retries */
491 maxretries = atoi(optarg);
492 if (maxretries < 0)
494 fprintf(stderr, "usage: pg_standby -r maxretries must be >= 0\n");
495 usage();
496 exit(2);
498 break;
499 case 's': /* Sleep time */
500 sleeptime = atoi(optarg);
501 if (sleeptime <= 0 || sleeptime > 60)
503 fprintf(stderr, "usage: pg_standby -s sleeptime incorrectly set\n");
504 usage();
505 exit(2);
507 break;
508 case 't': /* Trigger file */
509 triggerPath = optarg;
510 if (CheckForExternalTrigger())
511 exit(1); /* Normal exit, with non-zero */
512 break;
513 case 'w': /* Max wait time */
514 maxwaittime = atoi(optarg);
515 if (maxwaittime < 0)
517 fprintf(stderr, "usage: pg_standby -w maxwaittime incorrectly set\n");
518 usage();
519 exit(2);
521 break;
522 default:
523 usage();
524 exit(2);
525 break;
530 * Parameter checking - after checking to see if trigger file present
532 if (argc == 1)
534 usage();
535 exit(2);
539 * We will go to the archiveLocation to get nextWALFileName.
540 * nextWALFileName may not exist yet, which would not be an error, so we
541 * separate the archiveLocation and nextWALFileName so we can check
542 * separately whether archiveLocation exists, if not that is an error
544 if (optind < argc)
546 archiveLocation = argv[optind];
547 optind++;
549 else
551 fprintf(stderr, "pg_standby: must specify archiveLocation\n");
552 usage();
553 exit(2);
556 if (optind < argc)
558 nextWALFileName = argv[optind];
559 optind++;
561 else
563 fprintf(stderr, "pg_standby: use %%f to specify nextWALFileName\n");
564 usage();
565 exit(2);
568 if (optind < argc)
570 xlogFilePath = argv[optind];
571 optind++;
573 else
575 fprintf(stderr, "pg_standby: use %%p to specify xlogFilePath\n");
576 usage();
577 exit(2);
580 if (optind < argc)
582 restartWALFileName = argv[optind];
583 optind++;
586 CustomizableInitialize();
588 need_cleanup = SetWALFileNameForCleanup();
590 if (debug)
592 fprintf(stderr, "\nTrigger file : %s", triggerPath ? triggerPath : "<not set>");
593 fprintf(stderr, "\nWaiting for WAL file : %s", nextWALFileName);
594 fprintf(stderr, "\nWAL file path : %s", WALFilePath);
595 fprintf(stderr, "\nRestoring to... : %s", xlogFilePath);
596 fprintf(stderr, "\nSleep interval : %d second%s",
597 sleeptime, (sleeptime > 1 ? "s" : " "));
598 fprintf(stderr, "\nMax wait interval : %d %s",
599 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
600 fprintf(stderr, "\nCommand for restore : %s", restoreCommand);
601 fprintf(stderr, "\nKeep archive history : ");
602 if (need_cleanup)
603 fprintf(stderr, "%s and later", exclusiveCleanupFileName);
604 else
605 fprintf(stderr, "No cleanup required");
606 fflush(stderr);
610 * Check for initial history file: always the first file to be requested
611 * It's OK if the file isn't there - all other files need to wait
613 if (strlen(nextWALFileName) > 8 &&
614 strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
615 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
616 ".history") == 0)
618 nextWALFileType = XLOG_HISTORY;
619 if (RestoreWALFileForRecovery())
620 exit(0);
621 else
623 if (debug)
625 fprintf(stderr, "history file not found\n");
626 fflush(stderr);
628 exit(1);
633 * Main wait loop
635 while (!CustomizableNextWALFileReady() && !triggered)
637 if (sleeptime <= 60)
638 pg_usleep(sleeptime * 1000000L);
640 if (signaled)
642 triggered = true;
643 if (debug)
645 fprintf(stderr, "\nsignaled to exit\n");
646 fflush(stderr);
649 else
652 if (debug)
654 fprintf(stderr, "\nWAL file not present yet.");
655 if (triggerPath)
656 fprintf(stderr, " Checking for trigger file...");
657 fflush(stderr);
660 waittime += sleeptime;
662 if (!triggered && (CheckForExternalTrigger() || (waittime >= maxwaittime && maxwaittime > 0)))
664 triggered = true;
665 if (debug && waittime >= maxwaittime && maxwaittime > 0)
666 fprintf(stderr, "\nTimed out after %d seconds\n", waittime);
672 * Action on exit
674 if (triggered)
675 exit(1); /* Normal exit, with non-zero */
678 * Once we have restored this file successfully we can remove some prior
679 * WAL files. If this restore fails we musn't remove any file because some
680 * of them will be requested again immediately after the failed restore,
681 * or when we restart recovery.
683 if (RestoreWALFileForRecovery() && need_cleanup)
684 CustomizableCleanupPriorWALFiles();
686 return 0;