Improve comment about GetWALAvailability's WALAVAIL_REMOVED code.
[pgsql.git] / src / backend / access / transam / xlogarchive.c
blob4b89addf9762d2d9b39de80909e297f4a6a8c572
1 /*-------------------------------------------------------------------------
3 * xlogarchive.c
4 * Functions for archiving WAL files and restoring from the archive.
7 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/xlogarchive.c
12 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include <sys/stat.h>
18 #include <sys/wait.h>
19 #include <signal.h>
20 #include <unistd.h>
22 #include "access/xlog.h"
23 #include "access/xlog_internal.h"
24 #include "access/xlogarchive.h"
25 #include "miscadmin.h"
26 #include "pgstat.h"
27 #include "postmaster/startup.h"
28 #include "postmaster/pgarch.h"
29 #include "replication/walsender.h"
30 #include "storage/fd.h"
31 #include "storage/ipc.h"
32 #include "storage/lwlock.h"
35 * Attempt to retrieve the specified file from off-line archival storage.
36 * If successful, fill "path" with its complete path (note that this will be
37 * a temp file name that doesn't follow the normal naming convention), and
38 * return true.
40 * If not successful, fill "path" with the name of the normal on-line file
41 * (which may or may not actually exist, but we'll try to use it), and return
42 * false.
44 * For fixed-size files, the caller may pass the expected size as an
45 * additional crosscheck on successful recovery. If the file size is not
46 * known, set expectedSize = 0.
48 * When 'cleanupEnabled' is false, refrain from deleting any old WAL segments
49 * in the archive. This is used when fetching the initial checkpoint record,
50 * when we are not yet sure how far back we need the WAL.
52 bool
53 RestoreArchivedFile(char *path, const char *xlogfname,
54 const char *recovername, off_t expectedSize,
55 bool cleanupEnabled)
57 char xlogpath[MAXPGPATH];
58 char lastRestartPointFname[MAXPGPATH];
59 bool ret;
60 struct stat stat_buf;
61 XLogSegNo restartSegNo;
62 XLogRecPtr restartRedoPtr;
63 TimeLineID restartTli;
66 * Ignore restore_command when not in archive recovery (meaning we are in
67 * crash recovery).
69 if (!ArchiveRecoveryRequested)
70 goto not_available;
72 /* In standby mode, restore_command might not be supplied */
73 if (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0)
74 goto not_available;
77 * When doing archive recovery, we always prefer an archived log file even
78 * if a file of the same name exists in XLOGDIR. The reason is that the
79 * file in XLOGDIR could be an old, un-filled or partly-filled version
80 * that was copied and restored as part of backing up $PGDATA.
82 * We could try to optimize this slightly by checking the local copy
83 * lastchange timestamp against the archived copy, but we have no API to
84 * do this, nor can we guarantee that the lastchange timestamp was
85 * preserved correctly when we copied to archive. Our aim is robustness,
86 * so we elect not to do this.
88 * If we cannot obtain the log file from the archive, however, we will try
89 * to use the XLOGDIR file if it exists. This is so that we can make use
90 * of log segments that weren't yet transferred to the archive.
92 * Notice that we don't actually overwrite any files when we copy back
93 * from archive because the restore_command may inadvertently restore
94 * inappropriate xlogs, or they may be corrupt, so we may wish to fallback
95 * to the segments remaining in current XLOGDIR later. The
96 * copy-from-archive filename is always the same, ensuring that we don't
97 * run out of disk space on long recoveries.
99 snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
102 * Make sure there is no existing file named recovername.
104 if (stat(xlogpath, &stat_buf) != 0)
106 if (errno != ENOENT)
107 ereport(FATAL,
108 (errcode_for_file_access(),
109 errmsg("could not stat file \"%s\": %m",
110 xlogpath)));
112 else
114 if (unlink(xlogpath) != 0)
115 ereport(FATAL,
116 (errcode_for_file_access(),
117 errmsg("could not remove file \"%s\": %m",
118 xlogpath)));
122 * Calculate the archive file cutoff point for use during log shipping
123 * replication. All files earlier than this point can be deleted from the
124 * archive, though there is no requirement to do so.
126 * If cleanup is not enabled, initialise this with the filename of
127 * InvalidXLogRecPtr, which will prevent the deletion of any WAL files
128 * from the archive because of the alphabetic sorting property of WAL
129 * filenames.
131 * Once we have successfully located the redo pointer of the checkpoint
132 * from which we start recovery we never request a file prior to the redo
133 * pointer of the last restartpoint. When redo begins we know that we have
134 * successfully located it, so there is no need for additional status
135 * flags to signify the point when we can begin deleting WAL files from
136 * the archive.
138 if (cleanupEnabled)
140 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
141 XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
142 XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
143 wal_segment_size);
144 /* we shouldn't need anything earlier than last restart point */
145 Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
147 else
148 XLogFileName(lastRestartPointFname, 0, 0L, wal_segment_size);
151 * Check signals before restore command and reset afterwards.
153 PreRestoreCommand();
156 * Copy xlog from archival storage to XLOGDIR
158 ret = shell_restore(xlogfname, xlogpath, lastRestartPointFname);
160 PostRestoreCommand();
162 if (ret)
165 * command apparently succeeded, but let's make sure the file is
166 * really there now and has the correct size.
168 if (stat(xlogpath, &stat_buf) == 0)
170 if (expectedSize > 0 && stat_buf.st_size != expectedSize)
172 int elevel;
175 * If we find a partial file in standby mode, we assume it's
176 * because it's just being copied to the archive, and keep
177 * trying.
179 * Otherwise treat a wrong-sized file as FATAL to ensure the
180 * DBA would notice it, but is that too strong? We could try
181 * to plow ahead with a local copy of the file ... but the
182 * problem is that there probably isn't one, and we'd
183 * incorrectly conclude we've reached the end of WAL and we're
184 * done recovering ...
186 if (StandbyMode && stat_buf.st_size < expectedSize)
187 elevel = DEBUG1;
188 else
189 elevel = FATAL;
190 ereport(elevel,
191 (errmsg("archive file \"%s\" has wrong size: %lld instead of %lld",
192 xlogfname,
193 (long long int) stat_buf.st_size,
194 (long long int) expectedSize)));
195 return false;
197 else
199 ereport(LOG,
200 (errmsg("restored log file \"%s\" from archive",
201 xlogfname)));
202 strcpy(path, xlogpath);
203 return true;
206 else
208 /* stat failed */
209 int elevel = (errno == ENOENT) ? LOG : FATAL;
211 ereport(elevel,
212 (errcode_for_file_access(),
213 errmsg("could not stat file \"%s\": %m", xlogpath),
214 errdetail("restore_command returned a zero exit status, but stat() failed.")));
218 not_available:
221 * if an archived file is not available, there might still be a version of
222 * this file in XLOGDIR, so return that as the filename to open.
224 * In many recovery scenarios we expect this to fail also, but if so that
225 * just means we've reached the end of WAL.
227 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
228 return false;
232 * A file was restored from the archive under a temporary filename (path),
233 * and now we want to keep it. Rename it under the permanent filename in
234 * pg_wal (xlogfname), replacing any existing file with the same name.
236 void
237 KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
239 char xlogfpath[MAXPGPATH];
240 bool reload = false;
241 struct stat statbuf;
243 snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
245 if (stat(xlogfpath, &statbuf) == 0)
247 char oldpath[MAXPGPATH];
249 #ifdef WIN32
250 static unsigned int deletedcounter = 1;
253 * On Windows, if another process (e.g a walsender process) holds the
254 * file open in FILE_SHARE_DELETE mode, unlink will succeed, but the
255 * file will still show up in directory listing until the last handle
256 * is closed, and we cannot rename the new file in its place until
257 * that. To avoid that problem, rename the old file to a temporary
258 * name first. Use a counter to create a unique filename, because the
259 * same file might be restored from the archive multiple times, and a
260 * walsender could still be holding onto an old deleted version of it.
262 snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
263 xlogfpath, deletedcounter++);
264 if (rename(xlogfpath, oldpath) != 0)
266 ereport(ERROR,
267 (errcode_for_file_access(),
268 errmsg("could not rename file \"%s\" to \"%s\": %m",
269 xlogfpath, oldpath)));
271 #else
272 /* same-size buffers, so this never truncates */
273 strlcpy(oldpath, xlogfpath, MAXPGPATH);
274 #endif
275 if (unlink(oldpath) != 0)
276 ereport(FATAL,
277 (errcode_for_file_access(),
278 errmsg("could not remove file \"%s\": %m",
279 xlogfpath)));
280 reload = true;
283 durable_rename(path, xlogfpath, ERROR);
286 * Create .done file forcibly to prevent the restored segment from being
287 * archived again later.
289 if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
290 XLogArchiveForceDone(xlogfname);
291 else
292 XLogArchiveNotify(xlogfname);
295 * If the existing file was replaced, since walsenders might have it open,
296 * request them to reload a currently-open segment. This is only required
297 * for WAL segments, walsenders don't hold other files open, but there's
298 * no harm in doing this too often, and we don't know what kind of a file
299 * we're dealing with here.
301 if (reload)
302 WalSndRqstFileReload();
305 * Signal walsender that new WAL has arrived. Again, this isn't necessary
306 * if we restored something other than a WAL segment, but it does no harm
307 * either.
309 WalSndWakeup();
313 * XLogArchiveNotify
315 * Create an archive notification file
317 * The name of the notification file is the message that will be picked up
318 * by the archiver, e.g. we write 0000000100000001000000C6.ready
319 * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
320 * then when complete, rename it to 0000000100000001000000C6.done
322 void
323 XLogArchiveNotify(const char *xlog)
325 char archiveStatusPath[MAXPGPATH];
326 FILE *fd;
328 /* insert an otherwise empty file called <XLOG>.ready */
329 StatusFilePath(archiveStatusPath, xlog, ".ready");
330 fd = AllocateFile(archiveStatusPath, "w");
331 if (fd == NULL)
333 ereport(LOG,
334 (errcode_for_file_access(),
335 errmsg("could not create archive status file \"%s\": %m",
336 archiveStatusPath)));
337 return;
339 if (FreeFile(fd))
341 ereport(LOG,
342 (errcode_for_file_access(),
343 errmsg("could not write archive status file \"%s\": %m",
344 archiveStatusPath)));
345 return;
349 * Timeline history files are given the highest archival priority to lower
350 * the chance that a promoted standby will choose a timeline that is
351 * already in use. However, the archiver ordinarily tries to gather
352 * multiple files to archive from each scan of the archive_status
353 * directory, which means that newly created timeline history files could
354 * be left unarchived for a while. To ensure that the archiver picks up
355 * timeline history files as soon as possible, we force the archiver to
356 * scan the archive_status directory the next time it looks for a file to
357 * archive.
359 if (IsTLHistoryFileName(xlog))
360 PgArchForceDirScan();
362 /* Notify archiver that it's got something to do */
363 if (IsUnderPostmaster)
364 PgArchWakeup();
368 * Convenience routine to notify using segment number representation of filename
370 void
371 XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
373 char xlog[MAXFNAMELEN];
375 Assert(tli != 0);
377 XLogFileName(xlog, tli, segno, wal_segment_size);
378 XLogArchiveNotify(xlog);
382 * XLogArchiveForceDone
384 * Emit notification forcibly that an XLOG segment file has been successfully
385 * archived, by creating <XLOG>.done regardless of whether <XLOG>.ready
386 * exists or not.
388 void
389 XLogArchiveForceDone(const char *xlog)
391 char archiveReady[MAXPGPATH];
392 char archiveDone[MAXPGPATH];
393 struct stat stat_buf;
394 FILE *fd;
396 /* Exit if already known done */
397 StatusFilePath(archiveDone, xlog, ".done");
398 if (stat(archiveDone, &stat_buf) == 0)
399 return;
401 /* If .ready exists, rename it to .done */
402 StatusFilePath(archiveReady, xlog, ".ready");
403 if (stat(archiveReady, &stat_buf) == 0)
405 (void) durable_rename(archiveReady, archiveDone, WARNING);
406 return;
409 /* insert an otherwise empty file called <XLOG>.done */
410 fd = AllocateFile(archiveDone, "w");
411 if (fd == NULL)
413 ereport(LOG,
414 (errcode_for_file_access(),
415 errmsg("could not create archive status file \"%s\": %m",
416 archiveDone)));
417 return;
419 if (FreeFile(fd))
421 ereport(LOG,
422 (errcode_for_file_access(),
423 errmsg("could not write archive status file \"%s\": %m",
424 archiveDone)));
425 return;
430 * XLogArchiveCheckDone
432 * This is called when we are ready to delete or recycle an old XLOG segment
433 * file or backup history file. If it is okay to delete it then return true.
434 * If it is not time to delete it, make sure a .ready file exists, and return
435 * false.
437 * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
438 * then return false; else create <XLOG>.ready and return false.
440 * The reason we do things this way is so that if the original attempt to
441 * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
443 bool
444 XLogArchiveCheckDone(const char *xlog)
446 char archiveStatusPath[MAXPGPATH];
447 struct stat stat_buf;
449 /* The file is always deletable if archive_mode is "off". */
450 if (!XLogArchivingActive())
451 return true;
454 * During archive recovery, the file is deletable if archive_mode is not
455 * "always".
457 if (!XLogArchivingAlways() &&
458 GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
459 return true;
462 * At this point of the logic, note that we are either a primary with
463 * archive_mode set to "on" or "always", or a standby with archive_mode
464 * set to "always".
467 /* First check for .done --- this means archiver is done with it */
468 StatusFilePath(archiveStatusPath, xlog, ".done");
469 if (stat(archiveStatusPath, &stat_buf) == 0)
470 return true;
472 /* check for .ready --- this means archiver is still busy with it */
473 StatusFilePath(archiveStatusPath, xlog, ".ready");
474 if (stat(archiveStatusPath, &stat_buf) == 0)
475 return false;
477 /* Race condition --- maybe archiver just finished, so recheck */
478 StatusFilePath(archiveStatusPath, xlog, ".done");
479 if (stat(archiveStatusPath, &stat_buf) == 0)
480 return true;
482 /* Retry creation of the .ready file */
483 XLogArchiveNotify(xlog);
484 return false;
488 * XLogArchiveIsBusy
490 * Check to see if an XLOG segment file is still unarchived.
491 * This is almost but not quite the inverse of XLogArchiveCheckDone: in
492 * the first place we aren't chartered to recreate the .ready file, and
493 * in the second place we should consider that if the file is already gone
494 * then it's not busy. (This check is needed to handle the race condition
495 * that a checkpoint already deleted the no-longer-needed file.)
497 bool
498 XLogArchiveIsBusy(const char *xlog)
500 char archiveStatusPath[MAXPGPATH];
501 struct stat stat_buf;
503 /* First check for .done --- this means archiver is done with it */
504 StatusFilePath(archiveStatusPath, xlog, ".done");
505 if (stat(archiveStatusPath, &stat_buf) == 0)
506 return false;
508 /* check for .ready --- this means archiver is still busy with it */
509 StatusFilePath(archiveStatusPath, xlog, ".ready");
510 if (stat(archiveStatusPath, &stat_buf) == 0)
511 return true;
513 /* Race condition --- maybe archiver just finished, so recheck */
514 StatusFilePath(archiveStatusPath, xlog, ".done");
515 if (stat(archiveStatusPath, &stat_buf) == 0)
516 return false;
519 * Check to see if the WAL file has been removed by checkpoint, which
520 * implies it has already been archived, and explains why we can't see a
521 * status file for it.
523 snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
524 if (stat(archiveStatusPath, &stat_buf) != 0 &&
525 errno == ENOENT)
526 return false;
528 return true;
532 * XLogArchiveIsReadyOrDone
534 * Check to see if an XLOG segment file has a .ready or .done file.
535 * This is similar to XLogArchiveIsBusy(), but returns true if the file
536 * is already archived or is about to be archived.
538 * This is currently only used at recovery. During normal operation this
539 * would be racy: the file might get removed or marked with .ready as we're
540 * checking it, or immediately after we return.
542 bool
543 XLogArchiveIsReadyOrDone(const char *xlog)
545 char archiveStatusPath[MAXPGPATH];
546 struct stat stat_buf;
548 /* First check for .done --- this means archiver is done with it */
549 StatusFilePath(archiveStatusPath, xlog, ".done");
550 if (stat(archiveStatusPath, &stat_buf) == 0)
551 return true;
553 /* check for .ready --- this means archiver is still busy with it */
554 StatusFilePath(archiveStatusPath, xlog, ".ready");
555 if (stat(archiveStatusPath, &stat_buf) == 0)
556 return true;
558 /* Race condition --- maybe archiver just finished, so recheck */
559 StatusFilePath(archiveStatusPath, xlog, ".done");
560 if (stat(archiveStatusPath, &stat_buf) == 0)
561 return true;
563 return false;
567 * XLogArchiveIsReady
569 * Check to see if an XLOG segment file has an archive notification (.ready)
570 * file.
572 bool
573 XLogArchiveIsReady(const char *xlog)
575 char archiveStatusPath[MAXPGPATH];
576 struct stat stat_buf;
578 StatusFilePath(archiveStatusPath, xlog, ".ready");
579 if (stat(archiveStatusPath, &stat_buf) == 0)
580 return true;
582 return false;
586 * XLogArchiveCleanup
588 * Cleanup archive notification file(s) for a particular xlog segment
590 void
591 XLogArchiveCleanup(const char *xlog)
593 char archiveStatusPath[MAXPGPATH];
595 /* Remove the .done file */
596 StatusFilePath(archiveStatusPath, xlog, ".done");
597 unlink(archiveStatusPath);
598 /* should we complain about failure? */
600 /* Remove the .ready file if present --- normally it shouldn't be */
601 StatusFilePath(archiveStatusPath, xlog, ".ready");
602 unlink(archiveStatusPath);
603 /* should we complain about failure? */