Update copyright for 2022
[pgsql.git] / src / backend / replication / basebackup.c
blob53dedc73c2a3dbe608b56fac911a7eb3dca19ee2
1 /*-------------------------------------------------------------------------
3 * basebackup.c
4 * code for taking a base backup and streaming it to a standby
6 * Portions Copyright (c) 2010-2022, PostgreSQL Global Development Group
8 * IDENTIFICATION
9 * src/backend/replication/basebackup.c
11 *-------------------------------------------------------------------------
13 #include "postgres.h"
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <time.h>
19 #include "access/xlog_internal.h" /* for pg_start/stop_backup */
20 #include "common/file_perm.h"
21 #include "commands/defrem.h"
22 #include "lib/stringinfo.h"
23 #include "miscadmin.h"
24 #include "nodes/pg_list.h"
25 #include "pgstat.h"
26 #include "pgtar.h"
27 #include "port.h"
28 #include "postmaster/syslogger.h"
29 #include "replication/basebackup.h"
30 #include "replication/basebackup_sink.h"
31 #include "replication/backup_manifest.h"
32 #include "replication/walsender.h"
33 #include "replication/walsender_private.h"
34 #include "storage/bufpage.h"
35 #include "storage/checksum.h"
36 #include "storage/dsm_impl.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/reinit.h"
40 #include "utils/builtins.h"
41 #include "utils/ps_status.h"
42 #include "utils/relcache.h"
43 #include "utils/resowner.h"
44 #include "utils/timestamp.h"
/*
 * Preferred size, in bytes, of the payload of a single CopyData message.
 * Files being backed up may also end up being read in chunks of this size.
 *
 * NB: the sink buffer must be a multiple of the system block size, so when
 * BLCKSZ exceeds our 32kB preference we use BLCKSZ instead.
 */
#define SINK_BUFFER_LENGTH			Max(32768, BLCKSZ)
56 typedef struct
58 const char *label;
59 bool progress;
60 bool fastcheckpoint;
61 bool nowait;
62 bool includewal;
63 uint32 maxrate;
64 bool sendtblspcmapfile;
65 backup_manifest_option manifest;
66 pg_checksum_type manifest_checksum_type;
67 } basebackup_options;
69 static int64 sendTablespace(bbsink *sink, char *path, char *oid, bool sizeonly,
70 struct backup_manifest_info *manifest);
71 static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
72 List *tablespaces, bool sendtblspclinks,
73 backup_manifest_info *manifest, const char *spcoid);
74 static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
75 struct stat *statbuf, bool missing_ok, Oid dboid,
76 backup_manifest_info *manifest, const char *spcoid);
77 static void sendFileWithContent(bbsink *sink, const char *filename,
78 const char *content,
79 backup_manifest_info *manifest);
80 static int64 _tarWriteHeader(bbsink *sink, const char *filename,
81 const char *linktarget, struct stat *statbuf,
82 bool sizeonly);
83 static void _tarWritePadding(bbsink *sink, int len);
84 static void convert_link_to_directory(const char *pathbuf, struct stat *statbuf);
85 static void perform_base_backup(basebackup_options *opt, bbsink *sink);
86 static void parse_basebackup_options(List *options, basebackup_options *opt);
87 static int compareWalFileNames(const ListCell *a, const ListCell *b);
88 static bool is_checksummed_file(const char *fullpath, const char *filename);
89 static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
90 const char *filename, bool partial_read_ok);
/*
 * Recovery state sampled when the current backup began; sendDir() compares
 * it against RecoveryInProgress() to detect a promotion during the backup.
 */
static bool backup_started_in_recovery = false;

/* Temporary statistics directory, expressed relative to the data directory */
static char *statrelpath = NULL;

/* Running count of checksum failures seen during the current base backup. */
static long long int total_checksum_failures;

/* When true, checksum verification is skipped. */
static bool noverify_checksums = false;
/*
 * One entry of an exclusion list, as used both for base backups and for
 * checksum validation.  "name" is the file or path name to exclude.  When
 * "match_prefix" is true, every item whose name starts with "name" is
 * excluded; otherwise only an exact match is.
 */
struct exclude_list_item
{
	const char *name;			/* file or path name to test against */
	bool		match_prefix;	/* true: prefix match, false: exact match */
};
117 * The contents of these directories are removed or recreated during server
118 * start so they are not included in backups. The directories themselves are
119 * kept and included as empty to preserve access permissions.
121 * Note: this list should be kept in sync with the filter lists in pg_rewind's
122 * filemap.c.
124 static const char *const excludeDirContents[] =
127 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
128 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
129 * created there.
131 PG_STAT_TMP_DIR,
134 * It is generally not useful to backup the contents of this directory
135 * even if the intention is to restore to another primary. See backup.sgml
136 * for a more detailed description.
138 "pg_replslot",
140 /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
141 PG_DYNSHMEM_DIR,
143 /* Contents removed on startup, see AsyncShmemInit(). */
144 "pg_notify",
147 * Old contents are loaded for possible debugging but are not required for
148 * normal operation, see SerialInit().
150 "pg_serial",
152 /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
153 "pg_snapshots",
155 /* Contents zeroed on startup, see StartupSUBTRANS(). */
156 "pg_subtrans",
158 /* end of list */
159 NULL
163 * List of files excluded from backups.
165 static const struct exclude_list_item excludeFiles[] =
167 /* Skip auto conf temporary file. */
168 {PG_AUTOCONF_FILENAME ".tmp", false},
170 /* Skip current log file temporary file */
171 {LOG_METAINFO_DATAFILE_TMP, false},
174 * Skip relation cache because it is rebuilt on startup. This includes
175 * temporary files.
177 {RELCACHE_INIT_FILENAME, true},
180 * If there's a backup_label or tablespace_map file, it belongs to a
181 * backup started by the user with pg_start_backup(). It is *not* correct
182 * for this backup. Our backup_label/tablespace_map is injected into the
183 * tar separately.
185 {BACKUP_LABEL_FILE, false},
186 {TABLESPACE_MAP, false},
189 * If there's a backup_manifest, it belongs to a backup that was used to
190 * start this server. It is *not* correct for this backup. Our
191 * backup_manifest is injected into the backup separately if users want
192 * it.
194 {"backup_manifest", false},
196 {"postmaster.pid", false},
197 {"postmaster.opts", false},
199 /* end of list */
200 {NULL, false}
204 * List of files excluded from checksum validation.
206 * Note: this list should be kept in sync with what pg_checksums.c
207 * includes.
209 static const struct exclude_list_item noChecksumFiles[] = {
210 {"pg_control", false},
211 {"pg_filenode.map", false},
212 {"pg_internal.init", true},
213 {"PG_VERSION", false},
214 #ifdef EXEC_BACKEND
215 {"config_exec_params", true},
216 #endif
217 {NULL, false}
221 * Actually do a base backup for the specified tablespaces.
223 * This is split out mainly to avoid complaints about "variable might be
224 * clobbered by longjmp" from stupider versions of gcc.
226 static void
227 perform_base_backup(basebackup_options *opt, bbsink *sink)
229 bbsink_state state;
230 XLogRecPtr endptr;
231 TimeLineID endtli;
232 StringInfo labelfile;
233 StringInfo tblspc_map_file;
234 backup_manifest_info manifest;
235 int datadirpathlen;
237 /* Initial backup state, insofar as we know it now. */
238 state.tablespaces = NIL;
239 state.tablespace_num = 0;
240 state.bytes_done = 0;
241 state.bytes_total = 0;
242 state.bytes_total_is_valid = false;
244 /* we're going to use a BufFile, so we need a ResourceOwner */
245 Assert(CurrentResourceOwner == NULL);
246 CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
248 datadirpathlen = strlen(DataDir);
250 backup_started_in_recovery = RecoveryInProgress();
252 labelfile = makeStringInfo();
253 tblspc_map_file = makeStringInfo();
254 InitializeBackupManifest(&manifest, opt->manifest,
255 opt->manifest_checksum_type);
257 total_checksum_failures = 0;
259 basebackup_progress_wait_checkpoint();
260 state.startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint,
261 &state.starttli,
262 labelfile, &state.tablespaces,
263 tblspc_map_file);
266 * Once do_pg_start_backup has been called, ensure that any failure causes
267 * us to abort the backup so we don't "leak" a backup counter. For this
268 * reason, *all* functionality between do_pg_start_backup() and the end of
269 * do_pg_stop_backup() should be inside the error cleanup block!
272 PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
274 ListCell *lc;
275 tablespaceinfo *ti;
278 * Calculate the relative path of temporary statistics directory in
279 * order to skip the files which are located in that directory later.
281 if (is_absolute_path(pgstat_stat_directory) &&
282 strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
283 statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
284 else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
285 statrelpath = psprintf("./%s", pgstat_stat_directory);
286 else
287 statrelpath = pgstat_stat_directory;
289 /* Add a node for the base directory at the end */
290 ti = palloc0(sizeof(tablespaceinfo));
291 ti->size = -1;
292 state.tablespaces = lappend(state.tablespaces, ti);
295 * Calculate the total backup size by summing up the size of each
296 * tablespace
298 if (opt->progress)
300 basebackup_progress_estimate_backup_size();
302 foreach(lc, state.tablespaces)
304 tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc);
306 if (tmp->path == NULL)
307 tmp->size = sendDir(sink, ".", 1, true, state.tablespaces,
308 true, NULL, NULL);
309 else
310 tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true,
311 NULL);
312 state.bytes_total += tmp->size;
314 state.bytes_total_is_valid = true;
317 /* notify basebackup sink about start of backup */
318 bbsink_begin_backup(sink, &state, SINK_BUFFER_LENGTH);
320 /* Send off our tablespaces one by one */
321 foreach(lc, state.tablespaces)
323 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
325 if (ti->path == NULL)
327 struct stat statbuf;
328 bool sendtblspclinks = true;
330 bbsink_begin_archive(sink, "base.tar");
332 /* In the main tar, include the backup_label first... */
333 sendFileWithContent(sink, BACKUP_LABEL_FILE, labelfile->data,
334 &manifest);
336 /* Then the tablespace_map file, if required... */
337 if (opt->sendtblspcmapfile)
339 sendFileWithContent(sink, TABLESPACE_MAP, tblspc_map_file->data,
340 &manifest);
341 sendtblspclinks = false;
344 /* Then the bulk of the files... */
345 sendDir(sink, ".", 1, false, state.tablespaces,
346 sendtblspclinks, &manifest, NULL);
348 /* ... and pg_control after everything else. */
349 if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
350 ereport(ERROR,
351 (errcode_for_file_access(),
352 errmsg("could not stat file \"%s\": %m",
353 XLOG_CONTROL_FILE)));
354 sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
355 false, InvalidOid, &manifest, NULL);
357 else
359 char *archive_name = psprintf("%s.tar", ti->oid);
361 bbsink_begin_archive(sink, archive_name);
363 sendTablespace(sink, ti->path, ti->oid, false, &manifest);
367 * If we're including WAL, and this is the main data directory we
368 * don't treat this as the end of the tablespace. Instead, we will
369 * include the xlog files below and stop afterwards. This is safe
370 * since the main data directory is always sent *last*.
372 if (opt->includewal && ti->path == NULL)
374 Assert(lnext(state.tablespaces, lc) == NULL);
376 else
378 /* Properly terminate the tarfile. */
379 StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ,
380 "BLCKSZ too small for 2 tar blocks");
381 memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
382 bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
384 /* OK, that's the end of the archive. */
385 bbsink_end_archive(sink);
389 basebackup_progress_wait_wal_archive(&state);
390 endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
392 PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
395 if (opt->includewal)
398 * We've left the last tar file "open", so we can now append the
399 * required WAL files to it.
401 char pathbuf[MAXPGPATH];
402 XLogSegNo segno;
403 XLogSegNo startsegno;
404 XLogSegNo endsegno;
405 struct stat statbuf;
406 List *historyFileList = NIL;
407 List *walFileList = NIL;
408 char firstoff[MAXFNAMELEN];
409 char lastoff[MAXFNAMELEN];
410 DIR *dir;
411 struct dirent *de;
412 ListCell *lc;
413 TimeLineID tli;
415 basebackup_progress_transfer_wal();
418 * I'd rather not worry about timelines here, so scan pg_wal and
419 * include all WAL files in the range between 'startptr' and 'endptr',
420 * regardless of the timeline the file is stamped with. If there are
421 * some spurious WAL files belonging to timelines that don't belong in
422 * this server's history, they will be included too. Normally there
423 * shouldn't be such files, but if there are, there's little harm in
424 * including them.
426 XLByteToSeg(state.startptr, startsegno, wal_segment_size);
427 XLogFileName(firstoff, state.starttli, startsegno, wal_segment_size);
428 XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
429 XLogFileName(lastoff, endtli, endsegno, wal_segment_size);
431 dir = AllocateDir("pg_wal");
432 while ((de = ReadDir(dir, "pg_wal")) != NULL)
434 /* Does it look like a WAL segment, and is it in the range? */
435 if (IsXLogFileName(de->d_name) &&
436 strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
437 strcmp(de->d_name + 8, lastoff + 8) <= 0)
439 walFileList = lappend(walFileList, pstrdup(de->d_name));
441 /* Does it look like a timeline history file? */
442 else if (IsTLHistoryFileName(de->d_name))
444 historyFileList = lappend(historyFileList, pstrdup(de->d_name));
447 FreeDir(dir);
450 * Before we go any further, check that none of the WAL segments we
451 * need were removed.
453 CheckXLogRemoved(startsegno, state.starttli);
456 * Sort the WAL filenames. We want to send the files in order from
457 * oldest to newest, to reduce the chance that a file is recycled
458 * before we get a chance to send it over.
460 list_sort(walFileList, compareWalFileNames);
463 * There must be at least one xlog file in the pg_wal directory, since
464 * we are doing backup-including-xlog.
466 if (walFileList == NIL)
467 ereport(ERROR,
468 (errmsg("could not find any WAL files")));
471 * Sanity check: the first and last segment should cover startptr and
472 * endptr, with no gaps in between.
474 XLogFromFileName((char *) linitial(walFileList),
475 &tli, &segno, wal_segment_size);
476 if (segno != startsegno)
478 char startfname[MAXFNAMELEN];
480 XLogFileName(startfname, state.starttli, startsegno,
481 wal_segment_size);
482 ereport(ERROR,
483 (errmsg("could not find WAL file \"%s\"", startfname)));
485 foreach(lc, walFileList)
487 char *walFileName = (char *) lfirst(lc);
488 XLogSegNo currsegno = segno;
489 XLogSegNo nextsegno = segno + 1;
491 XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
492 if (!(nextsegno == segno || currsegno == segno))
494 char nextfname[MAXFNAMELEN];
496 XLogFileName(nextfname, tli, nextsegno, wal_segment_size);
497 ereport(ERROR,
498 (errmsg("could not find WAL file \"%s\"", nextfname)));
501 if (segno != endsegno)
503 char endfname[MAXFNAMELEN];
505 XLogFileName(endfname, endtli, endsegno, wal_segment_size);
506 ereport(ERROR,
507 (errmsg("could not find WAL file \"%s\"", endfname)));
510 /* Ok, we have everything we need. Send the WAL files. */
511 foreach(lc, walFileList)
513 char *walFileName = (char *) lfirst(lc);
514 int fd;
515 size_t cnt;
516 pgoff_t len = 0;
518 snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
519 XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
521 fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
522 if (fd < 0)
524 int save_errno = errno;
527 * Most likely reason for this is that the file was already
528 * removed by a checkpoint, so check for that to get a better
529 * error message.
531 CheckXLogRemoved(segno, tli);
533 errno = save_errno;
534 ereport(ERROR,
535 (errcode_for_file_access(),
536 errmsg("could not open file \"%s\": %m", pathbuf)));
539 if (fstat(fd, &statbuf) != 0)
540 ereport(ERROR,
541 (errcode_for_file_access(),
542 errmsg("could not stat file \"%s\": %m",
543 pathbuf)));
544 if (statbuf.st_size != wal_segment_size)
546 CheckXLogRemoved(segno, tli);
547 ereport(ERROR,
548 (errcode_for_file_access(),
549 errmsg("unexpected WAL file size \"%s\"", walFileName)));
552 /* send the WAL file itself */
553 _tarWriteHeader(sink, pathbuf, NULL, &statbuf, false);
555 while ((cnt = basebackup_read_file(fd, sink->bbs_buffer,
556 Min(sink->bbs_buffer_length,
557 wal_segment_size - len),
558 len, pathbuf, true)) > 0)
560 CheckXLogRemoved(segno, tli);
561 bbsink_archive_contents(sink, cnt);
563 len += cnt;
565 if (len == wal_segment_size)
566 break;
569 if (len != wal_segment_size)
571 CheckXLogRemoved(segno, tli);
572 ereport(ERROR,
573 (errcode_for_file_access(),
574 errmsg("unexpected WAL file size \"%s\"", walFileName)));
578 * wal_segment_size is a multiple of TAR_BLOCK_SIZE, so no need
579 * for padding.
581 Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
583 CloseTransientFile(fd);
586 * Mark file as archived, otherwise files can get archived again
587 * after promotion of a new node. This is in line with
588 * walreceiver.c always doing an XLogArchiveForceDone() after a
589 * complete segment.
591 StatusFilePath(pathbuf, walFileName, ".done");
592 sendFileWithContent(sink, pathbuf, "", &manifest);
596 * Send timeline history files too. Only the latest timeline history
597 * file is required for recovery, and even that only if there happens
598 * to be a timeline switch in the first WAL segment that contains the
599 * checkpoint record, or if we're taking a base backup from a standby
600 * server and the target timeline changes while the backup is taken.
601 * But they are small and highly useful for debugging purposes, so
602 * better include them all, always.
604 foreach(lc, historyFileList)
606 char *fname = lfirst(lc);
608 snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
610 if (lstat(pathbuf, &statbuf) != 0)
611 ereport(ERROR,
612 (errcode_for_file_access(),
613 errmsg("could not stat file \"%s\": %m", pathbuf)));
615 sendFile(sink, pathbuf, pathbuf, &statbuf, false, InvalidOid,
616 &manifest, NULL);
618 /* unconditionally mark file as archived */
619 StatusFilePath(pathbuf, fname, ".done");
620 sendFileWithContent(sink, pathbuf, "", &manifest);
623 /* Properly terminate the tar file. */
624 StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ,
625 "BLCKSZ too small for 2 tar blocks");
626 memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
627 bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
629 /* OK, that's the end of the archive. */
630 bbsink_end_archive(sink);
633 AddWALInfoToBackupManifest(&manifest, state.startptr, state.starttli,
634 endptr, endtli);
636 SendBackupManifest(&manifest, sink);
638 bbsink_end_backup(sink, endptr, endtli);
640 if (total_checksum_failures)
642 if (total_checksum_failures > 1)
643 ereport(WARNING,
644 (errmsg_plural("%lld total checksum verification failure",
645 "%lld total checksum verification failures",
646 total_checksum_failures,
647 total_checksum_failures)));
649 ereport(ERROR,
650 (errcode(ERRCODE_DATA_CORRUPTED),
651 errmsg("checksum verification failure during base backup")));
655 * Make sure to free the manifest before the resource owners as manifests
656 * use cryptohash contexts that may depend on resource owners (like
657 * OpenSSL).
659 FreeBackupManifest(&manifest);
661 /* clean up the resource owner we created */
662 WalSndResourceCleanup(true);
664 basebackup_progress_done();
668 * list_sort comparison function, to compare log/seg portion of WAL segment
669 * filenames, ignoring the timeline portion.
671 static int
672 compareWalFileNames(const ListCell *a, const ListCell *b)
674 char *fna = (char *) lfirst(a);
675 char *fnb = (char *) lfirst(b);
677 return strcmp(fna + 8, fnb + 8);
681 * Parse the base backup options passed down by the parser
683 static void
684 parse_basebackup_options(List *options, basebackup_options *opt)
686 ListCell *lopt;
687 bool o_label = false;
688 bool o_progress = false;
689 bool o_checkpoint = false;
690 bool o_nowait = false;
691 bool o_wal = false;
692 bool o_maxrate = false;
693 bool o_tablespace_map = false;
694 bool o_noverify_checksums = false;
695 bool o_manifest = false;
696 bool o_manifest_checksums = false;
698 MemSet(opt, 0, sizeof(*opt));
699 opt->manifest = MANIFEST_OPTION_NO;
700 opt->manifest_checksum_type = CHECKSUM_TYPE_CRC32C;
702 foreach(lopt, options)
704 DefElem *defel = (DefElem *) lfirst(lopt);
706 if (strcmp(defel->defname, "label") == 0)
708 if (o_label)
709 ereport(ERROR,
710 (errcode(ERRCODE_SYNTAX_ERROR),
711 errmsg("duplicate option \"%s\"", defel->defname)));
712 opt->label = defGetString(defel);
713 o_label = true;
715 else if (strcmp(defel->defname, "progress") == 0)
717 if (o_progress)
718 ereport(ERROR,
719 (errcode(ERRCODE_SYNTAX_ERROR),
720 errmsg("duplicate option \"%s\"", defel->defname)));
721 opt->progress = defGetBoolean(defel);
722 o_progress = true;
724 else if (strcmp(defel->defname, "checkpoint") == 0)
726 char *optval = defGetString(defel);
728 if (o_checkpoint)
729 ereport(ERROR,
730 (errcode(ERRCODE_SYNTAX_ERROR),
731 errmsg("duplicate option \"%s\"", defel->defname)));
732 if (pg_strcasecmp(optval, "fast") == 0)
733 opt->fastcheckpoint = true;
734 else if (pg_strcasecmp(optval, "spread") == 0)
735 opt->fastcheckpoint = false;
736 else
737 ereport(ERROR,
738 (errcode(ERRCODE_SYNTAX_ERROR),
739 errmsg("unrecognized checkpoint type: \"%s\"",
740 optval)));
741 o_checkpoint = true;
743 else if (strcmp(defel->defname, "wait") == 0)
745 if (o_nowait)
746 ereport(ERROR,
747 (errcode(ERRCODE_SYNTAX_ERROR),
748 errmsg("duplicate option \"%s\"", defel->defname)));
749 opt->nowait = !defGetBoolean(defel);
750 o_nowait = true;
752 else if (strcmp(defel->defname, "wal") == 0)
754 if (o_wal)
755 ereport(ERROR,
756 (errcode(ERRCODE_SYNTAX_ERROR),
757 errmsg("duplicate option \"%s\"", defel->defname)));
758 opt->includewal = defGetBoolean(defel);
759 o_wal = true;
761 else if (strcmp(defel->defname, "max_rate") == 0)
763 int64 maxrate;
765 if (o_maxrate)
766 ereport(ERROR,
767 (errcode(ERRCODE_SYNTAX_ERROR),
768 errmsg("duplicate option \"%s\"", defel->defname)));
770 maxrate = defGetInt64(defel);
771 if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
772 ereport(ERROR,
773 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
774 errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
775 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
777 opt->maxrate = (uint32) maxrate;
778 o_maxrate = true;
780 else if (strcmp(defel->defname, "tablespace_map") == 0)
782 if (o_tablespace_map)
783 ereport(ERROR,
784 (errcode(ERRCODE_SYNTAX_ERROR),
785 errmsg("duplicate option \"%s\"", defel->defname)));
786 opt->sendtblspcmapfile = defGetBoolean(defel);
787 o_tablespace_map = true;
789 else if (strcmp(defel->defname, "verify_checksums") == 0)
791 if (o_noverify_checksums)
792 ereport(ERROR,
793 (errcode(ERRCODE_SYNTAX_ERROR),
794 errmsg("duplicate option \"%s\"", defel->defname)));
795 noverify_checksums = !defGetBoolean(defel);
796 o_noverify_checksums = true;
798 else if (strcmp(defel->defname, "manifest") == 0)
800 char *optval = defGetString(defel);
801 bool manifest_bool;
803 if (o_manifest)
804 ereport(ERROR,
805 (errcode(ERRCODE_SYNTAX_ERROR),
806 errmsg("duplicate option \"%s\"", defel->defname)));
807 if (parse_bool(optval, &manifest_bool))
809 if (manifest_bool)
810 opt->manifest = MANIFEST_OPTION_YES;
811 else
812 opt->manifest = MANIFEST_OPTION_NO;
814 else if (pg_strcasecmp(optval, "force-encode") == 0)
815 opt->manifest = MANIFEST_OPTION_FORCE_ENCODE;
816 else
817 ereport(ERROR,
818 (errcode(ERRCODE_SYNTAX_ERROR),
819 errmsg("unrecognized manifest option: \"%s\"",
820 optval)));
821 o_manifest = true;
823 else if (strcmp(defel->defname, "manifest_checksums") == 0)
825 char *optval = defGetString(defel);
827 if (o_manifest_checksums)
828 ereport(ERROR,
829 (errcode(ERRCODE_SYNTAX_ERROR),
830 errmsg("duplicate option \"%s\"", defel->defname)));
831 if (!pg_checksum_parse_type(optval,
832 &opt->manifest_checksum_type))
833 ereport(ERROR,
834 (errcode(ERRCODE_SYNTAX_ERROR),
835 errmsg("unrecognized checksum algorithm: \"%s\"",
836 optval)));
837 o_manifest_checksums = true;
839 else
840 ereport(ERROR,
841 errcode(ERRCODE_SYNTAX_ERROR),
842 errmsg("option \"%s\" not recognized",
843 defel->defname));
845 if (opt->label == NULL)
846 opt->label = "base backup";
847 if (opt->manifest == MANIFEST_OPTION_NO)
849 if (o_manifest_checksums)
850 ereport(ERROR,
851 (errcode(ERRCODE_SYNTAX_ERROR),
852 errmsg("manifest checksums require a backup manifest")));
853 opt->manifest_checksum_type = CHECKSUM_TYPE_NONE;
859 * SendBaseBackup() - send a complete base backup.
861 * The function will put the system into backup mode like pg_start_backup()
862 * does, so that the backup is consistent even though we read directly from
863 * the filesystem, bypassing the buffer cache.
865 void
866 SendBaseBackup(BaseBackupCmd *cmd)
868 basebackup_options opt;
869 bbsink *sink;
871 parse_basebackup_options(cmd->options, &opt);
873 WalSndSetState(WALSNDSTATE_BACKUP);
875 if (update_process_title)
877 char activitymsg[50];
879 snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
880 opt.label);
881 set_ps_display(activitymsg);
884 /* Create a basic basebackup sink. */
885 sink = bbsink_copytblspc_new();
887 /* Set up network throttling, if client requested it */
888 if (opt.maxrate > 0)
889 sink = bbsink_throttle_new(sink, opt.maxrate);
891 /* Set up progress reporting. */
892 sink = bbsink_progress_new(sink, opt.progress);
895 * Perform the base backup, but make sure we clean up the bbsink even if
896 * an error occurs.
898 PG_TRY();
900 perform_base_backup(&opt, sink);
902 PG_FINALLY();
904 bbsink_cleanup(sink);
906 PG_END_TRY();
910 * Inject a file with given name and content in the output tar stream.
912 static void
913 sendFileWithContent(bbsink *sink, const char *filename, const char *content,
914 backup_manifest_info *manifest)
916 struct stat statbuf;
917 int bytes_done = 0,
918 len;
919 pg_checksum_context checksum_ctx;
921 if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0)
922 elog(ERROR, "could not initialize checksum of file \"%s\"",
923 filename);
925 len = strlen(content);
928 * Construct a stat struct for the backup_label file we're injecting in
929 * the tar.
931 /* Windows doesn't have the concept of uid and gid */
932 #ifdef WIN32
933 statbuf.st_uid = 0;
934 statbuf.st_gid = 0;
935 #else
936 statbuf.st_uid = geteuid();
937 statbuf.st_gid = getegid();
938 #endif
939 statbuf.st_mtime = time(NULL);
940 statbuf.st_mode = pg_file_create_mode;
941 statbuf.st_size = len;
943 _tarWriteHeader(sink, filename, NULL, &statbuf, false);
945 if (pg_checksum_update(&checksum_ctx, (uint8 *) content, len) < 0)
946 elog(ERROR, "could not update checksum of file \"%s\"",
947 filename);
949 while (bytes_done < len)
951 size_t remaining = len - bytes_done;
952 size_t nbytes = Min(sink->bbs_buffer_length, remaining);
954 memcpy(sink->bbs_buffer, content, nbytes);
955 bbsink_archive_contents(sink, nbytes);
956 bytes_done += nbytes;
959 _tarWritePadding(sink, len);
961 AddFileToBackupManifest(manifest, NULL, filename, len,
962 (pg_time_t) statbuf.st_mtime, &checksum_ctx);
966 * Include the tablespace directory pointed to by 'path' in the output tar
967 * stream. If 'sizeonly' is true, we just calculate a total length and return
968 * it, without actually sending anything.
970 * Only used to send auxiliary tablespaces, not PGDATA.
972 static int64
973 sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
974 backup_manifest_info *manifest)
976 int64 size;
977 char pathbuf[MAXPGPATH];
978 struct stat statbuf;
981 * 'path' points to the tablespace location, but we only want to include
982 * the version directory in it that belongs to us.
984 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
985 TABLESPACE_VERSION_DIRECTORY);
988 * Store a directory entry in the tar file so we get the permissions
989 * right.
991 if (lstat(pathbuf, &statbuf) != 0)
993 if (errno != ENOENT)
994 ereport(ERROR,
995 (errcode_for_file_access(),
996 errmsg("could not stat file or directory \"%s\": %m",
997 pathbuf)));
999 /* If the tablespace went away while scanning, it's no error. */
1000 return 0;
1003 size = _tarWriteHeader(sink, TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
1004 sizeonly);
1006 /* Send all the files in the tablespace version directory */
1007 size += sendDir(sink, pathbuf, strlen(path), sizeonly, NIL, true, manifest,
1008 spcoid);
1010 return size;
1014 * Include all files from the given directory in the output tar stream. If
1015 * 'sizeonly' is true, we just calculate a total length and return it, without
1016 * actually sending anything.
1018 * Omit any directory in the tablespaces list, to avoid backing up
1019 * tablespaces twice when they were created inside PGDATA.
1021 * If sendtblspclinks is true, we need to include symlink
1022 * information in the tar file. If not, we can skip that
1023 * as it will be sent separately in the tablespace_map file.
1025 static int64
1026 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
1027 List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest,
1028 const char *spcoid)
1030 DIR *dir;
1031 struct dirent *de;
1032 char pathbuf[MAXPGPATH * 2];
1033 struct stat statbuf;
1034 int64 size = 0;
1035 const char *lastDir; /* Split last dir from parent path. */
1036 bool isDbDir = false; /* Does this directory contain relations? */
1039 * Determine if the current path is a database directory that can contain
1040 * relations.
1042 * Start by finding the location of the delimiter between the parent path
1043 * and the current path.
1045 lastDir = last_dir_separator(path);
1047 /* Does this path look like a database path (i.e. all digits)? */
1048 if (lastDir != NULL &&
1049 strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
1051 /* Part of path that contains the parent directory. */
1052 int parentPathLen = lastDir - path;
1055 * Mark path as a database directory if the parent path is either
1056 * $PGDATA/base or a tablespace version path.
1058 if (strncmp(path, "./base", parentPathLen) == 0 ||
1059 (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
1060 strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
1061 TABLESPACE_VERSION_DIRECTORY,
1062 sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
1063 isDbDir = true;
1066 dir = AllocateDir(path);
1067 while ((de = ReadDir(dir, path)) != NULL)
1069 int excludeIdx;
1070 bool excludeFound;
1071 ForkNumber relForkNum; /* Type of fork if file is a relation */
1072 int relOidChars; /* Chars in filename that are the rel oid */
1074 /* Skip special stuff */
1075 if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1076 continue;
1078 /* Skip temporary files */
1079 if (strncmp(de->d_name,
1080 PG_TEMP_FILE_PREFIX,
1081 strlen(PG_TEMP_FILE_PREFIX)) == 0)
1082 continue;
1085 * Check if the postmaster has signaled us to exit, and abort with an
1086 * error in that case. The error handler further up will call
1087 * do_pg_abort_backup() for us. Also check that if the backup was
1088 * started while still in recovery, the server wasn't promoted.
1089 * do_pg_stop_backup() will check that too, but it's better to stop
1090 * the backup early than continue to the end and fail there.
1092 CHECK_FOR_INTERRUPTS();
1093 if (RecoveryInProgress() != backup_started_in_recovery)
1094 ereport(ERROR,
1095 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1096 errmsg("the standby was promoted during online backup"),
1097 errhint("This means that the backup being taken is corrupt "
1098 "and should not be used. "
1099 "Try taking another online backup.")));
1101 /* Scan for files that should be excluded */
1102 excludeFound = false;
1103 for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
1105 int cmplen = strlen(excludeFiles[excludeIdx].name);
1107 if (!excludeFiles[excludeIdx].match_prefix)
1108 cmplen++;
1109 if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
1111 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
1112 excludeFound = true;
1113 break;
1117 if (excludeFound)
1118 continue;
1120 /* Exclude all forks for unlogged tables except the init fork */
1121 if (isDbDir &&
1122 parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
1123 &relForkNum))
1125 /* Never exclude init forks */
1126 if (relForkNum != INIT_FORKNUM)
1128 char initForkFile[MAXPGPATH];
1129 char relOid[OIDCHARS + 1];
1132 * If any other type of fork, check if there is an init fork
1133 * with the same OID. If so, the file can be excluded.
1135 memcpy(relOid, de->d_name, relOidChars);
1136 relOid[relOidChars] = '\0';
1137 snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
1138 path, relOid);
1140 if (lstat(initForkFile, &statbuf) == 0)
1142 elog(DEBUG2,
1143 "unlogged relation file \"%s\" excluded from backup",
1144 de->d_name);
1146 continue;
1151 /* Exclude temporary relations */
1152 if (isDbDir && looks_like_temp_rel_name(de->d_name))
1154 elog(DEBUG2,
1155 "temporary relation file \"%s\" excluded from backup",
1156 de->d_name);
1158 continue;
1161 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
1163 /* Skip pg_control here to back up it last */
1164 if (strcmp(pathbuf, "./global/pg_control") == 0)
1165 continue;
1167 if (lstat(pathbuf, &statbuf) != 0)
1169 if (errno != ENOENT)
1170 ereport(ERROR,
1171 (errcode_for_file_access(),
1172 errmsg("could not stat file or directory \"%s\": %m",
1173 pathbuf)));
1175 /* If the file went away while scanning, it's not an error. */
1176 continue;
1179 /* Scan for directories whose contents should be excluded */
1180 excludeFound = false;
1181 for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
1183 if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
1185 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
1186 convert_link_to_directory(pathbuf, &statbuf);
1187 size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
1188 &statbuf, sizeonly);
1189 excludeFound = true;
1190 break;
1194 if (excludeFound)
1195 continue;
1198 * Exclude contents of directory specified by statrelpath if not set
1199 * to the default (pg_stat_tmp) which is caught in the loop above.
1201 if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
1203 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
1204 convert_link_to_directory(pathbuf, &statbuf);
1205 size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
1206 &statbuf, sizeonly);
1207 continue;
1211 * We can skip pg_wal, the WAL segments need to be fetched from the
1212 * WAL archive anyway. But include it as an empty directory anyway, so
1213 * we get permissions right.
1215 if (strcmp(pathbuf, "./pg_wal") == 0)
1217 /* If pg_wal is a symlink, write it as a directory anyway */
1218 convert_link_to_directory(pathbuf, &statbuf);
1219 size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
1220 &statbuf, sizeonly);
1223 * Also send archive_status directory (by hackishly reusing
1224 * statbuf from above ...).
1226 size += _tarWriteHeader(sink, "./pg_wal/archive_status", NULL,
1227 &statbuf, sizeonly);
1229 continue; /* don't recurse into pg_wal */
1232 /* Allow symbolic links in pg_tblspc only */
1233 if (strcmp(path, "./pg_tblspc") == 0 &&
1234 #ifndef WIN32
1235 S_ISLNK(statbuf.st_mode)
1236 #else
1237 pgwin32_is_junction(pathbuf)
1238 #endif
1241 #if defined(HAVE_READLINK) || defined(WIN32)
1242 char linkpath[MAXPGPATH];
1243 int rllen;
1245 rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1246 if (rllen < 0)
1247 ereport(ERROR,
1248 (errcode_for_file_access(),
1249 errmsg("could not read symbolic link \"%s\": %m",
1250 pathbuf)));
1251 if (rllen >= sizeof(linkpath))
1252 ereport(ERROR,
1253 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1254 errmsg("symbolic link \"%s\" target is too long",
1255 pathbuf)));
1256 linkpath[rllen] = '\0';
1258 size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, linkpath,
1259 &statbuf, sizeonly);
1260 #else
1263 * If the platform does not have symbolic links, it should not be
1264 * possible to have tablespaces - clearly somebody else created
1265 * them. Warn about it and ignore.
1267 ereport(WARNING,
1268 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1269 errmsg("tablespaces are not supported on this platform")));
1270 continue;
1271 #endif /* HAVE_READLINK */
1273 else if (S_ISDIR(statbuf.st_mode))
1275 bool skip_this_dir = false;
1276 ListCell *lc;
1279 * Store a directory entry in the tar file so we can get the
1280 * permissions right.
1282 size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL, &statbuf,
1283 sizeonly);
1286 * Call ourselves recursively for a directory, unless it happens
1287 * to be a separate tablespace located within PGDATA.
1289 foreach(lc, tablespaces)
1291 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1294 * ti->rpath is the tablespace relative path within PGDATA, or
1295 * NULL if the tablespace has been properly located somewhere
1296 * else.
1298 * Skip past the leading "./" in pathbuf when comparing.
1300 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1302 skip_this_dir = true;
1303 break;
1308 * skip sending directories inside pg_tblspc, if not required.
1310 if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
1311 skip_this_dir = true;
1313 if (!skip_this_dir)
1314 size += sendDir(sink, pathbuf, basepathlen, sizeonly, tablespaces,
1315 sendtblspclinks, manifest, spcoid);
1317 else if (S_ISREG(statbuf.st_mode))
1319 bool sent = false;
1321 if (!sizeonly)
1322 sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf,
1323 true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
1324 manifest, spcoid);
1326 if (sent || sizeonly)
1328 /* Add size. */
1329 size += statbuf.st_size;
1331 /* Pad to a multiple of the tar block size. */
1332 size += tarPaddingBytesRequired(statbuf.st_size);
1334 /* Size of the header for the file. */
1335 size += TAR_BLOCK_SIZE;
1338 else
1339 ereport(WARNING,
1340 (errmsg("skipping special file \"%s\"", pathbuf)));
1342 FreeDir(dir);
1343 return size;
1347 * Check if a file should have its checksum validated.
1348 * We validate checksums on files in regular tablespaces
1349 * (including global and default) only, and in those there
1350 * are some files that are explicitly excluded.
1352 static bool
1353 is_checksummed_file(const char *fullpath, const char *filename)
1355 /* Check that the file is in a tablespace */
1356 if (strncmp(fullpath, "./global/", 9) == 0 ||
1357 strncmp(fullpath, "./base/", 7) == 0 ||
1358 strncmp(fullpath, "/", 1) == 0)
1360 int excludeIdx;
1362 /* Compare file against noChecksumFiles skip list */
1363 for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
1365 int cmplen = strlen(noChecksumFiles[excludeIdx].name);
1367 if (!noChecksumFiles[excludeIdx].match_prefix)
1368 cmplen++;
1369 if (strncmp(filename, noChecksumFiles[excludeIdx].name,
1370 cmplen) == 0)
1371 return false;
1374 return true;
1376 else
1377 return false;
/*****
 * Functions for handling tar file format
 *
 * Copied from pg_dump, but modified to work with libpq for sending
 */
1388 * Given the member, write the TAR header & send the file.
1390 * If 'missing_ok' is true, will not throw an error if the file is not found.
1392 * If dboid is anything other than InvalidOid then any checksum failures detected
1393 * will get reported to the stats collector.
1395 * Returns true if the file was successfully sent, false if 'missing_ok',
1396 * and the file did not exist.
1398 static bool
1399 sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
1400 struct stat *statbuf, bool missing_ok, Oid dboid,
1401 backup_manifest_info *manifest, const char *spcoid)
1403 int fd;
1404 BlockNumber blkno = 0;
1405 bool block_retry = false;
1406 uint16 checksum;
1407 int checksum_failures = 0;
1408 off_t cnt;
1409 int i;
1410 pgoff_t len = 0;
1411 char *page;
1412 PageHeader phdr;
1413 int segmentno = 0;
1414 char *segmentpath;
1415 bool verify_checksum = false;
1416 pg_checksum_context checksum_ctx;
1418 if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0)
1419 elog(ERROR, "could not initialize checksum of file \"%s\"",
1420 readfilename);
1422 fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
1423 if (fd < 0)
1425 if (errno == ENOENT && missing_ok)
1426 return false;
1427 ereport(ERROR,
1428 (errcode_for_file_access(),
1429 errmsg("could not open file \"%s\": %m", readfilename)));
1432 _tarWriteHeader(sink, tarfilename, NULL, statbuf, false);
1434 if (!noverify_checksums && DataChecksumsEnabled())
1436 char *filename;
1439 * Get the filename (excluding path). As last_dir_separator()
1440 * includes the last directory separator, we chop that off by
1441 * incrementing the pointer.
1443 filename = last_dir_separator(readfilename) + 1;
1445 if (is_checksummed_file(readfilename, filename))
1447 verify_checksum = true;
1450 * Cut off at the segment boundary (".") to get the segment number
1451 * in order to mix it into the checksum.
1453 segmentpath = strstr(filename, ".");
1454 if (segmentpath != NULL)
1456 segmentno = atoi(segmentpath + 1);
1457 if (segmentno == 0)
1458 ereport(ERROR,
1459 (errmsg("invalid segment number %d in file \"%s\"",
1460 segmentno, filename)));
1466 * Loop until we read the amount of data the caller told us to expect. The
1467 * file could be longer, if it was extended while we were sending it, but
1468 * for a base backup we can ignore such extended data. It will be restored
1469 * from WAL.
1471 while (len < statbuf->st_size)
1473 size_t remaining = statbuf->st_size - len;
1475 /* Try to read some more data. */
1476 cnt = basebackup_read_file(fd, sink->bbs_buffer,
1477 Min(sink->bbs_buffer_length, remaining),
1478 len, readfilename, true);
1481 * If we hit end-of-file, a concurrent truncation must have occurred.
1482 * That's not an error condition, because WAL replay will fix things
1483 * up.
1485 if (cnt == 0)
1486 break;
1489 * The checksums are verified at block level, so we iterate over the
1490 * buffer in chunks of BLCKSZ, after making sure that
1491 * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1492 * BLCKSZ bytes.
1494 Assert((sink->bbs_buffer_length % BLCKSZ) == 0);
1496 if (verify_checksum && (cnt % BLCKSZ != 0))
1498 ereport(WARNING,
1499 (errmsg("could not verify checksum in file \"%s\", block "
1500 "%u: read buffer size %d and page size %d "
1501 "differ",
1502 readfilename, blkno, (int) cnt, BLCKSZ)));
1503 verify_checksum = false;
1506 if (verify_checksum)
1508 for (i = 0; i < cnt / BLCKSZ; i++)
1510 page = sink->bbs_buffer + BLCKSZ * i;
1513 * Only check pages which have not been modified since the
1514 * start of the base backup. Otherwise, they might have been
1515 * written only halfway and the checksum would not be valid.
1516 * However, replaying WAL would reinstate the correct page in
1517 * this case. We also skip completely new pages, since they
1518 * don't have a checksum yet.
1520 if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr)
1522 checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1523 phdr = (PageHeader) page;
1524 if (phdr->pd_checksum != checksum)
1527 * Retry the block on the first failure. It's
1528 * possible that we read the first 4K page of the
1529 * block just before postgres updated the entire block
1530 * so it ends up looking torn to us. We only need to
1531 * retry once because the LSN should be updated to
1532 * something we can ignore on the next pass. If the
1533 * error happens again then it is a true validation
1534 * failure.
1536 if (block_retry == false)
1538 int reread_cnt;
1540 /* Reread the failed block */
1541 reread_cnt =
1542 basebackup_read_file(fd,
1543 sink->bbs_buffer + BLCKSZ * i,
1544 BLCKSZ, len + BLCKSZ * i,
1545 readfilename,
1546 false);
1547 if (reread_cnt == 0)
1550 * If we hit end-of-file, a concurrent
1551 * truncation must have occurred, so break out
1552 * of this loop just as if the initial fread()
1553 * returned 0. We'll drop through to the same
1554 * code that handles that case. (We must fix
1555 * up cnt first, though.)
1557 cnt = BLCKSZ * i;
1558 break;
1561 /* Set flag so we know a retry was attempted */
1562 block_retry = true;
1564 /* Reset loop to validate the block again */
1565 i--;
1566 continue;
1569 checksum_failures++;
1571 if (checksum_failures <= 5)
1572 ereport(WARNING,
1573 (errmsg("checksum verification failed in "
1574 "file \"%s\", block %u: calculated "
1575 "%X but expected %X",
1576 readfilename, blkno, checksum,
1577 phdr->pd_checksum)));
1578 if (checksum_failures == 5)
1579 ereport(WARNING,
1580 (errmsg("further checksum verification "
1581 "failures in file \"%s\" will not "
1582 "be reported", readfilename)));
1585 block_retry = false;
1586 blkno++;
1590 bbsink_archive_contents(sink, cnt);
1592 /* Also feed it to the checksum machinery. */
1593 if (pg_checksum_update(&checksum_ctx,
1594 (uint8 *) sink->bbs_buffer, cnt) < 0)
1595 elog(ERROR, "could not update checksum of base backup");
1597 len += cnt;
1600 /* If the file was truncated while we were sending it, pad it with zeros */
1601 while (len < statbuf->st_size)
1603 size_t remaining = statbuf->st_size - len;
1604 size_t nbytes = Min(sink->bbs_buffer_length, remaining);
1606 MemSet(sink->bbs_buffer, 0, nbytes);
1607 if (pg_checksum_update(&checksum_ctx,
1608 (uint8 *) sink->bbs_buffer,
1609 nbytes) < 0)
1610 elog(ERROR, "could not update checksum of base backup");
1611 bbsink_archive_contents(sink, nbytes);
1612 len += nbytes;
1616 * Pad to a block boundary, per tar format requirements. (This small piece
1617 * of data is probably not worth throttling, and is not checksummed
1618 * because it's not actually part of the file.)
1620 _tarWritePadding(sink, len);
1622 CloseTransientFile(fd);
1624 if (checksum_failures > 1)
1626 ereport(WARNING,
1627 (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
1628 "file \"%s\" has a total of %d checksum verification failures",
1629 checksum_failures,
1630 readfilename, checksum_failures)));
1632 pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
1635 total_checksum_failures += checksum_failures;
1637 AddFileToBackupManifest(manifest, spcoid, tarfilename, statbuf->st_size,
1638 (pg_time_t) statbuf->st_mtime, &checksum_ctx);
1640 return true;
1643 static int64
1644 _tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget,
1645 struct stat *statbuf, bool sizeonly)
1647 enum tarError rc;
1649 if (!sizeonly)
1652 * As of this writing, the smallest supported block size is 1kB, which
1653 * is twice TAR_BLOCK_SIZE. Since the buffer size is required to be a
1654 * multiple of BLCKSZ, it should be safe to assume that the buffer is
1655 * large enough to fit an entire tar block. We double-check by means
1656 * of these assertions.
1658 StaticAssertStmt(TAR_BLOCK_SIZE <= BLCKSZ,
1659 "BLCKSZ too small for tar block");
1660 Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE);
1662 rc = tarCreateHeader(sink->bbs_buffer, filename, linktarget,
1663 statbuf->st_size, statbuf->st_mode,
1664 statbuf->st_uid, statbuf->st_gid,
1665 statbuf->st_mtime);
1667 switch (rc)
1669 case TAR_OK:
1670 break;
1671 case TAR_NAME_TOO_LONG:
1672 ereport(ERROR,
1673 (errmsg("file name too long for tar format: \"%s\"",
1674 filename)));
1675 break;
1676 case TAR_SYMLINK_TOO_LONG:
1677 ereport(ERROR,
1678 (errmsg("symbolic link target too long for tar format: "
1679 "file name \"%s\", target \"%s\"",
1680 filename, linktarget)));
1681 break;
1682 default:
1683 elog(ERROR, "unrecognized tar error: %d", rc);
1686 bbsink_archive_contents(sink, TAR_BLOCK_SIZE);
1689 return TAR_BLOCK_SIZE;
1693 * Pad with zero bytes out to a multiple of TAR_BLOCK_SIZE.
1695 static void
1696 _tarWritePadding(bbsink *sink, int len)
1698 int pad = tarPaddingBytesRequired(len);
1701 * As in _tarWriteHeader, it should be safe to assume that the buffer is
1702 * large enough that we don't need to do this in multiple chunks.
1704 Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE);
1705 Assert(pad <= TAR_BLOCK_SIZE);
1707 if (pad > 0)
1709 MemSet(sink->bbs_buffer, 0, pad);
1710 bbsink_archive_contents(sink, pad);
1715 * If the entry in statbuf is a link, then adjust statbuf to make it look like a
1716 * directory, so that it will be written that way.
1718 static void
1719 convert_link_to_directory(const char *pathbuf, struct stat *statbuf)
1721 /* If symlink, write it as a directory anyway */
1722 #ifndef WIN32
1723 if (S_ISLNK(statbuf->st_mode))
1724 #else
1725 if (pgwin32_is_junction(pathbuf))
1726 #endif
1727 statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
1731 * Read some data from a file, setting a wait event and reporting any error
1732 * encountered.
1734 * If partial_read_ok is false, also report an error if the number of bytes
1735 * read is not equal to the number of bytes requested.
1737 * Returns the number of bytes read.
1739 static int
1740 basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
1741 const char *filename, bool partial_read_ok)
1743 int rc;
1745 pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
1746 rc = pg_pread(fd, buf, nbytes, offset);
1747 pgstat_report_wait_end();
1749 if (rc < 0)
1750 ereport(ERROR,
1751 (errcode_for_file_access(),
1752 errmsg("could not read file \"%s\": %m", filename)));
1753 if (!partial_read_ok && rc > 0 && rc != nbytes)
1754 ereport(ERROR,
1755 (errcode_for_file_access(),
1756 errmsg("could not read file \"%s\": read %d of %zu",
1757 filename, rc, nbytes)));
1759 return rc;