Update copyright for 2022
[pgsql.git] / src / bin / pg_rewind / libpq_source.c
blob997d4e2b482255489c767767e361c9bb9610cf06
1 /*-------------------------------------------------------------------------
3 * libpq_source.c
4 * Functions for fetching files from a remote server via libpq.
6 * Copyright (c) 2013-2022, PostgreSQL Global Development Group
8 *-------------------------------------------------------------------------
9 */
10 #include "postgres_fe.h"
12 #include "catalog/pg_type_d.h"
13 #include "common/connect.h"
14 #include "datapagemap.h"
15 #include "file_ops.h"
16 #include "filemap.h"
17 #include "lib/stringinfo.h"
18 #include "pg_rewind.h"
19 #include "port/pg_bswap.h"
20 #include "rewind_source.h"
/*
 * File contents are requested from the source in pieces of at most
 * MAX_CHUNK_SIZE bytes each, and one query carries at most
 * MAX_CHUNKS_PER_QUERY such pieces.
 */
#define MAX_CHUNK_SIZE (1024 * 1024)
#define MAX_CHUNKS_PER_QUERY 1000
/*
 * One queued request for a byte range of a single file on the source.
 */
typedef struct
{
	const char *path;			/* path relative to data directory root */
	off_t		offset;			/* where in the file the range begins */
	size_t		length;			/* number of bytes wanted */
} fetch_range_request;
37 typedef struct
39 rewind_source common; /* common interface functions */
41 PGconn *conn;
44 * Queue of chunks that have been requested with the queue_fetch_range()
45 * function, but have not been fetched from the remote server yet.
47 int num_requests;
48 fetch_range_request request_queue[MAX_CHUNKS_PER_QUERY];
50 /* temporary space for process_queued_fetch_requests() */
51 StringInfoData paths;
52 StringInfoData offsets;
53 StringInfoData lengths;
54 } libpq_source;
56 static void init_libpq_conn(PGconn *conn);
57 static char *run_simple_query(PGconn *conn, const char *sql);
58 static void run_simple_command(PGconn *conn, const char *sql);
59 static void appendArrayEscapedString(StringInfo buf, const char *str);
61 static void process_queued_fetch_requests(libpq_source *src);
63 /* public interface functions */
64 static void libpq_traverse_files(rewind_source *source,
65 process_file_callback_t callback);
66 static void libpq_queue_fetch_range(rewind_source *source, const char *path,
67 off_t off, size_t len);
68 static void libpq_finish_fetch(rewind_source *source);
69 static char *libpq_fetch_file(rewind_source *source, const char *path,
70 size_t *filesize);
71 static XLogRecPtr libpq_get_current_wal_insert_lsn(rewind_source *source);
72 static void libpq_destroy(rewind_source *source);
75 * Create a new libpq source.
77 * The caller has already established the connection, but should not try
78 * to use it while the source is active.
80 rewind_source *
81 init_libpq_source(PGconn *conn)
83 libpq_source *src;
85 init_libpq_conn(conn);
87 src = pg_malloc0(sizeof(libpq_source));
89 src->common.traverse_files = libpq_traverse_files;
90 src->common.fetch_file = libpq_fetch_file;
91 src->common.queue_fetch_range = libpq_queue_fetch_range;
92 src->common.finish_fetch = libpq_finish_fetch;
93 src->common.get_current_wal_insert_lsn = libpq_get_current_wal_insert_lsn;
94 src->common.destroy = libpq_destroy;
96 src->conn = conn;
98 initStringInfo(&src->paths);
99 initStringInfo(&src->offsets);
100 initStringInfo(&src->lengths);
102 return &src->common;
106 * Initialize a libpq connection for use.
108 static void
109 init_libpq_conn(PGconn *conn)
111 PGresult *res;
112 char *str;
114 /* disable all types of timeouts */
115 run_simple_command(conn, "SET statement_timeout = 0");
116 run_simple_command(conn, "SET lock_timeout = 0");
117 run_simple_command(conn, "SET idle_in_transaction_session_timeout = 0");
120 * we don't intend to do any updates, put the connection in read-only mode
121 * to keep us honest
123 run_simple_command(conn, "SET default_transaction_read_only = on");
125 /* secure search_path */
126 res = PQexec(conn, ALWAYS_SECURE_SEARCH_PATH_SQL);
127 if (PQresultStatus(res) != PGRES_TUPLES_OK)
128 pg_fatal("could not clear search_path: %s",
129 PQresultErrorMessage(res));
130 PQclear(res);
133 * Also check that full_page_writes is enabled. We can get torn pages if
134 * a page is modified while we read it with pg_read_binary_file(), and we
135 * rely on full page images to fix them.
137 str = run_simple_query(conn, "SHOW full_page_writes");
138 if (strcmp(str, "on") != 0)
139 pg_fatal("full_page_writes must be enabled in the source server");
140 pg_free(str);
142 /* Prepare a statement we'll use to fetch files */
143 res = PQprepare(conn, "fetch_chunks_stmt",
144 "SELECT path, begin,\n"
145 " pg_read_binary_file(path, begin, len, true) AS chunk\n"
146 "FROM unnest ($1::text[], $2::int8[], $3::int4[]) as x(path, begin, len)",
147 3, NULL);
149 if (PQresultStatus(res) != PGRES_COMMAND_OK)
150 pg_fatal("could not prepare statement to fetch file contents: %s",
151 PQresultErrorMessage(res));
152 PQclear(res);
156 * Run a query that returns a single value.
158 * The result should be pg_free'd after use.
160 static char *
161 run_simple_query(PGconn *conn, const char *sql)
163 PGresult *res;
164 char *result;
166 res = PQexec(conn, sql);
168 if (PQresultStatus(res) != PGRES_TUPLES_OK)
169 pg_fatal("error running query (%s) on source server: %s",
170 sql, PQresultErrorMessage(res));
172 /* sanity check the result set */
173 if (PQnfields(res) != 1 || PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
174 pg_fatal("unexpected result set from query");
176 result = pg_strdup(PQgetvalue(res, 0, 0));
178 PQclear(res);
180 return result;
184 * Run a command.
186 * In the event of a failure, exit immediately.
188 static void
189 run_simple_command(PGconn *conn, const char *sql)
191 PGresult *res;
193 res = PQexec(conn, sql);
195 if (PQresultStatus(res) != PGRES_COMMAND_OK)
196 pg_fatal("error running query (%s) in source server: %s",
197 sql, PQresultErrorMessage(res));
199 PQclear(res);
203 * Call the pg_current_wal_insert_lsn() function in the remote system.
205 static XLogRecPtr
206 libpq_get_current_wal_insert_lsn(rewind_source *source)
208 PGconn *conn = ((libpq_source *) source)->conn;
209 XLogRecPtr result;
210 uint32 hi;
211 uint32 lo;
212 char *val;
214 val = run_simple_query(conn, "SELECT pg_current_wal_insert_lsn()");
216 if (sscanf(val, "%X/%X", &hi, &lo) != 2)
217 pg_fatal("unrecognized result \"%s\" for current WAL insert location", val);
219 result = ((uint64) hi) << 32 | lo;
221 pg_free(val);
223 return result;
227 * Get a list of all files in the data directory.
229 static void
230 libpq_traverse_files(rewind_source *source, process_file_callback_t callback)
232 PGconn *conn = ((libpq_source *) source)->conn;
233 PGresult *res;
234 const char *sql;
235 int i;
238 * Create a recursive directory listing of the whole data directory.
240 * The WITH RECURSIVE part does most of the work. The second part gets the
241 * targets of the symlinks in pg_tblspc directory.
243 * XXX: There is no backend function to get a symbolic link's target in
244 * general, so if the admin has put any custom symbolic links in the data
245 * directory, they won't be copied correctly.
247 sql =
248 "WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
249 " SELECT '' AS path, filename, size, isdir FROM\n"
250 " (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
251 " pg_stat_file(fn.filename, true) AS this\n"
252 " UNION ALL\n"
253 " SELECT parent.path || parent.filename || '/' AS path,\n"
254 " fn, this.size, this.isdir\n"
255 " FROM files AS parent,\n"
256 " pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
257 " pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
258 " WHERE parent.isdir = 't'\n"
259 ")\n"
260 "SELECT path || filename, size, isdir,\n"
261 " pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
262 "FROM files\n"
263 "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
264 " AND oid::text = files.filename\n";
265 res = PQexec(conn, sql);
267 if (PQresultStatus(res) != PGRES_TUPLES_OK)
268 pg_fatal("could not fetch file list: %s",
269 PQresultErrorMessage(res));
271 /* sanity check the result set */
272 if (PQnfields(res) != 4)
273 pg_fatal("unexpected result set while fetching file list");
275 /* Read result to local variables */
276 for (i = 0; i < PQntuples(res); i++)
278 char *path;
279 int64 filesize;
280 bool isdir;
281 char *link_target;
282 file_type_t type;
284 if (PQgetisnull(res, i, 1))
287 * The file was removed from the server while the query was
288 * running. Ignore it.
290 continue;
293 path = PQgetvalue(res, i, 0);
294 filesize = atol(PQgetvalue(res, i, 1));
295 isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
296 link_target = PQgetvalue(res, i, 3);
298 if (link_target[0])
299 type = FILE_TYPE_SYMLINK;
300 else if (isdir)
301 type = FILE_TYPE_DIRECTORY;
302 else
303 type = FILE_TYPE_REGULAR;
305 process_source_file(path, type, filesize, link_target);
307 PQclear(res);
311 * Queue up a request to fetch a piece of a file from remote system.
313 static void
314 libpq_queue_fetch_range(rewind_source *source, const char *path, off_t off,
315 size_t len)
317 libpq_source *src = (libpq_source *) source;
320 * Does this request happen to be a continuation of the previous chunk? If
321 * so, merge it with the previous one.
323 * XXX: We use pointer equality to compare the path. That's good enough
324 * for our purposes; the caller always passes the same pointer for the
325 * same filename. If it didn't, we would fail to merge requests, but it
326 * wouldn't affect correctness.
328 if (src->num_requests > 0)
330 fetch_range_request *prev = &src->request_queue[src->num_requests - 1];
332 if (prev->offset + prev->length == off &&
333 prev->length < MAX_CHUNK_SIZE &&
334 prev->path == path)
337 * Extend the previous request to cover as much of this new
338 * request as possible, without exceeding MAX_CHUNK_SIZE.
340 size_t thislen;
342 thislen = Min(len, MAX_CHUNK_SIZE - prev->length);
343 prev->length += thislen;
345 off += thislen;
346 len -= thislen;
349 * Fall through to create new requests for any remaining 'len'
350 * that didn't fit in the previous chunk.
355 /* Divide the request into pieces of MAX_CHUNK_SIZE bytes each */
356 while (len > 0)
358 int32 thislen;
360 /* if the queue is full, perform all the work queued up so far */
361 if (src->num_requests == MAX_CHUNKS_PER_QUERY)
362 process_queued_fetch_requests(src);
364 thislen = Min(len, MAX_CHUNK_SIZE);
365 src->request_queue[src->num_requests].path = path;
366 src->request_queue[src->num_requests].offset = off;
367 src->request_queue[src->num_requests].length = thislen;
368 src->num_requests++;
370 off += thislen;
371 len -= thislen;
376 * Fetch all the queued chunks and write them to the target data directory.
378 static void
379 libpq_finish_fetch(rewind_source *source)
381 process_queued_fetch_requests((libpq_source *) source);
384 static void
385 process_queued_fetch_requests(libpq_source *src)
387 const char *params[3];
388 PGresult *res;
389 int chunkno;
391 if (src->num_requests == 0)
392 return;
394 pg_log_debug("getting %d file chunks", src->num_requests);
397 * The prepared statement, 'fetch_chunks_stmt', takes three arrays with
398 * the same length as parameters: paths, offsets and lengths. Construct
399 * the string representations of them.
401 resetStringInfo(&src->paths);
402 resetStringInfo(&src->offsets);
403 resetStringInfo(&src->lengths);
405 appendStringInfoChar(&src->paths, '{');
406 appendStringInfoChar(&src->offsets, '{');
407 appendStringInfoChar(&src->lengths, '{');
408 for (int i = 0; i < src->num_requests; i++)
410 fetch_range_request *rq = &src->request_queue[i];
412 if (i > 0)
414 appendStringInfoChar(&src->paths, ',');
415 appendStringInfoChar(&src->offsets, ',');
416 appendStringInfoChar(&src->lengths, ',');
419 appendArrayEscapedString(&src->paths, rq->path);
420 appendStringInfo(&src->offsets, INT64_FORMAT, (int64) rq->offset);
421 appendStringInfo(&src->lengths, INT64_FORMAT, (int64) rq->length);
423 appendStringInfoChar(&src->paths, '}');
424 appendStringInfoChar(&src->offsets, '}');
425 appendStringInfoChar(&src->lengths, '}');
428 * Execute the prepared statement.
430 params[0] = src->paths.data;
431 params[1] = src->offsets.data;
432 params[2] = src->lengths.data;
434 if (PQsendQueryPrepared(src->conn, "fetch_chunks_stmt", 3, params, NULL, NULL, 1) != 1)
435 pg_fatal("could not send query: %s", PQerrorMessage(src->conn));
437 if (PQsetSingleRowMode(src->conn) != 1)
438 pg_fatal("could not set libpq connection to single row mode");
440 /*----
441 * The result set is of format:
443 * path text -- path in the data directory, e.g "base/1/123"
444 * begin int8 -- offset within the file
445 * chunk bytea -- file content
446 *----
448 chunkno = 0;
449 while ((res = PQgetResult(src->conn)) != NULL)
451 fetch_range_request *rq = &src->request_queue[chunkno];
452 char *filename;
453 int filenamelen;
454 int64 chunkoff;
455 int chunksize;
456 char *chunk;
458 switch (PQresultStatus(res))
460 case PGRES_SINGLE_TUPLE:
461 break;
463 case PGRES_TUPLES_OK:
464 PQclear(res);
465 continue; /* final zero-row result */
467 default:
468 pg_fatal("unexpected result while fetching remote files: %s",
469 PQresultErrorMessage(res));
472 if (chunkno > src->num_requests)
473 pg_fatal("received more data chunks than requested");
475 /* sanity check the result set */
476 if (PQnfields(res) != 3 || PQntuples(res) != 1)
477 pg_fatal("unexpected result set size while fetching remote files");
479 if (PQftype(res, 0) != TEXTOID ||
480 PQftype(res, 1) != INT8OID ||
481 PQftype(res, 2) != BYTEAOID)
483 pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u",
484 PQftype(res, 0), PQftype(res, 1), PQftype(res, 2));
487 if (PQfformat(res, 0) != 1 &&
488 PQfformat(res, 1) != 1 &&
489 PQfformat(res, 2) != 1)
491 pg_fatal("unexpected result format while fetching remote files");
494 if (PQgetisnull(res, 0, 0) ||
495 PQgetisnull(res, 0, 1))
497 pg_fatal("unexpected null values in result while fetching remote files");
500 if (PQgetlength(res, 0, 1) != sizeof(int64))
501 pg_fatal("unexpected result length while fetching remote files");
503 /* Read result set to local variables */
504 memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64));
505 chunkoff = pg_ntoh64(chunkoff);
506 chunksize = PQgetlength(res, 0, 2);
508 filenamelen = PQgetlength(res, 0, 0);
509 filename = pg_malloc(filenamelen + 1);
510 memcpy(filename, PQgetvalue(res, 0, 0), filenamelen);
511 filename[filenamelen] = '\0';
513 chunk = PQgetvalue(res, 0, 2);
516 * If a file has been deleted on the source, remove it on the target
517 * as well. Note that multiple unlink() calls may happen on the same
518 * file if multiple data chunks are associated with it, hence ignore
519 * unconditionally anything missing.
521 if (PQgetisnull(res, 0, 2))
523 pg_log_debug("received null value for chunk for file \"%s\", file has been deleted",
524 filename);
525 remove_target_file(filename, true);
527 else
529 pg_log_debug("received chunk for file \"%s\", offset %lld, size %d",
530 filename, (long long int) chunkoff, chunksize);
532 if (strcmp(filename, rq->path) != 0)
534 pg_fatal("received data for file \"%s\", when requested for \"%s\"",
535 filename, rq->path);
537 if (chunkoff != rq->offset)
538 pg_fatal("received data at offset %lld of file \"%s\", when requested for offset %lld",
539 (long long int) chunkoff, rq->path, (long long int) rq->offset);
542 * We should not receive more data than we requested, or
543 * pg_read_binary_file() messed up. We could receive less,
544 * though, if the file was truncated in the source after we
545 * checked its size. That's OK, there should be a WAL record of
546 * the truncation, which will get replayed when you start the
547 * target system for the first time after pg_rewind has completed.
549 if (chunksize > rq->length)
550 pg_fatal("received more than requested for file \"%s\"", rq->path);
552 open_target_file(filename, false);
554 write_target_range(chunk, chunkoff, chunksize);
557 pg_free(filename);
559 PQclear(res);
560 chunkno++;
562 if (chunkno != src->num_requests)
563 pg_fatal("unexpected number of data chunks received");
565 src->num_requests = 0;
569 * Escape a string to be used as element in a text array constant
571 static void
572 appendArrayEscapedString(StringInfo buf, const char *str)
574 appendStringInfoCharMacro(buf, '\"');
575 while (*str)
577 char ch = *str;
579 if (ch == '"' || ch == '\\')
580 appendStringInfoCharMacro(buf, '\\');
582 appendStringInfoCharMacro(buf, ch);
584 str++;
586 appendStringInfoCharMacro(buf, '\"');
590 * Fetch a single file as a malloc'd buffer.
592 static char *
593 libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize)
595 PGconn *conn = ((libpq_source *) source)->conn;
596 PGresult *res;
597 char *result;
598 int len;
599 const char *paramValues[1];
601 paramValues[0] = path;
602 res = PQexecParams(conn, "SELECT pg_read_binary_file($1)",
603 1, NULL, paramValues, NULL, NULL, 1);
605 if (PQresultStatus(res) != PGRES_TUPLES_OK)
606 pg_fatal("could not fetch remote file \"%s\": %s",
607 path, PQresultErrorMessage(res));
609 /* sanity check the result set */
610 if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
611 pg_fatal("unexpected result set while fetching remote file \"%s\"",
612 path);
614 /* Read result to local variables */
615 len = PQgetlength(res, 0, 0);
616 result = pg_malloc(len + 1);
617 memcpy(result, PQgetvalue(res, 0, 0), len);
618 result[len] = '\0';
620 PQclear(res);
622 pg_log_debug("fetched file \"%s\", length %d", path, len);
624 if (filesize)
625 *filesize = len;
626 return result;
630 * Close a libpq source.
632 static void
633 libpq_destroy(rewind_source *source)
635 libpq_source *src = (libpq_source *) source;
637 pfree(src->paths.data);
638 pfree(src->offsets.data);
639 pfree(src->lengths.data);
640 pfree(src);
642 /* NOTE: we don't close the connection here, as it was not opened by us. */