Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * Load data from a backup manifest into memory.
4 : *
5 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 : * Portions Copyright (c) 1994, Regents of the University of California
7 : *
8 : * src/bin/pg_combinebackup/load_manifest.c
9 : *
10 : *-------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres_fe.h"
14 :
15 : #include <sys/stat.h>
16 : #include <unistd.h>
17 :
18 : #include "common/hashfn_unstable.h"
19 : #include "common/logging.h"
20 : #include "common/parse_manifest.h"
21 : #include "load_manifest.h"
22 :
/*
 * For efficiency, we'd like our hash table containing information about the
 * manifest to start out with approximately the correct number of entries.
 * There's no way to know the exact number of entries without reading the whole
 * file, but we can get an estimate by dividing the file size by the estimated
 * number of bytes per line.
 *
 * This could be off by about a factor of two in either direction, because the
 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
 * might be no checksum at all.
 */
#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100

/*
 * Maximum number of bytes of manifest JSON read from disk in one go.
 *
 * A manifest no larger than this is read and parsed in a single step; a
 * larger one is fed to the incremental parser in pieces of at most this size.
 */
#define READ_CHUNK_SIZE (128 * 1024)
42 :
/*
 * Define a hash table which we can use to store information about the files
 * mentioned in the backup manifest.
 *
 * Entries are manifest_file structs keyed by their pathname member; keys are
 * hashed with hash_string() and compared with strcmp(). Table memory is
 * obtained through pg_malloc0().
 *
 * NOTE(review): SH_DEFINE with SH_SCOPE extern presumably emits the function
 * definitions here while the matching declarations live in load_manifest.h --
 * confirm against that header.
 */
#define SH_PREFIX manifest_files
#define SH_ELEMENT_TYPE manifest_file
#define SH_KEY_TYPE const char *
#define SH_KEY pathname
#define SH_HASH_KEY(tb, key) hash_string(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE extern
#define SH_RAW_ALLOCATOR pg_malloc0
#define SH_DEFINE
#include "lib/simplehash.h"
57 :
/*
 * Forward declarations for the callbacks that load_backup_manifest() installs
 * in the JsonManifestParseContext before parsing a manifest.
 */
static void combinebackup_version_cb(JsonManifestParseContext *context,
									 int manifest_version);
static void combinebackup_system_identifier_cb(JsonManifestParseContext *context,
											   uint64 manifest_system_identifier);
static void combinebackup_per_file_cb(JsonManifestParseContext *context,
									  const char *pathname, uint64 size,
									  pg_checksum_type checksum_type,
									  int checksum_length,
									  uint8 *checksum_payload);
static void combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
										   TimeLineID tli,
										   XLogRecPtr start_lsn,
										   XLogRecPtr end_lsn);
/* Error callback: formats and logs the message, then exits; never returns. */
static void report_manifest_error(JsonManifestParseContext *context,
								  const char *fmt,...)
			pg_attribute_printf(2, 3) pg_attribute_noreturn();
74 :
75 : /*
76 : * Load backup_manifest files from an array of backups and produces an array
77 : * of manifest_data objects.
78 : *
79 : * NB: Since load_backup_manifest() can return NULL, the resulting array could
80 : * contain NULL entries.
81 : */
82 : manifest_data **
83 26 : load_backup_manifests(int n_backups, char **backup_directories)
84 : {
85 : manifest_data **result;
86 : int i;
87 :
88 26 : result = pg_malloc(sizeof(manifest_data *) * n_backups);
89 78 : for (i = 0; i < n_backups; ++i)
90 52 : result[i] = load_backup_manifest(backup_directories[i]);
91 :
92 26 : return result;
93 : }
94 :
95 : /*
96 : * Parse the backup_manifest file in the named backup directory. Construct a
97 : * hash table with information about all the files it mentions, and a linked
98 : * list of all the WAL ranges it mentions.
99 : *
100 : * If the backup_manifest file simply doesn't exist, logs a warning and returns
101 : * NULL. Any other error, or any error parsing the contents of the file, is
102 : * fatal.
103 : */
104 : manifest_data *
105 52 : load_backup_manifest(char *backup_directory)
106 : {
107 : char pathname[MAXPGPATH];
108 : int fd;
109 : struct stat statbuf;
110 : off_t estimate;
111 : uint32 initial_size;
112 : manifest_files_hash *ht;
113 : char *buffer;
114 : int rc;
115 : JsonManifestParseContext context;
116 : manifest_data *result;
117 52 : int chunk_size = READ_CHUNK_SIZE;
118 :
119 : /* Open the manifest file. */
120 52 : snprintf(pathname, MAXPGPATH, "%s/backup_manifest", backup_directory);
121 52 : if ((fd = open(pathname, O_RDONLY | PG_BINARY, 0)) < 0)
122 : {
123 0 : if (errno == ENOENT)
124 : {
125 0 : pg_log_warning("file \"%s\" does not exist", pathname);
126 0 : return NULL;
127 : }
128 0 : pg_fatal("could not open file \"%s\": %m", pathname);
129 : }
130 :
131 : /* Figure out how big the manifest is. */
132 52 : if (fstat(fd, &statbuf) != 0)
133 0 : pg_fatal("could not stat file \"%s\": %m", pathname);
134 :
135 : /* Guess how large to make the hash table based on the manifest size. */
136 52 : estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
137 52 : initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
138 :
139 : /* Create the hash table. */
140 52 : ht = manifest_files_create(initial_size, NULL);
141 :
142 52 : result = pg_malloc0(sizeof(manifest_data));
143 52 : result->files = ht;
144 52 : context.private_data = result;
145 52 : context.version_cb = combinebackup_version_cb;
146 52 : context.system_identifier_cb = combinebackup_system_identifier_cb;
147 52 : context.per_file_cb = combinebackup_per_file_cb;
148 52 : context.per_wal_range_cb = combinebackup_per_wal_range_cb;
149 52 : context.error_cb = report_manifest_error;
150 :
151 : /*
152 : * Parse the file, in chunks if necessary.
153 : */
154 52 : if (statbuf.st_size <= chunk_size)
155 : {
156 0 : buffer = pg_malloc(statbuf.st_size);
157 0 : rc = read(fd, buffer, statbuf.st_size);
158 0 : if (rc != statbuf.st_size)
159 : {
160 0 : if (rc < 0)
161 0 : pg_fatal("could not read file \"%s\": %m", pathname);
162 : else
163 0 : pg_fatal("could not read file \"%s\": read %d of %lld",
164 : pathname, rc, (long long int) statbuf.st_size);
165 : }
166 :
167 : /* Close the manifest file. */
168 0 : close(fd);
169 :
170 : /* Parse the manifest. */
171 0 : json_parse_manifest(&context, buffer, statbuf.st_size);
172 : }
173 : else
174 : {
175 52 : int bytes_left = statbuf.st_size;
176 : JsonManifestParseIncrementalState *inc_state;
177 :
178 52 : inc_state = json_parse_manifest_incremental_init(&context);
179 :
180 52 : buffer = pg_malloc(chunk_size + 1);
181 :
182 156 : while (bytes_left > 0)
183 : {
184 104 : int bytes_to_read = chunk_size;
185 :
186 : /*
187 : * Make sure that the last chunk is sufficiently large. (i.e. at
188 : * least half the chunk size) so that it will contain fully the
189 : * piece at the end with the checksum.
190 : */
191 104 : if (bytes_left < chunk_size)
192 52 : bytes_to_read = bytes_left;
193 52 : else if (bytes_left < 2 * chunk_size)
194 52 : bytes_to_read = bytes_left / 2;
195 104 : rc = read(fd, buffer, bytes_to_read);
196 104 : if (rc != bytes_to_read)
197 : {
198 0 : if (rc < 0)
199 0 : pg_fatal("could not read file \"%s\": %m", pathname);
200 : else
201 0 : pg_fatal("could not read file \"%s\": read %lld of %lld",
202 : pathname,
203 : (long long int) (statbuf.st_size + rc - bytes_left),
204 : (long long int) statbuf.st_size);
205 : }
206 104 : bytes_left -= rc;
207 104 : json_parse_manifest_incremental_chunk(inc_state, buffer, rc, bytes_left == 0);
208 : }
209 :
210 : /* Release the incremental state memory */
211 52 : json_parse_manifest_incremental_shutdown(inc_state);
212 :
213 52 : close(fd);
214 : }
215 :
216 : /* All done. */
217 52 : pfree(buffer);
218 52 : return result;
219 : }
220 :
221 : /*
222 : * Report an error while parsing the manifest.
223 : *
224 : * We consider all such errors to be fatal errors. The manifest parser
225 : * expects this function not to return.
226 : */
227 : static void
228 0 : report_manifest_error(JsonManifestParseContext *context, const char *fmt,...)
229 : {
230 : va_list ap;
231 :
232 0 : va_start(ap, fmt);
233 0 : pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, gettext(fmt), ap);
234 0 : va_end(ap);
235 :
236 0 : exit(1);
237 : }
238 :
239 : /*
240 : * This callback to validate the manifest version number for incremental backup.
241 : */
242 : static void
243 52 : combinebackup_version_cb(JsonManifestParseContext *context,
244 : int manifest_version)
245 : {
246 : /* Incremental backups supported on manifest version 2 or later */
247 52 : if (manifest_version == 1)
248 0 : pg_fatal("backup manifest version 1 does not support incremental backup");
249 52 : }
250 :
251 : /*
252 : * Record system identifier extracted from the backup manifest.
253 : */
254 : static void
255 52 : combinebackup_system_identifier_cb(JsonManifestParseContext *context,
256 : uint64 manifest_system_identifier)
257 : {
258 52 : manifest_data *manifest = context->private_data;
259 :
260 : /* Validation will be at the later stage */
261 52 : manifest->system_identifier = manifest_system_identifier;
262 52 : }
263 :
264 : /*
265 : * Record details extracted from the backup manifest for one file.
266 : */
267 : static void
268 53022 : combinebackup_per_file_cb(JsonManifestParseContext *context,
269 : const char *pathname, uint64 size,
270 : pg_checksum_type checksum_type,
271 : int checksum_length, uint8 *checksum_payload)
272 : {
273 53022 : manifest_data *manifest = context->private_data;
274 : manifest_file *m;
275 : bool found;
276 :
277 : /* Make a new entry in the hash table for this file. */
278 53022 : m = manifest_files_insert(manifest->files, pathname, &found);
279 53022 : if (found)
280 0 : pg_fatal("duplicate path name in backup manifest: \"%s\"", pathname);
281 :
282 : /* Initialize the entry. */
283 53022 : m->size = size;
284 53022 : m->checksum_type = checksum_type;
285 53022 : m->checksum_length = checksum_length;
286 53022 : m->checksum_payload = checksum_payload;
287 53022 : }
288 :
289 : /*
290 : * Record details extracted from the backup manifest for one WAL range.
291 : */
292 : static void
293 52 : combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
294 : TimeLineID tli,
295 : XLogRecPtr start_lsn, XLogRecPtr end_lsn)
296 : {
297 52 : manifest_data *manifest = context->private_data;
298 : manifest_wal_range *range;
299 :
300 : /* Allocate and initialize a struct describing this WAL range. */
301 52 : range = palloc(sizeof(manifest_wal_range));
302 52 : range->tli = tli;
303 52 : range->start_lsn = start_lsn;
304 52 : range->end_lsn = end_lsn;
305 52 : range->prev = manifest->last_wal_range;
306 52 : range->next = NULL;
307 :
308 : /* Add it to the end of the list. */
309 52 : if (manifest->first_wal_range == NULL)
310 52 : manifest->first_wal_range = range;
311 : else
312 0 : manifest->last_wal_range->next = range;
313 52 : manifest->last_wal_range = range;
314 52 : }
|