Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * basebackup_incremental.c
4 : * code for incremental backup support
5 : *
6 : * This code isn't actually in charge of taking an incremental backup;
7 : * the actual construction of the incremental backup happens in
8 : * basebackup.c. Here, we're concerned with providing the necessary
9 : * supports for that operation. In particular, we need to parse the
10 : * backup manifest supplied by the user taking the incremental backup
11 : * and extract the required information from it.
12 : *
13 : * Portions Copyright (c) 2010-2026, PostgreSQL Global Development Group
14 : *
15 : * IDENTIFICATION
16 : * src/backend/backup/basebackup_incremental.c
17 : *
18 : *-------------------------------------------------------------------------
19 : */
20 : #include "postgres.h"
21 :
22 : #include "access/timeline.h"
23 : #include "access/xlog.h"
24 : #include "backup/basebackup_incremental.h"
25 : #include "backup/walsummary.h"
26 : #include "common/blkreftable.h"
27 : #include "common/hashfn.h"
28 : #include "common/int.h"
29 : #include "common/parse_manifest.h"
30 : #include "postmaster/walsummarizer.h"
31 :
32 : #define BLOCKS_PER_READ 512
33 :
34 : /*
35 : * We expect to find the last lines of the manifest, including the checksum,
36 : * in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
37 : * parse step if we are about to overflow MAX_CHUNK bytes.
38 : */
39 : #define MIN_CHUNK 1024
40 : #define MAX_CHUNK (128 * 1024)
41 :
42 : /*
43 : * Details extracted from the WAL ranges present in the supplied backup manifest.
 *
 * One of these is kept per WAL range; PrepareForIncrementalBackup checks each
 * one against this server's timeline history.
44 : */
45 : typedef struct
46 : {
47 : TimeLineID tli; /* timeline the range refers to */
48 : XLogRecPtr start_lsn; /* LSN at which the required WAL starts */
49 : XLogRecPtr end_lsn; /* LSN at which the required WAL ends */
50 : } backup_wal_range;
51 :
52 : /*
53 : * Details extracted from the file list present in the supplied backup manifest.
 *
 * This is the element type of the backup_file hash table instantiated below,
 * which is keyed by the path member.
54 : */
55 : typedef struct
56 : {
57 : uint32 status; /* presumably simplehash's per-entry bookkeeping -- see lib/simplehash.h */
58 : const char *path; /* hash key: pathname of the file */
59 : uint64 size; /* file size recorded for the file */
60 : } backup_file_entry;
61 :
/*
 * Instantiate a simplehash table of backup_file_entry elements. The
 * generated names carry the backup_file prefix (backup_file_hash,
 * backup_file_create, backup_file_lookup are the ones used in this file).
 * Entries are keyed by their path member, hashed with hash_string_pointer
 * and compared with strcmp.
 */
62 : static uint32 hash_string_pointer(const char *s);
63 : #define SH_PREFIX backup_file
64 : #define SH_ELEMENT_TYPE backup_file_entry
65 : #define SH_KEY_TYPE const char *
66 : #define SH_KEY path
67 : #define SH_HASH_KEY(tb, key) hash_string_pointer(key)
68 : #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
69 : #define SH_SCOPE static inline
70 : #define SH_DECLARE
71 : #define SH_DEFINE
72 : #include "lib/simplehash.h"
73 :
/*
 * State accumulated while receiving and parsing the prior backup's manifest,
 * plus the block reference table later built from WAL summaries.
 */
74 : struct IncrementalBackupInfo
75 : {
76 : /* Memory context for this object and its subsidiary objects. */
77 : MemoryContext mcxt;
78 :
79 : /* Temporary buffer for storing the manifest while parsing it. */
/* FinalizeIncrementalManifest frees buf.data and resets it to NULL. */
80 : StringInfoData buf;
81 :
82 : /* WAL ranges extracted from the backup manifest. */
/* The list elements are read back as backup_wal_range pointers. */
83 : List *manifest_wal_ranges;
84 :
85 : /*
86 : * Files extracted from the backup manifest.
87 : *
88 : * We don't really need this information, because we use WAL summaries to
89 : * figure out what's changed. It would be unsafe to just rely on the list
90 : * of files that existed before, because it's possible for a file to be
91 : * removed and a new one created with the same name and different
92 : * contents. In such cases, the whole file must still be sent. We can tell
93 : * from the WAL summaries whether that happened, but not from the file
94 : * list.
95 : *
96 : * Nonetheless, this data is useful for sanity checking. If a file that we
97 : * think we shouldn't need to send is not present in the manifest for the
98 : * prior backup, something has gone terribly wrong. We retain the file
99 : * names and sizes, but not the checksums or last modified times, for
100 : * which we have no use.
101 : *
102 : * One significant downside of storing this data is that it consumes
103 : * memory. If that turns out to be a problem, we might have to decide not
104 : * to retain this information, or to make it optional.
105 : */
106 : backup_file_hash *manifest_files;
107 :
108 : /*
109 : * Block-reference table for the incremental backup.
110 : *
111 : * It's possible that storing the entire block-reference table in memory
112 : * will be a problem for some users. The in-memory format that we're using
113 : * here is pretty efficient, converging to little more than 1 bit per
114 : * block for relation forks with large numbers of modified blocks. It's
115 : * possible, however, that if you try to perform an incremental backup of
116 : * a database with a sufficiently large number of relations on a
117 : * sufficiently small machine, you could run out of memory here. If that
118 : * turns out to be a problem in practice, we'll need to be more clever.
 *
 * This is created and filled in by PrepareForIncrementalBackup.
119 : */
120 : BlockRefTable *brtab;
121 :
122 : /*
123 : * State object for incremental JSON parsing
 *
 * Created in CreateIncrementalBackupInfo and shut down in
 * FinalizeIncrementalManifest.
124 : */
125 : JsonManifestParseIncrementalState *inc_state;
126 : };
127 :
/*
 * Manifest-parser callbacks; CreateIncrementalBackupInfo installs these into
 * the JsonManifestParseContext it hands to the incremental parser.
 */
128 : static void manifest_process_version(JsonManifestParseContext *context,
129 : int manifest_version);
130 : static void manifest_process_system_identifier(JsonManifestParseContext *context,
131 : uint64 manifest_system_identifier);
132 : static void manifest_process_file(JsonManifestParseContext *context,
133 : const char *pathname,
134 : uint64 size,
135 : pg_checksum_type checksum_type,
136 : int checksum_length,
137 : uint8 *checksum_payload);
138 : static void manifest_process_wal_range(JsonManifestParseContext *context,
139 : TimeLineID tli,
140 : XLogRecPtr start_lsn,
141 : XLogRecPtr end_lsn);
142 : pg_noreturn static void manifest_report_error(JsonManifestParseContext *context,
143 : const char *fmt,...)
144 : pg_attribute_printf(2, 3);
/* qsort comparator applied to BlockNumber arrays in GetFileBackupMethod. */
145 : static int compare_block_numbers(const void *a, const void *b);
146 :
147 : /*
148 : * Create a new object for storing information extracted from the manifest
149 : * supplied when creating an incremental backup.
150 : */
151 : IncrementalBackupInfo *
152 13 : CreateIncrementalBackupInfo(MemoryContext mcxt)
153 : {
154 : IncrementalBackupInfo *ib;
155 : MemoryContext oldcontext;
156 : JsonManifestParseContext *context;
157 :
158 13 : oldcontext = MemoryContextSwitchTo(mcxt);
159 :
160 13 : ib = palloc0_object(IncrementalBackupInfo);
161 13 : ib->mcxt = mcxt;
162 13 : initStringInfo(&ib->buf);
163 :
164 : /*
165 : * It's hard to guess how many files a "typical" installation will have in
166 : * the data directory, but a fresh initdb creates almost 1000 files as of
167 : * this writing, so it seems to make sense for our estimate to
168 : * substantially higher.
169 : */
170 13 : ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
171 :
172 13 : context = palloc0_object(JsonManifestParseContext);
173 : /* Parse the manifest. */
174 13 : context->private_data = ib;
175 13 : context->version_cb = manifest_process_version;
176 13 : context->system_identifier_cb = manifest_process_system_identifier;
177 13 : context->per_file_cb = manifest_process_file;
178 13 : context->per_wal_range_cb = manifest_process_wal_range;
179 13 : context->error_cb = manifest_report_error;
180 :
181 13 : ib->inc_state = json_parse_manifest_incremental_init(context);
182 :
183 13 : MemoryContextSwitchTo(oldcontext);
184 :
185 13 : return ib;
186 : }
187 :
188 : /*
189 : * Before taking an incremental backup, the caller must supply the backup
190 : * manifest from a prior backup. Each chunk of manifest data received
191 : * from the client should be passed to this function.
192 : */
193 : void
194 39 : AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
195 : int len)
196 : {
197 : MemoryContext oldcontext;
198 :
199 : /* Switch to our memory context. */
200 39 : oldcontext = MemoryContextSwitchTo(ib->mcxt);
201 :
202 39 : if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
203 : {
204 : /*
205 : * time for an incremental parse. We'll do all but the last MIN_CHUNK
206 : * so that we have enough left for the final piece.
207 : */
208 13 : json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
209 13 : ib->buf.len - MIN_CHUNK, false);
210 : /* now remove what we just parsed */
211 12 : memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
212 : MIN_CHUNK + 1);
213 12 : ib->buf.len = MIN_CHUNK;
214 : }
215 :
216 38 : appendBinaryStringInfo(&ib->buf, data, len);
217 :
218 : /* Switch back to previous memory context. */
219 38 : MemoryContextSwitchTo(oldcontext);
220 38 : }
221 :
222 : /*
223 : * Finalize an IncrementalBackupInfo object after all manifest data has
224 : * been supplied via calls to AppendIncrementalManifestData.
225 : */
226 : void
227 12 : FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
228 : {
229 : MemoryContext oldcontext;
230 :
231 : /* Switch to our memory context. */
232 12 : oldcontext = MemoryContextSwitchTo(ib->mcxt);
233 :
234 : /* Parse the last chunk of the manifest */
235 12 : json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
236 12 : ib->buf.len, true);
237 :
238 : /* Done with the buffer, so release memory. */
239 12 : pfree(ib->buf.data);
240 12 : ib->buf.data = NULL;
241 :
242 : /* Done with inc_state, so release that memory too */
243 12 : json_parse_manifest_incremental_shutdown(ib->inc_state);
244 :
245 : /* Switch back to previous memory context. */
246 12 : MemoryContextSwitchTo(oldcontext);
247 12 : }
248 :
249 : /*
250 : * Prepare to take an incremental backup.
251 : *
252 : * Before this function is called, AppendIncrementalManifestData and
253 : * FinalizeIncrementalManifest should have already been called to pass all
254 : * the manifest data to this object.
255 : *
256 : * This function performs sanity checks on the data extracted from the
257 : * manifest and figures out for which WAL ranges we need summaries, and
258 : * whether those summaries are available. Then, it reads and combines the
259 : * data from those summary files. It also updates the backup_state with the
260 : * reference TLI and LSN for the prior backup.
 *
 * Inputs read from backup_state are starttli and startpoint, which describe
 * where the current backup starts; its istartpoint and istarttli members
 * are set here. On successful return, ib->brtab holds the merged block
 * reference table. Failed sanity checks and missing or incomplete WAL
 * summaries are reported with ereport(ERROR).
261 : */
262 : void
263 12 : PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
264 : BackupState *backup_state)
265 : {
266 : MemoryContext oldcontext;
267 : List *expectedTLEs;
268 : List *all_wslist,
269 12 : *required_wslist = NIL;
270 : ListCell *lc;
271 : TimeLineHistoryEntry **tlep;
272 : int num_wal_ranges;
273 12 : bool found_backup_start_tli = false;
274 12 : TimeLineID earliest_wal_range_tli = 0;
275 12 : XLogRecPtr earliest_wal_range_start_lsn = InvalidXLogRecPtr;
276 12 : TimeLineID latest_wal_range_tli = 0;
277 :
/* FinalizeIncrementalManifest resets buf.data to NULL, so it must have run. */
278 : Assert(ib->buf.data == NULL);
279 :
280 : /* Switch to our memory context. */
281 12 : oldcontext = MemoryContextSwitchTo(ib->mcxt);
282 :
283 : /*
284 : * A valid backup manifest must always contain at least one WAL range
285 : * (usually exactly one, unless the backup spanned a timeline switch).
286 : */
287 12 : num_wal_ranges = list_length(ib->manifest_wal_ranges);
288 12 : if (num_wal_ranges == 0)
289 0 : ereport(ERROR,
290 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
291 : errmsg("manifest contains no required WAL ranges")));
292 :
293 : /*
294 : * Match up the TLIs that appear in the WAL ranges of the backup manifest
295 : * with those that appear in this server's timeline history. We expect
296 : * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
297 : * not, that's an error.
298 : *
299 : * This loop also decides which of the WAL ranges in the manifest is most
300 : * ancient and which one is the newest, according to the timeline history
301 : * of this server, and stores TLIs of those WAL ranges into
302 : * earliest_wal_range_tli and latest_wal_range_tli. It also updates
303 : * earliest_wal_range_start_lsn to the start LSN of the WAL range for
304 : * earliest_wal_range_tli.
305 : *
306 : * Note that the return value of readTimeLineHistory puts the latest
307 : * timeline at the beginning of the list, not the end. Hence, the earliest
308 : * TLI is the one that occurs nearest the end of the list returned by
309 : * readTimeLineHistory, and the latest TLI is the one that occurs closest
310 : * to the beginning.
 *
 * tlep[i] ends up pointing at the history entry matching the i'th WAL
 * range; the sanity-check loop further down relies on that pairing.
311 : */
312 12 : expectedTLEs = readTimeLineHistory(backup_state->starttli);
313 12 : tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
314 24 : for (int i = 0; i < num_wal_ranges; ++i)
315 : {
316 12 : backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
317 12 : bool saw_earliest_wal_range_tli = false;
318 12 : bool saw_latest_wal_range_tli = false;
319 :
320 : /* Search this server's history for this WAL range's TLI. */
321 13 : foreach(lc, expectedTLEs)
322 : {
323 13 : TimeLineHistoryEntry *tle = lfirst(lc);
324 :
325 13 : if (tle->tli == range->tli)
326 : {
327 12 : tlep[i] = tle;
328 12 : break;
329 : }
330 :
/* Note which known extremes we pass before reaching this range's TLI. */
331 1 : if (tle->tli == earliest_wal_range_tli)
332 0 : saw_earliest_wal_range_tli = true;
333 1 : if (tle->tli == latest_wal_range_tli)
334 0 : saw_latest_wal_range_tli = true;
335 : }
336 :
337 : /*
338 : * An incremental backup can only be taken relative to a backup that
339 : * represents a previous state of this server. If the backup requires
340 : * WAL from a timeline that's not in our history, that definitely
341 : * isn't the case.
342 : */
343 12 : if (tlep[i] == NULL)
344 0 : ereport(ERROR,
345 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
346 : errmsg("timeline %u found in manifest, but not in this server's history",
347 : range->tli)));
348 :
349 : /*
350 : * If we found this TLI in the server's history before encountering
351 : * the latest TLI seen so far in the server's history, then this TLI
352 : * is the latest one seen so far.
353 : *
354 : * If on the other hand we saw the earliest TLI seen so far before
355 : * finding this TLI, this TLI is earlier than the earliest one seen so
356 : * far. And if this is the first TLI for which we've searched, it's
357 : * also the earliest one seen so far.
358 : *
359 : * On the first loop iteration, both things should necessarily be
360 : * true.
361 : */
362 12 : if (!saw_latest_wal_range_tli)
363 12 : latest_wal_range_tli = range->tli;
364 12 : if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
365 : {
366 12 : earliest_wal_range_tli = range->tli;
367 12 : earliest_wal_range_start_lsn = range->start_lsn;
368 : }
369 : }
370 :
371 : /*
372 : * Propagate information about the prior backup into the backup_label that
373 : * will be generated for this backup.
374 : */
375 12 : backup_state->istartpoint = earliest_wal_range_start_lsn;
376 12 : backup_state->istarttli = earliest_wal_range_tli;
377 :
378 : /*
379 : * Sanity check start and end LSNs for the WAL ranges in the manifest.
380 : *
381 : * Commonly, there won't be any timeline switches during the prior backup
382 : * at all, but if there are, they should happen at the same LSNs that this
383 : * server switched timelines.
384 : *
385 : * Whether there are any timeline switches during the prior backup or not,
386 : * the prior backup shouldn't require any WAL from a timeline prior to the
387 : * start of that timeline. It also shouldn't require any WAL from later
388 : * than the start of this backup.
389 : *
390 : * If any of these sanity checks fail, one possible explanation is that
391 : * the user has generated WAL on the same timeline with the same LSNs more
392 : * than once. For instance, if two standbys running on timeline 1 were
393 : * both promoted and (due to a broken archiving setup) both selected new
394 : * timeline ID 2, then it's possible that one of these checks might trip.
395 : *
396 : * Note that there are lots of ways for the user to do something very bad
397 : * without tripping any of these checks, and they are not intended to be
398 : * comprehensive. It's pretty hard to see how we could be certain of
399 : * anything here. However, if there's a problem staring us right in the
400 : * face, it's best to report it, so we do.
401 : */
402 24 : for (int i = 0; i < num_wal_ranges; ++i)
403 : {
404 12 : backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
405 :
/* Start LSN: may not precede the timeline's beginning; later timelines must start exactly there. */
406 12 : if (range->tli == earliest_wal_range_tli)
407 : {
408 12 : if (range->start_lsn < tlep[i]->begin)
409 0 : ereport(ERROR,
410 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
411 : errmsg("manifest requires WAL from initial timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
412 : range->tli,
413 : LSN_FORMAT_ARGS(range->start_lsn),
414 : LSN_FORMAT_ARGS(tlep[i]->begin))));
415 : }
416 : else
417 : {
418 0 : if (range->start_lsn != tlep[i]->begin)
419 0 : ereport(ERROR,
420 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
421 : errmsg("manifest requires WAL from continuation timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
422 : range->tli,
423 : LSN_FORMAT_ARGS(range->start_lsn),
424 : LSN_FORMAT_ARGS(tlep[i]->begin))));
425 : }
426 :
/* End LSN: may not exceed this backup's start; earlier timelines must end at the switch point. */
427 12 : if (range->tli == latest_wal_range_tli)
428 : {
429 12 : if (range->end_lsn > backup_state->startpoint)
430 0 : ereport(ERROR,
431 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
432 : errmsg("manifest requires WAL from final timeline %u ending at %X/%08X, but this backup starts at %X/%08X",
433 : range->tli,
434 : LSN_FORMAT_ARGS(range->end_lsn),
435 : LSN_FORMAT_ARGS(backup_state->startpoint)),
436 : errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
437 : }
438 : else
439 : {
440 0 : if (range->end_lsn != tlep[i]->end)
441 0 : ereport(ERROR,
442 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
443 : errmsg("manifest requires WAL from non-final timeline %u ending at %X/%08X, but this server switched timelines at %X/%08X",
444 : range->tli,
445 : LSN_FORMAT_ARGS(range->end_lsn),
446 : LSN_FORMAT_ARGS(tlep[i]->end))));
447 : }
448 :
449 : }
450 :
451 : /*
452 : * Wait for WAL summarization to catch up to the backup start LSN. This
453 : * will throw an error if the WAL summarizer appears to be stuck. If WAL
454 : * summarization gets disabled while we're waiting, this will return
455 : * immediately, and we'll error out further down if the WAL summaries are
456 : * incomplete.
457 : */
458 12 : WaitForWalSummarization(backup_state->startpoint);
459 :
460 : /*
461 : * Retrieve a list of all WAL summaries on any timeline that overlap with
462 : * the LSN range of interest. We could instead call GetWalSummaries() once
463 : * per timeline in the loop that follows, but that would involve reading
464 : * the directory multiple times. It should be mildly faster - and perhaps
465 : * a bit safer - to do it just once.
466 : */
467 12 : all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
468 : backup_state->startpoint);
469 :
470 : /*
471 : * We need WAL summaries for everything that happened during the prior
472 : * backup and everything that happened afterward up until the point where
473 : * the current backup started.
474 : */
475 13 : foreach(lc, expectedTLEs)
476 : {
477 13 : TimeLineHistoryEntry *tle = lfirst(lc);
478 13 : XLogRecPtr tli_start_lsn = tle->begin;
479 13 : XLogRecPtr tli_end_lsn = tle->end;
480 13 : XLogRecPtr tli_missing_lsn = InvalidXLogRecPtr;
481 : List *tli_wslist;
482 :
483 : /*
484 : * Working through the history of this server from the current
485 : * timeline backwards, we skip everything until we find the timeline
486 : * where this backup started. Most of the time, this means we won't
487 : * skip anything at all, as it's unlikely that the timeline has
488 : * changed since the beginning of the backup moments ago.
489 : */
490 13 : if (tle->tli == backup_state->starttli)
491 : {
492 12 : found_backup_start_tli = true;
493 12 : tli_end_lsn = backup_state->startpoint;
494 : }
495 1 : else if (!found_backup_start_tli)
496 0 : continue;
497 :
498 : /*
499 : * Find the summaries that overlap the LSN range of interest for this
500 : * timeline. If this is the earliest timeline involved, the range of
501 : * interest begins with the start LSN of the prior backup; otherwise,
502 : * it begins at the LSN at which this timeline came into existence. If
503 : * this is the latest TLI involved, the range of interest ends at the
504 : * start LSN of the current backup; otherwise, it ends at the point
505 : * where we switched from this timeline to the next one.
506 : */
507 13 : if (tle->tli == earliest_wal_range_tli)
508 12 : tli_start_lsn = earliest_wal_range_start_lsn;
509 13 : tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
510 : tli_start_lsn, tli_end_lsn);
511 :
512 : /*
513 : * There is no guarantee that the WAL summaries we found cover the
514 : * entire range of LSNs for which summaries are required, or indeed
515 : * that we found any WAL summaries at all. Check whether we have a
516 : * problem of that sort.
 *
 * tli_missing_lsn was initialized to InvalidXLogRecPtr above; if it
 * is still invalid after the check, we report that no summaries
 * exist, and otherwise that they are incomplete.
517 : */
518 13 : if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
519 : &tli_missing_lsn))
520 : {
521 1 : if (!XLogRecPtrIsValid(tli_missing_lsn))
522 0 : ereport(ERROR,
523 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
524 : errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but no summaries for that timeline and LSN range exist",
525 : tle->tli,
526 : LSN_FORMAT_ARGS(tli_start_lsn),
527 : LSN_FORMAT_ARGS(tli_end_lsn))));
528 : else
529 1 : ereport(ERROR,
530 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
531 : errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but the summaries for that timeline and LSN range are incomplete",
532 : tle->tli,
533 : LSN_FORMAT_ARGS(tli_start_lsn),
534 : LSN_FORMAT_ARGS(tli_end_lsn)),
535 : errdetail("The first unsummarized LSN in this range is %X/%08X.",
536 : LSN_FORMAT_ARGS(tli_missing_lsn))));
537 : }
538 :
539 : /*
540 : * Remember that we need to read these summaries.
541 : *
542 : * Technically, it's possible that this could read more files than
543 : * required, since tli_wslist in theory could contain redundant
544 : * summaries. For instance, if we have a summary from 0/10000000 to
545 : * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
546 : * latter subsumes the former and the former could be ignored.
547 : *
548 : * We ignore this possibility because the WAL summarizer only tries to
549 : * generate summaries that do not overlap. If somehow they exist,
550 : * we'll do a bit of extra work but the results should still be
551 : * correct.
552 : */
553 12 : required_wslist = list_concat(required_wslist, tli_wslist);
554 :
555 : /*
556 : * Timelines earlier than the one in which the prior backup began are
557 : * not relevant.
558 : */
559 12 : if (tle->tli == earliest_wal_range_tli)
560 11 : break;
561 : }
562 :
563 : /*
564 : * Read all of the required block reference table files and merge all of
565 : * the data into a single in-memory block reference table.
566 : *
567 : * See the comments for struct IncrementalBackupInfo for some thoughts on
568 : * memory usage.
569 : */
570 11 : ib->brtab = CreateEmptyBlockRefTable();
571 30 : foreach(lc, required_wslist)
572 : {
573 19 : WalSummaryFile *ws = lfirst(lc);
574 : WalSummaryIO wsio;
575 : BlockRefTableReader *reader;
576 : RelFileLocator rlocator;
577 : ForkNumber forknum;
578 : BlockNumber limit_block;
579 : BlockNumber blocks[BLOCKS_PER_READ];
580 :
581 19 : wsio.file = OpenWalSummaryFile(ws, false);
582 19 : wsio.filepos = 0;
583 19 : ereport(DEBUG1,
584 : (errmsg_internal("reading WAL summary file \"%s\"",
585 : FilePathName(wsio.file))));
586 19 : reader = CreateBlockRefTableReader(ReadWalSummary, &wsio,
587 : FilePathName(wsio.file),
588 : ReportWalSummaryError, NULL);
/* One iteration per relation fork recorded in this summary file. */
589 373 : while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
590 : &limit_block))
591 : {
592 354 : BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
593 : forknum, limit_block);
594 :
/*
 * Fetch block numbers in batches of at most BLOCKS_PER_READ and
 * mark each one modified, until the reader returns no more.
 */
595 : while (1)
596 256 : {
597 : unsigned int nblocks;
598 :
599 610 : nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
600 : BLOCKS_PER_READ);
601 610 : if (nblocks == 0)
602 354 : break;
603 :
604 1181 : for (unsigned int i = 0; i < nblocks; ++i)
605 925 : BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
606 : forknum, blocks[i]);
607 : }
608 : }
609 19 : DestroyBlockRefTableReader(reader);
610 19 : FileClose(wsio.file);
611 : }
612 :
613 : /* Switch back to previous memory context. */
614 11 : MemoryContextSwitchTo(oldcontext);
615 11 : }
616 :
617 : /*
618 : * Get the pathname that should be used when a file is sent incrementally.
619 : *
620 : * The result is a palloc'd string.
621 : */
622 : char *
623 1605 : GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
624 : ForkNumber forknum, unsigned segno)
625 : {
626 : RelPathStr path;
627 : char *lastslash;
628 : char *ipath;
629 :
630 1605 : path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
631 : forknum);
632 :
633 1605 : lastslash = strrchr(path.str, '/');
634 : Assert(lastslash != NULL);
635 1605 : *lastslash = '\0';
636 :
637 1605 : if (segno > 0)
638 0 : ipath = psprintf("%s/INCREMENTAL.%s.%u", path.str, lastslash + 1, segno);
639 : else
640 1605 : ipath = psprintf("%s/INCREMENTAL.%s", path.str, lastslash + 1);
641 :
642 1605 : return ipath;
643 : }
644 :
645 : /*
646 : * How should we back up a particular file as part of an incremental backup?
647 : *
648 : * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
649 : * file just as if this were not an incremental backup. The contents of the
650 : * relative_block_numbers array are unspecified in this case.
651 : *
652 : * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
653 : * an incremental file in the backup instead of the entire file. On return,
654 : * *num_blocks_required will be set to the number of blocks that need to be
655 : * sent, and the actual block numbers will have been stored in
656 : * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
657 : * In addition, *truncation_block_length will be set to the value that should
658 : * be included in the incremental file.
659 : */
660 : FileBackupMethod
661 11095 : GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
662 : Oid dboid, Oid spcoid,
663 : RelFileNumber relfilenumber, ForkNumber forknum,
664 : unsigned segno, size_t size,
665 : unsigned *num_blocks_required,
666 : BlockNumber *relative_block_numbers,
667 : unsigned *truncation_block_length)
668 : {
669 : BlockNumber limit_block;
670 : BlockNumber start_blkno;
671 : BlockNumber stop_blkno;
672 : RelFileLocator rlocator;
673 : BlockRefTableEntry *brtentry;
674 : unsigned i;
675 : unsigned nblocks;
676 :
677 : /* Should only be called after PrepareForIncrementalBackup. */
678 : Assert(ib->buf.data == NULL);
679 :
680 : /*
681 : * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
682 : * should have legal values.
683 : */
684 : Assert(OidIsValid(spcoid));
685 : Assert(RelFileNumberIsValid(relfilenumber));
686 :
687 : /*
688 : * If the file size is too large or not a multiple of BLCKSZ, then
689 : * something weird is happening, so give up and send the whole file.
690 : */
691 11095 : if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
692 0 : return BACK_UP_FILE_FULLY;
693 :
694 : /*
695 : * The free-space map fork is not properly WAL-logged, so we need to
696 : * backup the entire file every time.
697 : */
698 11095 : if (forknum == FSM_FORKNUM)
699 1354 : return BACK_UP_FILE_FULLY;
700 :
701 : /*
702 : * If this file was not part of the prior backup, back it up fully.
703 : *
704 : * If this file was created after the prior backup and before the start of
705 : * the current backup, then the WAL summary information will tell us to
706 : * back up the whole file. However, if this file was created after the
707 : * start of the current backup, then the WAL summary won't know anything
708 : * about it. Without this logic, we would erroneously conclude that it was
709 : * OK to send it incrementally.
710 : *
711 : * Note that the file could have existed at the time of the prior backup,
712 : * gotten deleted, and then a new file with the same name could have been
713 : * created. In that case, this logic won't prevent the file from being
714 : * backed up incrementally. But, if the deletion happened before the start
715 : * of the current backup, the limit block will be 0, inducing a full
716 : * backup. If the deletion happened after the start of the current backup,
717 : * reconstruction will erroneously combine blocks from the current
718 : * lifespan of the file with blocks from the previous lifespan -- but in
719 : * this type of case, WAL replay to reach backup consistency should remove
720 : * and recreate the file anyway, so the initial bogus contents should not
721 : * matter.
722 : */
723 9741 : if (backup_file_lookup(ib->manifest_files, path) == NULL)
724 : {
725 : char *ipath;
726 :
727 1605 : ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
728 : forknum, segno);
729 1605 : if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
730 274 : return BACK_UP_FILE_FULLY;
731 : }
732 :
733 : /*
734 : * Look up the special block reference table entry for the database as a
735 : * whole.
736 : */
737 9467 : rlocator.spcOid = spcoid;
738 9467 : rlocator.dbOid = dboid;
739 9467 : rlocator.relNumber = 0;
740 9467 : if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
741 : &limit_block) != NULL)
742 : {
743 : /*
744 : * According to the WAL summary, this database OID/tablespace OID
745 : * pairing has been created since the previous backup. So, everything
746 : * in it must be backed up fully.
747 : */
748 261 : return BACK_UP_FILE_FULLY;
749 : }
750 :
751 : /* Look up the block reference table entry for this relfilenode. */
752 9206 : rlocator.relNumber = relfilenumber;
753 9206 : brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
754 : &limit_block);
755 :
756 : /*
757 : * If there is no entry, then there have been no WAL-logged changes to the
758 : * relation since the predecessor backup was taken, so we can back it up
759 : * incrementally and need not include any modified blocks.
760 : *
761 : * However, if the file is zero-length, we should do a full backup,
762 : * because an incremental file is always more than zero length, and it's
763 : * silly to take an incremental backup when a full backup would be
764 : * smaller.
765 : */
766 9206 : if (brtentry == NULL)
767 : {
768 9169 : if (size == 0)
769 1869 : return BACK_UP_FILE_FULLY;
770 7300 : *num_blocks_required = 0;
771 7300 : *truncation_block_length = size / BLCKSZ;
772 7300 : return BACK_UP_FILE_INCREMENTALLY;
773 : }
774 :
775 : /*
776 : * If the limit_block is less than or equal to the point where this
777 : * segment starts, send the whole file.
778 : */
779 37 : if (limit_block <= segno * RELSEG_SIZE)
780 0 : return BACK_UP_FILE_FULLY;
781 :
782 : /*
783 : * Get relevant entries from the block reference table entry.
784 : *
785 : * We shouldn't overflow computing the start or stop block numbers, but if
786 : * it manages to happen somehow, detect it and throw an error.
787 : */
788 37 : start_blkno = segno * RELSEG_SIZE;
789 37 : stop_blkno = start_blkno + (size / BLCKSZ);
790 37 : if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
791 0 : ereport(ERROR,
792 : errcode(ERRCODE_INTERNAL_ERROR),
793 : errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
794 : segno, size));
795 :
796 : /*
797 : * This will write *absolute* block numbers into the output array, but
798 : * we'll transpose them below.
799 : */
800 37 : nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
801 : relative_block_numbers, RELSEG_SIZE);
802 : Assert(nblocks <= RELSEG_SIZE);
803 :
804 : /*
805 : * If we're going to have to send nearly all of the blocks, then just send
806 : * the whole file, because that won't require much extra storage or
807 : * transfer and will speed up and simplify backup restoration. It's not
808 : * clear what threshold is most appropriate here and perhaps it ought to
809 : * be configurable, but for now we're just going to say that if we'd need
810 : * to send 90% of the blocks anyway, give up and send the whole file.
811 : *
812 : * NB: If you change the threshold here, at least make sure to back up the
813 : * file fully when every single block must be sent, because there's
814 : * nothing good about sending an incremental file in that case.
815 : */
816 37 : if (nblocks * BLCKSZ > size * 0.9)
817 10 : return BACK_UP_FILE_FULLY;
818 :
819 : /*
820 : * Looks like we can send an incremental file, so sort the block numbers
821 : * and then transpose them from absolute block numbers to relative block
822 : * numbers if necessary.
823 : *
824 : * NB: If the block reference table was using the bitmap representation
825 : * for a given chunk, the block numbers in that chunk will already be
826 : * sorted, but when the array-of-offsets representation is used, we can
827 : * receive block numbers here out of order.
828 : */
829 27 : qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
830 : compare_block_numbers);
831 27 : if (start_blkno != 0)
832 : {
833 0 : for (i = 0; i < nblocks; ++i)
834 0 : relative_block_numbers[i] -= start_blkno;
835 : }
836 27 : *num_blocks_required = nblocks;
837 :
838 : /*
839 : * The truncation block length is the minimum length of the reconstructed
840 : * file. Any block numbers below this threshold that are not present in
841 : * the backup need to be fetched from the prior backup. At or above this
842 : * threshold, blocks should only be included in the result if they are
843 : * present in the backup. (This may require inserting zero blocks if the
844 : * blocks included in the backup are non-consecutive.)
845 : */
846 27 : *truncation_block_length = size / BLCKSZ;
847 27 : if (BlockNumberIsValid(limit_block))
848 : {
849 1 : unsigned relative_limit = limit_block - segno * RELSEG_SIZE;
850 :
851 : /*
852 : * We can't set a truncation_block_length in excess of the limit block
853 : * number (relativized to the current segment). To do so would be to
854 : * treat blocks from older backups as valid current contents even if
855 : * they were subsequently truncated away.
856 : */
857 1 : if (*truncation_block_length < relative_limit)
858 0 : *truncation_block_length = relative_limit;
859 :
860 : /*
861 : * We also can't set a truncation_block_length in excess of the
862 : * segment size, since the reconstructed file can't be larger than
863 : * that.
864 : */
865 1 : if (*truncation_block_length > RELSEG_SIZE)
866 0 : *truncation_block_length = RELSEG_SIZE;
867 : }
868 :
869 : /* Send it incrementally. */
870 27 : return BACK_UP_FILE_INCREMENTALLY;
871 : }
872 :
873 : /*
874 : * Compute the size for a header of an incremental file containing a given
875 : * number of blocks. The header is rounded to a multiple of BLCKSZ, but
876 : * only if the file will store some block data.
877 : */
878 : size_t
879 7327 : GetIncrementalHeaderSize(unsigned num_blocks_required)
880 : {
881 : size_t result;
882 :
883 : /* Make sure we're not going to overflow. */
884 : Assert(num_blocks_required <= RELSEG_SIZE);
885 :
886 : /*
887 : * Three four byte quantities (magic number, truncation block length,
888 : * block count) followed by block numbers.
889 : */
890 7327 : result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);
891 :
892 : /*
893 : * Round the header size to a multiple of BLCKSZ - when not a multiple of
894 : * BLCKSZ, add the missing fraction of a block. But do this only if the
895 : * file will store data for some blocks, otherwise keep it small.
896 : */
897 7327 : if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
898 26 : result += BLCKSZ - (result % BLCKSZ);
899 :
900 7327 : return result;
901 : }
902 :
903 : /*
904 : * Compute the size for an incremental file containing a given number of blocks.
905 : */
906 : size_t
907 7327 : GetIncrementalFileSize(unsigned num_blocks_required)
908 : {
909 : size_t result;
910 :
911 : /* Make sure we're not going to overflow. */
912 : Assert(num_blocks_required <= RELSEG_SIZE);
913 :
914 : /*
915 : * Header with three four byte quantities (magic number, truncation block
916 : * length, block count) followed by block numbers, rounded to a multiple
917 : * of BLCKSZ (for files with block data), followed by block contents.
918 : */
919 7327 : result = GetIncrementalHeaderSize(num_blocks_required);
920 7327 : result += BLCKSZ * num_blocks_required;
921 :
922 7327 : return result;
923 : }
924 :
925 : /*
926 : * Helper function for filemap hash table.
927 : */
928 : static uint32
929 24001 : hash_string_pointer(const char *s)
930 : {
931 24001 : const unsigned char *ss = (const unsigned char *) s;
932 :
933 24001 : return hash_bytes(ss, strlen(s));
934 : }
935 :
936 : /*
937 : * This callback to validate the manifest version for incremental backup.
938 : */
939 : static void
940 13 : manifest_process_version(JsonManifestParseContext *context,
941 : int manifest_version)
942 : {
943 : /* Incremental backups don't work with manifest version 1 */
944 13 : if (manifest_version == 1)
945 0 : context->error_cb(context,
946 : "backup manifest version 1 does not support incremental backup");
947 13 : }
948 :
949 : /*
950 : * This callback to validate the manifest system identifier against the current
951 : * database server.
952 : */
953 : static void
954 13 : manifest_process_system_identifier(JsonManifestParseContext *context,
955 : uint64 manifest_system_identifier)
956 : {
957 : uint64 system_identifier;
958 :
959 : /* Get system identifier of current system */
960 13 : system_identifier = GetSystemIdentifier();
961 :
962 13 : if (manifest_system_identifier != system_identifier)
963 1 : context->error_cb(context,
964 : "system identifier in backup manifest is %" PRIu64 ", but database system identifier is %" PRIu64,
965 : manifest_system_identifier,
966 : system_identifier);
967 12 : }
968 :
969 : /*
970 : * This callback is invoked for each file mentioned in the backup manifest.
971 : *
972 : * We store the path to each file and the size of each file for sanity-checking
973 : * purposes. For further details, see comments for IncrementalBackupInfo.
974 : */
975 : static void
976 12267 : manifest_process_file(JsonManifestParseContext *context,
977 : const char *pathname, uint64 size,
978 : pg_checksum_type checksum_type,
979 : int checksum_length,
980 : uint8 *checksum_payload)
981 : {
982 12267 : IncrementalBackupInfo *ib = context->private_data;
983 : backup_file_entry *entry;
984 : bool found;
985 :
986 12267 : entry = backup_file_insert(ib->manifest_files, pathname, &found);
987 12267 : if (!found)
988 : {
989 12267 : entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
990 : pathname);
991 12267 : entry->size = size;
992 : }
993 12267 : }
994 :
995 : /*
996 : * This callback is invoked for each WAL range mentioned in the backup
997 : * manifest.
998 : *
999 : * We're just interested in learning the oldest LSN and the corresponding TLI
1000 : * that appear in any WAL range.
1001 : */
1002 : static void
1003 12 : manifest_process_wal_range(JsonManifestParseContext *context,
1004 : TimeLineID tli, XLogRecPtr start_lsn,
1005 : XLogRecPtr end_lsn)
1006 : {
1007 12 : IncrementalBackupInfo *ib = context->private_data;
1008 12 : backup_wal_range *range = palloc_object(backup_wal_range);
1009 :
1010 12 : range->tli = tli;
1011 12 : range->start_lsn = start_lsn;
1012 12 : range->end_lsn = end_lsn;
1013 12 : ib->manifest_wal_ranges = lappend(ib->manifest_wal_ranges, range);
1014 12 : }
1015 :
1016 : /*
1017 : * This callback is invoked if an error occurs while parsing the backup
1018 : * manifest.
1019 : */
1020 : static void
1021 1 : manifest_report_error(JsonManifestParseContext *context, const char *fmt,...)
1022 : {
1023 : StringInfoData errbuf;
1024 :
1025 1 : initStringInfo(&errbuf);
1026 :
1027 : for (;;)
1028 0 : {
1029 : va_list ap;
1030 : int needed;
1031 :
1032 1 : va_start(ap, fmt);
1033 1 : needed = appendStringInfoVA(&errbuf, fmt, ap);
1034 1 : va_end(ap);
1035 1 : if (needed == 0)
1036 1 : break;
1037 0 : enlargeStringInfo(&errbuf, needed);
1038 : }
1039 :
1040 1 : ereport(ERROR,
1041 : errmsg_internal("%s", errbuf.data));
1042 : }
1043 :
1044 : /*
1045 : * Quicksort comparator for block numbers.
1046 : */
1047 : static int
1048 16 : compare_block_numbers(const void *a, const void *b)
1049 : {
1050 16 : BlockNumber aa = *(const BlockNumber *) a;
1051 16 : BlockNumber bb = *(const BlockNumber *) b;
1052 :
1053 16 : return pg_cmp_u32(aa, bb);
1054 : }
|