Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * reconstruct.c
4 : * Reconstruct full file from incremental file and backup chain.
5 : *
6 : * Copyright (c) 2017-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/bin/pg_combinebackup/reconstruct.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres_fe.h"
14 :
15 : #include <unistd.h>
16 :
17 : #include "backup/basebackup_incremental.h"
18 : #include "common/file_perm.h"
19 : #include "common/logging.h"
20 : #include "copy_file.h"
21 : #include "lib/stringinfo.h"
22 : #include "reconstruct.h"
23 : #include "storage/block.h"
24 :
/*
 * An rfile stores the data that we need in order to be able to use some file
 * on disk for reconstruction. For any given output file, we create one rfile
 * per backup that we need to consult when we are constructing that output
 * file.
 *
 * If we find a full version of the file in the backup chain, then only
 * filename and fd are initialized; the remaining fields are 0 or NULL.
 * For an incremental file, header_length, num_blocks, relative_block_numbers,
 * and truncation_block_length are also set.
 *
 * num_blocks_read and highest_offset_read always start out as 0.
 */
typedef struct rfile
{
	char	   *filename;		/* private copy of the path that was opened */
	int			fd;				/* descriptor opened read-only on filename */
	size_t		header_length;	/* bytes preceding the first stored block in
								 * an incremental file; 0 for a full file */
	unsigned	num_blocks;		/* number of blocks stored in an incremental
								 * file */
	BlockNumber *relative_block_numbers;	/* num_blocks entries giving the
											 * block number of each stored
											 * block, in storage order */
	unsigned	truncation_block_length;	/* truncation block length read
											 * from the incremental header */
	unsigned	num_blocks_read;	/* count of output blocks sourced from
									 * this file */
	off_t		highest_offset_read;	/* one past the last byte needed from
										 * this file */
} rfile;
48 :
/* Prototypes for the file-local helpers defined later in this file. */
static void debug_reconstruction(int n_source,
								 rfile **sources,
								 bool dry_run);
static unsigned find_reconstructed_block_length(rfile *s);
static rfile *make_incremental_rfile(char *filename);
static rfile *make_rfile(char *filename, bool missing_ok);
static void write_reconstructed_file(char *input_filename,
									 char *output_filename,
									 unsigned block_length,
									 rfile **sourcemap,
									 off_t *offsetmap,
									 pg_checksum_context *checksum_ctx,
									 CopyMethod copy_method,
									 bool debug,
									 bool dry_run);
static void read_bytes(rfile *rf, void *buffer, unsigned length);
static void write_block(int fd, char *output_filename,
						uint8 *buffer,
						pg_checksum_context *checksum_ctx);
static void read_block(rfile *s, off_t off, uint8 *buffer);
69 :
70 : /*
71 : * Reconstruct a full file from an incremental file and a chain of prior
72 : * backups.
73 : *
74 : * input_filename should be the path to the incremental file, and
75 : * output_filename should be the path where the reconstructed file is to be
76 : * written.
77 : *
78 : * relative_path should be the path to the directory containing this file,
79 : * relative to the root of the backup (NOT relative to the root of the
80 : * tablespace). It must always end with a trailing slash. bare_file_name
81 : * should be the name of the file within that directory, without
82 : * "INCREMENTAL.".
83 : *
84 : * n_prior_backups is the number of prior backups, and prior_backup_dirs is
85 : * an array of pathnames where those backups can be found.
86 : */
87 : void
88 11586 : reconstruct_from_incremental_file(char *input_filename,
89 : char *output_filename,
90 : char *relative_path,
91 : char *bare_file_name,
92 : int n_prior_backups,
93 : char **prior_backup_dirs,
94 : manifest_data **manifests,
95 : char *manifest_path,
96 : pg_checksum_type checksum_type,
97 : int *checksum_length,
98 : uint8 **checksum_payload,
99 : CopyMethod copy_method,
100 : bool debug,
101 : bool dry_run)
102 : {
103 : rfile **source;
104 11586 : rfile *latest_source = NULL;
105 : rfile **sourcemap;
106 : off_t *offsetmap;
107 : unsigned block_length;
108 : unsigned i;
109 11586 : unsigned sidx = n_prior_backups;
110 11586 : bool full_copy_possible = true;
111 11586 : int copy_source_index = -1;
112 11586 : rfile *copy_source = NULL;
113 : pg_checksum_context checksum_ctx;
114 :
115 : /* Sanity check the relative_path. */
116 : Assert(relative_path[0] != '\0');
117 : Assert(relative_path[strlen(relative_path) - 1] == '/');
118 :
119 : /*
120 : * Every block must come either from the latest version of the file or
121 : * from one of the prior backups.
122 : */
123 11586 : source = pg_malloc0(sizeof(rfile *) * (1 + n_prior_backups));
124 :
125 : /*
126 : * Use the information from the latest incremental file to figure out how
127 : * long the reconstructed file should be.
128 : */
129 11586 : latest_source = make_incremental_rfile(input_filename);
130 11586 : source[n_prior_backups] = latest_source;
131 11586 : block_length = find_reconstructed_block_length(latest_source);
132 :
133 : /*
134 : * For each block in the output file, we need to know from which file we
135 : * need to obtain it and at what offset in that file it's stored.
136 : * sourcemap gives us the first of these things, and offsetmap the latter.
137 : */
138 11586 : sourcemap = pg_malloc0(sizeof(rfile *) * block_length);
139 11586 : offsetmap = pg_malloc0(sizeof(off_t) * block_length);
140 :
141 : /*
142 : * Every block that is present in the newest incremental file should be
143 : * sourced from that file. If it precedes the truncation_block_length,
144 : * it's a block that we would otherwise have had to find in an older
145 : * backup and thus reduces the number of blocks remaining to be found by
146 : * one; otherwise, it's an extra block that needs to be included in the
147 : * output but would not have needed to be found in an older backup if it
148 : * had not been present.
149 : */
150 11662 : for (i = 0; i < latest_source->num_blocks; ++i)
151 : {
152 76 : BlockNumber b = latest_source->relative_block_numbers[i];
153 :
154 : Assert(b < block_length);
155 76 : sourcemap[b] = latest_source;
156 76 : offsetmap[b] = latest_source->header_length + (i * BLCKSZ);
157 :
158 : /*
159 : * A full copy of a file from an earlier backup is only possible if no
160 : * blocks are needed from any later incremental file.
161 : */
162 76 : full_copy_possible = false;
163 : }
164 :
165 : while (1)
166 2668 : {
167 : char source_filename[MAXPGPATH];
168 : rfile *s;
169 :
170 : /*
171 : * Move to the next backup in the chain. If there are no more, then
172 : * we're done.
173 : */
174 14254 : if (sidx == 0)
175 2 : break;
176 14252 : --sidx;
177 :
178 : /*
179 : * Look for the full file in the previous backup. If not found, then
180 : * look for an incremental file instead.
181 : */
182 14252 : snprintf(source_filename, MAXPGPATH, "%s/%s%s",
183 14252 : prior_backup_dirs[sidx], relative_path, bare_file_name);
184 14252 : if ((s = make_rfile(source_filename, true)) == NULL)
185 : {
186 2668 : snprintf(source_filename, MAXPGPATH, "%s/%sINCREMENTAL.%s",
187 2668 : prior_backup_dirs[sidx], relative_path, bare_file_name);
188 2668 : s = make_incremental_rfile(source_filename);
189 : }
190 14252 : source[sidx] = s;
191 :
192 : /*
193 : * If s->header_length == 0, then this is a full file; otherwise, it's
194 : * an incremental file.
195 : */
196 14252 : if (s->header_length == 0)
197 : {
198 : struct stat sb;
199 : BlockNumber b;
200 : BlockNumber blocklength;
201 :
202 : /* We need to know the length of the file. */
203 11584 : if (fstat(s->fd, &sb) < 0)
204 0 : pg_fatal("could not stat file \"%s\": %m", s->filename);
205 :
206 : /*
207 : * Since we found a full file, source all blocks from it that
208 : * exist in the file.
209 : *
210 : * Note that there may be blocks that don't exist either in this
211 : * file or in any incremental file but that precede
212 : * truncation_block_length. These are, presumably, zero-filled
213 : * blocks that result from the server extending the file but
214 : * taking no action on those blocks that generated any WAL.
215 : *
216 : * Sadly, we have no way of validating that this is really what
217 : * happened, and neither does the server. From it's perspective,
218 : * an unmodified block that contains data looks exactly the same
219 : * as a zero-filled block that never had any data: either way,
220 : * it's not mentioned in any WAL summary and the server has no
221 : * reason to read it. From our perspective, all we know is that
222 : * nobody had a reason to back up the block. That certainly means
223 : * that the block didn't exist at the time of the full backup, but
224 : * the supposition that it was all zeroes at the time of every
225 : * later backup is one that we can't validate.
226 : */
227 11584 : blocklength = sb.st_size / BLCKSZ;
228 55060 : for (b = 0; b < latest_source->truncation_block_length; ++b)
229 : {
230 43476 : if (sourcemap[b] == NULL && b < blocklength)
231 : {
232 43400 : sourcemap[b] = s;
233 43400 : offsetmap[b] = b * BLCKSZ;
234 : }
235 : }
236 :
237 : /*
238 : * If a full copy looks possible, check whether the resulting file
239 : * should be exactly as long as the source file is. If so, a full
240 : * copy is acceptable, otherwise not.
241 : */
242 11584 : if (full_copy_possible)
243 : {
244 : uint64 expected_length;
245 :
246 11538 : expected_length =
247 11538 : (uint64) latest_source->truncation_block_length;
248 11538 : expected_length *= BLCKSZ;
249 11538 : if (expected_length == sb.st_size)
250 : {
251 11538 : copy_source = s;
252 11538 : copy_source_index = sidx;
253 : }
254 : }
255 :
256 : /* We don't need to consider any further sources. */
257 11584 : break;
258 : }
259 :
260 : /*
261 : * Since we found another incremental file, source all blocks from it
262 : * that we need but don't yet have.
263 : */
264 2668 : for (i = 0; i < s->num_blocks; ++i)
265 : {
266 0 : BlockNumber b = s->relative_block_numbers[i];
267 :
268 0 : if (b < latest_source->truncation_block_length &&
269 0 : sourcemap[b] == NULL)
270 : {
271 0 : sourcemap[b] = s;
272 0 : offsetmap[b] = s->header_length + (i * BLCKSZ);
273 :
274 : /*
275 : * A full copy of a file from an earlier backup is only
276 : * possible if no blocks are needed from any later incremental
277 : * file.
278 : */
279 0 : full_copy_possible = false;
280 : }
281 : }
282 : }
283 :
284 : /*
285 : * If a checksum of the required type already exists in the
286 : * backup_manifest for the relevant input directory, we can save some work
287 : * by reusing that checksum instead of computing a new one.
288 : */
289 11586 : if (copy_source_index >= 0 && manifests[copy_source_index] != NULL &&
290 : checksum_type != CHECKSUM_TYPE_NONE)
291 : {
292 : manifest_file *mfile;
293 :
294 11538 : mfile = manifest_files_lookup(manifests[copy_source_index]->files,
295 : manifest_path);
296 11538 : if (mfile == NULL)
297 : {
298 0 : char *path = psprintf("%s/backup_manifest",
299 0 : prior_backup_dirs[copy_source_index]);
300 :
301 : /*
302 : * The directory is out of sync with the backup_manifest, so emit
303 : * a warning.
304 : */
305 0 : pg_log_warning("manifest file \"%s\" contains no entry for file \"%s\"",
306 : path,
307 : manifest_path);
308 0 : pfree(path);
309 : }
310 11538 : else if (mfile->checksum_type == checksum_type)
311 : {
312 11538 : *checksum_length = mfile->checksum_length;
313 11538 : *checksum_payload = pg_malloc(*checksum_length);
314 11538 : memcpy(*checksum_payload, mfile->checksum_payload,
315 11538 : *checksum_length);
316 11538 : checksum_type = CHECKSUM_TYPE_NONE;
317 : }
318 : }
319 :
320 : /* Prepare for checksum calculation, if required. */
321 11586 : pg_checksum_init(&checksum_ctx, checksum_type);
322 :
323 : /*
324 : * If the full file can be created by copying a file from an older backup
325 : * in the chain without needing to overwrite any blocks or truncate the
326 : * result, then forget about performing reconstruction and just copy that
327 : * file in its entirety.
328 : *
329 : * If we have only incremental files, and there's no full file at any
330 : * point in the backup chain, something has gone wrong. Emit an error.
331 : *
332 : * Otherwise, reconstruct.
333 : */
334 11586 : if (copy_source != NULL)
335 11538 : copy_file(copy_source->filename, output_filename,
336 : &checksum_ctx, copy_method, dry_run);
337 48 : else if (sidx == 0 && source[0]->header_length != 0)
338 : {
339 2 : pg_fatal("full backup contains unexpected incremental file \"%s\"",
340 : source[0]->filename);
341 : }
342 : else
343 : {
344 46 : write_reconstructed_file(input_filename, output_filename,
345 : block_length, sourcemap, offsetmap,
346 : &checksum_ctx, copy_method,
347 : debug, dry_run);
348 46 : debug_reconstruction(n_prior_backups + 1, source, dry_run);
349 : }
350 :
351 : /* Save results of checksum calculation. */
352 11584 : if (checksum_type != CHECKSUM_TYPE_NONE)
353 : {
354 46 : *checksum_payload = pg_malloc(PG_CHECKSUM_MAX_LENGTH);
355 46 : *checksum_length = pg_checksum_final(&checksum_ctx,
356 : *checksum_payload);
357 : }
358 :
359 : /*
360 : * Close files and release memory.
361 : */
362 37418 : for (i = 0; i <= n_prior_backups; ++i)
363 : {
364 25834 : rfile *s = source[i];
365 :
366 25834 : if (s == NULL)
367 0 : continue;
368 25834 : if (close(s->fd) != 0)
369 0 : pg_fatal("could not close file \"%s\": %m", s->filename);
370 25834 : if (s->relative_block_numbers != NULL)
371 46 : pfree(s->relative_block_numbers);
372 25834 : pg_free(s->filename);
373 : }
374 11584 : pfree(sourcemap);
375 11584 : pfree(offsetmap);
376 11584 : pfree(source);
377 11584 : }
378 :
379 : /*
380 : * Perform post-reconstruction logging and sanity checks.
381 : */
382 : static void
383 46 : debug_reconstruction(int n_source, rfile **sources, bool dry_run)
384 : {
385 : unsigned i;
386 :
387 144 : for (i = 0; i < n_source; ++i)
388 : {
389 98 : rfile *s = sources[i];
390 :
391 : /* Ignore source if not used. */
392 98 : if (s == NULL)
393 0 : continue;
394 :
395 : /* If no data is needed from this file, we can ignore it. */
396 98 : if (s->num_blocks_read == 0)
397 6 : continue;
398 :
399 : /* Debug logging. */
400 92 : if (dry_run)
401 0 : pg_log_debug("would have read %u blocks from \"%s\"",
402 : s->num_blocks_read, s->filename);
403 : else
404 92 : pg_log_debug("read %u blocks from \"%s\"",
405 : s->num_blocks_read, s->filename);
406 :
407 : /*
408 : * In dry-run mode, we don't actually try to read data from the file,
409 : * but we do try to verify that the file is long enough that we could
410 : * have read the data if we'd tried.
411 : *
412 : * If this fails, then it means that a non-dry-run attempt would fail,
413 : * complaining of not being able to read the required bytes from the
414 : * file.
415 : */
416 92 : if (dry_run)
417 : {
418 : struct stat sb;
419 :
420 0 : if (fstat(s->fd, &sb) < 0)
421 0 : pg_fatal("could not stat file \"%s\": %m", s->filename);
422 0 : if (sb.st_size < s->highest_offset_read)
423 0 : pg_fatal("file \"%s\" is too short: expected %llu, found %llu",
424 : s->filename,
425 : (unsigned long long) s->highest_offset_read,
426 : (unsigned long long) sb.st_size);
427 : }
428 : }
429 46 : }
430 :
431 : /*
432 : * When we perform reconstruction using an incremental file, the output file
433 : * should be at least as long as the truncation_block_length. Any blocks
434 : * present in the incremental file increase the output length as far as is
435 : * necessary to include those blocks.
436 : */
437 : static unsigned
438 11586 : find_reconstructed_block_length(rfile *s)
439 : {
440 11586 : unsigned block_length = s->truncation_block_length;
441 : unsigned i;
442 :
443 11662 : for (i = 0; i < s->num_blocks; ++i)
444 76 : if (s->relative_block_numbers[i] >= block_length)
445 0 : block_length = s->relative_block_numbers[i] + 1;
446 :
447 11586 : return block_length;
448 : }
449 :
450 : /*
451 : * Initialize an incremental rfile, reading the header so that we know which
452 : * blocks it contains.
453 : */
454 : static rfile *
455 14254 : make_incremental_rfile(char *filename)
456 : {
457 : rfile *rf;
458 : unsigned magic;
459 :
460 14254 : rf = make_rfile(filename, false);
461 :
462 : /* Read and validate magic number. */
463 14254 : read_bytes(rf, &magic, sizeof(magic));
464 14254 : if (magic != INCREMENTAL_MAGIC)
465 0 : pg_fatal("file \"%s\" has bad incremental magic number (0x%x, expected 0x%x)",
466 : filename, magic, INCREMENTAL_MAGIC);
467 :
468 : /* Read block count. */
469 14254 : read_bytes(rf, &rf->num_blocks, sizeof(rf->num_blocks));
470 14254 : if (rf->num_blocks > RELSEG_SIZE)
471 0 : pg_fatal("file \"%s\" has block count %u in excess of segment size %u",
472 : filename, rf->num_blocks, RELSEG_SIZE);
473 :
474 : /* Read truncation block length. */
475 14254 : read_bytes(rf, &rf->truncation_block_length,
476 : sizeof(rf->truncation_block_length));
477 14254 : if (rf->truncation_block_length > RELSEG_SIZE)
478 0 : pg_fatal("file \"%s\" has truncation block length %u in excess of segment size %u",
479 : filename, rf->truncation_block_length, RELSEG_SIZE);
480 :
481 : /* Read block numbers if there are any. */
482 14254 : if (rf->num_blocks > 0)
483 : {
484 46 : rf->relative_block_numbers =
485 46 : pg_malloc0(sizeof(BlockNumber) * rf->num_blocks);
486 46 : read_bytes(rf, rf->relative_block_numbers,
487 46 : sizeof(BlockNumber) * rf->num_blocks);
488 : }
489 :
490 : /* Remember length of header. */
491 14254 : rf->header_length = sizeof(magic) + sizeof(rf->num_blocks) +
492 14254 : sizeof(rf->truncation_block_length) +
493 14254 : sizeof(BlockNumber) * rf->num_blocks;
494 :
495 : /*
496 : * Round header length to a multiple of BLCKSZ, so that blocks contents
497 : * are properly aligned. Only do this when the file actually has data for
498 : * some blocks.
499 : */
500 14254 : if ((rf->num_blocks > 0) && ((rf->header_length % BLCKSZ) != 0))
501 46 : rf->header_length += (BLCKSZ - (rf->header_length % BLCKSZ));
502 :
503 14254 : return rf;
504 : }
505 :
506 : /*
507 : * Allocate and perform basic initialization of an rfile.
508 : */
509 : static rfile *
510 28506 : make_rfile(char *filename, bool missing_ok)
511 : {
512 : rfile *rf;
513 :
514 28506 : rf = pg_malloc0(sizeof(rfile));
515 28506 : rf->filename = pstrdup(filename);
516 28506 : if ((rf->fd = open(filename, O_RDONLY | PG_BINARY, 0)) < 0)
517 : {
518 2668 : if (missing_ok && errno == ENOENT)
519 : {
520 2668 : pg_free(rf);
521 2668 : return NULL;
522 : }
523 0 : pg_fatal("could not open file \"%s\": %m", filename);
524 : }
525 :
526 25838 : return rf;
527 : }
528 :
529 : /*
530 : * Read the indicated number of bytes from an rfile into the buffer.
531 : */
532 : static void
533 42808 : read_bytes(rfile *rf, void *buffer, unsigned length)
534 : {
535 42808 : int rb = read(rf->fd, buffer, length);
536 :
537 42808 : if (rb != length)
538 : {
539 0 : if (rb < 0)
540 0 : pg_fatal("could not read file \"%s\": %m", rf->filename);
541 : else
542 0 : pg_fatal("could not read file \"%s\": read %d of %u",
543 : rf->filename, rb, length);
544 : }
545 42808 : }
546 :
547 : /*
548 : * Write out a reconstructed file.
549 : */
550 : static void
551 46 : write_reconstructed_file(char *input_filename,
552 : char *output_filename,
553 : unsigned block_length,
554 : rfile **sourcemap,
555 : off_t *offsetmap,
556 : pg_checksum_context *checksum_ctx,
557 : CopyMethod copy_method,
558 : bool debug,
559 : bool dry_run)
560 : {
561 46 : int wfd = -1;
562 : unsigned i;
563 46 : unsigned zero_blocks = 0;
564 :
565 : /* Debugging output. */
566 46 : if (debug)
567 : {
568 : StringInfoData debug_buf;
569 46 : unsigned start_of_range = 0;
570 46 : unsigned current_block = 0;
571 :
572 : /* Basic information about the output file to be produced. */
573 46 : if (dry_run)
574 0 : pg_log_debug("would reconstruct \"%s\" (%u blocks, checksum %s)",
575 : output_filename, block_length,
576 : pg_checksum_type_name(checksum_ctx->type));
577 : else
578 46 : pg_log_debug("reconstructing \"%s\" (%u blocks, checksum %s)",
579 : output_filename, block_length,
580 : pg_checksum_type_name(checksum_ctx->type));
581 :
582 : /* Print out the plan for reconstructing this file. */
583 46 : initStringInfo(&debug_buf);
584 560 : while (current_block < block_length)
585 : {
586 514 : rfile *s = sourcemap[current_block];
587 :
588 : /* Extend range, if possible. */
589 514 : if (current_block + 1 < block_length &&
590 468 : s == sourcemap[current_block + 1])
591 : {
592 400 : ++current_block;
593 400 : continue;
594 : }
595 :
596 : /* Add details about this range. */
597 114 : if (s == NULL)
598 : {
599 0 : if (current_block == start_of_range)
600 0 : appendStringInfo(&debug_buf, " %u:zero", current_block);
601 : else
602 0 : appendStringInfo(&debug_buf, " %u-%u:zero",
603 : start_of_range, current_block);
604 : }
605 : else
606 : {
607 114 : if (current_block == start_of_range)
608 70 : appendStringInfo(&debug_buf, " %u:%s@" UINT64_FORMAT,
609 : current_block, s->filename,
610 70 : (uint64) offsetmap[current_block]);
611 : else
612 44 : appendStringInfo(&debug_buf, " %u-%u:%s@" UINT64_FORMAT,
613 : start_of_range, current_block,
614 : s->filename,
615 44 : (uint64) offsetmap[current_block]);
616 : }
617 :
618 : /* Begin new range. */
619 114 : start_of_range = ++current_block;
620 :
621 : /* If the output is very long or we are done, dump it now. */
622 114 : if (current_block == block_length || debug_buf.len > 1024)
623 : {
624 46 : pg_log_debug("reconstruction plan:%s", debug_buf.data);
625 46 : resetStringInfo(&debug_buf);
626 : }
627 : }
628 :
629 : /* Free memory. */
630 46 : pfree(debug_buf.data);
631 : }
632 :
633 : /* Open the output file, except in dry_run mode. */
634 92 : if (!dry_run &&
635 46 : (wfd = open(output_filename,
636 : O_RDWR | PG_BINARY | O_CREAT | O_EXCL,
637 : pg_file_create_mode)) < 0)
638 0 : pg_fatal("could not open file \"%s\": %m", output_filename);
639 :
640 : /* Read and write the blocks as required. */
641 560 : for (i = 0; i < block_length; ++i)
642 : {
643 : uint8 buffer[BLCKSZ];
644 514 : rfile *s = sourcemap[i];
645 :
646 : /* Update accounting information. */
647 514 : if (s == NULL)
648 0 : ++zero_blocks;
649 : else
650 : {
651 514 : s->num_blocks_read++;
652 514 : s->highest_offset_read = Max(s->highest_offset_read,
653 : offsetmap[i] + BLCKSZ);
654 : }
655 :
656 : /* Skip the rest of this in dry-run mode. */
657 514 : if (dry_run)
658 0 : continue;
659 :
660 : /* Read or zero-fill the block as appropriate. */
661 514 : if (s == NULL)
662 : {
663 : /*
664 : * New block not mentioned in the WAL summary. Should have been an
665 : * uninitialized block, so just zero-fill it.
666 : */
667 0 : memset(buffer, 0, BLCKSZ);
668 :
669 : /* Write out the block, update the checksum if needed. */
670 0 : write_block(wfd, output_filename, buffer, checksum_ctx);
671 :
672 : /* Nothing else to do for zero-filled blocks. */
673 0 : continue;
674 : }
675 :
676 : /* Copy the block using the appropriate copy method. */
677 514 : if (copy_method != COPY_METHOD_COPY_FILE_RANGE)
678 : {
679 : /*
680 : * Read the block from the correct source file, and then write it
681 : * out, possibly with a checksum update.
682 : */
683 514 : read_block(s, offsetmap[i], buffer);
684 514 : write_block(wfd, output_filename, buffer, checksum_ctx);
685 : }
686 : else /* use copy_file_range */
687 : {
688 : #if defined(HAVE_COPY_FILE_RANGE)
689 : /* copy_file_range modifies the offset, so use a local copy */
690 0 : off_t off = offsetmap[i];
691 0 : size_t nwritten = 0;
692 :
693 : /*
694 : * Retry until we've written all the bytes (the offset is updated
695 : * by copy_file_range, and so is the wfd file offset).
696 : */
697 : do
698 : {
699 : int wb;
700 :
701 0 : wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0);
702 :
703 0 : if (wb < 0)
704 0 : pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
705 : input_filename, output_filename);
706 :
707 0 : nwritten += wb;
708 :
709 0 : } while (BLCKSZ > nwritten);
710 :
711 : /*
712 : * When checksum calculation not needed, we're done, otherwise
713 : * read the block and pass it to the checksum calculation.
714 : */
715 0 : if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
716 0 : continue;
717 :
718 0 : read_block(s, offsetmap[i], buffer);
719 :
720 0 : if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
721 0 : pg_fatal("could not update checksum of file \"%s\"",
722 : output_filename);
723 : #else
724 : pg_fatal("copy_file_range not supported on this platform");
725 : #endif
726 : }
727 : }
728 :
729 : /* Debugging output. */
730 46 : if (zero_blocks > 0)
731 : {
732 0 : if (dry_run)
733 0 : pg_log_debug("would have zero-filled %u blocks", zero_blocks);
734 : else
735 0 : pg_log_debug("zero-filled %u blocks", zero_blocks);
736 : }
737 :
738 : /* Close the output file. */
739 46 : if (wfd >= 0 && close(wfd) != 0)
740 0 : pg_fatal("could not close file \"%s\": %m", output_filename);
741 46 : }
742 :
743 : /*
744 : * Write the block into the file (using the file descriptor), and
745 : * if needed update the checksum calculation.
746 : *
747 : * The buffer is expected to contain BLCKSZ bytes. The filename is
748 : * provided only for the error message.
749 : */
750 : static void
751 514 : write_block(int fd, char *output_filename,
752 : uint8 *buffer, pg_checksum_context *checksum_ctx)
753 : {
754 : int wb;
755 :
756 514 : if ((wb = write(fd, buffer, BLCKSZ)) != BLCKSZ)
757 : {
758 0 : if (wb < 0)
759 0 : pg_fatal("could not write file \"%s\": %m", output_filename);
760 : else
761 0 : pg_fatal("could not write file \"%s\": wrote %d of %d",
762 : output_filename, wb, BLCKSZ);
763 : }
764 :
765 : /* Update the checksum computation. */
766 514 : if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
767 0 : pg_fatal("could not update checksum of file \"%s\"",
768 : output_filename);
769 514 : }
770 :
771 : /*
772 : * Read a block of data (BLCKSZ bytes) into the buffer.
773 : */
774 : static void
775 514 : read_block(rfile *s, off_t off, uint8 *buffer)
776 : {
777 : int rb;
778 :
779 : /* Read the block from the correct source, except if dry-run. */
780 514 : rb = pg_pread(s->fd, buffer, BLCKSZ, off);
781 514 : if (rb != BLCKSZ)
782 : {
783 0 : if (rb < 0)
784 0 : pg_fatal("could not read from file \"%s\": %m", s->filename);
785 : else
786 0 : pg_fatal("could not read from file \"%s\", offset %llu: read %d of %d",
787 : s->filename, (unsigned long long) off, rb, BLCKSZ);
788 : }
789 514 : }
|