Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * reconstruct.c
4 : * Reconstruct full file from incremental file and backup chain.
5 : *
6 : * Copyright (c) 2017-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/bin/pg_combinebackup/reconstruct.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres_fe.h"
14 :
15 : #include <unistd.h>
16 :
17 : #include "backup/basebackup_incremental.h"
18 : #include "common/file_perm.h"
19 : #include "common/logging.h"
20 : #include "copy_file.h"
21 : #include "lib/stringinfo.h"
22 : #include "reconstruct.h"
23 : #include "storage/block.h"
24 :
25 : /*
26 : * An rfile stores the data that we need in order to be able to use some file
27 : * on disk for reconstruction. For any given output file, we create one rfile
28 : * per backup that we need to consult when we constructing that output file.
29 : *
30 : * If we find a full version of the file in the backup chain, then only
31 : * filename and fd are initialized; the remaining fields are 0 or NULL.
32 : * For an incremental file, header_length, num_blocks, relative_block_numbers,
33 : * and truncation_block_length are also set.
34 : *
35 : * num_blocks_read and highest_offset_read always start out as 0.
36 : */
37 : typedef struct rfile
38 : {
39 : char *filename;
40 : int fd;
41 : size_t header_length;
42 : unsigned num_blocks;
43 : BlockNumber *relative_block_numbers;
44 : unsigned truncation_block_length;
45 : unsigned num_blocks_read;
46 : off_t highest_offset_read;
47 : } rfile;
48 :
49 : static void debug_reconstruction(int n_source,
50 : rfile **sources,
51 : bool dry_run);
52 : static unsigned find_reconstructed_block_length(rfile *s);
53 : static rfile *make_incremental_rfile(char *filename);
54 : static rfile *make_rfile(char *filename, bool missing_ok);
55 : static void write_reconstructed_file(char *input_filename,
56 : char *output_filename,
57 : unsigned block_length,
58 : rfile **sourcemap,
59 : off_t *offsetmap,
60 : pg_checksum_context *checksum_ctx,
61 : CopyMethod copy_method,
62 : bool debug,
63 : bool dry_run);
64 : static void read_bytes(rfile *rf, void *buffer, unsigned length);
65 : static void write_block(int fd, char *output_filename,
66 : uint8 *buffer,
67 : pg_checksum_context *checksum_ctx);
68 : static void read_block(rfile *s, off_t off, uint8 *buffer);
69 :
70 : /*
71 : * Reconstruct a full file from an incremental file and a chain of prior
72 : * backups.
73 : *
74 : * input_filename should be the path to the incremental file, and
75 : * output_filename should be the path where the reconstructed file is to be
76 : * written.
77 : *
78 : * relative_path should be the path to the directory containing this file,
79 : * relative to the root of the backup (NOT relative to the root of the
80 : * tablespace). bare_file_name should be the name of the file within that
81 : * directory, without "INCREMENTAL.".
82 : *
83 : * n_prior_backups is the number of prior backups, and prior_backup_dirs is
84 : * an array of pathnames where those backups can be found.
85 : */
86 : void
87 10638 : reconstruct_from_incremental_file(char *input_filename,
88 : char *output_filename,
89 : char *relative_path,
90 : char *bare_file_name,
91 : int n_prior_backups,
92 : char **prior_backup_dirs,
93 : manifest_data **manifests,
94 : char *manifest_path,
95 : pg_checksum_type checksum_type,
96 : int *checksum_length,
97 : uint8 **checksum_payload,
98 : CopyMethod copy_method,
99 : bool debug,
100 : bool dry_run)
101 : {
102 : rfile **source;
103 10638 : rfile *latest_source = NULL;
104 : rfile **sourcemap;
105 : off_t *offsetmap;
106 : unsigned block_length;
107 : unsigned i;
108 10638 : unsigned sidx = n_prior_backups;
109 10638 : bool full_copy_possible = true;
110 10638 : int copy_source_index = -1;
111 10638 : rfile *copy_source = NULL;
112 : pg_checksum_context checksum_ctx;
113 :
114 : /*
115 : * Every block must come either from the latest version of the file or
116 : * from one of the prior backups.
117 : */
118 10638 : source = pg_malloc0(sizeof(rfile *) * (1 + n_prior_backups));
119 :
120 : /*
121 : * Use the information from the latest incremental file to figure out how
122 : * long the reconstructed file should be.
123 : */
124 10638 : latest_source = make_incremental_rfile(input_filename);
125 10638 : source[n_prior_backups] = latest_source;
126 10638 : block_length = find_reconstructed_block_length(latest_source);
127 :
128 : /*
129 : * For each block in the output file, we need to know from which file we
130 : * need to obtain it and at what offset in that file it's stored.
131 : * sourcemap gives us the first of these things, and offsetmap the latter.
132 : */
133 10638 : sourcemap = pg_malloc0(sizeof(rfile *) * block_length);
134 10638 : offsetmap = pg_malloc0(sizeof(off_t) * block_length);
135 :
136 : /*
137 : * Every block that is present in the newest incremental file should be
138 : * sourced from that file. If it precedes the truncation_block_length,
139 : * it's a block that we would otherwise have had to find in an older
140 : * backup and thus reduces the number of blocks remaining to be found by
141 : * one; otherwise, it's an extra block that needs to be included in the
142 : * output but would not have needed to be found in an older backup if it
143 : * had not been present.
144 : */
145 10716 : for (i = 0; i < latest_source->num_blocks; ++i)
146 : {
147 78 : BlockNumber b = latest_source->relative_block_numbers[i];
148 :
149 : Assert(b < block_length);
150 78 : sourcemap[b] = latest_source;
151 78 : offsetmap[b] = latest_source->header_length + (i * BLCKSZ);
152 :
153 : /*
154 : * A full copy of a file from an earlier backup is only possible if no
155 : * blocks are needed from any later incremental file.
156 : */
157 78 : full_copy_possible = false;
158 : }
159 :
160 : while (1)
161 2658 : {
162 : char source_filename[MAXPGPATH];
163 : rfile *s;
164 :
165 : /*
166 : * Move to the next backup in the chain. If there are no more, then
167 : * we're done.
168 : */
169 13296 : if (sidx == 0)
170 0 : break;
171 13296 : --sidx;
172 :
173 : /*
174 : * Look for the full file in the previous backup. If not found, then
175 : * look for an incremental file instead.
176 : */
177 13296 : snprintf(source_filename, MAXPGPATH, "%s/%s/%s",
178 13296 : prior_backup_dirs[sidx], relative_path, bare_file_name);
179 13296 : if ((s = make_rfile(source_filename, true)) == NULL)
180 : {
181 2658 : snprintf(source_filename, MAXPGPATH, "%s/%s/INCREMENTAL.%s",
182 2658 : prior_backup_dirs[sidx], relative_path, bare_file_name);
183 2658 : s = make_incremental_rfile(source_filename);
184 : }
185 13296 : source[sidx] = s;
186 :
187 : /*
188 : * If s->header_length == 0, then this is a full file; otherwise, it's
189 : * an incremental file.
190 : */
191 13296 : if (s->header_length == 0)
192 : {
193 : struct stat sb;
194 : BlockNumber b;
195 : BlockNumber blocklength;
196 :
197 : /* We need to know the length of the file. */
198 10638 : if (fstat(s->fd, &sb) < 0)
199 0 : pg_fatal("could not stat file \"%s\": %m", s->filename);
200 :
201 : /*
202 : * Since we found a full file, source all blocks from it that
203 : * exist in the file.
204 : *
205 : * Note that there may be blocks that don't exist either in this
206 : * file or in any incremental file but that precede
207 : * truncation_block_length. These are, presumably, zero-filled
208 : * blocks that result from the server extending the file but
209 : * taking no action on those blocks that generated any WAL.
210 : *
211 : * Sadly, we have no way of validating that this is really what
212 : * happened, and neither does the server. From it's perspective,
213 : * an unmodified block that contains data looks exactly the same
214 : * as a zero-filled block that never had any data: either way,
215 : * it's not mentioned in any WAL summary and the server has no
216 : * reason to read it. From our perspective, all we know is that
217 : * nobody had a reason to back up the block. That certainly means
218 : * that the block didn't exist at the time of the full backup, but
219 : * the supposition that it was all zeroes at the time of every
220 : * later backup is one that we can't validate.
221 : */
222 10638 : blocklength = sb.st_size / BLCKSZ;
223 50086 : for (b = 0; b < latest_source->truncation_block_length; ++b)
224 : {
225 39448 : if (sourcemap[b] == NULL && b < blocklength)
226 : {
227 39370 : sourcemap[b] = s;
228 39370 : offsetmap[b] = b * BLCKSZ;
229 : }
230 : }
231 :
232 : /*
233 : * If a full copy looks possible, check whether the resulting file
234 : * should be exactly as long as the source file is. If so, a full
235 : * copy is acceptable, otherwise not.
236 : */
237 10638 : if (full_copy_possible)
238 : {
239 : uint64 expected_length;
240 :
241 10592 : expected_length =
242 10592 : (uint64) latest_source->truncation_block_length;
243 10592 : expected_length *= BLCKSZ;
244 10592 : if (expected_length == sb.st_size)
245 : {
246 10592 : copy_source = s;
247 10592 : copy_source_index = sidx;
248 : }
249 : }
250 :
251 : /* We don't need to consider any further sources. */
252 10638 : break;
253 : }
254 :
255 : /*
256 : * Since we found another incremental file, source all blocks from it
257 : * that we need but don't yet have.
258 : */
259 2658 : for (i = 0; i < s->num_blocks; ++i)
260 : {
261 0 : BlockNumber b = s->relative_block_numbers[i];
262 :
263 0 : if (b < latest_source->truncation_block_length &&
264 0 : sourcemap[b] == NULL)
265 : {
266 0 : sourcemap[b] = s;
267 0 : offsetmap[b] = s->header_length + (i * BLCKSZ);
268 :
269 : /*
270 : * A full copy of a file from an earlier backup is only
271 : * possible if no blocks are needed from any later incremental
272 : * file.
273 : */
274 0 : full_copy_possible = false;
275 : }
276 : }
277 : }
278 :
279 : /*
280 : * If a checksum of the required type already exists in the
281 : * backup_manifest for the relevant input directory, we can save some work
282 : * by reusing that checksum instead of computing a new one.
283 : */
284 10638 : if (copy_source_index >= 0 && manifests[copy_source_index] != NULL &&
285 : checksum_type != CHECKSUM_TYPE_NONE)
286 : {
287 : manifest_file *mfile;
288 :
289 10592 : mfile = manifest_files_lookup(manifests[copy_source_index]->files,
290 : manifest_path);
291 10592 : if (mfile == NULL)
292 : {
293 0 : char *path = psprintf("%s/backup_manifest",
294 0 : prior_backup_dirs[copy_source_index]);
295 :
296 : /*
297 : * The directory is out of sync with the backup_manifest, so emit
298 : * a warning.
299 : */
300 0 : pg_log_warning("manifest file \"%s\" contains no entry for file \"%s\"",
301 : path,
302 : manifest_path);
303 0 : pfree(path);
304 : }
305 10592 : else if (mfile->checksum_type == checksum_type)
306 : {
307 10592 : *checksum_length = mfile->checksum_length;
308 10592 : *checksum_payload = pg_malloc(*checksum_length);
309 10592 : memcpy(*checksum_payload, mfile->checksum_payload,
310 10592 : *checksum_length);
311 10592 : checksum_type = CHECKSUM_TYPE_NONE;
312 : }
313 : }
314 :
315 : /* Prepare for checksum calculation, if required. */
316 10638 : pg_checksum_init(&checksum_ctx, checksum_type);
317 :
318 : /*
319 : * If the full file can be created by copying a file from an older backup
320 : * in the chain without needing to overwrite any blocks or truncate the
321 : * result, then forget about performing reconstruction and just copy that
322 : * file in its entirety.
323 : *
324 : * Otherwise, reconstruct.
325 : */
326 10638 : if (copy_source != NULL)
327 10592 : copy_file(copy_source->filename, output_filename,
328 : &checksum_ctx, copy_method, dry_run);
329 : else
330 : {
331 46 : write_reconstructed_file(input_filename, output_filename,
332 : block_length, sourcemap, offsetmap,
333 : &checksum_ctx, copy_method,
334 : debug, dry_run);
335 46 : debug_reconstruction(n_prior_backups + 1, source, dry_run);
336 : }
337 :
338 : /* Save results of checksum calculation. */
339 10638 : if (checksum_type != CHECKSUM_TYPE_NONE)
340 : {
341 46 : *checksum_payload = pg_malloc(PG_CHECKSUM_MAX_LENGTH);
342 46 : *checksum_length = pg_checksum_final(&checksum_ctx,
343 : *checksum_payload);
344 : }
345 :
346 : /*
347 : * Close files and release memory.
348 : */
349 34572 : for (i = 0; i <= n_prior_backups; ++i)
350 : {
351 23934 : rfile *s = source[i];
352 :
353 23934 : if (s == NULL)
354 0 : continue;
355 23934 : if (close(s->fd) != 0)
356 0 : pg_fatal("could not close file \"%s\": %m", s->filename);
357 23934 : if (s->relative_block_numbers != NULL)
358 46 : pfree(s->relative_block_numbers);
359 23934 : pg_free(s->filename);
360 : }
361 10638 : pfree(sourcemap);
362 10638 : pfree(offsetmap);
363 10638 : pfree(source);
364 10638 : }
365 :
366 : /*
367 : * Perform post-reconstruction logging and sanity checks.
368 : */
369 : static void
370 46 : debug_reconstruction(int n_source, rfile **sources, bool dry_run)
371 : {
372 : unsigned i;
373 :
374 144 : for (i = 0; i < n_source; ++i)
375 : {
376 98 : rfile *s = sources[i];
377 :
378 : /* Ignore source if not used. */
379 98 : if (s == NULL)
380 0 : continue;
381 :
382 : /* If no data is needed from this file, we can ignore it. */
383 98 : if (s->num_blocks_read == 0)
384 6 : continue;
385 :
386 : /* Debug logging. */
387 92 : if (dry_run)
388 0 : pg_log_debug("would have read %u blocks from \"%s\"",
389 : s->num_blocks_read, s->filename);
390 : else
391 92 : pg_log_debug("read %u blocks from \"%s\"",
392 : s->num_blocks_read, s->filename);
393 :
394 : /*
395 : * In dry-run mode, we don't actually try to read data from the file,
396 : * but we do try to verify that the file is long enough that we could
397 : * have read the data if we'd tried.
398 : *
399 : * If this fails, then it means that a non-dry-run attempt would fail,
400 : * complaining of not being able to read the required bytes from the
401 : * file.
402 : */
403 92 : if (dry_run)
404 : {
405 : struct stat sb;
406 :
407 0 : if (fstat(s->fd, &sb) < 0)
408 0 : pg_fatal("could not stat file \"%s\": %m", s->filename);
409 0 : if (sb.st_size < s->highest_offset_read)
410 0 : pg_fatal("file \"%s\" is too short: expected %llu, found %llu",
411 : s->filename,
412 : (unsigned long long) s->highest_offset_read,
413 : (unsigned long long) sb.st_size);
414 : }
415 : }
416 46 : }
417 :
418 : /*
419 : * When we perform reconstruction using an incremental file, the output file
420 : * should be at least as long as the truncation_block_length. Any blocks
421 : * present in the incremental file increase the output length as far as is
422 : * necessary to include those blocks.
423 : */
424 : static unsigned
425 10638 : find_reconstructed_block_length(rfile *s)
426 : {
427 10638 : unsigned block_length = s->truncation_block_length;
428 : unsigned i;
429 :
430 10716 : for (i = 0; i < s->num_blocks; ++i)
431 78 : if (s->relative_block_numbers[i] >= block_length)
432 0 : block_length = s->relative_block_numbers[i] + 1;
433 :
434 10638 : return block_length;
435 : }
436 :
437 : /*
438 : * Initialize an incremental rfile, reading the header so that we know which
439 : * blocks it contains.
440 : */
441 : static rfile *
442 13296 : make_incremental_rfile(char *filename)
443 : {
444 : rfile *rf;
445 : unsigned magic;
446 :
447 13296 : rf = make_rfile(filename, false);
448 :
449 : /* Read and validate magic number. */
450 13296 : read_bytes(rf, &magic, sizeof(magic));
451 13296 : if (magic != INCREMENTAL_MAGIC)
452 0 : pg_fatal("file \"%s\" has bad incremental magic number (0x%x not 0x%x)",
453 : filename, magic, INCREMENTAL_MAGIC);
454 :
455 : /* Read block count. */
456 13296 : read_bytes(rf, &rf->num_blocks, sizeof(rf->num_blocks));
457 13296 : if (rf->num_blocks > RELSEG_SIZE)
458 0 : pg_fatal("file \"%s\" has block count %u in excess of segment size %u",
459 : filename, rf->num_blocks, RELSEG_SIZE);
460 :
461 : /* Read truncation block length. */
462 13296 : read_bytes(rf, &rf->truncation_block_length,
463 : sizeof(rf->truncation_block_length));
464 13296 : if (rf->truncation_block_length > RELSEG_SIZE)
465 0 : pg_fatal("file \"%s\" has truncation block length %u in excess of segment size %u",
466 : filename, rf->truncation_block_length, RELSEG_SIZE);
467 :
468 : /* Read block numbers if there are any. */
469 13296 : if (rf->num_blocks > 0)
470 : {
471 46 : rf->relative_block_numbers =
472 46 : pg_malloc0(sizeof(BlockNumber) * rf->num_blocks);
473 46 : read_bytes(rf, rf->relative_block_numbers,
474 46 : sizeof(BlockNumber) * rf->num_blocks);
475 : }
476 :
477 : /* Remember length of header. */
478 13296 : rf->header_length = sizeof(magic) + sizeof(rf->num_blocks) +
479 13296 : sizeof(rf->truncation_block_length) +
480 13296 : sizeof(BlockNumber) * rf->num_blocks;
481 :
482 : /*
483 : * Round header length to a multiple of BLCKSZ, so that blocks contents
484 : * are properly aligned. Only do this when the file actually has data for
485 : * some blocks.
486 : */
487 13296 : if ((rf->num_blocks > 0) && ((rf->header_length % BLCKSZ) != 0))
488 46 : rf->header_length += (BLCKSZ - (rf->header_length % BLCKSZ));
489 :
490 13296 : return rf;
491 : }
492 :
493 : /*
494 : * Allocate and perform basic initialization of an rfile.
495 : */
496 : static rfile *
497 26592 : make_rfile(char *filename, bool missing_ok)
498 : {
499 : rfile *rf;
500 :
501 26592 : rf = pg_malloc0(sizeof(rfile));
502 26592 : rf->filename = pstrdup(filename);
503 26592 : if ((rf->fd = open(filename, O_RDONLY | PG_BINARY, 0)) < 0)
504 : {
505 2658 : if (missing_ok && errno == ENOENT)
506 : {
507 2658 : pg_free(rf);
508 2658 : return NULL;
509 : }
510 0 : pg_fatal("could not open file \"%s\": %m", filename);
511 : }
512 :
513 23934 : return rf;
514 : }
515 :
516 : /*
517 : * Read the indicated number of bytes from an rfile into the buffer.
518 : */
519 : static void
520 39934 : read_bytes(rfile *rf, void *buffer, unsigned length)
521 : {
522 39934 : int rb = read(rf->fd, buffer, length);
523 :
524 39934 : if (rb != length)
525 : {
526 0 : if (rb < 0)
527 0 : pg_fatal("could not read file \"%s\": %m", rf->filename);
528 : else
529 0 : pg_fatal("could not read file \"%s\": read %d of %u",
530 : rf->filename, rb, length);
531 : }
532 39934 : }
533 :
534 : /*
535 : * Write out a reconstructed file.
536 : */
537 : static void
538 46 : write_reconstructed_file(char *input_filename,
539 : char *output_filename,
540 : unsigned block_length,
541 : rfile **sourcemap,
542 : off_t *offsetmap,
543 : pg_checksum_context *checksum_ctx,
544 : CopyMethod copy_method,
545 : bool debug,
546 : bool dry_run)
547 : {
548 46 : int wfd = -1;
549 : unsigned i;
550 46 : unsigned zero_blocks = 0;
551 :
552 : /* Debugging output. */
553 46 : if (debug)
554 : {
555 : StringInfoData debug_buf;
556 46 : unsigned start_of_range = 0;
557 46 : unsigned current_block = 0;
558 :
559 : /* Basic information about the output file to be produced. */
560 46 : if (dry_run)
561 0 : pg_log_debug("would reconstruct \"%s\" (%u blocks, checksum %s)",
562 : output_filename, block_length,
563 : pg_checksum_type_name(checksum_ctx->type));
564 : else
565 46 : pg_log_debug("reconstructing \"%s\" (%u blocks, checksum %s)",
566 : output_filename, block_length,
567 : pg_checksum_type_name(checksum_ctx->type));
568 :
569 : /* Print out the plan for reconstructing this file. */
570 46 : initStringInfo(&debug_buf);
571 560 : while (current_block < block_length)
572 : {
573 514 : rfile *s = sourcemap[current_block];
574 :
575 : /* Extend range, if possible. */
576 514 : if (current_block + 1 < block_length &&
577 468 : s == sourcemap[current_block + 1])
578 : {
579 400 : ++current_block;
580 400 : continue;
581 : }
582 :
583 : /* Add details about this range. */
584 114 : if (s == NULL)
585 : {
586 0 : if (current_block == start_of_range)
587 0 : appendStringInfo(&debug_buf, " %u:zero", current_block);
588 : else
589 0 : appendStringInfo(&debug_buf, " %u-%u:zero",
590 : start_of_range, current_block);
591 : }
592 : else
593 : {
594 114 : if (current_block == start_of_range)
595 70 : appendStringInfo(&debug_buf, " %u:%s@" UINT64_FORMAT,
596 : current_block, s->filename,
597 70 : (uint64) offsetmap[current_block]);
598 : else
599 44 : appendStringInfo(&debug_buf, " %u-%u:%s@" UINT64_FORMAT,
600 : start_of_range, current_block,
601 : s->filename,
602 44 : (uint64) offsetmap[current_block]);
603 : }
604 :
605 : /* Begin new range. */
606 114 : start_of_range = ++current_block;
607 :
608 : /* If the output is very long or we are done, dump it now. */
609 114 : if (current_block == block_length || debug_buf.len > 1024)
610 : {
611 46 : pg_log_debug("reconstruction plan:%s", debug_buf.data);
612 46 : resetStringInfo(&debug_buf);
613 : }
614 : }
615 :
616 : /* Free memory. */
617 46 : pfree(debug_buf.data);
618 : }
619 :
620 : /* Open the output file, except in dry_run mode. */
621 92 : if (!dry_run &&
622 46 : (wfd = open(output_filename,
623 : O_RDWR | PG_BINARY | O_CREAT | O_EXCL,
624 : pg_file_create_mode)) < 0)
625 0 : pg_fatal("could not open file \"%s\": %m", output_filename);
626 :
627 : /* Read and write the blocks as required. */
628 560 : for (i = 0; i < block_length; ++i)
629 : {
630 : uint8 buffer[BLCKSZ];
631 514 : rfile *s = sourcemap[i];
632 :
633 : /* Update accounting information. */
634 514 : if (s == NULL)
635 0 : ++zero_blocks;
636 : else
637 : {
638 514 : s->num_blocks_read++;
639 514 : s->highest_offset_read = Max(s->highest_offset_read,
640 : offsetmap[i] + BLCKSZ);
641 : }
642 :
643 : /* Skip the rest of this in dry-run mode. */
644 514 : if (dry_run)
645 0 : continue;
646 :
647 : /* Read or zero-fill the block as appropriate. */
648 514 : if (s == NULL)
649 : {
650 : /*
651 : * New block not mentioned in the WAL summary. Should have been an
652 : * uninitialized block, so just zero-fill it.
653 : */
654 0 : memset(buffer, 0, BLCKSZ);
655 :
656 : /* Write out the block, update the checksum if needed. */
657 0 : write_block(wfd, output_filename, buffer, checksum_ctx);
658 :
659 : /* Nothing else to do for zero-filled blocks. */
660 0 : continue;
661 : }
662 :
663 : /* Copy the block using the appropriate copy method. */
664 514 : if (copy_method != COPY_METHOD_COPY_FILE_RANGE)
665 : {
666 : /*
667 : * Read the block from the correct source file, and then write it
668 : * out, possibly with a checksum update.
669 : */
670 514 : read_block(s, offsetmap[i], buffer);
671 514 : write_block(wfd, output_filename, buffer, checksum_ctx);
672 : }
673 : else /* use copy_file_range */
674 : {
675 : #if defined(HAVE_COPY_FILE_RANGE)
676 : /* copy_file_range modifies the offset, so use a local copy */
677 0 : off_t off = offsetmap[i];
678 0 : size_t nwritten = 0;
679 :
680 : /*
681 : * Retry until we've written all the bytes (the offset is updated
682 : * by copy_file_range, and so is the wfd file offset).
683 : */
684 : do
685 : {
686 : int wb;
687 :
688 0 : wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0);
689 :
690 0 : if (wb < 0)
691 0 : pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
692 : input_filename, output_filename);
693 :
694 0 : nwritten += wb;
695 :
696 0 : } while (BLCKSZ > nwritten);
697 :
698 : /*
699 : * When checksum calculation not needed, we're done, otherwise
700 : * read the block and pass it to the checksum calculation.
701 : */
702 0 : if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
703 0 : continue;
704 :
705 0 : read_block(s, offsetmap[i], buffer);
706 :
707 0 : if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
708 0 : pg_fatal("could not update checksum of file \"%s\"",
709 : output_filename);
710 : #else
711 : pg_fatal("copy_file_range not supported on this platform");
712 : #endif
713 : }
714 : }
715 :
716 : /* Debugging output. */
717 46 : if (zero_blocks > 0)
718 : {
719 0 : if (dry_run)
720 0 : pg_log_debug("would have zero-filled %u blocks", zero_blocks);
721 : else
722 0 : pg_log_debug("zero-filled %u blocks", zero_blocks);
723 : }
724 :
725 : /* Close the output file. */
726 46 : if (wfd >= 0 && close(wfd) != 0)
727 0 : pg_fatal("could not close file \"%s\": %m", output_filename);
728 46 : }
729 :
730 : /*
731 : * Write the block into the file (using the file descriptor), and
732 : * if needed update the checksum calculation.
733 : *
734 : * The buffer is expected to contain BLCKSZ bytes. The filename is
735 : * provided only for the error message.
736 : */
737 : static void
738 514 : write_block(int fd, char *output_filename,
739 : uint8 *buffer, pg_checksum_context *checksum_ctx)
740 : {
741 : int wb;
742 :
743 514 : if ((wb = write(fd, buffer, BLCKSZ)) != BLCKSZ)
744 : {
745 0 : if (wb < 0)
746 0 : pg_fatal("could not write file \"%s\": %m", output_filename);
747 : else
748 0 : pg_fatal("could not write file \"%s\": wrote %d of %d",
749 : output_filename, wb, BLCKSZ);
750 : }
751 :
752 : /* Update the checksum computation. */
753 514 : if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
754 0 : pg_fatal("could not update checksum of file \"%s\"",
755 : output_filename);
756 514 : }
757 :
758 : /*
759 : * Read a block of data (BLCKSZ bytes) into the buffer.
760 : */
761 : static void
762 514 : read_block(rfile *s, off_t off, uint8 *buffer)
763 : {
764 : int rb;
765 :
766 : /* Read the block from the correct source, except if dry-run. */
767 514 : rb = pg_pread(s->fd, buffer, BLCKSZ, off);
768 514 : if (rb != BLCKSZ)
769 : {
770 0 : if (rb < 0)
771 0 : pg_fatal("could not read from file \"%s\": %m", s->filename);
772 : else
773 0 : pg_fatal("could not read from file \"%s\", offset %llu: read %d of %d",
774 : s->filename, (unsigned long long) off, rb, BLCKSZ);
775 : }
776 514 : }
|