Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * reconstruct.c
4 : * Reconstruct full file from incremental file and backup chain.
5 : *
6 : * Copyright (c) 2017-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/bin/pg_combinebackup/reconstruct.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres_fe.h"
14 :
15 : #include <unistd.h>
16 :
17 : #include "backup/basebackup_incremental.h"
18 : #include "common/file_perm.h"
19 : #include "common/logging.h"
20 : #include "copy_file.h"
21 : #include "lib/stringinfo.h"
22 : #include "reconstruct.h"
23 : #include "storage/block.h"
24 :
25 : /*
26 : * An rfile stores the data that we need in order to be able to use some file
27 : * on disk for reconstruction. For any given output file, we create one rfile
28 : * per backup that we need to consult when we constructing that output file.
29 : *
30 : * If we find a full version of the file in the backup chain, then only
31 : * filename and fd are initialized; the remaining fields are 0 or NULL.
32 : * For an incremental file, header_length, num_blocks, relative_block_numbers,
33 : * and truncation_block_length are also set.
34 : *
35 : * num_blocks_read and highest_offset_read always start out as 0.
36 : */
37 : typedef struct rfile
38 : {
39 : char *filename;
40 : int fd;
41 : size_t header_length;
42 : unsigned num_blocks;
43 : BlockNumber *relative_block_numbers;
44 : unsigned truncation_block_length;
45 : unsigned num_blocks_read;
46 : off_t highest_offset_read;
47 : } rfile;
48 :
49 : static void debug_reconstruction(int n_source,
50 : rfile **sources,
51 : bool dry_run);
52 : static unsigned find_reconstructed_block_length(rfile *s);
53 : static rfile *make_incremental_rfile(char *filename);
54 : static rfile *make_rfile(char *filename, bool missing_ok);
55 : static void write_reconstructed_file(char *input_filename,
56 : char *output_filename,
57 : unsigned block_length,
58 : rfile **sourcemap,
59 : off_t *offsetmap,
60 : pg_checksum_context *checksum_ctx,
61 : CopyMethod copy_method,
62 : bool debug,
63 : bool dry_run);
64 : static void read_bytes(rfile *rf, void *buffer, unsigned length);
65 : static void write_block(int wfd, char *output_filename,
66 : uint8 *buffer,
67 : pg_checksum_context *checksum_ctx);
68 : static void read_block(rfile *s, off_t off, uint8 *buffer);
69 :
70 : /*
71 : * Reconstruct a full file from an incremental file and a chain of prior
72 : * backups.
73 : *
74 : * input_filename should be the path to the incremental file, and
75 : * output_filename should be the path where the reconstructed file is to be
76 : * written.
77 : *
78 : * relative_path should be the path to the directory containing this file,
79 : * relative to the root of the backup (NOT relative to the root of the
80 : * tablespace). bare_file_name should be the name of the file within that
81 : * directory, without "INCREMENTAL.".
82 : *
83 : * n_prior_backups is the number of prior backups, and prior_backup_dirs is
84 : * an array of pathnames where those backups can be found.
85 : */
86 : void
87 9308 : reconstruct_from_incremental_file(char *input_filename,
88 : char *output_filename,
89 : char *relative_path,
90 : char *bare_file_name,
91 : int n_prior_backups,
92 : char **prior_backup_dirs,
93 : manifest_data **manifests,
94 : char *manifest_path,
95 : pg_checksum_type checksum_type,
96 : int *checksum_length,
97 : uint8 **checksum_payload,
98 : CopyMethod copy_method,
99 : bool debug,
100 : bool dry_run)
101 : {
102 : rfile **source;
103 9308 : rfile *latest_source = NULL;
104 : rfile **sourcemap;
105 : off_t *offsetmap;
106 : unsigned block_length;
107 : unsigned i;
108 9308 : unsigned sidx = n_prior_backups;
109 9308 : bool full_copy_possible = true;
110 9308 : int copy_source_index = -1;
111 9308 : rfile *copy_source = NULL;
112 : pg_checksum_context checksum_ctx;
113 :
114 : /*
115 : * Every block must come either from the latest version of the file or
116 : * from one of the prior backups.
117 : */
118 9308 : source = pg_malloc0(sizeof(rfile *) * (1 + n_prior_backups));
119 :
120 : /*
121 : * Use the information from the latest incremental file to figure out how
122 : * long the reconstructed file should be.
123 : */
124 9308 : latest_source = make_incremental_rfile(input_filename);
125 9308 : source[n_prior_backups] = latest_source;
126 9308 : block_length = find_reconstructed_block_length(latest_source);
127 :
128 : /*
129 : * For each block in the output file, we need to know from which file we
130 : * need to obtain it and at what offset in that file it's stored.
131 : * sourcemap gives us the first of these things, and offsetmap the latter.
132 : */
133 9308 : sourcemap = pg_malloc0(sizeof(rfile *) * block_length);
134 9308 : offsetmap = pg_malloc0(sizeof(off_t) * block_length);
135 :
136 : /*
137 : * Every block that is present in the newest incremental file should be
138 : * sourced from that file. If it precedes the truncation_block_length,
139 : * it's a block that we would otherwise have had to find in an older
140 : * backup and thus reduces the number of blocks remaining to be found by
141 : * one; otherwise, it's an extra block that needs to be included in the
142 : * output but would not have needed to be found in an older backup if it
143 : * had not been present.
144 : */
145 9386 : for (i = 0; i < latest_source->num_blocks; ++i)
146 : {
147 78 : BlockNumber b = latest_source->relative_block_numbers[i];
148 :
149 : Assert(b < block_length);
150 78 : sourcemap[b] = latest_source;
151 78 : offsetmap[b] = latest_source->header_length + (i * BLCKSZ);
152 :
153 : /*
154 : * A full copy of a file from an earlier backup is only possible if no
155 : * blocks are needed from any later incremental file.
156 : */
157 78 : full_copy_possible = false;
158 : }
159 :
160 : while (1)
161 2658 : {
162 : char source_filename[MAXPGPATH];
163 : rfile *s;
164 :
165 : /*
166 : * Move to the next backup in the chain. If there are no more, then
167 : * we're done.
168 : */
169 11966 : if (sidx == 0)
170 0 : break;
171 11966 : --sidx;
172 :
173 : /*
174 : * Look for the full file in the previous backup. If not found, then
175 : * look for an incremental file instead.
176 : */
177 11966 : snprintf(source_filename, MAXPGPATH, "%s/%s/%s",
178 11966 : prior_backup_dirs[sidx], relative_path, bare_file_name);
179 11966 : if ((s = make_rfile(source_filename, true)) == NULL)
180 : {
181 2658 : snprintf(source_filename, MAXPGPATH, "%s/%s/INCREMENTAL.%s",
182 2658 : prior_backup_dirs[sidx], relative_path, bare_file_name);
183 2658 : s = make_incremental_rfile(source_filename);
184 : }
185 11966 : source[sidx] = s;
186 :
187 : /*
188 : * If s->header_length == 0, then this is a full file; otherwise, it's
189 : * an incremental file.
190 : */
191 11966 : if (s->header_length == 0)
192 : {
193 : struct stat sb;
194 : BlockNumber b;
195 : BlockNumber blocklength;
196 :
197 : /* We need to know the length of the file. */
198 9308 : if (fstat(s->fd, &sb) < 0)
199 0 : pg_fatal("could not stat \"%s\": %m", s->filename);
200 :
201 : /*
202 : * Since we found a full file, source all blocks from it that
203 : * exist in the file.
204 : *
205 : * Note that there may be blocks that don't exist either in this
206 : * file or in any incremental file but that precede
207 : * truncation_block_length. These are, presumably, zero-filled
208 : * blocks that result from the server extending the file but
209 : * taking no action on those blocks that generated any WAL.
210 : *
211 : * Sadly, we have no way of validating that this is really what
212 : * happened, and neither does the server. From it's perspective,
213 : * an unmodified block that contains data looks exactly the same
214 : * as a zero-filled block that never had any data: either way,
215 : * it's not mentioned in any WAL summary and the server has no
216 : * reason to read it. From our perspective, all we know is that
217 : * nobody had a reason to back up the block. That certainly means
218 : * that the block didn't exist at the time of the full backup, but
219 : * the supposition that it was all zeroes at the time of every
220 : * later backup is one that we can't validate.
221 : */
222 9308 : blocklength = sb.st_size / BLCKSZ;
223 43782 : for (b = 0; b < latest_source->truncation_block_length; ++b)
224 : {
225 34474 : if (sourcemap[b] == NULL && b < blocklength)
226 : {
227 34396 : sourcemap[b] = s;
228 34396 : offsetmap[b] = b * BLCKSZ;
229 : }
230 : }
231 :
232 : /*
233 : * If a full copy looks possible, check whether the resulting file
234 : * should be exactly as long as the source file is. If so, a full
235 : * copy is acceptable, otherwise not.
236 : */
237 9308 : if (full_copy_possible)
238 : {
239 : uint64 expected_length;
240 :
241 9262 : expected_length =
242 9262 : (uint64) latest_source->truncation_block_length;
243 9262 : expected_length *= BLCKSZ;
244 9262 : if (expected_length == sb.st_size)
245 : {
246 9262 : copy_source = s;
247 9262 : copy_source_index = sidx;
248 : }
249 : }
250 :
251 : /* We don't need to consider any further sources. */
252 9308 : break;
253 : }
254 :
255 : /*
256 : * Since we found another incremental file, source all blocks from it
257 : * that we need but don't yet have.
258 : */
259 2658 : for (i = 0; i < s->num_blocks; ++i)
260 : {
261 0 : BlockNumber b = s->relative_block_numbers[i];
262 :
263 0 : if (b < latest_source->truncation_block_length &&
264 0 : sourcemap[b] == NULL)
265 : {
266 0 : sourcemap[b] = s;
267 0 : offsetmap[b] = s->header_length + (i * BLCKSZ);
268 :
269 : /*
270 : * A full copy of a file from an earlier backup is only
271 : * possible if no blocks are needed from any later incremental
272 : * file.
273 : */
274 0 : full_copy_possible = false;
275 : }
276 : }
277 : }
278 :
279 : /*
280 : * If a checksum of the required type already exists in the
281 : * backup_manifest for the relevant input directory, we can save some work
282 : * by reusing that checksum instead of computing a new one.
283 : */
284 9308 : if (copy_source_index >= 0 && manifests[copy_source_index] != NULL &&
285 : checksum_type != CHECKSUM_TYPE_NONE)
286 : {
287 : manifest_file *mfile;
288 :
289 9262 : mfile = manifest_files_lookup(manifests[copy_source_index]->files,
290 : manifest_path);
291 9262 : if (mfile == NULL)
292 : {
293 0 : char *path = psprintf("%s/backup_manifest",
294 0 : prior_backup_dirs[copy_source_index]);
295 :
296 : /*
297 : * The directory is out of sync with the backup_manifest, so emit
298 : * a warning.
299 : */
300 : /*- translator: the first %s is a backup manifest file, the second is a file absent therein */
301 0 : pg_log_warning("\"%s\" contains no entry for \"%s\"",
302 : path,
303 : manifest_path);
304 0 : pfree(path);
305 : }
306 9262 : else if (mfile->checksum_type == checksum_type)
307 : {
308 9262 : *checksum_length = mfile->checksum_length;
309 9262 : *checksum_payload = pg_malloc(*checksum_length);
310 9262 : memcpy(*checksum_payload, mfile->checksum_payload,
311 9262 : *checksum_length);
312 9262 : checksum_type = CHECKSUM_TYPE_NONE;
313 : }
314 : }
315 :
316 : /* Prepare for checksum calculation, if required. */
317 9308 : pg_checksum_init(&checksum_ctx, checksum_type);
318 :
319 : /*
320 : * If the full file can be created by copying a file from an older backup
321 : * in the chain without needing to overwrite any blocks or truncate the
322 : * result, then forget about performing reconstruction and just copy that
323 : * file in its entirety.
324 : *
325 : * Otherwise, reconstruct.
326 : */
327 9308 : if (copy_source != NULL)
328 9262 : copy_file(copy_source->filename, output_filename,
329 : &checksum_ctx, copy_method, dry_run);
330 : else
331 : {
332 46 : write_reconstructed_file(input_filename, output_filename,
333 : block_length, sourcemap, offsetmap,
334 : &checksum_ctx, copy_method,
335 : debug, dry_run);
336 46 : debug_reconstruction(n_prior_backups + 1, source, dry_run);
337 : }
338 :
339 : /* Save results of checksum calculation. */
340 9308 : if (checksum_type != CHECKSUM_TYPE_NONE)
341 : {
342 46 : *checksum_payload = pg_malloc(PG_CHECKSUM_MAX_LENGTH);
343 46 : *checksum_length = pg_checksum_final(&checksum_ctx,
344 : *checksum_payload);
345 : }
346 :
347 : /*
348 : * Close files and release memory.
349 : */
350 30582 : for (i = 0; i <= n_prior_backups; ++i)
351 : {
352 21274 : rfile *s = source[i];
353 :
354 21274 : if (s == NULL)
355 0 : continue;
356 21274 : if (close(s->fd) != 0)
357 0 : pg_fatal("could not close \"%s\": %m", s->filename);
358 21274 : if (s->relative_block_numbers != NULL)
359 46 : pfree(s->relative_block_numbers);
360 21274 : pg_free(s->filename);
361 : }
362 9308 : pfree(sourcemap);
363 9308 : pfree(offsetmap);
364 9308 : pfree(source);
365 9308 : }
366 :
367 : /*
368 : * Perform post-reconstruction logging and sanity checks.
369 : */
370 : static void
371 46 : debug_reconstruction(int n_source, rfile **sources, bool dry_run)
372 : {
373 : unsigned i;
374 :
375 144 : for (i = 0; i < n_source; ++i)
376 : {
377 98 : rfile *s = sources[i];
378 :
379 : /* Ignore source if not used. */
380 98 : if (s == NULL)
381 0 : continue;
382 :
383 : /* If no data is needed from this file, we can ignore it. */
384 98 : if (s->num_blocks_read == 0)
385 6 : continue;
386 :
387 : /* Debug logging. */
388 92 : if (dry_run)
389 0 : pg_log_debug("would have read %u blocks from \"%s\"",
390 : s->num_blocks_read, s->filename);
391 : else
392 92 : pg_log_debug("read %u blocks from \"%s\"",
393 : s->num_blocks_read, s->filename);
394 :
395 : /*
396 : * In dry-run mode, we don't actually try to read data from the file,
397 : * but we do try to verify that the file is long enough that we could
398 : * have read the data if we'd tried.
399 : *
400 : * If this fails, then it means that a non-dry-run attempt would fail,
401 : * complaining of not being able to read the required bytes from the
402 : * file.
403 : */
404 92 : if (dry_run)
405 : {
406 : struct stat sb;
407 :
408 0 : if (fstat(s->fd, &sb) < 0)
409 0 : pg_fatal("could not stat \"%s\": %m", s->filename);
410 0 : if (sb.st_size < s->highest_offset_read)
411 0 : pg_fatal("file \"%s\" is too short: expected %llu, found %llu",
412 : s->filename,
413 : (unsigned long long) s->highest_offset_read,
414 : (unsigned long long) sb.st_size);
415 : }
416 : }
417 46 : }
418 :
419 : /*
420 : * When we perform reconstruction using an incremental file, the output file
421 : * should be at least as long as the truncation_block_length. Any blocks
422 : * present in the incremental file increase the output length as far as is
423 : * necessary to include those blocks.
424 : */
425 : static unsigned
426 9308 : find_reconstructed_block_length(rfile *s)
427 : {
428 9308 : unsigned block_length = s->truncation_block_length;
429 : unsigned i;
430 :
431 9386 : for (i = 0; i < s->num_blocks; ++i)
432 78 : if (s->relative_block_numbers[i] >= block_length)
433 0 : block_length = s->relative_block_numbers[i] + 1;
434 :
435 9308 : return block_length;
436 : }
437 :
438 : /*
439 : * Initialize an incremental rfile, reading the header so that we know which
440 : * blocks it contains.
441 : */
442 : static rfile *
443 11966 : make_incremental_rfile(char *filename)
444 : {
445 : rfile *rf;
446 : unsigned magic;
447 :
448 11966 : rf = make_rfile(filename, false);
449 :
450 : /* Read and validate magic number. */
451 11966 : read_bytes(rf, &magic, sizeof(magic));
452 11966 : if (magic != INCREMENTAL_MAGIC)
453 0 : pg_fatal("file \"%s\" has bad incremental magic number (0x%x not 0x%x)",
454 : filename, magic, INCREMENTAL_MAGIC);
455 :
456 : /* Read block count. */
457 11966 : read_bytes(rf, &rf->num_blocks, sizeof(rf->num_blocks));
458 11966 : if (rf->num_blocks > RELSEG_SIZE)
459 0 : pg_fatal("file \"%s\" has block count %u in excess of segment size %u",
460 : filename, rf->num_blocks, RELSEG_SIZE);
461 :
462 : /* Read truncation block length. */
463 11966 : read_bytes(rf, &rf->truncation_block_length,
464 : sizeof(rf->truncation_block_length));
465 11966 : if (rf->truncation_block_length > RELSEG_SIZE)
466 0 : pg_fatal("file \"%s\" has truncation block length %u in excess of segment size %u",
467 : filename, rf->truncation_block_length, RELSEG_SIZE);
468 :
469 : /* Read block numbers if there are any. */
470 11966 : if (rf->num_blocks > 0)
471 : {
472 46 : rf->relative_block_numbers =
473 46 : pg_malloc0(sizeof(BlockNumber) * rf->num_blocks);
474 46 : read_bytes(rf, rf->relative_block_numbers,
475 46 : sizeof(BlockNumber) * rf->num_blocks);
476 : }
477 :
478 : /* Remember length of header. */
479 11966 : rf->header_length = sizeof(magic) + sizeof(rf->num_blocks) +
480 11966 : sizeof(rf->truncation_block_length) +
481 11966 : sizeof(BlockNumber) * rf->num_blocks;
482 :
483 : /*
484 : * Round header length to a multiple of BLCKSZ, so that blocks contents
485 : * are properly aligned. Only do this when the file actually has data for
486 : * some blocks.
487 : */
488 11966 : if ((rf->num_blocks > 0) && ((rf->header_length % BLCKSZ) != 0))
489 46 : rf->header_length += (BLCKSZ - (rf->header_length % BLCKSZ));
490 :
491 11966 : return rf;
492 : }
493 :
494 : /*
495 : * Allocate and perform basic initialization of an rfile.
496 : */
497 : static rfile *
498 23932 : make_rfile(char *filename, bool missing_ok)
499 : {
500 : rfile *rf;
501 :
502 23932 : rf = pg_malloc0(sizeof(rfile));
503 23932 : rf->filename = pstrdup(filename);
504 23932 : if ((rf->fd = open(filename, O_RDONLY | PG_BINARY, 0)) < 0)
505 : {
506 2658 : if (missing_ok && errno == ENOENT)
507 : {
508 2658 : pg_free(rf);
509 2658 : return NULL;
510 : }
511 0 : pg_fatal("could not open file \"%s\": %m", filename);
512 : }
513 :
514 21274 : return rf;
515 : }
516 :
517 : /*
518 : * Read the indicated number of bytes from an rfile into the buffer.
519 : */
520 : static void
521 35944 : read_bytes(rfile *rf, void *buffer, unsigned length)
522 : {
523 35944 : int rb = read(rf->fd, buffer, length);
524 :
525 35944 : if (rb != length)
526 : {
527 0 : if (rb < 0)
528 0 : pg_fatal("could not read file \"%s\": %m", rf->filename);
529 : else
530 0 : pg_fatal("could not read file \"%s\": read only %d of %u bytes",
531 : rf->filename, rb, length);
532 : }
533 35944 : }
534 :
535 : /*
536 : * Write out a reconstructed file.
537 : */
538 : static void
539 46 : write_reconstructed_file(char *input_filename,
540 : char *output_filename,
541 : unsigned block_length,
542 : rfile **sourcemap,
543 : off_t *offsetmap,
544 : pg_checksum_context *checksum_ctx,
545 : CopyMethod copy_method,
546 : bool debug,
547 : bool dry_run)
548 : {
549 46 : int wfd = -1;
550 : unsigned i;
551 46 : unsigned zero_blocks = 0;
552 :
553 : /* Debugging output. */
554 46 : if (debug)
555 : {
556 : StringInfoData debug_buf;
557 46 : unsigned start_of_range = 0;
558 46 : unsigned current_block = 0;
559 :
560 : /* Basic information about the output file to be produced. */
561 46 : if (dry_run)
562 0 : pg_log_debug("would reconstruct \"%s\" (%u blocks, checksum %s)",
563 : output_filename, block_length,
564 : pg_checksum_type_name(checksum_ctx->type));
565 : else
566 46 : pg_log_debug("reconstructing \"%s\" (%u blocks, checksum %s)",
567 : output_filename, block_length,
568 : pg_checksum_type_name(checksum_ctx->type));
569 :
570 : /* Print out the plan for reconstructing this file. */
571 46 : initStringInfo(&debug_buf);
572 560 : while (current_block < block_length)
573 : {
574 514 : rfile *s = sourcemap[current_block];
575 :
576 : /* Extend range, if possible. */
577 514 : if (current_block + 1 < block_length &&
578 468 : s == sourcemap[current_block + 1])
579 : {
580 400 : ++current_block;
581 400 : continue;
582 : }
583 :
584 : /* Add details about this range. */
585 114 : if (s == NULL)
586 : {
587 0 : if (current_block == start_of_range)
588 0 : appendStringInfo(&debug_buf, " %u:zero", current_block);
589 : else
590 0 : appendStringInfo(&debug_buf, " %u-%u:zero",
591 : start_of_range, current_block);
592 : }
593 : else
594 : {
595 114 : if (current_block == start_of_range)
596 70 : appendStringInfo(&debug_buf, " %u:%s@" UINT64_FORMAT,
597 : current_block, s->filename,
598 70 : (uint64) offsetmap[current_block]);
599 : else
600 44 : appendStringInfo(&debug_buf, " %u-%u:%s@" UINT64_FORMAT,
601 : start_of_range, current_block,
602 : s->filename,
603 44 : (uint64) offsetmap[current_block]);
604 : }
605 :
606 : /* Begin new range. */
607 114 : start_of_range = ++current_block;
608 :
609 : /* If the output is very long or we are done, dump it now. */
610 114 : if (current_block == block_length || debug_buf.len > 1024)
611 : {
612 46 : pg_log_debug("reconstruction plan:%s", debug_buf.data);
613 46 : resetStringInfo(&debug_buf);
614 : }
615 : }
616 :
617 : /* Free memory. */
618 46 : pfree(debug_buf.data);
619 : }
620 :
621 : /* Open the output file, except in dry_run mode. */
622 92 : if (!dry_run &&
623 46 : (wfd = open(output_filename,
624 : O_RDWR | PG_BINARY | O_CREAT | O_EXCL,
625 : pg_file_create_mode)) < 0)
626 0 : pg_fatal("could not open file \"%s\": %m", output_filename);
627 :
628 : /* Read and write the blocks as required. */
629 560 : for (i = 0; i < block_length; ++i)
630 : {
631 : uint8 buffer[BLCKSZ];
632 514 : rfile *s = sourcemap[i];
633 :
634 : /* Update accounting information. */
635 514 : if (s == NULL)
636 0 : ++zero_blocks;
637 : else
638 : {
639 514 : s->num_blocks_read++;
640 514 : s->highest_offset_read = Max(s->highest_offset_read,
641 : offsetmap[i] + BLCKSZ);
642 : }
643 :
644 : /* Skip the rest of this in dry-run mode. */
645 514 : if (dry_run)
646 0 : continue;
647 :
648 : /* Read or zero-fill the block as appropriate. */
649 514 : if (s == NULL)
650 : {
651 : /*
652 : * New block not mentioned in the WAL summary. Should have been an
653 : * uninitialized block, so just zero-fill it.
654 : */
655 0 : memset(buffer, 0, BLCKSZ);
656 :
657 : /* Write out the block, update the checksum if needed. */
658 0 : write_block(wfd, output_filename, buffer, checksum_ctx);
659 :
660 : /* Nothing else to do for zero-filled blocks. */
661 0 : continue;
662 : }
663 :
664 : /* Copy the block using the appropriate copy method. */
665 514 : if (copy_method != COPY_METHOD_COPY_FILE_RANGE)
666 : {
667 : /*
668 : * Read the block from the correct source file, and then write it
669 : * out, possibly with a checksum update.
670 : */
671 514 : read_block(s, offsetmap[i], buffer);
672 514 : write_block(wfd, output_filename, buffer, checksum_ctx);
673 : }
674 : else /* use copy_file_range */
675 : {
676 : #if defined(HAVE_COPY_FILE_RANGE)
677 : /* copy_file_range modifies the offset, so use a local copy */
678 0 : off_t off = offsetmap[i];
679 0 : size_t nwritten = 0;
680 :
681 : /*
682 : * Retry until we've written all the bytes (the offset is updated
683 : * by copy_file_range, and so is the wfd file offset).
684 : */
685 : do
686 : {
687 : int wb;
688 :
689 0 : wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0);
690 :
691 0 : if (wb < 0)
692 0 : pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
693 : input_filename, output_filename);
694 :
695 0 : nwritten += wb;
696 :
697 0 : } while (BLCKSZ > nwritten);
698 :
699 : /*
700 : * When checksum calculation not needed, we're done, otherwise
701 : * read the block and pass it to the checksum calculation.
702 : */
703 0 : if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
704 0 : continue;
705 :
706 0 : read_block(s, offsetmap[i], buffer);
707 :
708 0 : if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
709 0 : pg_fatal("could not update checksum of file \"%s\"",
710 : output_filename);
711 : #else
712 : pg_fatal("copy_file_range not supported on this platform");
713 : #endif
714 : }
715 : }
716 :
717 : /* Debugging output. */
718 46 : if (zero_blocks > 0)
719 : {
720 0 : if (dry_run)
721 0 : pg_log_debug("would have zero-filled %u blocks", zero_blocks);
722 : else
723 0 : pg_log_debug("zero-filled %u blocks", zero_blocks);
724 : }
725 :
726 : /* Close the output file. */
727 46 : if (wfd >= 0 && close(wfd) != 0)
728 0 : pg_fatal("could not close \"%s\": %m", output_filename);
729 46 : }
730 :
731 : /*
732 : * Write the block into the file (using the file descriptor), and
733 : * if needed update the checksum calculation.
734 : *
735 : * The buffer is expected to contain BLCKSZ bytes. The filename is
736 : * provided only for the error message.
737 : */
738 : static void
739 514 : write_block(int fd, char *output_filename,
740 : uint8 *buffer, pg_checksum_context *checksum_ctx)
741 : {
742 : int wb;
743 :
744 514 : if ((wb = write(fd, buffer, BLCKSZ)) != BLCKSZ)
745 : {
746 0 : if (wb < 0)
747 0 : pg_fatal("could not write file \"%s\": %m", output_filename);
748 : else
749 0 : pg_fatal("could not write file \"%s\": wrote only %d of %d bytes",
750 : output_filename, wb, BLCKSZ);
751 : }
752 :
753 : /* Update the checksum computation. */
754 514 : if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
755 0 : pg_fatal("could not update checksum of file \"%s\"",
756 : output_filename);
757 514 : }
758 :
759 : /*
760 : * Read a block of data (BLCKSZ bytes) into the buffer.
761 : */
762 : static void
763 514 : read_block(rfile *s, off_t off, uint8 *buffer)
764 : {
765 : int rb;
766 :
767 : /* Read the block from the correct source, except if dry-run. */
768 514 : rb = pg_pread(s->fd, buffer, BLCKSZ, off);
769 514 : if (rb != BLCKSZ)
770 : {
771 0 : if (rb < 0)
772 0 : pg_fatal("could not read file \"%s\": %m", s->filename);
773 : else
774 0 : pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu",
775 : s->filename, rb, BLCKSZ,
776 : (unsigned long long) off);
777 : }
778 514 : }
|