Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * astreamer_verify.c
4 : *
5 : * Archive streamer for verification of a tar format backup (including
6 : * compressed tar format backups).
7 : *
8 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 : *
10 : * src/bin/pg_verifybackup/astreamer_verify.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres_fe.h"
16 :
17 : #include "access/xlog_internal.h"
18 : #include "catalog/pg_control.h"
19 : #include "pg_verifybackup.h"
20 :
21 : typedef struct astreamer_verify
22 : {
23 : /* These fields don't change once initialized. */
24 : astreamer base;
25 : verifier_context *context;
26 : char *archive_name;
27 : Oid tblspc_oid;
28 :
29 : /* These fields change for each archive member. */
30 : manifest_file *mfile;
31 : bool verify_checksum;
32 : bool verify_control_data;
33 : pg_checksum_context *checksum_ctx;
34 : uint64 checksum_bytes;
35 : ControlFileData control_file;
36 : uint64 control_file_bytes;
37 : } astreamer_verify;
38 :
39 : static void astreamer_verify_content(astreamer *streamer,
40 : astreamer_member *member,
41 : const char *data, int len,
42 : astreamer_archive_context context);
43 : static void astreamer_verify_finalize(astreamer *streamer);
44 : static void astreamer_verify_free(astreamer *streamer);
45 :
46 : static void member_verify_header(astreamer *streamer, astreamer_member *member);
47 : static void member_compute_checksum(astreamer *streamer,
48 : astreamer_member *member,
49 : const char *data, int len);
50 : static void member_verify_checksum(astreamer *streamer);
51 : static void member_copy_control_data(astreamer *streamer,
52 : astreamer_member *member,
53 : const char *data, int len);
54 : static void member_verify_control_data(astreamer *streamer);
55 : static void member_reset_info(astreamer *streamer);
56 :
57 : static const astreamer_ops astreamer_verify_ops = {
58 : .content = astreamer_verify_content,
59 : .finalize = astreamer_verify_finalize,
60 : .free = astreamer_verify_free
61 : };
62 :
63 : /*
64 : * Create an astreamer that can verify a tar file.
65 : */
66 : astreamer *
67 58 : astreamer_verify_content_new(astreamer *next, verifier_context *context,
68 : char *archive_name, Oid tblspc_oid)
69 : {
70 : astreamer_verify *streamer;
71 :
72 58 : streamer = palloc0(sizeof(astreamer_verify));
73 58 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
74 : &astreamer_verify_ops;
75 :
76 58 : streamer->base.bbs_next = next;
77 58 : streamer->context = context;
78 58 : streamer->archive_name = archive_name;
79 58 : streamer->tblspc_oid = tblspc_oid;
80 :
81 58 : if (!context->skip_checksums)
82 58 : streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
83 :
84 58 : return &streamer->base;
85 : }
86 :
87 : /*
88 : * Main entry point of the archive streamer for verifying tar members.
89 : */
90 : static void
91 425264 : astreamer_verify_content(astreamer *streamer, astreamer_member *member,
92 : const char *data, int len,
93 : astreamer_archive_context context)
94 : {
95 425264 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
96 :
97 : Assert(context != ASTREAMER_UNKNOWN);
98 :
99 425264 : switch (context)
100 : {
101 38050 : case ASTREAMER_MEMBER_HEADER:
102 : /* Initial setup plus decide which checks to perform. */
103 38050 : member_verify_header(streamer, member);
104 38050 : break;
105 :
106 349108 : case ASTREAMER_MEMBER_CONTENTS:
107 : /* Incremental work required to verify file contents. */
108 349108 : if (mystreamer->verify_checksum)
109 215520 : member_compute_checksum(streamer, member, data, len);
110 349108 : if (mystreamer->verify_control_data)
111 98 : member_copy_control_data(streamer, member, data, len);
112 349108 : break;
113 :
114 38050 : case ASTREAMER_MEMBER_TRAILER:
115 : /* Now we've got all the file data. */
116 38050 : if (mystreamer->verify_checksum)
117 35006 : member_verify_checksum(streamer);
118 38050 : if (mystreamer->verify_control_data)
119 38 : member_verify_control_data(streamer);
120 :
121 : /* Reset for next archive member. */
122 38048 : member_reset_info(streamer);
123 38048 : break;
124 :
125 56 : case ASTREAMER_ARCHIVE_TRAILER:
126 56 : break;
127 :
128 0 : default:
129 : /* Shouldn't happen. */
130 0 : pg_fatal("unexpected state while parsing tar file");
131 : }
132 425262 : }
133 :
134 : /*
135 : * End-of-stream processing for a astreamer_verify stream.
136 : */
137 : static void
138 56 : astreamer_verify_finalize(astreamer *streamer)
139 : {
140 : Assert(streamer->bbs_next == NULL);
141 56 : }
142 :
143 : /*
144 : * Free memory associated with a astreamer_verify stream.
145 : */
146 : static void
147 56 : astreamer_verify_free(astreamer *streamer)
148 : {
149 56 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
150 :
151 56 : if (mystreamer->checksum_ctx)
152 56 : pfree(mystreamer->checksum_ctx);
153 :
154 56 : pfree(streamer);
155 56 : }
156 :
157 : /*
158 : * Prepare to validate the next archive member.
159 : */
160 : static void
161 38050 : member_verify_header(astreamer *streamer, astreamer_member *member)
162 : {
163 38050 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
164 : manifest_file *m;
165 : char pathname[MAXPGPATH];
166 :
167 : /* We are only interested in normal files. */
168 38050 : if (member->is_directory || member->is_link)
169 1104 : return;
170 :
171 : /*
172 : * The backup manifest stores a relative path to the base directory for
173 : * files belonging to a tablespace, while the tablespace backup tar
174 : * archive does not include this path.
175 : *
176 : * The pathname taken from the tar file could contain '.' or '..'
177 : * references, which we want to remove, so apply canonicalize_path(). It
178 : * could also be an absolute pathname, which we want to treat as a
179 : * relative path, so prepend "./" if we're not adding a tablespace prefix
180 : * to make sure that canonicalize_path() does what we want.
181 : */
182 37016 : if (OidIsValid(mystreamer->tblspc_oid))
183 20 : snprintf(pathname, MAXPGPATH, "%s/%u/%s",
184 20 : "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
185 : else
186 36996 : snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
187 37016 : canonicalize_path(pathname);
188 :
189 : /* Ignore any files that are listed in the ignore list. */
190 37016 : if (should_ignore_relpath(mystreamer->context, pathname))
191 62 : return;
192 :
193 : /* Check whether there's an entry in the manifest hash. */
194 36954 : m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
195 36954 : if (m == NULL)
196 : {
197 4 : report_backup_error(mystreamer->context,
198 : "\"%s\" is present in \"%s\" but not in the manifest",
199 4 : member->pathname, mystreamer->archive_name);
200 4 : return;
201 : }
202 36950 : mystreamer->mfile = m;
203 :
204 : /* Flag this entry as having been encountered in a tar archive. */
205 36950 : m->matched = true;
206 :
207 : /* Check that the size matches. */
208 36950 : if (m->size != member->size)
209 : {
210 4 : report_backup_error(mystreamer->context,
211 : "\"%s\" has size %llu in \"%s\" but size %" PRIu64 " in the manifest",
212 4 : member->pathname,
213 4 : (unsigned long long) member->size,
214 : mystreamer->archive_name,
215 : m->size);
216 4 : m->bad = true;
217 4 : return;
218 : }
219 :
220 : /*
221 : * Decide whether we're going to verify the checksum for this file, and
222 : * whether we're going to perform the additional validation that we do
223 : * only for the control file.
224 : */
225 36946 : mystreamer->verify_checksum =
226 36946 : (!mystreamer->context->skip_checksums && should_verify_checksum(m));
227 36946 : mystreamer->verify_control_data =
228 73892 : mystreamer->context->manifest->version != 1 &&
229 36946 : !m->bad && strcmp(m->pathname, XLOG_CONTROL_FILE) == 0;
230 :
231 : /* If we're going to verify the checksum, initial a checksum context. */
232 71952 : if (mystreamer->verify_checksum &&
233 35006 : pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
234 : {
235 0 : report_backup_error(mystreamer->context,
236 : "%s: could not initialize checksum of file \"%s\"",
237 : mystreamer->archive_name, m->pathname);
238 :
239 : /*
240 : * Checksum verification cannot be performed without proper context
241 : * initialization.
242 : */
243 0 : mystreamer->verify_checksum = false;
244 : }
245 : }
246 :
247 : /*
248 : * Computes the checksum incrementally for the received file content.
249 : *
250 : * Should have a correctly initialized checksum_ctx, which will be used for
251 : * incremental checksum computation.
252 : */
253 : static void
254 215520 : member_compute_checksum(astreamer *streamer, astreamer_member *member,
255 : const char *data, int len)
256 : {
257 215520 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
258 215520 : pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
259 215520 : manifest_file *m = mystreamer->mfile;
260 :
261 : Assert(mystreamer->verify_checksum);
262 : Assert(m->checksum_type == checksum_ctx->type);
263 :
264 : /*
265 : * Update the total count of computed checksum bytes so that we can
266 : * cross-check against the file size.
267 : */
268 215520 : mystreamer->checksum_bytes += len;
269 :
270 : /* Feed these bytes to the checksum calculation. */
271 215520 : if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
272 : {
273 0 : report_backup_error(mystreamer->context,
274 : "could not update checksum of file \"%s\"",
275 : m->pathname);
276 0 : mystreamer->verify_checksum = false;
277 : }
278 215520 : }
279 :
280 : /*
281 : * Perform the final computation and checksum verification after the entire
282 : * file content has been processed.
283 : */
284 : static void
285 35006 : member_verify_checksum(astreamer *streamer)
286 : {
287 35006 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
288 35006 : manifest_file *m = mystreamer->mfile;
289 : uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
290 : int checksumlen;
291 :
292 : Assert(mystreamer->verify_checksum);
293 :
294 : /*
295 : * It's unclear how this could fail, but let's check anyway to be safe.
296 : */
297 35006 : if (mystreamer->checksum_bytes != m->size)
298 : {
299 0 : report_backup_error(mystreamer->context,
300 : "file \"%s\" in \"%s\" should contain %" PRIu64 " bytes, but read %" PRIu64 " bytes",
301 : m->pathname, mystreamer->archive_name,
302 : m->size,
303 : mystreamer->checksum_bytes);
304 0 : return;
305 : }
306 :
307 : /* Get the final checksum. */
308 35006 : checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
309 35006 : if (checksumlen < 0)
310 : {
311 0 : report_backup_error(mystreamer->context,
312 : "could not finalize checksum of file \"%s\"",
313 : m->pathname);
314 0 : return;
315 : }
316 :
317 : /* And check it against the manifest. */
318 35006 : if (checksumlen != m->checksum_length)
319 0 : report_backup_error(mystreamer->context,
320 : "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
321 : m->pathname, mystreamer->archive_name,
322 : m->checksum_length, checksumlen);
323 35006 : else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
324 12 : report_backup_error(mystreamer->context,
325 : "checksum mismatch for file \"%s\" in \"%s\"",
326 : m->pathname, mystreamer->archive_name);
327 : }
328 :
329 : /*
330 : * Stores the pg_control file contents into a local buffer; we need the entire
331 : * control file data for verification.
332 : */
333 : static void
334 98 : member_copy_control_data(astreamer *streamer, astreamer_member *member,
335 : const char *data, int len)
336 : {
337 98 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
338 :
339 : /* Should be here only for control file */
340 : Assert(mystreamer->verify_control_data);
341 :
342 : /*
343 : * Copy the new data into the control file buffer, but do not overrun the
344 : * buffer. Note that the on-disk length of the control file is expected to
345 : * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
346 : * shorter, just sizeof(ControlFileData).
347 : */
348 98 : if (mystreamer->control_file_bytes < sizeof(ControlFileData))
349 : {
350 : size_t remaining;
351 :
352 38 : remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
353 38 : memcpy(((char *) &mystreamer->control_file)
354 38 : + mystreamer->control_file_bytes,
355 38 : data, Min((size_t) len, remaining));
356 : }
357 :
358 : /* Remember how many bytes we saw, even if we didn't buffer them. */
359 98 : mystreamer->control_file_bytes += len;
360 98 : }
361 :
362 : /*
363 : * Performs the CRC calculation of pg_control data and then calls the routines
364 : * that execute the final verification of the control file information.
365 : */
366 : static void
367 38 : member_verify_control_data(astreamer *streamer)
368 : {
369 38 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
370 38 : manifest_data *manifest = mystreamer->context->manifest;
371 : pg_crc32c crc;
372 :
373 : /* Should be here only for control file */
374 : Assert(strcmp(mystreamer->mfile->pathname, XLOG_CONTROL_FILE) == 0);
375 : Assert(mystreamer->verify_control_data);
376 :
377 : /*
378 : * If the control file is not the right length, that's a big problem.
379 : *
380 : * NB: There is a theoretical overflow risk here from casting to int, but
381 : * it isn't likely to be a real problem and this enables us to match the
382 : * same format string that pg_rewind uses for this case. Perhaps both this
383 : * and pg_rewind should use an unsigned 64-bit value, but for now we don't
384 : * worry about it.
385 : */
386 38 : if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
387 0 : report_fatal_error("unexpected control file size %d, expected %d",
388 0 : (int) mystreamer->control_file_bytes,
389 : PG_CONTROL_FILE_SIZE);
390 :
391 : /* Compute the CRC. */
392 38 : INIT_CRC32C(crc);
393 38 : COMP_CRC32C(crc, &mystreamer->control_file,
394 : offsetof(ControlFileData, crc));
395 38 : FIN_CRC32C(crc);
396 :
397 : /* Control file contents not meaningful if CRC is bad. */
398 38 : if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
399 0 : report_fatal_error("%s: %s: CRC is incorrect",
400 : mystreamer->archive_name,
401 0 : mystreamer->mfile->pathname);
402 :
403 : /* Can't interpret control file if not current version. */
404 38 : if (mystreamer->control_file.pg_control_version != PG_CONTROL_VERSION)
405 0 : report_fatal_error("%s: %s: unexpected control file version",
406 : mystreamer->archive_name,
407 0 : mystreamer->mfile->pathname);
408 :
409 : /* System identifiers should match. */
410 38 : if (manifest->system_identifier !=
411 38 : mystreamer->control_file.system_identifier)
412 2 : report_fatal_error("%s: %s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64,
413 : mystreamer->archive_name,
414 2 : mystreamer->mfile->pathname,
415 : manifest->system_identifier,
416 : mystreamer->control_file.system_identifier);
417 36 : }
418 :
419 : /*
420 : * Reset flags and free memory allocations for member file verification.
421 : */
422 : static void
423 38048 : member_reset_info(astreamer *streamer)
424 : {
425 38048 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
426 :
427 38048 : mystreamer->mfile = NULL;
428 38048 : mystreamer->verify_checksum = false;
429 38048 : mystreamer->verify_control_data = false;
430 38048 : mystreamer->checksum_bytes = 0;
431 38048 : mystreamer->control_file_bytes = 0;
432 38048 : }
|