Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * astreamer_verify.c
4 : *
5 : * Archive streamer for verification of a tar format backup (including
6 : * compressed tar format backups).
7 : *
8 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 : *
10 : * src/bin/pg_verifybackup/astreamer_verify.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres_fe.h"
16 :
17 : #include "catalog/pg_control.h"
18 : #include "pg_verifybackup.h"
19 :
20 : typedef struct astreamer_verify
21 : {
22 : /* These fields don't change once initialized. */
23 : astreamer base;
24 : verifier_context *context;
25 : char *archive_name;
26 : Oid tblspc_oid;
27 :
28 : /* These fields change for each archive member. */
29 : manifest_file *mfile;
30 : bool verify_checksum;
31 : bool verify_control_data;
32 : pg_checksum_context *checksum_ctx;
33 : uint64 checksum_bytes;
34 : ControlFileData control_file;
35 : uint64 control_file_bytes;
36 : } astreamer_verify;
37 :
38 : static void astreamer_verify_content(astreamer *streamer,
39 : astreamer_member *member,
40 : const char *data, int len,
41 : astreamer_archive_context context);
42 : static void astreamer_verify_finalize(astreamer *streamer);
43 : static void astreamer_verify_free(astreamer *streamer);
44 :
45 : static void member_verify_header(astreamer *streamer, astreamer_member *member);
46 : static void member_compute_checksum(astreamer *streamer,
47 : astreamer_member *member,
48 : const char *data, int len);
49 : static void member_verify_checksum(astreamer *streamer);
50 : static void member_copy_control_data(astreamer *streamer,
51 : astreamer_member *member,
52 : const char *data, int len);
53 : static void member_verify_control_data(astreamer *streamer);
54 : static void member_reset_info(astreamer *streamer);
55 :
56 : static const astreamer_ops astreamer_verify_ops = {
57 : .content = astreamer_verify_content,
58 : .finalize = astreamer_verify_finalize,
59 : .free = astreamer_verify_free
60 : };
61 :
62 : /*
63 : * Create an astreamer that can verify a tar file.
64 : */
65 : astreamer *
66 58 : astreamer_verify_content_new(astreamer *next, verifier_context *context,
67 : char *archive_name, Oid tblspc_oid)
68 : {
69 : astreamer_verify *streamer;
70 :
71 58 : streamer = palloc0(sizeof(astreamer_verify));
72 58 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
73 : &astreamer_verify_ops;
74 :
75 58 : streamer->base.bbs_next = next;
76 58 : streamer->context = context;
77 58 : streamer->archive_name = archive_name;
78 58 : streamer->tblspc_oid = tblspc_oid;
79 :
80 58 : if (!context->skip_checksums)
81 58 : streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
82 :
83 58 : return &streamer->base;
84 : }
85 :
86 : /*
87 : * Main entry point of the archive streamer for verifying tar members.
88 : */
89 : static void
90 423936 : astreamer_verify_content(astreamer *streamer, astreamer_member *member,
91 : const char *data, int len,
92 : astreamer_archive_context context)
93 : {
94 423936 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
95 :
96 : Assert(context != ASTREAMER_UNKNOWN);
97 :
98 423936 : switch (context)
99 : {
100 38050 : case ASTREAMER_MEMBER_HEADER:
101 : /* Initial setup plus decide which checks to perform. */
102 38050 : member_verify_header(streamer, member);
103 38050 : break;
104 :
105 347780 : case ASTREAMER_MEMBER_CONTENTS:
106 : /* Incremental work required to verify file contents. */
107 347780 : if (mystreamer->verify_checksum)
108 214190 : member_compute_checksum(streamer, member, data, len);
109 347780 : if (mystreamer->verify_control_data)
110 98 : member_copy_control_data(streamer, member, data, len);
111 347780 : break;
112 :
113 38050 : case ASTREAMER_MEMBER_TRAILER:
114 : /* Now we've got all the file data. */
115 38050 : if (mystreamer->verify_checksum)
116 35006 : member_verify_checksum(streamer);
117 38050 : if (mystreamer->verify_control_data)
118 38 : member_verify_control_data(streamer);
119 :
120 : /* Reset for next archive member. */
121 38048 : member_reset_info(streamer);
122 38048 : break;
123 :
124 56 : case ASTREAMER_ARCHIVE_TRAILER:
125 56 : break;
126 :
127 0 : default:
128 : /* Shouldn't happen. */
129 0 : pg_fatal("unexpected state while parsing tar file");
130 : }
131 423934 : }
132 :
133 : /*
134 : * End-of-stream processing for a astreamer_verify stream.
135 : */
136 : static void
137 56 : astreamer_verify_finalize(astreamer *streamer)
138 : {
139 : Assert(streamer->bbs_next == NULL);
140 56 : }
141 :
142 : /*
143 : * Free memory associated with a astreamer_verify stream.
144 : */
145 : static void
146 56 : astreamer_verify_free(astreamer *streamer)
147 : {
148 56 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
149 :
150 56 : if (mystreamer->checksum_ctx)
151 56 : pfree(mystreamer->checksum_ctx);
152 :
153 56 : pfree(streamer);
154 56 : }
155 :
156 : /*
157 : * Prepare to validate the next archive member.
158 : */
159 : static void
160 38050 : member_verify_header(astreamer *streamer, astreamer_member *member)
161 : {
162 38050 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
163 : manifest_file *m;
164 : char pathname[MAXPGPATH];
165 :
166 : /* We are only interested in normal files. */
167 38050 : if (member->is_directory || member->is_link)
168 1104 : return;
169 :
170 : /*
171 : * The backup manifest stores a relative path to the base directory for
172 : * files belonging to a tablespace, while the tablespace backup tar
173 : * archive does not include this path.
174 : *
175 : * The pathname taken from the tar file could contain '.' or '..'
176 : * references, which we want to remove, so apply canonicalize_path(). It
177 : * could also be an absolute pathname, which we want to treat as a
178 : * relative path, so prepend "./" if we're not adding a tablespace prefix
179 : * to make sure that canonicalize_path() does what we want.
180 : */
181 37016 : if (OidIsValid(mystreamer->tblspc_oid))
182 20 : snprintf(pathname, MAXPGPATH, "%s/%u/%s",
183 20 : "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
184 : else
185 36996 : snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
186 37016 : canonicalize_path(pathname);
187 :
188 : /* Ignore any files that are listed in the ignore list. */
189 37016 : if (should_ignore_relpath(mystreamer->context, pathname))
190 62 : return;
191 :
192 : /* Check whether there's an entry in the manifest hash. */
193 36954 : m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
194 36954 : if (m == NULL)
195 : {
196 4 : report_backup_error(mystreamer->context,
197 : "\"%s\" is present in \"%s\" but not in the manifest",
198 4 : member->pathname, mystreamer->archive_name);
199 4 : return;
200 : }
201 36950 : mystreamer->mfile = m;
202 :
203 : /* Flag this entry as having been encountered in a tar archive. */
204 36950 : m->matched = true;
205 :
206 : /* Check that the size matches. */
207 36950 : if (m->size != member->size)
208 : {
209 4 : report_backup_error(mystreamer->context,
210 : "\"%s\" has size %llu in \"%s\" but size %llu in the manifest",
211 4 : member->pathname,
212 4 : (unsigned long long) member->size,
213 : mystreamer->archive_name,
214 4 : (unsigned long long) m->size);
215 4 : m->bad = true;
216 4 : return;
217 : }
218 :
219 : /*
220 : * Decide whether we're going to verify the checksum for this file, and
221 : * whether we're going to perform the additional validation that we do
222 : * only for the control file.
223 : */
224 36946 : mystreamer->verify_checksum =
225 36946 : (!mystreamer->context->skip_checksums && should_verify_checksum(m));
226 36946 : mystreamer->verify_control_data =
227 73892 : mystreamer->context->manifest->version != 1 &&
228 36946 : !m->bad && strcmp(m->pathname, "global/pg_control") == 0;
229 :
230 : /* If we're going to verify the checksum, initial a checksum context. */
231 71952 : if (mystreamer->verify_checksum &&
232 35006 : pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
233 : {
234 0 : report_backup_error(mystreamer->context,
235 : "%s: could not initialize checksum of file \"%s\"",
236 : mystreamer->archive_name, m->pathname);
237 :
238 : /*
239 : * Checksum verification cannot be performed without proper context
240 : * initialization.
241 : */
242 0 : mystreamer->verify_checksum = false;
243 : }
244 : }
245 :
246 : /*
247 : * Computes the checksum incrementally for the received file content.
248 : *
249 : * Should have a correctly initialized checksum_ctx, which will be used for
250 : * incremental checksum computation.
251 : */
252 : static void
253 214190 : member_compute_checksum(astreamer *streamer, astreamer_member *member,
254 : const char *data, int len)
255 : {
256 214190 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
257 214190 : pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
258 214190 : manifest_file *m = mystreamer->mfile;
259 :
260 : Assert(mystreamer->verify_checksum);
261 : Assert(m->checksum_type == checksum_ctx->type);
262 :
263 : /*
264 : * Update the total count of computed checksum bytes so that we can
265 : * cross-check against the file size.
266 : */
267 214190 : mystreamer->checksum_bytes += len;
268 :
269 : /* Feed these bytes to the checksum calculation. */
270 214190 : if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
271 : {
272 0 : report_backup_error(mystreamer->context,
273 : "could not update checksum of file \"%s\"",
274 : m->pathname);
275 0 : mystreamer->verify_checksum = false;
276 : }
277 214190 : }
278 :
279 : /*
280 : * Perform the final computation and checksum verification after the entire
281 : * file content has been processed.
282 : */
283 : static void
284 35006 : member_verify_checksum(astreamer *streamer)
285 : {
286 35006 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
287 35006 : manifest_file *m = mystreamer->mfile;
288 : uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
289 : int checksumlen;
290 :
291 : Assert(mystreamer->verify_checksum);
292 :
293 : /*
294 : * It's unclear how this could fail, but let's check anyway to be safe.
295 : */
296 35006 : if (mystreamer->checksum_bytes != m->size)
297 : {
298 0 : report_backup_error(mystreamer->context,
299 : "file \"%s\" in \"%s\" should contain %llu bytes, but read %llu bytes",
300 : m->pathname, mystreamer->archive_name,
301 0 : (unsigned long long) m->size,
302 0 : (unsigned long long) mystreamer->checksum_bytes);
303 0 : return;
304 : }
305 :
306 : /* Get the final checksum. */
307 35006 : checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
308 35006 : if (checksumlen < 0)
309 : {
310 0 : report_backup_error(mystreamer->context,
311 : "could not finalize checksum of file \"%s\"",
312 : m->pathname);
313 0 : return;
314 : }
315 :
316 : /* And check it against the manifest. */
317 35006 : if (checksumlen != m->checksum_length)
318 0 : report_backup_error(mystreamer->context,
319 : "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
320 : m->pathname, mystreamer->archive_name,
321 : m->checksum_length, checksumlen);
322 35006 : else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
323 12 : report_backup_error(mystreamer->context,
324 : "checksum mismatch for file \"%s\" in \"%s\"",
325 : m->pathname, mystreamer->archive_name);
326 : }
327 :
328 : /*
329 : * Stores the pg_control file contents into a local buffer; we need the entire
330 : * control file data for verification.
331 : */
332 : static void
333 98 : member_copy_control_data(astreamer *streamer, astreamer_member *member,
334 : const char *data, int len)
335 : {
336 98 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
337 :
338 : /* Should be here only for control file */
339 : Assert(mystreamer->verify_control_data);
340 :
341 : /*
342 : * Copy the new data into the control file buffer, but do not overrun the
343 : * buffer. Note that the on-disk length of the control file is expected to
344 : * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
345 : * shorter, just sizeof(ControlFileData).
346 : */
347 98 : if (mystreamer->control_file_bytes < sizeof(ControlFileData))
348 : {
349 : size_t remaining;
350 :
351 38 : remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
352 38 : memcpy(((char *) &mystreamer->control_file)
353 38 : + mystreamer->control_file_bytes,
354 38 : data, Min((size_t) len, remaining));
355 : }
356 :
357 : /* Remember how many bytes we saw, even if we didn't buffer them. */
358 98 : mystreamer->control_file_bytes += len;
359 98 : }
360 :
361 : /*
362 : * Performs the CRC calculation of pg_control data and then calls the routines
363 : * that execute the final verification of the control file information.
364 : */
365 : static void
366 38 : member_verify_control_data(astreamer *streamer)
367 : {
368 38 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
369 38 : manifest_data *manifest = mystreamer->context->manifest;
370 : pg_crc32c crc;
371 :
372 : /* Should be here only for control file */
373 : Assert(strcmp(mystreamer->mfile->pathname, "global/pg_control") == 0);
374 : Assert(mystreamer->verify_control_data);
375 :
376 : /*
377 : * If the control file is not the right length, that's a big problem.
378 : *
379 : * NB: There is a theoretical overflow risk here from casting to int, but
380 : * it isn't likely to be a real problem and this enables us to match the
381 : * same format string that pg_rewind uses for this case. Perhaps both this
382 : * and pg_rewind should use an unsigned 64-bit value, but for now we don't
383 : * worry about it.
384 : */
385 38 : if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
386 0 : report_fatal_error("unexpected control file size %d, expected %d",
387 0 : (int) mystreamer->control_file_bytes,
388 : PG_CONTROL_FILE_SIZE);
389 :
390 : /* Compute the CRC. */
391 38 : INIT_CRC32C(crc);
392 38 : COMP_CRC32C(crc, &mystreamer->control_file,
393 : offsetof(ControlFileData, crc));
394 38 : FIN_CRC32C(crc);
395 :
396 : /* Control file contents not meaningful if CRC is bad. */
397 38 : if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
398 0 : report_fatal_error("%s: %s: CRC is incorrect",
399 : mystreamer->archive_name,
400 0 : mystreamer->mfile->pathname);
401 :
402 : /* Can't interpret control file if not current version. */
403 38 : if (mystreamer->control_file.pg_control_version != PG_CONTROL_VERSION)
404 0 : report_fatal_error("%s: %s: unexpected control file version",
405 : mystreamer->archive_name,
406 0 : mystreamer->mfile->pathname);
407 :
408 : /* System identifiers should match. */
409 38 : if (manifest->system_identifier !=
410 38 : mystreamer->control_file.system_identifier)
411 2 : report_fatal_error("%s: %s: manifest system identifier is %llu, but control file has %llu",
412 : mystreamer->archive_name,
413 2 : mystreamer->mfile->pathname,
414 2 : (unsigned long long) manifest->system_identifier,
415 2 : (unsigned long long) mystreamer->control_file.system_identifier);
416 36 : }
417 :
418 : /*
419 : * Reset flags and free memory allocations for member file verification.
420 : */
421 : static void
422 38048 : member_reset_info(astreamer *streamer)
423 : {
424 38048 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
425 :
426 38048 : mystreamer->mfile = NULL;
427 38048 : mystreamer->verify_checksum = false;
428 38048 : mystreamer->verify_control_data = false;
429 38048 : mystreamer->checksum_bytes = 0;
430 38048 : mystreamer->control_file_bytes = 0;
431 38048 : }
|