LCOV - code coverage report
Current view: top level - src/bin/pg_verifybackup - astreamer_verify.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 118 135 87.4 %
Date: 2025-04-24 13:15:39 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * astreamer_verify.c
       4             :  *
       5             :  * Archive streamer for verification of a tar format backup (including
       6             :  * compressed tar format backups).
       7             :  *
       8             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       9             :  *
      10             :  * src/bin/pg_verifybackup/astreamer_verify.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres_fe.h"
      16             : 
      17             : #include "access/xlog_internal.h"
      18             : #include "catalog/pg_control.h"
      19             : #include "pg_verifybackup.h"
      20             : 
      21             : typedef struct astreamer_verify
      22             : {
      23             :     /* These fields don't change once initialized. */
      24             :     astreamer   base;
      25             :     verifier_context *context;
      26             :     char       *archive_name;
      27             :     Oid         tblspc_oid;
      28             : 
      29             :     /* These fields change for each archive member. */
      30             :     manifest_file *mfile;
      31             :     bool        verify_checksum;
      32             :     bool        verify_control_data;
      33             :     pg_checksum_context *checksum_ctx;
      34             :     uint64      checksum_bytes;
      35             :     ControlFileData control_file;
      36             :     uint64      control_file_bytes;
      37             : } astreamer_verify;
      38             : 
      39             : static void astreamer_verify_content(astreamer *streamer,
      40             :                                      astreamer_member *member,
      41             :                                      const char *data, int len,
      42             :                                      astreamer_archive_context context);
      43             : static void astreamer_verify_finalize(astreamer *streamer);
      44             : static void astreamer_verify_free(astreamer *streamer);
      45             : 
      46             : static void member_verify_header(astreamer *streamer, astreamer_member *member);
      47             : static void member_compute_checksum(astreamer *streamer,
      48             :                                     astreamer_member *member,
      49             :                                     const char *data, int len);
      50             : static void member_verify_checksum(astreamer *streamer);
      51             : static void member_copy_control_data(astreamer *streamer,
      52             :                                      astreamer_member *member,
      53             :                                      const char *data, int len);
      54             : static void member_verify_control_data(astreamer *streamer);
      55             : static void member_reset_info(astreamer *streamer);
      56             : 
      57             : static const astreamer_ops astreamer_verify_ops = {
      58             :     .content = astreamer_verify_content,
      59             :     .finalize = astreamer_verify_finalize,
      60             :     .free = astreamer_verify_free
      61             : };
      62             : 
      63             : /*
      64             :  * Create an astreamer that can verify a tar file.
      65             :  */
      66             : astreamer *
      67          58 : astreamer_verify_content_new(astreamer *next, verifier_context *context,
      68             :                              char *archive_name, Oid tblspc_oid)
      69             : {
      70             :     astreamer_verify *streamer;
      71             : 
      72          58 :     streamer = palloc0(sizeof(astreamer_verify));
      73          58 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
      74             :         &astreamer_verify_ops;
      75             : 
      76          58 :     streamer->base.bbs_next = next;
      77          58 :     streamer->context = context;
      78          58 :     streamer->archive_name = archive_name;
      79          58 :     streamer->tblspc_oid = tblspc_oid;
      80             : 
      81          58 :     if (!context->skip_checksums)
      82          58 :         streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
      83             : 
      84          58 :     return &streamer->base;
      85             : }
      86             : 
      87             : /*
      88             :  * Main entry point of the archive streamer for verifying tar members.
      89             :  */
      90             : static void
      91      425264 : astreamer_verify_content(astreamer *streamer, astreamer_member *member,
      92             :                          const char *data, int len,
      93             :                          astreamer_archive_context context)
      94             : {
      95      425264 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
      96             : 
      97             :     Assert(context != ASTREAMER_UNKNOWN);
      98             : 
      99      425264 :     switch (context)
     100             :     {
     101       38050 :         case ASTREAMER_MEMBER_HEADER:
     102             :             /* Initial setup plus decide which checks to perform. */
     103       38050 :             member_verify_header(streamer, member);
     104       38050 :             break;
     105             : 
     106      349108 :         case ASTREAMER_MEMBER_CONTENTS:
     107             :             /* Incremental work required to verify file contents. */
     108      349108 :             if (mystreamer->verify_checksum)
     109      215520 :                 member_compute_checksum(streamer, member, data, len);
     110      349108 :             if (mystreamer->verify_control_data)
     111          98 :                 member_copy_control_data(streamer, member, data, len);
     112      349108 :             break;
     113             : 
     114       38050 :         case ASTREAMER_MEMBER_TRAILER:
     115             :             /* Now we've got all the file data. */
     116       38050 :             if (mystreamer->verify_checksum)
     117       35006 :                 member_verify_checksum(streamer);
     118       38050 :             if (mystreamer->verify_control_data)
     119          38 :                 member_verify_control_data(streamer);
     120             : 
     121             :             /* Reset for next archive member. */
     122       38048 :             member_reset_info(streamer);
     123       38048 :             break;
     124             : 
     125          56 :         case ASTREAMER_ARCHIVE_TRAILER:
     126          56 :             break;
     127             : 
     128           0 :         default:
     129             :             /* Shouldn't happen. */
     130           0 :             pg_fatal("unexpected state while parsing tar file");
     131             :     }
     132      425262 : }
     133             : 
     134             : /*
     135             :  * End-of-stream processing for an astreamer_verify stream.
     136             :  */
     137             : static void
     138          56 : astreamer_verify_finalize(astreamer *streamer)
     139             : {
     140             :     Assert(streamer->bbs_next == NULL);
     141          56 : }
     142             : 
     143             : /*
     144             :  * Free memory associated with an astreamer_verify stream.
     145             :  */
     146             : static void
     147          56 : astreamer_verify_free(astreamer *streamer)
     148             : {
     149          56 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     150             : 
     151          56 :     if (mystreamer->checksum_ctx)
     152          56 :         pfree(mystreamer->checksum_ctx);
     153             : 
     154          56 :     pfree(streamer);
     155          56 : }
     156             : 
     157             : /*
     158             :  * Prepare to validate the next archive member.
     159             :  */
     160             : static void
     161       38050 : member_verify_header(astreamer *streamer, astreamer_member *member)
     162             : {
     163       38050 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     164             :     manifest_file *m;
     165             :     char        pathname[MAXPGPATH];
     166             : 
     167             :     /* We are only interested in normal files. */
     168       38050 :     if (member->is_directory || member->is_link)
     169        1104 :         return;
     170             : 
     171             :     /*
     172             :      * The backup manifest stores a relative path to the base directory for
     173             :      * files belonging to a tablespace, while the tablespace backup tar
     174             :      * archive does not include this path.
     175             :      *
     176             :      * The pathname taken from the tar file could contain '.' or '..'
     177             :      * references, which we want to remove, so apply canonicalize_path(). It
     178             :      * could also be an absolute pathname, which we want to treat as a
     179             :      * relative path, so prepend "./" if we're not adding a tablespace prefix
     180             :      * to make sure that canonicalize_path() does what we want.
     181             :      */
     182       37016 :     if (OidIsValid(mystreamer->tblspc_oid))
     183          20 :         snprintf(pathname, MAXPGPATH, "%s/%u/%s",
     184          20 :                  "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
     185             :     else
     186       36996 :         snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
     187       37016 :     canonicalize_path(pathname);
     188             : 
     189             :     /* Ignore any files that are listed in the ignore list. */
     190       37016 :     if (should_ignore_relpath(mystreamer->context, pathname))
     191          62 :         return;
     192             : 
     193             :     /* Check whether there's an entry in the manifest hash. */
     194       36954 :     m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
     195       36954 :     if (m == NULL)
     196             :     {
     197           4 :         report_backup_error(mystreamer->context,
     198             :                             "\"%s\" is present in \"%s\" but not in the manifest",
     199           4 :                             member->pathname, mystreamer->archive_name);
     200           4 :         return;
     201             :     }
     202       36950 :     mystreamer->mfile = m;
     203             : 
     204             :     /* Flag this entry as having been encountered in a tar archive. */
     205       36950 :     m->matched = true;
     206             : 
     207             :     /* Check that the size matches. */
     208       36950 :     if (m->size != member->size)
     209             :     {
     210           4 :         report_backup_error(mystreamer->context,
     211             :                             "\"%s\" has size %llu in \"%s\" but size %" PRIu64 " in the manifest",
     212           4 :                             member->pathname,
     213           4 :                             (unsigned long long) member->size,
     214             :                             mystreamer->archive_name,
     215             :                             m->size);
     216           4 :         m->bad = true;
     217           4 :         return;
     218             :     }
     219             : 
     220             :     /*
     221             :      * Decide whether we're going to verify the checksum for this file, and
     222             :      * whether we're going to perform the additional validation that we do
     223             :      * only for the control file.
     224             :      */
     225       36946 :     mystreamer->verify_checksum =
     226       36946 :         (!mystreamer->context->skip_checksums && should_verify_checksum(m));
     227       36946 :     mystreamer->verify_control_data =
     228       73892 :         mystreamer->context->manifest->version != 1 &&
     229       36946 :         !m->bad && strcmp(m->pathname, XLOG_CONTROL_FILE) == 0;
     230             : 
     231             :     /* If we're going to verify the checksum, initialize a checksum context. */
     232       71952 :     if (mystreamer->verify_checksum &&
     233       35006 :         pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
     234             :     {
     235           0 :         report_backup_error(mystreamer->context,
     236             :                             "%s: could not initialize checksum of file \"%s\"",
     237             :                             mystreamer->archive_name, m->pathname);
     238             : 
     239             :         /*
     240             :          * Checksum verification cannot be performed without proper context
     241             :          * initialization.
     242             :          */
     243           0 :         mystreamer->verify_checksum = false;
     244             :     }
     245             : }
     246             : 
     247             : /*
     248             :  * Computes the checksum incrementally for the received file content.
     249             :  *
     250             :  * Should have a correctly initialized checksum_ctx, which will be used for
     251             :  * incremental checksum computation.
     252             :  */
     253             : static void
     254      215520 : member_compute_checksum(astreamer *streamer, astreamer_member *member,
     255             :                         const char *data, int len)
     256             : {
     257      215520 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     258      215520 :     pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
     259      215520 :     manifest_file *m = mystreamer->mfile;
     260             : 
     261             :     Assert(mystreamer->verify_checksum);
     262             :     Assert(m->checksum_type == checksum_ctx->type);
     263             : 
     264             :     /*
     265             :      * Update the total count of computed checksum bytes so that we can
     266             :      * cross-check against the file size.
     267             :      */
     268      215520 :     mystreamer->checksum_bytes += len;
     269             : 
     270             :     /* Feed these bytes to the checksum calculation. */
     271      215520 :     if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
     272             :     {
     273           0 :         report_backup_error(mystreamer->context,
     274             :                             "could not update checksum of file \"%s\"",
     275             :                             m->pathname);
     276           0 :         mystreamer->verify_checksum = false;
     277             :     }
     278      215520 : }
     279             : 
     280             : /*
     281             :  * Perform the final computation and checksum verification after the entire
     282             :  * file content has been processed.
     283             :  */
     284             : static void
     285       35006 : member_verify_checksum(astreamer *streamer)
     286             : {
     287       35006 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     288       35006 :     manifest_file *m = mystreamer->mfile;
     289             :     uint8       checksumbuf[PG_CHECKSUM_MAX_LENGTH];
     290             :     int         checksumlen;
     291             : 
     292             :     Assert(mystreamer->verify_checksum);
     293             : 
     294             :     /*
     295             :      * It's unclear how this could fail, but let's check anyway to be safe.
     296             :      */
     297       35006 :     if (mystreamer->checksum_bytes != m->size)
     298             :     {
     299           0 :         report_backup_error(mystreamer->context,
     300             :                             "file \"%s\" in \"%s\" should contain %" PRIu64 " bytes, but read %" PRIu64 " bytes",
     301             :                             m->pathname, mystreamer->archive_name,
     302             :                             m->size,
     303             :                             mystreamer->checksum_bytes);
     304           0 :         return;
     305             :     }
     306             : 
     307             :     /* Get the final checksum. */
     308       35006 :     checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
     309       35006 :     if (checksumlen < 0)
     310             :     {
     311           0 :         report_backup_error(mystreamer->context,
     312             :                             "could not finalize checksum of file \"%s\"",
     313             :                             m->pathname);
     314           0 :         return;
     315             :     }
     316             : 
     317             :     /* And check it against the manifest. */
     318       35006 :     if (checksumlen != m->checksum_length)
     319           0 :         report_backup_error(mystreamer->context,
     320             :                             "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
     321             :                             m->pathname, mystreamer->archive_name,
     322             :                             m->checksum_length, checksumlen);
     323       35006 :     else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
     324          12 :         report_backup_error(mystreamer->context,
     325             :                             "checksum mismatch for file \"%s\" in \"%s\"",
     326             :                             m->pathname, mystreamer->archive_name);
     327             : }
     328             : 
     329             : /*
     330             :  * Stores the pg_control file contents into a local buffer; we need the entire
     331             :  * control file data for verification.
     332             :  */
     333             : static void
     334          98 : member_copy_control_data(astreamer *streamer, astreamer_member *member,
     335             :                          const char *data, int len)
     336             : {
     337          98 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     338             : 
     339             :     /* Should be here only for control file */
     340             :     Assert(mystreamer->verify_control_data);
     341             : 
     342             :     /*
     343             :      * Copy the new data into the control file buffer, but do not overrun the
     344             :      * buffer. Note that the on-disk length of the control file is expected to
     345             :      * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
     346             :      * shorter, just sizeof(ControlFileData).
     347             :      */
     348          98 :     if (mystreamer->control_file_bytes < sizeof(ControlFileData))
     349             :     {
     350             :         size_t      remaining;
     351             : 
     352          38 :         remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
     353          38 :         memcpy(((char *) &mystreamer->control_file)
     354          38 :                + mystreamer->control_file_bytes,
     355          38 :                data, Min((size_t) len, remaining));
     356             :     }
     357             : 
     358             :     /* Remember how many bytes we saw, even if we didn't buffer them. */
     359          98 :     mystreamer->control_file_bytes += len;
     360          98 : }
     361             : 
     362             : /*
     363             :  * Performs the CRC calculation of pg_control data and then calls the routines
     364             :  * that execute the final verification of the control file information.
     365             :  */
     366             : static void
     367          38 : member_verify_control_data(astreamer *streamer)
     368             : {
     369          38 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     370          38 :     manifest_data *manifest = mystreamer->context->manifest;
     371             :     pg_crc32c   crc;
     372             : 
     373             :     /* Should be here only for control file */
     374             :     Assert(strcmp(mystreamer->mfile->pathname, XLOG_CONTROL_FILE) == 0);
     375             :     Assert(mystreamer->verify_control_data);
     376             : 
     377             :     /*
     378             :      * If the control file is not the right length, that's a big problem.
     379             :      *
     380             :      * NB: There is a theoretical overflow risk here from casting to int, but
     381             :      * it isn't likely to be a real problem and this enables us to match the
     382             :      * same format string that pg_rewind uses for this case. Perhaps both this
     383             :      * and pg_rewind should use an unsigned 64-bit value, but for now we don't
     384             :      * worry about it.
     385             :      */
     386          38 :     if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
     387           0 :         report_fatal_error("unexpected control file size %d, expected %d",
     388           0 :                            (int) mystreamer->control_file_bytes,
     389             :                            PG_CONTROL_FILE_SIZE);
     390             : 
     391             :     /* Compute the CRC. */
     392          38 :     INIT_CRC32C(crc);
     393          38 :     COMP_CRC32C(crc, &mystreamer->control_file,
     394             :                 offsetof(ControlFileData, crc));
     395          38 :     FIN_CRC32C(crc);
     396             : 
     397             :     /* Control file contents not meaningful if CRC is bad. */
     398          38 :     if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
     399           0 :         report_fatal_error("%s: %s: CRC is incorrect",
     400             :                            mystreamer->archive_name,
     401           0 :                            mystreamer->mfile->pathname);
     402             : 
     403             :     /* Can't interpret control file if not current version. */
     404          38 :     if (mystreamer->control_file.pg_control_version != PG_CONTROL_VERSION)
     405           0 :         report_fatal_error("%s: %s: unexpected control file version",
     406             :                            mystreamer->archive_name,
     407           0 :                            mystreamer->mfile->pathname);
     408             : 
     409             :     /* System identifiers should match. */
     410          38 :     if (manifest->system_identifier !=
     411          38 :         mystreamer->control_file.system_identifier)
     412           2 :         report_fatal_error("%s: %s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64,
     413             :                            mystreamer->archive_name,
     414           2 :                            mystreamer->mfile->pathname,
     415             :                            manifest->system_identifier,
     416             :                            mystreamer->control_file.system_identifier);
     417          36 : }
     418             : 
     419             : /*
     420             :  * Reset the per-member verification state ahead of the next archive member.
     421             :  */
     422             : static void
     423       38048 : member_reset_info(astreamer *streamer)
     424             : {
     425       38048 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     426             : 
     427       38048 :     mystreamer->mfile = NULL;
     428       38048 :     mystreamer->verify_checksum = false;
     429       38048 :     mystreamer->verify_control_data = false;
     430       38048 :     mystreamer->checksum_bytes = 0;
     431       38048 :     mystreamer->control_file_bytes = 0;
     432       38048 : }

Generated by: LCOV version 1.14