LCOV - code coverage report
Current view: top level - src/bin/pg_verifybackup - astreamer_verify.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 121 140 86.4 %
Date: 2025-01-18 04:15:08 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * astreamer_verify.c
       4             :  *
       5             :  * Archive streamer for verification of a tar format backup (including
       6             :  * compressed tar format backups).
       7             :  *
       8             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       9             :  *
      10             :  * src/bin/pg_verifybackup/astreamer_verify.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres_fe.h"
      16             : 
      17             : #include "catalog/pg_control.h"
      18             : #include "pg_verifybackup.h"
      19             : 
      20             : typedef struct astreamer_verify
      21             : {
      22             :     /* These fields don't change once initialized. */
      23             :     astreamer   base;
      24             :     verifier_context *context;
      25             :     char       *archive_name;
      26             :     Oid         tblspc_oid;
      27             : 
      28             :     /* These fields change for each archive member. */
      29             :     manifest_file *mfile;
      30             :     bool        verify_checksum;
      31             :     bool        verify_control_data;
      32             :     pg_checksum_context *checksum_ctx;
      33             :     uint64      checksum_bytes;
      34             :     ControlFileData control_file;
      35             :     uint64      control_file_bytes;
      36             : } astreamer_verify;
      37             : 
      38             : static void astreamer_verify_content(astreamer *streamer,
      39             :                                      astreamer_member *member,
      40             :                                      const char *data, int len,
      41             :                                      astreamer_archive_context context);
      42             : static void astreamer_verify_finalize(astreamer *streamer);
      43             : static void astreamer_verify_free(astreamer *streamer);
      44             : 
      45             : static void member_verify_header(astreamer *streamer, astreamer_member *member);
      46             : static void member_compute_checksum(astreamer *streamer,
      47             :                                     astreamer_member *member,
      48             :                                     const char *data, int len);
      49             : static void member_verify_checksum(astreamer *streamer);
      50             : static void member_copy_control_data(astreamer *streamer,
      51             :                                      astreamer_member *member,
      52             :                                      const char *data, int len);
      53             : static void member_verify_control_data(astreamer *streamer);
      54             : static void member_reset_info(astreamer *streamer);
      55             : 
      56             : static const astreamer_ops astreamer_verify_ops = {
      57             :     .content = astreamer_verify_content,
      58             :     .finalize = astreamer_verify_finalize,
      59             :     .free = astreamer_verify_free
      60             : };
      61             : 
      62             : /*
      63             :  * Create an astreamer that can verify a tar file.
      64             :  */
      65             : astreamer *
      66          58 : astreamer_verify_content_new(astreamer *next, verifier_context *context,
      67             :                              char *archive_name, Oid tblspc_oid)
      68             : {
      69             :     astreamer_verify *streamer;
      70             : 
      71          58 :     streamer = palloc0(sizeof(astreamer_verify));
      72          58 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
      73             :         &astreamer_verify_ops;
      74             : 
      75          58 :     streamer->base.bbs_next = next;
      76          58 :     streamer->context = context;
      77          58 :     streamer->archive_name = archive_name;
      78          58 :     streamer->tblspc_oid = tblspc_oid;
      79             : 
      80          58 :     if (!context->skip_checksums)
      81          58 :         streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
      82             : 
      83          58 :     return &streamer->base;
      84             : }
      85             : 
      86             : /*
      87             :  * Main entry point of the archive streamer for verifying tar members.
      88             :  */
      89             : static void
      90      423936 : astreamer_verify_content(astreamer *streamer, astreamer_member *member,
      91             :                          const char *data, int len,
      92             :                          astreamer_archive_context context)
      93             : {
      94      423936 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
      95             : 
      96             :     Assert(context != ASTREAMER_UNKNOWN);
      97             : 
      98      423936 :     switch (context)
      99             :     {
     100       38050 :         case ASTREAMER_MEMBER_HEADER:
     101             :             /* Initial setup plus decide which checks to perform. */
     102       38050 :             member_verify_header(streamer, member);
     103       38050 :             break;
     104             : 
     105      347780 :         case ASTREAMER_MEMBER_CONTENTS:
     106             :             /* Incremental work required to verify file contents. */
     107      347780 :             if (mystreamer->verify_checksum)
     108      214190 :                 member_compute_checksum(streamer, member, data, len);
     109      347780 :             if (mystreamer->verify_control_data)
     110          98 :                 member_copy_control_data(streamer, member, data, len);
     111      347780 :             break;
     112             : 
     113       38050 :         case ASTREAMER_MEMBER_TRAILER:
     114             :             /* Now we've got all the file data. */
     115       38050 :             if (mystreamer->verify_checksum)
     116       35006 :                 member_verify_checksum(streamer);
     117       38050 :             if (mystreamer->verify_control_data)
     118          38 :                 member_verify_control_data(streamer);
     119             : 
     120             :             /* Reset for next archive member. */
     121       38048 :             member_reset_info(streamer);
     122       38048 :             break;
     123             : 
     124          56 :         case ASTREAMER_ARCHIVE_TRAILER:
     125          56 :             break;
     126             : 
     127           0 :         default:
     128             :             /* Shouldn't happen. */
     129           0 :             pg_fatal("unexpected state while parsing tar file");
     130             :     }
     131      423934 : }
     132             : 
     133             : /*
     134             :  * End-of-stream processing for an astreamer_verify stream.
     135             :  */
     136             : static void
     137          56 : astreamer_verify_finalize(astreamer *streamer)
     138             : {
     139             :     Assert(streamer->bbs_next == NULL);
     140          56 : }
     141             : 
     142             : /*
     143             :  * Free memory associated with an astreamer_verify stream.
     144             :  */
     145             : static void
     146          56 : astreamer_verify_free(astreamer *streamer)
     147             : {
     148          56 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     149             : 
     150          56 :     if (mystreamer->checksum_ctx)
     151          56 :         pfree(mystreamer->checksum_ctx);
     152             : 
     153          56 :     pfree(streamer);
     154          56 : }
     155             : 
     156             : /*
     157             :  * Prepare to validate the next archive member.
     158             :  */
     159             : static void
     160       38050 : member_verify_header(astreamer *streamer, astreamer_member *member)
     161             : {
     162       38050 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     163             :     manifest_file *m;
     164             :     char        pathname[MAXPGPATH];
     165             : 
     166             :     /* We are only interested in normal files. */
     167       38050 :     if (member->is_directory || member->is_link)
     168        1104 :         return;
     169             : 
     170             :     /*
     171             :      * The backup manifest stores a relative path to the base directory for
     172             :      * files belonging to a tablespace, while the tablespace backup tar
     173             :      * archive does not include this path.
     174             :      *
     175             :      * The pathname taken from the tar file could contain '.' or '..'
     176             :      * references, which we want to remove, so apply canonicalize_path(). It
     177             :      * could also be an absolute pathname, which we want to treat as a
     178             :      * relative path, so prepend "./" if we're not adding a tablespace prefix
     179             :      * to make sure that canonicalize_path() does what we want.
     180             :      */
     181       37016 :     if (OidIsValid(mystreamer->tblspc_oid))
     182          20 :         snprintf(pathname, MAXPGPATH, "%s/%u/%s",
     183          20 :                  "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
     184             :     else
     185       36996 :         snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
     186       37016 :     canonicalize_path(pathname);
     187             : 
     188             :     /* Ignore any files that are listed in the ignore list. */
     189       37016 :     if (should_ignore_relpath(mystreamer->context, pathname))
     190          62 :         return;
     191             : 
     192             :     /* Check whether there's an entry in the manifest hash. */
     193       36954 :     m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
     194       36954 :     if (m == NULL)
     195             :     {
     196           4 :         report_backup_error(mystreamer->context,
     197             :                             "\"%s\" is present in \"%s\" but not in the manifest",
     198           4 :                             member->pathname, mystreamer->archive_name);
     199           4 :         return;
     200             :     }
     201       36950 :     mystreamer->mfile = m;
     202             : 
     203             :     /* Flag this entry as having been encountered in a tar archive. */
     204       36950 :     m->matched = true;
     205             : 
     206             :     /* Check that the size matches. */
     207       36950 :     if (m->size != member->size)
     208             :     {
     209           4 :         report_backup_error(mystreamer->context,
     210             :                             "\"%s\" has size %llu in \"%s\" but size %llu in the manifest",
     211           4 :                             member->pathname,
     212           4 :                             (unsigned long long) member->size,
     213             :                             mystreamer->archive_name,
     214           4 :                             (unsigned long long) m->size);
     215           4 :         m->bad = true;
     216           4 :         return;
     217             :     }
     218             : 
     219             :     /*
     220             :      * Decide whether we're going to verify the checksum for this file, and
     221             :      * whether we're going to perform the additional validation that we do
     222             :      * only for the control file.
     223             :      */
     224       36946 :     mystreamer->verify_checksum =
     225       36946 :         (!mystreamer->context->skip_checksums && should_verify_checksum(m));
     226       36946 :     mystreamer->verify_control_data =
     227       73892 :         mystreamer->context->manifest->version != 1 &&
     228       36946 :         !m->bad && strcmp(m->pathname, "global/pg_control") == 0;
     229             : 
     230             :     /* If we're going to verify the checksum, initialize a checksum context. */
     231       71952 :     if (mystreamer->verify_checksum &&
     232       35006 :         pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
     233             :     {
     234           0 :         report_backup_error(mystreamer->context,
     235             :                             "%s: could not initialize checksum of file \"%s\"",
     236             :                             mystreamer->archive_name, m->pathname);
     237             : 
     238             :         /*
     239             :          * Checksum verification cannot be performed without proper context
     240             :          * initialization.
     241             :          */
     242           0 :         mystreamer->verify_checksum = false;
     243             :     }
     244             : }
     245             : 
     246             : /*
     247             :  * Computes the checksum incrementally for the received file content.
     248             :  *
     249             :  * Should have a correctly initialized checksum_ctx, which will be used for
     250             :  * incremental checksum computation.
     251             :  */
     252             : static void
     253      214190 : member_compute_checksum(astreamer *streamer, astreamer_member *member,
     254             :                         const char *data, int len)
     255             : {
     256      214190 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     257      214190 :     pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
     258      214190 :     manifest_file *m = mystreamer->mfile;
     259             : 
     260             :     Assert(mystreamer->verify_checksum);
     261             :     Assert(m->checksum_type == checksum_ctx->type);
     262             : 
     263             :     /*
     264             :      * Update the total count of computed checksum bytes so that we can
     265             :      * cross-check against the file size.
     266             :      */
     267      214190 :     mystreamer->checksum_bytes += len;
     268             : 
     269             :     /* Feed these bytes to the checksum calculation. */
     270      214190 :     if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
     271             :     {
     272           0 :         report_backup_error(mystreamer->context,
     273             :                             "could not update checksum of file \"%s\"",
     274             :                             m->pathname);
     275           0 :         mystreamer->verify_checksum = false;
     276             :     }
     277      214190 : }
     278             : 
     279             : /*
     280             :  * Perform the final computation and checksum verification after the entire
     281             :  * file content has been processed.
     282             :  */
     283             : static void
     284       35006 : member_verify_checksum(astreamer *streamer)
     285             : {
     286       35006 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     287       35006 :     manifest_file *m = mystreamer->mfile;
     288             :     uint8       checksumbuf[PG_CHECKSUM_MAX_LENGTH];
     289             :     int         checksumlen;
     290             : 
     291             :     Assert(mystreamer->verify_checksum);
     292             : 
     293             :     /*
     294             :      * It's unclear how this could fail, but let's check anyway to be safe.
     295             :      */
     296       35006 :     if (mystreamer->checksum_bytes != m->size)
     297             :     {
     298           0 :         report_backup_error(mystreamer->context,
     299             :                             "file \"%s\" in \"%s\" should contain %llu bytes, but read %llu bytes",
     300             :                             m->pathname, mystreamer->archive_name,
     301           0 :                             (unsigned long long) m->size,
     302           0 :                             (unsigned long long) mystreamer->checksum_bytes);
     303           0 :         return;
     304             :     }
     305             : 
     306             :     /* Get the final checksum. */
     307       35006 :     checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
     308       35006 :     if (checksumlen < 0)
     309             :     {
     310           0 :         report_backup_error(mystreamer->context,
     311             :                             "could not finalize checksum of file \"%s\"",
     312             :                             m->pathname);
     313           0 :         return;
     314             :     }
     315             : 
     316             :     /* And check it against the manifest. */
     317       35006 :     if (checksumlen != m->checksum_length)
     318           0 :         report_backup_error(mystreamer->context,
     319             :                             "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
     320             :                             m->pathname, mystreamer->archive_name,
     321             :                             m->checksum_length, checksumlen);
     322       35006 :     else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
     323          12 :         report_backup_error(mystreamer->context,
     324             :                             "checksum mismatch for file \"%s\" in \"%s\"",
     325             :                             m->pathname, mystreamer->archive_name);
     326             : }
     327             : 
     328             : /*
     329             :  * Stores the pg_control file contents into a local buffer; we need the entire
     330             :  * control file data for verification.
     331             :  */
     332             : static void
     333          98 : member_copy_control_data(astreamer *streamer, astreamer_member *member,
     334             :                          const char *data, int len)
     335             : {
     336          98 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     337             : 
     338             :     /* Should be here only for control file */
     339             :     Assert(mystreamer->verify_control_data);
     340             : 
     341             :     /*
     342             :      * Copy the new data into the control file buffer, but do not overrun the
     343             :      * buffer. Note that the on-disk length of the control file is expected to
     344             :      * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
     345             :      * shorter, just sizeof(ControlFileData).
     346             :      */
     347          98 :     if (mystreamer->control_file_bytes < sizeof(ControlFileData))
     348             :     {
     349             :         size_t      remaining;
     350             : 
     351          38 :         remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
     352          38 :         memcpy(((char *) &mystreamer->control_file)
     353          38 :                + mystreamer->control_file_bytes,
     354          38 :                data, Min((size_t) len, remaining));
     355             :     }
     356             : 
     357             :     /* Remember how many bytes we saw, even if we didn't buffer them. */
     358          98 :     mystreamer->control_file_bytes += len;
     359          98 : }
     360             : 
     361             : /*
     362             :  * Performs the CRC calculation of pg_control data and then calls the routines
     363             :  * that execute the final verification of the control file information.
     364             :  */
     365             : static void
     366          38 : member_verify_control_data(astreamer *streamer)
     367             : {
     368          38 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     369          38 :     manifest_data *manifest = mystreamer->context->manifest;
     370             :     pg_crc32c   crc;
     371             : 
     372             :     /* Should be here only for control file */
     373             :     Assert(strcmp(mystreamer->mfile->pathname, "global/pg_control") == 0);
     374             :     Assert(mystreamer->verify_control_data);
     375             : 
     376             :     /*
     377             :      * If the control file is not the right length, that's a big problem.
     378             :      *
     379             :      * NB: There is a theoretical overflow risk here from casting to int, but
     380             :      * it isn't likely to be a real problem and this enables us to match the
     381             :      * same format string that pg_rewind uses for this case. Perhaps both this
     382             :      * and pg_rewind should use an unsigned 64-bit value, but for now we don't
     383             :      * worry about it.
     384             :      */
     385          38 :     if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
     386           0 :         report_fatal_error("unexpected control file size %d, expected %d",
     387           0 :                            (int) mystreamer->control_file_bytes,
     388             :                            PG_CONTROL_FILE_SIZE);
     389             : 
     390             :     /* Compute the CRC. */
     391          38 :     INIT_CRC32C(crc);
     392          38 :     COMP_CRC32C(crc, &mystreamer->control_file,
     393             :                 offsetof(ControlFileData, crc));
     394          38 :     FIN_CRC32C(crc);
     395             : 
     396             :     /* Control file contents not meaningful if CRC is bad. */
     397          38 :     if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
     398           0 :         report_fatal_error("%s: %s: CRC is incorrect",
     399             :                            mystreamer->archive_name,
     400           0 :                            mystreamer->mfile->pathname);
     401             : 
     402             :     /* Can't interpret control file if not current version. */
     403          38 :     if (mystreamer->control_file.pg_control_version != PG_CONTROL_VERSION)
     404           0 :         report_fatal_error("%s: %s: unexpected control file version",
     405             :                            mystreamer->archive_name,
     406           0 :                            mystreamer->mfile->pathname);
     407             : 
     408             :     /* System identifiers should match. */
     409          38 :     if (manifest->system_identifier !=
     410          38 :         mystreamer->control_file.system_identifier)
     411           2 :         report_fatal_error("%s: %s: manifest system identifier is %llu, but control file has %llu",
     412             :                            mystreamer->archive_name,
     413           2 :                            mystreamer->mfile->pathname,
     414           2 :                            (unsigned long long) manifest->system_identifier,
     415           2 :                            (unsigned long long) mystreamer->control_file.system_identifier);
     416          36 : }
     417             : 
     418             : /*
     419             :  * Reset the per-member flags and byte counters for the next archive member.
     420             :  */
     421             : static void
     422       38048 : member_reset_info(astreamer *streamer)
     423             : {
     424       38048 :     astreamer_verify *mystreamer = (astreamer_verify *) streamer;
     425             : 
     426       38048 :     mystreamer->mfile = NULL;
     427       38048 :     mystreamer->verify_checksum = false;
     428       38048 :     mystreamer->verify_control_data = false;
     429       38048 :     mystreamer->checksum_bytes = 0;
     430       38048 :     mystreamer->control_file_bytes = 0;
     431       38048 : }

Generated by: LCOV version 1.14