LCOV - code coverage report
Current view: top level - src/bin/pg_combinebackup - load_manifest.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 67 91 73.6 %
Date: 2024-05-09 04:10:49 Functions: 6 7 85.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * Load data from a backup manifest into memory.
       4             :  *
       5             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * src/bin/pg_combinebackup/load_manifest.c
       9             :  *
      10             :  *-------------------------------------------------------------------------
      11             :  */
      12             : 
      13             : #include "postgres_fe.h"
      14             : 
      15             : #include <sys/stat.h>
      16             : #include <unistd.h>
      17             : 
      18             : #include "common/hashfn_unstable.h"
      19             : #include "common/logging.h"
      20             : #include "common/parse_manifest.h"
      21             : #include "load_manifest.h"
      22             : 
      23             : /*
      24             :  * For efficiency, we'd like our hash table containing information about the
      25             :  * manifest to start out with approximately the correct number of entries.
      26             :  * There's no way to know the exact number of entries without reading the whole
      27             :  * file, but we can get an estimate by dividing the file size by the estimated
      28             :  * number of bytes per line.
      29             :  *
      30             :  * This could be off by about a factor of two in either direction, because the
      31             :  * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
      32             :  * checksum is 128 hex digits, whereas a CRC-32C value is only 8, and there
      33             :  * might be no checksum at all.
      34             :  */
      35             : #define ESTIMATED_BYTES_PER_MANIFEST_LINE   100
      36             : 
      37             : /*
      38             :  * Size, in bytes, of each chunk read from the manifest file. A manifest no
      39             :  * larger than this is read and parsed in one step; larger ones incrementally.
      40             :  */
      41             : #define READ_CHUNK_SIZE (128  * 1024)
      42             : 
      43             : /*
      44             :  * Define a hash table, keyed by path name, which we can use to store
      45             :  * information about the files mentioned in the backup manifest.
      46             :  */
      47             : #define SH_PREFIX       manifest_files
      48             : #define SH_ELEMENT_TYPE manifest_file
      49             : #define SH_KEY_TYPE     char *
      50             : #define SH_KEY          pathname
      51             : #define SH_HASH_KEY(tb, key)    hash_string(key)
      52             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      53             : #define SH_SCOPE        extern
      54             : #define SH_RAW_ALLOCATOR    pg_malloc0
      55             : #define SH_DEFINE
      56             : #include "lib/simplehash.h"
      57             : 
      58             : static void combinebackup_version_cb(JsonManifestParseContext *context,
      59             :                                      int manifest_version);
      60             : static void combinebackup_system_identifier_cb(JsonManifestParseContext *context,
      61             :                                                uint64 manifest_system_identifier);
      62             : static void combinebackup_per_file_cb(JsonManifestParseContext *context,
      63             :                                       char *pathname, size_t size,
      64             :                                       pg_checksum_type checksum_type,
      65             :                                       int checksum_length,
      66             :                                       uint8 *checksum_payload);
      67             : static void combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
      68             :                                            TimeLineID tli,
      69             :                                            XLogRecPtr start_lsn,
      70             :                                            XLogRecPtr end_lsn);
      71             : static void report_manifest_error(JsonManifestParseContext *context,
      72             :                                   const char *fmt,...)
      73             :             pg_attribute_printf(2, 3) pg_attribute_noreturn();  /* logs the error, then exits */
      74             : 
      75             : /*
      76             :  * Load the backup_manifest file from each of n_backups backup directories and
      77             :  * return a newly allocated array of manifest_data pointers, one per directory.
      78             :  *
      79             :  * NB: Since load_backup_manifest() can return NULL, the resulting array could
      80             :  * contain NULL entries.
      81             :  */
      82             : manifest_data **
      83          22 : load_backup_manifests(int n_backups, char **backup_directories)
      84             : {
      85             :     manifest_data **result;
      86             :     int         i;
      87             : 
      88          22 :     result = pg_malloc(sizeof(manifest_data *) * n_backups);
      89          66 :     for (i = 0; i < n_backups; ++i)
      90          44 :         result[i] = load_backup_manifest(backup_directories[i]);
      91             : 
      92          22 :     return result;
      93             : }
      94             : 
      95             : /*
      96             :  * Parse the backup_manifest file in the named backup directory. Construct a
      97             :  * hash table with information about all the files it mentions, and a linked
      98             :  * list of all the WAL ranges it mentions.
      99             :  *
     100             :  * If the backup_manifest file simply doesn't exist, logs a warning and returns
     101             :  * NULL. Any other error, or any error parsing the contents of the file, is
     102             :  * fatal.
     103             :  */
     104             : manifest_data *
     105          44 : load_backup_manifest(char *backup_directory)
     106             : {
     107             :     char        pathname[MAXPGPATH];
     108             :     int         fd;
     109             :     struct stat statbuf;
     110             :     off_t       estimate;
     111             :     uint32      initial_size;
     112             :     manifest_files_hash *ht;
     113             :     char       *buffer;
     114             :     int         rc;
     115             :     JsonManifestParseContext context;
     116             :     manifest_data *result;
     117          44 :     int         chunk_size = READ_CHUNK_SIZE;
     118             : 
     119             :     /* Open the manifest file. */
     120          44 :     snprintf(pathname, MAXPGPATH, "%s/backup_manifest", backup_directory);
     121          44 :     if ((fd = open(pathname, O_RDONLY | PG_BINARY, 0)) < 0)
     122             :     {
     123           0 :         if (errno == ENOENT)
     124             :         {
     125           0 :             pg_log_warning("\"%s\" does not exist", pathname);
     126           0 :             return NULL;
     127             :         }
     128           0 :         pg_fatal("could not open file \"%s\": %m", pathname);
     129             :     }
     130             : 
     131             :     /* Figure out how big the manifest is. */
     132          44 :     if (fstat(fd, &statbuf) != 0)
     133           0 :         pg_fatal("could not stat file \"%s\": %m", pathname);
     134             : 
     135             :     /* Guess how large to make the hash table based on the manifest size. */
     136          44 :     estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
     137          44 :     initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
     138             : 
     139             :     /* Create the hash table. */
     140          44 :     ht = manifest_files_create(initial_size, NULL);
     141             : 
     142          44 :     result = pg_malloc0(sizeof(manifest_data));
     143          44 :     result->files = ht;
     144          44 :     context.private_data = result;
     145          44 :     context.version_cb = combinebackup_version_cb;
     146          44 :     context.system_identifier_cb = combinebackup_system_identifier_cb;
     147          44 :     context.per_file_cb = combinebackup_per_file_cb;
     148          44 :     context.per_wal_range_cb = combinebackup_per_wal_range_cb;
     149          44 :     context.error_cb = report_manifest_error;
     150             : 
     151             :     /*
     152             :      * Parse the file: in one step if it fits in a single chunk, else in chunks.
     153             :      */
     154          44 :     if (statbuf.st_size <= chunk_size)
     155             :     {
     156           0 :         buffer = pg_malloc(statbuf.st_size);
     157           0 :         rc = read(fd, buffer, statbuf.st_size);
     158           0 :         if (rc != statbuf.st_size)
     159             :         {
     160           0 :             if (rc < 0)
     161           0 :                 pg_fatal("could not read file \"%s\": %m", pathname);
     162             :             else
     163           0 :                 pg_fatal("could not read file \"%s\": read %d of %lld",
     164             :                          pathname, rc, (long long int) statbuf.st_size);
     165             :         }
     166             : 
     167             :         /* Close the manifest file. */
     168           0 :         close(fd);
     169             : 
     170             :         /* Parse the manifest. */
     171           0 :         json_parse_manifest(&context, buffer, statbuf.st_size);
     172             :     }
     173             :     else
     174             :     {
     175          44 :         off_t       bytes_left = statbuf.st_size;   /* off_t: file size may exceed INT_MAX */
     176             :         JsonManifestParseIncrementalState *inc_state;
     177             : 
     178          44 :         inc_state = json_parse_manifest_incremental_init(&context);
     179             : 
     180          44 :         buffer = pg_malloc(chunk_size + 1);
     181             : 
     182         132 :         while (bytes_left > 0)
     183             :         {
     184          88 :             int         bytes_to_read = chunk_size;
     185             : 
     186             :             /*
     187             :              * Make sure that the last chunk is sufficiently large (i.e. at
     188             :              * least half the chunk size) so that it fully contains the
     189             :              * piece at the end with the checksum.
     190             :              */
     191          88 :             if (bytes_left < chunk_size)
     192          44 :                 bytes_to_read = bytes_left;
     193          44 :             else if (bytes_left < 2 * chunk_size)
     194          44 :                 bytes_to_read = bytes_left / 2;
     195          88 :             rc = read(fd, buffer, bytes_to_read);
     196          88 :             if (rc != bytes_to_read)
     197             :             {
     198           0 :                 if (rc < 0)
     199           0 :                     pg_fatal("could not read file \"%s\": %m", pathname);
     200             :                 else
     201           0 :                     pg_fatal("could not read file \"%s\": read %lld of %lld",
     202             :                              pathname,
     203             :                              (long long int) (statbuf.st_size + rc - bytes_left),
     204             :                              (long long int) statbuf.st_size);
     205             :             }
     206          88 :             bytes_left -= rc;
     207          88 :             json_parse_manifest_incremental_chunk(
     208             :                                                   inc_state, buffer, rc, bytes_left == 0);
     209             :         }
     210             : 
     211             :         /* Release the incremental state memory */
     212          44 :         json_parse_manifest_incremental_shutdown(inc_state);
     213             : 
     214          44 :         close(fd);
     215             :     }
     216             : 
     217             :     /* All done. */
     218          44 :     pfree(buffer);
     219          44 :     return result;
     220             : }
     221             : 
     222             : /*
     223             :  * Report an error while parsing the manifest, then exit with status 1.
     224             :  *
     225             :  * We consider all such errors to be fatal errors. The manifest parser
     226             :  * expects this function not to return.
     227             :  */
     228             : static void
     229           0 : report_manifest_error(JsonManifestParseContext *context, const char *fmt,...)
     230             : {
     231             :     va_list     ap;
     232             : 
     233           0 :     va_start(ap, fmt);
     234           0 :     pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, gettext(fmt), ap);   /* fmt is translated here */
     235           0 :     va_end(ap);
     236             : 
     237           0 :     exit(1);
     238             : }
     239             : 
     240             : /*
     241             :  * Callback to validate the manifest version number for incremental backup.
     242             :  */
     243             : static void
     244          44 : combinebackup_version_cb(JsonManifestParseContext *context,
     245             :                          int manifest_version)
     246             : {
     247             :     /* Incremental backup needs manifest version 2 or later; reject version 1 */
     248          44 :     if (manifest_version == 1)
     249           0 :         pg_fatal("backup manifest version 1 does not support incremental backup");
     250          44 : }
     251             : 
     252             : /*
     253             :  * Record the system identifier extracted from the backup manifest.
     254             :  */
     255             : static void
     256          44 : combinebackup_system_identifier_cb(JsonManifestParseContext *context,
     257             :                                    uint64 manifest_system_identifier)
     258             : {
     259          44 :     manifest_data *manifest = context->private_data;
     260             : 
     261             :     /* Only store the value here; validation is done at a later stage */
     262          44 :     manifest->system_identifier = manifest_system_identifier;
     263          44 : }
     264             : 
     265             : /*
     266             :  * Record details extracted from the backup manifest for one file.
     267             :  */
     268             : static void
     269       45056 : combinebackup_per_file_cb(JsonManifestParseContext *context,
     270             :                           char *pathname, size_t size,
     271             :                           pg_checksum_type checksum_type,
     272             :                           int checksum_length, uint8 *checksum_payload)
     273             : {
     274       45056 :     manifest_data *manifest = context->private_data;
     275             :     manifest_file *m;
     276             :     bool        found;
     277             : 
     278             :     /* Make a new entry in the hash table for this file; duplicates are fatal. */
     279       45056 :     m = manifest_files_insert(manifest->files, pathname, &found);
     280       45056 :     if (found)
     281           0 :         pg_fatal("duplicate path name in backup manifest: \"%s\"", pathname);
     282             : 
     283             :     /* Initialize the entry. */
     284       45056 :     m->size = size;
     285       45056 :     m->checksum_type = checksum_type;
     286       45056 :     m->checksum_length = checksum_length;
     287       45056 :     m->checksum_payload = checksum_payload;     /* pointer is stored, not copied */
     288       45056 : }
     289             : 
     290             : /*
     291             :  * Record details extracted from the backup manifest for one WAL range.
     292             :  */
     293             : static void
     294          44 : combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
     295             :                                TimeLineID tli,
     296             :                                XLogRecPtr start_lsn, XLogRecPtr end_lsn)
     297             : {
     298          44 :     manifest_data *manifest = context->private_data;
     299             :     manifest_wal_range *range;
     300             : 
     301             :     /* Allocate and initialize a struct describing this WAL range. */
     302          44 :     range = palloc(sizeof(manifest_wal_range));
     303          44 :     range->tli = tli;
     304          44 :     range->start_lsn = start_lsn;
     305          44 :     range->end_lsn = end_lsn;
     306          44 :     range->prev = manifest->last_wal_range;
     307          44 :     range->next = NULL;
     308             : 
     309             :     /* Add it to the end of the list, which may still be empty. */
     310          44 :     if (manifest->first_wal_range == NULL)
     311          44 :         manifest->first_wal_range = range;
     312             :     else
     313           0 :         manifest->last_wal_range->next = range;
     314          44 :     manifest->last_wal_range = range;
     315          44 : }

Generated by: LCOV version 1.14