LCOV - code coverage report
Current view: top level - src/bin/pg_dump - compress_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 73 87 83.9 %
Date: 2024-04-23 10:11:02 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * compress_io.c
       4             :  *   Routines for archivers to write an uncompressed or compressed data
       5             :  *   stream.
       6             :  *
       7             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * This file includes two APIs for dealing with compressed data. The first
      11             :  * provides more flexibility, using callbacks to read/write data from the
      12             :  * underlying stream. The second API is a wrapper around fopen and
      13             :  * friends, providing an interface similar to those while abstracting away
      14             :  * the possible compression. The second API is designed so that the resulting
      15             :  * files can easily be manipulated with an external compression utility
      16             :  * program.
      17             :  *
      18             :  * Compressor API
      19             :  * --------------
      20             :  *
      21             :  *  The interface for writing to an archive consists of three functions:
      22             :  *  AllocateCompressor, writeData, and EndCompressor. First you call
      23             :  *  AllocateCompressor, then write all the data by calling writeData as many
      24             :  *  times as needed, and finally EndCompressor. writeData will call the
      25             :  *  WriteFunc that was provided to AllocateCompressor for each chunk of
      26             :  *  compressed data.
      27             :  *
      28             :  *  The interface for reading an archive consists of three analogous functions:
      29             :  *  AllocateCompressor, readData, and EndCompressor. First you call
      30             :  *  AllocateCompressor, then call readData to read the whole compressed
      31             :  *  stream; readData repeatedly calls the given ReadFunc. ReadFunc
      32             :  *  returns the compressed data one chunk at a time. Then readData decompresses
      33             :  *  it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
      34             :  *  to signal EOF. The interface is the same for compressed and uncompressed
      35             :  *  streams.
      36             :  *
      37             :  * Compressed stream API
      38             :  * ----------------------
      39             :  *
      40             :  *  The compressed stream API provides a set of function pointers for
      41             :  *  opening, reading, writing, and finally closing files. The implemented
      42             :  *  function pointers are documented in the corresponding header file and are
      43             :  *  common for all streams. It allows the caller to use the same functions for
      44             :  *  both compressed and uncompressed streams.
      45             :  *
      46             :  *  The interface consists of three functions, InitCompressFileHandle,
      47             :  *  InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
      48             :  *  compression is known, then start by calling InitCompressFileHandle,
      49             :  *  otherwise discover it by using InitDiscoverCompressFileHandle. Then call
      50             :  *  the function pointers as required for the read/write operations. Finally
      51             :  *  call EndCompressFileHandle to end the stream.
      52             :  *
      53             :  *  InitDiscoverCompressFileHandle tries to infer the compression from the
      54             :  *  filename suffix. If the filename has no known compression suffix, it
      55             :  *  checks whether the file exists as is and, if not, looks for the same
      56             :  *  file with a compression suffix (.gz, .lz4 and .zst, in this order).
      57             :  *
      58             :  * IDENTIFICATION
      59             :  *     src/bin/pg_dump/compress_io.c
      60             :  *
      61             :  *-------------------------------------------------------------------------
      62             :  */
      63             : #include "postgres_fe.h"
      64             : 
      65             : #include <sys/stat.h>
      66             : #include <unistd.h>
      67             : 
      68             : #include "compress_gzip.h"
      69             : #include "compress_io.h"
      70             : #include "compress_lz4.h"
      71             : #include "compress_none.h"
      72             : #include "compress_zstd.h"
      73             : #include "pg_backup_utils.h"
      74             : 
      75             : /*----------------------
      76             :  * Generic functions
      77             :  *----------------------
      78             :  */
      79             : 
      80             : /*
      81             :  * Checks whether support for a compression algorithm is implemented in
      82             :  * pg_dump/restore. PG_COMPRESSION_NONE is always accepted; gzip, LZ4 and
      83             :  * zstd are accepted only if HAVE_LIBZ, USE_LZ4 or USE_ZSTD is defined.
      84             :  * On success returns NULL, otherwise returns a malloc'ed string which can be
      85             :  * used by the caller in an error message.
      86             :  */
      87             : char *
      88         606 : supports_compression(const pg_compress_specification compression_spec)
      89             : {
      90         606 :     const pg_compress_algorithm algorithm = compression_spec.algorithm;
      91         606 :     bool        supported = false;
      92             : 
      93         606 :     if (algorithm == PG_COMPRESSION_NONE)
      94         430 :         supported = true;
      95             : #ifdef HAVE_LIBZ
      96         606 :     if (algorithm == PG_COMPRESSION_GZIP)
      97         158 :         supported = true;
      98             : #endif
      99             : #ifdef USE_LZ4
     100         606 :     if (algorithm == PG_COMPRESSION_LZ4)
     101          18 :         supported = true;
     102             : #endif
     103             : #ifdef USE_ZSTD
     104             :     if (algorithm == PG_COMPRESSION_ZSTD)
     105             :         supported = true;
     106             : #endif
     107             :     /* Not supported: build a message that names the algorithm. */
     108         606 :     if (!supported)
     109           0 :         return psprintf(_("this build does not support compression with %s"),
     110             :                         get_compress_algorithm_name(algorithm));
     111             : 
     112         606 :     return NULL;
     113             : }
     114             : 
     115             : /*----------------------
     116             :  * Compressor API
     117             :  *----------------------
     118             :  */
     119             : 
     120             : /*
     121             :  * Allocate a new compressor and store readF and writeF in it as its callbacks.
     122             :  */
     123             : CompressorState *
     124         430 : AllocateCompressor(const pg_compress_specification compression_spec,
     125             :                    ReadFunc readF, WriteFunc writeF)
     126             : {
     127             :     CompressorState *cs;
     128             : 
     129         430 :     cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
     130         430 :     cs->readF = readF;
     131         430 :     cs->writeF = writeF;
     132             :     /* Run the Init routine matching the requested algorithm, if any. */
     133         430 :     if (compression_spec.algorithm == PG_COMPRESSION_NONE)
     134           0 :         InitCompressorNone(cs, compression_spec);
     135         430 :     else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
     136         302 :         InitCompressorGzip(cs, compression_spec);
     137         128 :     else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
     138         128 :         InitCompressorLZ4(cs, compression_spec);
     139           0 :     else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
     140           0 :         InitCompressorZstd(cs, compression_spec);
     141             :     /* NOTE(review): any other algorithm value gets no Init call here. */
     142         430 :     return cs;
     143             : }
     144             : 
     145             : /*
     146             :  * Terminate compression library context and flush its buffers (both delegated to the cs->end callback), then free cs.
     147             :  */
     148             : void
     149         430 : EndCompressor(ArchiveHandle *AH, CompressorState *cs)
     150             : {
     151         430 :     cs->end(AH, cs);
     152         430 :     pg_free(cs);
     153         430 : }
     154             : 
     155             : /*----------------------
     156             :  * Compressed stream API
     157             :  *----------------------
     158             :  */
     159             : 
     160             : /*
     161             :  * Private routines. hasSuffix() returns 1 if filename ends with suffix, else 0.
     162             :  */
     163             : static int
     164        1044 : hasSuffix(const char *filename, const char *suffix)
     165             : {
     166        1044 :     int         filenamelen = strlen(filename);
     167        1044 :     int         suffixlen = strlen(suffix);
     168             :     /* NOTE(review): the strlen() results are narrowed from size_t to int. */
     169        1044 :     if (filenamelen < suffixlen)    /* suffix longer than the name: no match */
     170           0 :         return 0;
     171             : 
     172        1044 :     return memcmp(&filename[filenamelen - suffixlen],
     173             :                   suffix,
     174        1044 :                   suffixlen) == 0;
     175             : }
     176             : 
     177             : /* free(p), then restore errno to its value on entry, so error paths below can free memory without losing the error code */
     178             : static void
     179        2056 : free_keep_errno(void *p)
     180             : {
     181        2056 :     int         save_errno = errno;
     182             : 
     183        2056 :     free(p);
     184        2056 :     errno = save_errno;
     185        2056 : }
     186             : 
     187             : /*
     188             :  * Public interface
     189             :  */
     190             : 
     191             : /*
     192             :  * Allocate a compress file handle and initialize it for the specified compression algorithm. Returns the new handle.
     193             :  */
     194             : CompressFileHandle *
     195        1374 : InitCompressFileHandle(const pg_compress_specification compression_spec)
     196             : {
     197             :     CompressFileHandle *CFH;
     198             : 
     199        1374 :     CFH = pg_malloc0(sizeof(CompressFileHandle));
     200             :     /* Dispatch to the Init routine matching the algorithm, if any. */
     201        1374 :     if (compression_spec.algorithm == PG_COMPRESSION_NONE)
     202         752 :         InitCompressFileHandleNone(CFH, compression_spec);
     203         622 :     else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
     204         492 :         InitCompressFileHandleGzip(CFH, compression_spec);
     205         130 :     else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
     206         130 :         InitCompressFileHandleLZ4(CFH, compression_spec);
     207           0 :     else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
     208           0 :         InitCompressFileHandleZstd(CFH, compression_spec);
     209             :     /* NOTE(review): any other algorithm value gets no Init call here. */
     210        1374 :     return CFH;
     211             : }
     212             : 
     213             : /*
     214             :  * Checks if a compressed file named "<path>.<ext>" exists, using access().
     215             :  *
     216             :  * The old *fname buffer is freed and replaced by a newly allocated string
     217             :  * holding the tested filename, whether or not that file exists.
     218             :  */
     219             : static bool
     220         376 : check_compressed_file(const char *path, char **fname, char *ext)
     221             : {
     222         376 :     free_keep_errno(*fname);
     223         376 :     *fname = psprintf("%s.%s", path, ext);
     224         376 :     return (access(*fname, F_OK) == 0);
     225             : }
     226             : 
     227             : /*
     228             :  * Open a file for reading. 'path' is the file to open, and 'mode' must
     229             :  * be equal to PG_BINARY_R (this is checked with an assertion).
     230             :  *
     231             :  * If 'path' ends with the suffix of a known compression method (currently
     232             :  * ".gz", ".lz4" or ".zst"), then that compression is used. Otherwise the
     233             :  * compression is inferred by checking which file exists: first 'path' as is,
     234             :  * then 'path' with each known compression suffix appended. So if you pass
     235             :  * "foo" as 'path', this will open either "foo" or "foo.{gz,lz4,zst}",
     236             :  * trying in that order.
     237             :  *
     238             :  * On failure, return NULL with an error code in errno.
     239             :  */
     240             : CompressFileHandle *
     241         348 : InitDiscoverCompressFileHandle(const char *path, const char *mode)
     242             : {
     243         348 :     CompressFileHandle *CFH = NULL;
     244             :     struct stat st;
     245             :     char       *fname;
     246         348 :     pg_compress_specification compression_spec = {0};
     247             : 
     248         348 :     compression_spec.algorithm = PG_COMPRESSION_NONE;
     249             : 
     250             :     Assert(strcmp(mode, PG_BINARY_R) == 0);
     251             : 
     252         348 :     fname = pg_strdup(path);
     253             :     /* A known suffix on the given name decides the algorithm directly. */
     254         348 :     if (hasSuffix(fname, ".gz"))
     255           0 :         compression_spec.algorithm = PG_COMPRESSION_GZIP;
     256         348 :     else if (hasSuffix(fname, ".lz4"))
     257           0 :         compression_spec.algorithm = PG_COMPRESSION_LZ4;
     258         348 :     else if (hasSuffix(fname, ".zst"))
     259           0 :         compression_spec.algorithm = PG_COMPRESSION_ZSTD;
     260             :     else
     261             :     {
     262         348 :         if (stat(path, &st) == 0)
     263          36 :             compression_spec.algorithm = PG_COMPRESSION_NONE;
     264         312 :         else if (check_compressed_file(path, &fname, "gz"))
     265         248 :             compression_spec.algorithm = PG_COMPRESSION_GZIP;
     266          64 :         else if (check_compressed_file(path, &fname, "lz4"))
     267          64 :             compression_spec.algorithm = PG_COMPRESSION_LZ4;
     268           0 :         else if (check_compressed_file(path, &fname, "zst"))
     269           0 :             compression_spec.algorithm = PG_COMPRESSION_ZSTD;
     270             :     }
     271             :     /* NOTE(review): if no candidate exists, fname is the last name tried ("<path>.zst") and the algorithm stays NONE. */
     272         348 :     CFH = InitCompressFileHandle(compression_spec);
     273         348 :     if (!CFH->open_func(fname, -1, mode, CFH))
     274             :     {
     275           0 :         free_keep_errno(CFH);   /* keep errno from the failed open for the caller */
     276           0 :         CFH = NULL;
     277             :     }
     278         348 :     free_keep_errno(fname);
     279             : 
     280         348 :     return CFH;
     281             : }
     282             : 
     283             : /*
     284             :  * Close an open file handle and release its memory.
     285             :  * On failure, returns false; errno as left by close_func survives the free.
     286             :  * A handle without private_data is just freed, and false is returned.
     287             :  */
     288             : bool
     289        1332 : EndCompressFileHandle(CompressFileHandle *CFH)
     290             : {
     291        1332 :     bool        ret = false;
     292             :     /* close_func is only called when private_data is set. */
     293        1332 :     if (CFH->private_data)
     294        1332 :         ret = CFH->close_func(CFH);
     295             : 
     296        1332 :     free_keep_errno(CFH);
     297             : 
     298        1332 :     return ret;
     299             : }

Generated by: LCOV version 1.14