LCOV - code coverage report
Current view: top level - src/bin/pg_dump - compress_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 73 87 83.9 %
Date: 2025-01-18 05:15:39 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * compress_io.c
       4             :  *   Routines for archivers to write an uncompressed or compressed data
       5             :  *   stream.
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * This file includes two APIs for dealing with compressed data. The first
      11             :  * provides more flexibility, using callbacks to read/write data from the
      12             :  * underlying stream. The second API is a wrapper around fopen and
      13             :  * friends, providing an interface similar to those, but abstracts away
      14             :  * the possible compression. The second API is intended to produce
      15             :  * files that can be easily manipulated with an external compression
      16             :  * utility program.
      17             :  *
      18             :  * Compressor API
      19             :  * --------------
      20             :  *
      21             :  *  The interface for writing to an archive consists of three functions:
      22             :  *  AllocateCompressor, writeData, and EndCompressor. First you call
      23             :  *  AllocateCompressor, then write all the data by calling writeData as many
      24             :  *  times as needed, and finally EndCompressor. writeData will call the
      25             :  *  WriteFunc that was provided to AllocateCompressor for each chunk of
      26             :  *  compressed data.
      27             :  *
      28             :  *  The interface for reading an archive consists of three analogous functions:
      29             :  *  AllocateCompressor, readData, and EndCompressor. First you call
      30             :  *  AllocateCompressor, then read all the data by calling readData, which reads
      31             :  *  the whole compressed stream by repeatedly calling the given ReadFunc. ReadFunc
      32             :  *  returns the compressed data one chunk at a time. Then readData decompresses
      33             :  *  it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
      34             :  *  to signal EOF. The interface is the same for compressed and uncompressed
      35             :  *  streams.
      36             :  *
      37             :  * Compressed stream API
      38             :  * ----------------------
      39             :  *
      40             :  *  The compressed stream API provides a set of function pointers for
      41             :  *  opening, reading, writing, and finally closing files. The implemented
      42             :  *  function pointers are documented in the corresponding header file and are
      43             :  *  common for all streams. It allows the caller to use the same functions for
      44             :  *  both compressed and uncompressed streams.
      45             :  *
      46             :  *  The interface consists of three functions, InitCompressFileHandle,
      47             :  *  InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
      48             :  *  compression is known, then start by calling InitCompressFileHandle,
      49             :  *  otherwise discover it by using InitDiscoverCompressFileHandle. Then call
      50             :  *  the function pointers as required for the read/write operations. Finally
      51             :  *  call EndCompressFileHandle to end the stream.
      52             :  *
      53             :  *  InitDiscoverCompressFileHandle tries to infer the compression by the
      54             :  *  filename suffix. If the suffix is not yet known then it tries to simply
      55             :  *  open the file and if it fails, it tries to open the same file with
      56             :  *  compressed suffixes (.gz, .lz4 and .zst, in this order).
      57             :  *
      58             :  * IDENTIFICATION
      59             :  *     src/bin/pg_dump/compress_io.c
      60             :  *
      61             :  *-------------------------------------------------------------------------
      62             :  */
      63             : #include "postgres_fe.h"
      64             : 
      65             : #include <sys/stat.h>
      66             : #include <unistd.h>
      67             : 
      68             : #include "compress_gzip.h"
      69             : #include "compress_io.h"
      70             : #include "compress_lz4.h"
      71             : #include "compress_none.h"
      72             : #include "compress_zstd.h"
      73             : 
      74             : /*----------------------
      75             :  * Generic functions
      76             :  *----------------------
      77             :  */
      78             : 
      79             : /*
      80             :  * Checks whether support for a compression algorithm is implemented in
      81             :  * pg_dump/restore.
      82             :  *
      83             :  * On success returns NULL, otherwise returns a malloc'ed string which can be
      84             :  * used by the caller in an error message.
      85             :  */
      86             : char *
      87         614 : supports_compression(const pg_compress_specification compression_spec)
      88             : {
      89         614 :     const pg_compress_algorithm algorithm = compression_spec.algorithm;
      90         614 :     bool        supported = false;
      91             : 
      92         614 :     if (algorithm == PG_COMPRESSION_NONE)
      93         438 :         supported = true;
      94             : #ifdef HAVE_LIBZ
      95         614 :     if (algorithm == PG_COMPRESSION_GZIP)
      96         158 :         supported = true;
      97             : #endif
      98             : #ifdef USE_LZ4
      99         614 :     if (algorithm == PG_COMPRESSION_LZ4)
     100          18 :         supported = true;
     101             : #endif
     102             : #ifdef USE_ZSTD
     103             :     if (algorithm == PG_COMPRESSION_ZSTD)
     104             :         supported = true;
     105             : #endif
     106             : 
     107         614 :     if (!supported)
     108           0 :         return psprintf(_("this build does not support compression with %s"),
     109             :                         get_compress_algorithm_name(algorithm));
     110             : 
     111         614 :     return NULL;
     112             : }
     113             : 
     114             : /*----------------------
     115             :  * Compressor API
     116             :  *----------------------
     117             :  */
     118             : 
     119             : /*
     120             :  * Allocate a new compressor.
     121             :  */
     122             : CompressorState *
     123         430 : AllocateCompressor(const pg_compress_specification compression_spec,
     124             :                    ReadFunc readF, WriteFunc writeF)
     125             : {
     126             :     CompressorState *cs;
     127             : 
     128         430 :     cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
     129         430 :     cs->readF = readF;
     130         430 :     cs->writeF = writeF;
     131             : 
     132         430 :     if (compression_spec.algorithm == PG_COMPRESSION_NONE)
     133           0 :         InitCompressorNone(cs, compression_spec);
     134         430 :     else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
     135         302 :         InitCompressorGzip(cs, compression_spec);
     136         128 :     else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
     137         128 :         InitCompressorLZ4(cs, compression_spec);
     138           0 :     else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
     139           0 :         InitCompressorZstd(cs, compression_spec);
     140             : 
     141         430 :     return cs;
     142             : }
     143             : 
     144             : /*
     145             :  * Terminate compression library context and flush its buffers.
     146             :  */
     147             : void
     148         430 : EndCompressor(ArchiveHandle *AH, CompressorState *cs)
     149             : {
     150         430 :     cs->end(AH, cs);
     151         430 :     pg_free(cs);
     152         430 : }
     153             : 
     154             : /*----------------------
     155             :  * Compressed stream API
     156             :  *----------------------
     157             :  */
     158             : 
     159             : /*
     160             :  * Private routines
     161             :  */
     162             : static int
     163        1044 : hasSuffix(const char *filename, const char *suffix)
     164             : {
     165        1044 :     int         filenamelen = strlen(filename);
     166        1044 :     int         suffixlen = strlen(suffix);
     167             : 
     168        1044 :     if (filenamelen < suffixlen)
     169           0 :         return 0;
     170             : 
     171        1044 :     return memcmp(&filename[filenamelen - suffixlen],
     172             :                   suffix,
     173        1044 :                   suffixlen) == 0;
     174             : }
     175             : 
     176             : /* free() without changing errno; useful in several places below */
     177             : static void
     178        2064 : free_keep_errno(void *p)
     179             : {
     180        2064 :     int         save_errno = errno;
     181             : 
     182        2064 :     free(p);
     183        2064 :     errno = save_errno;
     184        2064 : }
     185             : 
     186             : /*
     187             :  * Public interface
     188             :  */
     189             : 
     190             : /*
     191             :  * Initialize a compress file handle for the specified compression algorithm.
     192             :  */
     193             : CompressFileHandle *
     194        1382 : InitCompressFileHandle(const pg_compress_specification compression_spec)
     195             : {
     196             :     CompressFileHandle *CFH;
     197             : 
     198        1382 :     CFH = pg_malloc0(sizeof(CompressFileHandle));
     199             : 
     200        1382 :     if (compression_spec.algorithm == PG_COMPRESSION_NONE)
     201         760 :         InitCompressFileHandleNone(CFH, compression_spec);
     202         622 :     else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
     203         492 :         InitCompressFileHandleGzip(CFH, compression_spec);
     204         130 :     else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
     205         130 :         InitCompressFileHandleLZ4(CFH, compression_spec);
     206           0 :     else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
     207           0 :         InitCompressFileHandleZstd(CFH, compression_spec);
     208             : 
     209        1382 :     return CFH;
     210             : }
     211             : 
     212             : /*
     213             :  * Checks if a compressed file (with the specified extension) exists.
     214             :  *
     215             :  * The filename of the tested file is stored to fname buffer (the existing
     216             :  * buffer is freed, new buffer is allocated and returned through the pointer).
     217             :  */
     218             : static bool
     219         376 : check_compressed_file(const char *path, char **fname, char *ext)
     220             : {
     221         376 :     free_keep_errno(*fname);
     222         376 :     *fname = psprintf("%s.%s", path, ext);
     223         376 :     return (access(*fname, F_OK) == 0);
     224             : }
     225             : 
     226             : /*
     227             :  * Open a file for reading. 'path' is the file to open, and 'mode' should
     228             :  * be either "r" or "rb".
     229             :  *
     230             :  * If the file at 'path' contains the suffix of a supported compression method,
     231             :  * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
     232             :  * throughout. Otherwise the compression will be inferred by iteratively trying
     233             :  * to open the file at 'path', first as is, then by appending known compression
     234             :  * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
     235             :  * "foo.{gz,lz4,zst}", trying in that order.
     236             :  *
     237             :  * On failure, return NULL with an error code in errno.
     238             :  */
     239             : CompressFileHandle *
     240         348 : InitDiscoverCompressFileHandle(const char *path, const char *mode)
     241             : {
     242         348 :     CompressFileHandle *CFH = NULL;
     243             :     struct stat st;
     244             :     char       *fname;
     245         348 :     pg_compress_specification compression_spec = {0};
     246             : 
     247         348 :     compression_spec.algorithm = PG_COMPRESSION_NONE;
     248             : 
     249             :     Assert(strcmp(mode, PG_BINARY_R) == 0);
     250             : 
     251         348 :     fname = pg_strdup(path);
     252             : 
     253         348 :     if (hasSuffix(fname, ".gz"))
     254           0 :         compression_spec.algorithm = PG_COMPRESSION_GZIP;
     255         348 :     else if (hasSuffix(fname, ".lz4"))
     256           0 :         compression_spec.algorithm = PG_COMPRESSION_LZ4;
     257         348 :     else if (hasSuffix(fname, ".zst"))
     258           0 :         compression_spec.algorithm = PG_COMPRESSION_ZSTD;
     259             :     else
     260             :     {
     261         348 :         if (stat(path, &st) == 0)
     262          36 :             compression_spec.algorithm = PG_COMPRESSION_NONE;
     263         312 :         else if (check_compressed_file(path, &fname, "gz"))
     264         248 :             compression_spec.algorithm = PG_COMPRESSION_GZIP;
     265          64 :         else if (check_compressed_file(path, &fname, "lz4"))
     266          64 :             compression_spec.algorithm = PG_COMPRESSION_LZ4;
     267           0 :         else if (check_compressed_file(path, &fname, "zst"))
     268           0 :             compression_spec.algorithm = PG_COMPRESSION_ZSTD;
     269             :     }
     270             : 
     271         348 :     CFH = InitCompressFileHandle(compression_spec);
     272         348 :     if (!CFH->open_func(fname, -1, mode, CFH))
     273             :     {
     274           0 :         free_keep_errno(CFH);
     275           0 :         CFH = NULL;
     276             :     }
     277         348 :     free_keep_errno(fname);
     278             : 
     279         348 :     return CFH;
     280             : }
     281             : 
     282             : /*
     283             :  * Close an open file handle and release its memory.
     284             :  *
     285             :  * On failure, returns false and sets errno appropriately.
     286             :  */
     287             : bool
     288        1340 : EndCompressFileHandle(CompressFileHandle *CFH)
     289             : {
     290        1340 :     bool        ret = false;
     291             : 
     292        1340 :     if (CFH->private_data)
     293        1340 :         ret = CFH->close_func(CFH);
     294             : 
     295        1340 :     free_keep_errno(CFH);
     296             : 
     297        1340 :     return ret;
     298             : }

Generated by: LCOV version 1.14