LCOV - code coverage report
Current view: top level - src/bin/pg_dump - compress_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 73 87 83.9 %
Date: 2025-02-22 07:14:56 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * compress_io.c
       4             :  *   Routines for archivers to write an uncompressed or compressed data
       5             :  *   stream.
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * This file includes two APIs for dealing with compressed data. The first
      11             :  * provides more flexibility, using callbacks to read/write data from the
      12             :  * underlying stream. The second API is a wrapper around fopen and
      13             :  * friends, providing an interface similar to those, but abstracts away
      14             :  * the possible compression. The second API is designed so that the
      15             :  * resulting files can be easily manipulated with an external
      16             :  * compression utility program.
      17             :  *
      18             :  * Compressor API
      19             :  * --------------
      20             :  *
      21             :  *  The interface for writing to an archive consists of three functions:
      22             :  *  AllocateCompressor, writeData, and EndCompressor. First you call
      23             :  *  AllocateCompressor, then write all the data by calling writeData as many
      24             :  *  times as needed, and finally EndCompressor. writeData will call the
      25             :  *  WriteFunc that was provided to AllocateCompressor for each chunk of
      26             :  *  compressed data.
      27             :  *
      28             :  *  The interface for reading an archive consists of three analogous functions:
      29             :  *  AllocateCompressor, readData, and EndCompressor. First you call
      30             :  *  AllocateCompressor, then read all the data by calling readData to read the
      31             :  *  whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
      32             :  *  returns the compressed data one chunk at a time. Then readData decompresses
      33             :  *  it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
      34             :  *  to signal EOF. The interface is the same for compressed and uncompressed
      35             :  *  streams.
      36             :  *
      37             :  * Compressed stream API
      38             :  * ----------------------
      39             :  *
      40             :  *  The compressed stream API provides a set of function pointers for
      41             :  *  opening, reading, writing, and finally closing files. The implemented
      42             :  *  function pointers are documented in the corresponding header file and are
      43             :  *  common for all streams. It allows the caller to use the same functions for
      44             :  *  both compressed and uncompressed streams.
      45             :  *
      46             :  *  The interface consists of three functions, InitCompressFileHandle,
      47             :  *  InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
      48             :  *  compression is known, then start by calling InitCompressFileHandle,
      49             :  *  otherwise discover it by using InitDiscoverCompressFileHandle. Then call
      50             :  *  the function pointers as required for the read/write operations. Finally
      51             :  *  call EndCompressFileHandle to end the stream.
      52             :  *
      53             :  *  InitDiscoverCompressFileHandle tries to infer the compression by the
      54             :  *  filename suffix. If the suffix is not yet known then it tries to simply
      55             :  *  open the file and if it fails, it tries to open the same file with
      56             :  *  compressed suffixes (.gz, .lz4 and .zst, in this order).
      57             :  *
      58             :  * IDENTIFICATION
      59             :  *     src/bin/pg_dump/compress_io.c
      60             :  *
      61             :  *-------------------------------------------------------------------------
      62             :  */
      63             : #include "postgres_fe.h"
      64             : 
      65             : #include <sys/stat.h>
      66             : #include <unistd.h>
      67             : 
      68             : #include "compress_gzip.h"
      69             : #include "compress_io.h"
      70             : #include "compress_lz4.h"
      71             : #include "compress_none.h"
      72             : #include "compress_zstd.h"
      73             : 
      74             : /*----------------------
      75             :  * Generic functions
      76             :  *----------------------
      77             :  */
      78             : 
      79             : /*
      80             :  * Checks whether support for a compression algorithm is implemented in
      81             :  * pg_dump/restore.
      82             :  *
      83             :  * On success returns NULL, otherwise returns a malloc'ed string which can be
      84             :  * used by the caller in an error message.
      85             :  */
      86             : char *
      87         634 : supports_compression(const pg_compress_specification compression_spec)
      88             : {
      89         634 :     const pg_compress_algorithm algorithm = compression_spec.algorithm;
      90         634 :     bool        supported = false;
      91             : 
      92         634 :     if (algorithm == PG_COMPRESSION_NONE)
      93         450 :         supported = true;
      94             : #ifdef HAVE_LIBZ
      95         634 :     if (algorithm == PG_COMPRESSION_GZIP)
      96         166 :         supported = true;
      97             : #endif
      98             : #ifdef USE_LZ4
      99         634 :     if (algorithm == PG_COMPRESSION_LZ4)
     100          18 :         supported = true;
     101             : #endif
     102             : #ifdef USE_ZSTD
     103             :     if (algorithm == PG_COMPRESSION_ZSTD)
     104             :         supported = true;
     105             : #endif
     106             : 
     107         634 :     if (!supported)
     108           0 :         return psprintf(_("this build does not support compression with %s"),
     109             :                         get_compress_algorithm_name(algorithm));
     110             : 
     111         634 :     return NULL;
     112             : }
     113             : 
     114             : /*----------------------
     115             :  * Compressor API
     116             :  *----------------------
     117             :  */
     118             : 
     119             : /*
     120             :  * Allocate a new compressor.
     121             :  */
     122             : CompressorState *
     123         442 : AllocateCompressor(const pg_compress_specification compression_spec,
     124             :                    ReadFunc readF, WriteFunc writeF)
     125             : {
     126             :     CompressorState *cs;
     127             : 
     128         442 :     cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
     129         442 :     cs->readF = readF;
     130         442 :     cs->writeF = writeF;
     131             : 
     132         442 :     if (compression_spec.algorithm == PG_COMPRESSION_NONE)
     133           0 :         InitCompressorNone(cs, compression_spec);
     134         442 :     else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
     135         310 :         InitCompressorGzip(cs, compression_spec);
     136         132 :     else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
     137         132 :         InitCompressorLZ4(cs, compression_spec);
     138           0 :     else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
     139           0 :         InitCompressorZstd(cs, compression_spec);
     140             : 
     141         442 :     return cs;
     142             : }
     143             : 
     144             : /*
     145             :  * Terminate compression library context and flush its buffers.
     146             :  */
     147             : void
     148         442 : EndCompressor(ArchiveHandle *AH, CompressorState *cs)
     149             : {
     150         442 :     cs->end(AH, cs);
     151         442 :     pg_free(cs);
     152         442 : }
     153             : 
     154             : /*----------------------
     155             :  * Compressed stream API
     156             :  *----------------------
     157             :  */
     158             : 
     159             : /*
     160             :  * Private routines
     161             :  */
     /*
      * Test whether 'filename' ends with 'suffix'.
      *
      * Returns 1 if the trailing strlen(suffix) bytes of filename are identical
      * to suffix (an empty suffix therefore always matches), and 0 otherwise,
      * including when filename is shorter than suffix.  The comparison is
      * byte-wise and case-sensitive.
      */
     static int
     hasSuffix(const char *filename, const char *suffix)
     {
         /* size_t, not int: strlen() results must not be narrowed. */
         size_t      filenamelen = strlen(filename);
         size_t      suffixlen = strlen(suffix);

         /* Guard first so the subtraction below cannot wrap around. */
         if (filenamelen < suffixlen)
             return 0;

         return memcmp(filename + (filenamelen - suffixlen),
                       suffix,
                       suffixlen) == 0;
     }
     175             : 
     /*
      * Release 'p' with free() while guaranteeing that errno has the same value
      * afterwards as it had on entry.  Used on error paths below so that cleanup
      * does not disturb the error code reported to the caller.
      */
     static void
     free_keep_errno(void *p)
     {
         const int   errno_on_entry = errno;

         free(p);

         /* Put back whatever errno was before free() ran. */
         errno = errno_on_entry;
     }
     185             : 
     186             : /*
     187             :  * Public interface
     188             :  */
     189             : 
     190             : /*
     191             :  * Initialize a compress file handle for the specified compression algorithm.
     192             :  */
     193             : CompressFileHandle *
     194        1422 : InitCompressFileHandle(const pg_compress_specification compression_spec)
     195             : {
     196             :     CompressFileHandle *CFH;
     197             : 
     198        1422 :     CFH = pg_malloc0(sizeof(CompressFileHandle));
     199             : 
     200        1422 :     if (compression_spec.algorithm == PG_COMPRESSION_NONE)
     201         784 :         InitCompressFileHandleNone(CFH, compression_spec);
     202         638 :     else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
     203         504 :         InitCompressFileHandleGzip(CFH, compression_spec);
     204         134 :     else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
     205         134 :         InitCompressFileHandleLZ4(CFH, compression_spec);
     206           0 :     else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
     207           0 :         InitCompressFileHandleZstd(CFH, compression_spec);
     208             : 
     209        1422 :     return CFH;
     210             : }
     211             : 
     /*
      * Check whether the file "<path>.<ext>" exists.
      *
      * path:  base file name, without the compression extension.
      * fname: in/out.  The string currently pointed to is freed and replaced by
      *        a newly allocated "<path>.<ext>"; this happens whether or not the
      *        file exists, and the caller owns the new string.
      * ext:   extension to append, without the leading dot (e.g. "gz").  It is
      *        only read, hence const; callers pass string literals.
      *
      * Returns true if access() with F_OK succeeds on the constructed name.
      */
     static bool
     check_compressed_file(const char *path, char **fname, const char *ext)
     {
         /* Drop the previous candidate name without disturbing errno. */
         free_keep_errno(*fname);
         *fname = psprintf("%s.%s", path, ext);

         return (access(*fname, F_OK) == 0);
     }
     225             : 
     226             : /*
     227             :  * Open a file for reading. 'path' is the file to open, and 'mode' should
     228             :  * be either "r" or "rb".
     229             :  *
     230             :  * If the file at 'path' contains the suffix of a supported compression method,
     231             :  * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
     232             :  * throughout. Otherwise the compression will be inferred by iteratively trying
     233             :  * to open the file at 'path', first as is, then by appending known compression
     234             :  * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
     235             :  * "foo.{gz,lz4,zst}", trying in that order.
     236             :  *
     237             :  * On failure, return NULL with an error code in errno.
     238             :  */
     239             : CompressFileHandle *
     240         356 : InitDiscoverCompressFileHandle(const char *path, const char *mode)
     241             : {
     242         356 :     CompressFileHandle *CFH = NULL;
     243             :     struct stat st;
     244             :     char       *fname;
     245         356 :     pg_compress_specification compression_spec = {0};
     246             : 
     247         356 :     compression_spec.algorithm = PG_COMPRESSION_NONE;
     248             : 
     249             :     Assert(strcmp(mode, PG_BINARY_R) == 0);
     250             : 
     251         356 :     fname = pg_strdup(path);
     252             : 
     253         356 :     if (hasSuffix(fname, ".gz"))
     254           0 :         compression_spec.algorithm = PG_COMPRESSION_GZIP;
     255         356 :     else if (hasSuffix(fname, ".lz4"))
     256           0 :         compression_spec.algorithm = PG_COMPRESSION_LZ4;
     257         356 :     else if (hasSuffix(fname, ".zst"))
     258           0 :         compression_spec.algorithm = PG_COMPRESSION_ZSTD;
     259             :     else
     260             :     {
     261         356 :         if (stat(path, &st) == 0)
     262          36 :             compression_spec.algorithm = PG_COMPRESSION_NONE;
     263         320 :         else if (check_compressed_file(path, &fname, "gz"))
     264         254 :             compression_spec.algorithm = PG_COMPRESSION_GZIP;
     265          66 :         else if (check_compressed_file(path, &fname, "lz4"))
     266          66 :             compression_spec.algorithm = PG_COMPRESSION_LZ4;
     267           0 :         else if (check_compressed_file(path, &fname, "zst"))
     268           0 :             compression_spec.algorithm = PG_COMPRESSION_ZSTD;
     269             :     }
     270             : 
     271         356 :     CFH = InitCompressFileHandle(compression_spec);
     272         356 :     if (!CFH->open_func(fname, -1, mode, CFH))
     273             :     {
     274           0 :         free_keep_errno(CFH);
     275           0 :         CFH = NULL;
     276             :     }
     277         356 :     free_keep_errno(fname);
     278             : 
     279         356 :     return CFH;
     280             : }
     281             : 
     282             : /*
     283             :  * Close an open file handle and release its memory.
     284             :  *
     285             :  * On failure, returns false and sets errno appropriately.
     286             :  */
     287             : bool
     288        1380 : EndCompressFileHandle(CompressFileHandle *CFH)
     289             : {
     290        1380 :     bool        ret = false;
     291             : 
     292        1380 :     if (CFH->private_data)
     293        1380 :         ret = CFH->close_func(CFH);
     294             : 
     295        1380 :     free_keep_errno(CFH);
     296             : 
     297        1380 :     return ret;
     298             : }

Generated by: LCOV version 1.14