Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * compress_io.c
4 : * Routines for archivers to write an uncompressed or compressed data
5 : * stream.
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * This file includes two APIs for dealing with compressed data. The first
11 : * provides more flexibility, using callbacks to read/write data from the
12 : * underlying stream. The second API is a wrapper around fopen and
13 : * friends; it provides a similar interface while abstracting away
14 : * the possible compression. It is intended to produce files that
15 : * can be easily manipulated with an external compression utility
16 : * program.
17 : *
18 : * Compressor API
19 : * --------------
20 : *
21 : * The interface for writing to an archive consists of three functions:
22 : * AllocateCompressor, writeData, and EndCompressor. First you call
23 : * AllocateCompressor, then write all the data by calling writeData as many
24 : * times as needed, and finally EndCompressor. writeData will call the
25 : * WriteFunc that was provided to AllocateCompressor for each chunk of
26 : * compressed data.
27 : *
28 : * The interface for reading an archive consists of the same three functions:
29 : * AllocateCompressor, readData, and EndCompressor. First you call
30 : * AllocateCompressor, then read all the data by calling readData to read the
31 : * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32 : * returns the compressed data one chunk at a time. Then readData decompresses
33 : * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34 : * to signal EOF. The interface is the same for compressed and uncompressed
35 : * streams.
36 : *
37 : * Compressed stream API
38 : * ----------------------
39 : *
40 : * The compressed stream API provides a set of function pointers for
41 : * opening, reading, writing, and finally closing files. The implemented
42 : * function pointers are documented in the corresponding header file and are
43 : * common for all streams. It allows the caller to use the same functions for
44 : * both compressed and uncompressed streams.
45 : *
46 : * The interface consists of three functions, InitCompressFileHandle,
47 : * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48 : * compression is known, then start by calling InitCompressFileHandle,
49 : * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50 : * the function pointers as required for the read/write operations. Finally
51 : * call EndCompressFileHandle to end the stream.
52 : *
53 : * InitDiscoverCompressFileHandle tries to infer the compression by the
54 : * filename suffix. If the suffix is not yet known then it tries to simply
55 : * open the file and if it fails, it tries to open the same file with
56 : * compressed suffixes (.gz, .lz4 and .zst, in this order).
57 : *
58 : * IDENTIFICATION
59 : * src/bin/pg_dump/compress_io.c
60 : *
61 : *-------------------------------------------------------------------------
62 : */
63 : #include "postgres_fe.h"
64 :
65 : #include <sys/stat.h>
66 : #include <unistd.h>
67 :
68 : #include "compress_gzip.h"
69 : #include "compress_io.h"
70 : #include "compress_lz4.h"
71 : #include "compress_none.h"
72 : #include "compress_zstd.h"
73 :
74 : /*----------------------
75 : * Generic functions
76 : *----------------------
77 : */
78 :
79 : /*
80 : * Checks whether support for a compression algorithm is implemented in
81 : * pg_dump/restore.
82 : *
83 : * On success returns NULL, otherwise returns a malloc'ed string which can be
84 : * used by the caller in an error message.
85 : */
86 : char *
87 614 : supports_compression(const pg_compress_specification compression_spec)
88 : {
89 614 : const pg_compress_algorithm algorithm = compression_spec.algorithm;
90 614 : bool supported = false;
91 :
92 614 : if (algorithm == PG_COMPRESSION_NONE)
93 438 : supported = true;
94 : #ifdef HAVE_LIBZ
95 614 : if (algorithm == PG_COMPRESSION_GZIP)
96 158 : supported = true;
97 : #endif
98 : #ifdef USE_LZ4
99 614 : if (algorithm == PG_COMPRESSION_LZ4)
100 18 : supported = true;
101 : #endif
102 : #ifdef USE_ZSTD
103 : if (algorithm == PG_COMPRESSION_ZSTD)
104 : supported = true;
105 : #endif
106 :
107 614 : if (!supported)
108 0 : return psprintf(_("this build does not support compression with %s"),
109 : get_compress_algorithm_name(algorithm));
110 :
111 614 : return NULL;
112 : }
113 :
114 : /*----------------------
115 : * Compressor API
116 : *----------------------
117 : */
118 :
119 : /*
120 : * Allocate a new compressor.
121 : */
122 : CompressorState *
123 430 : AllocateCompressor(const pg_compress_specification compression_spec,
124 : ReadFunc readF, WriteFunc writeF)
125 : {
126 : CompressorState *cs;
127 :
128 430 : cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
129 430 : cs->readF = readF;
130 430 : cs->writeF = writeF;
131 :
132 430 : if (compression_spec.algorithm == PG_COMPRESSION_NONE)
133 0 : InitCompressorNone(cs, compression_spec);
134 430 : else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
135 302 : InitCompressorGzip(cs, compression_spec);
136 128 : else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
137 128 : InitCompressorLZ4(cs, compression_spec);
138 0 : else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
139 0 : InitCompressorZstd(cs, compression_spec);
140 :
141 430 : return cs;
142 : }
143 :
144 : /*
145 : * Terminate compression library context and flush its buffers.
146 : */
147 : void
148 430 : EndCompressor(ArchiveHandle *AH, CompressorState *cs)
149 : {
150 430 : cs->end(AH, cs);
151 430 : pg_free(cs);
152 430 : }
153 :
154 : /*----------------------
155 : * Compressed stream API
156 : *----------------------
157 : */
158 :
159 : /*
160 : * Private routines
161 : */
/*
 * Test whether 'filename' ends with 'suffix'.
 *
 * Returns 1 when the trailing strlen(suffix) bytes of filename are equal to
 * suffix (so an empty suffix always matches), and 0 otherwise, including
 * when filename is shorter than suffix.  The comparison is byte-wise and
 * case-sensitive.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* strlen() yields size_t; keep that type so no length gets narrowed */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
175 :
/*
 * Release 'p' with free() while leaving errno exactly as the caller had it.
 *
 * Used on cleanup paths where the caller still wants to report the errno of
 * an earlier failure.
 */
static void
free_keep_errno(void *p)
{
	const int	errno_on_entry = errno;

	free(p);

	/* Undo any change free() may have made to errno. */
	errno = errno_on_entry;
}
185 :
186 : /*
187 : * Public interface
188 : */
189 :
190 : /*
191 : * Initialize a compress file handle for the specified compression algorithm.
192 : */
193 : CompressFileHandle *
194 1382 : InitCompressFileHandle(const pg_compress_specification compression_spec)
195 : {
196 : CompressFileHandle *CFH;
197 :
198 1382 : CFH = pg_malloc0(sizeof(CompressFileHandle));
199 :
200 1382 : if (compression_spec.algorithm == PG_COMPRESSION_NONE)
201 760 : InitCompressFileHandleNone(CFH, compression_spec);
202 622 : else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
203 492 : InitCompressFileHandleGzip(CFH, compression_spec);
204 130 : else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
205 130 : InitCompressFileHandleLZ4(CFH, compression_spec);
206 0 : else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
207 0 : InitCompressFileHandleZstd(CFH, compression_spec);
208 :
209 1382 : return CFH;
210 : }
211 :
/*
 * Check whether the file "<path>.<ext>" exists.
 *
 * path  - base file name, without the compression suffix
 * fname - in/out: the string currently pointed to is freed and replaced by
 *         a newly allocated "<path>.<ext>"; the caller owns the new string
 *         whether or not the file exists
 * ext   - suffix to try, without the leading dot (e.g. "gz"); it is only
 *         read, hence const
 *
 * Returns true if access(F_OK) succeeds on the constructed name.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);
	return (access(*fname, F_OK) == 0);
}
225 :
226 : /*
227 : * Open a file for reading. 'path' is the file to open, and 'mode' should
228 : * be either "r" or "rb".
229 : *
230 : * If the file at 'path' contains the suffix of a supported compression method,
231 : * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
232 : * throughout. Otherwise the compression will be inferred by iteratively trying
233 : * to open the file at 'path', first as is, then by appending known compression
234 : * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
235 : * "foo.{gz,lz4,zst}", trying in that order.
236 : *
237 : * On failure, return NULL with an error code in errno.
238 : */
239 : CompressFileHandle *
240 348 : InitDiscoverCompressFileHandle(const char *path, const char *mode)
241 : {
242 348 : CompressFileHandle *CFH = NULL;
243 : struct stat st;
244 : char *fname;
245 348 : pg_compress_specification compression_spec = {0};
246 :
247 348 : compression_spec.algorithm = PG_COMPRESSION_NONE;
248 :
249 : Assert(strcmp(mode, PG_BINARY_R) == 0);
250 :
251 348 : fname = pg_strdup(path);
252 :
253 348 : if (hasSuffix(fname, ".gz"))
254 0 : compression_spec.algorithm = PG_COMPRESSION_GZIP;
255 348 : else if (hasSuffix(fname, ".lz4"))
256 0 : compression_spec.algorithm = PG_COMPRESSION_LZ4;
257 348 : else if (hasSuffix(fname, ".zst"))
258 0 : compression_spec.algorithm = PG_COMPRESSION_ZSTD;
259 : else
260 : {
261 348 : if (stat(path, &st) == 0)
262 36 : compression_spec.algorithm = PG_COMPRESSION_NONE;
263 312 : else if (check_compressed_file(path, &fname, "gz"))
264 248 : compression_spec.algorithm = PG_COMPRESSION_GZIP;
265 64 : else if (check_compressed_file(path, &fname, "lz4"))
266 64 : compression_spec.algorithm = PG_COMPRESSION_LZ4;
267 0 : else if (check_compressed_file(path, &fname, "zst"))
268 0 : compression_spec.algorithm = PG_COMPRESSION_ZSTD;
269 : }
270 :
271 348 : CFH = InitCompressFileHandle(compression_spec);
272 348 : if (!CFH->open_func(fname, -1, mode, CFH))
273 : {
274 0 : free_keep_errno(CFH);
275 0 : CFH = NULL;
276 : }
277 348 : free_keep_errno(fname);
278 :
279 348 : return CFH;
280 : }
281 :
282 : /*
283 : * Close an open file handle and release its memory.
284 : *
285 : * On failure, returns false and sets errno appropriately.
286 : */
287 : bool
288 1340 : EndCompressFileHandle(CompressFileHandle *CFH)
289 : {
290 1340 : bool ret = false;
291 :
292 1340 : if (CFH->private_data)
293 1340 : ret = CFH->close_func(CFH);
294 :
295 1340 : free_keep_errno(CFH);
296 :
297 1340 : return ret;
298 : }
|