Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * compression.c
4 : *
5 : * Shared code for compression methods and specifications.
6 : *
7 : * A compression specification specifies the parameters that should be used
8 : * when performing compression with a specific algorithm. The simplest
9 : * possible compression specification is an integer, which sets the
10 : * compression level.
11 : *
12 : * Otherwise, a compression specification is a comma-separated list of items,
13 : * each having the form keyword or keyword=value.
14 : *
15 : * Currently, the supported keywords are "level", "long", and "workers".
16 : *
17 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
18 : *
19 : * IDENTIFICATION
20 : * src/common/compression.c
21 : *-------------------------------------------------------------------------
22 : */
23 :
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38 :
39 : static int expect_integer_value(char *keyword, char *value,
40 : pg_compress_specification *result);
41 : static bool expect_boolean_value(char *keyword, char *value,
42 : pg_compress_specification *result);
43 :
44 : /*
45 : * Look up a compression algorithm by archive file extension. Returns true and
46 : * sets *algorithm if the extension is recognized. Otherwise returns false.
47 : */
48 : bool
49 373 : parse_tar_compress_algorithm(const char *fname, pg_compress_algorithm *algorithm)
50 : {
51 373 : size_t fname_len = strlen(fname);
52 :
53 373 : if (fname_len >= 4 &&
54 372 : strcmp(fname + fname_len - 4, ".tar") == 0)
55 266 : *algorithm = PG_COMPRESSION_NONE;
56 107 : else if (fname_len >= 4 &&
57 106 : strcmp(fname + fname_len - 4, ".tgz") == 0)
58 0 : *algorithm = PG_COMPRESSION_GZIP;
59 107 : else if (fname_len >= 7 &&
60 106 : strcmp(fname + fname_len - 7, ".tar.gz") == 0)
61 22 : *algorithm = PG_COMPRESSION_GZIP;
62 85 : else if (fname_len >= 8 &&
63 84 : strcmp(fname + fname_len - 8, ".tar.lz4") == 0)
64 11 : *algorithm = PG_COMPRESSION_LZ4;
65 74 : else if (fname_len >= 8 &&
66 73 : strcmp(fname + fname_len - 8, ".tar.zst") == 0)
67 0 : *algorithm = PG_COMPRESSION_ZSTD;
68 : else
69 74 : return false;
70 :
71 299 : return true;
72 : }
73 :
74 : /*
75 : * Look up a compression algorithm by name. Returns true and sets *algorithm
76 : * if the name is recognized. Otherwise returns false.
77 : */
78 : bool
79 504 : parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
80 : {
81 504 : if (strcmp(name, "none") == 0)
82 344 : *algorithm = PG_COMPRESSION_NONE;
83 160 : else if (strcmp(name, "gzip") == 0)
84 147 : *algorithm = PG_COMPRESSION_GZIP;
85 13 : else if (strcmp(name, "lz4") == 0)
86 9 : *algorithm = PG_COMPRESSION_LZ4;
87 4 : else if (strcmp(name, "zstd") == 0)
88 0 : *algorithm = PG_COMPRESSION_ZSTD;
89 : else
90 4 : return false;
91 500 : return true;
92 : }
93 :
94 : /*
95 : * Get the human-readable name corresponding to a particular compression
96 : * algorithm.
97 : */
98 : const char *
99 14 : get_compress_algorithm_name(pg_compress_algorithm algorithm)
100 : {
101 14 : switch (algorithm)
102 : {
103 3 : case PG_COMPRESSION_NONE:
104 3 : return "none";
105 10 : case PG_COMPRESSION_GZIP:
106 10 : return "gzip";
107 1 : case PG_COMPRESSION_LZ4:
108 1 : return "lz4";
109 0 : case PG_COMPRESSION_ZSTD:
110 0 : return "zstd";
111 : /* no default, to provoke compiler warnings if values are added */
112 : }
113 : Assert(false);
114 0 : return "???"; /* placate compiler */
115 : }
116 :
117 : /*
118 : * Parse a compression specification for a specified algorithm.
119 : *
120 : * See the file header comments for a brief description of what a compression
121 : * specification is expected to look like.
122 : *
123 : * On return, all fields of the result object will be initialized.
124 : * In particular, result->parse_error will be NULL if no errors occurred
125 : * during parsing, and will otherwise contain an appropriate error message.
126 : * The caller may free this error message string using pfree, if desired.
127 : * Note, however, even if there's no parse error, the string might not make
128 : * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
129 : *
130 : * The compression level is assigned by default if not directly specified
131 : * by the specification.
132 : *
133 : * Use validate_compress_specification() to find out whether a compression
134 : * specification is semantically sensible.
135 : */
136 : void
137 498 : parse_compress_specification(pg_compress_algorithm algorithm, char *specification,
138 : pg_compress_specification *result)
139 : {
140 : int bare_level;
141 : char *bare_level_endp;
142 :
143 : /* Initial setup of result object. */
144 498 : result->algorithm = algorithm;
145 498 : result->options = 0;
146 498 : result->parse_error = NULL;
147 :
148 : /*
149 : * Assign a default level depending on the compression method. This may
150 : * be enforced later.
151 : */
152 498 : switch (result->algorithm)
153 : {
154 342 : case PG_COMPRESSION_NONE:
155 342 : result->level = 0;
156 342 : break;
157 9 : case PG_COMPRESSION_LZ4:
158 : #ifdef USE_LZ4
159 9 : result->level = 0; /* fast compression mode */
160 : #else
161 : result->parse_error =
162 : psprintf(_("this build does not support compression with %s"),
163 : "LZ4");
164 : #endif
165 9 : break;
166 0 : case PG_COMPRESSION_ZSTD:
167 : #ifdef USE_ZSTD
168 : result->level = ZSTD_CLEVEL_DEFAULT;
169 : #else
170 0 : result->parse_error =
171 0 : psprintf(_("this build does not support compression with %s"),
172 : "ZSTD");
173 : #endif
174 0 : break;
175 147 : case PG_COMPRESSION_GZIP:
176 : #ifdef HAVE_LIBZ
177 147 : result->level = Z_DEFAULT_COMPRESSION;
178 : #else
179 : result->parse_error =
180 : psprintf(_("this build does not support compression with %s"),
181 : "gzip");
182 : #endif
183 147 : break;
184 : }
185 :
186 : /* If there is no specification, we're done already. */
187 498 : if (specification == NULL)
188 479 : return;
189 :
190 : /* As a special case, the specification can be a bare integer. */
191 38 : bare_level = strtol(specification, &bare_level_endp, 10);
192 38 : if (specification != bare_level_endp && *bare_level_endp == '\0')
193 : {
194 19 : result->level = bare_level;
195 19 : return;
196 : }
197 :
198 : /* Look for comma-separated keyword or keyword=value entries. */
199 : while (1)
200 2 : {
201 : char *kwstart;
202 : char *kwend;
203 : char *vstart;
204 : char *vend;
205 : int kwlen;
206 : int vlen;
207 : bool has_value;
208 : char *keyword;
209 : char *value;
210 :
211 : /* Figure start, end, and length of next keyword and any value. */
212 21 : kwstart = kwend = specification;
213 109 : while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
214 88 : ++kwend;
215 21 : kwlen = kwend - kwstart;
216 21 : if (*kwend != '=')
217 : {
218 11 : vstart = vend = NULL;
219 11 : vlen = 0;
220 11 : has_value = false;
221 : }
222 : else
223 : {
224 10 : vstart = vend = kwend + 1;
225 28 : while (*vend != '\0' && *vend != ',')
226 18 : ++vend;
227 10 : vlen = vend - vstart;
228 10 : has_value = true;
229 : }
230 :
231 : /* Reject empty keyword. */
232 21 : if (kwlen == 0)
233 : {
234 4 : result->parse_error =
235 4 : pstrdup(_("found empty string where a compression option was expected"));
236 4 : break;
237 : }
238 :
239 : /* Extract keyword and value as separate C strings. */
240 17 : keyword = palloc(kwlen + 1);
241 17 : memcpy(keyword, kwstart, kwlen);
242 17 : keyword[kwlen] = '\0';
243 17 : if (!has_value)
244 7 : value = NULL;
245 : else
246 : {
247 10 : value = palloc(vlen + 1);
248 10 : memcpy(value, vstart, vlen);
249 10 : value[vlen] = '\0';
250 : }
251 :
252 : /* Handle whatever keyword we found. */
253 17 : if (strcmp(keyword, "level") == 0)
254 : {
255 10 : result->level = expect_integer_value(keyword, value, result);
256 :
257 : /*
258 : * No need to set a flag in "options", there is a default level
259 : * set at least thanks to the logic above.
260 : */
261 : }
262 7 : else if (strcmp(keyword, "workers") == 0)
263 : {
264 2 : result->workers = expect_integer_value(keyword, value, result);
265 2 : result->options |= PG_COMPRESSION_OPTION_WORKERS;
266 : }
267 5 : else if (strcmp(keyword, "long") == 0)
268 : {
269 2 : result->long_distance = expect_boolean_value(keyword, value, result);
270 2 : result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
271 : }
272 : else
273 3 : result->parse_error =
274 3 : psprintf(_("unrecognized compression option: \"%s\""), keyword);
275 :
276 : /* Release memory, just to be tidy. */
277 17 : pfree(keyword);
278 17 : if (value != NULL)
279 10 : pfree(value);
280 :
281 : /*
282 : * If we got an error or have reached the end of the string, stop.
283 : *
284 : * If there is no value, then the end of the keyword might have been
285 : * the end of the string. If there is a value, then the end of the
286 : * keyword cannot have been the end of the string, but the end of the
287 : * value might have been.
288 : */
289 17 : if (result->parse_error != NULL ||
290 8 : (vend == NULL ? *kwend == '\0' : *vend == '\0'))
291 : break;
292 :
293 : /* Advance to next entry and loop around. */
294 2 : specification = vend == NULL ? kwend + 1 : vend + 1;
295 : }
296 : }
297 :
298 : /*
299 : * Parse 'value' as an integer and return the result.
300 : *
301 : * If parsing fails, set result->parse_error to an appropriate message
302 : * and return -1.
303 : */
304 : static int
305 12 : expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
306 : {
307 : int ivalue;
308 : char *ivalue_endp;
309 :
310 12 : if (value == NULL)
311 : {
312 2 : result->parse_error =
313 2 : psprintf(_("compression option \"%s\" requires a value"),
314 : keyword);
315 2 : return -1;
316 : }
317 :
318 10 : ivalue = strtol(value, &ivalue_endp, 10);
319 10 : if (ivalue_endp == value || *ivalue_endp != '\0')
320 : {
321 4 : result->parse_error =
322 4 : psprintf(_("value for compression option \"%s\" must be an integer"),
323 : keyword);
324 4 : return -1;
325 : }
326 6 : return ivalue;
327 : }
328 :
329 : /*
330 : * Parse 'value' as a boolean and return the result.
331 : *
332 : * If parsing fails, set result->parse_error to an appropriate message
333 : * and return -1. The caller must check result->parse_error to determine if
334 : * the call was successful.
335 : *
336 : * Valid values are: yes, no, on, off, 1, 0.
337 : *
338 : * Inspired by ParseVariableBool().
339 : */
340 : static bool
341 2 : expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
342 : {
343 2 : if (value == NULL)
344 2 : return true;
345 :
346 0 : if (pg_strcasecmp(value, "yes") == 0)
347 0 : return true;
348 0 : if (pg_strcasecmp(value, "on") == 0)
349 0 : return true;
350 0 : if (pg_strcasecmp(value, "1") == 0)
351 0 : return true;
352 :
353 0 : if (pg_strcasecmp(value, "no") == 0)
354 0 : return false;
355 0 : if (pg_strcasecmp(value, "off") == 0)
356 0 : return false;
357 0 : if (pg_strcasecmp(value, "0") == 0)
358 0 : return false;
359 :
360 0 : result->parse_error =
361 0 : psprintf(_("value for compression option \"%s\" must be a Boolean value"),
362 : keyword);
363 0 : return false;
364 : }
365 :
366 : /*
367 : * Returns NULL if the compression specification string was syntactically
368 : * valid and semantically sensible. Otherwise, returns an error message.
369 : *
370 : * Does not test whether this build of PostgreSQL supports the requested
371 : * compression method.
372 : */
373 : char *
374 498 : validate_compress_specification(pg_compress_specification *spec)
375 : {
376 498 : int min_level = 1;
377 498 : int max_level = 1;
378 498 : int default_level = 0;
379 :
380 : /* If it didn't even parse OK, it's definitely no good. */
381 498 : if (spec->parse_error != NULL)
382 13 : return spec->parse_error;
383 :
384 : /*
385 : * Check that the algorithm expects a compression level and it is within
386 : * the legal range for the algorithm.
387 : */
388 485 : switch (spec->algorithm)
389 : {
390 134 : case PG_COMPRESSION_GZIP:
391 134 : max_level = 9;
392 : #ifdef HAVE_LIBZ
393 134 : default_level = Z_DEFAULT_COMPRESSION;
394 : #endif
395 134 : break;
396 9 : case PG_COMPRESSION_LZ4:
397 9 : max_level = 12;
398 9 : default_level = 0; /* fast mode */
399 9 : break;
400 0 : case PG_COMPRESSION_ZSTD:
401 : #ifdef USE_ZSTD
402 : max_level = ZSTD_maxCLevel();
403 : min_level = ZSTD_minCLevel();
404 : default_level = ZSTD_CLEVEL_DEFAULT;
405 : #endif
406 0 : break;
407 342 : case PG_COMPRESSION_NONE:
408 342 : if (spec->level != 0)
409 3 : return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
410 : get_compress_algorithm_name(spec->algorithm));
411 339 : break;
412 : }
413 :
414 482 : if ((spec->level < min_level || spec->level > max_level) &&
415 467 : spec->level != default_level)
416 3 : return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
417 : get_compress_algorithm_name(spec->algorithm),
418 : min_level, max_level, default_level);
419 :
420 : /*
421 : * Of the compression algorithms that we currently support, only zstd
422 : * allows parallel workers.
423 : */
424 479 : if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
425 2 : (spec->algorithm != PG_COMPRESSION_ZSTD))
426 : {
427 2 : return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
428 : get_compress_algorithm_name(spec->algorithm));
429 : }
430 :
431 : /*
432 : * Of the compression algorithms that we currently support, only zstd
433 : * supports long-distance mode.
434 : */
435 477 : if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
436 2 : (spec->algorithm != PG_COMPRESSION_ZSTD))
437 : {
438 2 : return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
439 : get_compress_algorithm_name(spec->algorithm));
440 : }
441 :
442 475 : return NULL;
443 : }
444 :
445 : #ifdef FRONTEND
446 :
/*
 * Split a command-line compression option (commonly the argument of
 * -Z/--compress) into an algorithm name and an optional detail string.
 *
 * The expected form is METHOD or METHOD:DETAIL; the pieces are meant to be
 * fed later to parse_compress_specification().  Nothing is validated here.
 *
 * For backward compatibility, an option that strtol() consumes entirely is
 * treated as a bare compression level: a value of zero selects "none" with
 * no detail, and any other value selects "gzip" with the original string as
 * the detail.  An empty option string also takes this path and yields
 * "none".
 *
 * *algorithm always receives a newly allocated string.  *detail receives
 * either a newly allocated string or NULL when there is no detail.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
    char       *number_end;
    long        level;
    const char *colon;
    size_t      method_len;
    char       *method;

    /* Bare integer: the method is implied by the value. */
    level = strtol(option, &number_end, 10);
    if (*number_end == '\0')
    {
        *algorithm = pstrdup(level == 0 ? "none" : "gzip");
        *detail = (level == 0) ? NULL : pstrdup(option);
        return;
    }

    /* Without a colon, the whole option is the method name. */
    colon = strchr(option, ':');
    if (colon == NULL)
    {
        *algorithm = pstrdup(option);
        *detail = NULL;
        return;
    }

    /* METHOD:DETAIL -- copy out the part before the colon. */
    method_len = colon - option;
    method = palloc(method_len + 1);
    memcpy(method, option, method_len);
    method[method_len] = '\0';

    *algorithm = method;
    *detail = pstrdup(colon + 1);
}
506 : #endif /* FRONTEND */
|