Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * compression.c
4 : *
5 : * Shared code for compression methods and specifications.
6 : *
7 : * A compression specification specifies the parameters that should be used
8 : * when performing compression with a specific algorithm. The simplest
9 : * possible compression specification is an integer, which sets the
10 : * compression level.
11 : *
12 : * Otherwise, a compression specification is a comma-separated list of items,
13 : * each having the form keyword or keyword=value.
14 : *
15 : * Currently, the supported keywords are "level", "long", and "workers".
16 : *
17 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
18 : *
19 : * IDENTIFICATION
20 : * src/common/compression.c
21 : *-------------------------------------------------------------------------
22 : */
23 :
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38 :
39 : static int expect_integer_value(char *keyword, char *value,
40 : pg_compress_specification *result);
41 : static bool expect_boolean_value(char *keyword, char *value,
42 : pg_compress_specification *result);
43 :
44 : /*
45 : * Look up a compression algorithm by name. Returns true and sets *algorithm
46 : * if the name is recognized. Otherwise returns false.
47 : */
48 : bool
49 768 : parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
50 : {
51 768 : if (strcmp(name, "none") == 0)
52 634 : *algorithm = PG_COMPRESSION_NONE;
53 134 : else if (strcmp(name, "gzip") == 0)
54 112 : *algorithm = PG_COMPRESSION_GZIP;
55 22 : else if (strcmp(name, "lz4") == 0)
56 14 : *algorithm = PG_COMPRESSION_LZ4;
57 8 : else if (strcmp(name, "zstd") == 0)
58 0 : *algorithm = PG_COMPRESSION_ZSTD;
59 : else
60 8 : return false;
61 760 : return true;
62 : }
63 :
64 : /*
65 : * Get the human-readable name corresponding to a particular compression
66 : * algorithm.
67 : */
68 : const char *
69 28 : get_compress_algorithm_name(pg_compress_algorithm algorithm)
70 : {
71 28 : switch (algorithm)
72 : {
73 6 : case PG_COMPRESSION_NONE:
74 6 : return "none";
75 20 : case PG_COMPRESSION_GZIP:
76 20 : return "gzip";
77 2 : case PG_COMPRESSION_LZ4:
78 2 : return "lz4";
79 0 : case PG_COMPRESSION_ZSTD:
80 0 : return "zstd";
81 : /* no default, to provoke compiler warnings if values are added */
82 : }
83 : Assert(false);
84 0 : return "???"; /* placate compiler */
85 : }
86 :
87 : /*
88 : * Parse a compression specification for a specified algorithm.
89 : *
90 : * See the file header comments for a brief description of what a compression
91 : * specification is expected to look like.
92 : *
93 : * On return, all fields of the result object will be initialized.
94 : * In particular, result->parse_error will be NULL if no errors occurred
95 : * during parsing, and will otherwise contain an appropriate error message.
96 : * The caller may free this error message string using pfree, if desired.
97 : * Note, however, even if there's no parse error, the string might not make
98 : * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
99 : *
100 : * The compression level is assigned by default if not directly specified
101 : * by the specification.
102 : *
103 : * Use validate_compress_specification() to find out whether a compression
104 : * specification is semantically sensible.
105 : */
106 : void
107 756 : parse_compress_specification(pg_compress_algorithm algorithm, char *specification,
108 : pg_compress_specification *result)
109 : {
110 : int bare_level;
111 : char *bare_level_endp;
112 :
113 : /* Initial setup of result object. */
114 756 : result->algorithm = algorithm;
115 756 : result->options = 0;
116 756 : result->parse_error = NULL;
117 :
118 : /*
119 : * Assign a default level depending on the compression method. This may
120 : * be enforced later.
121 : */
122 756 : switch (result->algorithm)
123 : {
124 630 : case PG_COMPRESSION_NONE:
125 630 : result->level = 0;
126 630 : break;
127 14 : case PG_COMPRESSION_LZ4:
128 : #ifdef USE_LZ4
129 14 : result->level = 0; /* fast compression mode */
130 : #else
131 : result->parse_error =
132 : psprintf(_("this build does not support compression with %s"),
133 : "LZ4");
134 : #endif
135 14 : break;
136 0 : case PG_COMPRESSION_ZSTD:
137 : #ifdef USE_ZSTD
138 : result->level = ZSTD_CLEVEL_DEFAULT;
139 : #else
140 0 : result->parse_error =
141 0 : psprintf(_("this build does not support compression with %s"),
142 : "ZSTD");
143 : #endif
144 0 : break;
145 112 : case PG_COMPRESSION_GZIP:
146 : #ifdef HAVE_LIBZ
147 112 : result->level = Z_DEFAULT_COMPRESSION;
148 : #else
149 : result->parse_error =
150 : psprintf(_("this build does not support compression with %s"),
151 : "gzip");
152 : #endif
153 112 : break;
154 : }
155 :
156 : /* If there is no specification, we're done already. */
157 756 : if (specification == NULL)
158 718 : return;
159 :
160 : /* As a special case, the specification can be a bare integer. */
161 72 : bare_level = strtol(specification, &bare_level_endp, 10);
162 72 : if (specification != bare_level_endp && *bare_level_endp == '\0')
163 : {
164 34 : result->level = bare_level;
165 34 : return;
166 : }
167 :
168 : /* Look for comma-separated keyword or keyword=value entries. */
169 : while (1)
170 4 : {
171 : char *kwstart;
172 : char *kwend;
173 : char *vstart;
174 : char *vend;
175 : int kwlen;
176 : int vlen;
177 : bool has_value;
178 : char *keyword;
179 : char *value;
180 :
181 : /* Figure start, end, and length of next keyword and any value. */
182 42 : kwstart = kwend = specification;
183 218 : while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
184 176 : ++kwend;
185 42 : kwlen = kwend - kwstart;
186 42 : if (*kwend != '=')
187 : {
188 22 : vstart = vend = NULL;
189 22 : vlen = 0;
190 22 : has_value = false;
191 : }
192 : else
193 : {
194 20 : vstart = vend = kwend + 1;
195 56 : while (*vend != '\0' && *vend != ',')
196 36 : ++vend;
197 20 : vlen = vend - vstart;
198 20 : has_value = true;
199 : }
200 :
201 : /* Reject empty keyword. */
202 42 : if (kwlen == 0)
203 : {
204 8 : result->parse_error =
205 8 : pstrdup(_("found empty string where a compression option was expected"));
206 8 : break;
207 : }
208 :
209 : /* Extract keyword and value as separate C strings. */
210 34 : keyword = palloc(kwlen + 1);
211 34 : memcpy(keyword, kwstart, kwlen);
212 34 : keyword[kwlen] = '\0';
213 34 : if (!has_value)
214 14 : value = NULL;
215 : else
216 : {
217 20 : value = palloc(vlen + 1);
218 20 : memcpy(value, vstart, vlen);
219 20 : value[vlen] = '\0';
220 : }
221 :
222 : /* Handle whatever keyword we found. */
223 34 : if (strcmp(keyword, "level") == 0)
224 : {
225 20 : result->level = expect_integer_value(keyword, value, result);
226 :
227 : /*
228 : * No need to set a flag in "options", there is a default level
229 : * set at least thanks to the logic above.
230 : */
231 : }
232 14 : else if (strcmp(keyword, "workers") == 0)
233 : {
234 4 : result->workers = expect_integer_value(keyword, value, result);
235 4 : result->options |= PG_COMPRESSION_OPTION_WORKERS;
236 : }
237 10 : else if (strcmp(keyword, "long") == 0)
238 : {
239 4 : result->long_distance = expect_boolean_value(keyword, value, result);
240 4 : result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
241 : }
242 : else
243 6 : result->parse_error =
244 6 : psprintf(_("unrecognized compression option: \"%s\""), keyword);
245 :
246 : /* Release memory, just to be tidy. */
247 34 : pfree(keyword);
248 34 : if (value != NULL)
249 20 : pfree(value);
250 :
251 : /*
252 : * If we got an error or have reached the end of the string, stop.
253 : *
254 : * If there is no value, then the end of the keyword might have been
255 : * the end of the string. If there is a value, then the end of the
256 : * keyword cannot have been the end of the string, but the end of the
257 : * value might have been.
258 : */
259 34 : if (result->parse_error != NULL ||
260 16 : (vend == NULL ? *kwend == '\0' : *vend == '\0'))
261 : break;
262 :
263 : /* Advance to next entry and loop around. */
264 4 : specification = vend == NULL ? kwend + 1 : vend + 1;
265 : }
266 : }
267 :
268 : /*
269 : * Parse 'value' as an integer and return the result.
270 : *
271 : * If parsing fails, set result->parse_error to an appropriate message
272 : * and return -1.
273 : */
274 : static int
275 24 : expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
276 : {
277 : int ivalue;
278 : char *ivalue_endp;
279 :
280 24 : if (value == NULL)
281 : {
282 4 : result->parse_error =
283 4 : psprintf(_("compression option \"%s\" requires a value"),
284 : keyword);
285 4 : return -1;
286 : }
287 :
288 20 : ivalue = strtol(value, &ivalue_endp, 10);
289 20 : if (ivalue_endp == value || *ivalue_endp != '\0')
290 : {
291 8 : result->parse_error =
292 8 : psprintf(_("value for compression option \"%s\" must be an integer"),
293 : keyword);
294 8 : return -1;
295 : }
296 12 : return ivalue;
297 : }
298 :
299 : /*
300 : * Parse 'value' as a boolean and return the result.
301 : *
302 : * If parsing fails, set result->parse_error to an appropriate message
303 : * and return -1. The caller must check result->parse_error to determine if
304 : * the call was successful.
305 : *
306 : * Valid values are: yes, no, on, off, 1, 0.
307 : *
308 : * Inspired by ParseVariableBool().
309 : */
310 : static bool
311 4 : expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
312 : {
313 4 : if (value == NULL)
314 4 : return true;
315 :
316 0 : if (pg_strcasecmp(value, "yes") == 0)
317 0 : return true;
318 0 : if (pg_strcasecmp(value, "on") == 0)
319 0 : return true;
320 0 : if (pg_strcasecmp(value, "1") == 0)
321 0 : return true;
322 :
323 0 : if (pg_strcasecmp(value, "no") == 0)
324 0 : return false;
325 0 : if (pg_strcasecmp(value, "off") == 0)
326 0 : return false;
327 0 : if (pg_strcasecmp(value, "0") == 0)
328 0 : return false;
329 :
330 0 : result->parse_error =
331 0 : psprintf(_("value for compression option \"%s\" must be a Boolean value"),
332 : keyword);
333 0 : return false;
334 : }
335 :
336 : /*
337 : * Returns NULL if the compression specification string was syntactically
338 : * valid and semantically sensible. Otherwise, returns an error message.
339 : *
340 : * Does not test whether this build of PostgreSQL supports the requested
341 : * compression method.
342 : */
343 : char *
344 756 : validate_compress_specification(pg_compress_specification *spec)
345 : {
346 756 : int min_level = 1;
347 756 : int max_level = 1;
348 756 : int default_level = 0;
349 :
350 : /* If it didn't even parse OK, it's definitely no good. */
351 756 : if (spec->parse_error != NULL)
352 26 : return spec->parse_error;
353 :
354 : /*
355 : * Check that the algorithm expects a compression level and it is within
356 : * the legal range for the algorithm.
357 : */
358 730 : switch (spec->algorithm)
359 : {
360 86 : case PG_COMPRESSION_GZIP:
361 86 : max_level = 9;
362 : #ifdef HAVE_LIBZ
363 86 : default_level = Z_DEFAULT_COMPRESSION;
364 : #endif
365 86 : break;
366 14 : case PG_COMPRESSION_LZ4:
367 14 : max_level = 12;
368 14 : default_level = 0; /* fast mode */
369 14 : break;
370 0 : case PG_COMPRESSION_ZSTD:
371 : #ifdef USE_ZSTD
372 : max_level = ZSTD_maxCLevel();
373 : min_level = ZSTD_minCLevel();
374 : default_level = ZSTD_CLEVEL_DEFAULT;
375 : #endif
376 0 : break;
377 630 : case PG_COMPRESSION_NONE:
378 630 : if (spec->level != 0)
379 6 : return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
380 : get_compress_algorithm_name(spec->algorithm));
381 624 : break;
382 : }
383 :
384 724 : if ((spec->level < min_level || spec->level > max_level) &&
385 698 : spec->level != default_level)
386 6 : return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
387 : get_compress_algorithm_name(spec->algorithm),
388 : min_level, max_level, default_level);
389 :
390 : /*
391 : * Of the compression algorithms that we currently support, only zstd
392 : * allows parallel workers.
393 : */
394 718 : if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
395 4 : (spec->algorithm != PG_COMPRESSION_ZSTD))
396 : {
397 4 : return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
398 : get_compress_algorithm_name(spec->algorithm));
399 : }
400 :
401 : /*
402 : * Of the compression algorithms that we currently support, only zstd
403 : * supports long-distance mode.
404 : */
405 714 : if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
406 4 : (spec->algorithm != PG_COMPRESSION_ZSTD))
407 : {
408 4 : return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
409 : get_compress_algorithm_name(spec->algorithm));
410 : }
411 :
412 710 : return NULL;
413 : }
414 :
415 : #ifdef FRONTEND
416 :
/*
 * Split a command-line compression option (commonly the argument of
 * -Z/--compress) into an algorithm name and an optional detail string.
 *
 * The option has the form METHOD or METHOD:DETAIL; the detail part is meant
 * to be fed later to parse_compress_specification().  This function only
 * separates the two pieces, it does not interpret them.
 *
 * For backward compatibility, an option that strtol() consumes entirely is
 * treated as a bare compression level: a value of zero selects "none" with
 * no detail, any other value selects "gzip" with the original option text as
 * the detail.  Note that an empty option string also takes this path and
 * yields "none", because strtol() then stops right at the terminator.
 *
 * *algorithm always receives a newly allocated string.  *detail receives
 * either a newly allocated string or NULL when there is no detail.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
	char	   *number_end;
	const char *colon;
	long		number;
	size_t		name_len;
	char	   *name;

	/* Bare integer: the method is implied by the value. */
	number = strtol(option, &number_end, 10);
	if (*number_end == '\0')
	{
		*algorithm = pstrdup(number == 0 ? "none" : "gzip");
		*detail = (number == 0) ? NULL : pstrdup(option);
		return;
	}

	/* No colon: the whole option is the algorithm name. */
	colon = strchr(option, ':');
	if (colon == NULL)
	{
		*algorithm = pstrdup(option);
		*detail = NULL;
		return;
	}

	/* Text before the colon is the algorithm, text after it the detail. */
	name_len = (size_t) (colon - option);
	name = palloc(name_len + 1);
	memcpy(name, option, name_len);
	name[name_len] = '\0';

	*algorithm = name;
	*detail = pstrdup(colon + 1);
}
476 : #endif /* FRONTEND */
|