Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * detoast.c
4 : * Retrieve compressed or external variable size attributes.
5 : *
6 : * Copyright (c) 2000-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/access/common/detoast.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 :
16 : #include "access/detoast.h"
17 : #include "access/table.h"
18 : #include "access/tableam.h"
19 : #include "access/toast_internals.h"
20 : #include "common/int.h"
21 : #include "common/pg_lzcompress.h"
22 : #include "utils/expandeddatum.h"
23 : #include "utils/rel.h"
24 :
25 : static struct varlena *toast_fetch_datum(struct varlena *attr);
26 : static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
27 : int32 sliceoffset,
28 : int32 slicelength);
29 : static struct varlena *toast_decompress_datum(struct varlena *attr);
30 : static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
31 :
32 : /* ----------
33 : * detoast_external_attr -
34 : *
35 : * Public entry point to get back a toasted value from
36 : * external source (possibly still in compressed format).
37 : *
38 : * This will return a datum that contains all the data internally, ie, not
39 : * relying on external storage or memory, but it can still be compressed or
40 : * have a short header. Note some callers assume that if the input is an
41 : * EXTERNAL datum, the result will be a pfree'able chunk.
42 : * ----------
43 : */
44 : struct varlena *
45 13652 : detoast_external_attr(struct varlena *attr)
46 : {
47 : struct varlena *result;
48 :
49 13652 : if (VARATT_IS_EXTERNAL_ONDISK(attr))
50 : {
51 : /*
52 : * This is an external stored plain value
53 : */
54 4968 : result = toast_fetch_datum(attr);
55 : }
56 8684 : else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
57 492 : {
58 : /*
59 : * This is an indirect pointer --- dereference it
60 : */
61 : struct varatt_indirect redirect;
62 :
63 492 : VARATT_EXTERNAL_GET_POINTER(redirect, attr);
64 492 : attr = (struct varlena *) redirect.pointer;
65 :
66 : /* nested indirect Datums aren't allowed */
67 : Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
68 :
69 : /* recurse if value is still external in some other way */
70 492 : if (VARATT_IS_EXTERNAL(attr))
71 0 : return detoast_external_attr(attr);
72 :
73 : /*
74 : * Copy into the caller's memory context, in case caller tries to
75 : * pfree the result.
76 : */
77 492 : result = (struct varlena *) palloc(VARSIZE_ANY(attr));
78 492 : memcpy(result, attr, VARSIZE_ANY(attr));
79 : }
80 8192 : else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
81 8192 : {
82 : /*
83 : * This is an expanded-object pointer --- get flat format
84 : */
85 : ExpandedObjectHeader *eoh;
86 : Size resultsize;
87 :
88 8192 : eoh = DatumGetEOHP(PointerGetDatum(attr));
89 8192 : resultsize = EOH_get_flat_size(eoh);
90 8192 : result = (struct varlena *) palloc(resultsize);
91 8192 : EOH_flatten_into(eoh, result, resultsize);
92 : }
93 : else
94 : {
95 : /*
96 : * This is a plain value inside of the main tuple - why am I called?
97 : */
98 0 : result = attr;
99 : }
100 :
101 13652 : return result;
102 : }
103 :
104 :
105 : /* ----------
106 : * detoast_attr -
107 : *
108 : * Public entry point to get back a toasted value from compression
109 : * or external storage. The result is always non-extended varlena form.
110 : *
111 : * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
112 : * datum, the result will be a pfree'able chunk.
113 : * ----------
114 : */
115 : struct varlena *
116 26637424 : detoast_attr(struct varlena *attr)
117 : {
118 26637424 : if (VARATT_IS_EXTERNAL_ONDISK(attr))
119 : {
120 : /*
121 : * This is an externally stored datum --- fetch it back from there
122 : */
123 14906 : attr = toast_fetch_datum(attr);
124 : /* If it's compressed, decompress it */
125 14906 : if (VARATT_IS_COMPRESSED(attr))
126 : {
127 14770 : struct varlena *tmp = attr;
128 :
129 14770 : attr = toast_decompress_datum(tmp);
130 14770 : pfree(tmp);
131 : }
132 : }
133 26622518 : else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
134 108 : {
135 : /*
136 : * This is an indirect pointer --- dereference it
137 : */
138 : struct varatt_indirect redirect;
139 :
140 108 : VARATT_EXTERNAL_GET_POINTER(redirect, attr);
141 108 : attr = (struct varlena *) redirect.pointer;
142 :
143 : /* nested indirect Datums aren't allowed */
144 : Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
145 :
146 : /* recurse in case value is still extended in some other way */
147 108 : attr = detoast_attr(attr);
148 :
149 : /* if it isn't, we'd better copy it */
150 108 : if (attr == (struct varlena *) redirect.pointer)
151 : {
152 : struct varlena *result;
153 :
154 38 : result = (struct varlena *) palloc(VARSIZE_ANY(attr));
155 38 : memcpy(result, attr, VARSIZE_ANY(attr));
156 38 : attr = result;
157 : }
158 : }
159 26622410 : else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
160 : {
161 : /*
162 : * This is an expanded-object pointer --- get flat format
163 : */
164 8192 : attr = detoast_external_attr(attr);
165 : /* flatteners are not allowed to produce compressed/short output */
166 8192 : Assert(!VARATT_IS_EXTENDED(attr));
167 : }
168 26614218 : else if (VARATT_IS_COMPRESSED(attr))
169 : {
170 : /*
171 : * This is a compressed value inside of the main tuple
172 : */
173 118242 : attr = toast_decompress_datum(attr);
174 : }
175 26495976 : else if (VARATT_IS_SHORT(attr))
176 : {
177 : /*
178 : * This is a short-header varlena --- convert to 4-byte header format
179 : */
180 26495938 : Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
181 26495938 : Size new_size = data_size + VARHDRSZ;
182 : struct varlena *new_attr;
183 :
184 26495938 : new_attr = (struct varlena *) palloc(new_size);
185 26495938 : SET_VARSIZE(new_attr, new_size);
186 26495938 : memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
187 26495938 : attr = new_attr;
188 : }
189 :
190 26637424 : return attr;
191 : }
192 :
193 :
194 : /* ----------
195 : * detoast_attr_slice -
196 : *
197 : * Public entry point to get back part of a toasted value
198 : * from compression or external storage.
199 : *
200 : * sliceoffset is where to start (zero or more)
201 : * If slicelength < 0, return everything beyond sliceoffset
202 : * ----------
203 : */
204 : struct varlena *
205 4356 : detoast_attr_slice(struct varlena *attr,
206 : int32 sliceoffset, int32 slicelength)
207 : {
208 : struct varlena *preslice;
209 : struct varlena *result;
210 : char *attrdata;
211 : int32 slicelimit;
212 : int32 attrsize;
213 :
214 4356 : if (sliceoffset < 0)
215 0 : elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
216 :
217 : /*
218 : * Compute slicelimit = offset + length, or -1 if we must fetch all of the
219 : * value. In case of integer overflow, we must fetch all.
220 : */
221 4356 : if (slicelength < 0)
222 3948 : slicelimit = -1;
223 408 : else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
224 0 : slicelength = slicelimit = -1;
225 :
226 4356 : if (VARATT_IS_EXTERNAL_ONDISK(attr))
227 42 : {
228 : struct varatt_external toast_pointer;
229 :
230 306 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
231 :
232 : /* fast path for non-compressed external datums */
233 306 : if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
234 264 : return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
235 :
236 : /*
237 : * For compressed values, we need to fetch enough slices to decompress
238 : * at least the requested part (when a prefix is requested).
239 : * Otherwise, just fetch all slices.
240 : */
241 42 : if (slicelimit >= 0)
242 : {
243 42 : int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
244 :
245 : /*
246 : * Determine maximum amount of compressed data needed for a prefix
247 : * of a given length (after decompression).
248 : *
249 : * At least for now, if it's LZ4 data, we'll have to fetch the
250 : * whole thing, because there doesn't seem to be an API call to
251 : * determine how much compressed data we need to be sure of being
252 : * able to decompress the required slice.
253 : */
254 42 : if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
255 : TOAST_PGLZ_COMPRESSION_ID)
256 36 : max_size = pglz_maximum_compressed_size(slicelimit, max_size);
257 :
258 : /*
259 : * Fetch enough compressed slices (compressed marker will get set
260 : * automatically).
261 : */
262 42 : preslice = toast_fetch_datum_slice(attr, 0, max_size);
263 : }
264 : else
265 0 : preslice = toast_fetch_datum(attr);
266 : }
267 4050 : else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
268 : {
269 : struct varatt_indirect redirect;
270 :
271 0 : VARATT_EXTERNAL_GET_POINTER(redirect, attr);
272 :
273 : /* nested indirect Datums aren't allowed */
274 : Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
275 :
276 0 : return detoast_attr_slice(redirect.pointer,
277 : sliceoffset, slicelength);
278 : }
279 4050 : else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
280 : {
281 : /* pass it off to detoast_external_attr to flatten */
282 0 : preslice = detoast_external_attr(attr);
283 : }
284 : else
285 4050 : preslice = attr;
286 :
287 : Assert(!VARATT_IS_EXTERNAL(preslice));
288 :
289 4092 : if (VARATT_IS_COMPRESSED(preslice))
290 : {
291 132 : struct varlena *tmp = preslice;
292 :
293 : /* Decompress enough to encompass the slice and the offset */
294 132 : if (slicelimit >= 0)
295 108 : preslice = toast_decompress_datum_slice(tmp, slicelimit);
296 : else
297 24 : preslice = toast_decompress_datum(tmp);
298 :
299 132 : if (tmp != attr)
300 42 : pfree(tmp);
301 : }
302 :
303 4092 : if (VARATT_IS_SHORT(preslice))
304 : {
305 3158 : attrdata = VARDATA_SHORT(preslice);
306 3158 : attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
307 : }
308 : else
309 : {
310 934 : attrdata = VARDATA(preslice);
311 934 : attrsize = VARSIZE(preslice) - VARHDRSZ;
312 : }
313 :
314 : /* slicing of datum for compressed cases and plain value */
315 :
316 4092 : if (sliceoffset >= attrsize)
317 : {
318 18 : sliceoffset = 0;
319 18 : slicelength = 0;
320 : }
321 4074 : else if (slicelength < 0 || slicelimit > attrsize)
322 3936 : slicelength = attrsize - sliceoffset;
323 :
324 4092 : result = (struct varlena *) palloc(slicelength + VARHDRSZ);
325 4092 : SET_VARSIZE(result, slicelength + VARHDRSZ);
326 :
327 4092 : memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
328 :
329 4092 : if (preslice != attr)
330 132 : pfree(preslice);
331 :
332 4092 : return result;
333 : }
334 :
335 : /* ----------
336 : * toast_fetch_datum -
337 : *
338 : * Reconstruct an in memory Datum from the chunks saved
339 : * in the toast relation
340 : * ----------
341 : */
342 : static struct varlena *
343 19874 : toast_fetch_datum(struct varlena *attr)
344 : {
345 : Relation toastrel;
346 : struct varlena *result;
347 : struct varatt_external toast_pointer;
348 : int32 attrsize;
349 :
350 19874 : if (!VARATT_IS_EXTERNAL_ONDISK(attr))
351 0 : elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
352 :
353 : /* Must copy to access aligned fields */
354 19874 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
355 :
356 19874 : attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
357 :
358 19874 : result = (struct varlena *) palloc(attrsize + VARHDRSZ);
359 :
360 19874 : if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
361 18794 : SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
362 : else
363 1080 : SET_VARSIZE(result, attrsize + VARHDRSZ);
364 :
365 19874 : if (attrsize == 0)
366 0 : return result; /* Probably shouldn't happen, but just in
367 : * case. */
368 :
369 : /*
370 : * Open the toast relation and its indexes
371 : */
372 19874 : toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
373 :
374 : /* Fetch all chunks */
375 19874 : table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
376 : attrsize, 0, attrsize, result);
377 :
378 : /* Close toast table */
379 19874 : table_close(toastrel, AccessShareLock);
380 :
381 19874 : return result;
382 : }
383 :
384 : /* ----------
385 : * toast_fetch_datum_slice -
386 : *
387 : * Reconstruct a segment of a Datum from the chunks saved
388 : * in the toast relation
389 : *
390 : * Note that this function supports non-compressed external datums
391 : * and compressed external datums (in which case the requested slice
392 : * has to be a prefix, i.e. sliceoffset has to be 0).
393 : * ----------
394 : */
395 : static struct varlena *
396 306 : toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
397 : int32 slicelength)
398 : {
399 : Relation toastrel;
400 : struct varlena *result;
401 : struct varatt_external toast_pointer;
402 : int32 attrsize;
403 :
404 306 : if (!VARATT_IS_EXTERNAL_ONDISK(attr))
405 0 : elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
406 :
407 : /* Must copy to access aligned fields */
408 306 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
409 :
410 : /*
411 : * It's nonsense to fetch slices of a compressed datum unless when it's a
412 : * prefix -- this isn't lo_* we can't return a compressed datum which is
413 : * meaningful to toast later.
414 : */
415 : Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
416 :
417 306 : attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
418 :
419 306 : if (sliceoffset >= attrsize)
420 : {
421 0 : sliceoffset = 0;
422 0 : slicelength = 0;
423 : }
424 :
425 : /*
426 : * When fetching a prefix of a compressed external datum, account for the
427 : * space required by va_tcinfo, which is stored at the beginning as an
428 : * int32 value.
429 : */
430 306 : if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
431 42 : slicelength = slicelength + sizeof(int32);
432 :
433 : /*
434 : * Adjust length request if needed. (Note: our sole caller,
435 : * detoast_attr_slice, protects us against sliceoffset + slicelength
436 : * overflowing.)
437 : */
438 306 : if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
439 54 : slicelength = attrsize - sliceoffset;
440 :
441 306 : result = (struct varlena *) palloc(slicelength + VARHDRSZ);
442 :
443 306 : if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
444 42 : SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
445 : else
446 264 : SET_VARSIZE(result, slicelength + VARHDRSZ);
447 :
448 306 : if (slicelength == 0)
449 0 : return result; /* Can save a lot of work at this point! */
450 :
451 : /* Open the toast relation */
452 306 : toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
453 :
454 : /* Fetch all chunks */
455 306 : table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
456 : attrsize, sliceoffset, slicelength,
457 : result);
458 :
459 : /* Close toast table */
460 306 : table_close(toastrel, AccessShareLock);
461 :
462 306 : return result;
463 : }
464 :
465 : /* ----------
466 : * toast_decompress_datum -
467 : *
468 : * Decompress a compressed version of a varlena datum
469 : */
470 : static struct varlena *
471 133060 : toast_decompress_datum(struct varlena *attr)
472 : {
473 : ToastCompressionId cmid;
474 :
475 : Assert(VARATT_IS_COMPRESSED(attr));
476 :
477 : /*
478 : * Fetch the compression method id stored in the compression header and
479 : * decompress the data using the appropriate decompression routine.
480 : */
481 133060 : cmid = TOAST_COMPRESS_METHOD(attr);
482 133060 : switch (cmid)
483 : {
484 132960 : case TOAST_PGLZ_COMPRESSION_ID:
485 132960 : return pglz_decompress_datum(attr);
486 100 : case TOAST_LZ4_COMPRESSION_ID:
487 100 : return lz4_decompress_datum(attr);
488 0 : default:
489 0 : elog(ERROR, "invalid compression method id %d", cmid);
490 : return NULL; /* keep compiler quiet */
491 : }
492 : }
493 :
494 :
495 : /* ----------
496 : * toast_decompress_datum_slice -
497 : *
498 : * Decompress the front of a compressed version of a varlena datum.
499 : * offset handling happens in detoast_attr_slice.
500 : * Here we just decompress a slice from the front.
501 : */
502 : static struct varlena *
503 108 : toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
504 : {
505 : ToastCompressionId cmid;
506 :
507 : Assert(VARATT_IS_COMPRESSED(attr));
508 :
509 : /*
510 : * Some callers may pass a slicelength that's more than the actual
511 : * decompressed size. If so, just decompress normally. This avoids
512 : * possibly allocating a larger-than-necessary result object, and may be
513 : * faster and/or more robust as well. Notably, some versions of liblz4
514 : * have been seen to give wrong results if passed an output size that is
515 : * more than the data's true decompressed size.
516 : */
517 108 : if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr))
518 24 : return toast_decompress_datum(attr);
519 :
520 : /*
521 : * Fetch the compression method id stored in the compression header and
522 : * decompress the data slice using the appropriate decompression routine.
523 : */
524 84 : cmid = TOAST_COMPRESS_METHOD(attr);
525 84 : switch (cmid)
526 : {
527 66 : case TOAST_PGLZ_COMPRESSION_ID:
528 66 : return pglz_decompress_datum_slice(attr, slicelength);
529 18 : case TOAST_LZ4_COMPRESSION_ID:
530 18 : return lz4_decompress_datum_slice(attr, slicelength);
531 0 : default:
532 0 : elog(ERROR, "invalid compression method id %d", cmid);
533 : return NULL; /* keep compiler quiet */
534 : }
535 : }
536 :
537 : /* ----------
538 : * toast_raw_datum_size -
539 : *
540 : * Return the raw (detoasted) size of a varlena datum
541 : * (including the VARHDRSZ header)
542 : * ----------
543 : */
544 : Size
545 21758722 : toast_raw_datum_size(Datum value)
546 : {
547 21758722 : struct varlena *attr = (struct varlena *) DatumGetPointer(value);
548 : Size result;
549 :
550 21758722 : if (VARATT_IS_EXTERNAL_ONDISK(attr))
551 13362 : {
552 : /* va_rawsize is the size of the original datum -- including header */
553 : struct varatt_external toast_pointer;
554 :
555 13362 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
556 13362 : result = toast_pointer.va_rawsize;
557 : }
558 21745360 : else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
559 : {
560 : struct varatt_indirect toast_pointer;
561 :
562 0 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
563 :
564 : /* nested indirect Datums aren't allowed */
565 : Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
566 :
567 0 : return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
568 : }
569 21745360 : else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
570 : {
571 0 : result = EOH_get_flat_size(DatumGetEOHP(value));
572 : }
573 21745360 : else if (VARATT_IS_COMPRESSED(attr))
574 : {
575 : /* here, va_rawsize is just the payload size */
576 15986 : result = VARDATA_COMPRESSED_GET_EXTSIZE(attr) + VARHDRSZ;
577 : }
578 21729374 : else if (VARATT_IS_SHORT(attr))
579 : {
580 : /*
581 : * we have to normalize the header length to VARHDRSZ or else the
582 : * callers of this function will be confused.
583 : */
584 13805396 : result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
585 : }
586 : else
587 : {
588 : /* plain untoasted datum */
589 7923978 : result = VARSIZE(attr);
590 : }
591 21758722 : return result;
592 : }
593 :
594 : /* ----------
595 : * toast_datum_size
596 : *
597 : * Return the physical storage size (possibly compressed) of a varlena datum
598 : * ----------
599 : */
600 : Size
601 122 : toast_datum_size(Datum value)
602 : {
603 122 : struct varlena *attr = (struct varlena *) DatumGetPointer(value);
604 : Size result;
605 :
606 122 : if (VARATT_IS_EXTERNAL_ONDISK(attr))
607 2 : {
608 : /*
609 : * Attribute is stored externally - return the extsize whether
610 : * compressed or not. We do not count the size of the toast pointer
611 : * ... should we?
612 : */
613 : struct varatt_external toast_pointer;
614 :
615 2 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
616 2 : result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
617 : }
618 120 : else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
619 : {
620 : struct varatt_indirect toast_pointer;
621 :
622 0 : VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
623 :
624 : /* nested indirect Datums aren't allowed */
625 : Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
626 :
627 0 : return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
628 : }
629 120 : else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
630 : {
631 0 : result = EOH_get_flat_size(DatumGetEOHP(value));
632 : }
633 120 : else if (VARATT_IS_SHORT(attr))
634 : {
635 0 : result = VARSIZE_SHORT(attr);
636 : }
637 : else
638 : {
639 : /*
640 : * Attribute is stored inline either compressed or not, just calculate
641 : * the size of the datum in either case.
642 : */
643 120 : result = VARSIZE(attr);
644 : }
645 122 : return result;
646 : }
|