Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * datum.c
4 : * POSTGRES Datum (abstract data type) manipulation routines.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/datum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : /*
17 : * In the implementation of these routines we assume the following:
18 : *
19 : * A) if a type is "byVal" then all the information is stored in the
20 : * Datum itself (i.e. no pointers involved!). In this case the
21 : * length of the type is always greater than zero and not more than
22 : * "sizeof(Datum)"
23 : *
24 : * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
25 : * then the "Datum" always contains a pointer to a stream of bytes.
26 : * The number of significant bytes is always equal to the typlen.
27 : *
28 : * C) if a type is not "byVal" and has typlen == -1,
29 : * then the "Datum" always points to a "varlena".
30 : * This varlena structure has information about the actual length of this
31 : * particular instance of the type and about its value.
32 : *
33 : * D) if a type is not "byVal" and has typlen == -2,
34 : * then the "Datum" always points to a null-terminated C string.
35 : *
36 : * Note that we do not treat "toasted" datums specially; therefore what
37 : * will be copied or compared is the compressed data or toast reference.
38 : * An exception is made for datumCopy() of an expanded object, however,
39 : * because most callers expect to get a simple contiguous (and pfree'able)
40 : * result from datumCopy(). See also datumTransfer().
41 : */
42 :
43 : #include "postgres.h"
44 :
45 : #include "access/detoast.h"
46 : #include "common/hashfn.h"
47 : #include "fmgr.h"
48 : #include "utils/datum.h"
49 : #include "utils/expandeddatum.h"
50 : #include "utils/fmgrprotos.h"
51 :
52 :
53 : /*-------------------------------------------------------------------------
54 : * datumGetSize
55 : *
56 : * Find the "real" size of a datum, given the datum value,
57 : * whether it is a "by value", and the declared type length.
58 : * (For TOAST pointer datums, this is the size of the pointer datum.)
59 : *
60 : * This is essentially an out-of-line version of the att_addlength_datum()
61 : * macro in access/tupmacs.h. We do a tad more error checking though.
62 : *-------------------------------------------------------------------------
63 : */
64 : Size
65 7370937 : datumGetSize(Datum value, bool typByVal, int typLen)
66 : {
67 : Size size;
68 :
69 7370937 : if (typByVal)
70 : {
71 : /* Pass-by-value types are always fixed-length */
72 : Assert(typLen > 0 && typLen <= sizeof(Datum));
73 1078723 : size = (Size) typLen;
74 : }
75 : else
76 : {
77 6292214 : if (typLen > 0)
78 : {
79 : /* Fixed-length pass-by-ref type */
80 4386453 : size = (Size) typLen;
81 : }
82 1905761 : else if (typLen == -1)
83 : {
84 : /* It is a varlena datatype */
85 1642931 : varlena *s = (varlena *) DatumGetPointer(value);
86 :
87 1642931 : if (!s)
88 0 : ereport(ERROR,
89 : (errcode(ERRCODE_DATA_EXCEPTION),
90 : errmsg("invalid Datum pointer")));
91 :
92 1642931 : size = (Size) VARSIZE_ANY(s);
93 : }
94 262830 : else if (typLen == -2)
95 : {
96 : /* It is a cstring datatype */
97 262830 : char *s = (char *) DatumGetPointer(value);
98 :
99 262830 : if (!s)
100 0 : ereport(ERROR,
101 : (errcode(ERRCODE_DATA_EXCEPTION),
102 : errmsg("invalid Datum pointer")));
103 :
104 262830 : size = (Size) (strlen(s) + 1);
105 : }
106 : else
107 : {
108 0 : elog(ERROR, "invalid typLen: %d", typLen);
109 : size = 0; /* keep compiler quiet */
110 : }
111 : }
112 :
113 7370937 : return size;
114 : }
115 :
116 : /*-------------------------------------------------------------------------
117 : * datumCopy
118 : *
119 : * Make a copy of a non-NULL datum.
120 : *
121 : * If the datatype is pass-by-reference, memory is obtained with palloc().
122 : *
123 : * If the value is a reference to an expanded object, we flatten into memory
124 : * obtained with palloc(). We need to copy because one of the main uses of
125 : * this function is to copy a datum out of a transient memory context that's
126 : * about to be destroyed, and the expanded object is probably in a child
127 : * context that will also go away. Moreover, many callers assume that the
128 : * result is a single pfree-able chunk.
129 : *-------------------------------------------------------------------------
130 : */
131 : Datum
132 16688449 : datumCopy(Datum value, bool typByVal, int typLen)
133 : {
134 : Datum res;
135 :
136 16688449 : if (typByVal)
137 8304746 : res = value;
138 8383703 : else if (typLen == -1)
139 : {
140 : /* It is a varlena datatype */
141 4686336 : varlena *vl = (varlena *) DatumGetPointer(value);
142 :
143 4686336 : if (VARATT_IS_EXTERNAL_EXPANDED(vl))
144 : {
145 : /* Flatten into the caller's memory context */
146 582 : ExpandedObjectHeader *eoh = DatumGetEOHP(value);
147 : Size resultsize;
148 : char *resultptr;
149 :
150 582 : resultsize = EOH_get_flat_size(eoh);
151 582 : resultptr = (char *) palloc(resultsize);
152 582 : EOH_flatten_into(eoh, resultptr, resultsize);
153 582 : res = PointerGetDatum(resultptr);
154 : }
155 : else
156 : {
157 : /* Otherwise, just copy the varlena datum verbatim */
158 : Size realSize;
159 : char *resultptr;
160 :
161 4685754 : realSize = (Size) VARSIZE_ANY(vl);
162 4685754 : resultptr = (char *) palloc(realSize);
163 4685754 : memcpy(resultptr, vl, realSize);
164 4685754 : res = PointerGetDatum(resultptr);
165 : }
166 : }
167 : else
168 : {
169 : /* Pass by reference, but not varlena, so not toasted */
170 : Size realSize;
171 : char *resultptr;
172 :
173 3697367 : realSize = datumGetSize(value, typByVal, typLen);
174 :
175 3697367 : resultptr = (char *) palloc(realSize);
176 3697367 : memcpy(resultptr, DatumGetPointer(value), realSize);
177 3697367 : res = PointerGetDatum(resultptr);
178 : }
179 16688449 : return res;
180 : }
181 :
182 : /*-------------------------------------------------------------------------
183 : * datumTransfer
184 : *
185 : * Transfer a non-NULL datum into the current memory context.
186 : *
187 : * This is equivalent to datumCopy() except when the datum is a read-write
188 : * pointer to an expanded object. In that case we merely reparent the object
189 : * into the current context, and return its standard R/W pointer (in case the
190 : * given one is a transient pointer of shorter lifespan).
191 : *-------------------------------------------------------------------------
192 : */
193 : Datum
194 77964 : datumTransfer(Datum value, bool typByVal, int typLen)
195 : {
196 153924 : if (!typByVal && typLen == -1 &&
197 75960 : VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
198 2065 : value = TransferExpandedObject(value, CurrentMemoryContext);
199 : else
200 75899 : value = datumCopy(value, typByVal, typLen);
201 77964 : return value;
202 : }
203 :
204 : /*-------------------------------------------------------------------------
205 : * datumIsEqual
206 : *
207 : * Return true if two datums are equal, false otherwise
208 : *
209 : * NOTE: XXX!
210 : * We just compare the bytes of the two values, one by one.
211 : * This routine will return false if there are 2 different
212 : * representations of the same value (something along the lines
213 : * of say the representation of zero in one's complement arithmetic).
214 : * Also, it will probably not give the answer you want if either
215 : * datum has been "toasted".
216 : *
217 : * Do not try to make this any smarter than it currently is with respect
218 : * to "toasted" datums, because some of the callers could be working in the
219 : * context of an aborted transaction.
220 : *-------------------------------------------------------------------------
221 : */
222 : bool
223 2602342 : datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
224 : {
225 : bool res;
226 :
227 2602342 : if (typByVal)
228 : {
229 : /*
230 : * just compare the two datums. NOTE: just comparing "len" bytes will
231 : * not do the work, because we do not know how these bytes are aligned
232 : * inside the "Datum". We assume instead that any given datatype is
233 : * consistent about how it fills extraneous bits in the Datum.
234 : */
235 1860693 : res = (value1 == value2);
236 : }
237 : else
238 : {
239 : Size size1,
240 : size2;
241 : char *s1,
242 : *s2;
243 :
244 : /*
245 : * Compare the bytes pointed by the pointers stored in the datums.
246 : */
247 741649 : size1 = datumGetSize(value1, typByVal, typLen);
248 741649 : size2 = datumGetSize(value2, typByVal, typLen);
249 741649 : if (size1 != size2)
250 8430 : return false;
251 733219 : s1 = (char *) DatumGetPointer(value1);
252 733219 : s2 = (char *) DatumGetPointer(value2);
253 733219 : res = (memcmp(s1, s2, size1) == 0);
254 : }
255 2593912 : return res;
256 : }
257 :
258 : /*-------------------------------------------------------------------------
259 : * datum_image_eq
260 : *
261 : * Compares two datums for identical contents when coerced to a signed integer
262 : * of typLen bytes. Return true if the two datums are equal, false otherwise.
263 : *
264 : * The coercion is required as we're not always careful to use the correct
265 : * PG_RETURN_* macro. If we didn't do this, a Datum that's been formed and
266 : * deformed into a tuple may not have the same signed representation as the
267 : * other datum value.
268 : *-------------------------------------------------------------------------
269 : */
270 : bool
271 14245692 : datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
272 : {
273 : Size len1,
274 : len2;
275 14245692 : bool result = true;
276 :
277 14245692 : if (typByVal)
278 : {
279 12093010 : switch (typLen)
280 : {
281 12264 : case sizeof(char):
282 12264 : result = (DatumGetChar(value1) == DatumGetChar(value2));
283 12264 : break;
284 219718 : case sizeof(int16):
285 219718 : result = (DatumGetInt16(value1) == DatumGetInt16(value2));
286 219718 : break;
287 11655068 : case sizeof(int32):
288 11655068 : result = (DatumGetInt32(value1) == DatumGetInt32(value2));
289 11655068 : break;
290 205960 : default:
291 205960 : result = (value1 == value2);
292 205960 : break;
293 : }
294 : }
295 2152682 : else if (typLen > 0)
296 : {
297 1600722 : result = (memcmp(DatumGetPointer(value1),
298 1600722 : DatumGetPointer(value2),
299 : typLen) == 0);
300 : }
301 551960 : else if (typLen == -1)
302 : {
303 215575 : len1 = toast_raw_datum_size(value1);
304 215575 : len2 = toast_raw_datum_size(value2);
305 : /* No need to de-toast if lengths don't match. */
306 215575 : if (len1 != len2)
307 14282 : result = false;
308 : else
309 : {
310 : varlena *arg1val;
311 : varlena *arg2val;
312 :
313 201293 : arg1val = PG_DETOAST_DATUM_PACKED(value1);
314 201293 : arg2val = PG_DETOAST_DATUM_PACKED(value2);
315 :
316 201293 : result = (memcmp(VARDATA_ANY(arg1val),
317 201293 : VARDATA_ANY(arg2val),
318 : len1 - VARHDRSZ) == 0);
319 :
320 : /* Only free memory if it's a copy made here. */
321 201293 : if (arg1val != DatumGetPointer(value1))
322 4 : pfree(arg1val);
323 201293 : if (arg2val != DatumGetPointer(value2))
324 4 : pfree(arg2val);
325 : }
326 : }
327 336385 : else if (typLen == -2)
328 : {
329 : char *s1,
330 : *s2;
331 :
332 : /* Compare cstring datums */
333 336385 : s1 = DatumGetCString(value1);
334 336385 : s2 = DatumGetCString(value2);
335 336385 : len1 = strlen(s1) + 1;
336 336385 : len2 = strlen(s2) + 1;
337 336385 : if (len1 != len2)
338 180669 : return false;
339 155716 : result = (memcmp(s1, s2, len1) == 0);
340 : }
341 : else
342 0 : elog(ERROR, "unexpected typLen: %d", typLen);
343 :
344 14065023 : return result;
345 : }
346 :
347 : /*-------------------------------------------------------------------------
348 : * datum_image_hash
349 : *
350 : * Generate a hash value based on the binary representation of 'value' when
351 : * represented as a signed integer of typLen bytes. Most use cases will want
352 : * to use the hash function specific to the Datum's type, however, some corner
353 : * cases require generating a hash value based on the actual bits rather than
354 : * the logical value.
355 : *-------------------------------------------------------------------------
356 : */
357 : uint32
358 80826 : datum_image_hash(Datum value, bool typByVal, int typLen)
359 : {
360 : Size len;
361 : uint32 result;
362 :
363 80826 : if (typByVal)
364 : {
365 80690 : switch (typLen)
366 : {
367 0 : case sizeof(char):
368 0 : value = CharGetDatum(DatumGetChar(value));
369 0 : break;
370 0 : case sizeof(int16):
371 0 : value = Int16GetDatum(DatumGetInt16(value));
372 0 : break;
373 80658 : case sizeof(int32):
374 80658 : value = Int32GetDatum(DatumGetInt32(value));
375 80658 : break;
376 : /* Nothing needs done for 64-bit types */
377 : }
378 :
379 80690 : result = hash_bytes((unsigned char *) &value, sizeof(Datum));
380 : }
381 136 : else if (typLen > 0)
382 24 : result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
383 112 : else if (typLen == -1)
384 : {
385 : varlena *val;
386 :
387 112 : len = toast_raw_datum_size(value);
388 :
389 112 : val = PG_DETOAST_DATUM_PACKED(value);
390 :
391 112 : result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
392 :
393 : /* Only free memory if it's a copy made here. */
394 112 : if (val != DatumGetPointer(value))
395 8 : pfree(val);
396 : }
397 0 : else if (typLen == -2)
398 : {
399 : char *s;
400 :
401 0 : s = DatumGetCString(value);
402 0 : len = strlen(s) + 1;
403 :
404 0 : result = hash_bytes((unsigned char *) s, len);
405 : }
406 : else
407 : {
408 0 : elog(ERROR, "unexpected typLen: %d", typLen);
409 : result = 0; /* keep compiler quiet */
410 : }
411 :
412 80826 : return result;
413 : }
414 :
415 : /*-------------------------------------------------------------------------
416 : * btequalimage
417 : *
418 : * Generic "equalimage" support function.
419 : *
420 : * B-Tree operator classes whose equality function could safely be replaced by
421 : * datum_image_eq() in all cases can use this as their "equalimage" support
422 : * function.
423 : *
424 : * Currently, we unconditionally assume that any B-Tree operator class that
425 : * registers btequalimage as its support function 4 must be able to safely use
426 : * optimizations like deduplication (i.e. we return true unconditionally). If
427 : * it ever proved necessary to rescind support for an operator class, we could
428 : * do that in a targeted fashion by doing something with the opcintype
429 : * argument.
430 : *-------------------------------------------------------------------------
431 : */
432 : Datum
433 55186 : btequalimage(PG_FUNCTION_ARGS)
434 : {
435 : #ifdef NOT_USED
436 : Oid opcintype = PG_GETARG_OID(0);
437 : #endif
438 :
439 55186 : PG_RETURN_BOOL(true);
440 : }
441 :
442 : /*-------------------------------------------------------------------------
443 : * datumEstimateSpace
444 : *
445 : * Compute the amount of space that datumSerialize will require for a
446 : * particular Datum.
447 : *-------------------------------------------------------------------------
448 : */
449 : Size
450 100 : datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
451 : {
452 100 : Size sz = sizeof(int);
453 :
454 100 : if (!isnull)
455 : {
456 : /* no need to use add_size, can't overflow */
457 100 : if (typByVal)
458 88 : sz += sizeof(Datum);
459 24 : else if (typLen == -1 &&
460 12 : VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
461 : {
462 : /* Expanded objects need to be flattened, see comment below */
463 4 : sz += EOH_get_flat_size(DatumGetEOHP(value));
464 : }
465 : else
466 8 : sz += datumGetSize(value, typByVal, typLen);
467 : }
468 :
469 100 : return sz;
470 : }
471 :
472 : /*-------------------------------------------------------------------------
473 : * datumSerialize
474 : *
475 : * Serialize a possibly-NULL datum into caller-provided storage.
476 : *
477 : * Note: "expanded" objects are flattened so as to produce a self-contained
478 : * representation, but other sorts of toast pointers are transferred as-is.
479 : * This is because the intended use of this function is to pass the value
480 : * to another process within the same database server. The other process
481 : * could not access an "expanded" object within this process's memory, but
482 : * we assume it can dereference the same TOAST pointers this one can.
483 : *
484 : * The format is as follows: first, we write a 4-byte header word, which
485 : * is either the length of a pass-by-reference datum, -1 for a
486 : * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing
487 : * further is written. If it is pass-by-value, sizeof(Datum) bytes
488 : * follow. Otherwise, the number of bytes indicated by the header word
489 : * follow. The caller is responsible for ensuring that there is enough
490 : * storage to store the number of bytes that will be written; use
491 : * datumEstimateSpace() to find out how many will be needed.
492 : * *start_address is updated to point to the byte immediately following
493 : * those written.
494 : *-------------------------------------------------------------------------
495 : */
496 : void
497 76 : datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
498 : char **start_address)
499 : {
500 76 : ExpandedObjectHeader *eoh = NULL;
501 : int header;
502 :
503 : /* Write header word. */
504 76 : if (isnull)
505 0 : header = -2;
506 76 : else if (typByVal)
507 64 : header = -1;
508 24 : else if (typLen == -1 &&
509 12 : VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
510 : {
511 4 : eoh = DatumGetEOHP(value);
512 4 : header = EOH_get_flat_size(eoh);
513 : }
514 : else
515 8 : header = datumGetSize(value, typByVal, typLen);
516 76 : memcpy(*start_address, &header, sizeof(int));
517 76 : *start_address += sizeof(int);
518 :
519 : /* If not null, write payload bytes. */
520 76 : if (!isnull)
521 : {
522 76 : if (typByVal)
523 : {
524 64 : memcpy(*start_address, &value, sizeof(Datum));
525 64 : *start_address += sizeof(Datum);
526 : }
527 12 : else if (eoh)
528 : {
529 : char *tmp;
530 :
531 : /*
532 : * EOH_flatten_into expects the target address to be maxaligned,
533 : * so we can't store directly to *start_address.
534 : */
535 4 : tmp = (char *) palloc(header);
536 4 : EOH_flatten_into(eoh, tmp, header);
537 4 : memcpy(*start_address, tmp, header);
538 4 : *start_address += header;
539 :
540 : /* be tidy. */
541 4 : pfree(tmp);
542 : }
543 : else
544 : {
545 8 : memcpy(*start_address, DatumGetPointer(value), header);
546 8 : *start_address += header;
547 : }
548 : }
549 76 : }
550 :
551 : /*-------------------------------------------------------------------------
552 : * datumRestore
553 : *
554 : * Restore a possibly-NULL datum previously serialized by datumSerialize.
555 : * *start_address is updated according to the number of bytes consumed.
556 : *-------------------------------------------------------------------------
557 : */
558 : Datum
559 178 : datumRestore(char **start_address, bool *isnull)
560 : {
561 : int header;
562 : void *d;
563 :
564 : /* Read header word. */
565 178 : memcpy(&header, *start_address, sizeof(int));
566 178 : *start_address += sizeof(int);
567 :
568 : /* If this datum is NULL, we can stop here. */
569 178 : if (header == -2)
570 : {
571 0 : *isnull = true;
572 0 : return (Datum) 0;
573 : }
574 :
575 : /* OK, datum is not null. */
576 178 : *isnull = false;
577 :
578 : /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
579 178 : if (header == -1)
580 : {
581 : Datum val;
582 :
583 138 : memcpy(&val, *start_address, sizeof(Datum));
584 138 : *start_address += sizeof(Datum);
585 138 : return val;
586 : }
587 :
588 : /* Pass-by-reference case; copy indicated number of bytes. */
589 : Assert(header > 0);
590 40 : d = palloc(header);
591 40 : memcpy(d, *start_address, header);
592 40 : *start_address += header;
593 40 : return PointerGetDatum(d);
594 : }
|