LCOV - code coverage report
Current view: top level - src/backend/utils/adt - datum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 143 155 92.3 %
Date: 2025-01-18 05:15:39 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * datum.c
       4             :  *    POSTGRES Datum (abstract data type) manipulation routines.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/datum.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : /*
      17             :  * In the implementation of these routines we assume the following:
      18             :  *
      19             :  * A) if a type is "byVal" then all the information is stored in the
      20             :  * Datum itself (i.e. no pointers involved!). In this case the
      21             :  * length of the type is always greater than zero and not more than
      22             :  * "sizeof(Datum)"
      23             :  *
      24             :  * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
      25             :  * then the "Datum" always contains a pointer to a stream of bytes.
      26             :  * The number of significant bytes is always equal to typlen.
      27             :  *
      28             :  * C) if a type is not "byVal" and has typlen == -1,
      29             :  * then the "Datum" always points to a "struct varlena".
      30             :  * This varlena structure has information about the actual length of this
      31             :  * particular instance of the type and about its value.
      32             :  *
      33             :  * D) if a type is not "byVal" and has typlen == -2,
      34             :  * then the "Datum" always points to a null-terminated C string.
      35             :  *
      36             :  * Note that we do not treat "toasted" datums specially; therefore what
      37             :  * will be copied or compared is the compressed data or toast reference.
      38             :  * An exception is made for datumCopy() of an expanded object, however,
      39             :  * because most callers expect to get a simple contiguous (and pfree'able)
      40             :  * result from datumCopy().  See also datumTransfer().
      41             :  */
      42             : 
      43             : #include "postgres.h"
      44             : 
      45             : #include "access/detoast.h"
      46             : #include "common/hashfn.h"
      47             : #include "fmgr.h"
      48             : #include "utils/datum.h"
      49             : #include "utils/expandeddatum.h"
      50             : #include "utils/fmgrprotos.h"
      51             : 
      52             : 
      53             : /*-------------------------------------------------------------------------
      54             :  * datumGetSize
      55             :  *
      56             :  * Find the "real" size of a datum, given the datum value,
      57             :  * whether its type is pass-by-value, and the declared type length.
      58             :  * (For TOAST pointer datums, this is the size of the pointer datum.)
      59             :  *
      60             :  * This is essentially an out-of-line version of the att_addlength_datum()
      61             :  * macro in access/tupmacs.h.  We do a tad more error checking though.
      62             :  *-------------------------------------------------------------------------
      63             :  */
      64             : Size
      65    10318594 : datumGetSize(Datum value, bool typByVal, int typLen)
      66             : {
      67             :     Size        size;
      68             : 
      69    10318594 :     if (typByVal)
      70             :     {
      71             :         /* Pass-by-value types are always fixed-length */
      72             :         Assert(typLen > 0 && typLen <= sizeof(Datum));
      73     1551324 :         size = (Size) typLen;
      74             :     }
      75             :     else
      76             :     {
      77     8767270 :         if (typLen > 0)
      78             :         {
      79             :             /* Fixed-length pass-by-ref type */
      80     6457450 :             size = (Size) typLen;
      81             :         }
      82     2309820 :         else if (typLen == -1)
      83             :         {
      84             :             /* It is a varlena datatype */
      85     2276616 :             struct varlena *s = (struct varlena *) DatumGetPointer(value);
      86             : 
      87     2276616 :             if (!PointerIsValid(s))
      88           0 :                 ereport(ERROR,
      89             :                         (errcode(ERRCODE_DATA_EXCEPTION),
      90             :                          errmsg("invalid Datum pointer")));
      91             : 
      92     2276616 :             size = (Size) VARSIZE_ANY(s);
      93             :         }
      94       33204 :         else if (typLen == -2)
      95             :         {
      96             :             /* It is a cstring datatype */
      97       33204 :             char       *s = (char *) DatumGetPointer(value);
      98             : 
      99       33204 :             if (!PointerIsValid(s))
     100           0 :                 ereport(ERROR,
     101             :                         (errcode(ERRCODE_DATA_EXCEPTION),
     102             :                          errmsg("invalid Datum pointer")));
     103             : 
     104       33204 :             size = (Size) (strlen(s) + 1);
     105             :         }
     106             :         else
     107             :         {
     108           0 :             elog(ERROR, "invalid typLen: %d", typLen);
     109             :             size = 0;           /* keep compiler quiet */
     110             :         }
     111             :     }
     112             : 
     113    10318594 :     return size;
     114             : }
     115             : 
     116             : /*-------------------------------------------------------------------------
     117             :  * datumCopy
     118             :  *
     119             :  * Make a copy of a non-NULL datum.
     120             :  *
     121             :  * If the datatype is pass-by-reference, memory is obtained with palloc().
     122             :  *
     123             :  * If the value is a reference to an expanded object, we flatten into memory
     124             :  * obtained with palloc().  We need to copy because one of the main uses of
     125             :  * this function is to copy a datum out of a transient memory context that's
     126             :  * about to be destroyed, and the expanded object is probably in a child
     127             :  * context that will also go away.  Moreover, many callers assume that the
     128             :  * result is a single pfree-able chunk.
     129             :  *-------------------------------------------------------------------------
     130             :  */
     131             : Datum
     132    24043426 : datumCopy(Datum value, bool typByVal, int typLen)
     133             : {
     134             :     Datum       res;
     135             : 
     136    24043426 :     if (typByVal)
     137    12077640 :         res = value;
     138    11965786 :     else if (typLen == -1)
     139             :     {
     140             :         /* It is a varlena datatype */
     141     6638536 :         struct varlena *vl = (struct varlena *) DatumGetPointer(value);
     142             : 
     143     6638536 :         if (VARATT_IS_EXTERNAL_EXPANDED(vl))
     144         878 :         {
     145             :             /* Flatten into the caller's memory context */
     146         878 :             ExpandedObjectHeader *eoh = DatumGetEOHP(value);
     147             :             Size        resultsize;
     148             :             char       *resultptr;
     149             : 
     150         878 :             resultsize = EOH_get_flat_size(eoh);
     151         878 :             resultptr = (char *) palloc(resultsize);
     152         878 :             EOH_flatten_into(eoh, resultptr, resultsize);
     153         878 :             res = PointerGetDatum(resultptr);
     154             :         }
     155             :         else
     156             :         {
     157             :             /* Otherwise, just copy the varlena datum verbatim */
     158             :             Size        realSize;
     159             :             char       *resultptr;
     160             : 
     161     6637658 :             realSize = (Size) VARSIZE_ANY(vl);
     162     6637658 :             resultptr = (char *) palloc(realSize);
     163     6637658 :             memcpy(resultptr, vl, realSize);
     164     6637658 :             res = PointerGetDatum(resultptr);
     165             :         }
     166             :     }
     167             :     else
     168             :     {
     169             :         /* Pass by reference, but not varlena, so not toasted */
     170             :         Size        realSize;
     171             :         char       *resultptr;
     172             : 
     173     5327250 :         realSize = datumGetSize(value, typByVal, typLen);
     174             : 
     175     5327250 :         resultptr = (char *) palloc(realSize);
     176     5327250 :         memcpy(resultptr, DatumGetPointer(value), realSize);
     177     5327250 :         res = PointerGetDatum(resultptr);
     178             :     }
     179    24043426 :     return res;
     180             : }
     181             : 
     182             : /*-------------------------------------------------------------------------
     183             :  * datumTransfer
     184             :  *
     185             :  * Transfer a non-NULL datum into the current memory context.
     186             :  *
     187             :  * This is equivalent to datumCopy() except when the datum is a read-write
     188             :  * pointer to an expanded object.  In that case we merely reparent the object
     189             :  * into the current context, and return its standard R/W pointer (in case the
     190             :  * given one is a transient pointer of shorter lifespan).
     191             :  *-------------------------------------------------------------------------
     192             :  */
     193             : Datum
     194      104354 : datumTransfer(Datum value, bool typByVal, int typLen)
     195             : {
     196      104354 :     if (!typByVal && typLen == -1 &&
     197      101714 :         VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
     198        3744 :         value = TransferExpandedObject(value, CurrentMemoryContext);
     199             :     else
     200      100610 :         value = datumCopy(value, typByVal, typLen);
     201      104354 :     return value;
     202             : }
     203             : 
     204             : /*-------------------------------------------------------------------------
     205             :  * datumIsEqual
     206             :  *
     207             :  * Return true if two datums are equal, false otherwise
     208             :  *
     209             :  * NOTE: XXX!
     210             :  * We just compare the bytes of the two values, one by one.
     211             :  * This routine will return false if there are two different
     212             :  * representations of the same value (for example, positive and
     213             :  * negative zero in one's complement arithmetic).
     214             :  * Also, it will probably not give the answer you want if either
     215             :  * datum has been "toasted".
     216             :  *
     217             :  * Do not try to make this any smarter than it currently is with respect
     218             :  * to "toasted" datums, because some of the callers could be working in the
     219             :  * context of an aborted transaction.
     220             :  *-------------------------------------------------------------------------
     221             :  */
     222             : bool
     223     3489394 : datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
     224             : {
     225             :     bool        res;
     226             : 
     227     3489394 :     if (typByVal)
     228             :     {
     229             :         /*
     230             :          * just compare the two datums. NOTE: just comparing "len" bytes will
     231             :          * not do the work, because we do not know how these bytes are aligned
     232             :          * inside the "Datum".  We assume instead that any given datatype is
     233             :          * consistent about how it fills extraneous bits in the Datum.
     234             :          */
     235     2529836 :         res = (value1 == value2);
     236             :     }
     237             :     else
     238             :     {
     239             :         Size        size1,
     240             :                     size2;
     241             :         char       *s1,
     242             :                    *s2;
     243             : 
     244             :         /*
     245             :          * Compare the bytes pointed to by the pointers stored in the datums.
     246             :          */
     247      959558 :         size1 = datumGetSize(value1, typByVal, typLen);
     248      959558 :         size2 = datumGetSize(value2, typByVal, typLen);
     249      959558 :         if (size1 != size2)
     250       13458 :             return false;
     251      946100 :         s1 = (char *) DatumGetPointer(value1);
     252      946100 :         s2 = (char *) DatumGetPointer(value2);
     253      946100 :         res = (memcmp(s1, s2, size1) == 0);
     254             :     }
     255     3475936 :     return res;
     256             : }
     257             : 
     258             : /*-------------------------------------------------------------------------
     259             :  * datum_image_eq
     260             :  *
     261             :  * Compares two datums for identical contents, based on byte images.  Return
     262             :  * true if the two datums are equal, false otherwise.
     263             :  *-------------------------------------------------------------------------
     264             :  */
     265             : bool
     266    19131974 : datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
     267             : {
     268             :     Size        len1,
     269             :                 len2;
     270    19131974 :     bool        result = true;
     271             : 
     272    19131974 :     if (typByVal)
     273             :     {
     274    15973862 :         result = (value1 == value2);
     275             :     }
     276     3158112 :     else if (typLen > 0)
     277             :     {
     278     2401080 :         result = (memcmp(DatumGetPointer(value1),
     279     2401080 :                          DatumGetPointer(value2),
     280             :                          typLen) == 0);
     281             :     }
     282      757032 :     else if (typLen == -1)
     283             :     {
     284      322644 :         len1 = toast_raw_datum_size(value1);
     285      322644 :         len2 = toast_raw_datum_size(value2);
     286             :         /* No need to de-toast if lengths don't match. */
     287      322644 :         if (len1 != len2)
     288       21188 :             result = false;
     289             :         else
     290             :         {
     291             :             struct varlena *arg1val;
     292             :             struct varlena *arg2val;
     293             : 
     294      301456 :             arg1val = PG_DETOAST_DATUM_PACKED(value1);
     295      301456 :             arg2val = PG_DETOAST_DATUM_PACKED(value2);
     296             : 
     297      301456 :             result = (memcmp(VARDATA_ANY(arg1val),
     298      301456 :                              VARDATA_ANY(arg2val),
     299             :                              len1 - VARHDRSZ) == 0);
     300             : 
     301             :             /* Only free memory if it's a copy made here. */
     302      301456 :             if ((Pointer) arg1val != (Pointer) value1)
     303           6 :                 pfree(arg1val);
     304      301456 :             if ((Pointer) arg2val != (Pointer) value2)
     305           6 :                 pfree(arg2val);
     306             :         }
     307             :     }
     308      434388 :     else if (typLen == -2)
     309             :     {
     310             :         char       *s1,
     311             :                    *s2;
     312             : 
     313             :         /* Compare cstring datums */
     314      434388 :         s1 = DatumGetCString(value1);
     315      434388 :         s2 = DatumGetCString(value2);
     316      434388 :         len1 = strlen(s1) + 1;
     317      434388 :         len2 = strlen(s2) + 1;
     318      434388 :         if (len1 != len2)
     319      225148 :             return false;
     320      209240 :         result = (memcmp(s1, s2, len1) == 0);
     321             :     }
     322             :     else
     323           0 :         elog(ERROR, "unexpected typLen: %d", typLen);
     324             : 
     325    18906826 :     return result;
     326             : }
     327             : 
     328             : /*-------------------------------------------------------------------------
     329             :  * datum_image_hash
     330             :  *
     331             :  * Generate a hash value based on the binary representation of 'value'.  Most
     332             :  * use cases will want to use the hash function specific to the Datum's type,
     333             :  * however, some corner cases require generating a hash value based on the
     334             :  * actual bits rather than the logical value.
     335             :  *-------------------------------------------------------------------------
     336             :  */
     337             : uint32
     338      118172 : datum_image_hash(Datum value, bool typByVal, int typLen)
     339             : {
     340             :     Size        len;
     341             :     uint32      result;
     342             : 
     343      118172 :     if (typByVal)
     344      117968 :         result = hash_bytes((unsigned char *) &value, sizeof(Datum));
     345         204 :     else if (typLen > 0)
     346          36 :         result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
     347         168 :     else if (typLen == -1)
     348             :     {
     349             :         struct varlena *val;
     350             : 
     351         168 :         len = toast_raw_datum_size(value);
     352             : 
     353         168 :         val = PG_DETOAST_DATUM_PACKED(value);
     354             : 
     355         168 :         result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
     356             : 
     357             :         /* Only free memory if it's a copy made here. */
     358         168 :         if ((Pointer) val != (Pointer) value)
     359          12 :             pfree(val);
     360             :     }
     361           0 :     else if (typLen == -2)
     362             :     {
     363             :         char       *s;
     364             : 
     365           0 :         s = DatumGetCString(value);
     366           0 :         len = strlen(s) + 1;
     367             : 
     368           0 :         result = hash_bytes((unsigned char *) s, len);
     369             :     }
     370             :     else
     371             :     {
     372           0 :         elog(ERROR, "unexpected typLen: %d", typLen);
     373             :         result = 0;             /* keep compiler quiet */
     374             :     }
     375             : 
     376      118172 :     return result;
     377             : }
     378             : 
     379             : /*-------------------------------------------------------------------------
     380             :  * btequalimage
     381             :  *
     382             :  * Generic "equalimage" support function.
     383             :  *
     384             :  * B-Tree operator classes whose equality function could safely be replaced by
     385             :  * datum_image_eq() in all cases can use this as their "equalimage" support
     386             :  * function.
     387             :  *
     388             :  * Currently, we unconditionally assume that any B-Tree operator class that
     389             :  * registers btequalimage as its support function 4 must be able to safely use
     390             :  * optimizations like deduplication (i.e. we return true unconditionally).  If
     391             :  * it ever proved necessary to rescind support for an operator class, we could
     392             :  * do that in a targeted fashion by doing something with the opcintype
     393             :  * argument.
     394             :  *-------------------------------------------------------------------------
     395             :  */
     396             : Datum
     397       83356 : btequalimage(PG_FUNCTION_ARGS)
     398             : {
     399             :     /* Oid      opcintype = PG_GETARG_OID(0); */
     400             : 
     401       83356 :     PG_RETURN_BOOL(true);
     402             : }
     403             : 
     404             : /*-------------------------------------------------------------------------
     405             :  * datumEstimateSpace
     406             :  *
     407             :  * Compute the amount of space that datumSerialize will require for a
     408             :  * particular Datum.
     409             :  *-------------------------------------------------------------------------
     410             :  */
     411             : Size
     412         114 : datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
     413             : {
     414         114 :     Size        sz = sizeof(int);
     415             : 
     416         114 :     if (!isnull)
     417             :     {
     418             :         /* no need to use add_size, can't overflow */
     419         114 :         if (typByVal)
     420          96 :             sz += sizeof(Datum);
     421          18 :         else if (typLen == -1 &&
     422          18 :                  VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
     423             :         {
     424             :             /* Expanded objects need to be flattened, see comment below */
     425           6 :             sz += EOH_get_flat_size(DatumGetEOHP(value));
     426             :         }
     427             :         else
     428          12 :             sz += datumGetSize(value, typByVal, typLen);
     429             :     }
     430             : 
     431         114 :     return sz;
     432             : }
     433             : 
     434             : /*-------------------------------------------------------------------------
     435             :  * datumSerialize
     436             :  *
     437             :  * Serialize a possibly-NULL datum into caller-provided storage.
     438             :  *
     439             :  * Note: "expanded" objects are flattened so as to produce a self-contained
     440             :  * representation, but other sorts of toast pointers are transferred as-is.
     441             :  * This is because the intended use of this function is to pass the value
     442             :  * to another process within the same database server.  The other process
     443             :  * could not access an "expanded" object within this process's memory, but
     444             :  * we assume it can dereference the same TOAST pointers this one can.
     445             :  *
     446             :  * The format is as follows: first, we write a 4-byte header word, which
     447             :  * is either the length of a pass-by-reference datum, -1 for a
     448             :  * pass-by-value datum, or -2 for a NULL.  If the value is NULL, nothing
     449             :  * further is written.  If it is pass-by-value, sizeof(Datum) bytes
     450             :  * follow.  Otherwise, the number of bytes indicated by the header word
     451             :  * follow.  The caller is responsible for ensuring that there is enough
     452             :  * storage to store the number of bytes that will be written; use
     453             :  * datumEstimateSpace() to find out how many will be needed.
     454             :  * *start_address is updated to point to the byte immediately following
     455             :  * those written.
     456             :  *-------------------------------------------------------------------------
     457             :  */
     458             : void
     459         114 : datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
     460             :                char **start_address)
     461             : {
     462         114 :     ExpandedObjectHeader *eoh = NULL;
     463             :     int         header;
     464             : 
     465             :     /* Write header word. */
     466         114 :     if (isnull)
     467           0 :         header = -2;
     468         114 :     else if (typByVal)
     469          96 :         header = -1;
     470          18 :     else if (typLen == -1 &&
     471          18 :              VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
     472             :     {
     473           6 :         eoh = DatumGetEOHP(value);
     474           6 :         header = EOH_get_flat_size(eoh);
     475             :     }
     476             :     else
     477          12 :         header = datumGetSize(value, typByVal, typLen);
     478         114 :     memcpy(*start_address, &header, sizeof(int));
     479         114 :     *start_address += sizeof(int);
     480             : 
     481             :     /* If not null, write payload bytes. */
     482         114 :     if (!isnull)
     483             :     {
     484         114 :         if (typByVal)
     485             :         {
     486          96 :             memcpy(*start_address, &value, sizeof(Datum));
     487          96 :             *start_address += sizeof(Datum);
     488             :         }
     489          18 :         else if (eoh)
     490             :         {
     491             :             char       *tmp;
     492             : 
     493             :             /*
     494             :              * EOH_flatten_into expects the target address to be maxaligned,
     495             :              * so we can't store directly to *start_address.
     496             :              */
     497           6 :             tmp = (char *) palloc(header);
     498           6 :             EOH_flatten_into(eoh, tmp, header);
     499           6 :             memcpy(*start_address, tmp, header);
     500           6 :             *start_address += header;
     501             : 
     502             :             /* be tidy. */
     503           6 :             pfree(tmp);
     504             :         }
     505             :         else
     506             :         {
     507          12 :             memcpy(*start_address, DatumGetPointer(value), header);
     508          12 :             *start_address += header;
     509             :         }
     510             :     }
     511         114 : }
     512             : 
     513             : /*-------------------------------------------------------------------------
     514             :  * datumRestore
     515             :  *
     516             :  * Restore a possibly-NULL datum previously serialized by datumSerialize.
     517             :  * *start_address is updated according to the number of bytes consumed.
     518             :  *-------------------------------------------------------------------------
     519             :  */
     520             : Datum
     521         276 : datumRestore(char **start_address, bool *isnull)
     522             : {
     523             :     int         header;
     524             :     void       *d;
     525             : 
     526             :     /* Read header word. */
     527         276 :     memcpy(&header, *start_address, sizeof(int));
     528         276 :     *start_address += sizeof(int);
     529             : 
     530             :     /* If this datum is NULL, we can stop here. */
     531         276 :     if (header == -2)
     532             :     {
     533           0 :         *isnull = true;
     534           0 :         return (Datum) 0;
     535             :     }
     536             : 
     537             :     /* OK, datum is not null. */
     538         276 :     *isnull = false;
     539             : 
     540             :     /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
     541         276 :     if (header == -1)
     542             :     {
     543             :         Datum       val;
     544             : 
     545         216 :         memcpy(&val, *start_address, sizeof(Datum));
     546         216 :         *start_address += sizeof(Datum);
     547         216 :         return val;
     548             :     }
     549             : 
     550             :     /* Pass-by-reference case; copy indicated number of bytes. */
     551             :     Assert(header > 0);
     552          60 :     d = palloc(header);
     553          60 :     memcpy(d, *start_address, header);
     554          60 :     *start_address += header;
     555          60 :     return PointerGetDatum(d);
     556             : }

Generated by: LCOV version 1.14