LCOV - code coverage report
Current view: top level - src/include/utils - jsonb.h (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 6 6 100.0 %
Date: 2024-04-19 17:11:40 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * jsonb.h
       4             :  *    Declarations for jsonb data type support.
       5             :  *
       6             :  * Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  * src/include/utils/jsonb.h
       9             :  *
      10             :  *-------------------------------------------------------------------------
      11             :  */
      12             : #ifndef __JSONB_H__
      13             : #define __JSONB_H__
      14             : 
      15             : #include "lib/stringinfo.h"
      16             : #include "utils/array.h"
      17             : #include "utils/numeric.h"
      18             : 
      19             : /* Tokens used when sequentially processing a jsonb value */
      20             : typedef enum
      21             : {
      22             :     WJB_DONE,
      23             :     WJB_KEY,
      24             :     WJB_VALUE,
      25             :     WJB_ELEM,
      26             :     WJB_BEGIN_ARRAY,
      27             :     WJB_END_ARRAY,
      28             :     WJB_BEGIN_OBJECT,
      29             :     WJB_END_OBJECT,
      30             : } JsonbIteratorToken;
      31             : 
      32             : /* Strategy numbers for GIN index opclasses */
      33             : #define JsonbContainsStrategyNumber     7
      34             : #define JsonbExistsStrategyNumber       9
      35             : #define JsonbExistsAnyStrategyNumber    10
      36             : #define JsonbExistsAllStrategyNumber    11
      37             : #define JsonbJsonpathExistsStrategyNumber       15
      38             : #define JsonbJsonpathPredicateStrategyNumber    16
      39             : 
      40             : 
      41             : /*
      42             :  * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
      43             :  * keys and values.  The storage format is text.  The first byte of the text
      44             :  * string distinguishes whether this is a key (always a string), null value,
      45             :  * boolean value, numeric value, or string value.  However, array elements
      46             :  * that are strings are marked as though they were keys; this imprecision
      47             :  * supports the definition of the "exists" operator, which treats array
      48             :  * elements like keys.  The remainder of the text string is empty for a null
      49             :  * value, "t" or "f" for a boolean value, a normalized print representation of
      50             :  * a numeric value, or the text of a string value.  However, if the length of
      51             :  * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
      52             :  * the text representation and store an 8-hex-digit representation of the
      53             :  * uint32 hash value, marking the prefix byte with an additional bit to
      54             :  * distinguish that this has happened.  Hashing long strings saves space and
      55             :  * ensures that we won't overrun the maximum entry length for a GIN index.
      56             :  * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit.  It's chosen
      57             :  * to ensure that the on-disk text datum will have a short varlena header.)
      58             :  * Note that when any hashed item appears in a query, we must recheck index
      59             :  * matches against the heap tuple; currently, this costs nothing because we
      60             :  * must always recheck for other reasons.
      61             :  */
      62             : #define JGINFLAG_KEY    0x01    /* key (or string array element) */
      63             : #define JGINFLAG_NULL   0x02    /* null value */
      64             : #define JGINFLAG_BOOL   0x03    /* boolean value */
      65             : #define JGINFLAG_NUM    0x04    /* numeric value */
      66             : #define JGINFLAG_STR    0x05    /* string value (if not an array element) */
      67             : #define JGINFLAG_HASHED 0x10    /* OR'd into flag if value was hashed */
      68             : #define JGIN_MAXLENGTH  125     /* max length of text part before hashing */
      69             : 
      70             : typedef struct JsonbPair JsonbPair;
      71             : typedef struct JsonbValue JsonbValue;
      72             : 
      73             : /*
      74             :  * Jsonbs are varlena objects, so must meet the varlena convention that the
      75             :  * first int32 of the object contains the total object size in bytes.  Be sure
      76             :  * to use VARSIZE() and SET_VARSIZE() to access it, though!
      77             :  *
      78             :  * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
      79             :  * representation.  Often, JsonbValues are just shims through which a Jsonb
      80             :  * buffer is accessed, but they can also be deep copied and passed around.
      81             :  *
      82             :  * Jsonb is a tree structure. Each node in the tree consists of a JEntry
      83             :  * header and a variable-length content (possibly of zero size).  The JEntry
      84             :  * header indicates what kind of a node it is, e.g. a string or an array,
      85             :  * and provides the length of its variable-length portion.
      86             :  *
      87             :  * The JEntry and the content of a node are not stored physically together.
      88             :  * Instead, the container array or object has an array that holds the JEntrys
      89             :  * of all the child nodes, followed by their variable-length portions.
      90             :  *
      91             :  * The root node is an exception; it has no parent array or object that could
      92             :  * hold its JEntry. Hence, no JEntry header is stored for the root node.  It
      93             :  * is implicitly known that the root node must be an array or an object,
      94             :  * so we can get away without the type indicator as long as we can distinguish
      95             :  * the two.  For that purpose, both an array and an object begin with a uint32
      96             :  * header field, which contains a JB_FOBJECT or JB_FARRAY flag.  When a naked
      97             :  * scalar value needs to be stored as a Jsonb value, what we actually store is
      98             :  * an array with one element, with the flags in the array's header field set
      99             :  * to JB_FSCALAR | JB_FARRAY.
     100             :  *
     101             :  * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
     102             :  * the variable-length portion of some node types is aligned to a 4-byte
     103             :  * boundary, while others are not. When alignment is needed, the padding is
     104             :  * at the beginning of the node that requires it. For example, if a numeric
     105             :  * node is stored after a string node, so that the numeric node begins at
     106             :  * offset 3, the variable-length portion of the numeric node will begin with
     107             :  * one padding byte so that the actual numeric data is 4-byte aligned.
     108             :  */
     109             : 
     110             : /*
     111             :  * JEntry format.
     112             :  *
     113             :  * The least significant 28 bits store either the data length of the entry,
     114             :  * or its end+1 offset from the start of the variable-length portion of the
     115             :  * containing object.  The next three bits store the type of the entry, and
     116             :  * the high-order bit tells whether the least significant bits store a length
     117             :  * or an offset.
     118             :  *
     119             :  * The reason for the offset-or-length complication is to compromise between
     120             :  * access speed and data compressibility.  In the initial design each JEntry
     121             :  * always stored an offset, but this resulted in JEntry arrays with horrible
     122             :  * compressibility properties, so that TOAST compression of a JSONB did not
     123             :  * work well.  Storing only lengths would greatly improve compressibility,
     124             :  * but it makes random access into large arrays expensive (O(N) not O(1)).
     125             :  * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
     126             :  * a length in the rest.  This results in reasonably compressible data (as
     127             :  * long as the stride isn't too small).  We may have to examine as many as
     128             :  * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
     129             :  * given item, but that's still O(1) no matter how large the container is.
     130             :  *
     131             :  * We could avoid eating a flag bit for this purpose if we were to store
     132             :  * the stride in the container header, or if we were willing to treat the
     133             :  * stride as an unchangeable constant.  Neither of those options is very
     134             :  * attractive though.
     135             :  */
     136             : typedef uint32 JEntry;
     137             : 
     138             : #define JENTRY_OFFLENMASK       0x0FFFFFFF
     139             : #define JENTRY_TYPEMASK         0x70000000
     140             : #define JENTRY_HAS_OFF          0x80000000
     141             : 
     142             : /* values stored in the type bits */
     143             : #define JENTRY_ISSTRING         0x00000000
     144             : #define JENTRY_ISNUMERIC        0x10000000
     145             : #define JENTRY_ISBOOL_FALSE     0x20000000
     146             : #define JENTRY_ISBOOL_TRUE      0x30000000
     147             : #define JENTRY_ISNULL           0x40000000
     148             : #define JENTRY_ISCONTAINER      0x50000000  /* array or object */
     149             : 
     150             : /* Access macros.  Note possible multiple evaluations */
     151             : #define JBE_OFFLENFLD(je_)      ((je_) & JENTRY_OFFLENMASK)
     152             : #define JBE_HAS_OFF(je_)        (((je_) & JENTRY_HAS_OFF) != 0)
     153             : #define JBE_ISSTRING(je_)       (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
     154             : #define JBE_ISNUMERIC(je_)      (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
     155             : #define JBE_ISCONTAINER(je_)    (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
     156             : #define JBE_ISNULL(je_)         (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
     157             : #define JBE_ISBOOL_TRUE(je_)    (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
     158             : #define JBE_ISBOOL_FALSE(je_)   (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
     159             : #define JBE_ISBOOL(je_)         (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
     160             : 
     161             : /* Macro for advancing an offset variable to the next JEntry */
     162             : #define JBE_ADVANCE_OFFSET(offset, je) \
     163             :     do { \
     164             :         JEntry  je_ = (je); \
     165             :         if (JBE_HAS_OFF(je_)) \
     166             :             (offset) = JBE_OFFLENFLD(je_); \
     167             :         else \
     168             :             (offset) += JBE_OFFLENFLD(je_); \
     169             :     } while(0)
     170             : 
     171             : /*
     172             :  * We store an offset, not a length, every JB_OFFSET_STRIDE children.
     173             :  * Caution: this macro should only be referenced when creating a JSONB
     174             :  * value.  When examining an existing value, pay attention to the HAS_OFF
     175             :  * bits instead.  This allows changes in the offset-placement heuristic
     176             :  * without breaking on-disk compatibility.
     177             :  */
     178             : #define JB_OFFSET_STRIDE        32
     179             : 
     180             : /*
     181             :  * A jsonb array or object node, within a Jsonb Datum.
     182             :  *
     183             :  * An array has one child for each element, stored in array order.
     184             :  *
     185             :  * An object has two children for each key/value pair.  The keys all appear
     186             :  * first, in key sort order; then the values appear, in an order matching the
     187             :  * key order.  This arrangement keeps the keys compact in memory, making a
     188             :  * search for a particular key more cache-friendly.
     189             :  */
     190             : typedef struct JsonbContainer
     191             : {
     192             :     uint32      header;         /* number of elements or key/value pairs, and
     193             :                                  * flags */
     194             :     JEntry      children[FLEXIBLE_ARRAY_MEMBER];
     195             : 
     196             :     /* the data for each child node follows. */
     197             : } JsonbContainer;
     198             : 
     199             : /* flags for the header-field in JsonbContainer */
     200             : #define JB_CMASK                0x0FFFFFFF  /* mask for count field */
     201             : #define JB_FSCALAR              0x10000000  /* flag bits */
     202             : #define JB_FOBJECT              0x20000000
     203             : #define JB_FARRAY               0x40000000
     204             : 
     205             : /* convenience macros for accessing a JsonbContainer struct */
     206             : #define JsonContainerSize(jc)       ((jc)->header & JB_CMASK)
     207             : #define JsonContainerIsScalar(jc)   (((jc)->header & JB_FSCALAR) != 0)
     208             : #define JsonContainerIsObject(jc)   (((jc)->header & JB_FOBJECT) != 0)
     209             : #define JsonContainerIsArray(jc)    (((jc)->header & JB_FARRAY) != 0)
     210             : 
     211             : /* The top-level on-disk format for a jsonb datum. */
     212             : typedef struct
     213             : {
     214             :     int32       vl_len_;        /* varlena header (do not touch directly!) */
     215             :     JsonbContainer root;
     216             : } Jsonb;
     217             : 
     218             : /* convenience macros for accessing the root container in a Jsonb datum */
     219             : #define JB_ROOT_COUNT(jbp_)     (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
     220             : #define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
     221             : #define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
     222             : #define JB_ROOT_IS_ARRAY(jbp_)  ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
     223             : 
     224             : 
     225             : enum jbvType
     226             : {
     227             :     /* Scalar types */
     228             :     jbvNull = 0x0,
     229             :     jbvString,
     230             :     jbvNumeric,
     231             :     jbvBool,
     232             :     /* Composite types */
     233             :     jbvArray = 0x10,
     234             :     jbvObject,
     235             :     /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
     236             :     jbvBinary,
     237             : 
     238             :     /*
     239             :      * Virtual types.
     240             :      *
     241             :      * These types are used only for in-memory JSON processing and are
     242             :      * serialized into JSON strings when output to json/jsonb.
     243             :      */
     244             :     jbvDatetime = 0x20,
     245             : };
     246             : 
     247             : /*
     248             :  * JsonbValue:  In-memory representation of Jsonb.  This is a convenient
     249             :  * deserialized representation, that can easily support using the "val"
     250             :  * union across underlying types during manipulation.  The Jsonb on-disk
     251             :  * representation has various alignment considerations.
     252             :  */
     253             : struct JsonbValue
     254             : {
     255             :     enum jbvType type;          /* Influences sort order */
     256             : 
     257             :     union
     258             :     {
     259             :         Numeric numeric;
     260             :         bool        boolean;
     261             :         struct
     262             :         {
     263             :             int         len;
     264             :             char       *val;    /* Not necessarily null-terminated */
     265             :         }           string;     /* String primitive type */
     266             : 
     267             :         struct
     268             :         {
     269             :             int         nElems;
     270             :             JsonbValue *elems;
     271             :             bool        rawScalar;  /* Top-level "raw scalar" array? */
     272             :         }           array;      /* Array container type */
     273             : 
     274             :         struct
     275             :         {
     276             :             int         nPairs; /* 1 pair, 2 elements */
     277             :             JsonbPair  *pairs;
     278             :         }           object;     /* Associative container type */
     279             : 
     280             :         struct
     281             :         {
     282             :             int         len;
     283             :             JsonbContainer *data;
     284             :         }           binary;     /* Array or object, in on-disk format */
     285             : 
     286             :         struct
     287             :         {
     288             :             Datum       value;
     289             :             Oid         typid;
     290             :             int32       typmod;
     291             :             int         tz;     /* Numeric time zone, in seconds, for
     292             :                                  * TimestampTz data type */
     293             :         }           datetime;
     294             :     }           val;
     295             : };
     296             : 
     297             : #define IsAJsonbScalar(jsonbval)    (((jsonbval)->type >= jbvNull && \
     298             :                                       (jsonbval)->type <= jbvBool) || \
     299             :                                       (jsonbval)->type == jbvDatetime)
     300             : 
     301             : /*
     302             :  * Key/value pair within an Object.
     303             :  *
     304             :  * This struct type is only used briefly while constructing a Jsonb; it is
     305             :  * *not* the on-disk representation.
     306             :  *
     307             :  * Pairs with duplicate keys are de-duplicated.  We store the originally
     308             :  * observed pair ordering for the purpose of removing duplicates in a
     309             :  * well-defined way (which is "last observed wins").
     310             :  */
     311             : struct JsonbPair
     312             : {
     313             :     JsonbValue  key;            /* Must be a jbvString */
     314             :     JsonbValue  value;          /* May be of any type */
     315             :     uint32      order;          /* Pair's index in original sequence */
     316             : };
     317             : 
     318             : /* Conversion state used when parsing Jsonb from text, or for type coercion */
     319             : typedef struct JsonbParseState
     320             : {
     321             :     JsonbValue  contVal;
     322             :     Size        size;
     323             :     struct JsonbParseState *next;
     324             :     bool        unique_keys;    /* Check object key uniqueness */
     325             :     bool        skip_nulls;     /* Skip null object fields */
     326             : } JsonbParseState;
     327             : 
     328             : /*
     329             :  * JsonbIterator holds details of the type for each iteration. It also stores a
     330             :  * Jsonb varlena buffer, which can be directly accessed in some contexts.
     331             :  */
     332             : typedef enum
     333             : {
     334             :     JBI_ARRAY_START,
     335             :     JBI_ARRAY_ELEM,
     336             :     JBI_OBJECT_START,
     337             :     JBI_OBJECT_KEY,
     338             :     JBI_OBJECT_VALUE,
     339             : } JsonbIterState;
     340             : 
     341             : typedef struct JsonbIterator
     342             : {
     343             :     /* Container being iterated */
     344             :     JsonbContainer *container;
     345             :     uint32      nElems;         /* Number of elements in children array (will
     346             :                                  * be nPairs for objects) */
     347             :     bool        isScalar;       /* Pseudo-array scalar value? */
     348             :     JEntry     *children;       /* JEntrys for child nodes */
     349             :     /* Data proper.  This points to the beginning of the variable-length data */
     350             :     char       *dataProper;
     351             : 
     352             :     /* Current item in buffer (up to nElems) */
     353             :     int         curIndex;
     354             : 
     355             :     /* Data offset corresponding to current item */
     356             :     uint32      curDataOffset;
     357             : 
     358             :     /*
     359             :      * If the container is an object, we want to return keys and values
     360             :      * alternately; so curDataOffset points to the current key, and
     361             :      * curValueOffset points to the current value.
     362             :      */
     363             :     uint32      curValueOffset;
     364             : 
     365             :     /* Private state */
     366             :     JsonbIterState state;
     367             : 
     368             :     struct JsonbIterator *parent;
     369             : } JsonbIterator;
     370             : 
     371             : 
     372             : /* Convenience functions and macros */
     373             : static inline Jsonb *
     374      943276 : DatumGetJsonbP(Datum d)
     375             : {
     376      943276 :     return (Jsonb *) PG_DETOAST_DATUM(d);
     377             : }
     378             : 
     379             : static inline Jsonb *
     380        7956 : DatumGetJsonbPCopy(Datum d)
     381             : {
     382        7956 :     return (Jsonb *) PG_DETOAST_DATUM_COPY(d);
     383             : }
     384             : 
     385             : static inline Datum
     386        8286 : JsonbPGetDatum(const Jsonb *p)
     387             : {
     388        8286 :     return PointerGetDatum(p);
     389             : }
     390             : 
     391             : #define PG_GETARG_JSONB_P(x)    DatumGetJsonbP(PG_GETARG_DATUM(x))
     392             : #define PG_GETARG_JSONB_P_COPY(x)   DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
     393             : #define PG_RETURN_JSONB_P(x)    PG_RETURN_POINTER(x)
     394             : 
     395             : /* Support functions */
     396             : extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
     397             : extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
     398             : extern int  compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
     399             : extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *container,
     400             :                                                uint32 flags,
     401             :                                                JsonbValue *key);
     402             : extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container,
     403             :                                                 const char *keyVal, int keyLen,
     404             :                                                 JsonbValue *res);
     405             : extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *container,
     406             :                                                  uint32 i);
     407             : extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
     408             :                                   JsonbIteratorToken seq, JsonbValue *jbval);
     409             : extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
     410             : extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
     411             :                                             bool skipNested);
     412             : extern void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val);
     413             : extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
     414             : extern bool JsonbDeepContains(JsonbIterator **val,
     415             :                               JsonbIterator **mContained);
     416             : extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
     417             : extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
     418             :                                          uint64 *hash, uint64 seed);
     419             : 
     420             : /* jsonb.c support functions */
     421             : extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
     422             :                             int estimated_len);
     423             : extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
     424             :                                   int estimated_len);
     425             : extern char *JsonbUnquote(Jsonb *jb);
     426             : extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
     427             : extern const char *JsonbTypeName(JsonbValue *val);
     428             : 
     429             : extern Datum jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
     430             :                                JsonbValue *newval);
     431             : extern Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath,
     432             :                                bool *isnull, bool as_text);
     433             : extern bool to_jsonb_is_immutable(Oid typoid);
     434             : extern Datum jsonb_build_object_worker(int nargs, const Datum *args, const bool *nulls,
     435             :                                        const Oid *types, bool absent_on_null,
     436             :                                        bool unique_keys);
     437             : extern Datum jsonb_build_array_worker(int nargs, const Datum *args, const bool *nulls,
     438             :                                       const Oid *types, bool absent_on_null);
     439             : 
     440             : #endif                          /* __JSONB_H__ */

Generated by: LCOV version 1.14