Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * jsonb.h
4 : * Declarations for jsonb data type support.
5 : *
6 : * Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : *
8 : * src/include/utils/jsonb.h
9 : *
10 : *-------------------------------------------------------------------------
11 : */
12 : #ifndef __JSONB_H__
13 : #define __JSONB_H__
14 :
15 : #include "lib/stringinfo.h"
16 : #include "utils/array.h"
17 : #include "utils/numeric.h"
18 :
/*
 * Tokens used when sequentially processing a jsonb value.  This is the
 * return type of JsonbIteratorNext() and the "seq" argument type of
 * pushJsonbValue().  The numeric values are spelled out explicitly; they are
 * the same values the compiler would assign implicitly.
 */
typedef enum
{
	WJB_DONE = 0,
	WJB_KEY = 1,
	WJB_VALUE = 2,
	WJB_ELEM = 3,
	WJB_BEGIN_ARRAY = 4,
	WJB_END_ARRAY = 5,
	WJB_BEGIN_OBJECT = 6,
	WJB_END_OBJECT = 7,
} JsonbIteratorToken;
31 :
/*
 * Strategy numbers for GIN index opclasses.
 *
 * NOTE(review): these presumably have to agree with the operator strategy
 * numbers recorded in the system catalogs for the jsonb GIN opclasses, so
 * they should not be renumbered -- confirm before changing any of them.
 */
#define JsonbContainsStrategyNumber		7
#define JsonbExistsStrategyNumber		9
#define JsonbExistsAnyStrategyNumber	10
#define JsonbExistsAllStrategyNumber	11
#define JsonbJsonpathExistsStrategyNumber		15
#define JsonbJsonpathPredicateStrategyNumber	16
39 :
40 :
41 : /*
42 : * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
43 : * keys and values. The storage format is text. The first byte of the text
44 : * string distinguishes whether this is a key (always a string), null value,
45 : * boolean value, numeric value, or string value. However, array elements
46 : * that are strings are marked as though they were keys; this imprecision
47 : * supports the definition of the "exists" operator, which treats array
48 : * elements like keys. The remainder of the text string is empty for a null
49 : * value, "t" or "f" for a boolean value, a normalized print representation of
50 : * a numeric value, or the text of a string value. However, if the length of
51 : * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
52 : * the text representation and store an 8-hex-digit representation of the
53 : * uint32 hash value, marking the prefix byte with an additional bit to
54 : * distinguish that this has happened. Hashing long strings saves space and
55 : * ensures that we won't overrun the maximum entry length for a GIN index.
56 : * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
57 : * to ensure that the on-disk text datum will have a short varlena header.)
58 : * Note that when any hashed item appears in a query, we must recheck index
59 : * matches against the heap tuple; currently, this costs nothing because we
60 : * must always recheck for other reasons.
61 : */
/* Item-kind codes stored in the first byte of a jsonb_ops GIN key */
#define JGINFLAG_KEY	0x01	/* key (or string array element) */
#define JGINFLAG_NULL	0x02	/* null value */
#define JGINFLAG_BOOL	0x03	/* boolean value */
#define JGINFLAG_NUM	0x04	/* numeric value */
#define JGINFLAG_STR	0x05	/* string value (if not an array element) */
/* Modifier bit, combined with one of the item-kind codes above */
#define JGINFLAG_HASHED 0x10	/* OR'd into flag if value was hashed */
/* Length threshold, in bytes, beyond which the text part is hashed */
#define JGIN_MAXLENGTH	125		/* max length of text part before hashing */
69 :
/*
 * Forward struct references.  The full struct definitions appear further
 * down; declaring the typedef names early lets the structs refer to one
 * another through pointers.
 */
typedef struct JsonbValue JsonbValue;
typedef struct JsonbPair JsonbPair;
typedef struct JsonbParseState JsonbParseState;
74 :
75 : /*
76 : * Jsonbs are varlena objects, so must meet the varlena convention that the
77 : * first int32 of the object contains the total object size in bytes. Be sure
78 : * to use VARSIZE() and SET_VARSIZE() to access it, though!
79 : *
80 : * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
81 : * representation. Often, JsonbValues are just shims through which a Jsonb
82 : * buffer is accessed, but they can also be deep copied and passed around.
83 : *
84 : * Jsonb is a tree structure. Each node in the tree consists of a JEntry
85 : * header and a variable-length content (possibly of zero size). The JEntry
86 : * header indicates what kind of a node it is, e.g. a string or an array,
87 : * and provides the length of its variable-length portion.
88 : *
89 : * The JEntry and the content of a node are not stored physically together.
90 : * Instead, the container array or object has an array that holds the JEntrys
91 : * of all the child nodes, followed by their variable-length portions.
92 : *
93 : * The root node is an exception; it has no parent array or object that could
94 : * hold its JEntry. Hence, no JEntry header is stored for the root node. It
95 : * is implicitly known that the root node must be an array or an object,
96 : * so we can get away without the type indicator as long as we can distinguish
97 : * the two. For that purpose, both an array and an object begin with a uint32
 * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
99 : * scalar value needs to be stored as a Jsonb value, what we actually store is
100 : * an array with one element, with the flags in the array's header field set
101 : * to JB_FSCALAR | JB_FARRAY.
102 : *
 * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
104 : * the variable-length portion of some node types is aligned to a 4-byte
105 : * boundary, while others are not. When alignment is needed, the padding is
106 : * in the beginning of the node that requires it. For example, if a numeric
107 : * node is stored after a string node, so that the numeric node begins at
108 : * offset 3, the variable-length portion of the numeric node will begin with
109 : * one padding byte so that the actual numeric data is 4-byte aligned.
110 : */
111 :
112 : /*
113 : * JEntry format.
114 : *
115 : * The least significant 28 bits store either the data length of the entry,
116 : * or its end+1 offset from the start of the variable-length portion of the
117 : * containing object. The next three bits store the type of the entry, and
118 : * the high-order bit tells whether the least significant bits store a length
119 : * or an offset.
120 : *
121 : * The reason for the offset-or-length complication is to compromise between
122 : * access speed and data compressibility. In the initial design each JEntry
123 : * always stored an offset, but this resulted in JEntry arrays with horrible
124 : * compressibility properties, so that TOAST compression of a JSONB did not
125 : * work well. Storing only lengths would greatly improve compressibility,
126 : * but it makes random access into large arrays expensive (O(N) not O(1)).
127 : * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
128 : * a length in the rest. This results in reasonably compressible data (as
129 : * long as the stride isn't too small). We may have to examine as many as
130 : * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
131 : * given item, but that's still O(1) no matter how large the container is.
132 : *
133 : * We could avoid eating a flag bit for this purpose if we were to store
134 : * the stride in the container header, or if we were willing to treat the
135 : * stride as an unchangeable constant. Neither of those options is very
136 : * attractive though.
137 : */
/* One child-node header word; see "JEntry format" above for the layout */
typedef uint32 JEntry;

#define JENTRY_OFFLENMASK		0x0FFFFFFF	/* low 28 bits: length or end+1
											 * offset */
#define JENTRY_TYPEMASK			0x70000000	/* next three bits: entry type */
#define JENTRY_HAS_OFF			0x80000000	/* high bit: set when the low bits
											 * hold an offset, not a length */

/* values stored in the type bits */
#define JENTRY_ISSTRING			0x00000000
#define JENTRY_ISNUMERIC		0x10000000
#define JENTRY_ISBOOL_FALSE		0x20000000
#define JENTRY_ISBOOL_TRUE		0x30000000
#define JENTRY_ISNULL			0x40000000
#define JENTRY_ISCONTAINER		0x50000000	/* array or object */
150 : #define JENTRY_ISCONTAINER 0x50000000 /* array or object */
151 :
/*
 * Access macros for a JEntry word.
 *
 * JBE_OFFLENFLD extracts the length-or-offset field and JBE_TYPEFLD the
 * (still shifted) type bits; the JBE_ISxxx tests compare the type bits with
 * one of the JENTRY_ISxxx codes.  Beware that JBE_ISBOOL may evaluate its
 * argument twice.
 */
#define JBE_OFFLENFLD(je_)		((je_) & JENTRY_OFFLENMASK)
#define JBE_TYPEFLD(je_)		((je_) & JENTRY_TYPEMASK)
#define JBE_HAS_OFF(je_)		(((je_) & JENTRY_HAS_OFF) != 0)
#define JBE_ISSTRING(je_)		(JBE_TYPEFLD(je_) == JENTRY_ISSTRING)
#define JBE_ISNUMERIC(je_)		(JBE_TYPEFLD(je_) == JENTRY_ISNUMERIC)
#define JBE_ISCONTAINER(je_)	(JBE_TYPEFLD(je_) == JENTRY_ISCONTAINER)
#define JBE_ISNULL(je_)			(JBE_TYPEFLD(je_) == JENTRY_ISNULL)
#define JBE_ISBOOL_TRUE(je_)	(JBE_TYPEFLD(je_) == JENTRY_ISBOOL_TRUE)
#define JBE_ISBOOL_FALSE(je_)	(JBE_TYPEFLD(je_) == JENTRY_ISBOOL_FALSE)
#define JBE_ISBOOL(je_)			(JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
162 :
/*
 * Advance a running offset variable past one JEntry.
 *
 * A JEntry that carries an offset replaces the running value outright; one
 * that carries a length is added to it.  "je" is evaluated exactly once and
 * "offset" must be a modifiable lvalue.
 */
#define JBE_ADVANCE_OFFSET(offset, je) \
	do { \
		const JEntry jbe_adv_entry_ = (je); \
		const JEntry jbe_adv_fld_ = JBE_OFFLENFLD(jbe_adv_entry_); \
		if (!JBE_HAS_OFF(jbe_adv_entry_)) \
			(offset) += jbe_adv_fld_; \
		else \
			(offset) = jbe_adv_fld_; \
	} while(0)
172 :
/*
 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
 * (The "JEntry format" comment above explains the trade-off between
 * compressibility and random-access cost that motivates this.)
 *
 * Caution: this macro should only be referenced when creating a JSONB
 * value. When examining an existing value, pay attention to the HAS_OFF
 * bits instead. This allows changes in the offset-placement heuristic
 * without breaking on-disk compatibility.
 */
#define JB_OFFSET_STRIDE		32
181 :
182 : /*
183 : * A jsonb array or object node, within a Jsonb Datum.
184 : *
185 : * An array has one child for each element, stored in array order.
186 : *
187 : * An object has two children for each key/value pair. The keys all appear
188 : * first, in key sort order; then the values appear, in an order matching the
189 : * key order. This arrangement keeps the keys compact in memory, making a
190 : * search for a particular key more cache-friendly.
191 : */
typedef struct JsonbContainer
{
	uint32		header;			/* number of elements or key/value pairs, and
								 * flags; decode with JB_CMASK and the
								 * JB_FSCALAR/JB_FOBJECT/JB_FARRAY bits */
	JEntry		children[FLEXIBLE_ARRAY_MEMBER];	/* JEntry headers of all
													 * the child nodes */

	/* the data for each child node follows. */
} JsonbContainer;
200 :
/*
 * Layout of the header field in JsonbContainer: the low 28 bits hold the
 * count, and three single-bit flags sit above it.
 */
#define JB_CMASK				0x0FFFFFFF	/* mask for count field */
#define JB_FSCALAR				0x10000000	/* flag bits */
#define JB_FOBJECT				0x20000000
#define JB_FARRAY				0x40000000

/*
 * Convenience macros for accessing a JsonbContainer struct through a
 * pointer.  Each flag is a single bit, so comparing the masked header with
 * the flag itself is the same as testing it for nonzero.
 */
#define JsonContainerSize(jc)		(JB_CMASK & (jc)->header)
#define JsonContainerIsScalar(jc)	(((jc)->header & JB_FSCALAR) == JB_FSCALAR)
#define JsonContainerIsObject(jc)	(((jc)->header & JB_FOBJECT) == JB_FOBJECT)
#define JsonContainerIsArray(jc)	(((jc)->header & JB_FARRAY) == JB_FARRAY)
212 :
/*
 * The top-level on-disk format for a jsonb datum: a varlena length word
 * followed directly by the root container.
 */
typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!);
								 * use VARSIZE() and SET_VARSIZE() instead */
	JsonbContainer root;		/* root node; stored without a JEntry of its
								 * own */
} Jsonb;
219 :
/*
 * Convenience macros for accessing the root container in a Jsonb datum.
 * Each one reads the uint32 found at VARDATA(jbp_) and masks it with the
 * count mask or one of the container flag bits.
 */
#define JB_ROOT_COUNT(jbp_)		(((uint32 *) VARDATA(jbp_))[0] & JB_CMASK)
#define JB_ROOT_IS_SCALAR(jbp_)	((((uint32 *) VARDATA(jbp_))[0] & JB_FSCALAR) != 0)
#define JB_ROOT_IS_OBJECT(jbp_)	((((uint32 *) VARDATA(jbp_))[0] & JB_FOBJECT) != 0)
#define JB_ROOT_IS_ARRAY(jbp_)	((((uint32 *) VARDATA(jbp_))[0] & JB_FARRAY) != 0)
225 :
226 :
/*
 * Type tag of a JsonbValue.  Every enumerator is given its value explicitly;
 * the values equal the ones implicit numbering would produce.
 */
enum jbvType
{
	/* Scalar types */
	jbvNull = 0x00,
	jbvString = 0x01,
	jbvNumeric = 0x02,
	jbvBool = 0x03,

	/* Composite types */
	jbvArray = 0x10,
	jbvObject = 0x11,

	/* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
	jbvBinary = 0x12,

	/*
	 * Virtual types.
	 *
	 * These exist only for in-memory JSON processing; they are serialized
	 * into JSON strings when output to json/jsonb.
	 */
	jbvDatetime = 0x20,
};
248 :
249 : /*
250 : * JsonbValue: In-memory representation of Jsonb. This is a convenient
251 : * deserialized representation, that can easily support using the "val"
252 : * union across underlying types during manipulation. The Jsonb on-disk
253 : * representation has various alignment considerations.
254 : */
struct JsonbValue
{
	enum jbvType type;			/* Influences sort order */

	/*
	 * Payload.  NOTE(review): "type" presumably selects which member is
	 * valid (e.g. jbvString -> string, jbvBinary -> binary) -- confirm
	 * against the code that builds and reads these values.
	 */
	union
	{
		Numeric		numeric;
		bool		boolean;
		struct
		{
			int			len;
			char	   *val;	/* Not necessarily null-terminated */
		}			string;		/* String primitive type */

		struct
		{
			int			nElems;
			JsonbValue *elems;
			bool		rawScalar;	/* Top-level "raw scalar" array? */
		}			array;		/* Array container type */

		struct
		{
			int			nPairs; /* 1 pair, 2 elements */
			JsonbPair  *pairs;
		}			object;		/* Associative container type */

		struct
		{
			int			len;
			JsonbContainer *data;
		}			binary;		/* Array or object, in on-disk format */

		struct
		{
			Datum		value;
			Oid			typid;
			int32		typmod;
			int			tz;		/* Numeric time zone, in seconds, for
								 * TimestampTz data type */
		}			datetime;	/* Payload for the virtual jbvDatetime type
								 * (presumably; confirm) */
	}			val;
};
298 :
/*
 * True for the scalar types: anything in the range jbvNull..jbvBool, or
 * jbvDatetime.  This is written as a negation: a value is non-scalar when
 * its type lies outside that range and is also not jbvDatetime.  The
 * argument is a pointer and may be evaluated up to three times.
 */
#define IsAJsonbScalar(jsonbval)	(!(((jsonbval)->type < jbvNull || \
										(jsonbval)->type > jbvBool) && \
									   (jsonbval)->type != jbvDatetime))
302 :
303 : /*
304 : * Key/value pair within an Object.
305 : *
306 : * This struct type is only used briefly while constructing a Jsonb; it is
307 : * *not* the on-disk representation.
308 : *
309 : * Pairs with duplicate keys are de-duplicated. We store the originally
310 : * observed pair ordering for the purpose of removing duplicates in a
311 : * well-defined way (which is "last observed wins").
312 : */
struct JsonbPair
{
	JsonbValue	key;			/* Must be a jbvString */
	JsonbValue	value;			/* May be of any type */
	uint32		order;			/* Pair's index in original sequence; kept so
								 * that duplicate keys can be resolved as
								 * "last observed wins" */
};
319 :
320 : /*
321 : * State used while constructing or manipulating a JsonbValue.
322 : * For example, when parsing Jsonb from text, we construct a JsonbValue
323 : * data structure and then flatten that into the Jsonb on-disk format.
324 : * JsonbValues are also useful in aggregation and type coercion.
325 : *
326 : * Callers providing a JsonbInState must initialize it to zeroes/nulls,
327 : * except for optionally setting outcontext (if that's left NULL,
328 : * CurrentMemoryContext is used) and escontext (if that's left NULL,
329 : * parsing errors are thrown via ereport).
330 : */
typedef struct JsonbInState
{
	/* Fields the caller may set up or read */
	JsonbValue *result;			/* The completed value; NULL until complete */
	MemoryContext outcontext;	/* The context to build it in, or NULL */
	struct Node *escontext;		/* Optional soft-error-reporting context */
	/* Remaining fields should be treated as private to jsonb.c/jsonb_util.c */
	JsonbParseState *parseState;	/* Stack of parsing contexts */
	bool		unique_keys;	/* Check object key uniqueness */
} JsonbInState;
340 :
341 : /*
342 : * Parsing context for one level of Jsonb array or object nesting.
343 : * The contVal will be part of the constructed JsonbValue tree,
344 : * but the other fields are just transient state.
345 : */
struct JsonbParseState
{
	/* Part of the constructed JsonbValue tree */
	JsonbValue	contVal;		/* An array or object JsonbValue */
	/* Transient state */
	Size		size;			/* Allocated length of array or object */
	JsonbParseState *next;		/* Link to next outer level, if any */
	bool		unique_keys;	/* Check object key uniqueness */
	bool		skip_nulls;		/* Skip null object fields */
};
354 :
355 : /*
356 : * JsonbIterator holds details of the type for each iteration. It also stores a
357 : * Jsonb varlena buffer, which can be directly accessed in some contexts.
358 : */
/*
 * Values for the private "state" field of JsonbIterator.  The numbering is
 * spelled out explicitly and equals the implicit numbering.
 */
typedef enum
{
	JBI_ARRAY_START = 0,
	JBI_ARRAY_ELEM = 1,
	JBI_OBJECT_START = 2,
	JBI_OBJECT_KEY = 3,
	JBI_OBJECT_VALUE = 4,
} JsonbIterState;
367 :
typedef struct JsonbIterator
{
	/* Container being iterated */
	JsonbContainer *container;
	uint32		nElems;			/* Number of elements in children array (will
								 * be nPairs for objects) */
	bool		isScalar;		/* Pseudo-array scalar value? */
	JEntry	   *children;		/* JEntrys for child nodes */
	/* Data proper.  This points to the beginning of the variable-length data */
	char	   *dataProper;

	/* Current item in buffer (up to nElems) */
	int			curIndex;

	/* Data offset corresponding to current item */
	uint32		curDataOffset;

	/*
	 * If the container is an object, we want to return keys and values
	 * alternately; so curDataOffset points to the current key, and
	 * curValueOffset points to the current value.
	 */
	uint32		curValueOffset;

	/* Private state */
	JsonbIterState state;

	/*
	 * Link to another iterator.  NOTE(review): presumably the iterator of
	 * the enclosing container, used while descending into nested containers
	 * -- confirm against JsonbIteratorNext().
	 */
	struct JsonbIterator *parent;
} JsonbIterator;
397 :
398 :
/* Convenience inline functions and macros */
400 : static inline Jsonb *
401 1000992 : DatumGetJsonbP(Datum d)
402 : {
403 1000992 : return (Jsonb *) PG_DETOAST_DATUM(d);
404 : }
405 :
406 : static inline Jsonb *
407 8136 : DatumGetJsonbPCopy(Datum d)
408 : {
409 8136 : return (Jsonb *) PG_DETOAST_DATUM_COPY(d);
410 : }
411 :
412 : static inline Datum
413 7830 : JsonbPGetDatum(const Jsonb *p)
414 : {
415 7830 : return PointerGetDatum(p);
416 : }
417 :
/*
 * Argument-fetch and result-return shorthands for functions that take or
 * return jsonb.  "x" in the GETARG forms is passed straight through to
 * PG_GETARG_DATUM(); the _COPY form uses DatumGetJsonbPCopy().
 */
#define PG_GETARG_JSONB_P(x)	DatumGetJsonbP(PG_GETARG_DATUM(x))
#define PG_GETARG_JSONB_P_COPY(x)	DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
#define PG_RETURN_JSONB_P(x)	PG_RETURN_POINTER(x)
421 :
/*
 * Support functions.
 *
 * NOTE(review): only the prototypes are visible in this header.  The short
 * notes below are read off the names and signatures; confirm the details
 * against the implementations before relying on them.
 */

/* Per-child lookups in a container, addressed by child index */
extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
extern uint32 getJsonbLength(const JsonbContainer *jc, int index);

/* Comparison of two containers; returns an int */
extern int	compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);

/* Lookups inside a container; each returns a JsonbValue pointer */
extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *container,
											   uint32 flags,
											   JsonbValue *key);
extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container,
												const char *keyVal, int keyLen,
												JsonbValue *res);
extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *container,
												 uint32 i);

/* Incremental construction, driven by a JsonbInState and a token */
extern void pushJsonbValue(JsonbInState *pstate,
						   JsonbIteratorToken seq, JsonbValue *jbval);

/* Iteration over a container; JsonbIteratorNext reports a token per step */
extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
											bool skipNested);

/* Conversion between the on-disk Jsonb and the in-memory JsonbValue forms */
extern void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val);
extern Jsonb *JsonbValueToJsonb(JsonbValue *val);

/* Containment test over two iterators */
extern bool JsonbDeepContains(JsonbIterator **val,
							  JsonbIterator **mContained);

/* Hashing of a scalar JsonbValue into a caller-supplied accumulator */
extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
										 uint64 *hash, uint64 seed);
446 :
/*
 * jsonb.c support functions.
 *
 * NOTE(review): as above, only prototypes are visible here; the grouping
 * notes are inferred from names and signatures.
 */

/* Text output of a container into a StringInfo; both return a char pointer */
extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
							int estimated_len);
extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
								  int estimated_len);
extern char *JsonbUnquote(Jsonb *jb);

/* Scalar extraction and type naming */
extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
extern const char *JsonbTypeName(JsonbValue *val);

/* Element access by path; the path is an array of Datums plus its length */
extern Datum jsonb_set_element(Jsonb *jb, const Datum *path, int path_len,
							   JsonbValue *newval);
extern Datum jsonb_get_element(Jsonb *jb, const Datum *path, int npath,
							   bool *isnull, bool as_text);

/* Reports a bool for the given type OID */
extern bool to_jsonb_is_immutable(Oid typoid);

/* Workers taking parallel args/nulls/types arrays of length nargs */
extern Datum jsonb_build_object_worker(int nargs, const Datum *args, const bool *nulls,
									   const Oid *types, bool absent_on_null,
									   bool unique_keys);
extern Datum jsonb_build_array_worker(int nargs, const Datum *args, const bool *nulls,
									  const Oid *types, bool absent_on_null);
466 :
467 : #endif /* __JSONB_H__ */
|