Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * radixtree.h
4 : * Template for adaptive radix tree.
5 : *
6 : * A template to generate an "adaptive radix tree", specialized for value
7 : * types and for local/shared memory.
8 : *
9 : * The concept originates from the paper "The Adaptive Radix Tree: ARTful
10 : * Indexing for Main-Memory Databases" by Viktor Leis, Alfons Kemper,
11 : * and Thomas Neumann, 2013.
12 : *
13 : * Radix trees have some advantages over hash tables:
14 : * - The keys are logically ordered, allowing efficient sorted iteration
15 : * and range queries
16 : * - Operations using keys that are lexicographically close together
17 : * will have favorable memory locality
18 : * - Memory use grows gradually rather than by doubling
19 : * - The key does not need to be stored with the value, since the key
20 : * is implicitly contained in the path to the value
21 : *
22 : * Some disadvantages are:
23 : * - Point queries (along with insertion and deletion) are slower than
24 : * in a linear probing hash table such as the one in simplehash.h
25 : * - Memory usage varies by key distribution, so it is difficult to predict
26 : *
27 : * A classic radix tree consists of nodes, each containing an array of
28 : * pointers to child nodes. The size of the array is determined by the
29 : * "span" of the tree, which is the number of bits of the key used to
30 : * index into the array. For example, with a span of 6, a "chunk"
31 : * of 6 bits is extracted from the key at each node traversal, and
32 : * the arrays thus have a "fanout" of 2^6 or 64 entries. A large span
33 : * allows a shorter tree, but requires larger arrays that may be mostly
34 : * wasted space.
35 : *
36 : * The key idea of the adaptive radix tree is to choose different
37 : * data structures based on the number of child nodes. A node will
38 : * start out small when it is first populated, and when it is full,
39 : * it is replaced by the next larger size. Conversely, when a node
40 : * becomes mostly empty, it is replaced by the next smaller node. The
41 : * bulk of the code complexity in this module stems from this dynamic
42 : * switching. One mitigating factor is using a span of 8, since bytes
43 : * are directly addressable.
44 : *
45 : * The ART paper mentions three ways to implement leaves:
46 : *
47 : * "- Single-value leaves: The values are stored using an addi-
48 : * tional leaf node type which stores one value.
49 : * - Multi-value leaves: The values are stored in one of four
50 : * different leaf node types, which mirror the structure of
51 : * inner nodes, but contain values instead of pointers.
52 : * - Combined pointer/value slots: If values fit into point-
53 : * ers, no separate node types are necessary. Instead, each
54 : * pointer storage location in an inner node can either
55 : * store a pointer or a value."
56 : *
57 : * We use a form of "combined pointer/value slots", as recommended. Values
58 : * whose size (if fixed at compile time) is equal to or smaller than the
59 : * platform's pointer type are stored in the child slots of the last-level
60 : * node, while larger values are stored as the "single-value" leaves above. This
61 : * offers flexibility and efficiency. Variable-length types are currently
62 : * treated as single-value leaves for simplicity, but future work may
63 : * allow those to be stored in the child pointer arrays, when they're
64 : * small enough.
65 : *
66 : * There are two other techniques described in the paper that are not
67 : * implemented here:
68 : * - path compression "...removes all inner nodes that have only a single child."
69 : * - lazy path expansion "...inner nodes are only created if they are required
70 : * to distinguish at least two leaf nodes."
71 : *
72 : * We do have a form of "poor man's path compression", however, enabled by
73 : * only supporting unsigned integer keys (for now assumed to be 64-bit):
74 : * A tree doesn't contain paths where the highest bytes of all keys are
75 : * zero. That way, the tree's height adapts to the distribution of keys.
76 : *
77 : * To handle concurrency, we use a single reader-writer lock for the
78 : * radix tree. If concurrent write operations are possible, the tree
79 : * must be exclusively locked during write operations such as RT_SET()
80 : * and RT_DELETE(), and share locked during read operations such as
81 : * RT_FIND() and RT_BEGIN_ITERATE().
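 *
 * For example, a write into a shared tree follows this pattern (a
 * minimal sketch, with error handling omitted):
 *
 *	RT_LOCK_EXCLUSIVE(tree);
 *	RT_SET(tree, key, &value);
 *	RT_UNLOCK(tree);
 *
 * A reader must likewise hold the share lock for as long as it uses the
 * pointer returned by RT_FIND(), since the value is not copied out.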
82 : *
83 : * TODO: The current locking mechanism is not optimized for high
84 : * concurrency with mixed read-write workloads. In the future it might
85 : * be worthwhile to replace it with the Optimistic Lock Coupling or
86 : * ROWEX mentioned in the paper "The ART of Practical Synchronization"
87 : * by the same authors as the ART paper, 2016.
88 : *
89 : * To generate a radix tree and associated functions for a use case,
90 : * several macros have to be #define'ed before this file is included.
91 : * Including the file #undef's all those, so a new radix tree can be
92 : * generated afterwards.
93 : *
94 : * The relevant parameters are:
95 : * - RT_PREFIX - prefix for all symbol names generated. A prefix of "foo"
96 : * will result in radix tree type "foo_radix_tree" and functions like
97 : * "foo_create"/"foo_free" and so forth.
98 : * - RT_DECLARE - if defined, function prototypes and type declarations are
99 : * generated
100 : * - RT_DEFINE - if defined, function definitions are generated
101 : * - RT_SCOPE - the scope (e.g. extern, static inline) in which function
102 : * declarations reside
103 : * - RT_VALUE_TYPE - the type of the value.
104 : * - RT_VARLEN_VALUE_SIZE() - for variable length values, an expression
105 : * involving a pointer to the value type, to calculate size.
106 : * NOTE: implies that the value is in fact variable-length,
107 : * so do not set for fixed-length values.
108 : * - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
109 : * storing the value in a child pointer slot, rather than as a single-
110 : * value leaf, if small enough. This requires that the value, when
111 : * read as a child pointer, can be tagged in the lowest bit.
112 : *
113 : * Optional parameters:
114 : * - RT_SHMEM - if defined, the radix tree is created in the DSA area
115 : * so that multiple processes can access it simultaneously.
116 : * - RT_DEBUG - if defined, add stats tracking and debugging functions
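 *
 * For example, a minimal local-memory instantiation for a fixed-length
 * value type could look like this (the prefix "foo" is hypothetical):
 *
 *	#define RT_PREFIX foo
 *	#define RT_SCOPE static
 *	#define RT_DECLARE
 *	#define RT_DEFINE
 *	#define RT_VALUE_TYPE int64
 *	#include "lib/radixtree.h"
 *
 * This generates the type "foo_radix_tree" and functions such as
 * "foo_create", "foo_set", "foo_find", and "foo_free".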
117 : *
118 : * Interface
119 : * ---------
120 : *
121 : * RT_CREATE - Create a new, empty radix tree
122 : * RT_FREE - Free the radix tree
123 : * RT_FIND - Lookup the value for a given key
124 : * RT_SET - Set a key-value pair
125 : * RT_BEGIN_ITERATE - Begin iterating through all key-value pairs
126 : * RT_ITERATE_NEXT - Return next key-value pair, if any
127 : * RT_END_ITERATE - End iteration
128 : * RT_MEMORY_USAGE - Get the memory usage, as measured by space in memory context blocks
129 : *
130 : * Interface for Shared Memory
131 : * ---------------------------
132 : *
133 : * RT_ATTACH - Attach to the radix tree
134 : * RT_DETACH - Detach from the radix tree
135 : * RT_LOCK_EXCLUSIVE - Lock the radix tree in exclusive mode
136 : * RT_LOCK_SHARE - Lock the radix tree in share mode
137 : * RT_UNLOCK - Unlock the radix tree
138 : * RT_GET_HANDLE - Return the handle of the radix tree
139 : *
140 : * Optional Interface
141 : * ------------------
142 : *
143 : * RT_DELETE - Delete a key-value pair. Declared/defined if RT_USE_DELETE is defined
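 *
 * Putting the pieces together, typical local-memory usage is (a sketch,
 * assuming the hypothetical "foo" instantiation above):
 *
 *	foo_radix_tree *tree = foo_create(CurrentMemoryContext);
 *	int64		value = 42;
 *	int64	   *value_p;
 *
 *	foo_set(tree, 123, &value);
 *	value_p = foo_find(tree, 123);	(now *value_p == 42)
 *	foo_free(tree);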
144 : *
145 : *
146 : * Copyright (c) 2024-2025, PostgreSQL Global Development Group
147 : *
148 : * IDENTIFICATION
149 : * src/include/lib/radixtree.h
150 : *
151 : *-------------------------------------------------------------------------
152 : */
153 :
154 : #include "nodes/bitmapset.h"
155 : #include "port/pg_bitutils.h"
156 : #include "port/simd.h"
157 : #include "utils/dsa.h"
158 : #include "utils/memutils.h"
159 : #ifdef RT_SHMEM
160 : #include "miscadmin.h"
161 : #include "storage/lwlock.h"
162 : #endif
163 :
164 : /* helpers */
165 : #define RT_MAKE_PREFIX(a) CppConcat(a,_)
166 : #define RT_MAKE_NAME(name) RT_MAKE_NAME_(RT_MAKE_PREFIX(RT_PREFIX),name)
167 : #define RT_MAKE_NAME_(a,b) CppConcat(a,b)
168 : /*
169 : * stringify a macro constant, from https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html
170 : */
171 : #define RT_STR(s) RT_STR_(s)
172 : #define RT_STR_(s) #s
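
/*
 * For example, with RT_PREFIX defined as "foo", RT_MAKE_NAME(create)
 * expands to foo_create, and RT_STR(RT_PREFIX) produces the string
 * literal "foo".
 */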
173 :
174 : /* function declarations */
175 : #define RT_CREATE RT_MAKE_NAME(create)
176 : #define RT_FREE RT_MAKE_NAME(free)
177 : #define RT_FIND RT_MAKE_NAME(find)
178 : #ifdef RT_SHMEM
179 : #define RT_ATTACH RT_MAKE_NAME(attach)
180 : #define RT_DETACH RT_MAKE_NAME(detach)
181 : #define RT_GET_HANDLE RT_MAKE_NAME(get_handle)
182 : #define RT_LOCK_EXCLUSIVE RT_MAKE_NAME(lock_exclusive)
183 : #define RT_LOCK_SHARE RT_MAKE_NAME(lock_share)
184 : #define RT_UNLOCK RT_MAKE_NAME(unlock)
185 : #endif
186 : #define RT_SET RT_MAKE_NAME(set)
187 : #define RT_BEGIN_ITERATE RT_MAKE_NAME(begin_iterate)
188 : #define RT_ITERATE_NEXT RT_MAKE_NAME(iterate_next)
189 : #define RT_END_ITERATE RT_MAKE_NAME(end_iterate)
190 : #ifdef RT_USE_DELETE
191 : #define RT_DELETE RT_MAKE_NAME(delete)
192 : #endif
193 : #define RT_MEMORY_USAGE RT_MAKE_NAME(memory_usage)
194 : #define RT_DUMP_NODE RT_MAKE_NAME(dump_node)
195 : #define RT_STATS RT_MAKE_NAME(stats)
196 :
197 : /* internal helper functions (no externally visible prototypes) */
198 : #define RT_CHILDPTR_IS_VALUE RT_MAKE_NAME(childptr_is_value)
199 : #define RT_VALUE_IS_EMBEDDABLE RT_MAKE_NAME(value_is_embeddable)
200 : #define RT_GET_SLOT_RECURSIVE RT_MAKE_NAME(get_slot_recursive)
201 : #define RT_DELETE_RECURSIVE RT_MAKE_NAME(delete_recursive)
202 : #define RT_ALLOC_NODE RT_MAKE_NAME(alloc_node)
203 : #define RT_ALLOC_LEAF RT_MAKE_NAME(alloc_leaf)
204 : #define RT_FREE_NODE RT_MAKE_NAME(free_node)
205 : #define RT_FREE_LEAF RT_MAKE_NAME(free_leaf)
206 : #define RT_FREE_RECURSE RT_MAKE_NAME(free_recurse)
207 : #define RT_EXTEND_UP RT_MAKE_NAME(extend_up)
208 : #define RT_EXTEND_DOWN RT_MAKE_NAME(extend_down)
209 : #define RT_COPY_COMMON RT_MAKE_NAME(copy_common)
210 : #define RT_PTR_SET_LOCAL RT_MAKE_NAME(ptr_set_local)
211 : #define RT_NODE_16_SEARCH_EQ RT_MAKE_NAME(node_16_search_eq)
212 : #define RT_NODE_4_GET_INSERTPOS RT_MAKE_NAME(node_4_get_insertpos)
213 : #define RT_NODE_16_GET_INSERTPOS RT_MAKE_NAME(node_16_get_insertpos)
214 : #define RT_SHIFT_ARRAYS_FOR_INSERT RT_MAKE_NAME(shift_arrays_for_insert)
215 : #define RT_SHIFT_ARRAYS_AND_DELETE RT_MAKE_NAME(shift_arrays_and_delete)
216 : #define RT_COPY_ARRAYS_FOR_INSERT RT_MAKE_NAME(copy_arrays_for_insert)
217 : #define RT_COPY_ARRAYS_AND_DELETE RT_MAKE_NAME(copy_arrays_and_delete)
218 : #define RT_NODE_48_IS_CHUNK_USED RT_MAKE_NAME(node_48_is_chunk_used)
219 : #define RT_NODE_48_GET_CHILD RT_MAKE_NAME(node_48_get_child)
220 : #define RT_NODE_256_IS_CHUNK_USED RT_MAKE_NAME(node_256_is_chunk_used)
221 : #define RT_NODE_256_GET_CHILD RT_MAKE_NAME(node_256_get_child)
222 : #define RT_KEY_GET_SHIFT RT_MAKE_NAME(key_get_shift)
223 : #define RT_SHIFT_GET_MAX_VAL RT_MAKE_NAME(shift_get_max_val)
224 : #define RT_NODE_SEARCH RT_MAKE_NAME(node_search)
225 : #define RT_NODE_DELETE RT_MAKE_NAME(node_delete)
226 : #define RT_NODE_INSERT RT_MAKE_NAME(node_insert)
227 : #define RT_ADD_CHILD_4 RT_MAKE_NAME(add_child_4)
228 : #define RT_ADD_CHILD_16 RT_MAKE_NAME(add_child_16)
229 : #define RT_ADD_CHILD_48 RT_MAKE_NAME(add_child_48)
230 : #define RT_ADD_CHILD_256 RT_MAKE_NAME(add_child_256)
231 : #define RT_GROW_NODE_4 RT_MAKE_NAME(grow_node_4)
232 : #define RT_GROW_NODE_16 RT_MAKE_NAME(grow_node_16)
233 : #define RT_GROW_NODE_48 RT_MAKE_NAME(grow_node_48)
234 : #define RT_REMOVE_CHILD_4 RT_MAKE_NAME(remove_child_4)
235 : #define RT_REMOVE_CHILD_16 RT_MAKE_NAME(remove_child_16)
236 : #define RT_REMOVE_CHILD_48 RT_MAKE_NAME(remove_child_48)
237 : #define RT_REMOVE_CHILD_256 RT_MAKE_NAME(remove_child_256)
238 : #define RT_SHRINK_NODE_16 RT_MAKE_NAME(shrink_child_16)
239 : #define RT_SHRINK_NODE_48 RT_MAKE_NAME(shrink_child_48)
240 : #define RT_SHRINK_NODE_256 RT_MAKE_NAME(shrink_child_256)
241 : #define RT_NODE_ITERATE_NEXT RT_MAKE_NAME(node_iterate_next)
242 : #define RT_VERIFY_NODE RT_MAKE_NAME(verify_node)
243 :
244 : /* type declarations */
245 : #define RT_RADIX_TREE RT_MAKE_NAME(radix_tree)
246 : #define RT_RADIX_TREE_CONTROL RT_MAKE_NAME(radix_tree_control)
247 : #define RT_ITER RT_MAKE_NAME(iter)
248 : #ifdef RT_SHMEM
249 : #define RT_HANDLE RT_MAKE_NAME(handle)
250 : #endif
251 : #define RT_NODE RT_MAKE_NAME(node)
252 : #define RT_CHILD_PTR RT_MAKE_NAME(child_ptr)
253 : #define RT_NODE_ITER RT_MAKE_NAME(node_iter)
254 : #define RT_NODE_4 RT_MAKE_NAME(node_4)
255 : #define RT_NODE_16 RT_MAKE_NAME(node_16)
256 : #define RT_NODE_48 RT_MAKE_NAME(node_48)
257 : #define RT_NODE_256 RT_MAKE_NAME(node_256)
258 : #define RT_SIZE_CLASS RT_MAKE_NAME(size_class)
259 : #define RT_SIZE_CLASS_ELEM RT_MAKE_NAME(size_class_elem)
260 : #define RT_SIZE_CLASS_INFO RT_MAKE_NAME(size_class_info)
261 : #define RT_CLASS_4 RT_MAKE_NAME(class_4)
262 : #define RT_CLASS_16_LO RT_MAKE_NAME(class_32_min)
263 : #define RT_CLASS_16_HI RT_MAKE_NAME(class_32_max)
264 : #define RT_CLASS_48 RT_MAKE_NAME(class_48)
265 : #define RT_CLASS_256 RT_MAKE_NAME(class_256)
266 :
267 : /* generate forward declarations necessary to use the radix tree */
268 : #ifdef RT_DECLARE
269 :
270 : typedef struct RT_RADIX_TREE RT_RADIX_TREE;
271 : typedef struct RT_ITER RT_ITER;
272 :
273 : #ifdef RT_SHMEM
274 : typedef dsa_pointer RT_HANDLE;
275 : #endif
276 :
277 : #ifdef RT_SHMEM
278 : RT_SCOPE RT_RADIX_TREE *RT_CREATE(dsa_area *dsa, int tranche_id);
279 : RT_SCOPE RT_RADIX_TREE *RT_ATTACH(dsa_area *dsa, dsa_pointer dp);
280 : RT_SCOPE void RT_DETACH(RT_RADIX_TREE * tree);
281 : RT_SCOPE RT_HANDLE RT_GET_HANDLE(RT_RADIX_TREE * tree);
282 : RT_SCOPE void RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree);
283 : RT_SCOPE void RT_LOCK_SHARE(RT_RADIX_TREE * tree);
284 : RT_SCOPE void RT_UNLOCK(RT_RADIX_TREE * tree);
285 : #else
286 : RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx);
287 : #endif
288 : RT_SCOPE void RT_FREE(RT_RADIX_TREE * tree);
289 :
290 : RT_SCOPE RT_VALUE_TYPE *RT_FIND(RT_RADIX_TREE * tree, uint64 key);
291 : RT_SCOPE bool RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p);
292 :
293 : #ifdef RT_USE_DELETE
294 : RT_SCOPE bool RT_DELETE(RT_RADIX_TREE * tree, uint64 key);
295 : #endif
296 :
297 : RT_SCOPE RT_ITER *RT_BEGIN_ITERATE(RT_RADIX_TREE * tree);
298 : RT_SCOPE RT_VALUE_TYPE *RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p);
299 : RT_SCOPE void RT_END_ITERATE(RT_ITER * iter);
300 :
301 : RT_SCOPE uint64 RT_MEMORY_USAGE(RT_RADIX_TREE * tree);
302 :
303 : #ifdef RT_DEBUG
304 : RT_SCOPE void RT_STATS(RT_RADIX_TREE * tree);
305 : #endif
306 :
307 : #endif /* RT_DECLARE */
308 :
309 :
310 : /* generate implementation of the radix tree */
311 : #ifdef RT_DEFINE
312 :
313 : /* The number of bits encoded in one tree level */
314 : #define RT_SPAN BITS_PER_BYTE
315 :
316 : /*
317 : * The number of possible partial keys, and thus the maximum number of
318 : * child pointers, for a node.
319 : */
320 : #define RT_NODE_MAX_SLOTS (1 << RT_SPAN)
321 :
322 : /* Mask for extracting a chunk from a key */
323 : #define RT_CHUNK_MASK ((1 << RT_SPAN) - 1)
324 :
325 : /* Maximum shift needed to extract a chunk from a key */
326 : #define RT_MAX_SHIFT RT_KEY_GET_SHIFT(UINT64_MAX)
327 :
328 : /* Maximum level a tree can reach for a key */
329 : #define RT_MAX_LEVEL ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN)
330 :
331 : /* Get a chunk from the key */
332 : #define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & RT_CHUNK_MASK))
333 :
334 : /* For accessing bitmaps */
335 : #define RT_BM_IDX(x) ((x) / BITS_PER_BITMAPWORD)
336 : #define RT_BM_BIT(x) ((x) % BITS_PER_BITMAPWORD)
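
/*
 * For example, with RT_SPAN = 8, the key 0x0102030405060708 yields chunk
 * 0x07 at shift 8 and chunk 0x01 at shift 56; and with 64-bit
 * bitmapwords, bit 70 of a bitmap lives in word RT_BM_IDX(70) = 1, at
 * bit position RT_BM_BIT(70) = 6.
 */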
337 :
338 : /*
339 : * Node kinds
340 : *
341 : * The different node kinds are what make the tree "adaptive".
342 : *
343 : * Each node kind is associated with a different datatype and different
344 : * search/set/delete/iterate algorithms adapted for its size. The largest
345 : * kind, node256, is basically the same as a traditional radix tree,
346 : * and would be most wasteful of memory when sparsely populated. The
347 : * smaller nodes expend some additional CPU time to enable a smaller
348 : * memory footprint.
349 : *
350 : * NOTE: There are 4 node kinds, and this should never be increased,
351 : * for several reasons:
352 : * 1. With 5 or more kinds, gcc tends to use a jump table for switch
353 : * statements.
354 : * 2. The 4 kinds can be represented with 2 bits, so we have the option
355 : * in the future to tag the node pointer with the kind, even on
356 : * platforms with 32-bit pointers. That would touch fewer cache lines
357 : * during traversal and allow faster recovery from branch mispredicts.
358 : * 3. We can have multiple size classes per node kind.
359 : */
360 : #define RT_NODE_KIND_4 0x00
361 : #define RT_NODE_KIND_16 0x01
362 : #define RT_NODE_KIND_48 0x02
363 : #define RT_NODE_KIND_256 0x03
364 : #define RT_NODE_KIND_COUNT 4
365 :
366 : /*
367 : * Calculate the slab block size so that we can allocate at least 32 chunks
368 : * from the block.
369 : */
370 : #define RT_SLAB_BLOCK_SIZE(size) \
371 : Max(SLAB_DEFAULT_BLOCK_SIZE, pg_nextpower2_32(size * 32))
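
/*
 * For example, a 40-byte node needs 1280 bytes for 32 chunks, which
 * pg_nextpower2_32 rounds up to 2048; since that is still below
 * SLAB_DEFAULT_BLOCK_SIZE (8kB), the default block size is used.
 */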
372 :
373 : /* Common header for all nodes */
374 : typedef struct RT_NODE
375 : {
376 : /* Node kind, one per search/set algorithm */
377 : uint8 kind;
378 :
379 : /*
380 : * Max capacity for the current size class. Storing this in the node
381 : * enables multiple size classes per node kind. uint8 is sufficient for
382 : * all node kinds, because we only use this number to test if the node
383 : * needs to grow. Since node256 never needs to grow, we let this overflow
384 : * to zero.
385 : */
386 : uint8 fanout;
387 :
388 : /*
389 : * Number of children. uint8 is sufficient for all node kinds, because
390 : * nodes shrink when this number gets lower than some threshold. Since
391 : * node256 cannot possibly have zero children, we let the counter overflow
392 : * and we interpret zero as "256" for this node kind.
393 : */
394 : uint8 count;
395 : } RT_NODE;
396 :
397 :
398 : /* pointer returned by allocation */
399 : #ifdef RT_SHMEM
400 : #define RT_PTR_ALLOC dsa_pointer
401 : #define RT_INVALID_PTR_ALLOC InvalidDsaPointer
402 : #define RT_PTR_ALLOC_IS_VALID(ptr) DsaPointerIsValid(ptr)
403 : #else
404 : #define RT_PTR_ALLOC RT_NODE *
405 : #define RT_INVALID_PTR_ALLOC NULL
406 : #define RT_PTR_ALLOC_IS_VALID(ptr) PointerIsValid(ptr)
407 : #endif
408 :
409 : /*
410 : * A convenience type used when we need to work with a DSA pointer as well
411 : * as its local pointer. For local memory, both members are the same, so
412 : * we use a union.
413 : */
414 : #ifdef RT_SHMEM
415 : typedef struct RT_CHILD_PTR
416 : #else
417 : typedef union RT_CHILD_PTR
418 : #endif
419 : {
420 : RT_PTR_ALLOC alloc;
421 : RT_NODE *local;
422 : } RT_CHILD_PTR;
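
/*
 * The typical idiom, used throughout this file, is to store the
 * allocation pointer and then materialize the local pointer on demand:
 *
 *	RT_CHILD_PTR node;
 *
 *	node.alloc = tree->ctl->root;
 *	RT_PTR_SET_LOCAL(tree, &node);
 *
 * After that, node.local can be dereferenced. For local memory the
 * second step is a no-op, since both members alias the same pointer.
 */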
423 :
424 :
425 : /*
426 : * Helper macros and functions for value storage.
427 : * We either embed values in the child slots of the last level
428 : * node or store pointers to values to the child slots,
429 : * depending on the value size.
430 : */
431 :
432 : #ifdef RT_VARLEN_VALUE_SIZE
433 : #define RT_GET_VALUE_SIZE(v) RT_VARLEN_VALUE_SIZE(v)
434 : #else
435 : #define RT_GET_VALUE_SIZE(v) sizeof(RT_VALUE_TYPE)
436 : #endif
437 :
438 : /*
439 : * Return true if the value can be stored in the child array
440 : * of the lowest-level node, false otherwise.
441 : */
442 : static inline bool
443 244612 : RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p)
444 : {
445 : #ifdef RT_VARLEN_VALUE_SIZE
446 :
447 : #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
448 31544 : return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
449 : #else
450 : return false;
451 : #endif
452 :
453 : #else
454 213068 : return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
455 : #endif
456 : }
457 :
458 : /*
459 : * Return true if the child pointer contains the value, false
460 : * if the child pointer is a leaf pointer.
461 : */
462 : static inline bool
463 9213176 : RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child)
464 : {
465 : #ifdef RT_VARLEN_VALUE_SIZE
466 :
467 : #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
468 : /* check for pointer tag */
469 : #ifdef RT_SHMEM
470 783756 : return child & 1;
471 : #else
472 7821256 : return ((uintptr_t) child) & 1;
473 : #endif
474 :
475 : #else
476 : return false;
477 : #endif
478 :
479 : #else
480 608164 : return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
481 : #endif
482 : }
483 :
484 : /*
485 : * Symbols for maximum possible fanout are declared first as they are
486 : * required to declare each node kind. The declarations of the other
487 : * fanout values follow, since they need the struct sizes of each node kind.
488 : */
489 :
490 : /* max possible key chunks without struct padding */
491 : #define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE))
492 :
493 : /* equal to two 128-bit SIMD registers, regardless of availability */
494 : #define RT_FANOUT_16_MAX 32
495 :
496 : /*
497 : * This also determines the number of bits necessary for the isset array,
498 : * so we need to be mindful of the size of bitmapword. Since bitmapword
499 : * can be 64 bits, the only values that make sense here are 64 and 128.
500 : * The ART paper uses at most 64 for this node kind, and one advantage
501 : * for us is that "isset" is a single bitmapword on most platforms,
502 : * rather than an array, allowing the compiler to get rid of loops.
503 : */
504 : #define RT_FANOUT_48_MAX 64
505 :
506 : #define RT_FANOUT_256 RT_NODE_MAX_SLOTS
507 :
508 : /*
509 : * Node structs, one for each "kind"
510 : */
511 :
512 : /*
513 : * node4 and node16 use one array for key chunks and another
514 : * array of the same length for children. The keys and children
515 : * are stored at corresponding positions, sorted by chunk.
516 : */
517 :
518 : typedef struct RT_NODE_4
519 : {
520 : RT_NODE base;
521 :
522 : uint8 chunks[RT_FANOUT_4_MAX];
523 :
524 : /* number of children depends on size class */
525 : RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
526 : } RT_NODE_4;
527 :
528 : typedef struct RT_NODE_16
529 : {
530 : RT_NODE base;
531 :
532 : uint8 chunks[RT_FANOUT_16_MAX];
533 :
534 : /* number of children depends on size class */
535 : RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
536 : } RT_NODE_16;
537 :
538 : /*
539 : * node48 uses a 256-element array indexed by key chunks. This array
540 : * stores indexes into a second array containing the children.
541 : */
542 : typedef struct RT_NODE_48
543 : {
544 : RT_NODE base;
545 :
546 : /* bitmap to track which slots are in use */
547 : bitmapword isset[RT_BM_IDX(RT_FANOUT_48_MAX)];
548 :
549 : /*
550 : * Lookup table for indexes into the children[] array. We make this the
551 : * last fixed-size member so that it's convenient to memset separately
552 : * from the previous members.
553 : */
554 : uint8 slot_idxs[RT_NODE_MAX_SLOTS];
555 :
556 : /* Invalid index */
557 : #define RT_INVALID_SLOT_IDX 0xFF
558 :
559 : /* number of children depends on size class */
560 : RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
561 : } RT_NODE_48;
562 :
563 : /*
564 : * node256 is the largest node type. This node has an array of
565 : * children directly indexed by chunk. Unlike other node kinds,
566 : * its array size is by definition fixed.
567 : */
568 : typedef struct RT_NODE_256
569 : {
570 : RT_NODE base;
571 :
572 : /* bitmap to track which slots are in use */
573 : bitmapword isset[RT_BM_IDX(RT_FANOUT_256)];
574 :
575 : /* slots for 256 children */
576 : RT_PTR_ALLOC children[RT_FANOUT_256];
577 : } RT_NODE_256;
578 :
579 : #if defined(RT_SHMEM)
580 : /*
581 : * Make sure that all nodes (except for node256) fit neatly into a DSA
582 : * size class. We assume RT_FANOUT_4 is in the range where DSA size
583 : * classes increment by 8 (as of PG17 up to 64 bytes), so we just hard
584 : * code that one.
585 : */
586 :
587 : #if SIZEOF_DSA_POINTER < 8
588 : #define RT_FANOUT_16_LO ((96 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
589 : #define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
590 : #define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
591 : #else
592 : #define RT_FANOUT_16_LO ((160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
593 : #define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (320 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
594 : #define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
595 : #endif /* SIZEOF_DSA_POINTER < 8 */
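
/*
 * As a back-of-envelope check: with an 8-byte dsa_pointer, a 3-byte node
 * header, and a 32-byte chunk array, the children array starts at offset
 * 40 (assuming typical struct padding), so RT_FANOUT_16_LO works out to
 * (160 - 40) / 8 = 15 children in a 160-byte DSA allocation.
 */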
596 :
597 : #else /* ! RT_SHMEM */
598 :
599 : /* doesn't really matter, but may as well use the namesake */
600 : #define RT_FANOUT_16_LO 16
601 : /* use maximum possible */
602 : #define RT_FANOUT_16_HI RT_FANOUT_16_MAX
603 : #define RT_FANOUT_48 RT_FANOUT_48_MAX
604 :
605 : #endif /* RT_SHMEM */
606 :
607 : /*
608 : * To save memory in trees with sparse keys, it would make sense to have two
609 : * size classes for the smallest kind (perhaps a high class of 5 and a low class
610 : * of 2), but it would be more effective to utilize lazy expansion and
611 : * path compression.
612 : */
613 : #define RT_FANOUT_4 4
614 :
615 : StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding");
616 : StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than HI");
617 : StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset bits");
618 :
619 : /*
620 : * Node size classes
621 : *
622 : * Nodes of different kinds necessarily belong to different size classes.
623 : * One innovation in our implementation compared to the ART paper is
624 : * decoupling the notion of size class from kind.
625 : *
626 : * The size classes within a given node kind have the same underlying
627 : * type, but a variable number of children/values. This is possible
628 : * because each type (except node256) contains metadata that work the
629 : * same way regardless of how many child slots there are. The nodes
630 : * can introspect their allocated capacity at runtime.
631 : */
632 : typedef enum RT_SIZE_CLASS
633 : {
634 : RT_CLASS_4 = 0,
635 : RT_CLASS_16_LO,
636 : RT_CLASS_16_HI,
637 : RT_CLASS_48,
638 : RT_CLASS_256
639 : } RT_SIZE_CLASS;
640 :
641 : /* Information for each size class */
642 : typedef struct RT_SIZE_CLASS_ELEM
643 : {
644 : const char *name;
645 : int fanout;
646 : size_t allocsize;
647 : } RT_SIZE_CLASS_ELEM;
648 :
649 :
650 : static const RT_SIZE_CLASS_ELEM RT_SIZE_CLASS_INFO[] = {
651 : [RT_CLASS_4] = {
652 : .name = RT_STR(RT_PREFIX) "_radix_tree node4",
653 : .fanout = RT_FANOUT_4,
654 : .allocsize = sizeof(RT_NODE_4) + RT_FANOUT_4 * sizeof(RT_PTR_ALLOC),
655 : },
656 : [RT_CLASS_16_LO] = {
657 : .name = RT_STR(RT_PREFIX) "_radix_tree node16_lo",
658 : .fanout = RT_FANOUT_16_LO,
659 : .allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_LO * sizeof(RT_PTR_ALLOC),
660 : },
661 : [RT_CLASS_16_HI] = {
662 : .name = RT_STR(RT_PREFIX) "_radix_tree node16_hi",
663 : .fanout = RT_FANOUT_16_HI,
664 : .allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_HI * sizeof(RT_PTR_ALLOC),
665 : },
666 : [RT_CLASS_48] = {
667 : .name = RT_STR(RT_PREFIX) "_radix_tree node48",
668 : .fanout = RT_FANOUT_48,
669 : .allocsize = sizeof(RT_NODE_48) + RT_FANOUT_48 * sizeof(RT_PTR_ALLOC),
670 : },
671 : [RT_CLASS_256] = {
672 : .name = RT_STR(RT_PREFIX) "_radix_tree node256",
673 : .fanout = RT_FANOUT_256,
674 : .allocsize = sizeof(RT_NODE_256),
675 : },
676 : };
677 :
678 : #define RT_NUM_SIZE_CLASSES lengthof(RT_SIZE_CLASS_INFO)
679 :
680 : #ifdef RT_SHMEM
681 : /* A magic value used to identify our radix tree */
682 : #define RT_RADIX_TREE_MAGIC 0x54A48167
683 : #endif
684 :
685 : /* Contains the actual tree, plus ancillary info */
686 : typedef struct RT_RADIX_TREE_CONTROL
687 : {
688 : #ifdef RT_SHMEM
689 : RT_HANDLE handle;
690 : uint32 magic;
691 : LWLock lock;
692 : #endif
693 :
694 : RT_PTR_ALLOC root;
695 : uint64 max_val;
696 : int64 num_keys;
697 : int start_shift;
698 :
699 : /* statistics */
700 : #ifdef RT_DEBUG
701 : int64 num_nodes[RT_NUM_SIZE_CLASSES];
702 : int64 num_leaves;
703 : #endif
704 : } RT_RADIX_TREE_CONTROL;
705 :
706 : /* Entry point for allocating and accessing the tree */
707 : struct RT_RADIX_TREE
708 : {
709 : /* pointing to either local memory or DSA */
710 : RT_RADIX_TREE_CONTROL *ctl;
711 :
712 : #ifdef RT_SHMEM
713 : dsa_area *dsa;
714 : #else
715 : MemoryContextData *node_slabs[RT_NUM_SIZE_CLASSES];
716 :
717 : /* leaf_context is used only for single-value leaves */
718 : MemoryContextData *leaf_context;
719 : #endif
720 : };
721 :
722 : /*
723 : * Iteration support.
724 : *
725 : * Iterating over the radix tree produces each key/value pair in ascending
726 : * order of the key.
727 : */
728 :
729 : /* state for iterating over a single node */
730 : typedef struct RT_NODE_ITER
731 : {
732 : RT_CHILD_PTR node;
733 :
734 : /*
735 : * The next index of the chunk array in RT_NODE_KIND_4 and RT_NODE_KIND_16
736 : * nodes, or the next chunk in RT_NODE_KIND_48 and RT_NODE_KIND_256 nodes.
737 : * 0 for the initial value.
738 : */
739 : int idx;
740 : } RT_NODE_ITER;
741 :
742 : /* state for iterating over the whole radix tree */
743 : struct RT_ITER
744 : {
745 : RT_RADIX_TREE *tree;
746 :
747 : /*
748 : * A stack to track iteration for each level. Level 0 is the lowest (or
749 : * leaf) level.
750 : */
751 : RT_NODE_ITER node_iters[RT_MAX_LEVEL];
752 : int top_level;
753 : int cur_level;
754 :
755 : /* The key constructed during iteration */
756 : uint64 key;
757 : };
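
/*
 * Iterating is then a simple loop (a sketch, assuming the hypothetical
 * "foo" instantiation from the header comment, with int64 values;
 * process() stands in for caller code):
 *
 *	uint64		key;
 *	int64	   *value_p;
 *	foo_iter   *iter = foo_begin_iterate(tree);
 *
 *	while ((value_p = foo_iterate_next(iter, &key)) != NULL)
 *		process(key, *value_p);
 *	foo_end_iterate(iter);
 *
 * For a shared tree, the caller holds the share lock across the loop.
 */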
758 :
759 :
760 : /* verification (available only in assert-enabled builds) */
761 : static void RT_VERIFY_NODE(RT_NODE * node);
762 :
763 : static inline void
764 36630934 : RT_PTR_SET_LOCAL(RT_RADIX_TREE * tree, RT_CHILD_PTR * node)
765 : {
766 : #ifdef RT_SHMEM
767 2129348 : node->local = dsa_get_address(tree->dsa, node->alloc);
768 : #endif
769 36630934 : }
770 :
771 : /* Convenience functions for node48 and node256 */
772 :
773 : /* Return true if there is an entry for "chunk" */
774 : static inline bool
775 1054924 : RT_NODE_48_IS_CHUNK_USED(RT_NODE_48 * node, uint8 chunk)
776 : {
777 1054924 : return node->slot_idxs[chunk] != RT_INVALID_SLOT_IDX;
778 : }
779 :
780 : static inline RT_PTR_ALLOC *
781 1098700 : RT_NODE_48_GET_CHILD(RT_NODE_48 * node, uint8 chunk)
782 : {
783 1098700 : return &node->children[node->slot_idxs[chunk]];
784 : }
785 :
786 : /* Return true if there is an entry for "chunk" */
787 : static inline bool
788 8274296 : RT_NODE_256_IS_CHUNK_USED(RT_NODE_256 * node, uint8 chunk)
789 : {
790 8274296 : int idx = RT_BM_IDX(chunk);
791 8274296 : int bitnum = RT_BM_BIT(chunk);
792 :
793 8274296 : return (node->isset[idx] & ((bitmapword) 1 << bitnum)) != 0;
794 : }
795 :
796 : static inline RT_PTR_ALLOC *
797 8082858 : RT_NODE_256_GET_CHILD(RT_NODE_256 * node, uint8 chunk)
798 : {
799 : Assert(RT_NODE_256_IS_CHUNK_USED(node, chunk));
800 8082858 : return &node->children[chunk];
801 : }
802 :
803 : /*
804 : * Return the smallest shift that will allow storing the given key.
805 : */
806 : static inline int
807 115772 : RT_KEY_GET_SHIFT(uint64 key)
808 : {
809 115772 : if (key == 0)
810 0 : return 0;
811 : else
812 115772 : return (pg_leftmost_one_pos64(key) / RT_SPAN) * RT_SPAN;
813 : }
814 :
815 : /*
816 : * Return the max value that can be stored in the tree with the given shift.
817 : */
818 : static uint64
819 115692 : RT_SHIFT_GET_MAX_VAL(int shift)
820 : {
821 115692 : if (shift == RT_MAX_SHIFT)
822 20 : return UINT64_MAX;
823 : else
824 115672 : return (UINT64CONST(1) << (shift + RT_SPAN)) - 1;
825 : }
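
/*
 * For example, the key 0x00ffffff has its leftmost one bit at position
 * 23, so RT_KEY_GET_SHIFT() returns (23 / 8) * 8 = 16, meaning three
 * tree levels suffice. RT_SHIFT_GET_MAX_VAL(16) is then (1 << 24) - 1 =
 * 0x00ffffff, the largest key such a tree can hold.
 */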
826 :
827 : /*
828 : * Allocate a new node with the given node kind and size class.
829 : */
830 : static inline RT_CHILD_PTR
831 168894 : RT_ALLOC_NODE(RT_RADIX_TREE * tree, const uint8 kind, const RT_SIZE_CLASS size_class)
832 : {
833 : RT_CHILD_PTR allocnode;
834 : RT_NODE *node;
835 : size_t allocsize;
836 :
837 168894 : allocsize = RT_SIZE_CLASS_INFO[size_class].allocsize;
838 :
839 : #ifdef RT_SHMEM
840 98 : allocnode.alloc = dsa_allocate(tree->dsa, allocsize);
841 : #else
842 168796 : allocnode.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->node_slabs[size_class],
843 : allocsize);
844 : #endif
845 :
846 168894 : RT_PTR_SET_LOCAL(tree, &allocnode);
847 168894 : node = allocnode.local;
848 :
849 : /* initialize contents */
850 :
851 168894 : switch (kind)
852 : {
853 151280 : case RT_NODE_KIND_4:
854 151280 : memset(node, 0, offsetof(RT_NODE_4, children));
855 151280 : break;
856 13150 : case RT_NODE_KIND_16:
857 13150 : memset(node, 0, offsetof(RT_NODE_16, children));
858 13150 : break;
859 4310 : case RT_NODE_KIND_48:
860 : {
861 4310 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
862 :
863 4310 : memset(n48, 0, offsetof(RT_NODE_48, slot_idxs));
864 4310 : memset(n48->slot_idxs, RT_INVALID_SLOT_IDX, sizeof(n48->slot_idxs));
865 4310 : break;
866 : }
867 154 : case RT_NODE_KIND_256:
868 154 : memset(node, 0, offsetof(RT_NODE_256, children));
869 154 : break;
870 0 : default:
871 0 : pg_unreachable();
872 : }
873 :
874 168894 : node->kind = kind;
875 :
876 : /*
877 : * For node256, this will actually overflow to zero, but that's okay
878 : * because that node doesn't need to introspect this value.
879 : */
880 168894 : node->fanout = RT_SIZE_CLASS_INFO[size_class].fanout;
881 :
882 : #ifdef RT_DEBUG
883 : /* update the statistics */
884 52100 : tree->ctl->num_nodes[size_class]++;
885 : #endif
886 :
887 168894 : return allocnode;
888 : }
889 :
890 : /*
891 : * Allocate a new leaf.
892 : */
893 : static RT_CHILD_PTR
894 26906 : RT_ALLOC_LEAF(RT_RADIX_TREE * tree, size_t allocsize)
895 : {
896 : RT_CHILD_PTR leaf;
897 :
898 : #ifdef RT_SHMEM
899 1262 : leaf.alloc = dsa_allocate(tree->dsa, allocsize);
900 1262 : RT_PTR_SET_LOCAL(tree, &leaf);
901 : #else
902 25644 : leaf.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->leaf_context, allocsize);
903 : #endif
904 :
905 : #ifdef RT_DEBUG
906 0 : tree->ctl->num_leaves++;
907 : #endif
908 :
909 26906 : return leaf;
910 : }
911 :
912 : /*
913 : * Copy relevant members of the node header.
914 : * This is a separate function in case other fields are added.
915 : */
916 : static inline void
917 21776 : RT_COPY_COMMON(RT_CHILD_PTR newnode, RT_CHILD_PTR oldnode)
918 : {
919 21776 : (newnode.local)->count = (oldnode.local)->count;
920 21776 : }
921 :
922 : /* Free the given node */
923 : static void
924 53216 : RT_FREE_NODE(RT_RADIX_TREE * tree, RT_CHILD_PTR node)
925 : {
926 : #ifdef RT_DEBUG
927 : int i;
928 :
929 : /* update the statistics */
930 :
931 80870 : for (i = 0; i < RT_NUM_SIZE_CLASSES; i++)
932 : {
933 80838 : if ((node.local)->fanout == RT_SIZE_CLASS_INFO[i].fanout)
934 52004 : break;
935 : }
936 :
937 : /*
938 : * The fanout of node256 will appear to be zero within the node header
939 : * because of overflow, so we need an extra check here.
940 : */
941 52036 : if (i == RT_NUM_SIZE_CLASSES)
942 32 : i = RT_CLASS_256;
943 :
944 52036 : tree->ctl->num_nodes[i]--;
945 : Assert(tree->ctl->num_nodes[i] >= 0);
946 : #endif
947 :
948 : #ifdef RT_SHMEM
949 54 : dsa_free(tree->dsa, node.alloc);
950 : #else
951 53162 : pfree(node.alloc);
952 : #endif
953 53216 : }
954 :
955 : static inline void
956 6 : RT_FREE_LEAF(RT_RADIX_TREE * tree, RT_PTR_ALLOC leaf)
957 : {
958 : Assert(leaf != tree->ctl->root);
959 :
960 : #ifdef RT_DEBUG
961 : /* update the statistics */
962 0 : tree->ctl->num_leaves--;
963 : Assert(tree->ctl->num_leaves >= 0);
964 : #endif
965 :
966 : #ifdef RT_SHMEM
967 0 : dsa_free(tree->dsa, leaf);
968 : #else
969 6 : pfree(leaf);
970 : #endif
971 6 : }
972 :
973 : /***************** SEARCH *****************/
974 :
975 : /*
976 : * Return the address of the child corresponding to "chunk",
977 : * or NULL if there is no such element.
978 : */
979 : static inline RT_PTR_ALLOC *
980 6063364 : RT_NODE_16_SEARCH_EQ(RT_NODE_16 * node, uint8 chunk)
981 : {
982 6063364 : int count = node->base.count;
983 : #ifndef USE_NO_SIMD
984 : Vector8 spread_chunk;
985 : Vector8 haystack1;
986 : Vector8 haystack2;
987 : Vector8 cmp1;
988 : Vector8 cmp2;
989 : uint32 bitfield;
990 6063364 : RT_PTR_ALLOC *slot_simd = NULL;
991 : #endif
992 :
993 : #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
994 : RT_PTR_ALLOC *slot = NULL;
995 :
996 : for (int i = 0; i < count; i++)
997 : {
998 : if (node->chunks[i] == chunk)
999 : {
1000 : slot = &node->children[i];
1001 : break;
1002 : }
1003 : }
1004 : #endif
1005 :
1006 : #ifndef USE_NO_SIMD
1007 : /* replicate the search key */
1008 6063364 : spread_chunk = vector8_broadcast(chunk);
1009 :
1010 : /* compare to all 32 keys stored in the node */
1011 6063364 : vector8_load(&haystack1, &node->chunks[0]);
1012 6063364 : vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
1013 6063364 : cmp1 = vector8_eq(spread_chunk, haystack1);
1014 6063364 : cmp2 = vector8_eq(spread_chunk, haystack2);
1015 :
1016 : /* convert comparison to a bitfield */
1017 6063364 : bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
1018 :
1019 : /* mask off invalid entries */
1020 6063364 : bitfield &= ((UINT64CONST(1) << count) - 1);
1021 :
1022 : /* convert bitfield to index by counting trailing zeros */
1023 6063364 : if (bitfield)
1024 5485324 : slot_simd = &node->children[pg_rightmost_one_pos32(bitfield)];
1025 :
1026 : Assert(slot_simd == slot);
1027 6063364 : return slot_simd;
1028 : #else
1029 : return slot;
1030 : #endif
1031 : }
1032 :
1033 : /*
1034 : * Search for the child pointer corresponding to "chunk" in the given node.
1035 : *
1036 : * Return the address of the child if found, otherwise return NULL.
1037 : */
1038 : static inline RT_PTR_ALLOC *
1039 28194622 : RT_NODE_SEARCH(RT_NODE * node, uint8 chunk)
1040 : {
1041 : /* Make sure we already converted to local pointer */
1042 : Assert(node != NULL);
1043 :
1044 28194622 : switch (node->kind)
1045 : {
1046 12377778 : case RT_NODE_KIND_4:
1047 : {
1048 12377778 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
1049 :
1050 15707020 : for (int i = 0; i < n4->base.count; i++)
1051 : {
1052 14606406 : if (n4->chunks[i] == chunk)
1053 11277164 : return &n4->children[i];
1054 : }
1055 1100614 : return NULL;
1056 : }
1057 6063364 : case RT_NODE_KIND_16:
1058 6063364 : return RT_NODE_16_SEARCH_EQ((RT_NODE_16 *) node, chunk);
1059 1523216 : case RT_NODE_KIND_48:
1060 : {
1061 1523216 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
1062 1523216 : int slotpos = n48->slot_idxs[chunk];
1063 :
1064 1523216 : if (slotpos == RT_INVALID_SLOT_IDX)
1065 609344 : return NULL;
1066 :
1067 913872 : return RT_NODE_48_GET_CHILD(n48, chunk);
1068 : }
1069 8230264 : case RT_NODE_KIND_256:
1070 : {
1071 8230264 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
1072 :
1073 8230264 : if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
1074 196250 : return NULL;
1075 :
1076 8034014 : return RT_NODE_256_GET_CHILD(n256, chunk);
1077 : }
1078 0 : default:
1079 0 : pg_unreachable();
1080 : }
1081 : }
1082 :
1083 : /*
1084 : * Search for the given key in the radix tree. Return a pointer to the value
1085 : * if found, otherwise return NULL.
1086 : *
1087 : * Since the function returns a pointer (to support variable-length values),
1088 : * the caller is responsible for locking until it's finished with the value.
1089 : */
1090 : RT_SCOPE RT_VALUE_TYPE *
1091 11032430 : RT_FIND(RT_RADIX_TREE * tree, uint64 key)
1092 : {
1093 : RT_CHILD_PTR node;
1094 11032430 : RT_PTR_ALLOC *slot = NULL;
1095 : int shift;
1096 :
1097 : #ifdef RT_SHMEM
1098 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1099 : #endif
1100 :
1101 11032430 : if (key > tree->ctl->max_val)
1102 2866 : return NULL;
1103 :
1104 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
1105 11029564 : node.alloc = tree->ctl->root;
1106 11029564 : shift = tree->ctl->start_shift;
1107 :
1108 : /* Descend the tree */
1109 35287914 : while (shift >= 0)
1110 : {
1111 26502384 : RT_PTR_SET_LOCAL(tree, &node);
1112 26502384 : slot = RT_NODE_SEARCH(node.local, RT_GET_KEY_CHUNK(key, shift));
1113 26502384 : if (slot == NULL)
1114 2244034 : return NULL;
1115 :
1116 24258350 : node.alloc = *slot;
1117 24258350 : shift -= RT_SPAN;
1118 : }
1119 :
1120 8785530 : if (RT_CHILDPTR_IS_VALUE(*slot))
1121 562816 : return (RT_VALUE_TYPE *) slot;
1122 : else
1123 : {
1124 8222714 : RT_PTR_SET_LOCAL(tree, &node);
1125 8222714 : return (RT_VALUE_TYPE *) node.local;
1126 : }
1127 : }
1128 :
1129 : /***************** INSERTION *****************/
1130 :
1131 : #define RT_NODE_MUST_GROW(node) \
1132 : ((node)->count == (node)->fanout)
1133 :
1134 : /*
1135 : * Return the index into the chunk and children arrays at which to insert,
1136 : * such that the arrays remain ordered.
1137 : */
1138 : static inline int
1139 20362 : RT_NODE_4_GET_INSERTPOS(RT_NODE_4 * node, uint8 chunk, int count)
1140 : {
1141 : int idx;
1142 :
1143 47254 : for (idx = 0; idx < count; idx++)
1144 : {
1145 38158 : if (node->chunks[idx] >= chunk)
1146 11266 : break;
1147 : }
1148 :
1149 20362 : return idx;
1150 : }
1151 :
1152 : /*
1153 : * Return the index into the chunk and children arrays at which to insert,
1154 : * such that the arrays remain ordered.
1155 : */
1156 : static inline int
1157 120832 : RT_NODE_16_GET_INSERTPOS(RT_NODE_16 * node, uint8 chunk)
1158 : {
1159 120832 : int count = node->base.count;
1160 : #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
1161 : int index;
1162 : #endif
1163 :
1164 : #ifndef USE_NO_SIMD
1165 : Vector8 spread_chunk;
1166 : Vector8 haystack1;
1167 : Vector8 haystack2;
1168 : Vector8 cmp1;
1169 : Vector8 cmp2;
1170 : Vector8 min1;
1171 : Vector8 min2;
1172 : uint32 bitfield;
1173 : int index_simd;
1174 : #endif
1175 :
1176 : /*
1177 : * First compare the last element. There are two reasons to branch here:
1178 : *
1179 : * 1) A realistic pattern is inserting ordered keys. In that case,
1180 : * non-SIMD platforms must do a linear search to the last chunk to find
1181 : * the insert position. This will get slower as the node fills up.
1182 : *
1183 : * 2) On SIMD platforms, we must branch anyway to make sure we don't bit
1184 : * scan an empty bitfield. Doing the branch here eliminates some work that
1185 : * we might otherwise throw away.
1186 : */
1187 : Assert(count > 0);
1188 120832 : if (node->chunks[count - 1] < chunk)
1189 16064 : return count;
1190 :
1191 : #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
1192 :
1193 : for (index = 0; index < count; index++)
1194 : {
1195 : if (node->chunks[index] > chunk)
1196 : break;
1197 : }
1198 : #endif
1199 :
1200 : #ifndef USE_NO_SIMD
1201 :
1202 : /*
1203 : * This is a bit more complicated than RT_NODE_16_SEARCH_EQ(), because no
1204 : * unsigned uint8 comparison instruction exists, at least for SSE2. So we
1205 : * need to play some trickery using vector8_min() to effectively get >=.
1206 : * There'll never be any equal elements in current uses, but that's what
1207 : * we get here...
1208 : */
1209 104768 : spread_chunk = vector8_broadcast(chunk);
1210 104768 : vector8_load(&haystack1, &node->chunks[0]);
1211 104768 : vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
1212 104768 : min1 = vector8_min(spread_chunk, haystack1);
1213 104768 : min2 = vector8_min(spread_chunk, haystack2);
1214 104768 : cmp1 = vector8_eq(spread_chunk, min1);
1215 104768 : cmp2 = vector8_eq(spread_chunk, min2);
1216 104768 : bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
1217 :
1218 : Assert((bitfield & ((UINT64CONST(1) << count) - 1)) != 0);
1219 104768 : index_simd = pg_rightmost_one_pos32(bitfield);
1220 :
1221 : Assert(index_simd == index);
1222 104768 : return index_simd;
1223 : #else
1224 : return index;
1225 : #endif
1226 : }
1227 :
1228 : /* Shift the elements right at "insertpos" by one */
1229 : static inline void
1230 132074 : RT_SHIFT_ARRAYS_FOR_INSERT(uint8 *chunks, RT_PTR_ALLOC * children, int count, int insertpos)
1231 : {
1232 : /*
1233 : * This is basically a memmove, but written in a simple loop for speed on
1234 : * small inputs.
1235 : */
1236 1124232 : for (int i = count - 1; i >= insertpos; i--)
1237 : {
1238 : /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
1239 : #ifdef __GNUC__
1240 992158 : __asm__("");
1241 : #endif
1242 992158 : chunks[i + 1] = chunks[i];
1243 992158 : children[i + 1] = children[i];
1244 : }
1245 132074 : }
1246 :
1247 : /*
1248 : * Copy both chunk and slot arrays into the right
1249 : * place. The caller is responsible for inserting the new element.
1250 : */
1251 : static inline void
1252 9120 : RT_COPY_ARRAYS_FOR_INSERT(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
1253 : uint8 *src_chunks, RT_PTR_ALLOC * src_children,
1254 : int count, int insertpos)
1255 : {
1256 98936 : for (int i = 0; i < count; i++)
1257 : {
1258 89816 : int sourceidx = i;
1259 :
1260 : /* use a branch-free computation to skip the index of the new element */
1261 89816 : int destidx = i + (i >= insertpos);
1262 :
1263 89816 : dst_chunks[destidx] = src_chunks[sourceidx];
1264 89816 : dst_children[destidx] = src_children[sourceidx];
1265 : }
1266 9120 : }
1267 :
1268 : static inline RT_PTR_ALLOC *
1269 22824 : RT_ADD_CHILD_256(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1270 : {
1271 22824 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
1272 22824 : int idx = RT_BM_IDX(chunk);
1273 22824 : int bitnum = RT_BM_BIT(chunk);
1274 :
1275 : /* Mark the slot used for "chunk" */
1276 22824 : n256->isset[idx] |= ((bitmapword) 1 << bitnum);
1277 :
1278 22824 : n256->base.count++;
1279 22824 : RT_VERIFY_NODE((RT_NODE *) n256);
1280 :
1281 22824 : return RT_NODE_256_GET_CHILD(n256, chunk);
1282 : }
1283 :
1284 : static pg_noinline RT_PTR_ALLOC *
1285 154 : RT_GROW_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1286 : uint8 chunk)
1287 : {
1288 154 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1289 : RT_CHILD_PTR newnode;
1290 : RT_NODE_256 *new256;
1291 154 : int i = 0;
1292 :
1293 : /* initialize new node */
1294 154 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_256, RT_CLASS_256);
1295 154 : new256 = (RT_NODE_256 *) newnode.local;
1296 :
1297 : /* copy over the entries */
1298 154 : RT_COPY_COMMON(newnode, node);
1299 770 : for (int word_num = 0; word_num < RT_BM_IDX(RT_NODE_MAX_SLOTS); word_num++)
1300 : {
1301 616 : bitmapword bitmap = 0;
1302 :
1303 : /*
1304 : * Bit manipulation is a surprisingly large portion of the overhead in
1305 : * the naive implementation. Doing stores word-at-a-time removes a lot
1306 : * of that overhead.
1307 : */
1308 40040 : for (int bit = 0; bit < BITS_PER_BITMAPWORD; bit++)
1309 : {
1310 39424 : uint8 offset = n48->slot_idxs[i];
1311 :
1312 39424 : if (offset != RT_INVALID_SLOT_IDX)
1313 : {
1314 9840 : bitmap |= ((bitmapword) 1 << bit);
1315 9840 : new256->children[i] = n48->children[offset];
1316 : }
1317 :
1318 39424 : i++;
1319 : }
1320 :
1321 616 : new256->isset[word_num] = bitmap;
1322 : }
1323 :
1324 : /* free old node and update reference in parent */
1325 154 : *parent_slot = newnode.alloc;
1326 154 : RT_FREE_NODE(tree, node);
1327 :
1328 154 : return RT_ADD_CHILD_256(tree, newnode, chunk);
1329 : }
1330 :
1331 : static inline RT_PTR_ALLOC *
1332 53944 : RT_ADD_CHILD_48(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1333 : {
1334 53944 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1335 : int insertpos;
1336 53944 : int idx = 0;
1337 : bitmapword w,
1338 : inverse;
1339 :
1340 : /* get the first word with at least one bit not set */
1341 53944 : for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++)
1342 : {
1343 53944 : w = n48->isset[i];
1344 53944 : if (w < ~((bitmapword) 0))
1345 : {
1346 53944 : idx = i;
1347 53944 : break;
1348 : }
1349 : }
1350 :
1351 : /* To get the first unset bit in w, get the first set bit in ~w */
1352 53944 : inverse = ~w;
1353 53944 : insertpos = idx * BITS_PER_BITMAPWORD;
1354 53944 : insertpos += bmw_rightmost_one_pos(inverse);
1355 : Assert(insertpos < n48->base.fanout);
1356 :
1357 : /* mark the slot used by setting the rightmost zero bit */
1358 53944 : n48->isset[idx] |= w + 1;
1359 :
1360 : /* insert new chunk into place */
1361 53944 : n48->slot_idxs[chunk] = insertpos;
1362 :
1363 53944 : n48->base.count++;
1364 53944 : RT_VERIFY_NODE((RT_NODE *) n48);
1365 :
1366 53944 : return &n48->children[insertpos];
1367 : }
1368 :
1369 : static pg_noinline RT_PTR_ALLOC *
1370 8724 : RT_GROW_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1371 : uint8 chunk)
1372 : {
1373 8724 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1374 : int insertpos;
1375 :
1376 8724 : if (n16->base.fanout < RT_FANOUT_16_HI)
1377 : {
1378 : RT_CHILD_PTR newnode;
1379 : RT_NODE_16 *new16;
1380 :
1381 : Assert(n16->base.fanout == RT_FANOUT_16_LO);
1382 :
1383 : /* initialize new node */
1384 4446 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_HI);
1385 4446 : new16 = (RT_NODE_16 *) newnode.local;
1386 :
1387 : /* copy over existing entries */
1388 4446 : RT_COPY_COMMON(newnode, node);
1389 : Assert(n16->base.count == RT_FANOUT_16_LO);
1390 4446 : insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1391 4446 : RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
1392 4446 : n16->chunks, n16->children,
1393 : RT_FANOUT_16_LO, insertpos);
1394 :
1395 : /* insert new chunk into place */
1396 4446 : new16->chunks[insertpos] = chunk;
1397 :
1398 4446 : new16->base.count++;
1399 4446 : RT_VERIFY_NODE((RT_NODE *) new16);
1400 :
1401 : /* free old node and update references */
1402 4446 : RT_FREE_NODE(tree, node);
1403 4446 : *parent_slot = newnode.alloc;
1404 :
1405 4446 : return &new16->children[insertpos];
1406 : }
1407 : else
1408 : {
1409 : RT_CHILD_PTR newnode;
1410 : RT_NODE_48 *new48;
1411 : int idx,
1412 : bit;
1413 :
1414 : Assert(n16->base.fanout == RT_FANOUT_16_HI);
1415 :
1416 : /* initialize new node */
1417 4278 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
1418 4278 : new48 = (RT_NODE_48 *) newnode.local;
1419 :
1420 : /* copy over the entries */
1421 4278 : RT_COPY_COMMON(newnode, node);
1422 141174 : for (int i = 0; i < RT_FANOUT_16_HI; i++)
1423 136896 : new48->slot_idxs[n16->chunks[i]] = i;
1424 4278 : memcpy(&new48->children[0], &n16->children[0], RT_FANOUT_16_HI * sizeof(new48->children[0]));
1425 :
1426 : /*
1427 : * Since we just copied a dense array, we can fill "isset" using a
1428 : * single store, provided the length of that array is at most the
1429 : * number of bits in a bitmapword.
1430 : */
1431 : Assert(RT_FANOUT_16_HI <= BITS_PER_BITMAPWORD);
1432 4278 : new48->isset[0] = (bitmapword) (((uint64) 1 << RT_FANOUT_16_HI) - 1);
1433 :
1434 : /* put the new child at the end of the copied entries */
1435 4278 : insertpos = RT_FANOUT_16_HI;
1436 4278 : idx = RT_BM_IDX(insertpos);
1437 4278 : bit = RT_BM_BIT(insertpos);
1438 :
1439 : /* mark the slot used */
1440 4278 : new48->isset[idx] |= ((bitmapword) 1 << bit);
1441 :
1442 : /* insert new chunk into place */
1443 4278 : new48->slot_idxs[chunk] = insertpos;
1444 :
1445 4278 : new48->base.count++;
1446 4278 : RT_VERIFY_NODE((RT_NODE *) new48);
1447 :
1448 : /* free old node and update reference in parent */
1449 4278 : *parent_slot = newnode.alloc;
1450 4278 : RT_FREE_NODE(tree, node);
1451 :
1452 4278 : return &new48->children[insertpos];
1453 : }
1454 : }
1455 :
1456 : static inline RT_PTR_ALLOC *
1457 116386 : RT_ADD_CHILD_16(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1458 : {
1459 116386 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1460 116386 : int insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1461 :
1462 : /* shift chunks and children */
1463 116386 : RT_SHIFT_ARRAYS_FOR_INSERT(n16->chunks, n16->children,
1464 116386 : n16->base.count, insertpos);
1465 :
1466 : /* insert new chunk into place */
1467 116386 : n16->chunks[insertpos] = chunk;
1468 :
1469 116386 : n16->base.count++;
1470 116386 : RT_VERIFY_NODE((RT_NODE *) n16);
1471 :
1472 116386 : return &n16->children[insertpos];
1473 : }
1474 :
1475 : static pg_noinline RT_PTR_ALLOC *
1476 4674 : RT_GROW_NODE_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1477 : uint8 chunk)
1478 : {
1479 4674 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1480 : RT_CHILD_PTR newnode;
1481 : RT_NODE_16 *new16;
1482 : int insertpos;
1483 :
1484 : /* initialize new node */
1485 4674 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
1486 4674 : new16 = (RT_NODE_16 *) newnode.local;
1487 :
1488 : /* copy over existing entries */
1489 4674 : RT_COPY_COMMON(newnode, node);
1490 : Assert(n4->base.count == RT_FANOUT_4);
1491 4674 : insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, RT_FANOUT_4);
1492 4674 : RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
1493 4674 : n4->chunks, n4->children,
1494 : RT_FANOUT_4, insertpos);
1495 :
1496 : /* insert new chunk into place */
1497 4674 : new16->chunks[insertpos] = chunk;
1498 :
1499 4674 : new16->base.count++;
1500 4674 : RT_VERIFY_NODE((RT_NODE *) new16);
1501 :
1502 : /* free old node and update reference in parent */
1503 4674 : *parent_slot = newnode.alloc;
1504 4674 : RT_FREE_NODE(tree, node);
1505 :
1506 4674 : return &new16->children[insertpos];
1507 : }
1508 :
1509 : static inline RT_PTR_ALLOC *
1510 15688 : RT_ADD_CHILD_4(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1511 : {
1512 15688 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1513 15688 : int count = n4->base.count;
1514 15688 : int insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, count);
1515 :
1516 : /* shift chunks and children */
1517 15688 : RT_SHIFT_ARRAYS_FOR_INSERT(n4->chunks, n4->children,
1518 : count, insertpos);
1519 :
1520 : /* insert new chunk into place */
1521 15688 : n4->chunks[insertpos] = chunk;
1522 :
1523 15688 : n4->base.count++;
1524 15688 : RT_VERIFY_NODE((RT_NODE *) n4);
1525 :
1526 15688 : return &n4->children[insertpos];
1527 : }
1528 :
1529 : /*
1530 : * Reserve a slot in "node"'s child array. The caller will populate it
1531 : * with the actual child pointer.
1532 : *
1533 : * "parent_slot" is the address of the parent's child pointer to "node".
1534 : * If the node we're inserting into needs to grow, we update the parent's
1535 : * child pointer with the pointer to the new larger node.
1536 : */
1537 : static inline RT_PTR_ALLOC *
1538 222240 : RT_NODE_INSERT(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1539 : uint8 chunk)
1540 : {
1541 222240 : RT_NODE *n = node.local;
1542 :
1543 222240 : switch (n->kind)
1544 : {
1545 20362 : case RT_NODE_KIND_4:
1546 : {
1547 20362 : if (unlikely(RT_NODE_MUST_GROW(n)))
1548 4674 : return RT_GROW_NODE_4(tree, parent_slot, node, chunk);
1549 :
1550 15688 : return RT_ADD_CHILD_4(tree, node, chunk);
1551 : }
1552 125110 : case RT_NODE_KIND_16:
1553 : {
1554 125110 : if (unlikely(RT_NODE_MUST_GROW(n)))
1555 8724 : return RT_GROW_NODE_16(tree, parent_slot, node, chunk);
1556 :
1557 116386 : return RT_ADD_CHILD_16(tree, node, chunk);
1558 : }
1559 54098 : case RT_NODE_KIND_48:
1560 : {
1561 54098 : if (unlikely(RT_NODE_MUST_GROW(n)))
1562 154 : return RT_GROW_NODE_48(tree, parent_slot, node, chunk);
1563 :
1564 53944 : return RT_ADD_CHILD_48(tree, node, chunk);
1565 : }
1566 22670 : case RT_NODE_KIND_256:
1567 22670 : return RT_ADD_CHILD_256(tree, node, chunk);
1568 0 : default:
1569 0 : pg_unreachable();
1570 : }
1571 : }
1572 :
1573 : /*
1574 : * The radix tree doesn't have sufficient height. Put new node(s) on top,
1575 : * and move the old node below it.
1576 : */
1577 : static pg_noinline void
1578 52 : RT_EXTEND_UP(RT_RADIX_TREE * tree, uint64 key)
1579 : {
1580 52 : int target_shift = RT_KEY_GET_SHIFT(key);
1581 52 : int shift = tree->ctl->start_shift;
1582 :
1583 : Assert(shift < target_shift);
1584 :
1585 : /* Grow tree upwards until start shift can accommodate the key */
1586 164 : while (shift < target_shift)
1587 : {
1588 : RT_CHILD_PTR node;
1589 : RT_NODE_4 *n4;
1590 :
1591 112 : node = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1592 112 : n4 = (RT_NODE_4 *) node.local;
1593 112 : n4->base.count = 1;
1594 112 : n4->chunks[0] = 0;
1595 112 : n4->children[0] = tree->ctl->root;
1596 :
1597 : /* Update the root */
1598 112 : tree->ctl->root = node.alloc;
1599 :
1600 112 : shift += RT_SPAN;
1601 : }
1602 :
1603 52 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(target_shift);
1604 52 : tree->ctl->start_shift = target_shift;
1605 52 : }
1606 :
1607 : /*
1608 : * Insert a chain of nodes until we reach the lowest level,
1609 : * and return the address of a slot to be filled further up
1610 : * the call stack.
1611 : */
1612 : static pg_noinline RT_PTR_ALLOC *
1613 9968 : RT_EXTEND_DOWN(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
1614 : {
1615 : RT_CHILD_PTR node,
1616 : child;
1617 : RT_NODE_4 *n4;
1618 :
1619 : /*
1620 : * The child pointer of the first node in the chain goes in the
1621 : * caller-provided slot.
1622 : */
1623 9968 : child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1624 9968 : *parent_slot = child.alloc;
1625 :
1626 9968 : node = child;
1627 9968 : shift -= RT_SPAN;
1628 :
1629 31456 : while (shift > 0)
1630 : {
1631 21488 : child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1632 :
1633 : /* We open-code the insertion ourselves, for speed. */
1634 21488 : n4 = (RT_NODE_4 *) node.local;
1635 21488 : n4->base.count = 1;
1636 21488 : n4->chunks[0] = RT_GET_KEY_CHUNK(key, shift);
1637 21488 : n4->children[0] = child.alloc;
1638 :
1639 21488 : node = child;
1640 21488 : shift -= RT_SPAN;
1641 : }
1642 : Assert(shift == 0);
1643 :
1644 : /* Reserve slot for the value. */
1645 9968 : n4 = (RT_NODE_4 *) node.local;
1646 9968 : n4->chunks[0] = RT_GET_KEY_CHUNK(key, 0);
1647 9968 : n4->base.count = 1;
1648 :
1649 9968 : return &n4->children[0];
1650 : }
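/*
 * Editorial illustration: continuing the hypothetical example above, with
 * the root at shift 16, inserting key 0xABCDEF down an empty path creates
 * one node4 per remaining level:
 *
 *     root (shift 16): chunk 0xAB -> node4 (shift 8)
 *         node4 (shift 8): chunk 0xCD -> node4 (shift 0)
 *             node4 (shift 0): chunk 0xEF -> returned value slot
 */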
1651 :
1652 : /*
1653 : * Workhorse for RT_SET
1654 : *
1655 : * "parent_slot" is the address of the child pointer we just followed,
1656 : * in the parent's array of children, needed if inserting into the
1657 : * current node causes it to grow.
1658 : */
1659 : static RT_PTR_ALLOC *
1660 861974 : RT_GET_SLOT_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift, bool *found)
1661 : {
1662 : RT_PTR_ALLOC *slot;
1663 : RT_CHILD_PTR node;
1664 861974 : uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
1665 :
1666 861974 : node.alloc = *parent_slot;
1667 861974 : RT_PTR_SET_LOCAL(tree, &node);
1668 861974 : slot = RT_NODE_SEARCH(node.local, chunk);
1669 :
1670 861974 : if (slot == NULL)
1671 : {
1672 222240 : *found = false;
1673 :
1674 : /* reserve slot for the caller to populate */
1675 :
1676 222240 : slot = RT_NODE_INSERT(tree, parent_slot, node, chunk);
1677 :
1678 222240 : if (shift == 0)
1679 212300 : return slot;
1680 : else
1681 9940 : return RT_EXTEND_DOWN(tree, slot, key, shift);
1682 : }
1683 : else
1684 : {
1685 639734 : if (shift == 0)
1686 : {
1687 22344 : *found = true;
1688 22344 : return slot;
1689 : }
1690 : else
1691 617390 : return RT_GET_SLOT_RECURSIVE(tree, slot, key, shift - RT_SPAN, found);
1692 : }
1693 : }
1694 :
1695 : /*
1696 : * Set "key" to the value that "value_p" points to. If the entry already
1697 : * exists, update its value and return true; otherwise insert it and return false.
1698 : *
1699 : * Taking a lock in exclusive mode is the caller's responsibility.
1700 : */
1701 : RT_SCOPE bool
1702 244612 : RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p)
1703 : {
1704 : bool found;
1705 : RT_PTR_ALLOC *slot;
1706 244612 : size_t value_sz = RT_GET_VALUE_SIZE(value_p);
1707 :
1708 : #ifdef RT_SHMEM
1709 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1710 : #endif
1711 :
1712 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
1713 :
1714 : /* Extend the tree if necessary */
1715 244612 : if (unlikely(key > tree->ctl->max_val))
1716 : {
1717 80 : if (tree->ctl->num_keys == 0)
1718 : {
1719 : RT_CHILD_PTR node;
1720 : RT_NODE_4 *n4;
1721 28 : int start_shift = RT_KEY_GET_SHIFT(key);
1722 :
1723 : /*
1724 : * With an empty root node, we don't extend the tree upwards,
1725 : * since that would result in orphaned empty nodes. Instead we
1726 : * open-code the insertion into the root node and extend downward
1727 : * from there.
1728 : */
1729 28 : node.alloc = tree->ctl->root;
1730 28 : RT_PTR_SET_LOCAL(tree, &node);
1731 28 : n4 = (RT_NODE_4 *) node.local;
1732 28 : n4->base.count = 1;
1733 28 : n4->chunks[0] = RT_GET_KEY_CHUNK(key, start_shift);
1734 :
1735 28 : slot = RT_EXTEND_DOWN(tree, &n4->children[0], key, start_shift);
1736 28 : found = false;
1737 28 : tree->ctl->start_shift = start_shift;
1738 28 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(start_shift);
1739 28 : goto have_slot;
1740 : }
1741 : else
1742 52 : RT_EXTEND_UP(tree, key);
1743 : }
1744 :
1745 244584 : slot = RT_GET_SLOT_RECURSIVE(tree, &tree->ctl->root,
1746 244584 : key, tree->ctl->start_shift, &found);
1747 :
1748 244612 : have_slot:
1749 : Assert(slot != NULL);
1750 :
1751 244612 : if (RT_VALUE_IS_EMBEDDABLE(value_p))
1752 : {
1753 : /* free the existing leaf */
1754 217706 : if (found && !RT_CHILDPTR_IS_VALUE(*slot))
1755 2 : RT_FREE_LEAF(tree, *slot);
1756 :
1757 : /* store value directly in child pointer slot */
1758 217706 : memcpy(slot, value_p, value_sz);
1759 :
1760 : #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
1761 : /* tag child pointer */
1762 : #ifdef RT_SHMEM
1763 6 : *slot |= 1;
1764 : #else
1765 4632 : *((uintptr_t *) slot) |= 1;
1766 : #endif
1767 : #endif
1768 : }
1769 : else
1770 : {
1771 : RT_CHILD_PTR leaf;
1772 :
1773 26906 : if (found && !RT_CHILDPTR_IS_VALUE(*slot))
1774 : {
1775 : Assert(RT_PTR_ALLOC_IS_VALID(*slot));
1776 4 : leaf.alloc = *slot;
1777 4 : RT_PTR_SET_LOCAL(tree, &leaf);
1778 :
1779 4 : if (RT_GET_VALUE_SIZE((RT_VALUE_TYPE *) leaf.local) != value_sz)
1780 : {
1781 : /*
1782 : * different sizes, so first free the existing leaf before
1783 : * allocating a new one
1784 : */
1785 4 : RT_FREE_LEAF(tree, *slot);
1786 4 : leaf = RT_ALLOC_LEAF(tree, value_sz);
1787 4 : *slot = leaf.alloc;
1788 : }
1789 : }
1790 : else
1791 : {
1792 : /* allocate new leaf and store it in the child array */
1793 26902 : leaf = RT_ALLOC_LEAF(tree, value_sz);
1794 26902 : *slot = leaf.alloc;
1795 : }
1796 :
1797 26906 : memcpy(leaf.local, value_p, value_sz);
1798 : }
1799 :
1800 : /* Update the statistics */
1801 244612 : if (!found)
1802 222268 : tree->ctl->num_keys++;
1803 :
1804 244612 : return found;
1805 : }
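/*
 * Usage sketch (editorial, not compiled): assuming the template was
 * instantiated with RT_PREFIX = rt and RT_VALUE_TYPE = MyValue (both names
 * hypothetical), an update-or-insert looks like:
 *
 *     MyValue v = { ... };
 *     bool    existed;
 *
 *     rt_lock_exclusive(tree);    (shared-memory variant only)
 *     existed = rt_set(tree, key, &v);
 *     rt_unlock(tree);
 *
 * "existed" tells the caller whether an existing value was overwritten
 * (true) or a new entry was created (false).
 */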
1806 :
1807 : /***************** SETUP / TEARDOWN *****************/
1808 :
1809 : /*
1810 : * Create the radix tree in the caller's memory context and return it.
1811 : *
1812 : * For local memory, the tree's nodes and leaves are allocated in "ctx"
1813 : * and its child contexts; for shared memory, they are allocated in "dsa".
1814 : */
1815 : RT_SCOPE RT_RADIX_TREE *
1816 : #ifdef RT_SHMEM
1817 34 : RT_CREATE(dsa_area *dsa, int tranche_id)
1818 : #else
1819 115516 : RT_CREATE(MemoryContext ctx)
1820 : #endif
1821 : {
1822 : RT_RADIX_TREE *tree;
1823 : RT_CHILD_PTR rootnode;
1824 : #ifdef RT_SHMEM
1825 : dsa_pointer dp;
1826 : #endif
1827 :
1828 115550 : tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
1829 :
1830 : #ifdef RT_SHMEM
1831 34 : tree->dsa = dsa;
1832 34 : dp = dsa_allocate0(dsa, sizeof(RT_RADIX_TREE_CONTROL));
1833 34 : tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, dp);
1834 34 : tree->ctl->handle = dp;
1835 34 : tree->ctl->magic = RT_RADIX_TREE_MAGIC;
1836 34 : LWLockInitialize(&tree->ctl->lock, tranche_id);
1837 : #else
1838 115516 : tree->ctl = (RT_RADIX_TREE_CONTROL *) palloc0(sizeof(RT_RADIX_TREE_CONTROL));
1839 :
1840 : /* Create a slab context for each size class */
1841 693096 : for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
1842 : {
1843 577580 : RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
1844 577580 : size_t inner_blocksize = RT_SLAB_BLOCK_SIZE(size_class.allocsize);
1845 :
1846 577580 : tree->node_slabs[i] = SlabContextCreate(ctx,
1847 : size_class.name,
1848 : inner_blocksize,
1849 : size_class.allocsize);
1850 : }
1851 :
1852 115516 : tree->leaf_context = ctx;
1853 : #endif /* RT_SHMEM */
1854 :
1855 : /* add root node now so that RT_SET can assume it exists */
1856 115550 : rootnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1857 115550 : tree->ctl->root = rootnode.alloc;
1858 115550 : tree->ctl->start_shift = 0;
1859 115550 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
1860 :
1861 115550 : return tree;
1862 : }
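/*
 * Instantiation sketch (editorial): a caller generates a specialized tree
 * by defining the template parameters before including this header; the
 * names "rt" and "MyValue" are hypothetical:
 *
 *     typedef struct MyValue { uint32 x; } MyValue;
 *
 *     #define RT_PREFIX rt
 *     #define RT_SCOPE static
 *     #define RT_DECLARE
 *     #define RT_DEFINE
 *     #define RT_VALUE_TYPE MyValue
 *     #include "lib/radixtree.h"
 *
 *     rt_radix_tree *tree = rt_create(CurrentMemoryContext);
 *
 * Defining RT_SHMEM instead generates the dsa_area-backed variant, whose
 * create function takes (dsa, tranche_id) as above.
 */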
1863 :
1864 : #ifdef RT_SHMEM
1865 : RT_SCOPE RT_RADIX_TREE *
1866 34 : RT_ATTACH(dsa_area *dsa, RT_HANDLE handle)
1867 : {
1868 : RT_RADIX_TREE *tree;
1869 : dsa_pointer control;
1870 :
1871 34 : tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
1872 :
1873 : /* Find the control object in shared memory */
1874 34 : control = handle;
1875 :
1876 34 : tree->dsa = dsa;
1877 34 : tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, control);
1878 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1879 :
1880 34 : return tree;
1881 : }
1882 :
1883 : RT_SCOPE void
1884 34 : RT_DETACH(RT_RADIX_TREE * tree)
1885 : {
1886 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1887 34 : pfree(tree);
1888 34 : }
1889 :
1890 : RT_SCOPE RT_HANDLE
1891 32 : RT_GET_HANDLE(RT_RADIX_TREE * tree)
1892 : {
1893 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1894 32 : return tree->ctl->handle;
1895 : }
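/*
 * Shared-memory workflow sketch (editorial): one backend creates the tree
 * and publishes its handle; other backends attach through that handle.
 * "shared->h" is a hypothetical field in some shared struct:
 *
 *     creator:    tree = rt_create(dsa, tranche_id);
 *                 shared->h = rt_get_handle(tree);
 *
 *     attacher:   tree = rt_attach(dsa, shared->h);
 *                 ... use tree under the locking functions below ...
 *                 rt_detach(tree);
 */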
1896 :
1897 : RT_SCOPE void
1898 206 : RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree)
1899 : {
1900 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1901 206 : LWLockAcquire(&tree->ctl->lock, LW_EXCLUSIVE);
1902 206 : }
1903 :
1904 : RT_SCOPE void
1905 421684 : RT_LOCK_SHARE(RT_RADIX_TREE * tree)
1906 : {
1907 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1908 421684 : LWLockAcquire(&tree->ctl->lock, LW_SHARED);
1909 421684 : }
1910 :
1911 : RT_SCOPE void
1912 421890 : RT_UNLOCK(RT_RADIX_TREE * tree)
1913 : {
1914 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1915 421890 : LWLockRelease(&tree->ctl->lock);
1916 421890 : }
1917 :
1918 : /*
1919 : * Recursively free all nodes allocated in the DSA area.
1920 : */
1921 : static void
1922 44 : RT_FREE_RECURSE(RT_RADIX_TREE * tree, RT_PTR_ALLOC ptr, int shift)
1923 : {
1924 : RT_CHILD_PTR node;
1925 :
1926 44 : check_stack_depth();
1927 :
1928 44 : node.alloc = ptr;
1929 44 : RT_PTR_SET_LOCAL(tree, &node);
1930 :
1931 44 : switch (node.local->kind)
1932 : {
1933 28 : case RT_NODE_KIND_4:
1934 : {
1935 28 : RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
1936 :
1937 42 : for (int i = 0; i < n4->base.count; i++)
1938 : {
1939 14 : RT_PTR_ALLOC child = n4->children[i];
1940 :
1941 14 : if (shift > 0)
1942 10 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1943 4 : else if (!RT_CHILDPTR_IS_VALUE(child))
1944 0 : dsa_free(tree->dsa, child);
1945 : }
1946 :
1947 28 : break;
1948 : }
1949 2 : case RT_NODE_KIND_16:
1950 : {
1951 2 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1952 :
1953 50 : for (int i = 0; i < n16->base.count; i++)
1954 : {
1955 48 : RT_PTR_ALLOC child = n16->children[i];
1956 :
1957 48 : if (shift > 0)
1958 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1959 48 : else if (!RT_CHILDPTR_IS_VALUE(child))
1960 48 : dsa_free(tree->dsa, child);
1961 : }
1962 :
1963 2 : break;
1964 : }
1965 6 : case RT_NODE_KIND_48:
1966 : {
1967 6 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1968 :
1969 1542 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
1970 : {
1971 : RT_PTR_ALLOC child;
1972 :
1973 1536 : if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
1974 1266 : continue;
1975 :
1976 270 : child = *RT_NODE_48_GET_CHILD(n48, i);
1977 :
1978 270 : if (shift > 0)
1979 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1980 270 : else if (!RT_CHILDPTR_IS_VALUE(child))
1981 270 : dsa_free(tree->dsa, child);
1982 : }
1983 :
1984 6 : break;
1985 : }
1986 8 : case RT_NODE_KIND_256:
1987 : {
1988 8 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
1989 :
1990 2056 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
1991 : {
1992 : RT_PTR_ALLOC child;
1993 :
1994 2048 : if (!RT_NODE_256_IS_CHUNK_USED(n256, i))
1995 1102 : continue;
1996 :
1997 946 : child = *RT_NODE_256_GET_CHILD(n256, i);
1998 :
1999 946 : if (shift > 0)
2000 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2001 946 : else if (!RT_CHILDPTR_IS_VALUE(child))
2002 944 : dsa_free(tree->dsa, child);
2003 : }
2004 :
2005 8 : break;
2006 : }
2007 : }
2008 :
2009 : /* Free the inner node */
2010 44 : dsa_free(tree->dsa, ptr);
2011 44 : }
2012 : #endif
2013 :
2014 : /*
2015 : * Free the radix tree, including all nodes and leaves.
2016 : */
2017 : RT_SCOPE void
2018 1152 : RT_FREE(RT_RADIX_TREE * tree)
2019 : {
2020 : #ifdef RT_SHMEM
2021 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2022 :
2023 : /* Free all memory used for radix tree nodes */
2024 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2025 34 : RT_FREE_RECURSE(tree, tree->ctl->root, tree->ctl->start_shift);
2026 :
2027 : /*
2028 : * Vandalize the control block to help catch programming errors where
2029 : * other backends access the memory formerly occupied by this radix tree.
2030 : */
2031 34 : tree->ctl->magic = 0;
2032 34 : dsa_free(tree->dsa, tree->ctl->handle);
2033 : #else
2034 : /*
2035 : * Free all space allocated within the leaf context and delete all child
2036 : * contexts such as those used for nodes.
2037 : */
2038 1118 : MemoryContextReset(tree->leaf_context);
2039 :
2040 1118 : pfree(tree->ctl);
2041 : #endif
2042 1152 : pfree(tree);
2043 1152 : }
2044 :
2045 : /***************** ITERATION *****************/
2046 :
2047 : /*
2048 : * Create and return an iterator for the given radix tree
2049 : * in the caller's memory context.
2050 : *
2051 : * Taking a lock in shared mode during the iteration is the caller's
2052 : * responsibility.
2053 : */
2054 : RT_SCOPE RT_ITER *
2055 1128 : RT_BEGIN_ITERATE(RT_RADIX_TREE * tree)
2056 : {
2057 : RT_ITER *iter;
2058 : RT_CHILD_PTR root;
2059 :
2060 1128 : iter = (RT_ITER *) palloc0(sizeof(RT_ITER));
2061 1128 : iter->tree = tree;
2062 :
2063 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2064 1128 : root.alloc = iter->tree->ctl->root;
2065 1128 : RT_PTR_SET_LOCAL(tree, &root);
2066 :
2067 1128 : iter->top_level = iter->tree->ctl->start_shift / RT_SPAN;
2068 :
2069 : /* Set the root to start */
2070 1128 : iter->cur_level = iter->top_level;
2071 1128 : iter->node_iters[iter->cur_level].node = root;
2072 1128 : iter->node_iters[iter->cur_level].idx = 0;
2073 :
2074 1128 : return iter;
2075 : }
2076 :
2077 : /*
2078 : * Scan the inner node and return the next child pointer if one exists;
2079 : * otherwise return NULL.
2080 : */
2081 : static inline RT_PTR_ALLOC *
2082 254054 : RT_NODE_ITERATE_NEXT(RT_ITER * iter, int level)
2083 : {
2084 254054 : uint8 key_chunk = 0;
2085 : RT_NODE_ITER *node_iter;
2086 : RT_CHILD_PTR node;
2087 254054 : RT_PTR_ALLOC *slot = NULL;
2088 :
2089 : #ifdef RT_SHMEM
2090 : Assert(iter->tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2091 : #endif
2092 :
2093 254054 : node_iter = &(iter->node_iters[level]);
2094 254054 : node = node_iter->node;
2095 :
2096 : Assert(node.local != NULL);
2097 :
2098 254054 : switch ((node.local)->kind)
2099 : {
2100 33176 : case RT_NODE_KIND_4:
2101 : {
2102 33176 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
2103 :
2104 33176 : if (node_iter->idx >= n4->base.count)
2105 16240 : return NULL;
2106 :
2107 16936 : slot = &n4->children[node_iter->idx];
2108 16936 : key_chunk = n4->chunks[node_iter->idx];
2109 16936 : node_iter->idx++;
2110 16936 : break;
2111 : }
2112 7018 : case RT_NODE_KIND_16:
2113 : {
2114 7018 : RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2115 :
2116 7018 : if (node_iter->idx >= n16->base.count)
2117 362 : return NULL;
2118 :
2119 6656 : slot = &n16->children[node_iter->idx];
2120 6656 : key_chunk = n16->chunks[node_iter->idx];
2121 6656 : node_iter->idx++;
2122 6656 : break;
2123 : }
2124 188668 : case RT_NODE_KIND_48:
2125 : {
2126 188668 : RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2127 : int chunk;
2128 :
2129 1057498 : for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2130 : {
2131 1053388 : if (RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2132 184558 : break;
2133 : }
2134 :
2135 188668 : if (chunk >= RT_NODE_MAX_SLOTS)
2136 4110 : return NULL;
2137 :
2138 184558 : slot = RT_NODE_48_GET_CHILD(n48, chunk);
2139 :
2140 184558 : key_chunk = chunk;
2141 184558 : node_iter->idx = chunk + 1;
2142 184558 : break;
2143 : }
2144 25192 : case RT_NODE_KIND_256:
2145 : {
2146 25192 : RT_NODE_256 *n256 = (RT_NODE_256 *) (node.local);
2147 : int chunk;
2148 :
2149 33910 : for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2150 : {
2151 33792 : if (RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2152 25074 : break;
2153 : }
2154 :
2155 25192 : if (chunk >= RT_NODE_MAX_SLOTS)
2156 118 : return NULL;
2157 :
2158 25074 : slot = RT_NODE_256_GET_CHILD(n256, chunk);
2159 :
2160 25074 : key_chunk = chunk;
2161 25074 : node_iter->idx = chunk + 1;
2162 25074 : break;
2163 : }
2164 : }
2165 :
2166 : /* Update the key */
2167 233224 : iter->key &= ~(((uint64) RT_CHUNK_MASK) << (level * RT_SPAN));
2168 233224 : iter->key |= (((uint64) key_chunk) << (level * RT_SPAN));
2169 :
2170 233224 : return slot;
2171 : }
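/*
 * Worked example (editorial) of the key update above, with RT_SPAN = 8:
 * if level = 2 and key_chunk = 0xAB, the mask clears bits 16..23 of
 * iter->key and the OR installs 0xAB there:
 *
 *     iter->key = 0x00112233  ->  0x00AB2233
 *
 * Lower-level chunks are left alone; they get overwritten as the iterator
 * descends into the new subtree.
 */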
2172 :
2173 : /*
2174 : * Return a pointer to the next value and set *key_p to its key, if
2175 : * there is one; otherwise return NULL.
2176 : */
2177 : RT_SCOPE RT_VALUE_TYPE *
2178 214360 : RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p)
2179 : {
2180 214360 : RT_PTR_ALLOC *slot = NULL;
2181 :
2182 255120 : while (iter->cur_level <= iter->top_level)
2183 : {
2184 : RT_CHILD_PTR node;
2185 :
2186 254054 : slot = RT_NODE_ITERATE_NEXT(iter, iter->cur_level);
2187 :
2188 254054 : if (iter->cur_level == 0 && slot != NULL)
2189 : {
2190 : /* Found a value at the leaf node */
2191 213294 : *key_p = iter->key;
2192 213294 : node.alloc = *slot;
2193 :
2194 213294 : if (RT_CHILDPTR_IS_VALUE(*slot))
2195 213294 : return (RT_VALUE_TYPE *) slot;
2196 : else
2197 : {
2198 22308 : RT_PTR_SET_LOCAL(iter->tree, &node);
2199 22308 : return (RT_VALUE_TYPE *) node.local;
2200 : }
2201 : }
2202 :
2203 40760 : if (slot != NULL)
2204 : {
2205 : /* Found the child slot, move down the tree */
2206 19930 : node.alloc = *slot;
2207 19930 : RT_PTR_SET_LOCAL(iter->tree, &node);
2208 :
2209 19930 : iter->cur_level--;
2210 19930 : iter->node_iters[iter->cur_level].node = node;
2211 19930 : iter->node_iters[iter->cur_level].idx = 0;
2212 : }
2213 : else
2214 : {
2215 : /* No child slot found, so move up the tree */
2216 20830 : iter->cur_level++;
2217 : }
2218 : }
2219 :
2220 : /* We've visited all nodes, so the iteration has finished */
2221 1066 : return NULL;
2222 : }
2223 :
2224 : /*
2225 : * Terminate the iteration. The caller is responsible for releasing any locks.
2226 : */
2227 : RT_SCOPE void
2228 1128 : RT_END_ITERATE(RT_ITER * iter)
2229 : {
2230 1128 : pfree(iter);
2231 1128 : }
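/*
 * Iteration sketch (editorial, not compiled), continuing the hypothetical
 * rt-prefixed instantiation; for a shared-memory tree the caller holds a
 * share lock for the whole loop:
 *
 *     uint64    key;
 *     MyValue  *value;
 *     rt_iter  *iter;
 *
 *     rt_lock_share(tree);
 *     iter = rt_begin_iterate(tree);
 *     while ((value = rt_iterate_next(iter, &key)) != NULL)
 *     {
 *         ... keys arrive in ascending order ...
 *     }
 *     rt_end_iterate(iter);
 *     rt_unlock(tree);
 */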
2232 :
2233 : /***************** DELETION *****************/
2234 :
2235 : #ifdef RT_USE_DELETE
2236 :
2237 : /* Delete the element at "deletepos" */
2238 : static inline void
2239 44382 : RT_SHIFT_ARRAYS_AND_DELETE(uint8 *chunks, RT_PTR_ALLOC * children, int count, int deletepos)
2240 : {
2241 : /*
2242 : * This is basically a memmove, but written in a simple loop for speed on
2243 : * small inputs.
2244 : */
2245 203522 : for (int i = deletepos; i < count - 1; i++)
2246 : {
2247 : /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
2248 : #ifdef __GNUC__
2249 159140 : __asm__("");
2250 : #endif
2251 159140 : chunks[i] = chunks[i + 1];
2252 159140 : children[i] = children[i + 1];
2253 : }
2254 44382 : }
2255 :
2256 : /*
2257 : * Copy both the chunk and child arrays into their new home. The element
2258 : * at "deletepos" is deleted by skipping over it.
2259 : */
2260 : static inline void
2261 4162 : RT_COPY_ARRAYS_AND_DELETE(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
2262 : uint8 *src_chunks, RT_PTR_ALLOC * src_children,
2263 : int count, int deletepos)
2264 : {
2265 16648 : for (int i = 0; i < count - 1; i++)
2266 : {
2267 : /*
2268 : * use a branch-free computation to skip the index of the deleted
2269 : * element
2270 : */
2271 12486 : int sourceidx = i + (i >= deletepos);
2272 12486 : int destidx = i;
2273 :
2274 12486 : dst_chunks[destidx] = src_chunks[sourceidx];
2275 12486 : dst_children[destidx] = src_children[sourceidx];
2276 : }
2277 4162 : }
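/*
 * Editorial note: the (i >= deletepos) trick above maps destination
 * indexes 0 .. count-2 onto all source indexes except the deleted one;
 * e.g. with count = 4 and deletepos = 1 it copies sources 0, 2 and 3.
 */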
2278 :
2279 : /*
2280 : * Note: While all node-growing functions are called to perform an insertion
2281 : * when no more space is available, shrinking is not a hard-and-fast requirement.
2282 : * When shrinking nodes, we generally wait until the count is about 3/4 of
2283 : * the next lower node's fanout. This prevents ping-ponging between different
2284 : * node sizes.
2285 : *
2286 : * Some shrinking functions delete first and then shrink, either because we
2287 : * must or because it's fast and simple that way. Sometimes it's faster to
2288 : * delete while shrinking.
2289 : */
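/*
 * For concreteness (editorial note), the thresholds used below work out
 * as follows: a node48 shrinks to node16 once its count drops to
 * RT_FANOUT_16_LO / 4 * 3, and a node256 shrinks to node48 at
 * Min(RT_FANOUT_48 / 4 * 3, BITS_PER_BITMAPWORD), where the Min() keeps
 * the single-word "isset" store in RT_SHRINK_NODE_256() valid. node16
 * shrinks to node4 at a hard-coded 4, explained at RT_REMOVE_CHILD_16().
 */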
2290 :
2291 : /*
2292 : * Move contents of a node256 to a node48. Any deletion should have happened
2293 : * in the caller.
2294 : */
2295 : static void pg_noinline
2296 32 : RT_SHRINK_NODE_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2297 : {
2298 32 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2299 : RT_CHILD_PTR newnode;
2300 : RT_NODE_48 *new48;
2301 32 : int slot_idx = 0;
2302 :
2303 : /* initialize new node */
2304 32 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
2305 32 : new48 = (RT_NODE_48 *) newnode.local;
2306 :
2307 : /* copy over the entries */
2308 32 : RT_COPY_COMMON(newnode, node);
2309 8224 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2310 : {
2311 8192 : if (RT_NODE_256_IS_CHUNK_USED(n256, i))
2312 : {
2313 1536 : new48->slot_idxs[i] = slot_idx;
2314 1536 : new48->children[slot_idx] = n256->children[i];
2315 1536 : slot_idx++;
2316 : }
2317 : }
2318 :
2319 : /*
2320 : * Since we just copied a dense array, we can fill "isset" using a single
2321 : * store, provided the length of that array is at most the number of bits
2322 : * in a bitmapword.
2323 : */
2324 : Assert(n256->base.count <= BITS_PER_BITMAPWORD);
2325 32 : new48->isset[0] = (bitmapword) (((uint64) 1 << n256->base.count) - 1);
2326 :
2327 : /* free old node and update reference in parent */
2328 32 : *parent_slot = newnode.alloc;
2329 32 : RT_FREE_NODE(tree, node);
2330 32 : }
2331 :
2332 : static inline void
2333 8976 : RT_REMOVE_CHILD_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2334 : {
2335 : int shrink_threshold;
2336 8976 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2337 8976 : int idx = RT_BM_IDX(chunk);
2338 8976 : int bitnum = RT_BM_BIT(chunk);
2339 :
2340 : /* Mark the slot free for "chunk" */
2341 8976 : n256->isset[idx] &= ~((bitmapword) 1 << bitnum);
2342 :
2343 8976 : n256->base.count--;
2344 :
2345 : /*
2346 : * A full node256 will have a count of zero because of overflow, so we
2347 : * delete first, then check the shrink threshold.
2348 : */
2349 : Assert(n256->base.count > 0);
2350 :
2351 : /* This simplifies RT_SHRINK_NODE_256() */
2352 8976 : shrink_threshold = BITS_PER_BITMAPWORD;
2353 8976 : shrink_threshold = Min(RT_FANOUT_48 / 4 * 3, shrink_threshold);
2354 :
2355 8976 : if (n256->base.count <= shrink_threshold)
2356 32 : RT_SHRINK_NODE_256(tree, parent_slot, node, chunk);
2357 8976 : }
2358 :
2359 : /*
2360 : * Move contents of a node48 to a node16. Any deletion should have happened
2361 : * in the caller.
2362 : */
2363 : static void pg_noinline
2364 4030 : RT_SHRINK_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2365 : {
2366 4030 : RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2367 : RT_CHILD_PTR newnode;
2368 : RT_NODE_16 *new16;
2369 4030 : int destidx = 0;
2370 :
2371 : /*
2372 : * Initialize new node. For now we skip the larger node16 size class for
2373 : * simplicity.
2374 : */
2375 4030 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
2376 4030 : new16 = (RT_NODE_16 *) newnode.local;
2377 :
2378 : /* copy over all existing entries */
2379 4030 : RT_COPY_COMMON(newnode, node);
2380 1035710 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2381 : {
2382 1031680 : if (n48->slot_idxs[chunk] != RT_INVALID_SLOT_IDX)
2383 : {
2384 48360 : new16->chunks[destidx] = chunk;
2385 48360 : new16->children[destidx] = n48->children[n48->slot_idxs[chunk]];
2386 48360 : destidx++;
2387 : }
2388 : }
2389 :
2390 : Assert(destidx < new16->base.fanout);
2391 :
2392 4030 : RT_VERIFY_NODE((RT_NODE *) new16);
2393 :
2394 : /* free old node and update reference in parent */
2395 4030 : *parent_slot = newnode.alloc;
2396 4030 : RT_FREE_NODE(tree, node);
2397 4030 : }
2398 :
2399 : static inline void
2400 133158 : RT_REMOVE_CHILD_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2401 : {
2402 133158 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
2403 133158 : int deletepos = n48->slot_idxs[chunk];
2404 :
2405 : /* For now we skip the larger node16 size class for simplicity */
2406 133158 : int shrink_threshold = RT_FANOUT_16_LO / 4 * 3;
2407 : int idx;
2408 : int bitnum;
2409 :
2410 : Assert(deletepos != RT_INVALID_SLOT_IDX);
2411 :
2412 133158 : idx = RT_BM_IDX(deletepos);
2413 133158 : bitnum = RT_BM_BIT(deletepos);
2414 133158 : n48->isset[idx] &= ~((bitmapword) 1 << bitnum);
2415 133158 : n48->slot_idxs[chunk] = RT_INVALID_SLOT_IDX;
2416 :
2417 133158 : n48->base.count--;
2418 :
2419 : /*
2420 : * To keep shrinking simple, do it after deleting, which is fast for
2421 : * node48 anyway.
2422 : */
2423 133158 : if (n48->base.count <= shrink_threshold)
2424 4030 : RT_SHRINK_NODE_48(tree, parent_slot, node, chunk);
2425 133158 : }
2426 :
2427 : /*
2428 : * Move contents of a node16 to a node4, and delete the one at "deletepos".
2429 : * By deleting as we move, we can avoid memmove operations in the new
2430 : * node.
2431 : */
2432 : static void pg_noinline
2433 4162 : RT_SHRINK_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 deletepos)
2434 : {
2435 4162 : RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2436 : RT_CHILD_PTR newnode;
2437 : RT_NODE_4 *new4;
2438 :
2439 : /* initialize new node */
2440 4162 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
2441 4162 : new4 = (RT_NODE_4 *) newnode.local;
2442 :
2443 : /* copy over existing entries, except for the one at "deletepos" */
2444 4162 : RT_COPY_COMMON(newnode, node);
2445 4162 : RT_COPY_ARRAYS_AND_DELETE(new4->chunks, new4->children,
2446 4162 : n16->chunks, n16->children,
2447 4162 : n16->base.count, deletepos);
2448 :
2449 4162 : new4->base.count--;
2450 4162 : RT_VERIFY_NODE((RT_NODE *) new4);
2451 :
2452 : /* free old node and update reference in parent */
2453 4162 : *parent_slot = newnode.alloc;
2454 4162 : RT_FREE_NODE(tree, node);
2455 4162 : }
2456 :
2457 : static inline void
2458 40184 : RT_REMOVE_CHILD_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2459 : {
2460 40184 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
2461 40184 : int deletepos = slot - n16->children;
2462 :
2463 : /*
2464 : * The threshold 4 is hard-coded when shrinking to node4: the new node
2465 : * will end up with 3 elements, and 3 is the largest count for which
2466 : * linear search is faster than SIMD search, at least on x86-64.
2467 : */
2468 40184 : if (n16->base.count <= 4)
2469 : {
2470 4162 : RT_SHRINK_NODE_16(tree, parent_slot, node, deletepos);
2471 4162 : return;
2472 : }
2473 :
2474 : Assert(deletepos >= 0);
2475 : Assert(n16->chunks[deletepos] == chunk);
2476 :
2477 36022 : RT_SHIFT_ARRAYS_AND_DELETE(n16->chunks, n16->children,
2478 36022 : n16->base.count, deletepos);
2479 36022 : n16->base.count--;
2480 : }
2481 :
2482 : static inline void
2483 39862 : RT_REMOVE_CHILD_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2484 : {
2485 39862 : RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
2486 :
2487 39862 : if (n4->base.count == 1)
2488 : {
2489 : Assert(n4->chunks[0] == chunk);
2490 :
2491 : /*
2492 : * If we're deleting the last entry from the root child node, don't
2493 : * free it; instead mark both the tree and the root child node empty.
2494 : * That way, RT_SET can assume the root node exists.
2495 : */
2496 31502 : if (parent_slot == &tree->ctl->root)
2497 : {
2498 62 : n4->base.count = 0;
2499 62 : tree->ctl->start_shift = 0;
2500 62 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
2501 : }
2502 : else
2503 : {
2504 : /*
2505 : * Deleting last entry, so just free the entire node.
2506 : * RT_DELETE_RECURSIVE has already freed the value and lower-level
2507 : * children.
2508 : */
2509 31440 : RT_FREE_NODE(tree, node);
2510 :
2511 : /*
2512 : * Also null out the parent's slot -- this tells the next higher
2513 : * level to delete its child pointer
2514 : */
2515 31440 : *parent_slot = RT_INVALID_PTR_ALLOC;
2516 : }
2517 : }
2518 : else
2519 : {
2520 8360 : int deletepos = slot - n4->children;
2521 :
2522 : Assert(deletepos >= 0);
2523 : Assert(n4->chunks[deletepos] == chunk);
2524 :
2525 8360 : RT_SHIFT_ARRAYS_AND_DELETE(n4->chunks, n4->children,
2526 8360 : n4->base.count, deletepos);
2527 :
2528 8360 : n4->base.count--;
2529 : }
2530 39862 : }
2531 :
2532 : /*
2533 : * Delete the child pointer corresponding to "key" in the given node.
2534 : */
2535 : static inline void
2536 222180 : RT_NODE_DELETE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2537 : {
2538 222180 : switch ((node.local)->kind)
2539 : {
2540 39862 : case RT_NODE_KIND_4:
2541 : {
2542 39862 : RT_REMOVE_CHILD_4(tree, parent_slot, node, chunk, slot);
2543 39862 : return;
2544 : }
2545 40184 : case RT_NODE_KIND_16:
2546 : {
2547 40184 : RT_REMOVE_CHILD_16(tree, parent_slot, node, chunk, slot);
2548 40184 : return;
2549 : }
2550 133158 : case RT_NODE_KIND_48:
2551 : {
2552 133158 : RT_REMOVE_CHILD_48(tree, parent_slot, node, chunk);
2553 133158 : return;
2554 : }
2555 8976 : case RT_NODE_KIND_256:
2556 : {
2557 8976 : RT_REMOVE_CHILD_256(tree, parent_slot, node, chunk);
2558 8976 : return;
2559 : }
2560 0 : default:
2561 0 : pg_unreachable();
2562 : }
2563 : }
2564 :
2565 : /* workhorse for RT_DELETE */
2566 : static bool
2567 830264 : RT_DELETE_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
2568 : {
2569 : RT_PTR_ALLOC *slot;
2570 : RT_CHILD_PTR node;
2571 830264 : uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
2572 :
2573 830264 : node.alloc = *parent_slot;
2574 830264 : RT_PTR_SET_LOCAL(tree, &node);
2575 830264 : slot = RT_NODE_SEARCH(node.local, chunk);
2576 :
2577 830264 : if (slot == NULL)
2578 17974 : return false;
2579 :
2580 812290 : if (shift == 0)
2581 : {
2582 190740 : if (!RT_CHILDPTR_IS_VALUE(*slot))
2583 0 : RT_FREE_LEAF(tree, *slot);
2584 :
2585 190740 : RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2586 190740 : return true;
2587 : }
2588 : else
2589 : {
2590 : bool deleted;
2591 :
2592 621550 : deleted = RT_DELETE_RECURSIVE(tree, slot, key, shift - RT_SPAN);
2593 :
2594 : /* Child node was freed, so delete its slot now */
2595 621550 : if (*slot == RT_INVALID_PTR_ALLOC)
2596 : {
2597 : Assert(deleted);
2598 31440 : RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2599 : }
2600 :
2601 621550 : return deleted;
2602 : }
2603 : }
2604 :
2605 : /*
2606 : * Delete the given key from the radix tree. If the key is found, delete
2607 : * it and return true; otherwise do nothing and return false.
2608 : *
2609 : * Taking a lock in exclusive mode is the caller's responsibility.
2610 : */
2611 : RT_SCOPE bool
2612 208714 : RT_DELETE(RT_RADIX_TREE * tree, uint64 key)
2613 : {
2614 : bool deleted;
2615 :
2616 : #ifdef RT_SHMEM
2617 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2618 : #endif
2619 :
2620 208714 : if (key > tree->ctl->max_val)
2621 0 : return false;
2622 :
2623 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2624 208714 : deleted = RT_DELETE_RECURSIVE(tree, &tree->ctl->root,
2625 208714 : key, tree->ctl->start_shift);
2626 :
2627 : /* Found the key to delete. Update the statistics */
2628 208714 : if (deleted)
2629 : {
2630 190740 : tree->ctl->num_keys--;
2631 : Assert(tree->ctl->num_keys >= 0);
2632 : }
2633 :
2634 208714 : return deleted;
2635 : }
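/*
 * Deletion sketch (editorial, not compiled), continuing the hypothetical
 * rt-prefixed example; the return value distinguishes "removed" from
 * "was never there":
 *
 *     rt_lock_exclusive(tree);    (shared-memory variant only)
 *     if (rt_delete(tree, key))
 *         nremoved++;
 *     rt_unlock(tree);
 */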
2636 :
2637 : #endif /* RT_USE_DELETE */
2638 :
2639 : /***************** UTILITY FUNCTIONS *****************/
2640 :
2641 : /*
2642 : * Return the amount of memory used by the radix tree.
2643 : *
2644 : * Since dsa_get_total_size() does appropriate locking, the caller doesn't
2645 : * need to take a lock.
2646 : */
2647 : RT_SCOPE uint64
2648 773092 : RT_MEMORY_USAGE(RT_RADIX_TREE * tree)
2649 : {
2650 773092 : size_t total = 0;
2651 :
2652 : #ifdef RT_SHMEM
2653 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2654 2370 : total = dsa_get_total_size(tree->dsa);
2655 : #else
2656 770722 : total = MemoryContextMemAllocated(tree->leaf_context, true);
2657 : #endif
2658 :
2659 773092 : return total;
2660 : }
2661 :
2662 : /*
2663 : * Perform some sanity checks on the given node.
2664 : */
2665 : static void
2666 230432 : RT_VERIFY_NODE(RT_NODE * node)
2667 : {
2668 : #ifdef USE_ASSERT_CHECKING
2669 :
2670 : switch (node->kind)
2671 : {
2672 : case RT_NODE_KIND_4:
2673 : {
2674 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2675 :
2676 : /* RT_DUMP_NODE(node); */
2677 :
2678 : for (int i = 1; i < n4->base.count; i++)
2679 : Assert(n4->chunks[i - 1] < n4->chunks[i]);
2680 :
2681 : break;
2682 : }
2683 : case RT_NODE_KIND_16:
2684 : {
2685 : RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2686 :
2687 : /* RT_DUMP_NODE(node); */
2688 :
2689 : for (int i = 1; i < n16->base.count; i++)
2690 : Assert(n16->chunks[i - 1] < n16->chunks[i]);
2691 :
2692 : break;
2693 : }
2694 : case RT_NODE_KIND_48:
2695 : {
2696 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2697 : int cnt = 0;
2698 :
2699 : /* RT_DUMP_NODE(node); */
2700 :
2701 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2702 : {
2703 : uint8 slot = n48->slot_idxs[i];
2704 : int idx = RT_BM_IDX(slot);
2705 : int bitnum = RT_BM_BIT(slot);
2706 :
2707 : if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2708 : continue;
2709 :
2710 : /* Check if the corresponding slot is used */
2711 : Assert(slot < node->fanout);
2712 : Assert((n48->isset[idx] & ((bitmapword) 1 << bitnum)) != 0);
2713 :
2714 : cnt++;
2715 : }
2716 :
2717 : Assert(n48->base.count == cnt);
2718 :
2719 : break;
2720 : }
2721 : case RT_NODE_KIND_256:
2722 : {
2723 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2724 : int cnt = 0;
2725 :
2726 : /* RT_DUMP_NODE(node); */
2727 :
2728 : for (int i = 0; i < RT_BM_IDX(RT_NODE_MAX_SLOTS); i++)
2729 : cnt += bmw_popcount(n256->isset[i]);
2730 :
2731 : /*
2732 : * Check that the number of used chunks matches the count,
2733 : * accounting for overflow
2734 : */
2735 : if (cnt == RT_FANOUT_256)
2736 : Assert(n256->base.count == 0);
2737 : else
2738 : Assert(n256->base.count == cnt);
2739 :
2740 : break;
2741 : }
2742 : }
2743 : #endif
2744 230432 : }
2745 :
2746 : /***************** DEBUG FUNCTIONS *****************/
2747 :
2748 : #ifdef RT_DEBUG
2749 :
2750 : /*
2751 : * Print out tree stats, some of which are only collected in debugging builds.
2752 : */
2753 : RT_SCOPE void
2754 122 : RT_STATS(RT_RADIX_TREE * tree)
2755 : {
2756 122 : fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
2757 122 : fprintf(stderr, "num_keys = %lld\n", (long long) tree->ctl->num_keys);
2758 :
2759 : #ifdef RT_SHMEM
2760 : fprintf(stderr, "handle = " DSA_POINTER_FORMAT "\n", tree->ctl->handle);
2761 : #endif
2762 :
2763 122 : fprintf(stderr, "height = %d", tree->ctl->start_shift / RT_SPAN);
2764 :
2765 732 : for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
2766 : {
2767 610 : RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
2768 :
2769 610 : fprintf(stderr, ", n%d = %lld", size_class.fanout, (long long) tree->ctl->num_nodes[i]);
2770 : }
2771 :
2772 122 : fprintf(stderr, ", leaves = %lld", (long long) tree->ctl->num_leaves);
2773 :
2774 122 : fprintf(stderr, "\n");
2775 122 : }
2776 :
2777 : /*
2778 : * Print out debugging information about the given node.
2779 : */
2780 : static void
2781 : pg_attribute_unused()
2782 0 : RT_DUMP_NODE(RT_NODE * node)
2783 : {
2784 : #ifdef RT_SHMEM
2785 : #define RT_CHILD_PTR_FORMAT DSA_POINTER_FORMAT
2786 : #else
2787 : #define RT_CHILD_PTR_FORMAT "%p"
2788 : #endif
2789 :
2790 0 : fprintf(stderr, "kind %d, fanout %d, count %u\n",
2791 0 : (node->kind == RT_NODE_KIND_4) ? 4 :
2792 0 : (node->kind == RT_NODE_KIND_16) ? 16 :
2793 0 : (node->kind == RT_NODE_KIND_48) ? 48 : 256,
2794 0 : node->fanout == 0 ? 256 : node->fanout,
2795 0 : node->count == 0 ? 256 : node->count);
2796 :
2797 0 : switch (node->kind)
2798 : {
2799 0 : case RT_NODE_KIND_4:
2800 : {
2801 0 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2802 :
2803 0 : fprintf(stderr, "chunks and slots:\n");
2804 0 : for (int i = 0; i < n4->base.count; i++)
2805 : {
2806 0 : fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2807 0 : i, n4->chunks[i], n4->children[i]);
2808 : }
2809 :
2810 0 : break;
2811 : }
2812 0 : case RT_NODE_KIND_16:
2813 : {
2814 0 : RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2815 :
2816 0 : fprintf(stderr, "chunks and slots:\n");
2817 0 : for (int i = 0; i < n16->base.count; i++)
2818 : {
2819 0 : fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2820 0 : i, n16->chunks[i], n16->children[i]);
2821 : }
2822 0 : break;
2823 : }
2824 0 : case RT_NODE_KIND_48:
2825 : {
2826 0 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2827 0 : char *sep = "";
2828 :
2829 0 : fprintf(stderr, "slot_idxs: \n");
2830 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2831 : {
2832 0 : if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2833 0 : continue;
2834 :
2835 0 : fprintf(stderr, " idx[%d] = %d\n",
2836 0 : chunk, n48->slot_idxs[chunk]);
2837 : }
2838 :
2839 0 : fprintf(stderr, "isset-bitmap: ");
2840 0 : for (int i = 0; i < (RT_FANOUT_48_MAX / BITS_PER_BYTE); i++)
2841 : {
2842 0 : fprintf(stderr, "%s%x", sep, ((uint8 *) n48->isset)[i]);
2843 0 : sep = " ";
2844 : }
2845 0 : fprintf(stderr, "\n");
2846 :
2847 0 : fprintf(stderr, "chunks and slots:\n");
2848 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2849 : {
2850 0 : if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2851 0 : continue;
2852 :
2853 0 : fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2854 : chunk,
2855 0 : *RT_NODE_48_GET_CHILD(n48, chunk));
2856 : }
2857 0 : break;
2858 : }
2859 0 : case RT_NODE_KIND_256:
2860 : {
2861 0 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2862 0 : char *sep = "";
2863 :
2864 0 : fprintf(stderr, "isset-bitmap: ");
2865 0 : for (int i = 0; i < (RT_FANOUT_256 / BITS_PER_BYTE); i++)
2866 : {
2867 0 : fprintf(stderr, "%s%x", sep, ((uint8 *) n256->isset)[i]);
2868 0 : sep = " ";
2869 : }
2870 0 : fprintf(stderr, "\n");
2871 :
2872 0 : fprintf(stderr, "chunks and slots:\n");
2873 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2874 : {
2875 0 : if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2876 0 : continue;
2877 :
2878 0 : fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2879 : chunk,
2880 0 : *RT_NODE_256_GET_CHILD(n256, chunk));
2881 : }
2882 0 : break;
2883 : }
2884 : }
2885 0 : }
2886 : #endif /* RT_DEBUG */
2887 :
2888 : #endif /* RT_DEFINE */
2889 :
2890 :
2891 : /* undefine external parameters, so next radix tree can be defined */
2892 : #undef RT_PREFIX
2893 : #undef RT_SCOPE
2894 : #undef RT_DECLARE
2895 : #undef RT_DEFINE
2896 : #undef RT_VALUE_TYPE
2897 : #undef RT_VARLEN_VALUE_SIZE
2898 : #undef RT_RUNTIME_EMBEDDABLE_VALUE
2899 : #undef RT_SHMEM
2900 : #undef RT_USE_DELETE
2901 : #undef RT_DEBUG
2902 :
2903 : /* locally declared macros */
2904 : #undef RT_MAKE_PREFIX
2905 : #undef RT_MAKE_NAME
2906 : #undef RT_MAKE_NAME_
2907 : #undef RT_STR
2908 : #undef RT_STR_
2909 : #undef RT_SPAN
2910 : #undef RT_NODE_MAX_SLOTS
2911 : #undef RT_CHUNK_MASK
2912 : #undef RT_MAX_SHIFT
2913 : #undef RT_MAX_LEVEL
2914 : #undef RT_GET_KEY_CHUNK
2915 : #undef RT_BM_IDX
2916 : #undef RT_BM_BIT
2917 : #undef RT_NODE_MUST_GROW
2918 : #undef RT_NODE_KIND_COUNT
2919 : #undef RT_NUM_SIZE_CLASSES
2920 : #undef RT_INVALID_SLOT_IDX
2921 : #undef RT_SLAB_BLOCK_SIZE
2922 : #undef RT_RADIX_TREE_MAGIC
2923 : #undef RT_CHILD_PTR_FORMAT
2924 :
2925 : /* type declarations */
2926 : #undef RT_RADIX_TREE
2927 : #undef RT_RADIX_TREE_CONTROL
2928 : #undef RT_CHILD_PTR
2929 : #undef RT_PTR_ALLOC
2930 : #undef RT_INVALID_PTR_ALLOC
2931 : #undef RT_HANDLE
2932 : #undef RT_ITER
2933 : #undef RT_NODE
2934 : #undef RT_NODE_ITER
2935 : #undef RT_NODE_KIND_4
2936 : #undef RT_NODE_KIND_16
2937 : #undef RT_NODE_KIND_48
2938 : #undef RT_NODE_KIND_256
2939 : #undef RT_NODE_4
2940 : #undef RT_NODE_16
2941 : #undef RT_NODE_48
2942 : #undef RT_NODE_256
2943 : #undef RT_SIZE_CLASS
2944 : #undef RT_SIZE_CLASS_ELEM
2945 : #undef RT_SIZE_CLASS_INFO
2946 : #undef RT_CLASS_4
2947 : #undef RT_CLASS_16_LO
2948 : #undef RT_CLASS_16_HI
2949 : #undef RT_CLASS_48
2950 : #undef RT_CLASS_256
2951 : #undef RT_FANOUT_4
2952 : #undef RT_FANOUT_4_MAX
2953 : #undef RT_FANOUT_16_LO
2954 : #undef RT_FANOUT_16_HI
2955 : #undef RT_FANOUT_16_MAX
2956 : #undef RT_FANOUT_48
2957 : #undef RT_FANOUT_48_MAX
2958 : #undef RT_FANOUT_256
2959 :
2960 : /* function declarations */
2961 : #undef RT_CREATE
2962 : #undef RT_FREE
2963 : #undef RT_ATTACH
2964 : #undef RT_DETACH
2965 : #undef RT_LOCK_EXCLUSIVE
2966 : #undef RT_LOCK_SHARE
2967 : #undef RT_UNLOCK
2968 : #undef RT_GET_HANDLE
2969 : #undef RT_FIND
2970 : #undef RT_SET
2971 : #undef RT_BEGIN_ITERATE
2972 : #undef RT_ITERATE_NEXT
2973 : #undef RT_END_ITERATE
2974 : #undef RT_DELETE
2975 : #undef RT_MEMORY_USAGE
2976 : #undef RT_DUMP_NODE
2977 : #undef RT_STATS
2978 :
2979 : /* internal helper functions */
2980 : #undef RT_GET_VALUE_SIZE
2981 : #undef RT_VALUE_IS_EMBEDDABLE
2982 : #undef RT_CHILDPTR_IS_VALUE
2983 : #undef RT_GET_SLOT_RECURSIVE
2984 : #undef RT_DELETE_RECURSIVE
2985 : #undef RT_ALLOC_NODE
2986 : #undef RT_ALLOC_LEAF
2987 : #undef RT_FREE_NODE
2988 : #undef RT_FREE_LEAF
2989 : #undef RT_FREE_RECURSE
2990 : #undef RT_EXTEND_UP
2991 : #undef RT_EXTEND_DOWN
2992 : #undef RT_COPY_COMMON
2993 : #undef RT_PTR_SET_LOCAL
2994 : #undef RT_PTR_ALLOC_IS_VALID
2995 : #undef RT_NODE_16_SEARCH_EQ
2996 : #undef RT_NODE_4_GET_INSERTPOS
2997 : #undef RT_NODE_16_GET_INSERTPOS
2998 : #undef RT_SHIFT_ARRAYS_FOR_INSERT
2999 : #undef RT_SHIFT_ARRAYS_AND_DELETE
3000 : #undef RT_COPY_ARRAYS_FOR_INSERT
3001 : #undef RT_COPY_ARRAYS_AND_DELETE
3002 : #undef RT_NODE_48_IS_CHUNK_USED
3003 : #undef RT_NODE_48_GET_CHILD
3004 : #undef RT_NODE_256_IS_CHUNK_USED
3005 : #undef RT_NODE_256_GET_CHILD
3006 : #undef RT_KEY_GET_SHIFT
3007 : #undef RT_SHIFT_GET_MAX_VAL
3008 : #undef RT_NODE_SEARCH
3009 : #undef RT_ADD_CHILD_4
3010 : #undef RT_ADD_CHILD_16
3011 : #undef RT_ADD_CHILD_48
3012 : #undef RT_ADD_CHILD_256
3013 : #undef RT_GROW_NODE_4
3014 : #undef RT_GROW_NODE_16
3015 : #undef RT_GROW_NODE_48
3016 : #undef RT_REMOVE_CHILD_4
3017 : #undef RT_REMOVE_CHILD_16
3018 : #undef RT_REMOVE_CHILD_48
3019 : #undef RT_REMOVE_CHILD_256
3020 : #undef RT_SHRINK_NODE_16
3021 : #undef RT_SHRINK_NODE_48
3022 : #undef RT_SHRINK_NODE_256
3023 : #undef RT_NODE_DELETE
3024 : #undef RT_NODE_INSERT
3025 : #undef RT_NODE_ITERATE_NEXT
3026 : #undef RT_VERIFY_NODE
|