Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * radixtree.h
4 : * Template for adaptive radix tree.
5 : *
6 : * A template to generate an "adaptive radix tree", specialized for value
7 : * types and for local/shared memory.
8 : *
9 : * The concept originates from the paper "The Adaptive Radix Tree: ARTful
10 : * Indexing for Main-Memory Databases" by Viktor Leis, Alfons Kemper,
11 : * and Thomas Neumann, 2013.
12 : *
13 : * Radix trees have some advantages over hash tables:
14 : * - The keys are logically ordered, allowing efficient sorted iteration
15 : * and range queries
16 : * - Operations using keys that are lexicographically close together
17 : * will have favorable memory locality
18 : * - Memory use grows gradually rather than by doubling
19 : * - The key does not need to be stored with the value, since the key
20 : * is implicitly contained in the path to the value
21 : *
22 : * Some disadvantages are:
23 : * - Point queries (along with insertion and deletion) are slower than
24 : * a linear probing hash table as in simplehash.h
25 : * - Memory usage varies by key distribution, so is difficult to predict
26 : *
27 : * A classic radix tree consists of nodes, each containing an array of
28 : * pointers to child nodes. The size of the array is determined by the
29 : * "span" of the tree, which is the number of bits of the key used to
30 : * index into the array. For example, with a span of 6, a "chunk"
31 : * of 6 bits is extracted from the key at each node traversal, and
32 : * the arrays thus have a "fanout" of 2^6 or 64 entries. A large span
33 : * allows a shorter tree, but requires larger arrays that may be mostly
34 : * wasted space.
35 : *
36 : * The key idea of the adaptive radix tree is to choose different
37 : * data structures based on the number of child nodes. A node will
38 : * start out small when it is first populated, and when it is full,
39 : * it is replaced by the next larger size. Conversely, when a node
40 : * becomes mostly empty, it is replaced by the next smaller node. The
41 : * bulk of the code complexity in this module stems from this dynamic
42 : * switching. One mitigating factor is using a span of 8, since bytes
43 : * are directly addressable.
44 : *
45 : * The ART paper mentions three ways to implement leaves:
46 : *
47 : * "- Single-value leaves: The values are stored using an addi-
48 : * tional leaf node type which stores one value.
49 : * - Multi-value leaves: The values are stored in one of four
50 : * different leaf node types, which mirror the structure of
51 : * inner nodes, but contain values instead of pointers.
52 : * - Combined pointer/value slots: If values fit into point-
53 : * ers, no separate node types are necessary. Instead, each
54 : * pointer storage location in an inner node can either
55 : * store a pointer or a value."
56 : *
57 : * We use a form of "combined pointer/value slots", as recommended. Values
58 : * of size (if fixed at compile time) equal or smaller than the platform's
59 : * pointer type are stored in the child slots of the last level node,
60 : * while larger values are the same as "single-value" leaves above. This
61 : * offers flexibility and efficiency. Variable-length types are currently
62 : * treated as single-value leaves for simplicity, but future work may
63 : * allow those to be stored in the child pointer arrays, when they're
64 : * small enough.
65 : *
66 : * There are two other techniques described in the paper that are not
67 : * implemented here:
68 : * - path compression "...removes all inner nodes that have only a single child."
69 : * - lazy path expansion "...inner nodes are only created if they are required
70 : * to distinguish at least two leaf nodes."
71 : *
72 : * We do have a form of "poor man's path compression", however, enabled by
73 : * only supporting unsigned integer keys (for now assumed to be 64-bit):
74 : * A tree doesn't contain paths where the highest bytes of all keys are
75 : * zero. That way, the tree's height adapts to the distribution of keys.
76 : *
77 : * To handle concurrency, we use a single reader-writer lock for the
78 : * radix tree. If concurrent write operations are possible, the tree
79 : * must be exclusively locked during write operations such as RT_SET()
80 : * and RT_DELETE(), and share locked during read operations such as
81 : * RT_FIND() and RT_BEGIN_ITERATE().
82 : *
83 : * TODO: The current locking mechanism is not optimized for high
84 : * concurrency with mixed read-write workloads. In the future it might
85 : * be worthwhile to replace it with the Optimistic Lock Coupling or
86 : * ROWEX mentioned in the paper "The ART of Practical Synchronization"
87 : * by the same authors as the ART paper, 2016.
88 : *
89 : * To generate a radix tree and associated functions for a use case
90 : * several macros have to be #define'ed before this file is included.
91 : * Including the file #undef's all those, so a new radix tree can be
92 : * generated afterwards.
93 : *
94 : * The relevant parameters are:
95 : * - RT_PREFIX - prefix for all symbol names generated. A prefix of "foo"
96 : * will result in radix tree type "foo_radix_tree" and functions like
97 : * "foo_create"/"foo_free" and so forth.
98 : * - RT_DECLARE - if defined function prototypes and type declarations are
99 : * generated
100 : * - RT_DEFINE - if defined function definitions are generated
101 : * - RT_SCOPE - in which scope (e.g. extern, static inline) do function
102 : * declarations reside
103 : * - RT_VALUE_TYPE - the type of the value.
104 : * - RT_VARLEN_VALUE_SIZE() - for variable length values, an expression
105 : * involving a pointer to the value type, to calculate size.
106 : * NOTE: implies that the value is in fact variable-length,
107 : * so do not set for fixed-length values.
108 : * - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
109 : * storing the value in a child pointer slot, rather than as a single-
110 : * value leaf, if small enough. This requires that the value, when
111 : * read as a child pointer, can be tagged in the lowest bit.
112 : *
113 : * Optional parameters:
114 : * - RT_SHMEM - if defined, the radix tree is created in the DSA area
115 : * so that multiple processes can access it simultaneously.
116 : * - RT_DEBUG - if defined add stats tracking and debugging functions
117 : *
118 : * Interface
119 : * ---------
120 : *
121 : * RT_CREATE - Create a new, empty radix tree
122 : * RT_FREE - Free the radix tree
123 : * RT_FIND - Lookup the value for a given key
124 : * RT_SET - Set a key-value pair
125 : * RT_BEGIN_ITERATE - Begin iterating through all key-value pairs
126 : * RT_ITERATE_NEXT - Return next key-value pair, if any
127 : * RT_END_ITERATE - End iteration
128 : * RT_MEMORY_USAGE - Get the memory usage, measured as the space occupied by memory context blocks
129 : *
130 : * Interface for Shared Memory
131 : * ---------
132 : *
133 : * RT_ATTACH - Attach to the radix tree
134 : * RT_DETACH - Detach from the radix tree
135 : * RT_LOCK_EXCLUSIVE - Lock the radix tree in exclusive mode
136 : * RT_LOCK_SHARE - Lock the radix tree in share mode
137 : * RT_UNLOCK - Unlock the radix tree
138 : * RT_GET_HANDLE - Return the handle of the radix tree
139 : *
140 : * Optional Interface
141 : * ---------
142 : *
143 : * RT_DELETE - Delete a key-value pair. Declared/defined if RT_USE_DELETE is defined
144 : *
145 : *
146 : * Copyright (c) 2024, PostgreSQL Global Development Group
147 : *
148 : * IDENTIFICATION
149 : * src/include/lib/radixtree.h
150 : *
151 : *-------------------------------------------------------------------------
152 : */
153 :
154 : #include "postgres.h"
155 :
156 : #include "nodes/bitmapset.h"
157 : #include "port/pg_bitutils.h"
158 : #include "port/simd.h"
159 : #include "utils/dsa.h"
160 : #include "utils/memutils.h"
161 :
/* helpers */

/*
 * Name mangling: RT_MAKE_NAME(foo) expands to <RT_PREFIX>_foo, so each
 * template instantiation gets its own set of symbol names.
 */
#define RT_MAKE_PREFIX(a) CppConcat(a,_)
#define RT_MAKE_NAME(name) RT_MAKE_NAME_(RT_MAKE_PREFIX(RT_PREFIX),name)
#define RT_MAKE_NAME_(a,b) CppConcat(a,b)
/*
 * stringify a macro constant, from https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html
 */
#define RT_STR(s) RT_STR_(s)
#define RT_STR_(s) #s

/* function declarations */
#define RT_CREATE RT_MAKE_NAME(create)
#define RT_FREE RT_MAKE_NAME(free)
#define RT_FIND RT_MAKE_NAME(find)
#ifdef RT_SHMEM
/* these entry points exist only for trees living in a DSA area */
#define RT_ATTACH RT_MAKE_NAME(attach)
#define RT_DETACH RT_MAKE_NAME(detach)
#define RT_GET_HANDLE RT_MAKE_NAME(get_handle)
#define RT_LOCK_EXCLUSIVE RT_MAKE_NAME(lock_exclusive)
#define RT_LOCK_SHARE RT_MAKE_NAME(lock_share)
#define RT_UNLOCK RT_MAKE_NAME(unlock)
#endif
#define RT_SET RT_MAKE_NAME(set)
#define RT_BEGIN_ITERATE RT_MAKE_NAME(begin_iterate)
#define RT_ITERATE_NEXT RT_MAKE_NAME(iterate_next)
#define RT_END_ITERATE RT_MAKE_NAME(end_iterate)
#ifdef RT_USE_DELETE
#define RT_DELETE RT_MAKE_NAME(delete)
#endif
#define RT_MEMORY_USAGE RT_MAKE_NAME(memory_usage)
#define RT_DUMP_NODE RT_MAKE_NAME(dump_node)
#define RT_STATS RT_MAKE_NAME(stats)
194 :
/* internal helper functions (no externally visible prototypes) */
#define RT_CHILDPTR_IS_VALUE RT_MAKE_NAME(childptr_is_value)
#define RT_VALUE_IS_EMBEDDABLE RT_MAKE_NAME(value_is_embeddable)
#define RT_GET_SLOT_RECURSIVE RT_MAKE_NAME(get_slot_recursive)
#define RT_DELETE_RECURSIVE RT_MAKE_NAME(delete_recursive)
#define RT_ALLOC_NODE RT_MAKE_NAME(alloc_node)
#define RT_ALLOC_LEAF RT_MAKE_NAME(alloc_leaf)
#define RT_FREE_NODE RT_MAKE_NAME(free_node)
#define RT_FREE_LEAF RT_MAKE_NAME(free_leaf)
#define RT_FREE_RECURSE RT_MAKE_NAME(free_recurse)
#define RT_EXTEND_UP RT_MAKE_NAME(extend_up)
#define RT_EXTEND_DOWN RT_MAKE_NAME(extend_down)
#define RT_COPY_COMMON RT_MAKE_NAME(copy_common)
#define RT_PTR_SET_LOCAL RT_MAKE_NAME(ptr_set_local)
#define RT_NODE_16_SEARCH_EQ RT_MAKE_NAME(node_16_search_eq)
#define RT_NODE_4_GET_INSERTPOS RT_MAKE_NAME(node_4_get_insertpos)
#define RT_NODE_16_GET_INSERTPOS RT_MAKE_NAME(node_16_get_insertpos)
#define RT_SHIFT_ARRAYS_FOR_INSERT RT_MAKE_NAME(shift_arrays_for_insert)
#define RT_SHIFT_ARRAYS_AND_DELETE RT_MAKE_NAME(shift_arrays_and_delete)
#define RT_COPY_ARRAYS_FOR_INSERT RT_MAKE_NAME(copy_arrays_for_insert)
#define RT_COPY_ARRAYS_AND_DELETE RT_MAKE_NAME(copy_arrays_and_delete)
#define RT_NODE_48_IS_CHUNK_USED RT_MAKE_NAME(node_48_is_chunk_used)
#define RT_NODE_48_GET_CHILD RT_MAKE_NAME(node_48_get_child)
#define RT_NODE_256_IS_CHUNK_USED RT_MAKE_NAME(node_256_is_chunk_used)
#define RT_NODE_256_GET_CHILD RT_MAKE_NAME(node_256_get_child)
#define RT_KEY_GET_SHIFT RT_MAKE_NAME(key_get_shift)
#define RT_SHIFT_GET_MAX_VAL RT_MAKE_NAME(shift_get_max_val)
#define RT_NODE_SEARCH RT_MAKE_NAME(node_search)
#define RT_NODE_DELETE RT_MAKE_NAME(node_delete)
#define RT_NODE_INSERT RT_MAKE_NAME(node_insert)
#define RT_ADD_CHILD_4 RT_MAKE_NAME(add_child_4)
#define RT_ADD_CHILD_16 RT_MAKE_NAME(add_child_16)
#define RT_ADD_CHILD_48 RT_MAKE_NAME(add_child_48)
#define RT_ADD_CHILD_256 RT_MAKE_NAME(add_child_256)
#define RT_GROW_NODE_4 RT_MAKE_NAME(grow_node_4)
#define RT_GROW_NODE_16 RT_MAKE_NAME(grow_node_16)
#define RT_GROW_NODE_48 RT_MAKE_NAME(grow_node_48)
#define RT_REMOVE_CHILD_4 RT_MAKE_NAME(remove_child_4)
#define RT_REMOVE_CHILD_16 RT_MAKE_NAME(remove_child_16)
#define RT_REMOVE_CHILD_48 RT_MAKE_NAME(remove_child_48)
#define RT_REMOVE_CHILD_256 RT_MAKE_NAME(remove_child_256)
/*
 * NOTE(review): the next three macros are spelled RT_SHRINK_NODE_* but
 * expand to "shrink_child_*" symbols -- presumably a historical naming
 * mismatch; confirm before renaming either side.
 */
#define RT_SHRINK_NODE_16 RT_MAKE_NAME(shrink_child_16)
#define RT_SHRINK_NODE_48 RT_MAKE_NAME(shrink_child_48)
#define RT_SHRINK_NODE_256 RT_MAKE_NAME(shrink_child_256)
#define RT_NODE_ITERATE_NEXT RT_MAKE_NAME(node_iterate_next)
#define RT_VERIFY_NODE RT_MAKE_NAME(verify_node)
241 :
/* type declarations */
#define RT_RADIX_TREE RT_MAKE_NAME(radix_tree)
#define RT_RADIX_TREE_CONTROL RT_MAKE_NAME(radix_tree_control)
#define RT_ITER RT_MAKE_NAME(iter)
#ifdef RT_SHMEM
#define RT_HANDLE RT_MAKE_NAME(handle)
#endif
#define RT_NODE RT_MAKE_NAME(node)
#define RT_CHILD_PTR RT_MAKE_NAME(child_ptr)
#define RT_NODE_ITER RT_MAKE_NAME(node_iter)
#define RT_NODE_4 RT_MAKE_NAME(node_4)
#define RT_NODE_16 RT_MAKE_NAME(node_16)
#define RT_NODE_48 RT_MAKE_NAME(node_48)
#define RT_NODE_256 RT_MAKE_NAME(node_256)
#define RT_SIZE_CLASS RT_MAKE_NAME(size_class)
#define RT_SIZE_CLASS_ELEM RT_MAKE_NAME(size_class_elem)
#define RT_SIZE_CLASS_INFO RT_MAKE_NAME(size_class_info)
#define RT_CLASS_4 RT_MAKE_NAME(class_4)
/*
 * NOTE(review): RT_CLASS_16_LO/HI expand to "class_32_min"/"class_32_max"
 * symbols -- the numbers do not match the macro names; looks historical,
 * confirm before renaming.
 */
#define RT_CLASS_16_LO RT_MAKE_NAME(class_32_min)
#define RT_CLASS_16_HI RT_MAKE_NAME(class_32_max)
#define RT_CLASS_48 RT_MAKE_NAME(class_48)
#define RT_CLASS_256 RT_MAKE_NAME(class_256)
264 :
/* generate forward declarations necessary to use the radix tree */
#ifdef RT_DECLARE

typedef struct RT_RADIX_TREE RT_RADIX_TREE;
typedef struct RT_ITER RT_ITER;

#ifdef RT_SHMEM
/* handle identifying a tree in a DSA area; valid across processes */
typedef dsa_pointer RT_HANDLE;
#endif

#ifdef RT_SHMEM
RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id);
RT_SCOPE RT_RADIX_TREE *RT_ATTACH(dsa_area *dsa, dsa_pointer dp);
RT_SCOPE void RT_DETACH(RT_RADIX_TREE * tree);
RT_SCOPE RT_HANDLE RT_GET_HANDLE(RT_RADIX_TREE * tree);
RT_SCOPE void RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree);
RT_SCOPE void RT_LOCK_SHARE(RT_RADIX_TREE * tree);
RT_SCOPE void RT_UNLOCK(RT_RADIX_TREE * tree);
#else
RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx);
#endif
RT_SCOPE void RT_FREE(RT_RADIX_TREE * tree);

RT_SCOPE RT_VALUE_TYPE *RT_FIND(RT_RADIX_TREE * tree, uint64 key);
RT_SCOPE bool RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p);

#ifdef RT_USE_DELETE
RT_SCOPE bool RT_DELETE(RT_RADIX_TREE * tree, uint64 key);
#endif

RT_SCOPE RT_ITER *RT_BEGIN_ITERATE(RT_RADIX_TREE * tree);
RT_SCOPE RT_VALUE_TYPE *RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p);
RT_SCOPE void RT_END_ITERATE(RT_ITER * iter);

RT_SCOPE uint64 RT_MEMORY_USAGE(RT_RADIX_TREE * tree);

#ifdef RT_DEBUG
RT_SCOPE void RT_STATS(RT_RADIX_TREE * tree);
#endif

#endif							/* RT_DECLARE */
306 :
307 :
308 : /* generate implementation of the radix tree */
309 : #ifdef RT_DEFINE
310 :
311 : /* The number of bits encoded in one tree level */
312 : #define RT_SPAN BITS_PER_BYTE
313 :
314 : /*
315 : * The number of possible partial keys, and thus the maximum number of
316 : * child pointers, for a node.
317 : */
318 : #define RT_NODE_MAX_SLOTS (1 << RT_SPAN)
319 :
320 : /* Mask for extracting a chunk from a key */
321 : #define RT_CHUNK_MASK ((1 << RT_SPAN) - 1)
322 :
323 : /* Maximum shift needed to extract a chunk from a key */
324 : #define RT_MAX_SHIFT RT_KEY_GET_SHIFT(UINT64_MAX)
325 :
326 : /* Maximum level a tree can reach for a key */
327 : #define RT_MAX_LEVEL ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN)
328 :
329 : /* Get a chunk from the key */
330 : #define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & RT_CHUNK_MASK))
331 :
332 : /* For accessing bitmaps */
333 : #define RT_BM_IDX(x) ((x) / BITS_PER_BITMAPWORD)
334 : #define RT_BM_BIT(x) ((x) % BITS_PER_BITMAPWORD)
335 :
336 : /*
337 : * Node kinds
338 : *
339 : * The different node kinds are what make the tree "adaptive".
340 : *
341 : * Each node kind is associated with a different datatype and different
342 : * search/set/delete/iterate algorithms adapted for its size. The largest
343 : * kind, node256 is basically the same as a traditional radix tree,
344 : * and would be most wasteful of memory when sparsely populated. The
345 : * smaller nodes expend some additional CPU time to enable a smaller
346 : * memory footprint.
347 : *
348 : * NOTE: There are 4 node kinds, and this should never be increased,
349 : * for several reasons:
350 : * 1. With 5 or more kinds, gcc tends to use a jump table for switch
351 : * statements.
352 : * 2. The 4 kinds can be represented with 2 bits, so we have the option
353 : * in the future to tag the node pointer with the kind, even on
354 : * platforms with 32-bit pointers. That would touch fewer cache lines
355 : * during traversal and allow faster recovery from branch mispredicts.
356 : * 3. We can have multiple size classes per node kind.
357 : */
358 : #define RT_NODE_KIND_4 0x00
359 : #define RT_NODE_KIND_16 0x01
360 : #define RT_NODE_KIND_48 0x02
361 : #define RT_NODE_KIND_256 0x03
362 : #define RT_NODE_KIND_COUNT 4
363 :
364 : /*
365 : * Calculate the slab block size so that we can allocate at least 32 chunks
366 : * from the block.
367 : */
368 : #define RT_SLAB_BLOCK_SIZE(size) \
369 : Max(SLAB_DEFAULT_BLOCK_SIZE, pg_nextpower2_32(size * 32))
370 :
/* Common header for all nodes; embedded as the first member of every kind */
typedef struct RT_NODE
{
	/* Node kind, one per search/set algorithm */
	uint8		kind;

	/*
	 * Max capacity for the current size class. Storing this in the node
	 * enables multiple size classes per node kind. uint8 is sufficient for
	 * all node kinds, because we only use this number to test if the node
	 * needs to grow. Since node256 never needs to grow, we let this overflow
	 * to zero.
	 */
	uint8		fanout;

	/*
	 * Number of children. uint8 is sufficient for all node kinds, because
	 * nodes shrink when this number gets lower than some threshold. Since
	 * node256 cannot possibly have zero children, we let the counter overflow
	 * and we interpret zero as "256" for this node kind.
	 */
	uint8		count;
} RT_NODE;
394 :
395 :
/* pointer returned by allocation */
#ifdef RT_SHMEM
#define RT_PTR_ALLOC dsa_pointer
#define RT_INVALID_PTR_ALLOC InvalidDsaPointer
#define RT_PTR_ALLOC_IS_VALID(ptr) DsaPointerIsValid(ptr)
#else
#define RT_PTR_ALLOC RT_NODE *
#define RT_INVALID_PTR_ALLOC NULL
#define RT_PTR_ALLOC_IS_VALID(ptr) PointerIsValid(ptr)
#endif

/*
 * A convenience type used when we need to work with a DSA pointer as well
 * as its local pointer. For local memory, both members are the same, so
 * we use a union.
 */
#ifdef RT_SHMEM
typedef struct RT_CHILD_PTR
#else
typedef union RT_CHILD_PTR
#endif
{
	RT_PTR_ALLOC alloc;			/* allocation pointer (DSA or native) */
	RT_NODE    *local;			/* address usable in this process */
} RT_CHILD_PTR;
421 :
422 :
/*
 * Helper macros and functions for value storage.
 * We either embed values in the child slots of the last level
 * node or store pointers to values to the child slots,
 * depending on the value size.
 */

#ifdef RT_VARLEN_VALUE_SIZE
/* variable-length values: size is computed from the value itself */
#define RT_GET_VALUE_SIZE(v) RT_VARLEN_VALUE_SIZE(v)
#else
/* fixed-length values: size is a compile-time constant */
#define RT_GET_VALUE_SIZE(v) sizeof(RT_VALUE_TYPE)
#endif
435 :
/*
 * Return true if the value can be stored in the child array
 * of the lowest-level node, false otherwise.
 */
static inline bool
RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p)
{
#ifdef RT_VARLEN_VALUE_SIZE

#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
	/* variable-length value: embeddable only if it currently fits a slot */
	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
#else
	/* variable-length values are always stored as single-value leaves */
	return false;
#endif

#else
	/* fixed-length value: this comparison is decided at compile time */
	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
#endif
}
455 :
/*
 * Return true if the child pointer contains the value, false
 * if the child pointer is a leaf pointer.
 */
static inline bool
RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child)
{
#ifdef RT_VARLEN_VALUE_SIZE

#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
	/*
	 * check for pointer tag -- embedded variable-length values are tagged
	 * in the lowest bit (see RT_RUNTIME_EMBEDDABLE_VALUE)
	 */
#ifdef RT_SHMEM
	return child & 1;
#else
	return ((uintptr_t) child) & 1;
#endif

#else
	/* untagged variable-length values always live in single-value leaves */
	return false;
#endif

#else
	/* fixed-length value: decided entirely at compile time */
	return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
#endif
}
481 :
/*
 * Symbols for maximum possible fanout are declared first as they are
 * required to declare each node kind. The declarations of other fanout
 * values are followed as they need the struct sizes of each node kind.
 */

/* max possible key chunks without struct padding */
#define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE))

/* equal to two 128-bit SIMD registers, regardless of availability */
#define RT_FANOUT_16_MAX	32

/*
 * This also determines the number of bits necessary for the isset array,
 * so we need to be mindful of the size of bitmapword.  Since bitmapword
 * can be 64 bits, the only values that make sense here are 64 and 128.
 * The ART paper uses at most 64 for this node kind, and one advantage
 * for us is that "isset" is a single bitmapword on most platforms,
 * rather than an array, allowing the compiler to get rid of loops.
 */
#define RT_FANOUT_48_MAX 64

/* node256 is always fully sized: one slot per possible chunk */
#define RT_FANOUT_256 RT_NODE_MAX_SLOTS
505 :
/*
 * Node structs, one for each "kind"
 */

/*
 * node4 and node16 use one array for key chunks and another
 * array of the same length for children. The keys and children
 * are stored at corresponding positions, sorted by chunk.
 */

typedef struct RT_NODE_4
{
	RT_NODE		base;

	/* key chunks present, kept sorted; parallel to children[] */
	uint8		chunks[RT_FANOUT_4_MAX];

	/* number of children depends on size class */
	RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
} RT_NODE_4;
525 :
typedef struct RT_NODE_16
{
	RT_NODE		base;

	/* key chunks present, kept sorted; parallel to children[] */
	uint8		chunks[RT_FANOUT_16_MAX];

	/* number of children depends on size class */
	RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
} RT_NODE_16;
535 :
/*
 * node48 uses a 256-element array indexed by key chunks. This array
 * stores indexes into a second array containing the children.
 */
typedef struct RT_NODE_48
{
	RT_NODE		base;

	/* bitmap to track which slots are in use */
	bitmapword	isset[RT_BM_IDX(RT_FANOUT_48_MAX)];

	/*
	 * Lookup table for indexes into the children[] array. We make this the
	 * last fixed-size member so that it's convenient to memset separately
	 * from the previous members.
	 *
	 * Entries for chunks with no child hold RT_INVALID_SLOT_IDX.
	 */
	uint8		slot_idxs[RT_NODE_MAX_SLOTS];

	/* Invalid index */
#define RT_INVALID_SLOT_IDX	0xFF

	/* number of children depends on size class */
	RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
} RT_NODE_48;
560 :
/*
 * node256 is the largest node type. This node has an array of
 * children directly indexed by chunk. Unlike other node kinds,
 * its array size is by definition fixed.
 */
typedef struct RT_NODE_256
{
	RT_NODE		base;

	/* bitmap to track which slots are in use */
	bitmapword	isset[RT_BM_IDX(RT_FANOUT_256)];

	/* slots for 256 children; validity is tracked by "isset" */
	RT_PTR_ALLOC children[RT_FANOUT_256];
} RT_NODE_256;
576 :
#if defined(RT_SHMEM)
/*
 * Make sure all nodes (except for node256) fit neatly into a DSA
 * size class.  We assume the RT_FANOUT_4 is in the range where DSA size
 * classes increment by 8 (as of PG17 up to 64 bytes), so we just hard
 * code that one.
 */

#if SIZEOF_DSA_POINTER < 8
#define RT_FANOUT_16_LO	((96 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
#define RT_FANOUT_16_HI	Min(RT_FANOUT_16_MAX, (160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
#define RT_FANOUT_48	Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
#else
#define RT_FANOUT_16_LO	((160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
#define RT_FANOUT_16_HI	Min(RT_FANOUT_16_MAX, (320 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
#define RT_FANOUT_48	Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
#endif							/* SIZEOF_DSA_POINTER < 8 */

#else							/* ! RT_SHMEM */

/* doesn't really matter, but may as well use the namesake */
#define RT_FANOUT_16_LO	16
/* use maximum possible */
#define RT_FANOUT_16_HI	RT_FANOUT_16_MAX
#define RT_FANOUT_48	RT_FANOUT_48_MAX

#endif							/* RT_SHMEM */
604 :
/*
 * To save memory in trees with sparse keys, it would make sense to have two
 * size classes for the smallest kind (perhaps a high class of 5 and a low class
 * of 2), but it would be more effective to utilize lazy expansion and
 * path compression.
 */
#define RT_FANOUT_4		4

/* sanity checks on the fanout constants chosen above */
StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding");
StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than HI");
StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset bits");
616 :
/*
 * Node size classes
 *
 * Nodes of different kinds necessarily belong to different size classes.
 * One innovation in our implementation compared to the ART paper is
 * decoupling the notion of size class from kind.
 *
 * The size classes within a given node kind have the same underlying
 * type, but a variable number of children/values. This is possible
 * because each type (except node256) contains metadata that work the
 * same way regardless of how many child slots there are. The nodes
 * can introspect their allocated capacity at runtime.
 */
typedef enum RT_SIZE_CLASS
{
	RT_CLASS_4 = 0,
	RT_CLASS_16_LO,				/* node16 with the smaller fanout */
	RT_CLASS_16_HI,				/* node16 with the larger fanout */
	RT_CLASS_48,
	RT_CLASS_256
} RT_SIZE_CLASS;
638 :
/* Information for each size class */
typedef struct RT_SIZE_CLASS_ELEM
{
	const char *name;			/* used to name the slab memory context */
	int			fanout;			/* max number of children for this class */
	size_t		allocsize;		/* bytes to allocate for one node */
} RT_SIZE_CLASS_ELEM;
646 :
647 :
/* Static table describing every size class, indexed by RT_SIZE_CLASS */
static const RT_SIZE_CLASS_ELEM RT_SIZE_CLASS_INFO[] = {
	[RT_CLASS_4] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node4",
		.fanout = RT_FANOUT_4,
		.allocsize = sizeof(RT_NODE_4) + RT_FANOUT_4 * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_16_LO] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node16_lo",
		.fanout = RT_FANOUT_16_LO,
		.allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_LO * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_16_HI] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node16_hi",
		.fanout = RT_FANOUT_16_HI,
		.allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_HI * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_48] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node48",
		.fanout = RT_FANOUT_48,
		.allocsize = sizeof(RT_NODE_48) + RT_FANOUT_48 * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_256] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node256",
		.fanout = RT_FANOUT_256,
		.allocsize = sizeof(RT_NODE_256),	/* children array has fixed size */
	},
};

#define RT_NUM_SIZE_CLASSES lengthof(RT_SIZE_CLASS_INFO)
677 :
#ifdef RT_SHMEM
/* A magic value used to identify our radix tree */
#define RT_RADIX_TREE_MAGIC 0x54A48167
#endif

/* Contains the actual tree, plus ancillary info */
typedef struct RT_RADIX_TREE_CONTROL
{
#ifdef RT_SHMEM
	RT_HANDLE	handle;			/* DSA pointer to this struct itself */
	uint32		magic;			/* should match RT_RADIX_TREE_MAGIC */
	LWLock		lock;			/* single reader-writer lock for the tree */
#endif

	RT_PTR_ALLOC root;			/* root node, invalid if the tree is empty */
	uint64		max_val;		/* largest key storable at current height */
	int64		num_keys;		/* number of keys currently stored */
	int			start_shift;	/* shift of the root level */

	/* statistics */
#ifdef RT_DEBUG
	int64		num_nodes[RT_NUM_SIZE_CLASSES];
	int64		num_leaves;
#endif
} RT_RADIX_TREE_CONTROL;
703 :
/* Entry point for allocating and accessing the tree */
struct RT_RADIX_TREE
{
	MemoryContext context;

	/* pointing to either local memory or DSA */
	RT_RADIX_TREE_CONTROL *ctl;

#ifdef RT_SHMEM
	dsa_area   *dsa;
#else
	/* one slab context per node size class */
	MemoryContextData *node_slabs[RT_NUM_SIZE_CLASSES];

	/* leaf_context is used only for single-value leaves */
	MemoryContextData *leaf_context;
#endif
	/* context for iterator state (RT_ITER allocations) */
	MemoryContextData *iter_context;
};
722 :
/*
 * Iteration support.
 *
 * Iterating over the radix tree produces each key/value pair in ascending
 * order of the key.
 */

/* state for iterating over a single node */
typedef struct RT_NODE_ITER
{
	RT_CHILD_PTR node;			/* the node this level is iterating over */

	/*
	 * The next index of the chunk array in RT_NODE_KIND_4 and RT_NODE_KIND_16
	 * nodes, or the next chunk in RT_NODE_KIND_48 and RT_NODE_KIND_256 nodes.
	 * 0 for the initial value.
	 */
	int			idx;
} RT_NODE_ITER;

/* state for iterating over the whole radix tree */
struct RT_ITER
{
	RT_RADIX_TREE *tree;

	/*
	 * A stack to track iteration for each level. Level 0 is the lowest (or
	 * leaf) level
	 */
	RT_NODE_ITER node_iters[RT_MAX_LEVEL];
	int			top_level;		/* level of the root node */
	int			cur_level;		/* level currently being advanced */

	/* The key constructed during iteration */
	uint64		key;
};
759 :
760 :
/* verification (available only in assert-enabled builds) */
static void RT_VERIFY_NODE(RT_NODE * node);

/*
 * Make node->local usable in this process.  For shared memory this
 * translates the DSA pointer; for local memory the union members alias
 * each other, so nothing needs to be done.
 */
static inline void
RT_PTR_SET_LOCAL(RT_RADIX_TREE * tree, RT_CHILD_PTR * node)
{
#ifdef RT_SHMEM
	node->local = dsa_get_address(tree->dsa, node->alloc);
#endif
}
771 :
772 : /* Convenience functions for node48 and node256 */
773 :
774 : /* Return true if there is an entry for "chunk" */
775 : static inline bool
776 1057470 : RT_NODE_48_IS_CHUNK_USED(RT_NODE_48 * node, uint8 chunk)
777 : {
778 1057470 : return node->slot_idxs[chunk] != RT_INVALID_SLOT_IDX;
779 : }
780 :
781 : static inline RT_PTR_ALLOC *
782 1066722 : RT_NODE_48_GET_CHILD(RT_NODE_48 * node, uint8 chunk)
783 : {
784 1066722 : return &node->children[node->slot_idxs[chunk]];
785 : }
786 :
787 : /* Return true if there is an entry for "chunk" */
788 : static inline bool
789 8036196 : RT_NODE_256_IS_CHUNK_USED(RT_NODE_256 * node, uint8 chunk)
790 : {
791 8036196 : int idx = RT_BM_IDX(chunk);
792 8036196 : int bitnum = RT_BM_BIT(chunk);
793 :
794 8036196 : return (node->isset[idx] & ((bitmapword) 1 << bitnum)) != 0;
795 : }
796 :
/* Return the address of the child slot for "chunk", which must be in use */
static inline RT_PTR_ALLOC *
RT_NODE_256_GET_CHILD(RT_NODE_256 * node, uint8 chunk)
{
	/* node256 children are directly indexed by chunk */
	Assert(RT_NODE_256_IS_CHUNK_USED(node, chunk));
	return &node->children[chunk];
}
803 :
804 : /*
805 : * Return the smallest shift that will allowing storing the given key.
806 : */
807 : static inline int
808 98422 : RT_KEY_GET_SHIFT(uint64 key)
809 : {
810 98422 : if (key == 0)
811 0 : return 0;
812 : else
813 98422 : return (pg_leftmost_one_pos64(key) / RT_SPAN) * RT_SPAN;
814 : }
815 :
816 : /*
817 : * Return the max value that can be stored in the tree with the given shift.
818 : */
819 : static uint64
820 98348 : RT_SHIFT_GET_MAX_VAL(int shift)
821 : {
822 98348 : if (shift == RT_MAX_SHIFT)
823 20 : return UINT64_MAX;
824 : else
825 98328 : return (UINT64CONST(1) << (shift + RT_SPAN)) - 1;
826 : }
827 :
/*
 * Allocate a new node with the given node kind and size class.
 *
 * The returned RT_CHILD_PTR has both its allocation pointer and its
 * local pointer set.  Only the fixed-size header portion is zeroed;
 * the children array is left uninitialized, which is safe because the
 * zeroed "count" (and, for node48, the invalidated slot_idxs) means no
 * child slot is considered valid yet.
 */
static inline RT_CHILD_PTR
RT_ALLOC_NODE(RT_RADIX_TREE * tree, const uint8 kind, const RT_SIZE_CLASS size_class)
{
	RT_CHILD_PTR allocnode;
	RT_NODE    *node;
	size_t		allocsize;

	allocsize = RT_SIZE_CLASS_INFO[size_class].allocsize;

#ifdef RT_SHMEM
	allocnode.alloc = dsa_allocate(tree->dsa, allocsize);
#else
	/* each size class has its own slab context */
	allocnode.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->node_slabs[size_class],
														allocsize);
#endif

	RT_PTR_SET_LOCAL(tree, &allocnode);
	node = allocnode.local;

	/* initialize contents */

	switch (kind)
	{
		case RT_NODE_KIND_4:
			memset(node, 0, offsetof(RT_NODE_4, children));
			break;
		case RT_NODE_KIND_16:
			memset(node, 0, offsetof(RT_NODE_16, children));
			break;
		case RT_NODE_KIND_48:
			{
				RT_NODE_48 *n48 = (RT_NODE_48 *) node;

				/* zero header and isset; mark every chunk as unused */
				memset(n48, 0, offsetof(RT_NODE_48, slot_idxs));
				memset(n48->slot_idxs, RT_INVALID_SLOT_IDX, sizeof(n48->slot_idxs));
				break;
			}
		case RT_NODE_KIND_256:
			memset(node, 0, offsetof(RT_NODE_256, children));
			break;
		default:
			pg_unreachable();
	}

	node->kind = kind;

	/*
	 * For node256, this will actually overflow to zero, but that's okay
	 * because that node doesn't need to introspect this value.
	 */
	node->fanout = RT_SIZE_CLASS_INFO[size_class].fanout;

#ifdef RT_DEBUG
	/* update the statistics */
	tree->ctl->num_nodes[size_class]++;
#endif

	return allocnode;
}
890 :
/*
 * Allocate a new leaf of "allocsize" bytes.
 *
 * NOTE(review): in the local-memory case only leaf.alloc is assigned here;
 * callers use leaf.local afterwards — presumably alloc and local coincide
 * for local trees (see RT_CHILD_PTR) — confirm against the definition.
 */
static RT_CHILD_PTR
RT_ALLOC_LEAF(RT_RADIX_TREE * tree, size_t allocsize)
{
	RT_CHILD_PTR leaf;

#ifdef RT_SHMEM
	leaf.alloc = dsa_allocate(tree->dsa, allocsize);
	RT_PTR_SET_LOCAL(tree, &leaf);
#else
	leaf.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->leaf_context, allocsize);
#endif

#ifdef RT_DEBUG
	/* update the statistics */
	tree->ctl->num_leaves++;
#endif

	return leaf;
}
912 :
913 : /*
914 : * Copy relevant members of the node header.
915 : * This is a separate function in case other fields are added.
916 : */
917 : static inline void
918 21716 : RT_COPY_COMMON(RT_CHILD_PTR newnode, RT_CHILD_PTR oldnode)
919 : {
920 21716 : (newnode.local)->count = (oldnode.local)->count;
921 21716 : }
922 :
/* Free the given node */
static void
RT_FREE_NODE(RT_RADIX_TREE * tree, RT_CHILD_PTR node)
{
#ifdef RT_DEBUG
	int			i;

	/* update the statistics */

	/* identify the node's size class by matching its stored fanout */
	for (i = 0; i < RT_NUM_SIZE_CLASSES; i++)
	{
		if ((node.local)->fanout == RT_SIZE_CLASS_INFO[i].fanout)
			break;
	}

	/*
	 * The fanout of node256 will appear to be zero within the node header
	 * because of overflow, so we need an extra check here.
	 */
	if (i == RT_NUM_SIZE_CLASSES)
		i = RT_CLASS_256;

	tree->ctl->num_nodes[i]--;
	Assert(tree->ctl->num_nodes[i] >= 0);
#endif

#ifdef RT_SHMEM
	dsa_free(tree->dsa, node.alloc);
#else
	pfree(node.alloc);
#endif
}
955 :
/* Free the given leaf; the root must never be passed here (asserted). */
static inline void
RT_FREE_LEAF(RT_RADIX_TREE * tree, RT_PTR_ALLOC leaf)
{
	Assert(leaf != tree->ctl->root);

#ifdef RT_DEBUG
	/* update the statistics */
	tree->ctl->num_leaves--;
	Assert(tree->ctl->num_leaves >= 0);
#endif

#ifdef RT_SHMEM
	dsa_free(tree->dsa, leaf);
#else
	pfree(leaf);
#endif
}
973 :
974 : /***************** SEARCH *****************/
975 :
/*
 * Return the address of the child corresponding to "chunk",
 * or NULL if there is no such element.
 *
 * On SIMD builds the search is vectorized; a scalar reference search is
 * also compiled under assert-enabled builds and cross-checked against the
 * SIMD result.
 */
static inline RT_PTR_ALLOC *
RT_NODE_16_SEARCH_EQ(RT_NODE_16 * node, uint8 chunk)
{
	int			count = node->base.count;
#ifndef USE_NO_SIMD
	Vector8		spread_chunk;
	Vector8		haystack1;
	Vector8		haystack2;
	Vector8		cmp1;
	Vector8		cmp2;
	uint32		bitfield;
	RT_PTR_ALLOC *slot_simd = NULL;
#endif

#if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
	/* scalar reference implementation: first match wins */
	RT_PTR_ALLOC *slot = NULL;

	for (int i = 0; i < count; i++)
	{
		if (node->chunks[i] == chunk)
		{
			slot = &node->children[i];
			break;
		}
	}
#endif

#ifndef USE_NO_SIMD
	/* replicate the search key */
	spread_chunk = vector8_broadcast(chunk);

	/* compare to all 32 keys stored in the node */
	vector8_load(&haystack1, &node->chunks[0]);
	vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
	cmp1 = vector8_eq(spread_chunk, haystack1);
	cmp2 = vector8_eq(spread_chunk, haystack2);

	/* convert comparison to a bitfield */
	bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));

	/* mask off invalid entries */
	bitfield &= ((UINT64CONST(1) << count) - 1);

	/* convert bitfield to index by counting trailing zeros */
	if (bitfield)
		slot_simd = &node->children[pg_rightmost_one_pos32(bitfield)];

	Assert(slot_simd == slot);
	return slot_simd;
#else
	return slot;
#endif
}
1033 :
/*
 * Search for the child pointer corresponding to "chunk" in the given node.
 *
 * Return the address of the child slot if found, otherwise return NULL.
 * Dispatches on the node kind: linear scan for node4, vectorized scan for
 * node16, slot-index lookup for node48, direct index for node256.
 */
static inline RT_PTR_ALLOC *
RT_NODE_SEARCH(RT_NODE * node, uint8 chunk)
{
	/* Make sure we already converted to local pointer */
	Assert(node != NULL);

	switch (node->kind)
	{
		case RT_NODE_KIND_4:
			{
				RT_NODE_4  *n4 = (RT_NODE_4 *) node;

				for (int i = 0; i < n4->base.count; i++)
				{
					if (n4->chunks[i] == chunk)
						return &n4->children[i];
				}
				return NULL;
			}
		case RT_NODE_KIND_16:
			return RT_NODE_16_SEARCH_EQ((RT_NODE_16 *) node, chunk);
		case RT_NODE_KIND_48:
			{
				RT_NODE_48 *n48 = (RT_NODE_48 *) node;
				int			slotpos = n48->slot_idxs[chunk];

				if (slotpos == RT_INVALID_SLOT_IDX)
					return NULL;

				return RT_NODE_48_GET_CHILD(n48, chunk);
			}
		case RT_NODE_KIND_256:
			{
				RT_NODE_256 *n256 = (RT_NODE_256 *) node;

				if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
					return NULL;

				return RT_NODE_256_GET_CHILD(n256, chunk);
			}
		default:
			pg_unreachable();
	}
}
1083 :
/*
 * Search the given key in the radix tree. Return the pointer to the value if found,
 * otherwise return NULL.
 *
 * Since the function returns a pointer (to support variable-length values),
 * the caller is responsible for locking until it's finished with the value.
 */
RT_SCOPE RT_VALUE_TYPE *
RT_FIND(RT_RADIX_TREE * tree, uint64 key)
{
	RT_CHILD_PTR node;
	RT_PTR_ALLOC *slot = NULL;
	int			shift;

#ifdef RT_SHMEM
	Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
#endif

	/* a key above the tree's current ceiling cannot be present */
	if (key > tree->ctl->max_val)
		return NULL;

	Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
	node.alloc = tree->ctl->root;
	shift = tree->ctl->start_shift;

	/* Descend the tree */
	while (shift >= 0)
	{
		RT_PTR_SET_LOCAL(tree, &node);
		slot = RT_NODE_SEARCH(node.local, RT_GET_KEY_CHUNK(key, shift));
		if (slot == NULL)
			return NULL;

		node.alloc = *slot;
		shift -= RT_SPAN;
	}

	/* the value is either embedded in the slot itself, or in a leaf */
	if (RT_CHILDPTR_IS_VALUE(*slot))
		return (RT_VALUE_TYPE *) slot;
	else
	{
		RT_PTR_SET_LOCAL(tree, &node);
		return (RT_VALUE_TYPE *) node.local;
	}
}
1129 :
1130 : /***************** INSERTION *****************/
1131 :
1132 : #define RT_NODE_MUST_GROW(node) \
1133 : ((node)->count == (node)->fanout)
1134 :
1135 : /*
1136 : * Return index of the chunk and slot arrays for inserting into the node,
1137 : * such that the arrays remain ordered.
1138 : */
1139 : static inline int
1140 20184 : RT_NODE_4_GET_INSERTPOS(RT_NODE_4 * node, uint8 chunk, int count)
1141 : {
1142 : int idx;
1143 :
1144 46698 : for (idx = 0; idx < count; idx++)
1145 : {
1146 37652 : if (node->chunks[idx] >= chunk)
1147 11138 : break;
1148 : }
1149 :
1150 20184 : return idx;
1151 : }
1152 :
/*
 * Return index of the chunk and slot arrays for inserting into the node,
 * such that the arrays remain ordered.
 *
 * Like RT_NODE_16_SEARCH_EQ, a scalar reference loop is compiled under
 * assert-enabled builds and cross-checked against the SIMD result.
 */
static inline int
RT_NODE_16_GET_INSERTPOS(RT_NODE_16 * node, uint8 chunk)
{
	int			count = node->base.count;
#if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
	int			index;
#endif

#ifndef USE_NO_SIMD
	Vector8		spread_chunk;
	Vector8		haystack1;
	Vector8		haystack2;
	Vector8		cmp1;
	Vector8		cmp2;
	Vector8		min1;
	Vector8		min2;
	uint32		bitfield;
	int			index_simd;
#endif

	/*
	 * First compare the last element. There are two reasons to branch here:
	 *
	 * 1) A realistic pattern is inserting ordered keys. In that case,
	 * non-SIMD platforms must do a linear search to the last chunk to find
	 * the insert position. This will get slower as the node fills up.
	 *
	 * 2) On SIMD platforms, we must branch anyway to make sure we don't bit
	 * scan an empty bitfield. Doing the branch here eliminates some work that
	 * we might otherwise throw away.
	 */
	Assert(count > 0);
	if (node->chunks[count - 1] < chunk)
		return count;

#if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)

	/* scalar reference: first chunk greater than the new one */
	for (index = 0; index < count; index++)
	{
		if (node->chunks[index] > chunk)
			break;
	}
#endif

#ifndef USE_NO_SIMD

	/*
	 * This is a bit more complicated than RT_NODE_16_SEARCH_EQ(), because no
	 * unsigned uint8 comparison instruction exists, at least for SSE2. So we
	 * need to play some trickery using vector8_min() to effectively get >=.
	 * There'll never be any equal elements in current uses, but that's what
	 * we get here...
	 */
	spread_chunk = vector8_broadcast(chunk);
	vector8_load(&haystack1, &node->chunks[0]);
	vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
	min1 = vector8_min(spread_chunk, haystack1);
	min2 = vector8_min(spread_chunk, haystack2);
	cmp1 = vector8_eq(spread_chunk, min1);
	cmp2 = vector8_eq(spread_chunk, min2);
	bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));

	Assert((bitfield & ((UINT64CONST(1) << count) - 1)) != 0);
	index_simd = pg_rightmost_one_pos32(bitfield);

	Assert(index_simd == index);
	return index_simd;
#else
	return index;
#endif
}
1228 :
/* Shift the elements right at "insertpos" by one, opening a gap for insert */
static inline void
RT_SHIFT_ARRAYS_FOR_INSERT(uint8 *chunks, RT_PTR_ALLOC * children, int count, int insertpos)
{
	/*
	 * This is basically a memmove, but written in a simple loop for speed on
	 * small inputs.
	 */
	for (int i = count - 1; i >= insertpos; i--)
	{
		/*
		 * workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481
		 * (the empty asm keeps GCC from mis-transforming this loop)
		 */
#ifdef __GNUC__
		__asm__("");
#endif
		chunks[i + 1] = chunks[i];
		children[i + 1] = children[i];
	}
}
1247 :
1248 : /*
1249 : * Copy both chunk and slot arrays into the right
1250 : * place. The caller is responsible for inserting the new element.
1251 : */
1252 : static inline void
1253 9066 : RT_COPY_ARRAYS_FOR_INSERT(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
1254 : uint8 *src_chunks, RT_PTR_ALLOC * src_children,
1255 : int count, int insertpos)
1256 : {
1257 98426 : for (int i = 0; i < count; i++)
1258 : {
1259 89360 : int sourceidx = i;
1260 :
1261 : /* use a branch-free computation to skip the index of the new element */
1262 89360 : int destidx = i + (i >= insertpos);
1263 :
1264 89360 : dst_chunks[destidx] = src_chunks[sourceidx];
1265 89360 : dst_children[destidx] = src_children[sourceidx];
1266 : }
1267 9066 : }
1268 :
1269 : static inline RT_PTR_ALLOC *
1270 21828 : RT_ADD_CHILD_256(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1271 : {
1272 21828 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
1273 21828 : int idx = RT_BM_IDX(chunk);
1274 21828 : int bitnum = RT_BM_BIT(chunk);
1275 :
1276 : /* Mark the slot used for "chunk" */
1277 21828 : n256->isset[idx] |= ((bitmapword) 1 << bitnum);
1278 :
1279 21828 : n256->base.count++;
1280 21828 : RT_VERIFY_NODE((RT_NODE *) n256);
1281 :
1282 21828 : return RT_NODE_256_GET_CHILD(n256, chunk);
1283 : }
1284 :
/*
 * Grow a full node48 into a node256, then reserve the slot for "chunk".
 *
 * The old node is freed and "parent_slot" is updated to point to the
 * replacement node.
 */
static pg_noinline RT_PTR_ALLOC *
RT_GROW_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
				uint8 chunk)
{
	RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
	RT_CHILD_PTR newnode;
	RT_NODE_256 *new256;
	int			i = 0;			/* running chunk value across all words */

	/* initialize new node */
	newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_256, RT_CLASS_256);
	new256 = (RT_NODE_256 *) newnode.local;

	/* copy over the entries */
	RT_COPY_COMMON(newnode, node);
	for (int word_num = 0; word_num < RT_BM_IDX(RT_NODE_MAX_SLOTS); word_num++)
	{
		bitmapword	bitmap = 0;

		/*
		 * Bit manipulation is a surprisingly large portion of the overhead in
		 * the naive implementation. Doing stores word-at-a-time removes a lot
		 * of that overhead.
		 */
		for (int bit = 0; bit < BITS_PER_BITMAPWORD; bit++)
		{
			uint8		offset = n48->slot_idxs[i];

			if (offset != RT_INVALID_SLOT_IDX)
			{
				bitmap |= ((bitmapword) 1 << bit);
				new256->children[i] = n48->children[offset];
			}

			i++;
		}

		new256->isset[word_num] = bitmap;
	}

	/* free old node and update reference in parent */
	*parent_slot = newnode.alloc;
	RT_FREE_NODE(tree, node);

	return RT_ADD_CHILD_256(tree, newnode, chunk);
}
1331 :
/*
 * Reserve a free slot for "chunk" in a (non-full) node48 and return its
 * address.  Free slots are tracked in the "isset" bitmap; the first clear
 * bit identifies the slot to use.
 */
static inline RT_PTR_ALLOC *
RT_ADD_CHILD_48(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
{
	RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
	int			insertpos;
	int			idx = 0;
	bitmapword	w,
				inverse;

	/* get the first word with at least one bit not set */
	for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++)
	{
		w = n48->isset[i];
		if (w < ~((bitmapword) 0))
		{
			idx = i;
			break;
		}
	}

	/* To get the first unset bit in w, get the first set bit in ~w */
	inverse = ~w;
	insertpos = idx * BITS_PER_BITMAPWORD;
	insertpos += bmw_rightmost_one_pos(inverse);
	Assert(insertpos < n48->base.fanout);

	/* mark the slot used by setting the rightmost zero bit */
	n48->isset[idx] |= w + 1;

	/* insert new chunk into place */
	n48->slot_idxs[chunk] = insertpos;

	n48->base.count++;
	RT_VERIFY_NODE((RT_NODE *) n48);

	return &n48->children[insertpos];
}
1369 :
/*
 * Grow a full node16, then reserve the slot for "chunk".
 *
 * A node16 of the low size class grows into the high size class; a node16
 * of the high size class grows into a node48.  In both cases the old node
 * is freed and "parent_slot" is updated to the replacement node.
 */
static pg_noinline RT_PTR_ALLOC *
RT_GROW_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
				uint8 chunk)
{
	RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
	int			insertpos;

	if (n16->base.fanout < RT_FANOUT_16_HI)
	{
		RT_CHILD_PTR newnode;
		RT_NODE_16 *new16;

		Assert(n16->base.fanout == RT_FANOUT_16_LO);

		/* initialize new node */
		newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_HI);
		new16 = (RT_NODE_16 *) newnode.local;

		/* copy over existing entries, leaving a gap at the insert position */
		RT_COPY_COMMON(newnode, node);
		Assert(n16->base.count == RT_FANOUT_16_LO);
		insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
		RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
								  n16->chunks, n16->children,
								  RT_FANOUT_16_LO, insertpos);

		/* insert new chunk into place */
		new16->chunks[insertpos] = chunk;

		new16->base.count++;
		RT_VERIFY_NODE((RT_NODE *) new16);

		/* free old node and update references */
		RT_FREE_NODE(tree, node);
		*parent_slot = newnode.alloc;

		return &new16->children[insertpos];
	}
	else
	{
		RT_CHILD_PTR newnode;
		RT_NODE_48 *new48;
		int			idx,
					bit;

		Assert(n16->base.fanout == RT_FANOUT_16_HI);

		/* initialize new node */
		newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
		new48 = (RT_NODE_48 *) newnode.local;

		/* copy over the entries */
		RT_COPY_COMMON(newnode, node);
		for (int i = 0; i < RT_FANOUT_16_HI; i++)
			new48->slot_idxs[n16->chunks[i]] = i;
		memcpy(&new48->children[0], &n16->children[0], RT_FANOUT_16_HI * sizeof(new48->children[0]));

		/*
		 * Since we just copied a dense array, we can fill "isset" using a
		 * single store, provided the length of that array is at most the
		 * number of bits in a bitmapword.
		 */
		Assert(RT_FANOUT_16_HI <= BITS_PER_BITMAPWORD);
		new48->isset[0] = (bitmapword) (((uint64) 1 << RT_FANOUT_16_HI) - 1);

		/* put the new child at the end of the copied entries */
		insertpos = RT_FANOUT_16_HI;
		idx = RT_BM_IDX(insertpos);
		bit = RT_BM_BIT(insertpos);

		/* mark the slot used */
		new48->isset[idx] |= ((bitmapword) 1 << bit);

		/* insert new chunk into place */
		new48->slot_idxs[chunk] = insertpos;

		new48->base.count++;
		RT_VERIFY_NODE((RT_NODE *) new48);

		/* free old node and update reference in parent */
		*parent_slot = newnode.alloc;
		RT_FREE_NODE(tree, node);

		return &new48->children[insertpos];
	}
}
1456 :
1457 : static inline RT_PTR_ALLOC *
1458 115708 : RT_ADD_CHILD_16(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1459 : {
1460 115708 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1461 115708 : int insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1462 :
1463 : /* shift chunks and children */
1464 115708 : RT_SHIFT_ARRAYS_FOR_INSERT(n16->chunks, n16->children,
1465 115708 : n16->base.count, insertpos);
1466 :
1467 : /* insert new chunk into place */
1468 115708 : n16->chunks[insertpos] = chunk;
1469 :
1470 115708 : n16->base.count++;
1471 115708 : RT_VERIFY_NODE((RT_NODE *) n16);
1472 :
1473 115708 : return &n16->children[insertpos];
1474 : }
1475 :
/*
 * Grow a full node4 into a node16 (low size class), then reserve the slot
 * for "chunk".  The old node is freed and "parent_slot" is updated to the
 * replacement node.
 */
static pg_noinline RT_PTR_ALLOC *
RT_GROW_NODE_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
			   uint8 chunk)
{
	RT_NODE_4  *n4 = (RT_NODE_4 *) (node.local);
	RT_CHILD_PTR newnode;
	RT_NODE_16 *new16;
	int			insertpos;

	/* initialize new node */
	newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
	new16 = (RT_NODE_16 *) newnode.local;

	/* copy over existing entries, leaving a gap at the insert position */
	RT_COPY_COMMON(newnode, node);
	Assert(n4->base.count == RT_FANOUT_4);
	insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, RT_FANOUT_4);
	RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
							  n4->chunks, n4->children,
							  RT_FANOUT_4, insertpos);

	/* insert new chunk into place */
	new16->chunks[insertpos] = chunk;

	new16->base.count++;
	RT_VERIFY_NODE((RT_NODE *) new16);

	/* free old node and update reference in parent */
	*parent_slot = newnode.alloc;
	RT_FREE_NODE(tree, node);

	return &new16->children[insertpos];
}
1509 :
1510 : static inline RT_PTR_ALLOC *
1511 15544 : RT_ADD_CHILD_4(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1512 : {
1513 15544 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1514 15544 : int count = n4->base.count;
1515 15544 : int insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, count);
1516 :
1517 : /* shift chunks and children */
1518 15544 : RT_SHIFT_ARRAYS_FOR_INSERT(n4->chunks, n4->children,
1519 : count, insertpos);
1520 :
1521 : /* insert new chunk into place */
1522 15544 : n4->chunks[insertpos] = chunk;
1523 :
1524 15544 : n4->base.count++;
1525 15544 : RT_VERIFY_NODE((RT_NODE *) n4);
1526 :
1527 15544 : return &n4->children[insertpos];
1528 : }
1529 :
1530 : /*
1531 : * Reserve slot in "node"'s child array. The caller will populate it
1532 : * with the actual child pointer.
1533 : *
1534 : * "parent_slot" is the address of the parent's child pointer to "node".
1535 : * If the node we're inserting into needs to grow, we update the parent's
1536 : * child pointer with the pointer to the new larger node.
1537 : */
1538 : static inline RT_PTR_ALLOC *
1539 219682 : RT_NODE_INSERT(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1540 : uint8 chunk)
1541 : {
1542 219682 : RT_NODE *n = node.local;
1543 :
1544 219682 : switch (n->kind)
1545 : {
1546 20184 : case RT_NODE_KIND_4:
1547 : {
1548 20184 : if (unlikely(RT_NODE_MUST_GROW(n)))
1549 4640 : return RT_GROW_NODE_4(tree, parent_slot, node, chunk);
1550 :
1551 15544 : return RT_ADD_CHILD_4(tree, node, chunk);
1552 : }
1553 124408 : case RT_NODE_KIND_16:
1554 : {
1555 124408 : if (unlikely(RT_NODE_MUST_GROW(n)))
1556 8700 : return RT_GROW_NODE_16(tree, parent_slot, node, chunk);
1557 :
1558 115708 : return RT_ADD_CHILD_16(tree, node, chunk);
1559 : }
1560 53402 : case RT_NODE_KIND_48:
1561 : {
1562 53402 : if (unlikely(RT_NODE_MUST_GROW(n)))
1563 140 : return RT_GROW_NODE_48(tree, parent_slot, node, chunk);
1564 :
1565 53262 : return RT_ADD_CHILD_48(tree, node, chunk);
1566 : }
1567 21688 : case RT_NODE_KIND_256:
1568 21688 : return RT_ADD_CHILD_256(tree, node, chunk);
1569 0 : default:
1570 0 : pg_unreachable();
1571 : }
1572 : }
1573 :
/*
 * The radix tree doesn't have sufficient height. Put new node(s) on top,
 * and move the old node below it.
 */
static pg_noinline void
RT_EXTEND_UP(RT_RADIX_TREE * tree, uint64 key)
{
	int			target_shift = RT_KEY_GET_SHIFT(key);
	int			shift = tree->ctl->start_shift;

	Assert(shift < target_shift);

	/* Grow tree upwards until start shift can accommodate the key */
	while (shift < target_shift)
	{
		RT_CHILD_PTR node;
		RT_NODE_4  *n4;

		/* each interposed node has the old root as its only child, chunk 0 */
		node = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
		n4 = (RT_NODE_4 *) node.local;
		n4->base.count = 1;
		n4->chunks[0] = 0;
		n4->children[0] = tree->ctl->root;

		/* Update the root */
		tree->ctl->root = node.alloc;

		shift += RT_SPAN;
	}

	tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(target_shift);
	tree->ctl->start_shift = target_shift;
}
1607 :
/*
 * Insert a chain of nodes until we reach the lowest level,
 * and return the address of a slot to be filled further up
 * the call stack.
 */
static pg_noinline RT_PTR_ALLOC *
RT_EXTEND_DOWN(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
{
	RT_CHILD_PTR node,
				child;
	RT_NODE_4  *n4;

	/*
	 * The child pointer of the first node in the chain goes in the
	 * caller-provided slot.
	 */
	child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
	*parent_slot = child.alloc;

	node = child;
	shift -= RT_SPAN;

	/* each intermediate node holds exactly one child: the next link down */
	while (shift > 0)
	{
		child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);

		/* We open-code the insertion ourselves, for speed. */
		n4 = (RT_NODE_4 *) node.local;
		n4->base.count = 1;
		n4->chunks[0] = RT_GET_KEY_CHUNK(key, shift);
		n4->children[0] = child.alloc;

		node = child;
		shift -= RT_SPAN;
	}
	Assert(shift == 0);

	/* Reserve slot for the value. */
	n4 = (RT_NODE_4 *) node.local;
	n4->chunks[0] = RT_GET_KEY_CHUNK(key, 0);
	n4->base.count = 1;

	return &n4->children[0];
}
1652 :
1653 : /*
1654 : * Workhorse for RT_SET
1655 : *
1656 : * "parent_slot" is the address of the child pointer we just followed,
1657 : * in the parent's array of children, needed if inserting into the
1658 : * current node causes it to grow.
1659 : */
1660 : static RT_PTR_ALLOC *
1661 858642 : RT_GET_SLOT_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift, bool *found)
1662 : {
1663 : RT_PTR_ALLOC *slot;
1664 : RT_CHILD_PTR node;
1665 858642 : uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
1666 :
1667 858642 : node.alloc = *parent_slot;
1668 858642 : RT_PTR_SET_LOCAL(tree, &node);
1669 858642 : slot = RT_NODE_SEARCH(node.local, chunk);
1670 :
1671 858642 : if (slot == NULL)
1672 : {
1673 219682 : *found = false;
1674 :
1675 : /* reserve slot for the caller to populate */
1676 :
1677 219682 : slot = RT_NODE_INSERT(tree, parent_slot, node, chunk);
1678 :
1679 219682 : if (shift == 0)
1680 209754 : return slot;
1681 : else
1682 9928 : return RT_EXTEND_DOWN(tree, slot, key, shift);
1683 : }
1684 : else
1685 : {
1686 638960 : if (shift == 0)
1687 : {
1688 22332 : *found = true;
1689 22332 : return slot;
1690 : }
1691 : else
1692 616628 : return RT_GET_SLOT_RECURSIVE(tree, slot, key, shift - RT_SPAN, found);
1693 : }
1694 : }
1695 :
/*
 * Set key to value that value_p points to. If the entry already exists, we
 * update its value and return true. Returns false if entry doesn't yet exist.
 *
 * Values small enough to embed are stored directly in the child pointer
 * slot; otherwise a separate leaf allocation holds the value.
 *
 * Taking a lock in exclusive mode is the caller's responsibility.
 */
RT_SCOPE bool
RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p)
{
	bool		found;
	RT_PTR_ALLOC *slot;
	size_t		value_sz = RT_GET_VALUE_SIZE(value_p);

#ifdef RT_SHMEM
	Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
#endif

	Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));

	/* Extend the tree if necessary */
	if (unlikely(key > tree->ctl->max_val))
	{
		if (tree->ctl->num_keys == 0)
		{
			RT_CHILD_PTR node;
			RT_NODE_4  *n4;
			int			start_shift = RT_KEY_GET_SHIFT(key);

			/*
			 * With an empty root node, we don't extend the tree upwards,
			 * since that would result in orphan empty nodes. Instead we open
			 * code inserting into the root node and extend downward from
			 * there.
			 */
			node.alloc = tree->ctl->root;
			RT_PTR_SET_LOCAL(tree, &node);
			n4 = (RT_NODE_4 *) node.local;
			n4->base.count = 1;
			n4->chunks[0] = RT_GET_KEY_CHUNK(key, start_shift);

			slot = RT_EXTEND_DOWN(tree, &n4->children[0], key, start_shift);
			found = false;
			tree->ctl->start_shift = start_shift;
			tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(start_shift);
			goto have_slot;
		}
		else
			RT_EXTEND_UP(tree, key);
	}

	slot = RT_GET_SLOT_RECURSIVE(tree, &tree->ctl->root,
								 key, tree->ctl->start_shift, &found);

have_slot:
	Assert(slot != NULL);

	if (RT_VALUE_IS_EMBEDDABLE(value_p))
	{
		/* free the existing leaf */
		if (found && !RT_CHILDPTR_IS_VALUE(*slot))
			RT_FREE_LEAF(tree, *slot);

		/* store value directly in child pointer slot */
		memcpy(slot, value_p, value_sz);

#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
		/* tag child pointer to mark it as an embedded value, not a leaf */
#ifdef RT_SHMEM
		*slot |= 1;
#else
		*((uintptr_t *) slot) |= 1;
#endif
#endif
	}
	else
	{
		RT_CHILD_PTR leaf;

		if (found && !RT_CHILDPTR_IS_VALUE(*slot))
		{
			/* existing leaf: reuse it if the value size is unchanged */
			Assert(RT_PTR_ALLOC_IS_VALID(*slot));
			leaf.alloc = *slot;
			RT_PTR_SET_LOCAL(tree, &leaf);

			if (RT_GET_VALUE_SIZE((RT_VALUE_TYPE *) leaf.local) != value_sz)
			{
				/*
				 * different sizes, so first free the existing leaf before
				 * allocating a new one
				 */
				RT_FREE_LEAF(tree, *slot);
				leaf = RT_ALLOC_LEAF(tree, value_sz);
				*slot = leaf.alloc;
			}
		}
		else
		{
			/* allocate new leaf and store it in the child array */
			leaf = RT_ALLOC_LEAF(tree, value_sz);
			*slot = leaf.alloc;
		}

		memcpy(leaf.local, value_p, value_sz);
	}

	/* Update the statistics */
	if (!found)
		tree->ctl->num_keys++;

	return found;
}
1807 :
1808 : /***************** SETUP / TEARDOWN *****************/
1809 :
/*
 * Create the radix tree in the given memory context and return it.
 *
 * All local memory required for a radix tree is allocated in the given
 * memory context and its children. Note that RT_FREE() will delete all
 * allocated space within the given memory context, so the dsa_area should
 * be created in a different context.
 */
RT_SCOPE RT_RADIX_TREE *
#ifdef RT_SHMEM
RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id)
#else
RT_CREATE(MemoryContext ctx)
#endif
{
	RT_RADIX_TREE *tree;
	MemoryContext old_ctx;
	RT_CHILD_PTR rootnode;
#ifdef RT_SHMEM
	dsa_pointer dp;
#endif

	old_ctx = MemoryContextSwitchTo(ctx);

	tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
	tree->context = ctx;

	/*
	 * Separate context for iteration in case the tree context doesn't support
	 * pfree
	 */
	tree->iter_context = AllocSetContextCreate(ctx,
											   RT_STR(RT_PREFIX) "_radix_tree iter context",
											   ALLOCSET_SMALL_SIZES);

#ifdef RT_SHMEM
	/* shared tree: control struct lives in the DSA area */
	tree->dsa = dsa;
	dp = dsa_allocate0(dsa, sizeof(RT_RADIX_TREE_CONTROL));
	tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, dp);
	tree->ctl->handle = dp;
	tree->ctl->magic = RT_RADIX_TREE_MAGIC;
	LWLockInitialize(&tree->ctl->lock, tranche_id);
#else
	tree->ctl = (RT_RADIX_TREE_CONTROL *) palloc0(sizeof(RT_RADIX_TREE_CONTROL));

	/* Create a slab context for each size class */
	for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
	{
		RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
		size_t		inner_blocksize = RT_SLAB_BLOCK_SIZE(size_class.allocsize);

		tree->node_slabs[i] = SlabContextCreate(ctx,
												size_class.name,
												inner_blocksize,
												size_class.allocsize);
	}

	/* By default we use the passed context for leaves. */
	tree->leaf_context = tree->context;

#ifndef RT_VARLEN_VALUE_SIZE

	/*
	 * For leaves storing fixed-length values, we use a slab context to avoid
	 * the possibility of space wastage by power-of-2 rounding up.
	 */
	if (sizeof(RT_VALUE_TYPE) > sizeof(RT_PTR_ALLOC))
		tree->leaf_context = SlabContextCreate(ctx,
											   RT_STR(RT_PREFIX) "_radix_tree leaf context",
											   RT_SLAB_BLOCK_SIZE(sizeof(RT_VALUE_TYPE)),
											   sizeof(RT_VALUE_TYPE));
#endif							/* !RT_VARLEN_VALUE_SIZE */
#endif							/* RT_SHMEM */

	/* add root node now so that RT_SET can assume it exists */
	rootnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
	tree->ctl->root = rootnode.alloc;
	tree->ctl->start_shift = 0;
	tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);

	MemoryContextSwitchTo(old_ctx);

	return tree;
}
1894 :
1895 : #ifdef RT_SHMEM
1896 : RT_SCOPE RT_RADIX_TREE *
1897 38 : RT_ATTACH(dsa_area *dsa, RT_HANDLE handle)
1898 : {
1899 : RT_RADIX_TREE *tree;
1900 : dsa_pointer control;
1901 :
1902 38 : tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
1903 :
1904 : /* Find the control object in shared memory */
1905 38 : control = handle;
1906 :
1907 38 : tree->dsa = dsa;
1908 38 : tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, control);
1909 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1910 :
1911 38 : return tree;
1912 : }
1913 :
1914 : RT_SCOPE void
1915 38 : RT_DETACH(RT_RADIX_TREE * tree)
1916 : {
1917 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1918 38 : pfree(tree);
1919 38 : }
1920 :
1921 : RT_SCOPE RT_HANDLE
1922 36 : RT_GET_HANDLE(RT_RADIX_TREE * tree)
1923 : {
1924 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1925 36 : return tree->ctl->handle;
1926 : }
1927 :
1928 : RT_SCOPE void
1929 206 : RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree)
1930 : {
1931 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1932 206 : LWLockAcquire(&tree->ctl->lock, LW_EXCLUSIVE);
1933 206 : }
1934 :
1935 : RT_SCOPE void
1936 421684 : RT_LOCK_SHARE(RT_RADIX_TREE * tree)
1937 : {
1938 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1939 421684 : LWLockAcquire(&tree->ctl->lock, LW_SHARED);
1940 421684 : }
1941 :
1942 : RT_SCOPE void
1943 421890 : RT_UNLOCK(RT_RADIX_TREE * tree)
1944 : {
1945 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1946 421890 : LWLockRelease(&tree->ctl->lock);
1947 421890 : }
1948 :
1949 : /*
1950 : * Recursively free all nodes allocated in the DSA area.
1951 : */
1952 : static void
1953 48 : RT_FREE_RECURSE(RT_RADIX_TREE * tree, RT_PTR_ALLOC ptr, int shift)
1954 : {
1955 : RT_CHILD_PTR node;
1956 :
1957 48 : check_stack_depth();
1958 :
1959 48 : node.alloc = ptr;
1960 48 : RT_PTR_SET_LOCAL(tree, &node);
1961 :
1962 48 : switch (node.local->kind)
1963 : {
1964 28 : case RT_NODE_KIND_4:
1965 : {
1966 28 : RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
1967 :
1968 42 : for (int i = 0; i < n4->base.count; i++)
1969 : {
1970 14 : RT_PTR_ALLOC child = n4->children[i];
1971 :
1972 14 : if (shift > 0)
1973 10 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1974 4 : else if (!RT_CHILDPTR_IS_VALUE(child))
1975 0 : dsa_free(tree->dsa, child);
1976 : }
1977 :
1978 28 : break;
1979 : }
1980 6 : case RT_NODE_KIND_16:
1981 : {
1982 6 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1983 :
1984 98 : for (int i = 0; i < n16->base.count; i++)
1985 : {
1986 92 : RT_PTR_ALLOC child = n16->children[i];
1987 :
1988 92 : if (shift > 0)
1989 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1990 92 : else if (!RT_CHILDPTR_IS_VALUE(child))
1991 92 : dsa_free(tree->dsa, child);
1992 : }
1993 :
1994 6 : break;
1995 : }
1996 6 : case RT_NODE_KIND_48:
1997 : {
1998 6 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1999 :
2000 1542 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2001 : {
2002 : RT_PTR_ALLOC child;
2003 :
2004 1536 : if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2005 1266 : continue;
2006 :
2007 270 : child = *RT_NODE_48_GET_CHILD(n48, i);
2008 :
2009 270 : if (shift > 0)
2010 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2011 270 : else if (!RT_CHILDPTR_IS_VALUE(child))
2012 270 : dsa_free(tree->dsa, child);
2013 : }
2014 :
2015 6 : break;
2016 : }
2017 8 : case RT_NODE_KIND_256:
2018 : {
2019 8 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2020 :
2021 2056 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2022 : {
2023 : RT_PTR_ALLOC child;
2024 :
2025 2048 : if (!RT_NODE_256_IS_CHUNK_USED(n256, i))
2026 1102 : continue;
2027 :
2028 946 : child = *RT_NODE_256_GET_CHILD(n256, i);
2029 :
2030 946 : if (shift > 0)
2031 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2032 946 : else if (!RT_CHILDPTR_IS_VALUE(child))
2033 944 : dsa_free(tree->dsa, child);
2034 : }
2035 :
2036 8 : break;
2037 : }
2038 : }
2039 :
2040 : /* Free the inner node */
2041 48 : dsa_free(tree->dsa, ptr);
2042 48 : }
2043 : #endif
2044 :
2045 : /*
2046 : * Free the radix tree, including all nodes and leaves.
2047 : */
2048 : RT_SCOPE void
2049 1104 : RT_FREE(RT_RADIX_TREE * tree)
2050 : {
2051 : #ifdef RT_SHMEM
2052 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2053 :
2054 : /* Free all memory used for radix tree nodes */
2055 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2056 38 : RT_FREE_RECURSE(tree, tree->ctl->root, tree->ctl->start_shift);
2057 :
2058 : /*
2059 : * Vandalize the control block to help catch programming error where other
2060 : * backends access the memory formerly occupied by this radix tree.
2061 : */
2062 38 : tree->ctl->magic = 0;
2063 38 : dsa_free(tree->dsa, tree->ctl->handle);
2064 : #endif
2065 :
2066 : /*
2067 : * Free all space allocated within the tree's context and delete all child
2068 : * contexts such as those used for nodes.
2069 : */
2070 1104 : MemoryContextReset(tree->context);
2071 1104 : }
2072 :
2073 : /***************** ITERATION *****************/
2074 :
2075 : /*
2076 : * Create and return the iterator for the given radix tree.
2077 : *
2078 : * Taking a lock in shared mode during the iteration is the caller's
2079 : * responsibility.
2080 : */
2081 : RT_SCOPE RT_ITER *
2082 1066 : RT_BEGIN_ITERATE(RT_RADIX_TREE * tree)
2083 : {
2084 : RT_ITER *iter;
2085 : RT_CHILD_PTR root;
2086 :
2087 1066 : iter = (RT_ITER *) MemoryContextAllocZero(tree->iter_context,
2088 : sizeof(RT_ITER));
2089 1066 : iter->tree = tree;
2090 :
2091 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2092 1066 : root.alloc = iter->tree->ctl->root;
2093 1066 : RT_PTR_SET_LOCAL(tree, &root);
2094 :
2095 1066 : iter->top_level = iter->tree->ctl->start_shift / RT_SPAN;
2096 :
2097 : /* Set the root to start */
2098 1066 : iter->cur_level = iter->top_level;
2099 1066 : iter->node_iters[iter->cur_level].node = root;
2100 1066 : iter->node_iters[iter->cur_level].idx = 0;
2101 :
2102 1066 : return iter;
2103 : }
2104 :
2105 : /*
2106 : * Scan the inner node and return the next child pointer if one exists, otherwise
2107 : * return NULL.
2108 : */
2109 : static inline RT_PTR_ALLOC *
2110 251324 : RT_NODE_ITERATE_NEXT(RT_ITER * iter, int level)
2111 : {
2112 251324 : uint8 key_chunk = 0;
2113 : RT_NODE_ITER *node_iter;
2114 : RT_CHILD_PTR node;
2115 251324 : RT_PTR_ALLOC *slot = NULL;
2116 :
2117 : #ifdef RT_SHMEM
2118 : Assert(iter->tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2119 : #endif
2120 :
2121 251324 : node_iter = &(iter->node_iters[level]);
2122 251324 : node = node_iter->node;
2123 :
2124 : Assert(node.local != NULL);
2125 :
2126 251324 : switch ((node.local)->kind)
2127 : {
2128 33082 : case RT_NODE_KIND_4:
2129 : {
2130 33082 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
2131 :
2132 33082 : if (node_iter->idx >= n4->base.count)
2133 16196 : return NULL;
2134 :
2135 16886 : slot = &n4->children[node_iter->idx];
2136 16886 : key_chunk = n4->chunks[node_iter->idx];
2137 16886 : node_iter->idx++;
2138 16886 : break;
2139 : }
2140 6196 : case RT_NODE_KIND_16:
2141 : {
2142 6196 : RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2143 :
2144 6196 : if (node_iter->idx >= n16->base.count)
2145 328 : return NULL;
2146 :
2147 5868 : slot = &n16->children[node_iter->idx];
2148 5868 : key_chunk = n16->chunks[node_iter->idx];
2149 5868 : node_iter->idx++;
2150 5868 : break;
2151 : }
2152 188760 : case RT_NODE_KIND_48:
2153 : {
2154 188760 : RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2155 : int chunk;
2156 :
2157 1060054 : for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2158 : {
2159 1055934 : if (RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2160 184640 : break;
2161 : }
2162 :
2163 188760 : if (chunk >= RT_NODE_MAX_SLOTS)
2164 4120 : return NULL;
2165 :
2166 184640 : slot = RT_NODE_48_GET_CHILD(n48, chunk);
2167 :
2168 184640 : key_chunk = chunk;
2169 184640 : node_iter->idx = chunk + 1;
2170 184640 : break;
2171 : }
2172 23286 : case RT_NODE_KIND_256:
2173 : {
2174 23286 : RT_NODE_256 *n256 = (RT_NODE_256 *) (node.local);
2175 : int chunk;
2176 :
2177 30312 : for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2178 : {
2179 30208 : if (RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2180 23182 : break;
2181 : }
2182 :
2183 23286 : if (chunk >= RT_NODE_MAX_SLOTS)
2184 104 : return NULL;
2185 :
2186 23182 : slot = RT_NODE_256_GET_CHILD(n256, chunk);
2187 :
2188 23182 : key_chunk = chunk;
2189 23182 : node_iter->idx = chunk + 1;
2190 23182 : break;
2191 : }
2192 : }
2193 :
2194 : /* Update the key */
2195 230576 : iter->key &= ~(((uint64) RT_CHUNK_MASK) << (level * RT_SPAN));
2196 230576 : iter->key |= (((uint64) key_chunk) << (level * RT_SPAN));
2197 :
2198 230576 : return slot;
2199 : }
2200 :
2201 : /*
2202 : * Return pointer to value and set key_p as long as there is a key. Otherwise
2203 : * return NULL.
2204 : */
2205 : RT_SCOPE RT_VALUE_TYPE *
2206 211670 : RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p)
2207 : {
2208 211670 : RT_PTR_ALLOC *slot = NULL;
2209 :
2210 252328 : while (iter->cur_level <= iter->top_level)
2211 : {
2212 : RT_CHILD_PTR node;
2213 :
2214 251324 : slot = RT_NODE_ITERATE_NEXT(iter, iter->cur_level);
2215 :
2216 251324 : if (iter->cur_level == 0 && slot != NULL)
2217 : {
2218 : /* Found a value at the leaf node */
2219 210666 : *key_p = iter->key;
2220 210666 : node.alloc = *slot;
2221 :
2222 210666 : if (RT_CHILDPTR_IS_VALUE(*slot))
2223 210666 : return (RT_VALUE_TYPE *) slot;
2224 : else
2225 : {
2226 20078 : RT_PTR_SET_LOCAL(iter->tree, &node);
2227 20078 : return (RT_VALUE_TYPE *) node.local;
2228 : }
2229 : }
2230 :
2231 40658 : if (slot != NULL)
2232 : {
2233 : /* Found the child slot, move down the tree */
2234 19910 : node.alloc = *slot;
2235 19910 : RT_PTR_SET_LOCAL(iter->tree, &node);
2236 :
2237 19910 : iter->cur_level--;
2238 19910 : iter->node_iters[iter->cur_level].node = node;
2239 19910 : iter->node_iters[iter->cur_level].idx = 0;
2240 : }
2241 : else
2242 : {
2243 : /* Not found the child slot, move up the tree */
2244 20748 : iter->cur_level++;
2245 : }
2246 : }
2247 :
2248 : /* We've visited all nodes, so the iteration finished */
2249 1004 : return NULL;
2250 : }
2251 :
2252 : /*
2253 : * Terminate the iteration. The caller is responsible for releasing any locks.
2254 : */
2255 : RT_SCOPE void
2256 1066 : RT_END_ITERATE(RT_ITER * iter)
2257 : {
2258 1066 : pfree(iter);
2259 1066 : }
2260 :
2261 : /***************** DELETION *****************/
2262 :
2263 : #ifdef RT_USE_DELETE
2264 :
2265 : /* Delete the element at "deletepos" */
2266 : static inline void
2267 44134 : RT_SHIFT_ARRAYS_AND_DELETE(uint8 *chunks, RT_PTR_ALLOC * children, int count, int deletepos)
2268 : {
2269 : /*
2270 : * This is basically a memmove, but written in a simple loop for speed on
2271 : * small inputs.
2272 : */
2273 200570 : for (int i = deletepos; i < count - 1; i++)
2274 : {
2275 : /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
2276 : #ifdef __GNUC__
2277 156436 : __asm__("");
2278 : #endif
2279 156436 : chunks[i] = chunks[i + 1];
2280 156436 : children[i] = children[i + 1];
2281 : }
2282 44134 : }
2283 :
2284 : /*
2285 : * Copy both chunk and slot arrays into the right
2286 : * place. The element at "deletepos" is deleted by skipping it.
2287 : */
2288 : static inline void
2289 4162 : RT_COPY_ARRAYS_AND_DELETE(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
2290 : uint8 *src_chunks, RT_PTR_ALLOC * src_children,
2291 : int count, int deletepos)
2292 : {
2293 16648 : for (int i = 0; i < count - 1; i++)
2294 : {
2295 : /*
2296 : * use a branch-free computation to skip the index of the deleted
2297 : * element
2298 : */
2299 12486 : int sourceidx = i + (i >= deletepos);
2300 12486 : int destidx = i;
2301 :
2302 12486 : dst_chunks[destidx] = src_chunks[sourceidx];
2303 12486 : dst_children[destidx] = src_children[sourceidx];
2304 : }
2305 4162 : }
2306 :
2307 : /*
2308 : * Note: While all node-growing functions are called to perform an insertion
2309 : * when no more space is available, shrinking is not a hard-and-fast requirement.
2310 : * When shrinking nodes, we generally wait until the count is about 3/4 of
2311 : * the next lower node's fanout. This prevents ping-ponging between different
2312 : * node sizes.
2313 : *
2314 : * Some shrinking functions delete first and then shrink, either because we
2315 : * must or because it's fast and simple that way. Sometimes it's faster to
2316 : * delete while shrinking.
2317 : */
2318 :
2319 : /*
2320 : * Move contents of a node256 to a node48. Any deletion should have happened
2321 : * in the caller.
2322 : */
2323 : static void pg_noinline
2324 32 : RT_SHRINK_NODE_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2325 : {
2326 32 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2327 : RT_CHILD_PTR newnode;
2328 : RT_NODE_48 *new48;
2329 32 : int slot_idx = 0;
2330 :
2331 : /* initialize new node */
2332 32 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
2333 32 : new48 = (RT_NODE_48 *) newnode.local;
2334 :
2335 : /* copy over the entries */
2336 32 : RT_COPY_COMMON(newnode, node);
2337 8224 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2338 : {
2339 8192 : if (RT_NODE_256_IS_CHUNK_USED(n256, i))
2340 : {
2341 1536 : new48->slot_idxs[i] = slot_idx;
2342 1536 : new48->children[slot_idx] = n256->children[i];
2343 1536 : slot_idx++;
2344 : }
2345 : }
2346 :
2347 : /*
2348 : * Since we just copied a dense array, we can fill "isset" using a single
2349 : * store, provided the length of that array is at most the number of bits
2350 : * in a bitmapword.
2351 : */
2352 : Assert(n256->base.count <= BITS_PER_BITMAPWORD);
2353 32 : new48->isset[0] = (bitmapword) (((uint64) 1 << n256->base.count) - 1);
2354 :
2355 : /* free old node and update reference in parent */
2356 32 : *parent_slot = newnode.alloc;
2357 32 : RT_FREE_NODE(tree, node);
2358 32 : }
2359 :
2360 : static inline void
2361 8978 : RT_REMOVE_CHILD_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2362 : {
2363 : int shrink_threshold;
2364 8978 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2365 8978 : int idx = RT_BM_IDX(chunk);
2366 8978 : int bitnum = RT_BM_BIT(chunk);
2367 :
2368 : /* Mark the slot free for "chunk" */
2369 8978 : n256->isset[idx] &= ~((bitmapword) 1 << bitnum);
2370 :
2371 8978 : n256->base.count--;
2372 :
2373 : /*
2374 : * A full node256 will have a count of zero because of overflow, so we
2375 : * delete first before checking the shrink threshold.
2376 : */
2377 : Assert(n256->base.count > 0);
2378 :
2379 : /* This simplifies RT_SHRINK_NODE_256() */
2380 8978 : shrink_threshold = BITS_PER_BITMAPWORD;
2381 8978 : shrink_threshold = Min(RT_FANOUT_48 / 4 * 3, shrink_threshold);
2382 :
2383 8978 : if (n256->base.count <= shrink_threshold)
2384 32 : RT_SHRINK_NODE_256(tree, parent_slot, node, chunk);
2385 8978 : }
2386 :
2387 : /*
2388 : * Move contents of a node48 to a node16. Any deletion should have happened
2389 : * in the caller.
2390 : */
2391 : static void pg_noinline
2392 4042 : RT_SHRINK_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2393 : {
2394 4042 : RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2395 : RT_CHILD_PTR newnode;
2396 : RT_NODE_16 *new16;
2397 4042 : int destidx = 0;
2398 :
2399 : /*
2400 : * Initialize new node. For now we skip the larger node16 size class for
2401 : * simplicity.
2402 : */
2403 4042 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
2404 4042 : new16 = (RT_NODE_16 *) newnode.local;
2405 :
2406 : /* copy over all existing entries */
2407 4042 : RT_COPY_COMMON(newnode, node);
2408 1038794 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2409 : {
2410 1034752 : if (n48->slot_idxs[chunk] != RT_INVALID_SLOT_IDX)
2411 : {
2412 48504 : new16->chunks[destidx] = chunk;
2413 48504 : new16->children[destidx] = n48->children[n48->slot_idxs[chunk]];
2414 48504 : destidx++;
2415 : }
2416 : }
2417 :
2418 : Assert(destidx < new16->base.fanout);
2419 :
2420 4042 : RT_VERIFY_NODE((RT_NODE *) new16);
2421 :
2422 : /* free old node and update reference in parent */
2423 4042 : *parent_slot = newnode.alloc;
2424 4042 : RT_FREE_NODE(tree, node);
2425 4042 : }
2426 :
2427 : static inline void
2428 133416 : RT_REMOVE_CHILD_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2429 : {
2430 133416 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
2431 133416 : int deletepos = n48->slot_idxs[chunk];
2432 :
2433 : /* For now we skip the larger node16 size class for simplicity */
2434 133416 : int shrink_threshold = RT_FANOUT_16_LO / 4 * 3;
2435 : int idx;
2436 : int bitnum;
2437 :
2438 : Assert(deletepos != RT_INVALID_SLOT_IDX);
2439 :
2440 133416 : idx = RT_BM_IDX(deletepos);
2441 133416 : bitnum = RT_BM_BIT(deletepos);
2442 133416 : n48->isset[idx] &= ~((bitmapword) 1 << bitnum);
2443 133416 : n48->slot_idxs[chunk] = RT_INVALID_SLOT_IDX;
2444 :
2445 133416 : n48->base.count--;
2446 :
2447 : /*
2448 : * To keep shrinking simple, do it after deleting, which is fast for
2449 : * node48 anyway.
2450 : */
2451 133416 : if (n48->base.count <= shrink_threshold)
2452 4042 : RT_SHRINK_NODE_48(tree, parent_slot, node, chunk);
2453 133416 : }
2454 :
2455 : /*
2456 : * Move contents of a node16 to a node4, and delete the one at "deletepos".
2457 : * By deleting as we move, we can avoid memmove operations in the new
2458 : * node.
2459 : */
2460 : static void pg_noinline
2461 4162 : RT_SHRINK_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 deletepos)
2462 : {
2463 4162 : RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2464 : RT_CHILD_PTR newnode;
2465 : RT_NODE_4 *new4;
2466 :
2467 : /* initialize new node */
2468 4162 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
2469 4162 : new4 = (RT_NODE_4 *) newnode.local;
2470 :
2471 : /* copy over existing entries, except for the one at "deletepos" */
2472 4162 : RT_COPY_COMMON(newnode, node);
2473 4162 : RT_COPY_ARRAYS_AND_DELETE(new4->chunks, new4->children,
2474 4162 : n16->chunks, n16->children,
2475 4162 : n16->base.count, deletepos);
2476 :
2477 4162 : new4->base.count--;
2478 4162 : RT_VERIFY_NODE((RT_NODE *) new4);
2479 :
2480 : /* free old node and update reference in parent */
2481 4162 : *parent_slot = newnode.alloc;
2482 4162 : RT_FREE_NODE(tree, node);
2483 4162 : }
2484 :
2485 : static inline void
2486 39936 : RT_REMOVE_CHILD_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2487 : {
2488 39936 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
2489 39936 : int deletepos = slot - n16->children;
2490 :
2491 : /*
2492 : * When shrinking to node4, 4 is hard-coded. After shrinking, the new node
2493 : * will end up with 3 elements and 3 is the largest count where linear
2494 : * search is faster than SIMD, at least on x86-64.
2495 : */
2496 39936 : if (n16->base.count <= 4)
2497 : {
2498 4162 : RT_SHRINK_NODE_16(tree, parent_slot, node, deletepos);
2499 4162 : return;
2500 : }
2501 :
2502 : Assert(deletepos >= 0);
2503 : Assert(n16->chunks[deletepos] == chunk);
2504 :
2505 35774 : RT_SHIFT_ARRAYS_AND_DELETE(n16->chunks, n16->children,
2506 35774 : n16->base.count, deletepos);
2507 35774 : n16->base.count--;
2508 : }
2509 :
2510 : static inline void
2511 39862 : RT_REMOVE_CHILD_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2512 : {
2513 39862 : RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
2514 :
2515 39862 : if (n4->base.count == 1)
2516 : {
2517 : Assert(n4->chunks[0] == chunk);
2518 :
2519 : /*
2520 : * If we're deleting the last entry from the root child node don't
2521 : * free it, but mark both the tree and the root child node empty. That
2522 : * way, RT_SET can assume it exists.
2523 : */
2524 31502 : if (parent_slot == &tree->ctl->root)
2525 : {
2526 62 : n4->base.count = 0;
2527 62 : tree->ctl->start_shift = 0;
2528 62 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
2529 : }
2530 : else
2531 : {
2532 : /*
2533 : * Deleting last entry, so just free the entire node.
2534 : * RT_DELETE_RECURSIVE has already freed the value and lower-level
2535 : * children.
2536 : */
2537 31440 : RT_FREE_NODE(tree, node);
2538 :
2539 : /*
2540 : * Also null out the parent's slot -- this tells the next higher
2541 : * level to delete its child pointer
2542 : */
2543 31440 : *parent_slot = RT_INVALID_PTR_ALLOC;
2544 : }
2545 : }
2546 : else
2547 : {
2548 8360 : int deletepos = slot - n4->children;
2549 :
2550 : Assert(deletepos >= 0);
2551 : Assert(n4->chunks[deletepos] == chunk);
2552 :
2553 8360 : RT_SHIFT_ARRAYS_AND_DELETE(n4->chunks, n4->children,
2554 8360 : n4->base.count, deletepos);
2555 :
2556 8360 : n4->base.count--;
2557 : }
2558 39862 : }
2559 :
2560 : /*
2561 : * Delete the child pointer corresponding to "key" in the given node.
2562 : */
2563 : static inline void
2564 222192 : RT_NODE_DELETE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2565 : {
2566 222192 : switch ((node.local)->kind)
2567 : {
2568 39862 : case RT_NODE_KIND_4:
2569 : {
2570 39862 : RT_REMOVE_CHILD_4(tree, parent_slot, node, chunk, slot);
2571 39862 : return;
2572 : }
2573 39936 : case RT_NODE_KIND_16:
2574 : {
2575 39936 : RT_REMOVE_CHILD_16(tree, parent_slot, node, chunk, slot);
2576 39936 : return;
2577 : }
2578 133416 : case RT_NODE_KIND_48:
2579 : {
2580 133416 : RT_REMOVE_CHILD_48(tree, parent_slot, node, chunk);
2581 133416 : return;
2582 : }
2583 8978 : case RT_NODE_KIND_256:
2584 : {
2585 8978 : RT_REMOVE_CHILD_256(tree, parent_slot, node, chunk);
2586 8978 : return;
2587 : }
2588 0 : default:
2589 0 : pg_unreachable();
2590 : }
2591 : }
2592 :
2593 : /* workhorse for RT_DELETE */
2594 : static bool
2595 830344 : RT_DELETE_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
2596 : {
2597 : RT_PTR_ALLOC *slot;
2598 : RT_CHILD_PTR node;
2599 830344 : uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
2600 :
2601 830344 : node.alloc = *parent_slot;
2602 830344 : RT_PTR_SET_LOCAL(tree, &node);
2603 830344 : slot = RT_NODE_SEARCH(node.local, chunk);
2604 :
2605 830344 : if (slot == NULL)
2606 17962 : return false;
2607 :
2608 812382 : if (shift == 0)
2609 : {
2610 190752 : if (!RT_CHILDPTR_IS_VALUE(*slot))
2611 0 : RT_FREE_LEAF(tree, *slot);
2612 :
2613 190752 : RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2614 190752 : return true;
2615 : }
2616 : else
2617 : {
2618 : bool deleted;
2619 :
2620 621630 : deleted = RT_DELETE_RECURSIVE(tree, slot, key, shift - RT_SPAN);
2621 :
2622 : /* Child node was freed, so delete its slot now */
2623 621630 : if (*slot == RT_INVALID_PTR_ALLOC)
2624 : {
2625 : Assert(deleted);
2626 31440 : RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2627 : }
2628 :
2629 621630 : return deleted;
2630 : }
2631 : }
2632 :
2633 : /*
2634 : * Delete the given key from the radix tree. If the key is found delete it
2635 : * and return true, otherwise do nothing and return false.
2636 : *
2637 : * Taking a lock in exclusive mode is the caller's responsibility.
2638 : */
2639 : RT_SCOPE bool
2640 208714 : RT_DELETE(RT_RADIX_TREE * tree, uint64 key)
2641 : {
2642 : bool deleted;
2643 :
2644 : #ifdef RT_SHMEM
2645 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2646 : #endif
2647 :
2648 208714 : if (key > tree->ctl->max_val)
2649 0 : return false;
2650 :
2651 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2652 208714 : deleted = RT_DELETE_RECURSIVE(tree, &tree->ctl->root,
2653 208714 : key, tree->ctl->start_shift);
2654 :
2655 : /* Found the key to delete. Update the statistics */
2656 208714 : if (deleted)
2657 : {
2658 190752 : tree->ctl->num_keys--;
2659 : Assert(tree->ctl->num_keys >= 0);
2660 : }
2661 :
2662 208714 : return deleted;
2663 : }
2664 :
2665 : #endif /* RT_USE_DELETE */
2666 :
2667 : /***************** UTILITY FUNCTIONS *****************/
2668 :
2669 : /*
2670 : * Return the statistics of the amount of memory used by the radix tree.
2671 : *
2672 : * Since dsa_get_total_size() does appropriate locking, the caller doesn't
2673 : * need to take a lock.
2674 : */
2675 : RT_SCOPE uint64
2676 437292 : RT_MEMORY_USAGE(RT_RADIX_TREE * tree)
2677 : {
2678 437292 : size_t total = 0;
2679 :
2680 : #ifdef RT_SHMEM
2681 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2682 2414 : total = dsa_get_total_size(tree->dsa);
2683 : #else
2684 434878 : total = MemoryContextMemAllocated(tree->context, true);
2685 : #endif
2686 :
2687 437292 : return total;
2688 : }
2689 :
2690 : /*
2691 : * Perform some sanity checks on the given node.
2692 : */
2693 : static void
2694 227886 : RT_VERIFY_NODE(RT_NODE * node)
2695 : {
2696 : #ifdef USE_ASSERT_CHECKING
2697 :
2698 : switch (node->kind)
2699 : {
2700 : case RT_NODE_KIND_4:
2701 : {
2702 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2703 :
2704 : /* RT_DUMP_NODE(node); */
2705 :
2706 : for (int i = 1; i < n4->base.count; i++)
2707 : Assert(n4->chunks[i - 1] < n4->chunks[i]);
2708 :
2709 : break;
2710 : }
2711 : case RT_NODE_KIND_16:
2712 : {
2713 : RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2714 :
2715 : /* RT_DUMP_NODE(node); */
2716 :
2717 : for (int i = 1; i < n16->base.count; i++)
2718 : Assert(n16->chunks[i - 1] < n16->chunks[i]);
2719 :
2720 : break;
2721 : }
2722 : case RT_NODE_KIND_48:
2723 : {
2724 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2725 : int cnt = 0;
2726 :
2727 : /* RT_DUMP_NODE(node); */
2728 :
2729 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2730 : {
2731 : uint8 slot = n48->slot_idxs[i];
2732 : int idx = RT_BM_IDX(slot);
2733 : int bitnum = RT_BM_BIT(slot);
2734 :
2735 : if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2736 : continue;
2737 :
2738 : /* Check if the corresponding slot is used */
2739 : Assert(slot < node->fanout);
2740 : Assert((n48->isset[idx] & ((bitmapword) 1 << bitnum)) != 0);
2741 :
2742 : cnt++;
2743 : }
2744 :
2745 : Assert(n48->base.count == cnt);
2746 :
2747 : break;
2748 : }
2749 : case RT_NODE_KIND_256:
2750 : {
2751 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2752 : int cnt = 0;
2753 :
2754 : /* RT_DUMP_NODE(node); */
2755 :
2756 : for (int i = 0; i < RT_BM_IDX(RT_NODE_MAX_SLOTS); i++)
2757 : cnt += bmw_popcount(n256->isset[i]);
2758 :
2759 : /*
2760 : * Check if the number of used chunk matches, accounting for
2761 : * overflow
2762 : */
2763 : if (cnt == RT_FANOUT_256)
2764 : Assert(n256->base.count == 0);
2765 : else
2766 : Assert(n256->base.count == cnt);
2767 :
2768 : break;
2769 : }
2770 : }
2771 : #endif
2772 227886 : }
2773 :
2774 : /***************** DEBUG FUNCTIONS *****************/
2775 :
2776 : #ifdef RT_DEBUG
2777 :
2778 : /*
2779 : * Print out tree stats, some of which are only collected in debugging builds.
2780 : */
2781 : RT_SCOPE void
2782 122 : RT_STATS(RT_RADIX_TREE * tree)
2783 : {
2784 122 : fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
2785 122 : fprintf(stderr, "num_keys = %lld\n", (long long) tree->ctl->num_keys);
2786 :
2787 : #ifdef RT_SHMEM
2788 : fprintf(stderr, "handle = " DSA_POINTER_FORMAT "\n", tree->ctl->handle);
2789 : #endif
2790 :
2791 122 : fprintf(stderr, "height = %d", tree->ctl->start_shift / RT_SPAN);
2792 :
2793 732 : for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
2794 : {
2795 610 : RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
2796 :
2797 610 : fprintf(stderr, ", n%d = %lld", size_class.fanout, (long long) tree->ctl->num_nodes[i]);
2798 : }
2799 :
2800 122 : fprintf(stderr, ", leaves = %lld", (long long) tree->ctl->num_leaves);
2801 :
2802 122 : fprintf(stderr, "\n");
2803 122 : }
2804 :
2805 : /*
2806 : * Print out debugging information about the given node.
2807 : */
2808 : static void
2809 : pg_attribute_unused()
2810 0 : RT_DUMP_NODE(RT_NODE * node)
2811 : {
2812 : #ifdef RT_SHMEM
2813 : #define RT_CHILD_PTR_FORMAT DSA_POINTER_FORMAT
2814 : #else
2815 : #define RT_CHILD_PTR_FORMAT "%p"
2816 : #endif
2817 :
2818 0 : fprintf(stderr, "kind %d, fanout %d, count %u\n",
2819 0 : (node->kind == RT_NODE_KIND_4) ? 4 :
2820 0 : (node->kind == RT_NODE_KIND_16) ? 16 :
2821 0 : (node->kind == RT_NODE_KIND_48) ? 48 : 256,
2822 0 : node->fanout == 0 ? 256 : node->fanout,
2823 0 : node->count == 0 ? 256 : node->count);
2824 :
2825 0 : switch (node->kind)
2826 : {
2827 0 : case RT_NODE_KIND_4:
2828 : {
2829 0 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2830 :
2831 0 : fprintf(stderr, "chunks and slots:\n");
2832 0 : for (int i = 0; i < n4->base.count; i++)
2833 : {
2834 0 : fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2835 0 : i, n4->chunks[i], n4->children[i]);
2836 : }
2837 :
2838 0 : break;
2839 : }
2840 0 : case RT_NODE_KIND_16:
2841 : {
2842 0 : RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2843 :
2844 0 : fprintf(stderr, "chunks and slots:\n");
2845 0 : for (int i = 0; i < n16->base.count; i++)
2846 : {
2847 0 : fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2848 0 : i, n16->chunks[i], n16->children[i]);
2849 : }
2850 0 : break;
2851 : }
2852 0 : case RT_NODE_KIND_48:
2853 : {
2854 0 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2855 0 : char *sep = "";
2856 :
2857 0 : fprintf(stderr, "slot_idxs: \n");
2858 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2859 : {
2860 0 : if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2861 0 : continue;
2862 :
2863 0 : fprintf(stderr, " idx[%d] = %d\n",
2864 0 : chunk, n48->slot_idxs[chunk]);
2865 : }
2866 :
2867 0 : fprintf(stderr, "isset-bitmap: ");
2868 0 : for (int i = 0; i < (RT_FANOUT_48_MAX / BITS_PER_BYTE); i++)
2869 : {
2870 0 : fprintf(stderr, "%s%x", sep, ((uint8 *) n48->isset)[i]);
2871 0 : sep = " ";
2872 : }
2873 0 : fprintf(stderr, "\n");
2874 :
2875 0 : fprintf(stderr, "chunks and slots:\n");
2876 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2877 : {
2878 0 : if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2879 0 : continue;
2880 :
2881 0 : fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2882 : chunk,
2883 0 : *RT_NODE_48_GET_CHILD(n48, chunk));
2884 : }
2885 0 : break;
2886 : }
2887 0 : case RT_NODE_KIND_256:
2888 : {
2889 0 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2890 0 : char *sep = "";
2891 :
2892 0 : fprintf(stderr, "isset-bitmap: ");
2893 0 : for (int i = 0; i < (RT_FANOUT_256 / BITS_PER_BYTE); i++)
2894 : {
2895 0 : fprintf(stderr, "%s%x", sep, ((uint8 *) n256->isset)[i]);
2896 0 : sep = " ";
2897 : }
2898 0 : fprintf(stderr, "\n");
2899 :
2900 0 : fprintf(stderr, "chunks and slots:\n");
2901 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2902 : {
2903 0 : if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2904 0 : continue;
2905 :
2906 0 : fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2907 : chunk,
2908 0 : *RT_NODE_256_GET_CHILD(n256, chunk));
2909 : }
2910 0 : break;
2911 : }
2912 : }
2913 0 : }
2914 : #endif /* RT_DEBUG */
2915 :
2916 : #endif /* RT_DEFINE */
2917 :
2918 :
2919 : /* undefine external parameters, so next radix tree can be defined */
2920 : #undef RT_PREFIX
2921 : #undef RT_SCOPE
2922 : #undef RT_DECLARE
2923 : #undef RT_DEFINE
2924 : #undef RT_VALUE_TYPE
2925 : #undef RT_VARLEN_VALUE_SIZE
2926 : #undef RT_RUNTIME_EMBEDDABLE_VALUE
2927 : #undef RT_SHMEM
2928 : #undef RT_USE_DELETE
2929 : #undef RT_DEBUG
2930 :
2931 : /* locally declared macros */
2932 : #undef RT_MAKE_PREFIX
2933 : #undef RT_MAKE_NAME
2934 : #undef RT_MAKE_NAME_
2935 : #undef RT_STR
2936 : #undef RT_STR_
2937 : #undef RT_SPAN
2938 : #undef RT_NODE_MAX_SLOTS
2939 : #undef RT_CHUNK_MASK
2940 : #undef RT_MAX_SHIFT
2941 : #undef RT_MAX_LEVEL
2942 : #undef RT_GET_KEY_CHUNK
2943 : #undef RT_BM_IDX
2944 : #undef RT_BM_BIT
2945 : #undef RT_NODE_MUST_GROW
2946 : #undef RT_NODE_KIND_COUNT
2947 : #undef RT_NUM_SIZE_CLASSES
2948 : #undef RT_INVALID_SLOT_IDX
2949 : #undef RT_SLAB_BLOCK_SIZE
2950 : #undef RT_RADIX_TREE_MAGIC
2951 : #undef RT_CHILD_PTR_FORMAT
2952 :
2953 : /* type declarations */
2954 : #undef RT_RADIX_TREE
2955 : #undef RT_RADIX_TREE_CONTROL
2956 : #undef RT_CHILD_PTR
2957 : #undef RT_PTR_ALLOC
2958 : #undef RT_INVALID_PTR_ALLOC
2959 : #undef RT_HANDLE
2960 : #undef RT_ITER
2961 : #undef RT_NODE
2962 : #undef RT_NODE_ITER
2963 : #undef RT_NODE_KIND_4
2964 : #undef RT_NODE_KIND_16
2965 : #undef RT_NODE_KIND_48
2966 : #undef RT_NODE_KIND_256
2967 : #undef RT_NODE_4
2968 : #undef RT_NODE_16
2969 : #undef RT_NODE_48
2970 : #undef RT_NODE_256
2971 : #undef RT_SIZE_CLASS
2972 : #undef RT_SIZE_CLASS_ELEM
2973 : #undef RT_SIZE_CLASS_INFO
2974 : #undef RT_CLASS_4
2975 : #undef RT_CLASS_16_LO
2976 : #undef RT_CLASS_16_HI
2977 : #undef RT_CLASS_48
2978 : #undef RT_CLASS_256
2979 : #undef RT_FANOUT_4
2980 : #undef RT_FANOUT_4_MAX
2981 : #undef RT_FANOUT_16_LO
2982 : #undef RT_FANOUT_16_HI
2983 : #undef RT_FANOUT_16_MAX
2984 : #undef RT_FANOUT_48
2985 : #undef RT_FANOUT_48_MAX
2986 : #undef RT_FANOUT_256
2987 :
2988 : /* function declarations */
2989 : #undef RT_CREATE
2990 : #undef RT_FREE
2991 : #undef RT_ATTACH
2992 : #undef RT_DETACH
2993 : #undef RT_LOCK_EXCLUSIVE
2994 : #undef RT_LOCK_SHARE
2995 : #undef RT_UNLOCK
2996 : #undef RT_GET_HANDLE
2997 : #undef RT_FIND
2998 : #undef RT_SET
2999 : #undef RT_BEGIN_ITERATE
3000 : #undef RT_ITERATE_NEXT
3001 : #undef RT_END_ITERATE
3002 : #undef RT_DELETE
3003 : #undef RT_MEMORY_USAGE
3004 : #undef RT_DUMP_NODE
3005 : #undef RT_STATS
3006 :
3007 : /* internal helper functions */
3008 : #undef RT_GET_VALUE_SIZE
3009 : #undef RT_VALUE_IS_EMBEDDABLE
3010 : #undef RT_CHILDPTR_IS_VALUE
3011 : #undef RT_GET_SLOT_RECURSIVE
3012 : #undef RT_DELETE_RECURSIVE
3013 : #undef RT_ALLOC_NODE
3014 : #undef RT_ALLOC_LEAF
3015 : #undef RT_FREE_NODE
3016 : #undef RT_FREE_LEAF
3017 : #undef RT_FREE_RECURSE
3018 : #undef RT_EXTEND_UP
3019 : #undef RT_EXTEND_DOWN
3020 : #undef RT_COPY_COMMON
3021 : #undef RT_PTR_SET_LOCAL
3022 : #undef RT_PTR_ALLOC_IS_VALID
3023 : #undef RT_NODE_16_SEARCH_EQ
3024 : #undef RT_NODE_4_GET_INSERTPOS
3025 : #undef RT_NODE_16_GET_INSERTPOS
3026 : #undef RT_SHIFT_ARRAYS_FOR_INSERT
3027 : #undef RT_SHIFT_ARRAYS_AND_DELETE
3028 : #undef RT_COPY_ARRAYS_FOR_INSERT
3029 : #undef RT_COPY_ARRAYS_AND_DELETE
3030 : #undef RT_NODE_48_IS_CHUNK_USED
3031 : #undef RT_NODE_48_GET_CHILD
3032 : #undef RT_NODE_256_IS_CHUNK_USED
3033 : #undef RT_NODE_256_GET_CHILD
3034 : #undef RT_KEY_GET_SHIFT
3035 : #undef RT_SHIFT_GET_MAX_VAL
3036 : #undef RT_NODE_SEARCH
3037 : #undef RT_ADD_CHILD_4
3038 : #undef RT_ADD_CHILD_16
3039 : #undef RT_ADD_CHILD_48
3040 : #undef RT_ADD_CHILD_256
3041 : #undef RT_GROW_NODE_4
3042 : #undef RT_GROW_NODE_16
3043 : #undef RT_GROW_NODE_48
3044 : #undef RT_REMOVE_CHILD_4
3045 : #undef RT_REMOVE_CHILD_16
3046 : #undef RT_REMOVE_CHILD_48
3047 : #undef RT_REMOVE_CHILD_256
3048 : #undef RT_SHRINK_NODE_16
3049 : #undef RT_SHRINK_NODE_48
3050 : #undef RT_SHRINK_NODE_256
3051 : #undef RT_NODE_DELETE
3052 : #undef RT_NODE_INSERT
3053 : #undef RT_NODE_ITERATE_NEXT
3054 : #undef RT_VERIFY_NODE
|