Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * radixtree.h
4 : * Template for adaptive radix tree.
5 : *
6 : * A template to generate an "adaptive radix tree", specialized for value
7 : * types and for local/shared memory.
8 : *
9 : * The concept originates from the paper "The Adaptive Radix Tree: ARTful
10 : * Indexing for Main-Memory Databases" by Viktor Leis, Alfons Kemper,
11 : * and Thomas Neumann, 2013.
12 : *
13 : * Radix trees have some advantages over hash tables:
14 : * - The keys are logically ordered, allowing efficient sorted iteration
15 : * and range queries
16 : * - Operations using keys that are lexicographically close together
17 : * will have favorable memory locality
18 : * - Memory use grows gradually rather than by doubling
19 : * - The key does not need to be stored with the value, since the key
20 : * is implicitly contained in the path to the value
21 : *
22 : * Some disadvantages are:
23 : * - Point queries (along with insertion and deletion) are slower than
24 : * a linear probing hash table as in simplehash.h
25 : * - Memory usage varies by key distribution, so is difficult to predict
26 : *
27 : * A classic radix tree consists of nodes, each containing an array of
28 : * pointers to child nodes. The size of the array is determined by the
29 : * "span" of the tree, which is the number of bits of the key used to
30 : * index into the array. For example, with a span of 6, a "chunk"
31 : * of 6 bits is extracted from the key at each node traversal, and
32 : * the arrays thus have a "fanout" of 2^6 or 64 entries. A large span
33 : * allows a shorter tree, but requires larger arrays that may be mostly
34 : * wasted space.
35 : *
36 : * The key idea of the adaptive radix tree is to choose different
37 : * data structures based on the number of child nodes. A node will
38 : * start out small when it is first populated, and when it is full,
39 : * it is replaced by the next larger size. Conversely, when a node
40 : * becomes mostly empty, it is replaced by the next smaller node. The
41 : * bulk of the code complexity in this module stems from this dynamic
42 : * switching. One mitigating factor is using a span of 8, since bytes
43 : * are directly addressable.
44 : *
45 : * The ART paper mentions three ways to implement leaves:
46 : *
47 : * "- Single-value leaves: The values are stored using an addi-
48 : * tional leaf node type which stores one value.
49 : * - Multi-value leaves: The values are stored in one of four
50 : * different leaf node types, which mirror the structure of
51 : * inner nodes, but contain values instead of pointers.
52 : * - Combined pointer/value slots: If values fit into point-
53 : * ers, no separate node types are necessary. Instead, each
54 : * pointer storage location in an inner node can either
55 : * store a pointer or a value."
56 : *
 * We use a form of "combined pointer/value slots", as recommended. Values
 * whose size (if fixed at compile time) is equal to or smaller than the
 * platform's pointer type are stored in the child slots of the last-level node,
60 : * while larger values are the same as "single-value" leaves above. This
61 : * offers flexibility and efficiency. Variable-length types are currently
62 : * treated as single-value leaves for simplicity, but future work may
63 : * allow those to be stored in the child pointer arrays, when they're
64 : * small enough.
65 : *
66 : * There are two other techniques described in the paper that are not
67 : * implemented here:
68 : * - path compression "...removes all inner nodes that have only a single child."
69 : * - lazy path expansion "...inner nodes are only created if they are required
70 : * to distinguish at least two leaf nodes."
71 : *
72 : * We do have a form of "poor man's path compression", however, enabled by
73 : * only supporting unsigned integer keys (for now assumed to be 64-bit):
74 : * A tree doesn't contain paths where the highest bytes of all keys are
75 : * zero. That way, the tree's height adapts to the distribution of keys.
76 : *
77 : * To handle concurrency, we use a single reader-writer lock for the
78 : * radix tree. If concurrent write operations are possible, the tree
79 : * must be exclusively locked during write operations such as RT_SET()
80 : * and RT_DELETE(), and share locked during read operations such as
81 : * RT_FIND() and RT_BEGIN_ITERATE().
82 : *
83 : * TODO: The current locking mechanism is not optimized for high
84 : * concurrency with mixed read-write workloads. In the future it might
85 : * be worthwhile to replace it with the Optimistic Lock Coupling or
86 : * ROWEX mentioned in the paper "The ART of Practical Synchronization"
87 : * by the same authors as the ART paper, 2016.
88 : *
89 : * To generate a radix tree and associated functions for a use case
90 : * several macros have to be #define'ed before this file is included.
91 : * Including the file #undef's all those, so a new radix tree can be
92 : * generated afterwards.
93 : *
94 : * The relevant parameters are:
95 : * - RT_PREFIX - prefix for all symbol names generated. A prefix of "foo"
96 : * will result in radix tree type "foo_radix_tree" and functions like
97 : * "foo_create"/"foo_free" and so forth.
98 : * - RT_DECLARE - if defined function prototypes and type declarations are
99 : * generated
100 : * - RT_DEFINE - if defined function definitions are generated
101 : * - RT_SCOPE - in which scope (e.g. extern, static inline) do function
102 : * declarations reside
103 : * - RT_VALUE_TYPE - the type of the value.
104 : * - RT_VARLEN_VALUE_SIZE() - for variable length values, an expression
105 : * involving a pointer to the value type, to calculate size.
106 : * NOTE: implies that the value is in fact variable-length,
107 : * so do not set for fixed-length values.
108 : * - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
109 : * storing the value in a child pointer slot, rather than as a single-
110 : * value leaf, if small enough. This requires that the value, when
111 : * read as a child pointer, can be tagged in the lowest bit.
112 : *
113 : * Optional parameters:
114 : * - RT_SHMEM - if defined, the radix tree is created in the DSA area
115 : * so that multiple processes can access it simultaneously.
116 : * - RT_DEBUG - if defined add stats tracking and debugging functions
117 : *
118 : * Interface
119 : * ---------
120 : *
121 : * RT_CREATE - Create a new, empty radix tree
122 : * RT_FREE - Free the radix tree
123 : * RT_FIND - Lookup the value for a given key
124 : * RT_SET - Set a key-value pair
125 : * RT_BEGIN_ITERATE - Begin iterating through all key-value pairs
126 : * RT_ITERATE_NEXT - Return next key-value pair, if any
127 : * RT_END_ITERATE - End iteration
 * RT_MEMORY_USAGE - Get the memory usage, as measured by the space used in memory context blocks
129 : *
130 : * Interface for Shared Memory
131 : * ---------
132 : *
133 : * RT_ATTACH - Attach to the radix tree
134 : * RT_DETACH - Detach from the radix tree
135 : * RT_LOCK_EXCLUSIVE - Lock the radix tree in exclusive mode
136 : * RT_LOCK_SHARE - Lock the radix tree in share mode
137 : * RT_UNLOCK - Unlock the radix tree
138 : * RT_GET_HANDLE - Return the handle of the radix tree
139 : *
140 : * Optional Interface
141 : * ---------
142 : *
143 : * RT_DELETE - Delete a key-value pair. Declared/defined if RT_USE_DELETE is defined
144 : *
145 : *
146 : * Copyright (c) 2024, PostgreSQL Global Development Group
147 : *
148 : * IDENTIFICATION
149 : * src/include/lib/radixtree.h
150 : *
151 : *-------------------------------------------------------------------------
152 : */
153 :
154 : #include "postgres.h"
155 :
156 : #include "nodes/bitmapset.h"
157 : #include "port/pg_bitutils.h"
158 : #include "port/simd.h"
159 : #include "utils/dsa.h"
160 : #include "utils/memutils.h"
161 :
162 : /* helpers */
163 : #define RT_MAKE_PREFIX(a) CppConcat(a,_)
164 : #define RT_MAKE_NAME(name) RT_MAKE_NAME_(RT_MAKE_PREFIX(RT_PREFIX),name)
165 : #define RT_MAKE_NAME_(a,b) CppConcat(a,b)
166 : /*
167 : * stringify a macro constant, from https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html
168 : */
169 : #define RT_STR(s) RT_STR_(s)
170 : #define RT_STR_(s) #s
171 :
172 : /* function declarations */
173 : #define RT_CREATE RT_MAKE_NAME(create)
174 : #define RT_FREE RT_MAKE_NAME(free)
175 : #define RT_FIND RT_MAKE_NAME(find)
176 : #ifdef RT_SHMEM
177 : #define RT_ATTACH RT_MAKE_NAME(attach)
178 : #define RT_DETACH RT_MAKE_NAME(detach)
179 : #define RT_GET_HANDLE RT_MAKE_NAME(get_handle)
180 : #define RT_LOCK_EXCLUSIVE RT_MAKE_NAME(lock_exclusive)
181 : #define RT_LOCK_SHARE RT_MAKE_NAME(lock_share)
182 : #define RT_UNLOCK RT_MAKE_NAME(unlock)
183 : #endif
184 : #define RT_SET RT_MAKE_NAME(set)
185 : #define RT_BEGIN_ITERATE RT_MAKE_NAME(begin_iterate)
186 : #define RT_ITERATE_NEXT RT_MAKE_NAME(iterate_next)
187 : #define RT_END_ITERATE RT_MAKE_NAME(end_iterate)
188 : #ifdef RT_USE_DELETE
189 : #define RT_DELETE RT_MAKE_NAME(delete)
190 : #endif
191 : #define RT_MEMORY_USAGE RT_MAKE_NAME(memory_usage)
192 : #define RT_DUMP_NODE RT_MAKE_NAME(dump_node)
193 : #define RT_STATS RT_MAKE_NAME(stats)
194 :
195 : /* internal helper functions (no externally visible prototypes) */
196 : #define RT_CHILDPTR_IS_VALUE RT_MAKE_NAME(childptr_is_value)
197 : #define RT_VALUE_IS_EMBEDDABLE RT_MAKE_NAME(value_is_embeddable)
198 : #define RT_GET_SLOT_RECURSIVE RT_MAKE_NAME(get_slot_recursive)
199 : #define RT_DELETE_RECURSIVE RT_MAKE_NAME(delete_recursive)
200 : #define RT_ALLOC_NODE RT_MAKE_NAME(alloc_node)
201 : #define RT_ALLOC_LEAF RT_MAKE_NAME(alloc_leaf)
202 : #define RT_FREE_NODE RT_MAKE_NAME(free_node)
203 : #define RT_FREE_LEAF RT_MAKE_NAME(free_leaf)
204 : #define RT_FREE_RECURSE RT_MAKE_NAME(free_recurse)
205 : #define RT_EXTEND_UP RT_MAKE_NAME(extend_up)
206 : #define RT_EXTEND_DOWN RT_MAKE_NAME(extend_down)
207 : #define RT_COPY_COMMON RT_MAKE_NAME(copy_common)
208 : #define RT_PTR_SET_LOCAL RT_MAKE_NAME(ptr_set_local)
209 : #define RT_NODE_16_SEARCH_EQ RT_MAKE_NAME(node_16_search_eq)
210 : #define RT_NODE_4_GET_INSERTPOS RT_MAKE_NAME(node_4_get_insertpos)
211 : #define RT_NODE_16_GET_INSERTPOS RT_MAKE_NAME(node_16_get_insertpos)
212 : #define RT_SHIFT_ARRAYS_FOR_INSERT RT_MAKE_NAME(shift_arrays_for_insert)
213 : #define RT_SHIFT_ARRAYS_AND_DELETE RT_MAKE_NAME(shift_arrays_and_delete)
214 : #define RT_COPY_ARRAYS_FOR_INSERT RT_MAKE_NAME(copy_arrays_for_insert)
215 : #define RT_COPY_ARRAYS_AND_DELETE RT_MAKE_NAME(copy_arrays_and_delete)
216 : #define RT_NODE_48_IS_CHUNK_USED RT_MAKE_NAME(node_48_is_chunk_used)
217 : #define RT_NODE_48_GET_CHILD RT_MAKE_NAME(node_48_get_child)
218 : #define RT_NODE_256_IS_CHUNK_USED RT_MAKE_NAME(node_256_is_chunk_used)
219 : #define RT_NODE_256_GET_CHILD RT_MAKE_NAME(node_256_get_child)
220 : #define RT_KEY_GET_SHIFT RT_MAKE_NAME(key_get_shift)
221 : #define RT_SHIFT_GET_MAX_VAL RT_MAKE_NAME(shift_get_max_val)
222 : #define RT_NODE_SEARCH RT_MAKE_NAME(node_search)
223 : #define RT_NODE_DELETE RT_MAKE_NAME(node_delete)
224 : #define RT_NODE_INSERT RT_MAKE_NAME(node_insert)
225 : #define RT_ADD_CHILD_4 RT_MAKE_NAME(add_child_4)
226 : #define RT_ADD_CHILD_16 RT_MAKE_NAME(add_child_16)
227 : #define RT_ADD_CHILD_48 RT_MAKE_NAME(add_child_48)
228 : #define RT_ADD_CHILD_256 RT_MAKE_NAME(add_child_256)
229 : #define RT_GROW_NODE_4 RT_MAKE_NAME(grow_node_4)
230 : #define RT_GROW_NODE_16 RT_MAKE_NAME(grow_node_16)
231 : #define RT_GROW_NODE_48 RT_MAKE_NAME(grow_node_48)
232 : #define RT_REMOVE_CHILD_4 RT_MAKE_NAME(remove_child_4)
233 : #define RT_REMOVE_CHILD_16 RT_MAKE_NAME(remove_child_16)
234 : #define RT_REMOVE_CHILD_48 RT_MAKE_NAME(remove_child_48)
235 : #define RT_REMOVE_CHILD_256 RT_MAKE_NAME(remove_child_256)
236 : #define RT_SHRINK_NODE_16 RT_MAKE_NAME(shrink_child_16)
237 : #define RT_SHRINK_NODE_48 RT_MAKE_NAME(shrink_child_48)
238 : #define RT_SHRINK_NODE_256 RT_MAKE_NAME(shrink_child_256)
239 : #define RT_NODE_ITERATE_NEXT RT_MAKE_NAME(node_iterate_next)
240 : #define RT_VERIFY_NODE RT_MAKE_NAME(verify_node)
241 :
242 : /* type declarations */
243 : #define RT_RADIX_TREE RT_MAKE_NAME(radix_tree)
244 : #define RT_RADIX_TREE_CONTROL RT_MAKE_NAME(radix_tree_control)
245 : #define RT_ITER RT_MAKE_NAME(iter)
246 : #ifdef RT_SHMEM
247 : #define RT_HANDLE RT_MAKE_NAME(handle)
248 : #endif
249 : #define RT_NODE RT_MAKE_NAME(node)
250 : #define RT_CHILD_PTR RT_MAKE_NAME(child_ptr)
251 : #define RT_NODE_ITER RT_MAKE_NAME(node_iter)
252 : #define RT_NODE_4 RT_MAKE_NAME(node_4)
253 : #define RT_NODE_16 RT_MAKE_NAME(node_16)
254 : #define RT_NODE_48 RT_MAKE_NAME(node_48)
255 : #define RT_NODE_256 RT_MAKE_NAME(node_256)
256 : #define RT_SIZE_CLASS RT_MAKE_NAME(size_class)
257 : #define RT_SIZE_CLASS_ELEM RT_MAKE_NAME(size_class_elem)
258 : #define RT_SIZE_CLASS_INFO RT_MAKE_NAME(size_class_info)
#define RT_CLASS_4 RT_MAKE_NAME(class_4)
/*
 * Generate names consistent with the node16 kind these classes belong to.
 * (These previously expanded to "class_32_min"/"class_32_max", a leftover
 * from an earlier design; the symbols are internal to the template.)
 */
#define RT_CLASS_16_LO RT_MAKE_NAME(class_16_lo)
#define RT_CLASS_16_HI RT_MAKE_NAME(class_16_hi)
#define RT_CLASS_48 RT_MAKE_NAME(class_48)
#define RT_CLASS_256 RT_MAKE_NAME(class_256)
264 :
265 : /* generate forward declarations necessary to use the radix tree */
266 : #ifdef RT_DECLARE
267 :
268 : typedef struct RT_RADIX_TREE RT_RADIX_TREE;
269 : typedef struct RT_ITER RT_ITER;
270 :
271 : #ifdef RT_SHMEM
272 : typedef dsa_pointer RT_HANDLE;
273 : #endif
274 :
275 : #ifdef RT_SHMEM
276 : RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id);
277 : RT_SCOPE RT_RADIX_TREE *RT_ATTACH(dsa_area *dsa, dsa_pointer dp);
278 : RT_SCOPE void RT_DETACH(RT_RADIX_TREE * tree);
279 : RT_SCOPE RT_HANDLE RT_GET_HANDLE(RT_RADIX_TREE * tree);
280 : RT_SCOPE void RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree);
281 : RT_SCOPE void RT_LOCK_SHARE(RT_RADIX_TREE * tree);
282 : RT_SCOPE void RT_UNLOCK(RT_RADIX_TREE * tree);
283 : #else
284 : RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx);
285 : #endif
286 : RT_SCOPE void RT_FREE(RT_RADIX_TREE * tree);
287 :
288 : RT_SCOPE RT_VALUE_TYPE *RT_FIND(RT_RADIX_TREE * tree, uint64 key);
289 : RT_SCOPE bool RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p);
290 :
291 : #ifdef RT_USE_DELETE
292 : RT_SCOPE bool RT_DELETE(RT_RADIX_TREE * tree, uint64 key);
293 : #endif
294 :
295 : RT_SCOPE RT_ITER *RT_BEGIN_ITERATE(RT_RADIX_TREE * tree);
296 : RT_SCOPE RT_VALUE_TYPE *RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p);
297 : RT_SCOPE void RT_END_ITERATE(RT_ITER * iter);
298 :
299 : RT_SCOPE uint64 RT_MEMORY_USAGE(RT_RADIX_TREE * tree);
300 :
301 : #ifdef RT_DEBUG
302 : RT_SCOPE void RT_STATS(RT_RADIX_TREE * tree);
303 : #endif
304 :
305 : #endif /* RT_DECLARE */
306 :
307 :
308 : /* generate implementation of the radix tree */
309 : #ifdef RT_DEFINE
310 :
311 : /* The number of bits encoded in one tree level */
312 : #define RT_SPAN BITS_PER_BYTE
313 :
314 : /*
315 : * The number of possible partial keys, and thus the maximum number of
316 : * child pointers, for a node.
317 : */
318 : #define RT_NODE_MAX_SLOTS (1 << RT_SPAN)
319 :
320 : /* Mask for extracting a chunk from a key */
321 : #define RT_CHUNK_MASK ((1 << RT_SPAN) - 1)
322 :
323 : /* Maximum shift needed to extract a chunk from a key */
324 : #define RT_MAX_SHIFT RT_KEY_GET_SHIFT(UINT64_MAX)
325 :
326 : /* Maximum level a tree can reach for a key */
327 : #define RT_MAX_LEVEL ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN)
328 :
329 : /* Get a chunk from the key */
330 : #define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & RT_CHUNK_MASK))
331 :
332 : /* For accessing bitmaps */
333 : #define RT_BM_IDX(x) ((x) / BITS_PER_BITMAPWORD)
334 : #define RT_BM_BIT(x) ((x) % BITS_PER_BITMAPWORD)
335 :
336 : /*
337 : * Node kinds
338 : *
339 : * The different node kinds are what make the tree "adaptive".
340 : *
341 : * Each node kind is associated with a different datatype and different
342 : * search/set/delete/iterate algorithms adapted for its size. The largest
343 : * kind, node256 is basically the same as a traditional radix tree,
344 : * and would be most wasteful of memory when sparsely populated. The
345 : * smaller nodes expend some additional CPU time to enable a smaller
346 : * memory footprint.
347 : *
348 : * NOTE: There are 4 node kinds, and this should never be increased,
349 : * for several reasons:
350 : * 1. With 5 or more kinds, gcc tends to use a jump table for switch
351 : * statements.
352 : * 2. The 4 kinds can be represented with 2 bits, so we have the option
353 : * in the future to tag the node pointer with the kind, even on
354 : * platforms with 32-bit pointers. That would touch fewer cache lines
355 : * during traversal and allow faster recovery from branch mispredicts.
356 : * 3. We can have multiple size classes per node kind.
357 : */
358 : #define RT_NODE_KIND_4 0x00
359 : #define RT_NODE_KIND_16 0x01
360 : #define RT_NODE_KIND_48 0x02
361 : #define RT_NODE_KIND_256 0x03
362 : #define RT_NODE_KIND_COUNT 4
363 :
364 : /*
365 : * Calculate the slab block size so that we can allocate at least 32 chunks
366 : * from the block.
367 : */
368 : #define RT_SLAB_BLOCK_SIZE(size) \
369 : Max(SLAB_DEFAULT_BLOCK_SIZE, pg_nextpower2_32(size * 32))
370 :
/* Common header for all nodes; every node kind begins with this struct */
typedef struct RT_NODE
{
	/* Node kind, one per search/set algorithm (RT_NODE_KIND_4/16/48/256) */
	uint8		kind;

	/*
	 * Max capacity for the current size class. Storing this in the node
	 * enables multiple size classes per node kind. uint8 is sufficient for
	 * all node kinds, because we only use this number to test if the node
	 * needs to grow. Since node256 never needs to grow, we let this overflow
	 * to zero.
	 */
	uint8		fanout;

	/*
	 * Number of children. uint8 is sufficient for all node kinds, because
	 * nodes shrink when this number gets lower than some threshold. Since
	 * node256 cannot possibly have zero children, we let the counter overflow
	 * and we interpret zero as "256" for this node kind.
	 */
	uint8		count;
} RT_NODE;
394 :
395 :
396 : /* pointer returned by allocation */
397 : #ifdef RT_SHMEM
398 : #define RT_PTR_ALLOC dsa_pointer
399 : #define RT_INVALID_PTR_ALLOC InvalidDsaPointer
400 : #define RT_PTR_ALLOC_IS_VALID(ptr) DsaPointerIsValid(ptr)
401 : #else
402 : #define RT_PTR_ALLOC RT_NODE *
403 : #define RT_INVALID_PTR_ALLOC NULL
404 : #define RT_PTR_ALLOC_IS_VALID(ptr) PointerIsValid(ptr)
405 : #endif
406 :
/*
 * A convenience type used when we need to work with a DSA pointer as well
 * as its local pointer. For local memory, both members are the same, so
 * we use a union.
 */
#ifdef RT_SHMEM
typedef struct RT_CHILD_PTR
#else
typedef union RT_CHILD_PTR
#endif
{
	RT_PTR_ALLOC alloc;			/* allocation handle (DSA pointer in shmem) */
	RT_NODE    *local;			/* address valid in this backend */
} RT_CHILD_PTR;
421 :
422 :
423 : /*
424 : * Helper macros and functions for value storage.
425 : * We either embed values in the child slots of the last level
426 : * node or store pointers to values to the child slots,
427 : * depending on the value size.
428 : */
429 :
430 : #ifdef RT_VARLEN_VALUE_SIZE
431 : #define RT_GET_VALUE_SIZE(v) RT_VARLEN_VALUE_SIZE(v)
432 : #else
433 : #define RT_GET_VALUE_SIZE(v) sizeof(RT_VALUE_TYPE)
434 : #endif
435 :
/*
 * Return true if the value can be stored in the child array
 * of the lowest-level node, false otherwise.
 */
static inline bool
RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p)
{
#ifdef RT_VARLEN_VALUE_SIZE

#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
	/* variable-length value: embeddable only if it fits in a child slot */
	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
#else
	/* variable-length values without tagging are always single-value leaves */
	return false;
#endif

#else
	/* fixed-length value: this test is decidable at compile time */
	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
#endif
}
455 :
/*
 * Return true if the child pointer contains the value, false
 * if the child pointer is a leaf pointer.
 */
static inline bool
RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child)
{
#ifdef RT_VARLEN_VALUE_SIZE

#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
	/* check for pointer tag (lowest bit marks an embedded value) */
#ifdef RT_SHMEM
	return child & 1;
#else
	return ((uintptr_t) child) & 1;
#endif

#else
	/* variable-length values without tagging are never embedded */
	return false;
#endif

#else
	/* fixed-length values: embedded iff the type fits in a pointer */
	return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
#endif
}
481 :
482 : /*
483 : * Symbols for maximum possible fanout are declared first as they are
484 : * required to declare each node kind. The declarations of other fanout
485 : * values are followed as they need the struct sizes of each node kind.
486 : */
487 :
488 : /* max possible key chunks without struct padding */
489 : #define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE))
490 :
491 : /* equal to two 128-bit SIMD registers, regardless of availability */
492 : #define RT_FANOUT_16_MAX 32
493 :
494 : /*
495 : * This also determines the number of bits necessary for the isset array,
496 : * so we need to be mindful of the size of bitmapword. Since bitmapword
497 : * can be 64 bits, the only values that make sense here are 64 and 128.
498 : * The ART paper uses at most 64 for this node kind, and one advantage
499 : * for us is that "isset" is a single bitmapword on most platforms,
500 : * rather than an array, allowing the compiler to get rid of loops.
501 : */
502 : #define RT_FANOUT_48_MAX 64
503 :
504 : #define RT_FANOUT_256 RT_NODE_MAX_SLOTS
505 :
506 : /*
507 : * Node structs, one for each "kind"
508 : */
509 :
/*
 * node4 and node16 use one array for key chunks and another
 * array of the same length for children. The keys and children
 * are stored at corresponding positions, sorted by chunk.
 */

typedef struct RT_NODE_4
{
	RT_NODE		base;

	/* key chunks present in this node, kept sorted */
	uint8		chunks[RT_FANOUT_4_MAX];

	/* number of children depends on size class */
	RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
} RT_NODE_4;
525 :
typedef struct RT_NODE_16
{
	RT_NODE		base;

	/* key chunks present in this node, kept sorted */
	uint8		chunks[RT_FANOUT_16_MAX];

	/* number of children depends on size class */
	RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
} RT_NODE_16;
535 :
/*
 * node48 uses a 256-element array indexed by key chunks. This array
 * stores indexes into a second array containing the children.
 */
typedef struct RT_NODE_48
{
	RT_NODE		base;

	/* The index of slots for each fanout */
	uint8		slot_idxs[RT_NODE_MAX_SLOTS];

/* Invalid index */
#define RT_INVALID_SLOT_IDX	0xFF

	/* bitmap to track which slots are in use */
	bitmapword	isset[RT_BM_IDX(RT_FANOUT_48_MAX)];

	/* number of children depends on size class */
	RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
} RT_NODE_48;
556 :
/*
 * node256 is the largest node type. This node has an array of
 * children directly indexed by chunk. Unlike other node kinds,
 * its array size is by definition fixed.
 */
typedef struct RT_NODE_256
{
	RT_NODE		base;

	/* bitmap to track which slots are in use */
	bitmapword	isset[RT_BM_IDX(RT_FANOUT_256)];

	/* slots for 256 children */
	RT_PTR_ALLOC children[RT_FANOUT_256];
} RT_NODE_256;
572 :
573 : #if defined(RT_SHMEM)
574 : /*
575 : * Make sure the all nodes (except for node256) fit neatly into a DSA
576 : * size class. We assume the RT_FANOUT_4 is in the range where DSA size
577 : * classes increment by 8 (as of PG17 up to 64 bytes), so we just hard
578 : * code that one.
579 : */
580 :
581 : #if SIZEOF_DSA_POINTER < 8
582 : #define RT_FANOUT_16_LO ((96 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
583 : #define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
584 : #define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
585 : #else
586 : #define RT_FANOUT_16_LO ((160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
587 : #define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (320 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
588 : #define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
589 : #endif /* SIZEOF_DSA_POINTER < 8 */
590 :
591 : #else /* ! RT_SHMEM */
592 :
593 : /* doesn't really matter, but may as well use the namesake */
594 : #define RT_FANOUT_16_LO 16
595 : /* use maximum possible */
596 : #define RT_FANOUT_16_HI RT_FANOUT_16_MAX
597 : #define RT_FANOUT_48 RT_FANOUT_48_MAX
598 :
599 : #endif /* RT_SHMEM */
600 :
601 : /*
602 : * To save memory in trees with sparse keys, it would make sense to have two
603 : * size classes for the smallest kind (perhaps a high class of 5 and a low class
604 : * of 2), but it would be more effective to utilize lazy expansion and
605 : * path compression.
606 : */
607 : #define RT_FANOUT_4 4
608 :
609 : StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding");
610 : StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than HI");
611 : StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset bits");
612 :
/*
 * Node size classes
 *
 * Nodes of different kinds necessarily belong to different size classes.
 * One innovation in our implementation compared to the ART paper is
 * decoupling the notion of size class from kind.
 *
 * The size classes within a given node kind have the same underlying
 * type, but a variable number of children/values. This is possible
 * because each type (except node256) contains metadata that work the
 * same way regardless of how many child slots there are. The nodes
 * can introspect their allocated capacity at runtime.
 */
typedef enum RT_SIZE_CLASS
{
	RT_CLASS_4 = 0,
	RT_CLASS_16_LO,
	RT_CLASS_16_HI,
	RT_CLASS_48,
	RT_CLASS_256
} RT_SIZE_CLASS;
634 :
/* Information for each size class */
typedef struct RT_SIZE_CLASS_ELEM
{
	const char *name;			/* human-readable name, used for the slab context */
	int			fanout;			/* max number of children for this class */
	size_t		allocsize;		/* bytes to allocate for one node of this class */
} RT_SIZE_CLASS_ELEM;
642 :
643 :
/* Parameters of each size class, indexed by RT_SIZE_CLASS */
static const RT_SIZE_CLASS_ELEM RT_SIZE_CLASS_INFO[] = {
	[RT_CLASS_4] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node4",
		.fanout = RT_FANOUT_4,
		.allocsize = sizeof(RT_NODE_4) + RT_FANOUT_4 * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_16_LO] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node16_lo",
		.fanout = RT_FANOUT_16_LO,
		.allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_LO * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_16_HI] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node16_hi",
		.fanout = RT_FANOUT_16_HI,
		.allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_HI * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_48] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node48",
		.fanout = RT_FANOUT_48,
		.allocsize = sizeof(RT_NODE_48) + RT_FANOUT_48 * sizeof(RT_PTR_ALLOC),
	},
	[RT_CLASS_256] = {
		.name = RT_STR(RT_PREFIX) "_radix_tree node256",
		.fanout = RT_FANOUT_256,
		/* node256 has no flexible array member; size is fixed */
		.allocsize = sizeof(RT_NODE_256),
	},
};
671 :
672 : #define RT_NUM_SIZE_CLASSES lengthof(RT_SIZE_CLASS_INFO)
673 :
674 : #ifdef RT_SHMEM
675 : /* A magic value used to identify our radix tree */
676 : #define RT_RADIX_TREE_MAGIC 0x54A48167
677 : #endif
678 :
/* Contains the actual tree, plus ancillary info */
typedef struct RT_RADIX_TREE_CONTROL
{
#ifdef RT_SHMEM
	RT_HANDLE	handle;			/* handle by which other backends can attach */
	uint32		magic;			/* sanity check; presumably RT_RADIX_TREE_MAGIC
								 * -- confirm at set site */
	LWLock		lock;			/* single reader-writer lock for the tree */
#endif

	RT_PTR_ALLOC root;			/* root node of the tree */
	uint64		max_val;		/* largest key currently storable without
								 * growing the tree upward */
	int64		num_keys;		/* number of keys stored in the tree */
	int			start_shift;	/* key shift in effect at the root level */

	/* statistics */
#ifdef RT_DEBUG
	int64		num_nodes[RT_NUM_SIZE_CLASSES]; /* live nodes per size class */
	int64		num_leaves;		/* live single-value leaves */
#endif
} RT_RADIX_TREE_CONTROL;
699 :
/* Entry point for allocating and accessing the tree */
struct RT_RADIX_TREE
{
	MemoryContext context;		/* context this struct was allocated in */

	/* pointing to either local memory or DSA */
	RT_RADIX_TREE_CONTROL *ctl;

#ifdef RT_SHMEM
	dsa_area   *dsa;			/* DSA area holding all tree allocations */
#else
	/* one slab context per node size class */
	MemoryContextData *node_slabs[RT_NUM_SIZE_CLASSES];

	/* leaf_context is used only for single-value leaves */
	MemoryContextData *leaf_context;
#endif
	MemoryContextData *iter_context;	/* context for iterator state */
};
718 :
719 : /*
720 : * Iteration support.
721 : *
722 : * Iterating over the radix tree produces each key/value pair in ascending
723 : * order of the key.
724 : */
725 :
/* state for iterating over a single node */
typedef struct RT_NODE_ITER
{
	RT_CHILD_PTR node;			/* the node being iterated over */

	/*
	 * The next index of the chunk array in RT_NODE_KIND_4 and RT_NODE_KIND_16
	 * nodes, or the next chunk in RT_NODE_KIND_48 and RT_NODE_KIND_256 nodes.
	 * 0 for the initial value.
	 */
	int			idx;
} RT_NODE_ITER;
738 :
/* state for iterating over the whole radix tree */
struct RT_ITER
{
	RT_RADIX_TREE *tree;		/* tree being iterated */

	/*
	 * A stack to track iteration for each level. Level 0 is the lowest (or
	 * leaf) level
	 */
	RT_NODE_ITER node_iters[RT_MAX_LEVEL];
	int			top_level;		/* index of the root's level in node_iters */
	int			cur_level;		/* level currently being advanced */

	/* The key constructed during iteration */
	uint64		key;
};
755 :
756 :
757 : /* verification (available only in assert-enabled builds) */
758 : static void RT_VERIFY_NODE(RT_NODE * node);
759 :
/*
 * Set the "local" member of a child pointer to an address usable in this
 * backend.  For shared memory, translate the DSA pointer; for local memory
 * the union members alias each other, so there is nothing to do.
 */
static inline void
RT_PTR_SET_LOCAL(RT_RADIX_TREE * tree, RT_CHILD_PTR * node)
{
#ifdef RT_SHMEM
	node->local = dsa_get_address(tree->dsa, node->alloc);
#endif
}
767 :
768 : /* Convenience functions for node48 and node256 */
769 :
770 : /* Return true if there is an entry for "chunk" */
771 : static inline bool
772 1056970 : RT_NODE_48_IS_CHUNK_USED(RT_NODE_48 * node, uint8 chunk)
773 : {
774 1056970 : return node->slot_idxs[chunk] != RT_INVALID_SLOT_IDX;
775 : }
776 :
777 : static inline RT_PTR_ALLOC *
778 1069452 : RT_NODE_48_GET_CHILD(RT_NODE_48 * node, uint8 chunk)
779 : {
780 1069452 : return &node->children[node->slot_idxs[chunk]];
781 : }
782 :
783 : /* Return true if there is an entry for "chunk" */
784 : static inline bool
785 7761228 : RT_NODE_256_IS_CHUNK_USED(RT_NODE_256 * node, uint8 chunk)
786 : {
787 7761228 : int idx = RT_BM_IDX(chunk);
788 7761228 : int bitnum = RT_BM_BIT(chunk);
789 :
790 7761228 : return (node->isset[idx] & ((bitmapword) 1 << bitnum)) != 0;
791 : }
792 :
793 : static inline RT_PTR_ALLOC *
794 7639772 : RT_NODE_256_GET_CHILD(RT_NODE_256 * node, uint8 chunk)
795 : {
796 : Assert(RT_NODE_256_IS_CHUNK_USED(node, chunk));
797 7639772 : return &node->children[chunk];
798 : }
799 :
800 : /*
801 : * Return the smallest shift that will allowing storing the given key.
802 : */
803 : static inline int
804 20264 : RT_KEY_GET_SHIFT(uint64 key)
805 : {
806 20264 : if (key == 0)
807 0 : return 0;
808 : else
809 20264 : return (pg_leftmost_one_pos64(key) / RT_SPAN) * RT_SPAN;
810 : }
811 :
812 : /*
813 : * Return the max value that can be stored in the tree with the given shift.
814 : */
815 : static uint64
816 20200 : RT_SHIFT_GET_MAX_VAL(int shift)
817 : {
818 20200 : if (shift == RT_MAX_SHIFT)
819 20 : return UINT64_MAX;
820 : else
821 20180 : return (UINT64CONST(1) << (shift + RT_SPAN)) - 1;
822 : }
823 :
/*
 * Allocate a new node with the given node kind and size class.
 *
 * Returns a child pointer whose "local" member is valid in this backend.
 * The node's "count" starts at zero; the caller links it into the tree.
 */
static inline RT_CHILD_PTR
RT_ALLOC_NODE(RT_RADIX_TREE * tree, const uint8 kind, const RT_SIZE_CLASS size_class)
{
	RT_CHILD_PTR allocnode;
	RT_NODE    *node;
	size_t		allocsize;

	allocsize = RT_SIZE_CLASS_INFO[size_class].allocsize;

#ifdef RT_SHMEM
	allocnode.alloc = dsa_allocate(tree->dsa, allocsize);
#else
	/* each size class has its own slab context */
	allocnode.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->node_slabs[size_class],
														allocsize);
#endif

	RT_PTR_SET_LOCAL(tree, &allocnode);
	node = allocnode.local;

	/* initialize contents */

	/* zero the common header (kind, fanout, count) */
	memset(node, 0, sizeof(RT_NODE));
	switch (kind)
	{
		case RT_NODE_KIND_4:
		case RT_NODE_KIND_16:
			/* nothing else to initialize for these kinds */
			break;
		case RT_NODE_KIND_48:
			{
				RT_NODE_48 *n48 = (RT_NODE_48 *) node;

				/* mark all slots free and all chunks unused */
				memset(n48->isset, 0, sizeof(n48->isset));
				memset(n48->slot_idxs, RT_INVALID_SLOT_IDX, sizeof(n48->slot_idxs));
				break;
			}
		case RT_NODE_KIND_256:
			{
				RT_NODE_256 *n256 = (RT_NODE_256 *) node;

				/* mark all chunks unused */
				memset(n256->isset, 0, sizeof(n256->isset));
				break;
			}
		default:
			pg_unreachable();
	}

	node->kind = kind;

	/*
	 * For node256, this will actually overflow to zero, but that's okay
	 * because that node doesn't need to introspect this value.
	 */
	node->fanout = RT_SIZE_CLASS_INFO[size_class].fanout;

#ifdef RT_DEBUG
	/* update the statistics */
	tree->ctl->num_nodes[size_class]++;
#endif

	return allocnode;
}
888 :
889 : /*
890 : * Allocate a new leaf.
891 : */
892 : static RT_CHILD_PTR
893 19096 : RT_ALLOC_LEAF(RT_RADIX_TREE * tree, size_t allocsize)
894 : {
895 : RT_CHILD_PTR leaf;
896 :
897 : #ifdef RT_SHMEM
898 472 : leaf.alloc = dsa_allocate(tree->dsa, allocsize);
899 472 : RT_PTR_SET_LOCAL(tree, &leaf);
900 : #else
901 18624 : leaf.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->leaf_context, allocsize); /* NOTE(review): no RT_PTR_SET_LOCAL here — presumably alloc and local alias in non-shmem builds; confirm against RT_CHILD_PTR definition */
902 : #endif
903 :
904 : #ifdef RT_DEBUG
905 0 : tree->ctl->num_leaves++;
906 : #endif
907 :
908 19096 : return leaf;
909 : }
910 :
911 : /*
912 : * Copy relevant members of the node header.
913 : * This is a separate function in case other fields are added.
914 : */
915 : static inline void
916 21538 : RT_COPY_COMMON(RT_CHILD_PTR newnode, RT_CHILD_PTR oldnode)
917 : {
918 21538 : (newnode.local)->count = (oldnode.local)->count; /* "kind" and "fanout" were already set by RT_ALLOC_NODE */
919 21538 : }
920 :
921 : /* Free the given node */
922 : static void
923 52978 : RT_FREE_NODE(RT_RADIX_TREE * tree, RT_CHILD_PTR node)
924 : {
925 : #ifdef RT_DEBUG
926 : int i;
927 :
928 : /* update the statistics */
929 :
930 80990 : for (i = 0; i < RT_NUM_SIZE_CLASSES; i++) /* infer the size class from the stored fanout */
931 : {
932 80958 : if ((node.local)->fanout == RT_SIZE_CLASS_INFO[i].fanout)
933 52044 : break;
934 : }
935 :
936 : /*
937 : * The fanout of node256 will appear to be zero within the node header
938 : * because of overflow, so we need an extra check here.
939 : */
940 52076 : if (i == RT_NUM_SIZE_CLASSES)
941 32 : i = RT_CLASS_256;
942 :
943 52076 : tree->ctl->num_nodes[i]--;
944 : Assert(tree->ctl->num_nodes[i] >= 0);
945 : #endif
946 :
947 : #ifdef RT_SHMEM
948 30 : dsa_free(tree->dsa, node.alloc);
949 : #else
950 52948 : pfree(node.alloc);
951 : #endif
952 : }
953 :
954 : static inline void /* Free a leaf previously allocated by RT_ALLOC_LEAF */
955 6 : RT_FREE_LEAF(RT_RADIX_TREE * tree, RT_PTR_ALLOC leaf)
956 : {
957 : Assert(leaf != tree->ctl->root);
958 :
959 : #ifdef RT_DEBUG
960 : /* update the statistics */
961 0 : tree->ctl->num_leaves--;
962 : Assert(tree->ctl->num_leaves >= 0);
963 : #endif
964 :
965 : #ifdef RT_SHMEM
966 0 : dsa_free(tree->dsa, leaf);
967 : #else
968 6 : pfree(leaf);
969 : #endif
970 : }
971 :
972 : /***************** SEARCH *****************/
973 :
974 : /*
975 : * Return the address of the child corresponding to "chunk",
976 : * or NULL if there is no such element.
977 : */
978 : static inline RT_PTR_ALLOC *
979 5784654 : RT_NODE_16_SEARCH_EQ(RT_NODE_16 * node, uint8 chunk)
980 : {
981 5784654 : int count = node->base.count;
982 : #ifndef USE_NO_SIMD
983 : Vector8 spread_chunk;
984 : Vector8 haystack1;
985 : Vector8 haystack2;
986 : Vector8 cmp1;
987 : Vector8 cmp2;
988 : uint32 bitfield;
989 5784654 : RT_PTR_ALLOC *slot_simd = NULL;
990 : #endif
991 :
992 : #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
993 : RT_PTR_ALLOC *slot = NULL; /* scalar fallback, also used to cross-check SIMD under assert builds */
994 :
995 : for (int i = 0; i < count; i++)
996 : {
997 : if (node->chunks[i] == chunk)
998 : {
999 : slot = &node->children[i];
1000 : break;
1001 : }
1002 : }
1003 : #endif
1004 :
1005 : #ifndef USE_NO_SIMD
1006 : /* replicate the search key */
1007 5784654 : spread_chunk = vector8_broadcast(chunk);
1008 :
1009 : /* compare to all 32 keys stored in the node */
1010 5784654 : vector8_load(&haystack1, &node->chunks[0]);
1011 5784654 : vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
1012 5784654 : cmp1 = vector8_eq(spread_chunk, haystack1);
1013 5784654 : cmp2 = vector8_eq(spread_chunk, haystack2);
1014 :
1015 : /* convert comparison to a bitfield */
1016 5784654 : bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
1017 :
1018 : /* mask off invalid entries */
1019 5784654 : bitfield &= ((UINT64CONST(1) << count) - 1); /* only the first "count" lanes hold live chunks */
1020 :
1021 : /* convert bitfield to index by counting trailing zeros */
1022 5784654 : if (bitfield)
1023 5397048 : slot_simd = &node->children[pg_rightmost_one_pos32(bitfield)];
1024 :
1025 : Assert(slot_simd == slot); /* SIMD result must agree with the scalar loop */
1026 5784654 : return slot_simd;
1027 : #else
1028 : return slot;
1029 : #endif
1030 : }
1031 :
1032 : /*
1033 : * Search for the child pointer corresponding to "key" in the given node.
1034 : *
1035 : * Return child if the key is found, otherwise return NULL.
1036 : */
1037 : static inline RT_PTR_ALLOC *
1038 26642284 : RT_NODE_SEARCH(RT_NODE * node, uint8 chunk)
1039 : {
1040 : /* Make sure we already converted to local pointer */
1041 : Assert(node != NULL);
1042 :
1043 26642284 : switch (node->kind)
1044 : {
1045 11651806 : case RT_NODE_KIND_4: /* small sorted array: plain linear scan */
1046 : {
1047 11651806 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
1048 :
1049 14313756 : for (int i = 0; i < n4->base.count; i++)
1050 : {
1051 13479072 : if (n4->chunks[i] == chunk)
1052 10817122 : return &n4->children[i];
1053 : }
1054 834684 : return NULL;
1055 : }
1056 5784654 : case RT_NODE_KIND_16: /* vectorized equality search */
1057 5784654 : return RT_NODE_16_SEARCH_EQ((RT_NODE_16 *) node, chunk);
1058 1481460 : case RT_NODE_KIND_48: /* indirection array maps chunk -> slot */
1059 : {
1060 1481460 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
1061 1481460 : int slotpos = n48->slot_idxs[chunk];
1062 :
1063 1481460 : if (slotpos == RT_INVALID_SLOT_IDX)
1064 596450 : return NULL;
1065 :
1066 885010 : return RT_NODE_48_GET_CHILD(n48, chunk);
1067 : }
1068 7724364 : case RT_NODE_KIND_256: /* direct array indexed by chunk */
1069 : {
1070 7724364 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
1071 :
1072 7724364 : if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
1073 125402 : return NULL;
1074 :
1075 7598962 : return RT_NODE_256_GET_CHILD(n256, chunk);
1076 : }
1077 0 : default:
1078 0 : pg_unreachable();
1079 : }
1080 : }
1081 :
1082 : /*
1083 : * Search the given key in the radix tree. Return the pointer to the value if found,
1084 : * otherwise return NULL.
1085 : *
1086 : * Since the function returns a pointer (to support variable-length values),
1087 : * the caller is responsible for locking until it's finished with the value.
1088 : */
1089 : RT_SCOPE RT_VALUE_TYPE *
1090 9914446 : RT_FIND(RT_RADIX_TREE * tree, uint64 key)
1091 : {
1092 : RT_CHILD_PTR node;
1093 9914446 : RT_PTR_ALLOC *slot = NULL;
1094 : int shift;
1095 :
1096 : #ifdef RT_SHMEM
1097 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1098 : #endif
1099 :
1100 9914446 : if (key > tree->ctl->max_val) /* key exceeds the tree's current height, so it cannot be present */
1101 2866 : return NULL;
1102 :
1103 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
1104 9911580 : node.alloc = tree->ctl->root;
1105 9911580 : shift = tree->ctl->start_shift;
1106 :
1107 : /* Descend the tree */
1108 33161442 : while (shift >= 0)
1109 : {
1110 24962804 : RT_PTR_SET_LOCAL(tree, &node);
1111 24962804 : slot = RT_NODE_SEARCH(node.local, RT_GET_KEY_CHUNK(key, shift));
1112 24962804 : if (slot == NULL)
1113 1712942 : return NULL;
1114 :
1115 23249862 : node.alloc = *slot;
1116 23249862 : shift -= RT_SPAN;
1117 : }
1118 :
1119 8198638 : if (RT_CHILDPTR_IS_VALUE(*slot))
1120 478364 : return (RT_VALUE_TYPE *) slot; /* value is embedded directly in the child pointer slot */
1121 : else
1122 : {
1123 7720274 : RT_PTR_SET_LOCAL(tree, &node);
1124 7720274 : return (RT_VALUE_TYPE *) node.local; /* value lives in a separately-allocated leaf */
1125 : }
1126 : }
1127 :
1128 : /***************** INSERTION *****************/
1129 :
1130 : #define RT_NODE_MUST_GROW(node) /* true when the child array is full (not used for node256) */ \
1131 : ((node)->count == (node)->fanout)
1132 :
1133 : /*
1134 : * Return index of the chunk and slot arrays for inserting into the node,
1135 : * such that the arrays remain ordered.
1136 : */
1137 : static inline int
1138 19670 : RT_NODE_4_GET_INSERTPOS(RT_NODE_4 * node, uint8 chunk, int count)
1139 : {
1140 : int idx;
1141 :
1142 45058 : for (idx = 0; idx < count; idx++)
1143 : {
1144 36832 : if (node->chunks[idx] >= chunk)
1145 11444 : break;
1146 : }
1147 :
1148 19670 : return idx; /* first index whose chunk is >= the new chunk, or "count" if none */
1149 : }
1150 :
1151 : /*
1152 : * Return index of the chunk and slot arrays for inserting into the node,
1153 : * such that the arrays remain ordered.
1154 : */
1155 : static inline int
1156 118990 : RT_NODE_16_GET_INSERTPOS(RT_NODE_16 * node, uint8 chunk)
1157 : {
1158 118990 : int count = node->base.count;
1159 : #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
1160 : int index;
1161 : #endif
1162 :
1163 : #ifndef USE_NO_SIMD
1164 : Vector8 spread_chunk;
1165 : Vector8 haystack1;
1166 : Vector8 haystack2;
1167 : Vector8 cmp1;
1168 : Vector8 cmp2;
1169 : Vector8 min1;
1170 : Vector8 min2;
1171 : uint32 bitfield;
1172 : int index_simd;
1173 : #endif
1174 :
1175 : /*
1176 : * First compare the last element. There are two reasons to branch here:
1177 : *
1178 : * 1) A realistic pattern is inserting ordered keys. In that case,
1179 : * non-SIMD platforms must do a linear search to the last chunk to find
1180 : * the insert position. This will get slower as the node fills up.
1181 : *
1182 : * 2) On SIMD platforms, we must branch anyway to make sure we don't bit
1183 : * scan an empty bitfield. Doing the branch here eliminates some work that
1184 : * we might otherwise throw away.
1185 : */
1186 : Assert(count > 0);
1187 118990 : if (node->chunks[count - 1] < chunk)
1188 14292 : return count; /* new chunk sorts after all existing entries */
1189 :
1190 : #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
1191 :
1192 : for (index = 0; index < count; index++)
1193 : {
1194 : if (node->chunks[index] > chunk)
1195 : break;
1196 : }
1197 : #endif
1198 :
1199 : #ifndef USE_NO_SIMD
1200 :
1201 : /*
1202 : * This is a bit more complicated than RT_NODE_16_SEARCH_EQ(), because no
1203 : * unsigned uint8 comparison instruction exists, at least for SSE2. So we
1204 : * need to play some trickery using vector8_min() to effectively get >=.
1205 : * There'll never be any equal elements in current uses, but that's what
1206 : * we get here...
1207 : */
1208 104698 : spread_chunk = vector8_broadcast(chunk);
1209 104698 : vector8_load(&haystack1, &node->chunks[0]);
1210 104698 : vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
1211 104698 : min1 = vector8_min(spread_chunk, haystack1);
1212 104698 : min2 = vector8_min(spread_chunk, haystack2);
1213 104698 : cmp1 = vector8_eq(spread_chunk, min1); /* lane is set where chunk <= haystack element */
1214 104698 : cmp2 = vector8_eq(spread_chunk, min2);
1215 104698 : bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
1216 :
1217 : Assert((bitfield & ((UINT64CONST(1) << count) - 1)) != 0);
1218 104698 : index_simd = pg_rightmost_one_pos32(bitfield); /* first position where the new chunk fits */
1219 :
1220 : Assert(index_simd == index);
1221 104698 : return index_simd;
1222 : #else
1223 : return index;
1224 : #endif
1225 : }
1226 :
1227 : /* Shift the elements right at "insertpos" by one */
1228 : static inline void
1229 129718 : RT_SHIFT_ARRAYS_FOR_INSERT(uint8 *chunks, RT_PTR_ALLOC * children, int count, int insertpos)
1230 : {
1231 : /*
1232 : * This is basically a memmove, but written in a simple loop for speed on
1233 : * small inputs.
1234 : */
1235 1118624 : for (int i = count - 1; i >= insertpos; i--)
1236 : {
1237 : /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
1238 : #ifdef __GNUC__
1239 988906 : __asm__(""); /* empty asm statement acts as an optimization barrier for the cited GCC bug */
1240 : #endif
1241 988906 : chunks[i + 1] = chunks[i];
1242 988906 : children[i + 1] = children[i];
1243 : }
1244 : }
1245 :
1246 : /*
1247 : * Copy both chunk and slot arrays into the right
1248 : * place. The caller is responsible for inserting the new element.
1249 : */
1250 : static inline void
1251 8942 : RT_COPY_ARRAYS_FOR_INSERT(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
1252 : uint8 *src_chunks, RT_PTR_ALLOC * src_children,
1253 : int count, int insertpos)
1254 : {
1255 97092 : for (int i = 0; i < count; i++) /* leaves dst index "insertpos" unwritten for the new element */
1256 : {
1257 88150 : int sourceidx = i;
1258 :
1259 : /* use a branch-free computation to skip the index of the new element */
1260 88150 : int destidx = i + (i >= insertpos);
1261 :
1262 88150 : dst_chunks[destidx] = src_chunks[sourceidx];
1263 88150 : dst_children[destidx] = src_children[sourceidx];
1264 : }
1265 : }
1266 :
1267 : static inline RT_PTR_ALLOC * /* Reserve the slot for "chunk" in a node256; cannot fail since there is a slot per possible chunk */
1268 18346 : RT_ADD_CHILD_256(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1269 : {
1270 18346 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
1271 18346 : int idx = RT_BM_IDX(chunk);
1272 18346 : int bitnum = RT_BM_BIT(chunk);
1273 :
1274 : /* Mark the slot used for "chunk" */
1275 18346 : n256->isset[idx] |= ((bitmapword) 1 << bitnum);
1276 :
1277 18346 : n256->base.count++;
1278 18346 : RT_VERIFY_NODE((RT_NODE *) n256);
1279 :
1280 18346 : return RT_NODE_256_GET_CHILD(n256, chunk);
1281 : }
1282 :
1283 : static pg_noinline RT_PTR_ALLOC * /* Replace a full node48 with a node256, then reserve a slot for "chunk" */
1284 110 : RT_GROW_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1285 : uint8 chunk)
1286 : {
1287 110 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1288 : RT_CHILD_PTR newnode;
1289 : RT_NODE_256 *new256;
1290 110 : int i = 0;
1291 :
1292 : /* initialize new node */
1293 110 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_256, RT_CLASS_256);
1294 110 : new256 = (RT_NODE_256 *) newnode.local;
1295 :
1296 : /* copy over the entries */
1297 110 : RT_COPY_COMMON(newnode, node);
1298 550 : for (int word_num = 0; word_num < RT_BM_IDX(RT_NODE_MAX_SLOTS); word_num++)
1299 : {
1300 440 : bitmapword bitmap = 0;
1301 :
1302 : /*
1303 : * Bit manipulation is a surprisingly large portion of the overhead in
1304 : * the naive implementation. Doing stores word-at-a-time removes a lot
1305 : * of that overhead.
1306 : */
1307 28600 : for (int bit = 0; bit < BITS_PER_BITMAPWORD; bit++)
1308 : {
1309 28160 : uint8 offset = n48->slot_idxs[i]; /* i is the chunk value; offset indexes the old children array */
1310 :
1311 28160 : if (offset != RT_INVALID_SLOT_IDX)
1312 : {
1313 7036 : bitmap |= ((bitmapword) 1 << bit);
1314 7036 : new256->children[i] = n48->children[offset];
1315 : }
1316 :
1317 28160 : i++;
1318 : }
1319 :
1320 440 : new256->isset[word_num] = bitmap; /* one store per bitmapword rather than per chunk */
1321 : }
1322 :
1323 : /* free old node and update reference in parent */
1324 110 : *parent_slot = newnode.alloc;
1325 110 : RT_FREE_NODE(tree, node);
1326 :
1327 110 : return RT_ADD_CHILD_256(tree, newnode, chunk);
1328 : }
1329 :
1330 : static inline RT_PTR_ALLOC * /* Reserve a free slot for "chunk" in a non-full node48 */
1331 51942 : RT_ADD_CHILD_48(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1332 : {
1333 51942 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1334 : int insertpos;
1335 51942 : int idx = 0;
1336 : bitmapword w,
1337 : inverse;
1338 :
1339 : /* get the first word with at least one bit not set */
1340 51942 : for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++)
1341 : {
1342 51942 : w = n48->isset[i];
1343 51942 : if (w < ~((bitmapword) 0)) /* i.e. some bit in w is still zero */
1344 : {
1345 51942 : idx = i;
1346 51942 : break;
1347 : }
1348 : }
1349 :
1350 : /* To get the first unset bit in w, get the first set bit in ~w */
1351 51942 : inverse = ~w;
1352 51942 : insertpos = idx * BITS_PER_BITMAPWORD;
1353 51942 : insertpos += bmw_rightmost_one_pos(inverse);
1354 : Assert(insertpos < n48->base.fanout);
1355 :
1356 : /* mark the slot used by setting the rightmost zero bit */
1357 51942 : n48->isset[idx] |= w + 1; /* w + 1 has exactly the rightmost zero bit of w set */
1358 :
1359 : /* insert new chunk into place */
1360 51942 : n48->slot_idxs[chunk] = insertpos;
1361 :
1362 51942 : n48->base.count++;
1363 51942 : RT_VERIFY_NODE((RT_NODE *) n48);
1364 :
1365 51942 : return &n48->children[insertpos];
1366 : }
1367 :
1368 : static pg_noinline RT_PTR_ALLOC * /* Grow a full node16: either to the higher node16 size class, or to a node48 */
1369 8608 : RT_GROW_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1370 : uint8 chunk)
1371 : {
1372 8608 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1373 : int insertpos;
1374 :
1375 8608 : if (n16->base.fanout < RT_FANOUT_16_HI) /* still in the low size class: same kind, bigger allocation */
1376 : {
1377 : RT_CHILD_PTR newnode;
1378 : RT_NODE_16 *new16;
1379 :
1380 : Assert(n16->base.fanout == RT_FANOUT_16_LO);
1381 :
1382 : /* initialize new node */
1383 4366 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_HI);
1384 4366 : new16 = (RT_NODE_16 *) newnode.local;
1385 :
1386 : /* copy over existing entries */
1387 4366 : RT_COPY_COMMON(newnode, node);
1388 : Assert(n16->base.count == RT_FANOUT_16_LO);
1389 4366 : insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1390 4366 : RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
1391 4366 : n16->chunks, n16->children,
1392 : RT_FANOUT_16_LO, insertpos);
1393 :
1394 : /* insert new chunk into place */
1395 4366 : new16->chunks[insertpos] = chunk;
1396 :
1397 4366 : new16->base.count++;
1398 4366 : RT_VERIFY_NODE((RT_NODE *) new16);
1399 :
1400 : /* free old node and update references */
1401 4366 : RT_FREE_NODE(tree, node);
1402 4366 : *parent_slot = newnode.alloc;
1403 :
1404 4366 : return &new16->children[insertpos];
1405 : }
1406 : else
1407 : {
1408 : RT_CHILD_PTR newnode;
1409 : RT_NODE_48 *new48;
1410 : int idx,
1411 : bit;
1412 :
1413 : Assert(n16->base.fanout == RT_FANOUT_16_HI);
1414 :
1415 : /* initialize new node */
1416 4242 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
1417 4242 : new48 = (RT_NODE_48 *) newnode.local;
1418 :
1419 : /* copy over the entries */
1420 4242 : RT_COPY_COMMON(newnode, node);
1421 139986 : for (int i = 0; i < RT_FANOUT_16_HI; i++)
1422 135744 : new48->slot_idxs[n16->chunks[i]] = i;
1423 4242 : memcpy(&new48->children[0], &n16->children[0], RT_FANOUT_16_HI * sizeof(new48->children[0]));
1424 :
1425 : /*
1426 : * Since we just copied a dense array, we can fill "isset" using a
1427 : * single store, provided the length of that array is at most the
1428 : * number of bits in a bitmapword.
1429 : */
1430 : Assert(RT_FANOUT_16_HI <= BITS_PER_BITMAPWORD);
1431 4242 : new48->isset[0] = (bitmapword) (((uint64) 1 << RT_FANOUT_16_HI) - 1);
1432 :
1433 : /* put the new child at the end of the copied entries */
1434 4242 : insertpos = RT_FANOUT_16_HI;
1435 4242 : idx = RT_BM_IDX(insertpos);
1436 4242 : bit = RT_BM_BIT(insertpos);
1437 :
1438 : /* mark the slot used */
1439 4242 : new48->isset[idx] |= ((bitmapword) 1 << bit);
1440 :
1441 : /* insert new chunk into place */
1442 4242 : new48->slot_idxs[chunk] = insertpos;
1443 :
1444 4242 : new48->base.count++;
1445 4242 : RT_VERIFY_NODE((RT_NODE *) new48);
1446 :
1447 : /* free old node and update reference in parent */
1448 4242 : *parent_slot = newnode.alloc;
1449 4242 : RT_FREE_NODE(tree, node);
1450 :
1451 4242 : return &new48->children[insertpos];
1452 : }
1453 : }
1454 :
1455 : static inline RT_PTR_ALLOC * /* Reserve a slot for "chunk" in a non-full node16, keeping the arrays sorted */
1456 114624 : RT_ADD_CHILD_16(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1457 : {
1458 114624 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1459 114624 : int insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1460 :
1461 : /* shift chunks and children */
1462 114624 : RT_SHIFT_ARRAYS_FOR_INSERT(n16->chunks, n16->children,
1463 114624 : n16->base.count, insertpos);
1464 :
1465 : /* insert new chunk into place */
1466 114624 : n16->chunks[insertpos] = chunk;
1467 :
1468 114624 : n16->base.count++;
1469 114624 : RT_VERIFY_NODE((RT_NODE *) n16);
1470 :
1471 114624 : return &n16->children[insertpos];
1472 : }
1473 :
1474 : static pg_noinline RT_PTR_ALLOC * /* Replace a full node4 with a node16 (low size class), then reserve a slot for "chunk" */
1475 4576 : RT_GROW_NODE_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1476 : uint8 chunk)
1477 : {
1478 4576 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1479 : RT_CHILD_PTR newnode;
1480 : RT_NODE_16 *new16;
1481 : int insertpos;
1482 :
1483 : /* initialize new node */
1484 4576 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
1485 4576 : new16 = (RT_NODE_16 *) newnode.local;
1486 :
1487 : /* copy over existing entries */
1488 4576 : RT_COPY_COMMON(newnode, node);
1489 : Assert(n4->base.count == RT_FANOUT_4);
1490 4576 : insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, RT_FANOUT_4);
1491 4576 : RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
1492 4576 : n4->chunks, n4->children,
1493 : RT_FANOUT_4, insertpos);
1494 :
1495 : /* insert new chunk into place */
1496 4576 : new16->chunks[insertpos] = chunk;
1497 :
1498 4576 : new16->base.count++;
1499 4576 : RT_VERIFY_NODE((RT_NODE *) new16);
1500 :
1501 : /* free old node and update reference in parent */
1502 4576 : *parent_slot = newnode.alloc;
1503 4576 : RT_FREE_NODE(tree, node);
1504 :
1505 4576 : return &new16->children[insertpos];
1506 : }
1507 :
1508 : static inline RT_PTR_ALLOC * /* Reserve a slot for "chunk" in a non-full node4, keeping the arrays sorted */
1509 15094 : RT_ADD_CHILD_4(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
1510 : {
1511 15094 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1512 15094 : int count = n4->base.count;
1513 15094 : int insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, count);
1514 :
1515 : /* shift chunks and children */
1516 15094 : RT_SHIFT_ARRAYS_FOR_INSERT(n4->chunks, n4->children,
1517 : count, insertpos);
1518 :
1519 : /* insert new chunk into place */
1520 15094 : n4->chunks[insertpos] = chunk;
1521 :
1522 15094 : n4->base.count++;
1523 15094 : RT_VERIFY_NODE((RT_NODE *) n4);
1524 :
1525 15094 : return &n4->children[insertpos];
1526 : }
1527 :
1528 : /*
1529 : * Reserve slot in "node"'s child array. The caller will populate it
1530 : * with the actual child pointer.
1531 : *
1532 : * "parent_slot" is the address of the parent's child pointer to "node".
1533 : * If the node we're inserting into needs to grow, we update the parent's
1534 : * child pointer with the pointer to the new larger node.
1535 : */
1536 : static inline RT_PTR_ALLOC *
1537 213190 : RT_NODE_INSERT(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
1538 : uint8 chunk)
1539 : {
1540 213190 : RT_NODE *n = node.local;
1541 :
1542 213190 : switch (n->kind)
1543 : {
1544 19670 : case RT_NODE_KIND_4:
1545 : {
1546 19670 : if (unlikely(RT_NODE_MUST_GROW(n)))
1547 4576 : return RT_GROW_NODE_4(tree, parent_slot, node, chunk);
1548 :
1549 15094 : return RT_ADD_CHILD_4(tree, node, chunk);
1550 : }
1551 123232 : case RT_NODE_KIND_16:
1552 : {
1553 123232 : if (unlikely(RT_NODE_MUST_GROW(n)))
1554 8608 : return RT_GROW_NODE_16(tree, parent_slot, node, chunk);
1555 :
1556 114624 : return RT_ADD_CHILD_16(tree, node, chunk);
1557 : }
1558 52052 : case RT_NODE_KIND_48:
1559 : {
1560 52052 : if (unlikely(RT_NODE_MUST_GROW(n)))
1561 110 : return RT_GROW_NODE_48(tree, parent_slot, node, chunk);
1562 :
1563 51942 : return RT_ADD_CHILD_48(tree, node, chunk);
1564 : }
1565 18236 : case RT_NODE_KIND_256:
1566 18236 : return RT_ADD_CHILD_256(tree, node, chunk); /* node256 has a slot per chunk, so it never grows */
1567 0 : default:
1568 0 : pg_unreachable();
1569 : }
1570 : }
1571 :
1572 : /*
1573 : * The radix tree doesn't have sufficient height. Put new node(s) on top,
1574 : * and move the old node below it.
1575 : */
1576 : static pg_noinline void
1577 38 : RT_EXTEND_UP(RT_RADIX_TREE * tree, uint64 key)
1578 : {
1579 38 : int target_shift = RT_KEY_GET_SHIFT(key);
1580 38 : int shift = tree->ctl->start_shift;
1581 :
1582 : Assert(shift < target_shift);
1583 :
1584 : /* Grow tree upwards until start shift can accommodate the key */
1585 136 : while (shift < target_shift)
1586 : {
1587 : RT_CHILD_PTR node;
1588 : RT_NODE_4 *n4;
1589 :
1590 98 : node = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1591 98 : n4 = (RT_NODE_4 *) node.local;
1592 98 : n4->base.count = 1;
1593 98 : n4->chunks[0] = 0; /* the old subtree holds only keys whose chunk at this level is zero */
1594 98 : n4->children[0] = tree->ctl->root;
1595 :
1596 : /* Update the root */
1597 98 : tree->ctl->root = node.alloc;
1598 :
1599 98 : shift += RT_SPAN;
1600 : }
1601 :
1602 38 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(target_shift);
1603 38 : tree->ctl->start_shift = target_shift;
1604 : }
1605 :
1606 : /*
1607 : * Insert a chain of nodes until we reach the lowest level,
1608 : * and return the address of a slot to be filled further up
1609 : * the call stack.
1610 : */
1611 : static pg_noinline RT_PTR_ALLOC *
1612 9936 : RT_EXTEND_DOWN(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
1613 : {
1614 : RT_CHILD_PTR node,
1615 : child;
1616 : RT_NODE_4 *n4;
1617 :
1618 : /*
1619 : * The child pointer of the first node in the chain goes in the
1620 : * caller-provided slot.
1621 : */
1622 9936 : child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1623 9936 : *parent_slot = child.alloc;
1624 :
1625 9936 : node = child;
1626 9936 : shift -= RT_SPAN;
1627 :
1628 31424 : while (shift > 0) /* one single-child node4 per remaining level */
1629 : {
1630 21488 : child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1631 :
1632 : /* We open-code the insertion ourselves, for speed. */
1633 21488 : n4 = (RT_NODE_4 *) node.local;
1634 21488 : n4->base.count = 1;
1635 21488 : n4->chunks[0] = RT_GET_KEY_CHUNK(key, shift);
1636 21488 : n4->children[0] = child.alloc;
1637 :
1638 21488 : node = child;
1639 21488 : shift -= RT_SPAN;
1640 : }
1641 : Assert(shift == 0);
1642 :
1643 : /* Reserve slot for the value. */
1644 9936 : n4 = (RT_NODE_4 *) node.local;
1645 9936 : n4->chunks[0] = RT_GET_KEY_CHUNK(key, 0);
1646 9936 : n4->base.count = 1;
1647 :
1648 9936 : return &n4->children[0];
1649 : }
1650 :
1651 : /*
1652 : * Workhorse for RT_SET
1653 : *
1654 : * "parent_slot" is the address of the child pointer we just followed,
1655 : * in the parent's array of children, needed if inserting into the
1656 : * current node causes it to grow.
1657 : */
1658 : static RT_PTR_ALLOC *
1659 849132 : RT_GET_SLOT_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift, bool *found)
1660 : {
1661 : RT_PTR_ALLOC *slot;
1662 : RT_CHILD_PTR node;
1663 849132 : uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
1664 :
1665 849132 : node.alloc = *parent_slot;
1666 849132 : RT_PTR_SET_LOCAL(tree, &node);
1667 849132 : slot = RT_NODE_SEARCH(node.local, chunk);
1668 :
1669 849132 : if (slot == NULL) /* no entry for this chunk yet: insert one here */
1670 : {
1671 213190 : *found = false;
1672 :
1673 : /* reserve slot for the caller to populate */
1674 :
1675 213190 : slot = RT_NODE_INSERT(tree, parent_slot, node, chunk);
1676 :
1677 213190 : if (shift == 0)
1678 203280 : return slot; /* lowest level reached: this is the value slot */
1679 : else
1680 9910 : return RT_EXTEND_DOWN(tree, slot, key, shift); /* build the rest of the path down to level zero */
1681 : }
1682 : else
1683 : {
1684 635942 : if (shift == 0)
1685 : {
1686 22380 : *found = true; /* key already present */
1687 22380 : return slot;
1688 : }
1689 : else
1690 613562 : return RT_GET_SLOT_RECURSIVE(tree, slot, key, shift - RT_SPAN, found);
1691 : }
1692 : }
1693 :
1694 : /*
1695 : * Set key to value that value_p points to. If the entry already exists, we
1696 : * update its value and return true. Returns false if entry doesn't yet exist.
1697 : *
1698 : * Taking a lock in exclusive mode is the caller's responsibility.
1699 : */
1700 : RT_SCOPE bool
1701 235596 : RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p)
1702 : {
1703 : bool found;
1704 : RT_PTR_ALLOC *slot;
1705 235596 : size_t value_sz = RT_GET_VALUE_SIZE(value_p);
1706 :
1707 : #ifdef RT_SHMEM
1708 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1709 : #endif
1710 :
1711 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
1712 :
1713 : /* Extend the tree if necessary */
1714 235596 : if (unlikely(key > tree->ctl->max_val))
1715 : {
1716 64 : if (tree->ctl->num_keys == 0)
1717 : {
1718 : RT_CHILD_PTR node;
1719 : RT_NODE_4 *n4;
1720 26 : int start_shift = RT_KEY_GET_SHIFT(key);
1721 :
1722 : /*
1723 : * With an empty root node, we don't extend the tree upwards,
1724 : * since that would result in orphan empty nodes. Instead we open
1725 : * code inserting into the root node and extend downward from
1726 : * there.
1727 : */
1728 26 : node.alloc = tree->ctl->root;
1729 26 : RT_PTR_SET_LOCAL(tree, &node);
1730 26 : n4 = (RT_NODE_4 *) node.local;
1731 26 : n4->base.count = 1;
1732 26 : n4->chunks[0] = RT_GET_KEY_CHUNK(key, start_shift);
1733 :
1734 26 : slot = RT_EXTEND_DOWN(tree, &n4->children[0], key, start_shift);
1735 26 : found = false;
1736 26 : tree->ctl->start_shift = start_shift;
1737 26 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(start_shift);
1738 26 : goto have_slot;
1739 : }
1740 : else
1741 38 : RT_EXTEND_UP(tree, key); /* non-empty tree: add levels above the current root */
1742 : }
1743 :
1744 235570 : slot = RT_GET_SLOT_RECURSIVE(tree, &tree->ctl->root,
1745 235570 : key, tree->ctl->start_shift, &found);
1746 :
1747 235596 : have_slot:
1748 : Assert(slot != NULL);
1749 :
1750 235596 : if (RT_VALUE_IS_EMBEDDABLE(value_p)) /* small enough to live inside the child pointer itself */
1751 : {
1752 : /* free the existing leaf */
1753 216500 : if (found && !RT_CHILDPTR_IS_VALUE(*slot))
1754 2 : RT_FREE_LEAF(tree, *slot);
1755 :
1756 : /* store value directly in child pointer slot */
1757 216500 : memcpy(slot, value_p, value_sz);
1758 :
1759 : #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
1760 : /* tag child pointer */
1761 : #ifdef RT_SHMEM
1762 4 : *slot |= 1; /* low bit set marks an embedded value, cf. RT_CHILDPTR_IS_VALUE */
1763 : #else
1764 3428 : *((uintptr_t *) slot) |= 1;
1765 : #endif
1766 : #endif
1767 : }
1768 : else
1769 : {
1770 : RT_CHILD_PTR leaf;
1771 :
1772 19096 : if (found && !RT_CHILDPTR_IS_VALUE(*slot))
1773 : {
1774 : Assert(RT_PTR_ALLOC_IS_VALID(*slot));
1775 4 : leaf.alloc = *slot;
1776 4 : RT_PTR_SET_LOCAL(tree, &leaf);
1777 :
1778 4 : if (RT_GET_VALUE_SIZE((RT_VALUE_TYPE *) leaf.local) != value_sz) /* existing leaf is reused in place when sizes match */
1779 : {
1780 : /*
1781 : * different sizes, so first free the existing leaf before
1782 : * allocating a new one
1783 : */
1784 4 : RT_FREE_LEAF(tree, *slot);
1785 4 : leaf = RT_ALLOC_LEAF(tree, value_sz);
1786 4 : *slot = leaf.alloc;
1787 : }
1788 : }
1789 : else
1790 : {
1791 : /* allocate new leaf and store it in the child array */
1792 19092 : leaf = RT_ALLOC_LEAF(tree, value_sz);
1793 19092 : *slot = leaf.alloc;
1794 : }
1795 :
1796 19096 : memcpy(leaf.local, value_p, value_sz);
1797 : }
1798 :
1799 : /* Update the statistics */
1800 235596 : if (!found)
1801 213216 : tree->ctl->num_keys++;
1802 :
1803 235596 : return found;
1804 : }
1805 :
1806 : /***************** SETUP / TEARDOWN *****************/
1807 :
1808 : /*
1809 : * Create the radix tree in the given memory context and return it.
1810 : *
1811 : * All local memory required for a radix tree is allocated in the given
1812 : * memory context and its children. Note that RT_FREE() will delete all
1813 : * allocated space within the given memory context, so the dsa_area should
1814 : * be created in a different context.
1815 : */
1816 : RT_SCOPE RT_RADIX_TREE *
1817 : #ifdef RT_SHMEM
1818 26 : RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id)
1819 : #else
1820 20048 : RT_CREATE(MemoryContext ctx)
1821 : #endif
1822 : {
1823 : RT_RADIX_TREE *tree;
1824 : MemoryContext old_ctx;
1825 : RT_CHILD_PTR rootnode;
1826 : #ifdef RT_SHMEM
1827 : dsa_pointer dp;
1828 : #endif
1829 :
1830 20074 : old_ctx = MemoryContextSwitchTo(ctx);
1831 :
1832 20074 : tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
1833 20074 : tree->context = ctx;
1834 :
1835 : /*
1836 : * Separate context for iteration in case the tree context doesn't support
1837 : * pfree
1838 : */
1839 20074 : tree->iter_context = AllocSetContextCreate(ctx,
1840 : RT_STR(RT_PREFIX) "_radix_tree iter context",
1841 : ALLOCSET_SMALL_SIZES);
1842 :
1843 : #ifdef RT_SHMEM
1844 26 : tree->dsa = dsa;
1845 26 : dp = dsa_allocate0(dsa, sizeof(RT_RADIX_TREE_CONTROL));
1846 26 : tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, dp);
1847 26 : tree->ctl->handle = dp; /* the dsa pointer doubles as the attach handle, cf. RT_ATTACH */
1848 26 : tree->ctl->magic = RT_RADIX_TREE_MAGIC;
1849 26 : LWLockInitialize(&tree->ctl->lock, tranche_id);
1850 : #else
1851 20048 : tree->ctl = (RT_RADIX_TREE_CONTROL *) palloc0(sizeof(RT_RADIX_TREE_CONTROL));
1852 :
1853 : /* Create a slab context for each size class */
1854 120288 : for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
1855 : {
1856 100240 : RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
1857 100240 : size_t inner_blocksize = RT_SLAB_BLOCK_SIZE(size_class.allocsize);
1858 :
1859 100240 : tree->node_slabs[i] = SlabContextCreate(ctx,
1860 : size_class.name,
1861 : inner_blocksize,
1862 : size_class.allocsize);
1863 : }
1864 :
1865 : /* By default we use the passed context for leaves. */
1866 20048 : tree->leaf_context = tree->context;
1867 :
1868 : #ifndef RT_VARLEN_VALUE_SIZE
1869 :
1870 : /*
1871 : * For leaves storing fixed-length values, we use a slab context to avoid
1872 : * the possibility of space wastage by power-of-2 rounding up.
1873 : */
1874 : if (sizeof(RT_VALUE_TYPE) > sizeof(RT_PTR_ALLOC))
1875 : tree->leaf_context = SlabContextCreate(ctx,
1876 : RT_STR(RT_PREFIX) "_radix_tree leaf context",
1877 : RT_SLAB_BLOCK_SIZE(sizeof(RT_VALUE_TYPE)),
1878 : sizeof(RT_VALUE_TYPE));
1879 : #endif /* !RT_VARLEN_VALUE_SIZE */
1880 : #endif /* RT_SHMEM */
1881 :
1882 : /* add root node now so that RT_SET can assume it exists */
1883 20074 : rootnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
1884 20074 : tree->ctl->root = rootnode.alloc;
1885 20074 : tree->ctl->start_shift = 0;
1886 20074 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
1887 :
1888 20074 : MemoryContextSwitchTo(old_ctx);
1889 :
1890 20074 : return tree;
1891 : }
1892 :
1893 : #ifdef RT_SHMEM
      : /*
      : * Attach to an existing shared radix tree identified by "handle"
      : * (obtained from RT_GET_HANDLE() in the creating backend).
      : */
1894 : RT_SCOPE RT_RADIX_TREE *
1895 30 : RT_ATTACH(dsa_area *dsa, RT_HANDLE handle)
1896 : {
1897 : RT_RADIX_TREE *tree;
1898 : dsa_pointer control;
1899 :
1900 30 : tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
1901 :
1902 : /* Find the control object in shared memory */
1903 30 : control = handle;
1904 :
1905 30 : tree->dsa = dsa;
1906 30 : tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, control);
      : /* magic is set by RT_CREATE and cleared by RT_FREE */
1907 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1908 :
1909 30 : return tree;
1910 : }
1911 :
      : /* Detach from a shared radix tree; frees only backend-local state. */
1912 : RT_SCOPE void
1913 30 : RT_DETACH(RT_RADIX_TREE * tree)
1914 : {
1915 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1916 30 : pfree(tree);
1917 30 : }
1918 :
      : /* Return the handle other backends can pass to RT_ATTACH(). */
1919 : RT_SCOPE RT_HANDLE
1920 24 : RT_GET_HANDLE(RT_RADIX_TREE * tree)
1921 : {
1922 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1923 24 : return tree->ctl->handle;
1924 : }
1925 :
      : /* Acquire the tree's LWLock in exclusive mode (for modifications). */
1926 : RT_SCOPE void
1927 206 : RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree)
1928 : {
1929 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1930 206 : LWLockAcquire(&tree->ctl->lock, LW_EXCLUSIVE);
1931 : }
1932 :
      : /* Acquire the tree's LWLock in shared mode (for read-only access). */
1933 : RT_SCOPE void
1934 421684 : RT_LOCK_SHARE(RT_RADIX_TREE * tree)
1935 : {
1936 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1937 421684 : LWLockAcquire(&tree->ctl->lock, LW_SHARED);
1938 : }
1939 :
      : /* Release the tree's LWLock (either lock mode). */
1940 : RT_SCOPE void
1941 421890 : RT_UNLOCK(RT_RADIX_TREE * tree)
1942 : {
1943 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1944 421890 : LWLockRelease(&tree->ctl->lock);
1945 : }
1946 :
1947 : /*
1948 : * Recursively free all nodes allocated in the DSA area.
1949 : */
1950 : static void
1951 32 : RT_FREE_RECURSE(RT_RADIX_TREE * tree, RT_PTR_ALLOC ptr, int shift)
1952 : {
1953 : RT_CHILD_PTR node;
1954 :
      : /* guard against stack overflow on pathologically deep recursion */
1955 32 : check_stack_depth();
1956 :
1957 32 : node.alloc = ptr;
1958 32 : RT_PTR_SET_LOCAL(tree, &node);
1959 :
      : /*
      : * Post-order traversal: free children (or non-embedded leaf values when
      : * shift == 0) before freeing this node itself at the bottom.
      : */
1960 32 : switch (node.local->kind)
1961 : {
1962 22 : case RT_NODE_KIND_4:
1963 : {
1964 22 : RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
1965 :
1966 32 : for (int i = 0; i < n4->base.count; i++)
1967 : {
1968 10 : RT_PTR_ALLOC child = n4->children[i];
1969 :
1970 10 : if (shift > 0)
1971 6 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1972 4 : else if (!RT_CHILDPTR_IS_VALUE(child))
1973 0 : dsa_free(tree->dsa, child);
1974 : }
1975 :
1976 22 : break;
1977 : }
1978 2 : case RT_NODE_KIND_16:
1979 : {
1980 2 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1981 :
1982 50 : for (int i = 0; i < n16->base.count; i++)
1983 : {
1984 48 : RT_PTR_ALLOC child = n16->children[i];
1985 :
1986 48 : if (shift > 0)
1987 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1988 48 : else if (!RT_CHILDPTR_IS_VALUE(child))
1989 48 : dsa_free(tree->dsa, child);
1990 : }
1991 :
1992 2 : break;
1993 : }
1994 6 : case RT_NODE_KIND_48:
1995 : {
1996 6 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1997 :
1998 1542 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
1999 : {
2000 : RT_PTR_ALLOC child;
2001 :
2002 1536 : if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2003 1266 : continue;
2004 :
2005 270 : child = *RT_NODE_48_GET_CHILD(n48, i);
2006 :
2007 270 : if (shift > 0)
2008 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2009 270 : else if (!RT_CHILDPTR_IS_VALUE(child))
2010 270 : dsa_free(tree->dsa, child);
2011 : }
2012 :
2013 6 : break;
2014 : }
2015 2 : case RT_NODE_KIND_256:
2016 : {
2017 2 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2018 :
2019 514 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2020 : {
2021 : RT_PTR_ALLOC child;
2022 :
2023 512 : if (!RT_NODE_256_IS_CHUNK_USED(n256, i))
2024 358 : continue;
2025 :
2026 154 : child = *RT_NODE_256_GET_CHILD(n256, i);
2027 :
2028 154 : if (shift > 0)
2029 0 : RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2030 154 : else if (!RT_CHILDPTR_IS_VALUE(child))
2031 154 : dsa_free(tree->dsa, child);
2032 : }
2033 :
2034 2 : break;
2035 : }
2036 : }
2037 :
2038 : /* Free the inner node */
2039 32 : dsa_free(tree->dsa, ptr);
2040 32 : }
2041 : #endif
2042 :
2043 : /*
2044 : * Free the radix tree, including all nodes and leaves.
2045 : */
2046 : RT_SCOPE void
2047 970 : RT_FREE(RT_RADIX_TREE * tree)
2048 : {
2049 : #ifdef RT_SHMEM
2050 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2051 :
2052 : /* Free all memory used for radix tree nodes */
2053 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root))
2054 26 : RT_FREE_RECURSE(tree, tree->ctl->root, tree->ctl->start_shift);
2055 :
2056 : /*
2057 : * Vandalize the control block to help catch programming error where other
2058 : * backends access the memory formerly occupied by this radix tree.
2059 : */
2060 26 : tree->ctl->magic = 0;
2061 26 : dsa_free(tree->dsa, tree->ctl->handle);
2062 : #endif
2063 :
2064 : /*
2065 : * Free all space allocated within the tree's context and delete all child
2066 : * contexts such as those used for nodes.
2067 : */
      : /* This also releases the RT_RADIX_TREE struct itself, which RT_CREATE allocated in this context. */
2068 970 : MemoryContextReset(tree->context);
2069 970 : }
2070 :
2071 : /***************** ITERATION *****************/
2072 :
2073 : /*
2074 : * Create and return the iterator for the given radix tree.
2075 : *
2076 : * Taking a lock in shared mode during the iteration is the caller's
2077 : * responsibility.
2078 : */
2079 : RT_SCOPE RT_ITER *
2080 956 : RT_BEGIN_ITERATE(RT_RADIX_TREE * tree)
2081 : {
2082 : RT_ITER *iter;
2083 : RT_CHILD_PTR root;
2084 :
      : /* allocate in iter_context so RT_END_ITERATE can reliably pfree it */
2085 956 : iter = (RT_ITER *) MemoryContextAllocZero(tree->iter_context,
2086 : sizeof(RT_ITER));
2087 956 : iter->tree = tree;
2088 :
2089 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2090 956 : root.alloc = iter->tree->ctl->root;
2091 956 : RT_PTR_SET_LOCAL(tree, &root);
2092 :
2093 956 : iter->top_level = iter->tree->ctl->start_shift / RT_SPAN;
2094 :
2095 : /* Set the root to start */
2096 956 : iter->cur_level = iter->top_level;
2097 956 : iter->node_iters[iter->cur_level].node = root;
2098 956 : iter->node_iters[iter->cur_level].idx = 0;
2099 :
2100 956 : return iter;
2101 : }
2102 :
2103 : /*
2104 : * Scan the inner node and return the next child pointer if one exists, otherwise
2105 : * return NULL.
2106 : */
2107 : static inline RT_PTR_ALLOC *
2108 249300 : RT_NODE_ITERATE_NEXT(RT_ITER * iter, int level)
2109 : {
2110 249300 : uint8 key_chunk = 0;
2111 : RT_NODE_ITER *node_iter;
2112 : RT_CHILD_PTR node;
2113 249300 : RT_PTR_ALLOC *slot = NULL;
2114 :
2115 : #ifdef RT_SHMEM
2116 : Assert(iter->tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2117 : #endif
2118 :
2119 249300 : node_iter = &(iter->node_iters[level]);
2120 249300 : node = node_iter->node;
2121 :
2122 : Assert(node.local != NULL);
2123 :
      : /*
      : * Advance this level's cursor in a kind-specific way: node4/16 index the
      : * dense chunk array directly, node48/256 scan for the next used chunk.
      : */
2124 249300 : switch ((node.local)->kind)
2125 : {
2126 32826 : case RT_NODE_KIND_4:
2127 : {
2128 32826 : RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
2129 :
2130 32826 : if (node_iter->idx >= n4->base.count)
2131 16122 : return NULL;
2132 :
2133 16704 : slot = &n4->children[node_iter->idx];
2134 16704 : key_chunk = n4->chunks[node_iter->idx];
2135 16704 : node_iter->idx++;
2136 16704 : break;
2137 : }
2138 5778 : case RT_NODE_KIND_16:
2139 : {
2140 5778 : RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2141 :
2142 5778 : if (node_iter->idx >= n16->base.count)
2143 298 : return NULL;
2144 :
2145 5480 : slot = &n16->children[node_iter->idx];
2146 5480 : key_chunk = n16->chunks[node_iter->idx];
2147 5480 : node_iter->idx++;
2148 5480 : break;
2149 : }
2150 188290 : case RT_NODE_KIND_48:
2151 : {
2152 188290 : RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2153 : int chunk;
2154 :
2155 1059552 : for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2156 : {
2157 1055434 : if (RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2158 184172 : break;
2159 : }
2160 :
2161 188290 : if (chunk >= RT_NODE_MAX_SLOTS)
2162 4118 : return NULL;
2163 :
2164 184172 : slot = RT_NODE_48_GET_CHILD(n48, chunk);
2165 :
2166 184172 : key_chunk = chunk;
2167 184172 : node_iter->idx = chunk + 1;
2168 184172 : break;
2169 : }
2170 22406 : case RT_NODE_KIND_256:
2171 : {
2172 22406 : RT_NODE_256 *n256 = (RT_NODE_256 *) (node.local);
2173 : int chunk;
2174 :
2175 28256 : for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2176 : {
2177 28160 : if (RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2178 22310 : break;
2179 : }
2180 :
2181 22406 : if (chunk >= RT_NODE_MAX_SLOTS)
2182 96 : return NULL;
2183 :
2184 22310 : slot = RT_NODE_256_GET_CHILD(n256, chunk);
2185 :
2186 22310 : key_chunk = chunk;
2187 22310 : node_iter->idx = chunk + 1;
2188 22310 : break;
2189 : }
2190 : }
2191 :
2192 : /* Update the key */
2193 228666 : iter->key &= ~(((uint64) RT_CHUNK_MASK) << (level * RT_SPAN));
2194 228666 : iter->key |= (((uint64) key_chunk) << (level * RT_SPAN));
2195 :
2196 228666 : return slot;
2197 : }
2198 :
2199 : /*
2200 : * Return pointer to value and set key_p as long as there is a key. Otherwise
2201 : * return NULL.
2202 : */
2203 : RT_SCOPE RT_VALUE_TYPE *
2204 209654 : RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p)
2205 : {
2206 209654 : RT_PTR_ALLOC *slot = NULL;
2207 :
      : /* Depth-first walk: descend when a child is found, ascend when a node is exhausted */
2208 250194 : while (iter->cur_level <= iter->top_level)
2209 : {
2210 : RT_CHILD_PTR node;
2211 :
2212 249300 : slot = RT_NODE_ITERATE_NEXT(iter, iter->cur_level);
2213 :
2214 249300 : if (iter->cur_level == 0 && slot != NULL)
2215 : {
2216 : /* Found a value at the leaf node */
2217 208760 : *key_p = iter->key;
2218 208760 : node.alloc = *slot;
2219 :
      : /* value may be embedded directly in the slot, or stored in a separate leaf */
2220 208760 : if (RT_CHILDPTR_IS_VALUE(*slot))
2221 208760 : return (RT_VALUE_TYPE *) slot;
2222 : else
2223 : {
2224 18996 : RT_PTR_SET_LOCAL(iter->tree, &node);
2225 18996 : return (RT_VALUE_TYPE *) node.local;
2226 : }
2227 : }
2228 :
2229 40540 : if (slot != NULL)
2230 : {
2231 : /* Found the child slot, move down the tree */
2232 19906 : node.alloc = *slot;
2233 19906 : RT_PTR_SET_LOCAL(iter->tree, &node);
2234 :
2235 19906 : iter->cur_level--;
2236 19906 : iter->node_iters[iter->cur_level].node = node;
2237 19906 : iter->node_iters[iter->cur_level].idx = 0;
2238 : }
2239 : else
2240 : {
2241 : /* Not found the child slot, move up the tree */
2242 20634 : iter->cur_level++;
2243 : }
2244 : }
2245 :
2246 : /* We've visited all nodes, so the iteration finished */
2247 894 : return NULL;
2248 : }
2249 :
2250 : /*
2251 : * Terminate the iteration. The caller is responsible for releasing any locks.
2252 : */
2253 : RT_SCOPE void
2254 956 : RT_END_ITERATE(RT_ITER * iter)
2255 : {
      : /* iter was allocated in tree->iter_context by RT_BEGIN_ITERATE */
2256 956 : pfree(iter);
2257 956 : }
2258 :
2259 : /***************** DELETION *****************/
2260 :
2261 : #ifdef RT_USE_DELETE
2262 :
2263 : /* Delete the element at "deletepos" */
2264 : static inline void
      : /* "count" is the number of valid entries before the deletion */
2265 44058 : RT_SHIFT_ARRAYS_AND_DELETE(uint8 *chunks, RT_PTR_ALLOC * children, int count, int deletepos)
2266 : {
2267 : /*
2268 : * This is basically a memmove, but written in a simple loop for speed on
2269 : * small inputs.
2270 : */
2271 200578 : for (int i = deletepos; i < count - 1; i++)
2272 : {
2273 : /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
2274 : #ifdef __GNUC__
2275 156520 : __asm__("");
2276 : #endif
2277 156520 : chunks[i] = chunks[i + 1];
2278 156520 : children[i] = children[i + 1];
2279 : }
2280 44058 : }
2281 :
2282 : /*
2283 : * Copy both chunk and slot arrays into the right
2284 : * place. The element at "deletepos" is deleted by skipping it.
2285 : */
2286 : static inline void
      : /* destination arrays must have room for count - 1 entries */
2287 4162 : RT_COPY_ARRAYS_AND_DELETE(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
2288 : uint8 *src_chunks, RT_PTR_ALLOC * src_children,
2289 : int count, int deletepos)
2290 : {
2291 16648 : for (int i = 0; i < count - 1; i++)
2292 : {
2293 : /*
2294 : * use a branch-free computation to skip the index of the deleted
2295 : * element
2296 : */
2297 12486 : int sourceidx = i + (i >= deletepos);
2298 12486 : int destidx = i;
2299 :
2300 12486 : dst_chunks[destidx] = src_chunks[sourceidx];
2301 12486 : dst_children[destidx] = src_children[sourceidx];
2302 : }
2303 4162 : }
2304 :
2305 : /*
2306 : * Note: While all node-growing functions are called to perform an insertion
2307 : * when no more space is available, shrinking is not a hard-and-fast requirement.
2308 : * When shrinking nodes, we generally wait until the count is about 3/4 of
2309 : * the next lower node's fanout. This prevents ping-ponging between different
2310 : * node sizes.
2311 : *
2312 : * Some shrinking functions delete first and then shrink, either because we
2313 : * must or because it's fast and simple that way. Sometimes it's faster to
2314 : * delete while shrinking.
2315 : */
2316 :
2317 : /*
2318 : * Move contents of a node256 to a node48. Any deletion should have happened
2319 : * in the caller.
2320 : */
2321 : static void pg_noinline
      : /* caller (RT_REMOVE_CHILD_256) guarantees count fits within node48's fanout */
2322 32 : RT_SHRINK_NODE_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2323 : {
2324 32 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2325 : RT_CHILD_PTR newnode;
2326 : RT_NODE_48 *new48;
2327 32 : int slot_idx = 0;
2328 :
2329 : /* initialize new node */
2330 32 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
2331 32 : new48 = (RT_NODE_48 *) newnode.local;
2332 :
2333 : /* copy over the entries */
2334 32 : RT_COPY_COMMON(newnode, node);
2335 8224 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2336 : {
2337 8192 : if (RT_NODE_256_IS_CHUNK_USED(n256, i))
2338 : {
2339 1536 : new48->slot_idxs[i] = slot_idx;
2340 1536 : new48->children[slot_idx] = n256->children[i];
2341 1536 : slot_idx++;
2342 : }
2343 : }
2344 :
2345 : /*
2346 : * Since we just copied a dense array, we can fill "isset" using a single
2347 : * store, provided the length of that array is at most the number of bits
2348 : * in a bitmapword.
2349 : */
2350 : Assert(n256->base.count <= BITS_PER_BITMAPWORD);
2351 32 : new48->isset[0] = (bitmapword) (((uint64) 1 << n256->base.count) - 1);
2352 :
2353 : /* free old node and update reference in parent */
2354 32 : *parent_slot = newnode.alloc;
2355 32 : RT_FREE_NODE(tree, node);
2356 32 : }
2357 :
2358 : static inline void
2359 8970 : RT_REMOVE_CHILD_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2360 : {
2361 : int shrink_threshold;
2362 8970 : RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2363 8970 : int idx = RT_BM_IDX(chunk);
2364 8970 : int bitnum = RT_BM_BIT(chunk);
2365 :
2366 : /* Mark the slot free for "chunk" */
2367 8970 : n256->isset[idx] &= ~((bitmapword) 1 << bitnum);
2368 :
2369 8970 : n256->base.count--;
2370 :
2371 : /*
2372 : * A full node256 will have a count of zero because of overflow, so we
2373 : * delete first before checking the shrink threshold.
2374 : */
2375 : Assert(n256->base.count > 0);
2376 :
2377 : /* This simplifies RT_SHRINK_NODE_256() */
      : /* i.e. cap the threshold so the shrunk count always fits in one bitmapword */
2378 8970 : shrink_threshold = BITS_PER_BITMAPWORD;
2379 8970 : shrink_threshold = Min(RT_FANOUT_48 / 4 * 3, shrink_threshold);
2380 :
2381 8970 : if (n256->base.count <= shrink_threshold)
2382 32 : RT_SHRINK_NODE_256(tree, parent_slot, node, chunk);
2383 8970 : }
2384 :
2385 : /*
2386 : * Move contents of a node48 to a node16. Any deletion should have happened
2387 : * in the caller.
2388 : */
2389 : static void pg_noinline
2390 4050 : RT_SHRINK_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2391 : {
2392 4050 : RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2393 : RT_CHILD_PTR newnode;
2394 : RT_NODE_16 *new16;
2395 4050 : int destidx = 0;
2396 :
2397 : /*
2398 : * Initialize new node. For now we skip the larger node16 size class for
2399 : * simplicity.
2400 : */
2401 4050 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
2402 4050 : new16 = (RT_NODE_16 *) newnode.local;
2403 :
2404 : /* copy over all existing entries */
2405 4050 : RT_COPY_COMMON(newnode, node);
      : /* NOTE(review): the loop variable "chunk" shadows the function parameter of the same name (the parameter is otherwise unused here) */
2406 1040850 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2407 : {
2408 1036800 : if (n48->slot_idxs[chunk] != RT_INVALID_SLOT_IDX)
2409 : {
2410 48600 : new16->chunks[destidx] = chunk;
2411 48600 : new16->children[destidx] = n48->children[n48->slot_idxs[chunk]];
2412 48600 : destidx++;
2413 : }
2414 : }
2415 :
2416 : Assert(destidx < new16->base.fanout);
2417 :
2418 4050 : RT_VERIFY_NODE((RT_NODE *) new16);
2419 :
2420 : /* free old node and update reference in parent */
2421 4050 : *parent_slot = newnode.alloc;
2422 4050 : RT_FREE_NODE(tree, node);
2423 4050 : }
2424 :
2425 : static inline void
2426 133452 : RT_REMOVE_CHILD_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
2427 : {
2428 133452 : RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
2429 133452 : int deletepos = n48->slot_idxs[chunk];
2430 :
2431 : /* For now we skip the larger node16 size class for simplicity */
2432 133452 : int shrink_threshold = RT_FANOUT_16_LO / 4 * 3;
2433 : int idx;
2434 : int bitnum;
2435 :
2436 : Assert(deletepos != RT_INVALID_SLOT_IDX);
2437 :
      : /* clear the slot's bit in the "isset" bitmap, then invalidate the chunk mapping */
2438 133452 : idx = RT_BM_IDX(deletepos);
2439 133452 : bitnum = RT_BM_BIT(deletepos);
2440 133452 : n48->isset[idx] &= ~((bitmapword) 1 << bitnum);
2441 133452 : n48->slot_idxs[chunk] = RT_INVALID_SLOT_IDX;
2442 :
2443 133452 : n48->base.count--;
2444 :
2445 : /*
2446 : * To keep shrinking simple, do it after deleting, which is fast for
2447 : * node48 anyway.
2448 : */
2449 133452 : if (n48->base.count <= shrink_threshold)
2450 4050 : RT_SHRINK_NODE_48(tree, parent_slot, node, chunk);
2451 133452 : }
2452 :
2453 : /*
2454 : * Move contents of a node16 to a node4, and delete the one at "deletepos".
2455 : * By deleting as we move, we can avoid memmove operations in the new
2456 : * node.
2457 : */
2458 : static void pg_noinline
2459 4162 : RT_SHRINK_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 deletepos)
2460 : {
2461 4162 : RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2462 : RT_CHILD_PTR newnode;
2463 : RT_NODE_4 *new4;
2464 :
2465 : /* initialize new node */
2466 4162 : newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
2467 4162 : new4 = (RT_NODE_4 *) newnode.local;
2468 :
2469 : /* copy over existing entries, except for the one at "deletepos" */
2470 4162 : RT_COPY_COMMON(newnode, node);
2471 4162 : RT_COPY_ARRAYS_AND_DELETE(new4->chunks, new4->children,
2472 4162 : n16->chunks, n16->children,
2473 4162 : n16->base.count, deletepos);
2474 :
      : /* RT_COPY_COMMON copied the old count; adjust for the deleted entry */
2475 4162 : new4->base.count--;
2476 4162 : RT_VERIFY_NODE((RT_NODE *) new4);
2477 :
2478 : /* free old node and update reference in parent */
2479 4162 : *parent_slot = newnode.alloc;
2480 4162 : RT_FREE_NODE(tree, node);
2481 4162 : }
2482 :
2483 : static inline void
2484 39860 : RT_REMOVE_CHILD_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2485 : {
2486 39860 : RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
      : /* position to delete, recovered by pointer arithmetic into the children array */
2487 39860 : int deletepos = slot - n16->children;
2488 :
2489 : /*
2490 : * When shrinking to node4, 4 is hard-coded. After shrinking, the new node
2491 : * will end up with 3 elements and 3 is the largest count where linear
2492 : * search is faster than SIMD, at least on x86-64.
2493 : */
2494 39860 : if (n16->base.count <= 4)
2495 : {
2496 4162 : RT_SHRINK_NODE_16(tree, parent_slot, node, deletepos);
2497 4162 : return;
2498 : }
2499 :
2500 : Assert(deletepos >= 0);
2501 : Assert(n16->chunks[deletepos] == chunk);
2502 :
2503 35698 : RT_SHIFT_ARRAYS_AND_DELETE(n16->chunks, n16->children,
2504 35698 : n16->base.count, deletepos);
2505 35698 : n16->base.count--;
2506 : }
2507 :
2508 : static inline void
2509 39862 : RT_REMOVE_CHILD_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2510 : {
2511 39862 : RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
2512 :
      : /* "chunk" is only consulted in assertions; deletion position comes from "slot" */
2513 39862 : if (n4->base.count == 1)
2514 : {
2515 : Assert(n4->chunks[0] == chunk);
2516 :
2517 : /*
2518 : * If we're deleting the last entry from the root child node don't
2519 : * free it, but mark both the tree and the root child node empty. That
2520 : * way, RT_SET can assume it exists.
2521 : */
2522 31502 : if (parent_slot == &tree->ctl->root)
2523 : {
2524 62 : n4->base.count = 0;
2525 62 : tree->ctl->start_shift = 0;
2526 62 : tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
2527 : }
2528 : else
2529 : {
2530 : /*
2531 : * Deleting last entry, so just free the entire node.
2532 : * RT_DELETE_RECURSIVE has already freed the value and lower-level
2533 : * children.
2534 : */
2535 31440 : RT_FREE_NODE(tree, node);
2536 :
2537 : /*
2538 : * Also null out the parent's slot -- this tells the next higher
2539 : * level to delete its child pointer
2540 : */
2541 31440 : *parent_slot = RT_INVALID_PTR_ALLOC;
2542 : }
2543 : }
2544 : else
2545 : {
2546 8360 : int deletepos = slot - n4->children;
2547 :
2548 : Assert(deletepos >= 0);
2549 : Assert(n4->chunks[deletepos] == chunk);
2550 :
2551 8360 : RT_SHIFT_ARRAYS_AND_DELETE(n4->chunks, n4->children,
2552 8360 : n4->base.count, deletepos);
2553 :
2554 8360 : n4->base.count--;
2555 : }
2556 39862 : }
2557 :
2558 : /*
2559 : * Delete the child pointer corresponding to "key" in the given node.
2560 : */
2561 : static inline void
2562 222144 : RT_NODE_DELETE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
2563 : {
      : /* dispatch on node kind; the node48/256 variants look the slot up by chunk, so "slot" is not passed to them */
2564 222144 : switch ((node.local)->kind)
2565 : {
2566 39862 : case RT_NODE_KIND_4:
2567 : {
2568 39862 : RT_REMOVE_CHILD_4(tree, parent_slot, node, chunk, slot);
2569 39862 : return;
2570 : }
2571 39860 : case RT_NODE_KIND_16:
2572 : {
2573 39860 : RT_REMOVE_CHILD_16(tree, parent_slot, node, chunk, slot);
2574 39860 : return;
2575 : }
2576 133452 : case RT_NODE_KIND_48:
2577 : {
2578 133452 : RT_REMOVE_CHILD_48(tree, parent_slot, node, chunk);
2579 133452 : return;
2580 : }
2581 8970 : case RT_NODE_KIND_256:
2582 : {
2583 8970 : RT_REMOVE_CHILD_256(tree, parent_slot, node, chunk);
2584 8970 : return;
2585 : }
2586 0 : default:
2587 0 : pg_unreachable();
2588 : }
2589 : }
2590 :
2591 : /* workhorse for RT_DELETE */
2592 : static bool
2593 830348 : RT_DELETE_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
2594 : {
2595 : RT_PTR_ALLOC *slot;
2596 : RT_CHILD_PTR node;
2597 830348 : uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
2598 :
2599 830348 : node.alloc = *parent_slot;
2600 830348 : RT_PTR_SET_LOCAL(tree, &node);
2601 830348 : slot = RT_NODE_SEARCH(node.local, chunk);
2602 :
2603 830348 : if (slot == NULL)
2604 18010 : return false;
2605 :
      : /* shift == 0 means we have reached the leaf level */
2606 812338 : if (shift == 0)
2607 : {
2608 190704 : if (!RT_CHILDPTR_IS_VALUE(*slot))
2609 0 : RT_FREE_LEAF(tree, *slot);
2610 :
2611 190704 : RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2612 190704 : return true;
2613 : }
2614 : else
2615 : {
2616 : bool deleted;
2617 :
2618 621634 : deleted = RT_DELETE_RECURSIVE(tree, slot, key, shift - RT_SPAN);
2619 :
2620 : /* Child node was freed, so delete its slot now */
2621 621634 : if (*slot == RT_INVALID_PTR_ALLOC)
2622 : {
2623 : Assert(deleted);
2624 31440 : RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2625 : }
2626 :
2627 621634 : return deleted;
2628 : }
2629 : }
2630 :
2631 : /*
2632 : * Delete the given key from the radix tree. If the key is found delete it
2633 : * and return true, otherwise do nothing and return false.
2634 : *
2635 : * Taking a lock in exclusive mode is the caller's responsibility.
2636 : */
2637 : RT_SCOPE bool
2638 208714 : RT_DELETE(RT_RADIX_TREE * tree, uint64 key)
2639 : {
2640 : bool deleted;
2641 :
2642 : #ifdef RT_SHMEM
2643 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2644 : #endif
2645 :
      : /* a key beyond max_val cannot exist in the tree */
2646 208714 : if (key > tree->ctl->max_val)
2647 0 : return false;
2648 :
2649 : Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2650 208714 : deleted = RT_DELETE_RECURSIVE(tree, &tree->ctl->root,
2651 208714 : key, tree->ctl->start_shift);
2652 :
2653 : /* Found the key to delete. Update the statistics */
2654 208714 : if (deleted)
2655 : {
2656 190704 : tree->ctl->num_keys--;
2657 : Assert(tree->ctl->num_keys >= 0);
2658 : }
2659 :
2660 208714 : return deleted;
2661 : }
2662 :
2663 : #endif /* RT_USE_DELETE */
2664 :
2665 : /***************** UTILITY FUNCTIONS *****************/
2666 :
2667 : /*
2668 : * Return the statistics of the amount of memory used by the radix tree.
2669 : *
2670 : * Since dsa_get_total_size() does appropriate locking, the caller doesn't
2671 : * need to take a lock.
2672 : */
2673 : RT_SCOPE uint64
2674 112590 : RT_MEMORY_USAGE(RT_RADIX_TREE * tree)
2675 : {
2676 112590 : size_t total = 0;
2677 :
2678 : #ifdef RT_SHMEM
2679 : Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
      : /* shared case: total size of the whole DSA area backing the tree */
2680 734 : total = dsa_get_total_size(tree->dsa);
2681 : #else
      : /* local case: bytes allocated in the tree's context and its children */
2682 111856 : total = MemoryContextMemAllocated(tree->context, true);
2683 : #endif
2684 :
2685 112590 : return total;
2686 : }
2687 :
2688 : /*
2689 : * Perform some sanity checks on the given node.
2690 : */
2691 : static void
      : /* no-op unless built with USE_ASSERT_CHECKING */
2692 221402 : RT_VERIFY_NODE(RT_NODE * node)
2693 : {
2694 : #ifdef USE_ASSERT_CHECKING
2695 :
2696 : switch (node->kind)
2697 : {
2698 : case RT_NODE_KIND_4:
2699 : {
2700 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2701 :
2702 : /* RT_DUMP_NODE(node); */
2703 :
      : /* chunks must be in strictly increasing order */
2704 : for (int i = 1; i < n4->base.count; i++)
2705 : Assert(n4->chunks[i - 1] < n4->chunks[i]);
2706 :
2707 : break;
2708 : }
2709 : case RT_NODE_KIND_16:
2710 : {
2711 : RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2712 :
2713 : /* RT_DUMP_NODE(node); */
2714 :
2715 : for (int i = 1; i < n16->base.count; i++)
2716 : Assert(n16->chunks[i - 1] < n16->chunks[i]);
2717 :
2718 : break;
2719 : }
2720 : case RT_NODE_KIND_48:
2721 : {
2722 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2723 : int cnt = 0;
2724 :
2725 : /* RT_DUMP_NODE(node); */
2726 :
2727 : for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2728 : {
2729 : uint8 slot = n48->slot_idxs[i];
2730 : int idx = RT_BM_IDX(slot);
2731 : int bitnum = RT_BM_BIT(slot);
2732 :
2733 : if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2734 : continue;
2735 :
2736 : /* Check if the corresponding slot is used */
2737 : Assert(slot < node->fanout);
2738 : Assert((n48->isset[idx] & ((bitmapword) 1 << bitnum)) != 0);
2739 :
2740 : cnt++;
2741 : }
2742 :
2743 : Assert(n48->base.count == cnt);
2744 :
2745 : break;
2746 : }
2747 : case RT_NODE_KIND_256:
2748 : {
2749 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2750 : int cnt = 0;
2751 :
2752 : /* RT_DUMP_NODE(node); */
2753 :
2754 : for (int i = 0; i < RT_BM_IDX(RT_NODE_MAX_SLOTS); i++)
2755 : cnt += bmw_popcount(n256->isset[i]);
2756 :
2757 : /*
2758 : * Check if the number of used chunk matches, accounting for
2759 : * overflow
2760 : */
2761 : if (cnt == RT_FANOUT_256)
2762 : Assert(n256->base.count == 0);
2763 : else
2764 : Assert(n256->base.count == cnt);
2765 :
2766 : break;
2767 : }
2768 : }
2769 : #endif
2770 221402 : }
2771 :
2772 : /***************** DEBUG FUNCTIONS *****************/
2773 :
2774 : #ifdef RT_DEBUG
2775 :
2776 : /*
2777 : * Print out tree stats, some of which are only collected in debugging builds.
2778 : */
2779 : RT_SCOPE void
      : /* writes human-readable statistics to stderr */
2780 122 : RT_STATS(RT_RADIX_TREE * tree)
2781 : {
2782 122 : fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
2783 122 : fprintf(stderr, "num_keys = %lld\n", (long long) tree->ctl->num_keys);
2784 :
2785 : #ifdef RT_SHMEM
2786 : fprintf(stderr, "handle = " DSA_POINTER_FORMAT "\n", tree->ctl->handle);
2787 : #endif
2788 :
2789 122 : fprintf(stderr, "height = %d", tree->ctl->start_shift / RT_SPAN);
2790 :
2791 732 : for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
2792 : {
2793 610 : RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
2794 :
2795 610 : fprintf(stderr, ", n%d = %lld", size_class.fanout, (long long) tree->ctl->num_nodes[i]);
2796 : }
2797 :
2798 122 : fprintf(stderr, ", leaves = %lld", (long long) tree->ctl->num_leaves);
2799 :
2800 122 : fprintf(stderr, "\n");
2801 122 : }
2802 :
2803 : /*
2804 : * Print out debugging information about the given node.
2805 : */
2806 : static void
2807 : pg_attribute_unused()
      : /* dump a single node's contents to stderr for interactive debugging */
2808 0 : RT_DUMP_NODE(RT_NODE * node)
2809 : {
2810 : #ifdef RT_SHMEM
2811 : #define RT_CHILD_PTR_FORMAT DSA_POINTER_FORMAT
2812 : #else
2813 : #define RT_CHILD_PTR_FORMAT "%p"
2814 : #endif
2815 :
      : /* fanout/count of 0 encode 256 (they overflow the uint8 fields) */
2816 0 : fprintf(stderr, "kind %d, fanout %d, count %u\n",
2817 0 : (node->kind == RT_NODE_KIND_4) ? 4 :
2818 0 : (node->kind == RT_NODE_KIND_16) ? 16 :
2819 0 : (node->kind == RT_NODE_KIND_48) ? 48 : 256,
2820 0 : node->fanout == 0 ? 256 : node->fanout,
2821 0 : node->count == 0 ? 256 : node->count);
2822 :
2823 0 : switch (node->kind)
2824 : {
2825 0 : case RT_NODE_KIND_4:
2826 : {
2827 0 : RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2828 :
2829 0 : fprintf(stderr, "chunks and slots:\n");
2830 0 : for (int i = 0; i < n4->base.count; i++)
2831 : {
2832 0 : fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2833 0 : i, n4->chunks[i], n4->children[i]);
2834 : }
2835 :
2836 0 : break;
2837 : }
2838 0 : case RT_NODE_KIND_16:
2839 : {
2840 0 : RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2841 :
2842 0 : fprintf(stderr, "chunks and slots:\n");
2843 0 : for (int i = 0; i < n16->base.count; i++)
2844 : {
2845 0 : fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2846 0 : i, n16->chunks[i], n16->children[i]);
2847 : }
2848 0 : break;
2849 : }
2850 0 : case RT_NODE_KIND_48:
2851 : {
2852 0 : RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2853 0 : char *sep = "";
2854 :
2855 0 : fprintf(stderr, "slot_idxs: \n");
2856 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2857 : {
2858 0 : if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2859 0 : continue;
2860 :
2861 0 : fprintf(stderr, " idx[%d] = %d\n",
2862 0 : chunk, n48->slot_idxs[chunk]);
2863 : }
2864 :
2865 0 : fprintf(stderr, "isset-bitmap: ");
2866 0 : for (int i = 0; i < (RT_FANOUT_48_MAX / BITS_PER_BYTE); i++)
2867 : {
2868 0 : fprintf(stderr, "%s%x", sep, ((uint8 *) n48->isset)[i]);
2869 0 : sep = " ";
2870 : }
2871 0 : fprintf(stderr, "\n");
2872 :
2873 0 : fprintf(stderr, "chunks and slots:\n");
2874 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2875 : {
2876 0 : if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2877 0 : continue;
2878 :
2879 0 : fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2880 : chunk,
2881 0 : *RT_NODE_48_GET_CHILD(n48, chunk));
2882 : }
2883 0 : break;
2884 : }
2885 0 : case RT_NODE_KIND_256:
2886 : {
2887 0 : RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2888 0 : char *sep = "";
2889 :
2890 0 : fprintf(stderr, "isset-bitmap: ");
2891 0 : for (int i = 0; i < (RT_FANOUT_256 / BITS_PER_BYTE); i++)
2892 : {
2893 0 : fprintf(stderr, "%s%x", sep, ((uint8 *) n256->isset)[i]);
2894 0 : sep = " ";
2895 : }
2896 0 : fprintf(stderr, "\n");
2897 :
2898 0 : fprintf(stderr, "chunks and slots:\n");
2899 0 : for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2900 : {
2901 0 : if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2902 0 : continue;
2903 :
2904 0 : fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2905 : chunk,
2906 0 : *RT_NODE_256_GET_CHILD(n256, chunk));
2907 : }
2908 0 : break;
2909 : }
2910 : }
2911 0 : }
2912 : #endif /* RT_DEBUG */
2913 :
2914 : #endif /* RT_DEFINE */
2915 :
2916 :
2917 : /* undefine external parameters, so next radix tree can be defined */
2918 : #undef RT_PREFIX
2919 : #undef RT_SCOPE
2920 : #undef RT_DECLARE
2921 : #undef RT_DEFINE
2922 : #undef RT_VALUE_TYPE
2923 : #undef RT_VARLEN_VALUE_SIZE
2924 : #undef RT_RUNTIME_EMBEDDABLE_VALUE
2925 : #undef RT_SHMEM
2926 : #undef RT_USE_DELETE
2927 : #undef RT_DEBUG
2928 :
2929 : /* locally declared macros */
2930 : #undef RT_MAKE_PREFIX
2931 : #undef RT_MAKE_NAME
2932 : #undef RT_MAKE_NAME_
2933 : #undef RT_STR
2934 : #undef RT_STR_
2935 : #undef RT_SPAN
2936 : #undef RT_NODE_MAX_SLOTS
2937 : #undef RT_CHUNK_MASK
2938 : #undef RT_MAX_SHIFT
2939 : #undef RT_MAX_LEVEL
2940 : #undef RT_GET_KEY_CHUNK
2941 : #undef RT_BM_IDX
2942 : #undef RT_BM_BIT
2943 : #undef RT_NODE_MUST_GROW
2944 : #undef RT_NODE_KIND_COUNT
2945 : #undef RT_NUM_SIZE_CLASSES
2946 : #undef RT_INVALID_SLOT_IDX
2947 : #undef RT_SLAB_BLOCK_SIZE
2948 : #undef RT_RADIX_TREE_MAGIC
2949 : #undef RT_CHILD_PTR_FORMAT
2950 :
2951 : /* type declarations */
2952 : #undef RT_RADIX_TREE
2953 : #undef RT_RADIX_TREE_CONTROL
2954 : #undef RT_CHILD_PTR
2955 : #undef RT_PTR_ALLOC
2956 : #undef RT_INVALID_PTR_ALLOC
2957 : #undef RT_HANDLE
2958 : #undef RT_ITER
2959 : #undef RT_NODE
2960 : #undef RT_NODE_ITER
2961 : #undef RT_NODE_KIND_4
2962 : #undef RT_NODE_KIND_16
2963 : #undef RT_NODE_KIND_48
2964 : #undef RT_NODE_KIND_256
2965 : #undef RT_NODE_4
2966 : #undef RT_NODE_16
2967 : #undef RT_NODE_48
2968 : #undef RT_NODE_256
2969 : #undef RT_SIZE_CLASS
2970 : #undef RT_SIZE_CLASS_ELEM
2971 : #undef RT_SIZE_CLASS_INFO
2972 : #undef RT_CLASS_4
2973 : #undef RT_CLASS_16_LO
2974 : #undef RT_CLASS_16_HI
2975 : #undef RT_CLASS_48
2976 : #undef RT_CLASS_256
2977 : #undef RT_FANOUT_4
2978 : #undef RT_FANOUT_4_MAX
2979 : #undef RT_FANOUT_16_LO
2980 : #undef RT_FANOUT_16_HI
2981 : #undef RT_FANOUT_16_MAX
2982 : #undef RT_FANOUT_48
2983 : #undef RT_FANOUT_48_MAX
2984 : #undef RT_FANOUT_256
2985 :
2986 : /* function declarations */
2987 : #undef RT_CREATE
2988 : #undef RT_FREE
2989 : #undef RT_ATTACH
2990 : #undef RT_DETACH
2991 : #undef RT_LOCK_EXCLUSIVE
2992 : #undef RT_LOCK_SHARE
2993 : #undef RT_UNLOCK
2994 : #undef RT_GET_HANDLE
2995 : #undef RT_FIND
2996 : #undef RT_SET
2997 : #undef RT_BEGIN_ITERATE
2998 : #undef RT_ITERATE_NEXT
2999 : #undef RT_END_ITERATE
3000 : #undef RT_DELETE
3001 : #undef RT_MEMORY_USAGE
3002 : #undef RT_DUMP_NODE
3003 : #undef RT_STATS
3004 :
3005 : /* internal helper functions */
3006 : #undef RT_GET_VALUE_SIZE
3007 : #undef RT_VALUE_IS_EMBEDDABLE
3008 : #undef RT_CHILDPTR_IS_VALUE
3009 : #undef RT_GET_SLOT_RECURSIVE
3010 : #undef RT_DELETE_RECURSIVE
3011 : #undef RT_ALLOC_NODE
3012 : #undef RT_ALLOC_LEAF
3013 : #undef RT_FREE_NODE
3014 : #undef RT_FREE_LEAF
3015 : #undef RT_FREE_RECURSE
3016 : #undef RT_EXTEND_UP
3017 : #undef RT_EXTEND_DOWN
3018 : #undef RT_COPY_COMMON
3019 : #undef RT_PTR_SET_LOCAL
3020 : #undef RT_PTR_ALLOC_IS_VALID
3021 : #undef RT_NODE_16_SEARCH_EQ
3022 : #undef RT_NODE_4_GET_INSERTPOS
3023 : #undef RT_NODE_16_GET_INSERTPOS
3024 : #undef RT_SHIFT_ARRAYS_FOR_INSERT
3025 : #undef RT_SHIFT_ARRAYS_AND_DELETE
3026 : #undef RT_COPY_ARRAYS_FOR_INSERT
3027 : #undef RT_COPY_ARRAYS_AND_DELETE
3028 : #undef RT_NODE_48_IS_CHUNK_USED
3029 : #undef RT_NODE_48_GET_CHILD
3030 : #undef RT_NODE_256_IS_CHUNK_USED
3031 : #undef RT_NODE_256_GET_CHILD
3032 : #undef RT_KEY_GET_SHIFT
3033 : #undef RT_SHIFT_GET_MAX_VAL
3034 : #undef RT_NODE_SEARCH
3035 : #undef RT_ADD_CHILD_4
3036 : #undef RT_ADD_CHILD_16
3037 : #undef RT_ADD_CHILD_48
3038 : #undef RT_ADD_CHILD_256
3039 : #undef RT_GROW_NODE_4
3040 : #undef RT_GROW_NODE_16
3041 : #undef RT_GROW_NODE_48
3042 : #undef RT_REMOVE_CHILD_4
3043 : #undef RT_REMOVE_CHILD_16
3044 : #undef RT_REMOVE_CHILD_48
3045 : #undef RT_REMOVE_CHILD_256
3046 : #undef RT_SHRINK_NODE_16
3047 : #undef RT_SHRINK_NODE_48
3048 : #undef RT_SHRINK_NODE_256
3049 : #undef RT_NODE_DELETE
3050 : #undef RT_NODE_INSERT
3051 : #undef RT_NODE_ITERATE_NEXT
3052 : #undef RT_VERIFY_NODE
|