Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * integerset.c
4 : * Data structure to hold a large set of 64-bit integers efficiently
5 : *
6 : * IntegerSet provides an in-memory data structure to hold a set of
7 : * arbitrary 64-bit integers. Internally, the values are stored in a
8 : * B-tree, with a special packed representation at the leaf level using
9 : * the Simple-8b algorithm, which can pack clusters of nearby values
10 : * very tightly.
11 : *
12 : * Memory consumption depends on the number of values stored, but also
13 : * on how far the values are from each other. In the best case, with
14 : * long runs of consecutive integers, memory consumption can be as low as
15 : * 0.1 bytes per integer. In the worst case, if integers are more than
16 : * 2^32 apart, it uses about 8 bytes per integer. In typical use, the
17 : * consumption per integer is somewhere between those extremes, depending
18 : * on the range of integers stored, and how "clustered" they are.
19 : *
20 : *
21 : * Interface
22 : * ---------
23 : *
24 : * intset_create - Create a new, empty set
25 : * intset_add_member - Add an integer to the set
26 : * intset_is_member - Test if an integer is in the set
27 : * intset_begin_iterate - Begin iterating through all integers in set
28 : * intset_iterate_next - Return next set member, if any
29 : *
30 : * intset_create() creates the set in the current memory context. Subsequent
31 : * operations that add to the data structure will continue to allocate from
32 : * that same context, even if it's not current anymore.
33 : *
34 : * Note that there is no function to free an integer set. If you need to do
35 : * that, create a dedicated memory context to hold it, and destroy the memory
36 : * context instead.
37 : *
38 : *
39 : * Limitations
40 : * -----------
41 : *
42 : * - Values must be added in order. (Random insertions would require
43 : * splitting nodes, which hasn't been implemented.)
44 : *
45 : * - Values cannot be added while iteration is in progress.
46 : *
47 : * - No support for removing values.
48 : *
49 : * None of these limitations are fundamental to the data structure, so they
50 : * could be lifted if needed, by writing some new code. But the current
51 : * users of this facility don't need them.
52 : *
53 : *
54 : * References
55 : * ----------
56 : *
57 : * Simple-8b encoding is based on:
58 : *
59 : * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60 : * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61 : * (https://doi.org/10.1002/spe.948)
62 : *
63 : *
64 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
65 : * Portions Copyright (c) 1994, Regents of the University of California
66 : *
67 : * IDENTIFICATION
68 : * src/backend/lib/integerset.c
69 : *
70 : *-------------------------------------------------------------------------
71 : */
72 : #include "postgres.h"
73 :
74 : #include "lib/integerset.h"
75 : #include "utils/memutils.h"
76 :
77 :
78 : /*
79 : * Maximum number of integers that can be encoded in a single Simple-8b
80 : * codeword. (Defined here before anything else, so that we can size arrays
81 : * using this.)
82 : */
83 : #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
84 :
85 : /*
86 : * Parameters for shape of the in-memory B-tree.
87 : *
88 : * These set the size of each internal and leaf node. They don't necessarily
89 : * need to be the same, because the tree is just an in-memory structure.
90 : * With the default 64, each node is about 1 kb.
91 : *
92 : * If you change these, you must recalculate MAX_TREE_LEVELS, too!
93 : */
94 : #define MAX_INTERNAL_ITEMS 64
95 : #define MAX_LEAF_ITEMS 64
96 :
97 : /*
98 : * Maximum height of the tree.
99 : *
100 : * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
101 : * theoretical maximum number of items that we can store in a set is 2^64,
102 : * so MAX_TREE_LEVELS should be set so that:
103 : *
104 : * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
105 : *
106 : * In practice, we'll need far fewer levels, because you will run out of
107 : * memory long before reaching that number, but let's be conservative.
108 : */
109 : #define MAX_TREE_LEVELS 11
110 :
111 : /*
112 : * Node structures, for the in-memory B-tree.
113 : *
114 : * An internal node holds a number of downlink pointers to leaf nodes, or
115 : * to internal nodes on a lower level. For each downlink, the key value
116 : * corresponding to the lower level node is stored in a sorted array. The
117 : * stored key values are low keys. In other words, if the downlink has value
118 : * X, then all items stored on that child are >= X.
119 : *
120 : * Each leaf node holds a number of "items", with a varying number of
121 : * integers packed into each item. Each item consists of two 64-bit words:
122 : * The first word holds the first integer stored in the item, in plain format.
123 : * The second word contains between 0 and 240 more integers, packed using
124 : * Simple-8b encoding. By storing the first integer in plain, unpacked,
125 : * format, we can use binary search to quickly find an item that holds (or
126 : * would hold) a particular integer. And by storing the rest in packed form,
127 : * we still get pretty good memory density, if there are clusters of integers
128 : * with similar values.
129 : *
130 : * Each leaf node also has a pointer to the next leaf node, so that the leaf
131 : * nodes can be easily walked from beginning to end when iterating.
132 : */
133 : typedef struct intset_node intset_node;
134 : typedef struct intset_leaf_node intset_leaf_node;
135 : typedef struct intset_internal_node intset_internal_node;
136 :
137 : /* Common structure of both leaf and internal nodes. */
138 : struct intset_node
139 : {
140 : uint16 level; /* tree level of this node */
141 : uint16 num_items; /* number of items in this node */
142 : };
143 :
144 : /* Internal node */
145 : struct intset_internal_node
146 : {
147 : /* common header, must match intset_node */
148 : uint16 level; /* >= 1 on internal nodes */
149 : uint16 num_items;
150 :
151 : /*
152 : * 'values' is an array of key values, and 'downlinks' are pointers to
153 : * lower-level nodes, corresponding to the key values.
154 : */
155 : uint64 values[MAX_INTERNAL_ITEMS];
156 : intset_node *downlinks[MAX_INTERNAL_ITEMS];
157 : };
158 :
159 : /* Leaf node */
160 : typedef struct
161 : {
162 : uint64 first; /* first integer in this item */
163 : uint64 codeword; /* simple8b encoded differences from 'first' */
164 : } leaf_item;
165 :
166 : #define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
167 :
168 : struct intset_leaf_node
169 : {
170 : /* common header, must match intset_node */
171 : uint16 level; /* 0 on leafs */
172 : uint16 num_items;
173 :
174 : intset_leaf_node *next; /* right sibling, if any */
175 :
176 : leaf_item items[MAX_LEAF_ITEMS];
177 : };
178 :
179 : /*
180 : * We buffer insertions in a simple array, before packing and inserting them
181 : * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
182 : * encoder assumes that it is large enough that we can always fill a leaf
183 : * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
184 : * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
185 : */
186 : #define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
187 :
188 : /*
189 : * IntegerSet is the top-level object representing the set.
190 : *
191 : * The integers are stored in an in-memory B-tree structure, plus an array
192 : * for newly-added integers. IntegerSet also tracks information about memory
193 : * usage, as well as the current position when iterating the set with
194 : * intset_begin_iterate / intset_iterate_next.
195 : */
196 : struct IntegerSet
197 : {
198 : /*
199 : * 'context' is the memory context holding this integer set and all its
200 : * tree nodes.
201 : *
202 : * 'mem_used' tracks the amount of memory used. We don't do anything with
203 : * it in integerset.c itself, but the callers can ask for it with
204 : * intset_memory_usage().
205 : */
206 : MemoryContext context;
207 : uint64 mem_used;
208 :
209 : uint64 num_entries; /* total # of values in the set */
210 : uint64 highest_value; /* highest value stored in this set */
211 :
212 : /*
213 : * B-tree to hold the packed values.
214 : *
215 : * 'rightmost_nodes' hold pointers to the rightmost node on each level.
216 : * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
217 : * parent, and so forth, all the way up to the root. These are needed when
218 : * adding new values. (Currently, we require that new values are added at
219 : * the end.)
220 : */
221 : int num_levels; /* height of the tree */
222 : intset_node *root; /* root node */
223 : intset_node *rightmost_nodes[MAX_TREE_LEVELS];
224 : intset_leaf_node *leftmost_leaf; /* leftmost leaf node */
225 :
226 : /*
227 : * Holding area for new items that haven't been inserted to the tree yet.
228 : */
229 : uint64 buffered_values[MAX_BUFFERED_VALUES];
230 : int num_buffered_values;
231 :
232 : /*
233 : * Iterator support.
234 : *
235 : * 'iter_values' is an array of integers ready to be returned to the
236 : * caller; 'iter_num_values' is the length of that array, and
237 : * 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point
238 : * to the leaf node, and item within the leaf node, to get the next batch
239 : * of values from.
240 : *
241 : * Normally, 'iter_values' points to 'iter_values_buf', which holds items
242 : * decoded from a leaf item. But after we have scanned the whole B-tree,
243 : * we iterate through all the unbuffered values, too, by pointing
244 : * iter_values to 'buffered_values'.
245 : */
246 : bool iter_active; /* is iteration in progress? */
247 :
248 : const uint64 *iter_values;
249 : int iter_num_values; /* number of elements in 'iter_values' */
250 : int iter_valueno; /* next index into 'iter_values' */
251 :
252 : intset_leaf_node *iter_node; /* current leaf node */
253 : int iter_itemno; /* next item in 'iter_node' to decode */
254 :
255 : uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
256 : };
257 :
258 : /*
259 : * Prototypes for internal functions.
260 : */
261 : static void intset_update_upper(IntegerSet *intset, int level,
262 : intset_node *child, uint64 child_key);
263 : static void intset_flush_buffered_values(IntegerSet *intset);
264 :
265 : static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
266 : bool nextkey);
267 : static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
268 : bool nextkey);
269 :
270 : static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
271 : static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
272 : static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
273 :
274 :
275 : /*
276 : * Create a new, initially empty, integer set.
277 : *
278 : * The integer set is created in the current memory context.
279 : * We will do all subsequent allocations in the same context, too, regardless
280 : * of which memory context is current when new integers are added to the set.
281 : */
282 : IntegerSet *
283 208 : intset_create(void)
284 : {
285 : IntegerSet *intset;
286 :
287 208 : intset = (IntegerSet *) palloc(sizeof(IntegerSet));
288 208 : intset->context = CurrentMemoryContext;
289 208 : intset->mem_used = GetMemoryChunkSpace(intset);
290 :
291 208 : intset->num_entries = 0;
292 208 : intset->highest_value = 0;
293 :
294 208 : intset->num_levels = 0;
295 208 : intset->root = NULL;
296 208 : memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
297 208 : intset->leftmost_leaf = NULL;
298 :
299 208 : intset->num_buffered_values = 0;
300 :
301 208 : intset->iter_active = false;
302 208 : intset->iter_node = NULL;
303 208 : intset->iter_itemno = 0;
304 208 : intset->iter_valueno = 0;
305 208 : intset->iter_num_values = 0;
306 208 : intset->iter_values = NULL;
307 :
308 208 : return intset;
309 : }
310 :
311 : /*
312 : * Allocate a new node.
313 : */
314 : static intset_internal_node *
315 6754 : intset_new_internal_node(IntegerSet *intset)
316 : {
317 : intset_internal_node *n;
318 :
319 6754 : n = (intset_internal_node *) MemoryContextAlloc(intset->context,
320 : sizeof(intset_internal_node));
321 6754 : intset->mem_used += GetMemoryChunkSpace(n);
322 :
323 6754 : n->level = 0; /* caller must set */
324 6754 : n->num_items = 0;
325 :
326 6754 : return n;
327 : }
328 :
329 : static intset_leaf_node *
330 423356 : intset_new_leaf_node(IntegerSet *intset)
331 : {
332 : intset_leaf_node *n;
333 :
334 423356 : n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
335 : sizeof(intset_leaf_node));
336 423356 : intset->mem_used += GetMemoryChunkSpace(n);
337 :
338 423356 : n->level = 0;
339 423356 : n->num_items = 0;
340 423356 : n->next = NULL;
341 :
342 423356 : return n;
343 : }
344 :
345 : /*
346 : * Return the number of entries in the integer set.
347 : */
348 : uint64
349 120 : intset_num_entries(IntegerSet *intset)
350 : {
351 120 : return intset->num_entries;
352 : }
353 :
354 : /*
355 : * Return the amount of memory used by the integer set.
356 : */
357 : uint64
358 10 : intset_memory_usage(IntegerSet *intset)
359 : {
360 10 : return intset->mem_used;
361 : }
362 :
363 : /*
364 : * Add a value to the set.
365 : *
366 : * Values must be added in order.
367 : */
368 : void
369 326008290 : intset_add_member(IntegerSet *intset, uint64 x)
370 : {
371 326008290 : if (intset->iter_active)
372 0 : elog(ERROR, "cannot add new values to integer set while iteration is in progress");
373 :
374 326008290 : if (x <= intset->highest_value && intset->num_entries > 0)
375 0 : elog(ERROR, "cannot add value to integer set out of order");
376 :
377 326008290 : if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
378 : {
379 : /* Time to flush our buffer */
380 1134006 : intset_flush_buffered_values(intset);
381 : Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
382 : }
383 :
384 : /* Add it to the buffer of newly-added values */
385 326008290 : intset->buffered_values[intset->num_buffered_values] = x;
386 326008290 : intset->num_buffered_values++;
387 326008290 : intset->num_entries++;
388 326008290 : intset->highest_value = x;
389 326008290 : }
390 :
391 : /*
392 : * Take a batch of buffered values, and pack them into the B-tree.
393 : */
394 : static void
395 1134006 : intset_flush_buffered_values(IntegerSet *intset)
396 : {
397 1134006 : uint64 *values = intset->buffered_values;
398 1134006 : uint64 num_values = intset->num_buffered_values;
399 1134006 : int num_packed = 0;
400 : intset_leaf_node *leaf;
401 :
402 1134006 : leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
403 :
404 : /*
405 : * If the tree is completely empty, create the first leaf page, which is
406 : * also the root.
407 : */
408 1134006 : if (leaf == NULL)
409 : {
410 : /*
411 : * This is the very first item in the set.
412 : *
413 : * Allocate root node. It's also a leaf.
414 : */
415 28 : leaf = intset_new_leaf_node(intset);
416 :
417 28 : intset->root = (intset_node *) leaf;
418 28 : intset->leftmost_leaf = leaf;
419 28 : intset->rightmost_nodes[0] = (intset_node *) leaf;
420 28 : intset->num_levels = 1;
421 : }
422 :
423 : /*
424 : * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
425 : * stop. In most cases, we cannot encode that many values in a single
426 : * value, but this way, the encoder doesn't have to worry about running
427 : * out of input.
428 : */
429 28227806 : while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
430 : {
431 : leaf_item item;
432 : int num_encoded;
433 :
434 : /*
435 : * Construct the next leaf item, packing as many buffered values as
436 : * possible.
437 : */
438 27093800 : item.first = values[num_packed];
439 27093800 : item.codeword = simple8b_encode(&values[num_packed + 1],
440 : &num_encoded,
441 : item.first);
442 :
443 : /*
444 : * Add the item to the node, allocating a new node if the old one is
445 : * full.
446 : */
447 27093800 : if (leaf->num_items >= MAX_LEAF_ITEMS)
448 : {
449 : /* Allocate new leaf and link it to the tree */
450 423328 : intset_leaf_node *old_leaf = leaf;
451 :
452 423328 : leaf = intset_new_leaf_node(intset);
453 423328 : old_leaf->next = leaf;
454 423328 : intset->rightmost_nodes[0] = (intset_node *) leaf;
455 423328 : intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
456 : }
457 27093800 : leaf->items[leaf->num_items++] = item;
458 :
459 27093800 : num_packed += 1 + num_encoded;
460 : }
461 :
462 : /*
463 : * Move any remaining buffered values to the beginning of the array.
464 : */
465 1134006 : if (num_packed < intset->num_buffered_values)
466 : {
467 1084210 : memmove(&intset->buffered_values[0],
468 1084210 : &intset->buffered_values[num_packed],
469 1084210 : (intset->num_buffered_values - num_packed) * sizeof(uint64));
470 : }
471 1134006 : intset->num_buffered_values -= num_packed;
472 1134006 : }
473 :
474 : /*
475 : * Insert a downlink into parent node, after creating a new node.
476 : *
477 : * Recurses if the parent node is full, too.
478 : */
479 : static void
480 430032 : intset_update_upper(IntegerSet *intset, int level, intset_node *child,
481 : uint64 child_key)
482 : {
483 : intset_internal_node *parent;
484 :
485 : Assert(level > 0);
486 :
487 : /*
488 : * Create a new root node, if necessary.
489 : */
490 430032 : if (level >= intset->num_levels)
491 : {
492 50 : intset_node *oldroot = intset->root;
493 : uint64 downlink_key;
494 :
495 : /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
496 50 : if (intset->num_levels == MAX_TREE_LEVELS)
497 0 : elog(ERROR, "could not expand integer set, maximum number of levels reached");
498 50 : intset->num_levels++;
499 :
500 : /*
501 : * Get the first value on the old root page, to be used as the
502 : * downlink.
503 : */
504 50 : if (intset->root->level == 0)
505 20 : downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
506 : else
507 30 : downlink_key = ((intset_internal_node *) oldroot)->values[0];
508 :
509 50 : parent = intset_new_internal_node(intset);
510 50 : parent->level = level;
511 50 : parent->values[0] = downlink_key;
512 50 : parent->downlinks[0] = oldroot;
513 50 : parent->num_items = 1;
514 :
515 50 : intset->root = (intset_node *) parent;
516 50 : intset->rightmost_nodes[level] = (intset_node *) parent;
517 : }
518 :
519 : /*
520 : * Place the downlink on the parent page.
521 : */
522 430032 : parent = (intset_internal_node *) intset->rightmost_nodes[level];
523 :
524 430032 : if (parent->num_items < MAX_INTERNAL_ITEMS)
525 : {
526 423328 : parent->values[parent->num_items] = child_key;
527 423328 : parent->downlinks[parent->num_items] = child;
528 423328 : parent->num_items++;
529 : }
530 : else
531 : {
532 : /*
533 : * Doesn't fit. Allocate new parent, with the downlink as the first
534 : * item on it, and recursively insert the downlink to the new parent
535 : * to the grandparent.
536 : */
537 6704 : parent = intset_new_internal_node(intset);
538 6704 : parent->level = level;
539 6704 : parent->values[0] = child_key;
540 6704 : parent->downlinks[0] = child;
541 6704 : parent->num_items = 1;
542 :
543 6704 : intset->rightmost_nodes[level] = (intset_node *) parent;
544 :
545 6704 : intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
546 : }
547 430032 : }
548 :
549 : /*
550 : * Does the set contain the given value?
551 : */
552 : bool
553 1806492 : intset_is_member(IntegerSet *intset, uint64 x)
554 : {
555 : intset_node *node;
556 : intset_leaf_node *leaf;
557 : int level;
558 : int itemno;
559 : leaf_item *item;
560 :
561 : /*
562 : * The value might be in the buffer of newly-added values.
563 : */
564 1806492 : if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
565 : {
566 201986 : itemno = intset_binsrch_uint64(x,
567 201986 : intset->buffered_values,
568 : intset->num_buffered_values,
569 : false);
570 201986 : if (itemno >= intset->num_buffered_values)
571 33410 : return false;
572 : else
573 168576 : return (intset->buffered_values[itemno] == x);
574 : }
575 :
576 : /*
577 : * Start from the root, and walk down the B-tree to find the right leaf
578 : * node.
579 : */
580 1604506 : if (!intset->root)
581 180 : return false;
582 1604326 : node = intset->root;
583 6008398 : for (level = intset->num_levels - 1; level > 0; level--)
584 : {
585 4404076 : intset_internal_node *n = (intset_internal_node *) node;
586 :
587 : Assert(node->level == level);
588 :
589 4404076 : itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
590 4404076 : if (itemno == 0)
591 4 : return false;
592 4404072 : node = n->downlinks[itemno - 1];
593 : }
594 : Assert(node->level == 0);
595 1604322 : leaf = (intset_leaf_node *) node;
596 :
597 : /*
598 : * Binary search to find the right item on the leaf page
599 : */
600 1604322 : itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
601 1604322 : if (itemno == 0)
602 18 : return false;
603 1604304 : item = &leaf->items[itemno - 1];
604 :
605 : /* Is this a match to the first value on the item? */
606 1604304 : if (item->first == x)
607 3324 : return true;
608 : Assert(x > item->first);
609 :
610 : /* Is it in the packed codeword? */
611 1600980 : if (simple8b_contains(item->codeword, x, item->first))
612 300510 : return true;
613 :
614 1300470 : return false;
615 : }
616 :
617 : /*
618 : * Begin in-order scan through all the values.
619 : *
620 : * While the iteration is in-progress, you cannot add new values to the set.
621 : */
622 : void
623 124 : intset_begin_iterate(IntegerSet *intset)
624 : {
625 : /* Note that we allow an iteration to be abandoned midway */
626 124 : intset->iter_active = true;
627 124 : intset->iter_node = intset->leftmost_leaf;
628 124 : intset->iter_itemno = 0;
629 124 : intset->iter_valueno = 0;
630 124 : intset->iter_num_values = 0;
631 124 : intset->iter_values = intset->iter_values_buf;
632 124 : }
633 :
634 : /*
635 : * Returns the next integer, when iterating.
636 : *
637 : * intset_begin_iterate() must be called first. intset_iterate_next() returns
638 : * the next value in the set. Returns true, if there was another value, and
639 : * stores the value in *next. Otherwise, returns false.
640 : */
641 : bool
642 353525310 : intset_iterate_next(IntegerSet *intset, uint64 *next)
643 : {
644 : Assert(intset->iter_active);
645 : for (;;)
646 : {
647 : /* Return next iter_values[] entry if any */
648 353525310 : if (intset->iter_valueno < intset->iter_num_values)
649 : {
650 326008066 : *next = intset->iter_values[intset->iter_valueno++];
651 326008066 : return true;
652 : }
653 :
654 : /* Decode next item in current leaf node, if any */
655 27517244 : if (intset->iter_node &&
656 27517156 : intset->iter_itemno < intset->iter_node->num_items)
657 : {
658 : leaf_item *item;
659 : int num_decoded;
660 :
661 27093800 : item = &intset->iter_node->items[intset->iter_itemno++];
662 :
663 27093800 : intset->iter_values_buf[0] = item->first;
664 27093800 : num_decoded = simple8b_decode(item->codeword,
665 : &intset->iter_values_buf[1],
666 : item->first);
667 27093800 : intset->iter_num_values = num_decoded + 1;
668 27093800 : intset->iter_valueno = 0;
669 27093800 : continue;
670 : }
671 :
672 : /* No more items on this leaf, step to next node */
673 423444 : if (intset->iter_node)
674 : {
675 423356 : intset->iter_node = intset->iter_node->next;
676 423356 : intset->iter_itemno = 0;
677 423356 : continue;
678 : }
679 :
680 : /*
681 : * We have reached the end of the B-tree. But we might still have
682 : * some integers in the buffer of newly-added values.
683 : */
684 88 : if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
685 : {
686 54 : intset->iter_values = intset->buffered_values;
687 54 : intset->iter_num_values = intset->num_buffered_values;
688 54 : intset->iter_valueno = 0;
689 54 : continue;
690 : }
691 :
692 34 : break;
693 : }
694 :
695 : /* No more results. */
696 34 : intset->iter_active = false;
697 34 : *next = 0; /* prevent uninitialized-variable warnings */
698 34 : return false;
699 : }
700 :
701 : /*
702 : * intset_binsrch_uint64() -- search a sorted array of uint64s
703 : *
704 : * Returns the first position with key equal or less than the given key.
705 : * The returned position would be the "insert" location for the given key,
706 : * that is, the position where the new key should be inserted to.
707 : *
708 : * 'nextkey' affects the behavior on equal keys. If true, and there is an
709 : * equal key in the array, this returns the position immediately after the
710 : * equal key. If false, this returns the position of the equal key itself.
711 : */
712 : static int
713 4606062 : intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
714 : {
715 : int low,
716 : high,
717 : mid;
718 :
719 4606062 : low = 0;
720 4606062 : high = arr_elems;
721 27975948 : while (high > low)
722 : {
723 23369886 : mid = low + (high - low) / 2;
724 :
725 23369886 : if (nextkey)
726 : {
727 22453562 : if (item >= arr[mid])
728 11133834 : low = mid + 1;
729 : else
730 11319728 : high = mid;
731 : }
732 : else
733 : {
734 916324 : if (item > arr[mid])
735 507394 : low = mid + 1;
736 : else
737 408930 : high = mid;
738 : }
739 : }
740 :
741 4606062 : return low;
742 : }
743 :
744 : /* same, but for an array of leaf items */
745 : static int
746 1604322 : intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
747 : {
748 : int low,
749 : high,
750 : mid;
751 :
752 1604322 : low = 0;
753 1604322 : high = arr_elems;
754 11253488 : while (high > low)
755 : {
756 9649166 : mid = low + (high - low) / 2;
757 :
758 9649166 : if (nextkey)
759 : {
760 9649166 : if (item >= arr[mid].first)
761 4874058 : low = mid + 1;
762 : else
763 4775108 : high = mid;
764 : }
765 : else
766 : {
767 0 : if (item > arr[mid].first)
768 0 : low = mid + 1;
769 : else
770 0 : high = mid;
771 : }
772 : }
773 :
774 1604322 : return low;
775 : }
776 :
777 : /*
778 : * Simple-8b encoding.
779 : *
780 : * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
781 : * called "codewords". The number of integers packed into a single codeword
782 : * depends on the integers being packed; small integers are encoded using
783 : * fewer bits than large integers. A single codeword can store a single
784 : * 60-bit integer, or two 30-bit integers, for example.
785 : *
786 : * Since we're storing a unique, sorted, set of integers, we actually encode
787 : * the *differences* between consecutive integers. That way, clusters of
788 : * integers that are close to each other are packed efficiently, regardless
789 : * of their absolute values.
790 : *
791 : * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
792 : * how many integers are encoded in the codeword, and the encoded integers are
793 : * packed into the remaining 60 bits. The selector allows for 16 different
794 : * ways of using the remaining 60 bits, called "modes". The number of integers
795 : * packed into a single codeword in each mode is listed in the simple8b_modes
796 : * table below. For example, consider the following codeword:
797 : *
798 : * 20-bit integer 20-bit integer 20-bit integer
799 : * 1101 00000000000000010010 01111010000100100000 00000000000000010100
800 : * ^
801 : * selector
802 : *
803 : * The selector 1101 is 13 in decimal. From the modes table below, we see
804 : * that it means that the codeword encodes three 20-bit integers. In decimal,
805 : * those integers are 18, 500000 and 20. Because we encode deltas rather than
806 : * absolute values, the actual values that they represent are 18, 500018 and
807 : * 500038.
808 : *
809 : * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
810 : * (which means 240 or 120 consecutive integers, since we're encoding the
811 : * deltas between integers), without using the rest of the codeword bits
812 : * for anything.
813 : *
814 : * Simple-8b cannot encode integers larger than 60 bits. Values larger than
815 : * that are always stored in the 'first' field of a leaf item, never in the
816 : * packed codeword. If there is a sequence of integers that are more than
817 : * 2^60 apart, the codeword will go unused on those items. To represent that,
818 : * we use a magic EMPTY_CODEWORD codeword value.
819 : */
820 : static const struct simple8b_mode
821 : {
822 : uint8 bits_per_int;
823 : uint8 num_ints;
824 : } simple8b_modes[17] =
825 :
826 : {
827 : {0, 240}, /* mode 0: 240 zeroes */
828 : {0, 120}, /* mode 1: 120 zeroes */
829 : {1, 60}, /* mode 2: sixty 1-bit integers */
830 : {2, 30}, /* mode 3: thirty 2-bit integers */
831 : {3, 20}, /* mode 4: twenty 3-bit integers */
832 : {4, 15}, /* mode 5: fifteen 4-bit integers */
833 : {5, 12}, /* mode 6: twelve 5-bit integers */
834 : {6, 10}, /* mode 7: ten 6-bit integers */
835 : {7, 8}, /* mode 8: eight 7-bit integers (four bits
836 : * are wasted) */
837 : {8, 7}, /* mode 9: seven 8-bit integers (four bits
838 : * are wasted) */
839 : {10, 6}, /* mode 10: six 10-bit integers */
840 : {12, 5}, /* mode 11: five 12-bit integers */
841 : {15, 4}, /* mode 12: four 15-bit integers */
842 : {20, 3}, /* mode 13: three 20-bit integers */
843 : {30, 2}, /* mode 14: two 30-bit integers */
844 : {60, 1}, /* mode 15: one 60-bit integer */
845 :
846 : {0, 0} /* sentinel value */
847 : };
848 :
849 : /*
850 : * EMPTY_CODEWORD is a special value, used to indicate "no values".
851 : * It is used if the next value is too large to be encoded with Simple-8b.
852 : *
853 : * This value looks like a mode-0 codeword, but we can distinguish it
854 : * because a regular mode-0 codeword would have zeroes in the unused bits.
855 : */
856 : #define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF)
857 :
858 : /*
859 : * Encode a number of integers into a Simple-8b codeword.
860 : *
861 : * (What we actually encode are deltas between successive integers.
862 : * "base" is the value before ints[0].)
863 : *
864 : * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
865 : * elements, ensuring that we can produce a full codeword.
866 : *
867 : * Returns the encoded codeword, and sets *num_encoded to the number of
868 : * input integers that were encoded. That can be zero, if the first delta
869 : * is too large to be encoded.
870 : */
871 : static uint64
872 27093800 : simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
873 : {
874 : int selector;
875 : int nints;
876 : int bits;
877 : uint64 diff;
878 : uint64 last_val;
879 : uint64 codeword;
880 : int i;
881 :
882 : Assert(ints[0] > base);
883 :
884 : /*
885 : * Select the "mode" to use for this codeword.
886 : *
887 : * In each iteration, check if the next value can be represented in the
888 : * current mode we're considering. If it's too large, then step up the
889 : * mode to a wider one, and repeat. If it fits, move on to the next
890 : * integer. Repeat until the codeword is full, given the current mode.
891 : *
892 : * Note that we don't have any way to represent unused slots in the
893 : * codeword, so we require each codeword to be "full". It is always
894 : * possible to produce a full codeword unless the very first delta is too
895 : * large to be encoded. For example, if the first delta is small but the
896 : * second is too large to be encoded, we'll end up using the last "mode",
897 : * which has nints == 1.
898 : */
899 27093800 : selector = 0;
900 27093800 : nints = simple8b_modes[0].num_ints;
901 27093800 : bits = simple8b_modes[0].bits_per_int;
902 27093800 : diff = ints[0] - base - 1;
903 27093800 : last_val = ints[0];
904 27093800 : i = 0; /* number of deltas we have accepted */
905 : for (;;)
906 : {
907 691891160 : if (diff >= (UINT64CONST(1) << bits))
908 : {
909 : /* too large, step up to next mode */
910 297985970 : selector++;
911 297985970 : nints = simple8b_modes[selector].num_ints;
912 297985970 : bits = simple8b_modes[selector].bits_per_int;
913 : /* we might already have accepted enough deltas for this mode */
914 297985970 : if (i >= nints)
915 12999830 : break;
916 : }
917 : else
918 : {
919 : /* accept this delta; then done if codeword is full */
920 393905190 : i++;
921 393905190 : if (i >= nints)
922 14093970 : break;
923 : /* examine next delta */
924 : Assert(ints[i] > last_val);
925 379811220 : diff = ints[i] - last_val - 1;
926 379811220 : last_val = ints[i];
927 : }
928 : }
929 :
930 27093800 : if (nints == 0)
931 : {
932 : /*
933 : * The first delta is too large to be encoded with Simple-8b.
934 : *
935 : * If there is at least one not-too-large integer in the input, we
936 : * will encode it using mode 15 (or a more compact mode). Hence, we
937 : * can only get here if the *first* delta is >= 2^60.
938 : */
939 : Assert(i == 0);
940 8 : *num_encoded = 0;
941 8 : return EMPTY_CODEWORD;
942 : }
943 :
944 : /*
945 : * Encode the integers using the selected mode. Note that we shift them
946 : * into the codeword in reverse order, so that they will come out in the
947 : * correct order in the decoder.
948 : */
949 27093792 : codeword = 0;
950 27093792 : if (bits > 0)
951 : {
952 275002100 : for (i = nints - 1; i > 0; i--)
953 : {
954 248007906 : diff = ints[i] - ints[i - 1] - 1;
955 248007906 : codeword |= diff;
956 248007906 : codeword <<= bits;
957 : }
958 26994194 : diff = ints[0] - base - 1;
959 26994194 : codeword |= diff;
960 : }
961 :
962 : /* add selector to the codeword, and return */
963 27093792 : codeword |= (uint64) selector << 60;
964 :
965 27093792 : *num_encoded = nints;
966 27093792 : return codeword;
967 : }
968 :
969 : /*
970 : * Decode a codeword into an array of integers.
971 : * Returns the number of integers decoded.
972 : */
973 : static int
974 27093800 : simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
975 : {
976 27093800 : int selector = (codeword >> 60);
977 27093800 : int nints = simple8b_modes[selector].num_ints;
978 27093800 : int bits = simple8b_modes[selector].bits_per_int;
979 27093800 : uint64 mask = (UINT64CONST(1) << bits) - 1;
980 : uint64 curr_value;
981 :
982 27093800 : if (codeword == EMPTY_CODEWORD)
983 8 : return 0;
984 :
985 27093792 : curr_value = base;
986 325999412 : for (int i = 0; i < nints; i++)
987 : {
988 298905620 : uint64 diff = codeword & mask;
989 :
990 298905620 : curr_value += 1 + diff;
991 298905620 : decoded[i] = curr_value;
992 298905620 : codeword >>= bits;
993 : }
994 27093792 : return nints;
995 : }
996 :
997 : /*
998 : * This is very similar to simple8b_decode(), but instead of decoding all
999 : * the values to an array, it just checks if the given "key" is part of
1000 : * the codeword.
1001 : */
1002 : static bool
1003 1600980 : simple8b_contains(uint64 codeword, uint64 key, uint64 base)
1004 : {
1005 1600980 : int selector = (codeword >> 60);
1006 1600980 : int nints = simple8b_modes[selector].num_ints;
1007 1600980 : int bits = simple8b_modes[selector].bits_per_int;
1008 :
1009 1600980 : if (codeword == EMPTY_CODEWORD)
1010 16 : return false;
1011 :
1012 1600964 : if (bits == 0)
1013 : {
1014 : /* Special handling for 0-bit cases. */
1015 199142 : return (key - base) <= nints;
1016 : }
1017 : else
1018 : {
1019 1401822 : uint64 mask = (UINT64CONST(1) << bits) - 1;
1020 : uint64 curr_value;
1021 :
1022 1401822 : curr_value = base;
1023 10422046 : for (int i = 0; i < nints; i++)
1024 : {
1025 9704466 : uint64 diff = codeword & mask;
1026 :
1027 9704466 : curr_value += 1 + diff;
1028 :
1029 9704466 : if (curr_value >= key)
1030 : {
1031 684242 : if (curr_value == key)
1032 101368 : return true;
1033 : else
1034 582874 : return false;
1035 : }
1036 :
1037 9020224 : codeword >>= bits;
1038 : }
1039 : }
1040 717580 : return false;
1041 : }
|