Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * integerset.c
4 : * Data structure to hold a large set of 64-bit integers efficiently
5 : *
6 : * IntegerSet provides an in-memory data structure to hold a set of
7 : * arbitrary 64-bit integers. Internally, the values are stored in a
8 : * B-tree, with a special packed representation at the leaf level using
9 : * the Simple-8b algorithm, which can pack clusters of nearby values
10 : * very tightly.
11 : *
12 : * Memory consumption depends on the number of values stored, but also
13 : * on how far the values are from each other. In the best case, with
14 : * long runs of consecutive integers, memory consumption can be as low as
15 : * 0.1 bytes per integer. In the worst case, if integers are more than
16 : * 2^32 apart, it uses about 8 bytes per integer. In typical use, the
17 : * consumption per integer is somewhere between those extremes, depending
18 : * on the range of integers stored, and how "clustered" they are.
19 : *
20 : *
21 : * Interface
22 : * ---------
23 : *
24 : * intset_create - Create a new, empty set
25 : * intset_add_member - Add an integer to the set
26 : * intset_is_member - Test if an integer is in the set
27 : * intset_begin_iterate - Begin iterating through all integers in set
28 : * intset_iterate_next - Return next set member, if any
29 : *
30 : * intset_create() creates the set in the current memory context. Subsequent
31 : * operations that add to the data structure will continue to allocate from
32 : * that same context, even if it's not current anymore.
33 : *
34 : * Note that there is no function to free an integer set. If you need to do
35 : * that, create a dedicated memory context to hold it, and destroy the memory
36 : * context instead.
37 : *
38 : *
39 : * Limitations
40 : * -----------
41 : *
42 : * - Values must be added in order. (Random insertions would require
43 : * splitting nodes, which hasn't been implemented.)
44 : *
45 : * - Values cannot be added while iteration is in progress.
46 : *
47 : * - No support for removing values.
48 : *
49 : * None of these limitations are fundamental to the data structure, so they
50 : * could be lifted if needed, by writing some new code. But the current
51 : * users of this facility don't need them.
52 : *
53 : *
54 : * References
55 : * ----------
56 : *
57 : * Simple-8b encoding is based on:
58 : *
59 : * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60 : * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61 : * (https://doi.org/10.1002/spe.948)
62 : *
63 : *
64 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
65 : * Portions Copyright (c) 1994, Regents of the University of California
66 : *
67 : * IDENTIFICATION
68 : * src/backend/lib/integerset.c
69 : *
70 : *-------------------------------------------------------------------------
71 : */
72 : #include "postgres.h"
73 :
74 : #include "lib/integerset.h"
75 : #include "port/pg_bitutils.h"
76 : #include "utils/memutils.h"
77 :
78 :
79 : /*
80 : * Maximum number of integers that can be encoded in a single Simple-8b
81 : * codeword. (Defined here before anything else, so that we can size arrays
82 : * using this.)
83 : */
84 : #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
85 :
86 : /*
87 : * Parameters for shape of the in-memory B-tree.
88 : *
89 : * These set the size of each internal and leaf node. They don't necessarily
90 : * need to be the same, because the tree is just an in-memory structure.
91 : * With the default 64, each node is about 1 kb.
92 : *
93 : * If you change these, you must recalculate MAX_TREE_LEVELS, too!
94 : */
95 : #define MAX_INTERNAL_ITEMS 64
96 : #define MAX_LEAF_ITEMS 64
97 :
98 : /*
99 : * Maximum height of the tree.
100 : *
101 : * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
102 : * theoretical maximum number of items that we can store in a set is 2^64,
103 : * so MAX_TREE_LEVELS should be set so that:
104 : *
105 : * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
106 : *
107 : * In practice, we'll need far fewer levels, because you will run out of
108 : * memory long before reaching that number, but let's be conservative.
109 : */
110 : #define MAX_TREE_LEVELS 11
111 :
112 : /*
113 : * Node structures, for the in-memory B-tree.
114 : *
115 : * An internal node holds a number of downlink pointers to leaf nodes, or
116 : * to internal nodes on a lower level. For each downlink, the key value
117 : * corresponding to the lower level node is stored in a sorted array. The
118 : * stored key values are low keys. In other words, if the downlink has value
119 : * X, then all items stored on that child are >= X.
120 : *
121 : * Each leaf node holds a number of "items", with a varying number of
122 : * integers packed into each item. Each item consists of two 64-bit words:
123 : * The first word holds the first integer stored in the item, in plain format.
124 : * The second word contains between 0 and 240 more integers, packed using
125 : * Simple-8b encoding. By storing the first integer in plain, unpacked,
126 : * format, we can use binary search to quickly find an item that holds (or
127 : * would hold) a particular integer. And by storing the rest in packed form,
128 : * we still get pretty good memory density, if there are clusters of integers
129 : * with similar values.
130 : *
131 : * Each leaf node also has a pointer to the next leaf node, so that the leaf
132 : * nodes can be easily walked from beginning to end when iterating.
133 : */
134 : typedef struct intset_node intset_node;
135 : typedef struct intset_leaf_node intset_leaf_node;
136 : typedef struct intset_internal_node intset_internal_node;
137 :
138 : /* Common structure of both leaf and internal nodes. */
139 : struct intset_node
140 : {
141 : uint16 level; /* tree level of this node */
142 : uint16 num_items; /* number of items in this node */
143 : };
144 :
145 : /* Internal node */
146 : struct intset_internal_node
147 : {
148 : /* common header, must match intset_node */
149 : uint16 level; /* >= 1 on internal nodes */
150 : uint16 num_items;
151 :
152 : /*
153 : * 'values' is an array of key values, and 'downlinks' are pointers to
154 : * lower-level nodes, corresponding to the key values.
155 : */
156 : uint64 values[MAX_INTERNAL_ITEMS];
157 : intset_node *downlinks[MAX_INTERNAL_ITEMS];
158 : };
159 :
160 : /* Leaf node */
161 : typedef struct
162 : {
163 : uint64 first; /* first integer in this item */
164 : uint64 codeword; /* simple8b encoded differences from 'first' */
165 : } leaf_item;
166 :
167 : #define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
168 :
169 : struct intset_leaf_node
170 : {
171 : /* common header, must match intset_node */
172 : uint16 level; /* 0 on leafs */
173 : uint16 num_items;
174 :
175 : intset_leaf_node *next; /* right sibling, if any */
176 :
177 : leaf_item items[MAX_LEAF_ITEMS];
178 : };
179 :
180 : /*
181 : * We buffer insertions in a simple array, before packing and inserting them
182 : * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
183 : * encoder assumes that it is large enough that we can always fill a leaf
184 : * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
185 : * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
186 : */
187 : #define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
188 :
189 : /*
190 : * IntegerSet is the top-level object representing the set.
191 : *
192 : * The integers are stored in an in-memory B-tree structure, plus an array
193 : * for newly-added integers. IntegerSet also tracks information about memory
194 : * usage, as well as the current position when iterating the set with
195 : * intset_begin_iterate / intset_iterate_next.
196 : */
197 : struct IntegerSet
198 : {
199 : /*
200 : * 'context' is the memory context holding this integer set and all its
201 : * tree nodes.
202 : *
203 : * 'mem_used' tracks the amount of memory used. We don't do anything with
204 : * it in integerset.c itself, but the callers can ask for it with
205 : * intset_memory_usage().
206 : */
207 : MemoryContext context;
208 : uint64 mem_used;
209 :
210 : uint64 num_entries; /* total # of values in the set */
211 : uint64 highest_value; /* highest value stored in this set */
212 :
213 : /*
214 : * B-tree to hold the packed values.
215 : *
216 : * 'rightmost_nodes' hold pointers to the rightmost node on each level.
217 : * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
218 : * parent, and so forth, all the way up to the root. These are needed when
219 : * adding new values. (Currently, we require that new values are added at
220 : * the end.)
221 : */
222 : int num_levels; /* height of the tree */
223 : intset_node *root; /* root node */
224 : intset_node *rightmost_nodes[MAX_TREE_LEVELS];
225 : intset_leaf_node *leftmost_leaf; /* leftmost leaf node */
226 :
227 : /*
228 : * Holding area for new items that haven't been inserted to the tree yet.
229 : */
230 : uint64 buffered_values[MAX_BUFFERED_VALUES];
231 : int num_buffered_values;
232 :
233 : /*
234 : * Iterator support.
235 : *
236 : * 'iter_values' is an array of integers ready to be returned to the
237 : * caller; 'iter_num_values' is the length of that array, and
238 : * 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point
239 : * to the leaf node, and item within the leaf node, to get the next batch
240 : * of values from.
241 : *
242 : * Normally, 'iter_values' points to 'iter_values_buf', which holds items
243 : * decoded from a leaf item. But after we have scanned the whole B-tree,
244 : * we iterate through all the unbuffered values, too, by pointing
245 : * iter_values to 'buffered_values'.
246 : */
247 : bool iter_active; /* is iteration in progress? */
248 :
249 : const uint64 *iter_values;
250 : int iter_num_values; /* number of elements in 'iter_values' */
251 : int iter_valueno; /* next index into 'iter_values' */
252 :
253 : intset_leaf_node *iter_node; /* current leaf node */
254 : int iter_itemno; /* next item in 'iter_node' to decode */
255 :
256 : uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
257 : };
258 :
259 : /*
260 : * Prototypes for internal functions.
261 : */
262 : static void intset_update_upper(IntegerSet *intset, int level,
263 : intset_node *child, uint64 child_key);
264 : static void intset_flush_buffered_values(IntegerSet *intset);
265 :
266 : static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
267 : bool nextkey);
268 : static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
269 : bool nextkey);
270 :
271 : static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
272 : static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
273 : static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
274 :
275 :
276 : /*
277 : * Create a new, initially empty, integer set.
278 : *
279 : * The integer set is created in the current memory context.
280 : * We will do all subsequent allocations in the same context, too, regardless
281 : * of which memory context is current when new integers are added to the set.
282 : */
283 : IntegerSet *
284 204 : intset_create(void)
285 : {
286 : IntegerSet *intset;
287 :
288 204 : intset = (IntegerSet *) palloc(sizeof(IntegerSet));
289 204 : intset->context = CurrentMemoryContext;
290 204 : intset->mem_used = GetMemoryChunkSpace(intset);
291 :
292 204 : intset->num_entries = 0;
293 204 : intset->highest_value = 0;
294 :
295 204 : intset->num_levels = 0;
296 204 : intset->root = NULL;
297 204 : memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
298 204 : intset->leftmost_leaf = NULL;
299 :
300 204 : intset->num_buffered_values = 0;
301 :
302 204 : intset->iter_active = false;
303 204 : intset->iter_node = NULL;
304 204 : intset->iter_itemno = 0;
305 204 : intset->iter_valueno = 0;
306 204 : intset->iter_num_values = 0;
307 204 : intset->iter_values = NULL;
308 :
309 204 : return intset;
310 : }
311 :
312 : /*
313 : * Allocate a new node.
314 : */
315 : static intset_internal_node *
316 6754 : intset_new_internal_node(IntegerSet *intset)
317 : {
318 : intset_internal_node *n;
319 :
320 6754 : n = (intset_internal_node *) MemoryContextAlloc(intset->context,
321 : sizeof(intset_internal_node));
322 6754 : intset->mem_used += GetMemoryChunkSpace(n);
323 :
324 6754 : n->level = 0; /* caller must set */
325 6754 : n->num_items = 0;
326 :
327 6754 : return n;
328 : }
329 :
330 : static intset_leaf_node *
331 423356 : intset_new_leaf_node(IntegerSet *intset)
332 : {
333 : intset_leaf_node *n;
334 :
335 423356 : n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
336 : sizeof(intset_leaf_node));
337 423356 : intset->mem_used += GetMemoryChunkSpace(n);
338 :
339 423356 : n->level = 0;
340 423356 : n->num_items = 0;
341 423356 : n->next = NULL;
342 :
343 423356 : return n;
344 : }
345 :
346 : /*
347 : * Return the number of entries in the integer set.
348 : */
349 : uint64
350 118 : intset_num_entries(IntegerSet *intset)
351 : {
352 118 : return intset->num_entries;
353 : }
354 :
355 : /*
356 : * Return the amount of memory used by the integer set.
357 : */
358 : uint64
359 10 : intset_memory_usage(IntegerSet *intset)
360 : {
361 10 : return intset->mem_used;
362 : }
363 :
364 : /*
365 : * Add a value to the set.
366 : *
367 : * Values must be added in order.
368 : */
369 : void
370 326008126 : intset_add_member(IntegerSet *intset, uint64 x)
371 : {
372 326008126 : if (intset->iter_active)
373 0 : elog(ERROR, "cannot add new values to integer set while iteration is in progress");
374 :
375 326008126 : if (x <= intset->highest_value && intset->num_entries > 0)
376 0 : elog(ERROR, "cannot add value to integer set out of order");
377 :
378 326008126 : if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
379 : {
380 : /* Time to flush our buffer */
381 1134006 : intset_flush_buffered_values(intset);
382 : Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
383 : }
384 :
385 : /* Add it to the buffer of newly-added values */
386 326008126 : intset->buffered_values[intset->num_buffered_values] = x;
387 326008126 : intset->num_buffered_values++;
388 326008126 : intset->num_entries++;
389 326008126 : intset->highest_value = x;
390 326008126 : }
391 :
392 : /*
393 : * Take a batch of buffered values, and pack them into the B-tree.
394 : */
395 : static void
396 1134006 : intset_flush_buffered_values(IntegerSet *intset)
397 : {
398 1134006 : uint64 *values = intset->buffered_values;
399 1134006 : uint64 num_values = intset->num_buffered_values;
400 1134006 : int num_packed = 0;
401 : intset_leaf_node *leaf;
402 :
403 1134006 : leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
404 :
405 : /*
406 : * If the tree is completely empty, create the first leaf page, which is
407 : * also the root.
408 : */
409 1134006 : if (leaf == NULL)
410 : {
411 : /*
412 : * This is the very first item in the set.
413 : *
414 : * Allocate root node. It's also a leaf.
415 : */
416 28 : leaf = intset_new_leaf_node(intset);
417 :
418 28 : intset->root = (intset_node *) leaf;
419 28 : intset->leftmost_leaf = leaf;
420 28 : intset->rightmost_nodes[0] = (intset_node *) leaf;
421 28 : intset->num_levels = 1;
422 : }
423 :
424 : /*
425 : * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
426 : * stop. In most cases, we cannot encode that many values in a single
427 : * value, but this way, the encoder doesn't have to worry about running
428 : * out of input.
429 : */
430 28227808 : while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
431 : {
432 : leaf_item item;
433 : int num_encoded;
434 :
435 : /*
436 : * Construct the next leaf item, packing as many buffered values as
437 : * possible.
438 : */
439 27093802 : item.first = values[num_packed];
440 27093802 : item.codeword = simple8b_encode(&values[num_packed + 1],
441 : &num_encoded,
442 : item.first);
443 :
444 : /*
445 : * Add the item to the node, allocating a new node if the old one is
446 : * full.
447 : */
448 27093802 : if (leaf->num_items >= MAX_LEAF_ITEMS)
449 : {
450 : /* Allocate new leaf and link it to the tree */
451 423328 : intset_leaf_node *old_leaf = leaf;
452 :
453 423328 : leaf = intset_new_leaf_node(intset);
454 423328 : old_leaf->next = leaf;
455 423328 : intset->rightmost_nodes[0] = (intset_node *) leaf;
456 423328 : intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
457 : }
458 27093802 : leaf->items[leaf->num_items++] = item;
459 :
460 27093802 : num_packed += 1 + num_encoded;
461 : }
462 :
463 : /*
464 : * Move any remaining buffered values to the beginning of the array.
465 : */
466 1134006 : if (num_packed < intset->num_buffered_values)
467 : {
468 1084210 : memmove(&intset->buffered_values[0],
469 1084210 : &intset->buffered_values[num_packed],
470 1084210 : (intset->num_buffered_values - num_packed) * sizeof(uint64));
471 : }
472 1134006 : intset->num_buffered_values -= num_packed;
473 1134006 : }
474 :
475 : /*
476 : * Insert a downlink into parent node, after creating a new node.
477 : *
478 : * Recurses if the parent node is full, too.
479 : */
480 : static void
481 430032 : intset_update_upper(IntegerSet *intset, int level, intset_node *child,
482 : uint64 child_key)
483 : {
484 : intset_internal_node *parent;
485 :
486 : Assert(level > 0);
487 :
488 : /*
489 : * Create a new root node, if necessary.
490 : */
491 430032 : if (level >= intset->num_levels)
492 : {
493 50 : intset_node *oldroot = intset->root;
494 : uint64 downlink_key;
495 :
496 : /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
497 50 : if (intset->num_levels == MAX_TREE_LEVELS)
498 0 : elog(ERROR, "could not expand integer set, maximum number of levels reached");
499 50 : intset->num_levels++;
500 :
501 : /*
502 : * Get the first value on the old root page, to be used as the
503 : * downlink.
504 : */
505 50 : if (intset->root->level == 0)
506 20 : downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
507 : else
508 30 : downlink_key = ((intset_internal_node *) oldroot)->values[0];
509 :
510 50 : parent = intset_new_internal_node(intset);
511 50 : parent->level = level;
512 50 : parent->values[0] = downlink_key;
513 50 : parent->downlinks[0] = oldroot;
514 50 : parent->num_items = 1;
515 :
516 50 : intset->root = (intset_node *) parent;
517 50 : intset->rightmost_nodes[level] = (intset_node *) parent;
518 : }
519 :
520 : /*
521 : * Place the downlink on the parent page.
522 : */
523 430032 : parent = (intset_internal_node *) intset->rightmost_nodes[level];
524 :
525 430032 : if (parent->num_items < MAX_INTERNAL_ITEMS)
526 : {
527 423328 : parent->values[parent->num_items] = child_key;
528 423328 : parent->downlinks[parent->num_items] = child;
529 423328 : parent->num_items++;
530 : }
531 : else
532 : {
533 : /*
534 : * Doesn't fit. Allocate new parent, with the downlink as the first
535 : * item on it, and recursively insert the downlink to the new parent
536 : * to the grandparent.
537 : */
538 6704 : parent = intset_new_internal_node(intset);
539 6704 : parent->level = level;
540 6704 : parent->values[0] = child_key;
541 6704 : parent->downlinks[0] = child;
542 6704 : parent->num_items = 1;
543 :
544 6704 : intset->rightmost_nodes[level] = (intset_node *) parent;
545 :
546 6704 : intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
547 : }
548 430032 : }
549 :
550 : /*
551 : * Does the set contain the given value?
552 : */
553 : bool
554 1806166 : intset_is_member(IntegerSet *intset, uint64 x)
555 : {
556 : intset_node *node;
557 : intset_leaf_node *leaf;
558 : int level;
559 : int itemno;
560 : leaf_item *item;
561 :
562 : /*
563 : * The value might be in the buffer of newly-added values.
564 : */
565 1806166 : if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
566 : {
567 201860 : itemno = intset_binsrch_uint64(x,
568 201860 : intset->buffered_values,
569 : intset->num_buffered_values,
570 : false);
571 201860 : if (itemno >= intset->num_buffered_values)
572 33524 : return false;
573 : else
574 168336 : return (intset->buffered_values[itemno] == x);
575 : }
576 :
577 : /*
578 : * Start from the root, and walk down the B-tree to find the right leaf
579 : * node.
580 : */
581 1604306 : if (!intset->root)
582 16 : return false;
583 1604290 : node = intset->root;
584 6008278 : for (level = intset->num_levels - 1; level > 0; level--)
585 : {
586 4403992 : intset_internal_node *n = (intset_internal_node *) node;
587 :
588 : Assert(node->level == level);
589 :
590 4403992 : itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
591 4403992 : if (itemno == 0)
592 4 : return false;
593 4403988 : node = n->downlinks[itemno - 1];
594 : }
595 : Assert(node->level == 0);
596 1604286 : leaf = (intset_leaf_node *) node;
597 :
598 : /*
599 : * Binary search to find the right item on the leaf page
600 : */
601 1604286 : itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
602 1604286 : if (itemno == 0)
603 18 : return false;
604 1604268 : item = &leaf->items[itemno - 1];
605 :
606 : /* Is this a match to the first value on the item? */
607 1604268 : if (item->first == x)
608 3228 : return true;
609 : Assert(x > item->first);
610 :
611 : /* Is it in the packed codeword? */
612 1601040 : if (simple8b_contains(item->codeword, x, item->first))
613 300560 : return true;
614 :
615 1300480 : return false;
616 : }
617 :
618 : /*
619 : * Begin in-order scan through all the values.
620 : *
621 : * While the iteration is in-progress, you cannot add new values to the set.
622 : */
623 : void
624 122 : intset_begin_iterate(IntegerSet *intset)
625 : {
626 : /* Note that we allow an iteration to be abandoned midway */
627 122 : intset->iter_active = true;
628 122 : intset->iter_node = intset->leftmost_leaf;
629 122 : intset->iter_itemno = 0;
630 122 : intset->iter_valueno = 0;
631 122 : intset->iter_num_values = 0;
632 122 : intset->iter_values = intset->iter_values_buf;
633 122 : }
634 :
635 : /*
636 : * Returns the next integer, when iterating.
637 : *
638 : * intset_begin_iterate() must be called first. intset_iterate_next() returns
639 : * the next value in the set. Returns true, if there was another value, and
640 : * stores the value in *next. Otherwise, returns false.
641 : */
642 : bool
643 353525308 : intset_iterate_next(IntegerSet *intset, uint64 *next)
644 : {
645 : Assert(intset->iter_active);
646 : for (;;)
647 : {
648 : /* Return next iter_values[] entry if any */
649 353525308 : if (intset->iter_valueno < intset->iter_num_values)
650 : {
651 326008064 : *next = intset->iter_values[intset->iter_valueno++];
652 326008064 : return true;
653 : }
654 :
655 : /* Decode next item in current leaf node, if any */
656 27517244 : if (intset->iter_node &&
657 27517158 : intset->iter_itemno < intset->iter_node->num_items)
658 : {
659 : leaf_item *item;
660 : int num_decoded;
661 :
662 27093802 : item = &intset->iter_node->items[intset->iter_itemno++];
663 :
664 27093802 : intset->iter_values_buf[0] = item->first;
665 27093802 : num_decoded = simple8b_decode(item->codeword,
666 : &intset->iter_values_buf[1],
667 : item->first);
668 27093802 : intset->iter_num_values = num_decoded + 1;
669 27093802 : intset->iter_valueno = 0;
670 27093802 : continue;
671 : }
672 :
673 : /* No more items on this leaf, step to next node */
674 423442 : if (intset->iter_node)
675 : {
676 423356 : intset->iter_node = intset->iter_node->next;
677 423356 : intset->iter_itemno = 0;
678 423356 : continue;
679 : }
680 :
681 : /*
682 : * We have reached the end of the B-tree. But we might still have
683 : * some integers in the buffer of newly-added values.
684 : */
685 86 : if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
686 : {
687 52 : intset->iter_values = intset->buffered_values;
688 52 : intset->iter_num_values = intset->num_buffered_values;
689 52 : intset->iter_valueno = 0;
690 52 : continue;
691 : }
692 :
693 34 : break;
694 : }
695 :
696 : /* No more results. */
697 34 : intset->iter_active = false;
698 34 : *next = 0; /* prevent uninitialized-variable warnings */
699 34 : return false;
700 : }
701 :
702 : /*
703 : * intset_binsrch_uint64() -- search a sorted array of uint64s
704 : *
705 : * Returns the first position with key equal or less than the given key.
706 : * The returned position would be the "insert" location for the given key,
707 : * that is, the position where the new key should be inserted to.
708 : *
709 : * 'nextkey' affects the behavior on equal keys. If true, and there is an
710 : * equal key in the array, this returns the position immediately after the
711 : * equal key. If false, this returns the position of the equal key itself.
712 : */
713 : static int
714 4605852 : intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
715 : {
716 : int low,
717 : high,
718 : mid;
719 :
720 4605852 : low = 0;
721 4605852 : high = arr_elems;
722 27974658 : while (high > low)
723 : {
724 23368806 : mid = low + (high - low) / 2;
725 :
726 23368806 : if (nextkey)
727 : {
728 22453946 : if (item >= arr[mid])
729 11133438 : low = mid + 1;
730 : else
731 11320508 : high = mid;
732 : }
733 : else
734 : {
735 914860 : if (item > arr[mid])
736 507526 : low = mid + 1;
737 : else
738 407334 : high = mid;
739 : }
740 : }
741 :
742 4605852 : return low;
743 : }
744 :
745 : /* same, but for an array of leaf items */
746 : static int
747 1604286 : intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
748 : {
749 : int low,
750 : high,
751 : mid;
752 :
753 1604286 : low = 0;
754 1604286 : high = arr_elems;
755 11253582 : while (high > low)
756 : {
757 9649296 : mid = low + (high - low) / 2;
758 :
759 9649296 : if (nextkey)
760 : {
761 9649296 : if (item >= arr[mid].first)
762 4871980 : low = mid + 1;
763 : else
764 4777316 : high = mid;
765 : }
766 : else
767 : {
768 0 : if (item > arr[mid].first)
769 0 : low = mid + 1;
770 : else
771 0 : high = mid;
772 : }
773 : }
774 :
775 1604286 : return low;
776 : }
777 :
778 : /*
779 : * Simple-8b encoding.
780 : *
781 : * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
782 : * called "codewords". The number of integers packed into a single codeword
783 : * depends on the integers being packed; small integers are encoded using
784 : * fewer bits than large integers. A single codeword can store a single
785 : * 60-bit integer, or two 30-bit integers, for example.
786 : *
787 : * Since we're storing a unique, sorted, set of integers, we actually encode
788 : * the *differences* between consecutive integers. That way, clusters of
789 : * integers that are close to each other are packed efficiently, regardless
790 : * of their absolute values.
791 : *
792 : * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
793 : * how many integers are encoded in the codeword, and the encoded integers are
794 : * packed into the remaining 60 bits. The selector allows for 16 different
795 : * ways of using the remaining 60 bits, called "modes". The number of integers
796 : * packed into a single codeword in each mode is listed in the simple8b_modes
797 : * table below. For example, consider the following codeword:
798 : *
799 : * 20-bit integer 20-bit integer 20-bit integer
800 : * 1101 00000000000000010010 01111010000100100000 00000000000000010100
801 : * ^
802 : * selector
803 : *
804 : * The selector 1101 is 13 in decimal. From the modes table below, we see
805 : * that it means that the codeword encodes three 20-bit integers. In decimal,
806 : * those integers are 18, 500000 and 20. Because we encode deltas rather than
807 : * absolute values, the actual values that they represent are 18, 500018 and
808 : * 500038.
809 : *
810 : * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
811 : * (which means 240 or 120 consecutive integers, since we're encoding the
812 : * deltas between integers), without using the rest of the codeword bits
813 : * for anything.
814 : *
815 : * Simple-8b cannot encode integers larger than 60 bits. Values larger than
816 : * that are always stored in the 'first' field of a leaf item, never in the
817 : * packed codeword. If there is a sequence of integers that are more than
818 : * 2^60 apart, the codeword will go unused on those items. To represent that,
819 : * we use a magic EMPTY_CODEWORD codeword value.
820 : */
821 : static const struct simple8b_mode
822 : {
823 : uint8 bits_per_int;
824 : uint8 num_ints;
825 : } simple8b_modes[17] =
826 :
827 : {
828 : {0, 240}, /* mode 0: 240 zeroes */
829 : {0, 120}, /* mode 1: 120 zeroes */
830 : {1, 60}, /* mode 2: sixty 1-bit integers */
831 : {2, 30}, /* mode 3: thirty 2-bit integers */
832 : {3, 20}, /* mode 4: twenty 3-bit integers */
833 : {4, 15}, /* mode 5: fifteen 4-bit integers */
834 : {5, 12}, /* mode 6: twelve 5-bit integers */
835 : {6, 10}, /* mode 7: ten 6-bit integers */
836 : {7, 8}, /* mode 8: eight 7-bit integers (four bits
837 : * are wasted) */
838 : {8, 7}, /* mode 9: seven 8-bit integers (four bits
839 : * are wasted) */
840 : {10, 6}, /* mode 10: six 10-bit integers */
841 : {12, 5}, /* mode 11: five 12-bit integers */
842 : {15, 4}, /* mode 12: four 15-bit integers */
843 : {20, 3}, /* mode 13: three 20-bit integers */
844 : {30, 2}, /* mode 14: two 30-bit integers */
845 : {60, 1}, /* mode 15: one 60-bit integer */
846 :
847 : {0, 0} /* sentinel value */
848 : };
849 :
850 : /*
851 : * EMPTY_CODEWORD is a special value, used to indicate "no values".
852 : * It is used if the next value is too large to be encoded with Simple-8b.
853 : *
854 : * This value looks like a mode-0 codeword, but we can distinguish it
855 : * because a regular mode-0 codeword would have zeroes in the unused bits.
856 : */
857 : #define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF)
858 :
859 : /*
860 : * Encode a number of integers into a Simple-8b codeword.
861 : *
862 : * (What we actually encode are deltas between successive integers.
863 : * "base" is the value before ints[0].)
864 : *
865 : * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
866 : * elements, ensuring that we can produce a full codeword.
867 : *
868 : * Returns the encoded codeword, and sets *num_encoded to the number of
869 : * input integers that were encoded. That can be zero, if the first delta
870 : * is too large to be encoded.
871 : */
872 : static uint64
873 27093802 : simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
874 : {
875 : int selector;
876 : int nints;
877 : int bits;
878 : uint64 diff;
879 : uint64 last_val;
880 : uint64 codeword;
881 : int i;
882 :
883 : Assert(ints[0] > base);
884 :
885 : /*
886 : * Select the "mode" to use for this codeword.
887 : *
888 : * In each iteration, check if the next value can be represented in the
889 : * current mode we're considering. If it's too large, then step up the
890 : * mode to a wider one, and repeat. If it fits, move on to the next
891 : * integer. Repeat until the codeword is full, given the current mode.
892 : *
893 : * Note that we don't have any way to represent unused slots in the
894 : * codeword, so we require each codeword to be "full". It is always
895 : * possible to produce a full codeword unless the very first delta is too
896 : * large to be encoded. For example, if the first delta is small but the
897 : * second is too large to be encoded, we'll end up using the last "mode",
898 : * which has nints == 1.
899 : */
900 27093802 : selector = 0;
901 27093802 : nints = simple8b_modes[0].num_ints;
902 27093802 : bits = simple8b_modes[0].bits_per_int;
903 27093802 : diff = ints[0] - base - 1;
904 27093802 : last_val = ints[0];
905 27093802 : i = 0; /* number of deltas we have accepted */
906 : for (;;)
907 : {
908 691891192 : if (diff >= (UINT64CONST(1) << bits))
909 : {
910 : /* too large, step up to next mode */
911 297986008 : selector++;
912 297986008 : nints = simple8b_modes[selector].num_ints;
913 297986008 : bits = simple8b_modes[selector].bits_per_int;
914 : /* we might already have accepted enough deltas for this mode */
915 297986008 : if (i >= nints)
916 12999834 : break;
917 : }
918 : else
919 : {
920 : /* accept this delta; then done if codeword is full */
921 393905184 : i++;
922 393905184 : if (i >= nints)
923 14093968 : break;
924 : /* examine next delta */
925 : Assert(ints[i] > last_val);
926 379811216 : diff = ints[i] - last_val - 1;
927 379811216 : last_val = ints[i];
928 : }
929 : }
930 :
931 27093802 : if (nints == 0)
932 : {
933 : /*
934 : * The first delta is too large to be encoded with Simple-8b.
935 : *
936 : * If there is at least one not-too-large integer in the input, we
937 : * will encode it using mode 15 (or a more compact mode). Hence, we
938 : * can only get here if the *first* delta is >= 2^60.
939 : */
940 : Assert(i == 0);
941 8 : *num_encoded = 0;
942 8 : return EMPTY_CODEWORD;
943 : }
944 :
945 : /*
946 : * Encode the integers using the selected mode. Note that we shift them
947 : * into the codeword in reverse order, so that they will come out in the
948 : * correct order in the decoder.
949 : */
950 27093794 : codeword = 0;
951 27093794 : if (bits > 0)
952 : {
953 275002094 : for (i = nints - 1; i > 0; i--)
954 : {
955 248007898 : diff = ints[i] - ints[i - 1] - 1;
956 248007898 : codeword |= diff;
957 248007898 : codeword <<= bits;
958 : }
959 26994196 : diff = ints[0] - base - 1;
960 26994196 : codeword |= diff;
961 : }
962 :
963 : /* add selector to the codeword, and return */
964 27093794 : codeword |= (uint64) selector << 60;
965 :
966 27093794 : *num_encoded = nints;
967 27093794 : return codeword;
968 : }
969 :
970 : /*
971 : * Decode a codeword into an array of integers.
972 : * Returns the number of integers decoded.
973 : */
974 : static int
975 27093802 : simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
976 : {
977 27093802 : int selector = (codeword >> 60);
978 27093802 : int nints = simple8b_modes[selector].num_ints;
979 27093802 : int bits = simple8b_modes[selector].bits_per_int;
980 27093802 : uint64 mask = (UINT64CONST(1) << bits) - 1;
981 : uint64 curr_value;
982 :
983 27093802 : if (codeword == EMPTY_CODEWORD)
984 8 : return 0;
985 :
986 27093794 : curr_value = base;
987 325999408 : for (int i = 0; i < nints; i++)
988 : {
989 298905614 : uint64 diff = codeword & mask;
990 :
991 298905614 : curr_value += 1 + diff;
992 298905614 : decoded[i] = curr_value;
993 298905614 : codeword >>= bits;
994 : }
995 27093794 : return nints;
996 : }
997 :
998 : /*
999 : * This is very similar to simple8b_decode(), but instead of decoding all
1000 : * the values to an array, it just checks if the given "key" is part of
1001 : * the codeword.
1002 : */
1003 : static bool
1004 1601040 : simple8b_contains(uint64 codeword, uint64 key, uint64 base)
1005 : {
1006 1601040 : int selector = (codeword >> 60);
1007 1601040 : int nints = simple8b_modes[selector].num_ints;
1008 1601040 : int bits = simple8b_modes[selector].bits_per_int;
1009 :
1010 1601040 : if (codeword == EMPTY_CODEWORD)
1011 16 : return false;
1012 :
1013 1601024 : if (bits == 0)
1014 : {
1015 : /* Special handling for 0-bit cases. */
1016 199174 : return (key - base) <= nints;
1017 : }
1018 : else
1019 : {
1020 1401850 : uint64 mask = (UINT64CONST(1) << bits) - 1;
1021 : uint64 curr_value;
1022 :
1023 1401850 : curr_value = base;
1024 10434932 : for (int i = 0; i < nints; i++)
1025 : {
1026 9717524 : uint64 diff = codeword & mask;
1027 :
1028 9717524 : curr_value += 1 + diff;
1029 :
1030 9717524 : if (curr_value >= key)
1031 : {
1032 684442 : if (curr_value == key)
1033 101386 : return true;
1034 : else
1035 583056 : return false;
1036 : }
1037 :
1038 9033082 : codeword >>= bits;
1039 : }
1040 : }
1041 717408 : return false;
1042 : }
|