LCOV - code coverage report
Current view: top level - src/backend/lib - integerset.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 248 254 97.6 %
Date: 2019-11-22 07:06:56 Functions: 16 16 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * integerset.c
       4             :  *    Data structure to hold a large set of 64-bit integers efficiently
       5             :  *
       6             :  * IntegerSet provides an in-memory data structure to hold a set of
       7             :  * arbitrary 64-bit integers.  Internally, the values are stored in a
       8             :  * B-tree, with a special packed representation at the leaf level using
       9             :  * the Simple-8b algorithm, which can pack clusters of nearby values
      10             :  * very tightly.
      11             :  *
      12             :  * Memory consumption depends on the number of values stored, but also
      13             :  * on how far the values are from each other.  In the best case, with
      14             :  * long runs of consecutive integers, memory consumption can be as low as
      15             :  * 0.1 bytes per integer.  In the worst case, if integers are more than
      16             :  * 2^32 apart, it uses about 8 bytes per integer.  In typical use, the
      17             :  * consumption per integer is somewhere between those extremes, depending
      18             :  * on the range of integers stored, and how "clustered" they are.
      19             :  *
      20             :  *
      21             :  * Interface
      22             :  * ---------
      23             :  *
      24             :  *  intset_create           - Create a new, empty set
      25             :  *  intset_add_member       - Add an integer to the set
      26             :  *  intset_is_member        - Test if an integer is in the set
      27             :  *  intset_begin_iterate    - Begin iterating through all integers in set
      28             :  *  intset_iterate_next     - Return next set member, if any
      29             :  *
      30             :  * intset_create() creates the set in the current memory context.  Subsequent
      31             :  * operations that add to the data structure will continue to allocate from
      32             :  * that same context, even if it's not current anymore.
      33             :  *
      34             :  * Note that there is no function to free an integer set.  If you need to do
      35             :  * that, create a dedicated memory context to hold it, and destroy the memory
      36             :  * context instead.
      37             :  *
      38             :  *
      39             :  * Limitations
      40             :  * -----------
      41             :  *
      42             :  * - Values must be added in order.  (Random insertions would require
      43             :  *   splitting nodes, which hasn't been implemented.)
      44             :  *
      45             :  * - Values cannot be added while iteration is in progress.
      46             :  *
      47             :  * - No support for removing values.
      48             :  *
      49             :  * None of these limitations are fundamental to the data structure, so they
      50             :  * could be lifted if needed, by writing some new code.  But the current
      51             :  * users of this facility don't need them.
      52             :  *
      53             :  *
      54             :  * References
      55             :  * ----------
      56             :  *
      57             :  * Simple-8b encoding is based on:
      58             :  *
      59             :  * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
      60             :  *   Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
      61             :  *   (https://doi.org/10.1002/spe.948)
      62             :  *
      63             :  *
      64             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      65             :  * Portions Copyright (c) 1994, Regents of the University of California
      66             :  *
      67             :  * IDENTIFICATION
      68             :  *    src/backend/lib/integerset.c
      69             :  *
      70             :  *-------------------------------------------------------------------------
      71             :  */
      72             : #include "postgres.h"
      73             : 
      74             : #include "access/htup_details.h"
      75             : #include "lib/integerset.h"
      76             : #include "port/pg_bitutils.h"
      77             : #include "utils/memutils.h"
      78             : 
      79             : 
      80             : /*
      81             :  * Maximum number of integers that can be encoded in a single Simple-8b
      82             :  * codeword. (Defined here before anything else, so that we can size arrays
      83             :  * using this.)
      84             :  */
      85             : #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
      86             : 
      87             : /*
      88             :  * Parameters for shape of the in-memory B-tree.
      89             :  *
      90             :  * These set the size of each internal and leaf node.  They don't necessarily
      91             :  * need to be the same, because the tree is just an in-memory structure.
      92             :  * With the default 64, each node is about 1 kb.
      93             :  *
      94             :  * If you change these, you must recalculate MAX_TREE_LEVELS, too!
      95             :  */
      96             : #define MAX_INTERNAL_ITEMS  64
      97             : #define MAX_LEAF_ITEMS  64
      98             : 
      99             : /*
     100             :  * Maximum height of the tree.
     101             :  *
     102             :  * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree.  The
     103             :  * theoretical maximum number of items that we can store in a set is 2^64,
     104             :  * so MAX_TREE_LEVELS should be set so that:
     105             :  *
     106             :  *   MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
     107             :  *
     108             :  * In practice, we'll need far fewer levels, because you will run out of
     109             :  * memory long before reaching that number, but let's be conservative.
     110             :  */
     111             : #define MAX_TREE_LEVELS     11
     112             : 
     113             : /*
     114             :  * Node structures, for the in-memory B-tree.
     115             :  *
     116             :  * An internal node holds a number of downlink pointers to leaf nodes, or
     117             :  * to internal nodes on a lower level.  For each downlink, the key value
     118             :  * corresponding to the lower level node is stored in a sorted array.  The
     119             :  * stored key values are low keys.  In other words, if the downlink has value
     120             :  * X, then all items stored on that child are >= X.
     121             :  *
     122             :  * Each leaf node holds a number of "items", with a varying number of
     123             :  * integers packed into each item.  Each item consists of two 64-bit words:
     124             :  * The first word holds the first integer stored in the item, in plain format.
     125             :  * The second word contains between 0 and 240 more integers, packed using
     126             :  * Simple-8b encoding.  By storing the first integer in plain, unpacked,
     127             :  * format, we can use binary search to quickly find an item that holds (or
     128             :  * would hold) a particular integer.  And by storing the rest in packed form,
     129             :  * we still get pretty good memory density, if there are clusters of integers
     130             :  * with similar values.
     131             :  *
     132             :  * Each leaf node also has a pointer to the next leaf node, so that the leaf
     133             :  * nodes can be easily walked from beginning to end when iterating.
     134             :  */
     135             : typedef struct intset_node intset_node;
     136             : typedef struct intset_leaf_node intset_leaf_node;
     137             : typedef struct intset_internal_node intset_internal_node;
     138             : 
     139             : /* Common structure of both leaf and internal nodes. */
     140             : struct intset_node
     141             : {
     142             :     uint16      level;          /* tree level of this node */
     143             :     uint16      num_items;      /* number of items in this node */
     144             : };
     145             : 
     146             : /* Internal node */
     147             : struct intset_internal_node
     148             : {
     149             :     /* common header, must match intset_node */
     150             :     uint16      level;          /* >= 1 on internal nodes */
     151             :     uint16      num_items;
     152             : 
     153             :     /*
     154             :      * 'values' is an array of key values, and 'downlinks' are pointers to
     155             :      * lower-level nodes, corresponding to the key values.
     156             :      */
     157             :     uint64      values[MAX_INTERNAL_ITEMS];
     158             :     intset_node *downlinks[MAX_INTERNAL_ITEMS];
     159             : };
     160             : 
     161             : /* Leaf node */
     162             : typedef struct
     163             : {
     164             :     uint64      first;          /* first integer in this item */
     165             :     uint64      codeword;       /* simple8b encoded differences from 'first' */
     166             : } leaf_item;
     167             : 
     168             : #define MAX_VALUES_PER_LEAF_ITEM    (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
     169             : 
     170             : struct intset_leaf_node
     171             : {
     172             :     /* common header, must match intset_node */
     173             :     uint16      level;          /* 0 on leafs */
     174             :     uint16      num_items;
     175             : 
     176             :     intset_leaf_node *next;     /* right sibling, if any */
     177             : 
     178             :     leaf_item   items[MAX_LEAF_ITEMS];
     179             : };
     180             : 
     181             : /*
     182             :  * We buffer insertions in a simple array, before packing and inserting them
     183             :  * into the B-tree.  MAX_BUFFERED_VALUES sets the size of the buffer.  The
     184             :  * encoder assumes that it is large enough that we can always fill a leaf
     185             :  * item with buffered new items.  In other words, MAX_BUFFERED_VALUES must be
     186             :  * larger than MAX_VALUES_PER_LEAF_ITEM.  For efficiency, make it much larger.
     187             :  */
     188             : #define MAX_BUFFERED_VALUES         (MAX_VALUES_PER_LEAF_ITEM * 2)
     189             : 
     190             : /*
     191             :  * IntegerSet is the top-level object representing the set.
     192             :  *
     193             :  * The integers are stored in an in-memory B-tree structure, plus an array
     194             :  * for newly-added integers.  IntegerSet also tracks information about memory
     195             :  * usage, as well as the current position when iterating the set with
     196             :  * intset_begin_iterate / intset_iterate_next.
     197             :  */
     198             : struct IntegerSet
     199             : {
     200             :     /*
     201             :      * 'context' is the memory context holding this integer set and all its
     202             :      * tree nodes.
     203             :      *
     204             :      * 'mem_used' tracks the amount of memory used.  We don't do anything with
     205             :      * it in integerset.c itself, but the callers can ask for it with
     206             :      * intset_memory_usage().
     207             :      */
     208             :     MemoryContext context;
     209             :     uint64      mem_used;
     210             : 
     211             :     uint64      num_entries;    /* total # of values in the set */
     212             :     uint64      highest_value;  /* highest value stored in this set */
     213             : 
     214             :     /*
     215             :      * B-tree to hold the packed values.
     216             :      *
     217             :      * 'rightmost_nodes' hold pointers to the rightmost node on each level.
     218             :      * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
     219             :      * parent, and so forth, all the way up to the root. These are needed when
     220             :      * adding new values. (Currently, we require that new values are added at
     221             :      * the end.)
     222             :      */
     223             :     int         num_levels;     /* height of the tree */
     224             :     intset_node *root;          /* root node */
     225             :     intset_node *rightmost_nodes[MAX_TREE_LEVELS];
     226             :     intset_leaf_node *leftmost_leaf;    /* leftmost leaf node */
     227             : 
     228             :     /*
     229             :      * Holding area for new items that haven't been inserted to the tree yet.
     230             :      */
     231             :     uint64      buffered_values[MAX_BUFFERED_VALUES];
     232             :     int         num_buffered_values;
     233             : 
     234             :     /*
     235             :      * Iterator support.
     236             :      *
     237             :      * 'iter_values' is an array of integers ready to be returned to the
     238             :      * caller; 'iter_num_values' is the length of that array, and
     239             :      * 'iter_valueno' is the next index.  'iter_node' and 'iter_itemno' point
     240             :      * to the leaf node, and item within the leaf node, to get the next batch
     241             :      * of values from.
     242             :      *
     243             :      * Normally, 'iter_values' points to 'iter_values_buf', which holds items
     244             :      * decoded from a leaf item.  But after we have scanned the whole B-tree,
     245             :      * we iterate through all the unbuffered values, too, by pointing
     246             :      * iter_values to 'buffered_values'.
     247             :      */
     248             :     bool        iter_active;    /* is iteration in progress? */
     249             : 
     250             :     const uint64 *iter_values;
     251             :     int         iter_num_values;    /* number of elements in 'iter_values' */
     252             :     int         iter_valueno;   /* next index into 'iter_values' */
     253             : 
     254             :     intset_leaf_node *iter_node;    /* current leaf node */
     255             :     int         iter_itemno;    /* next item in 'iter_node' to decode */
     256             : 
     257             :     uint64      iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
     258             : };
     259             : 
     260             : /*
     261             :  * Prototypes for internal functions.
     262             :  */
     263             : static void intset_update_upper(IntegerSet *intset, int level,
     264             :                                 intset_node *child, uint64 child_key);
     265             : static void intset_flush_buffered_values(IntegerSet *intset);
     266             : 
     267             : static int  intset_binsrch_uint64(uint64 value, uint64 *arr, int arr_elems,
     268             :                                   bool nextkey);
     269             : static int  intset_binsrch_leaf(uint64 value, leaf_item *arr, int arr_elems,
     270             :                                 bool nextkey);
     271             : 
     272             : static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
     273             : static int  simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
     274             : static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
     275             : 
     276             : 
     277             : /*
     278             :  * Create a new, initially empty, integer set.
     279             :  *
     280             :  * The integer set is created in the current memory context.
     281             :  * We will do all subsequent allocations in the same context, too, regardless
     282             :  * of which memory context is current when new integers are added to the set.
     283             :  */
     284             : IntegerSet *
     285         128 : intset_create(void)
     286             : {
     287             :     IntegerSet *intset;
     288             : 
     289         128 :     intset = (IntegerSet *) palloc(sizeof(IntegerSet));
     290         128 :     intset->context = CurrentMemoryContext;
     291         128 :     intset->mem_used = GetMemoryChunkSpace(intset);
     292             : 
     293         128 :     intset->num_entries = 0;
     294         128 :     intset->highest_value = 0;
     295             : 
     296         128 :     intset->num_levels = 0;
     297         128 :     intset->root = NULL;
     298         128 :     memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
     299         128 :     intset->leftmost_leaf = NULL;
     300             : 
     301         128 :     intset->num_buffered_values = 0;
     302             : 
     303         128 :     intset->iter_active = false;
     304         128 :     intset->iter_node = NULL;
     305         128 :     intset->iter_itemno = 0;
     306         128 :     intset->iter_valueno = 0;
     307         128 :     intset->iter_num_values = 0;
     308         128 :     intset->iter_values = NULL;
     309             : 
     310         128 :     return intset;
     311             : }
     312             : 
     313             : /*
     314             :  * Allocate a new node.
     315             :  */
     316             : static intset_internal_node *
     317        6754 : intset_new_internal_node(IntegerSet *intset)
     318             : {
     319             :     intset_internal_node *n;
     320             : 
     321        6754 :     n = (intset_internal_node *) MemoryContextAlloc(intset->context,
     322             :                                                     sizeof(intset_internal_node));
     323        6754 :     intset->mem_used += GetMemoryChunkSpace(n);
     324             : 
     325        6754 :     n->level = 0;                /* caller must set */
     326        6754 :     n->num_items = 0;
     327             : 
     328        6754 :     return n;
     329             : }
     330             : 
     331             : static intset_leaf_node *
     332      423356 : intset_new_leaf_node(IntegerSet *intset)
     333             : {
     334             :     intset_leaf_node *n;
     335             : 
     336      423356 :     n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
     337             :                                                 sizeof(intset_leaf_node));
     338      423356 :     intset->mem_used += GetMemoryChunkSpace(n);
     339             : 
     340      423356 :     n->level = 0;
     341      423356 :     n->num_items = 0;
     342      423356 :     n->next = NULL;
     343             : 
     344      423356 :     return n;
     345             : }
     346             : 
     347             : /*
     348             :  * Return the number of entries in the integer set.
     349             :  */
     350             : uint64
     351          76 : intset_num_entries(IntegerSet *intset)
     352             : {
     353          76 :     return intset->num_entries;
     354             : }
     355             : 
     356             : /*
     357             :  * Return the amount of memory used by the integer set.
     358             :  */
     359             : uint64
     360          10 : intset_memory_usage(IntegerSet *intset)
     361             : {
     362          10 :     return intset->mem_used;
     363             : }
     364             : 
     365             : /*
     366             :  * Add a value to the set.
     367             :  *
     368             :  * Values must be added in order.
     369             :  */
     370             : void
     371   326008714 : intset_add_member(IntegerSet *intset, uint64 x)
     372             : {
     373   326008714 :     if (intset->iter_active)
     374           0 :         elog(ERROR, "cannot add new values to integer set while iteration is in progress");
     375             : 
     376   326008714 :     if (x <= intset->highest_value && intset->num_entries > 0)
     377           0 :         elog(ERROR, "cannot add value to integer set out of order");
     378             : 
     379   326008714 :     if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
     380             :     {
     381             :         /* Time to flush our buffer */
     382     1134006 :         intset_flush_buffered_values(intset);
     383             :         Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
     384             :     }
     385             : 
     386             :     /* Add it to the buffer of newly-added values */
     387   326008714 :     intset->buffered_values[intset->num_buffered_values] = x;
     388   326008714 :     intset->num_buffered_values++;
     389   326008714 :     intset->num_entries++;
     390   326008714 :     intset->highest_value = x;
     391   326008714 : }
     392             : 
     393             : /*
     394             :  * Take a batch of buffered values, and pack them into the B-tree.
     395             :  */
     396             : static void
     397     1134006 : intset_flush_buffered_values(IntegerSet *intset)
     398             : {
     399     1134006 :     uint64     *values = intset->buffered_values;
     400     1134006 :     uint64      num_values = intset->num_buffered_values;
     401     1134006 :     int         num_packed = 0;
     402             :     intset_leaf_node *leaf;
     403             : 
     404     1134006 :     leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
     405             : 
     406             :     /*
     407             :      * If the tree is completely empty, create the first leaf page, which is
     408             :      * also the root.
     409             :      */
     410     1134006 :     if (leaf == NULL)
     411             :     {
     412             :         /*
     413             :          * This is the very first item in the set.
     414             :          *
     415             :          * Allocate root node. It's also a leaf.
     416             :          */
     417          28 :         leaf = intset_new_leaf_node(intset);
     418             : 
     419          28 :         intset->root = (intset_node *) leaf;
     420          28 :         intset->leftmost_leaf = leaf;
     421          28 :         intset->rightmost_nodes[0] = (intset_node *) leaf;
     422          28 :         intset->num_levels = 1;
     423             :     }
     424             : 
     425             :     /*
     426             :      * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
     427             :      * stop.  In most cases, we cannot encode that many values in a single
     428             :      * value, but this way, the encoder doesn't have to worry about running
     429             :      * out of input.
     430             :      */
     431    29361754 :     while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
     432             :     {
     433             :         leaf_item   item;
     434             :         int         num_encoded;
     435             : 
     436             :         /*
     437             :          * Construct the next leaf item, packing as many buffered values as
     438             :          * possible.
     439             :          */
     440    27093742 :         item.first = values[num_packed];
     441    27093742 :         item.codeword = simple8b_encode(&values[num_packed + 1],
     442             :                                         &num_encoded,
     443             :                                         item.first);
     444             : 
     445             :         /*
     446             :          * Add the item to the node, allocating a new node if the old one is
     447             :          * full.
     448             :          */
     449    27093742 :         if (leaf->num_items >= MAX_LEAF_ITEMS)
     450             :         {
     451             :             /* Allocate new leaf and link it to the tree */
     452      423328 :             intset_leaf_node *old_leaf = leaf;
     453             : 
     454      423328 :             leaf = intset_new_leaf_node(intset);
     455      423328 :             old_leaf->next = leaf;
     456      423328 :             intset->rightmost_nodes[0] = (intset_node *) leaf;
     457      423328 :             intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
     458             :         }
     459    27093742 :         leaf->items[leaf->num_items++] = item;
     460             : 
     461    27093742 :         num_packed += 1 + num_encoded;
     462             :     }
     463             : 
     464             :     /*
     465             :      * Move any remaining buffered values to the beginning of the array.
     466             :      */
     467     1134006 :     if (num_packed < intset->num_buffered_values)
     468             :     {
     469     2168420 :         memmove(&intset->buffered_values[0],
     470     1084210 :                 &intset->buffered_values[num_packed],
     471     1084210 :                 (intset->num_buffered_values - num_packed) * sizeof(uint64));
     472             :     }
     473     1134006 :     intset->num_buffered_values -= num_packed;
     474     1134006 : }
     475             : 
     476             : /*
     477             :  * Insert a downlink into parent node, after creating a new node.
     478             :  *
     479             :  * Recurses if the parent node is full, too.
     480             :  */
     481             : static void
     482      430032 : intset_update_upper(IntegerSet *intset, int level, intset_node *child,
     483             :                     uint64 child_key)
     484             : {
     485             :     intset_internal_node *parent;
     486             : 
     487             :     Assert(level > 0);
     488             : 
     489             :     /*
     490             :      * Create a new root node, if necessary.
     491             :      */
     492      430032 :     if (level >= intset->num_levels)
     493             :     {
     494          50 :         intset_node *oldroot = intset->root;
     495             :         uint64      downlink_key;
     496             : 
     497             :         /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
     498          50 :         if (intset->num_levels == MAX_TREE_LEVELS)
     499           0 :             elog(ERROR, "could not expand integer set, maximum number of levels reached");
     500          50 :         intset->num_levels++;
     501             : 
     502             :         /*
     503             :          * Get the first value on the old root page, to be used as the
     504             :          * downlink.
     505             :          */
     506          50 :         if (intset->root->level == 0)
     507          20 :             downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
     508             :         else
     509          30 :             downlink_key = ((intset_internal_node *) oldroot)->values[0];
     510             : 
     511          50 :         parent = intset_new_internal_node(intset);
     512          50 :         parent->level = level;
     513          50 :         parent->values[0] = downlink_key;
     514          50 :         parent->downlinks[0] = oldroot;
     515          50 :         parent->num_items = 1;
     516             : 
     517          50 :         intset->root = (intset_node *) parent;
     518          50 :         intset->rightmost_nodes[level] = (intset_node *) parent;
     519             :     }
     520             : 
     521             :     /*
     522             :      * Place the downlink on the parent page.
     523             :      */
     524      430032 :     parent = (intset_internal_node *) intset->rightmost_nodes[level];
     525             : 
     526      430032 :     if (parent->num_items < MAX_INTERNAL_ITEMS)
     527             :     {
     528      423328 :         parent->values[parent->num_items] = child_key;
     529      423328 :         parent->downlinks[parent->num_items] = child;
     530      423328 :         parent->num_items++;
     531             :     }
     532             :     else
     533             :     {
     534             :         /*
     535             :          * Doesn't fit.  Allocate new parent, with the downlink as the first
     536             :          * item on it, and recursively insert the downlink to the new parent
     537             :          * to the grandparent.
     538             :          */
     539        6704 :         parent = intset_new_internal_node(intset);
     540        6704 :         parent->level = level;
     541        6704 :         parent->values[0] = child_key;
     542        6704 :         parent->downlinks[0] = child;
     543        6704 :         parent->num_items = 1;
     544             : 
     545        6704 :         intset->rightmost_nodes[level] = (intset_node *) parent;
     546             : 
     547        6704 :         intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
     548             :     }
     549      430032 : }
     550             : 
     551             : /*
     552             :  * Does the set contain the given value?
     553             :  */
     554             : bool
     555     1807262 : intset_is_member(IntegerSet *intset, uint64 x)
     556             : {
     557             :     intset_node *node;
     558             :     intset_leaf_node *leaf;
     559             :     int         level;
     560             :     int         itemno;
     561             :     leaf_item  *item;
     562             : 
     563             :     /*
     564             :      * The value might be in the buffer of newly-added values.
     565             :      */
     566     1807262 :     if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
     567             :     {
     568             :         int         itemno;
     569             : 
     570      405352 :         itemno = intset_binsrch_uint64(x,
     571      202676 :                                        intset->buffered_values,
     572             :                                        intset->num_buffered_values,
     573             :                                        false);
     574      202676 :         if (itemno >= intset->num_buffered_values)
     575          78 :             return false;
     576             :         else
     577      202598 :             return (intset->buffered_values[itemno] == x);
     578             :     }
     579             : 
     580             :     /*
     581             :      * Start from the root, and walk down the B-tree to find the right leaf
     582             :      * node.
     583             :      */
     584     1604586 :     if (!intset->root)
     585         294 :         return false;
     586     1604292 :     node = intset->root;
     587     6008316 :     for (level = intset->num_levels - 1; level > 0; level--)
     588             :     {
     589     4404028 :         intset_internal_node *n = (intset_internal_node *) node;
     590             : 
     591             :         Assert(node->level == level);
     592             : 
     593     4404028 :         itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
     594     4404028 :         if (itemno == 0)
     595           4 :             return false;
     596     4404024 :         node = n->downlinks[itemno - 1];
     597             :     }
     598             :     Assert(node->level == 0);
     599     1604288 :     leaf = (intset_leaf_node *) node;
     600             : 
     601             :     /*
     602             :      * Binary search to find the right item on the leaf page
     603             :      */
     604     1604288 :     itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
     605     1604288 :     if (itemno == 0)
     606          18 :         return false;
     607     1604270 :     item = &leaf->items[itemno - 1];
     608             : 
     609             :     /* Is this a match to the first value on the item? */
     610     1604270 :     if (item->first == x)
     611        3340 :         return true;
     612             :     Assert(x > item->first);
     613             : 
     614             :     /* Is it in the packed codeword? */
     615     1600930 :     if (simple8b_contains(item->codeword, x, item->first))
     616      300548 :         return true;
     617             : 
     618     1300382 :     return false;
     619             : }
     620             : 
     621             : /*
     622             :  * Begin in-order scan through all the values.
     623             :  *
     624             :  * While the iteration is in-progress, you cannot add new values to the set.
     625             :  */
     626             : void
     627          80 : intset_begin_iterate(IntegerSet *intset)
     628             : {
     629             :     /* Note that we allow an iteration to be abandoned midway */
     630          80 :     intset->iter_active = true;
     631          80 :     intset->iter_node = intset->leftmost_leaf;
     632          80 :     intset->iter_itemno = 0;
     633          80 :     intset->iter_valueno = 0;
     634          80 :     intset->iter_num_values = 0;
     635          80 :     intset->iter_values = intset->iter_values_buf;
     636          80 : }
     637             : 
     638             : /*
     639             :  * Returns the next integer, when iterating.
     640             :  *
     641             :  * intset_begin_iterate() must be called first.  intset_iterate_next() returns
     642             :  * the next value in the set.  Returns true, if there was another value, and
     643             :  * stores the value in *next.  Otherwise, returns false.
     644             :  */
     645             : bool
     646   353525258 : intset_iterate_next(IntegerSet *intset, uint64 *next)
     647             : {
     648             :     Assert(intset->iter_active);
     649             :     for (;;)
     650             :     {
     651             :         /* Return next iter_values[] entry if any */
     652   381042398 :         if (intset->iter_valueno < intset->iter_num_values)
     653             :         {
     654   326008094 :             *next = intset->iter_values[intset->iter_valueno++];
     655   326008094 :             return true;
     656             :         }
     657             : 
     658             :         /* Decode next item in current leaf node, if any */
     659    55034262 :         if (intset->iter_node &&
     660    27517098 :             intset->iter_itemno < intset->iter_node->num_items)
     661             :         {
     662             :             leaf_item  *item;
     663             :             int         num_decoded;
     664             : 
     665    27093742 :             item = &intset->iter_node->items[intset->iter_itemno++];
     666             : 
     667    27093742 :             intset->iter_values_buf[0] = item->first;
     668    27093742 :             num_decoded = simple8b_decode(item->codeword,
     669             :                                           &intset->iter_values_buf[1],
     670             :                                           item->first);
     671    27093742 :             intset->iter_num_values = num_decoded + 1;
     672    27093742 :             intset->iter_valueno = 0;
     673    27093742 :             continue;
     674             :         }
     675             : 
     676             :         /* No more items on this leaf, step to next node */
     677      423422 :         if (intset->iter_node)
     678             :         {
     679      423356 :             intset->iter_node = intset->iter_node->next;
     680      423356 :             intset->iter_itemno = 0;
     681      423356 :             continue;
     682             :         }
     683             : 
     684             :         /*
     685             :          * We have reached the end of the B-tree.  But we might still have
     686             :          * some integers in the buffer of newly-added values.
     687             :          */
     688          66 :         if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
     689             :         {
     690          42 :             intset->iter_values = intset->buffered_values;
     691          42 :             intset->iter_num_values = intset->num_buffered_values;
     692          42 :             intset->iter_valueno = 0;
     693          42 :             continue;
     694             :         }
     695             : 
     696          24 :         break;
     697             :     }
     698             : 
     699             :     /* No more results. */
     700          24 :     intset->iter_active = false;
     701          24 :     *next = 0;                  /* prevent uninitialized-variable warnings */
     702          24 :     return false;
     703             : }
     704             : 
     705             : /*
     706             :  * intset_binsrch_uint64() -- search a sorted array of uint64s
     707             :  *
     708             :  * Returns the first position with key equal or less than the given key.
     709             :  * The returned position would be the "insert" location for the given key,
     710             :  * that is, the position where the new key should be inserted to.
     711             :  *
     712             :  * 'nextkey' affects the behavior on equal keys.  If true, and there is an
     713             :  * equal key in the array, this returns the position immediately after the
     714             :  * equal key.  If false, this returns the position of the equal key itself.
     715             :  */
     716             : static int
     717     4606704 : intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
     718             : {
     719             :     int         low,
     720             :                 high,
     721             :                 mid;
     722             : 
     723     4606704 :     low = 0;
     724     4606704 :     high = arr_elems;
     725    32601206 :     while (high > low)
     726             :     {
     727    23387798 :         mid = low + (high - low) / 2;
     728             : 
     729    23387798 :         if (nextkey)
     730             :         {
     731    22453074 :             if (item >= arr[mid])
     732    11127394 :                 low = mid + 1;
     733             :             else
     734    11325680 :                 high = mid;
     735             :         }
     736             :         else
     737             :         {
     738      934724 :             if (item > arr[mid])
     739      384982 :                 low = mid + 1;
     740             :             else
     741      549742 :                 high = mid;
     742             :         }
     743             :     }
     744             : 
     745     4606704 :     return low;
     746             : }
     747             : 
     748             : /* same, but for an array of leaf items */
     749             : static int
     750     1604288 : intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
     751             : {
     752             :     int         low,
     753             :                 high,
     754             :                 mid;
     755             : 
     756     1604288 :     low = 0;
     757     1604288 :     high = arr_elems;
     758    12857708 :     while (high > low)
     759             :     {
     760     9649132 :         mid = low + (high - low) / 2;
     761             : 
     762     9649132 :         if (nextkey)
     763             :         {
     764     9649132 :             if (item >= arr[mid].first)
     765     4873436 :                 low = mid + 1;
     766             :             else
     767     4775696 :                 high = mid;
     768             :         }
     769             :         else
     770             :         {
     771           0 :             if (item > arr[mid].first)
     772           0 :                 low = mid + 1;
     773             :             else
     774           0 :                 high = mid;
     775             :         }
     776             :     }
     777             : 
     778     1604288 :     return low;
     779             : }
     780             : 
     781             : /*
     782             :  * Simple-8b encoding.
     783             :  *
     784             :  * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
     785             :  * called "codewords".  The number of integers packed into a single codeword
     786             :  * depends on the integers being packed; small integers are encoded using
     787             :  * fewer bits than large integers.  A single codeword can store a single
     788             :  * 60-bit integer, or two 30-bit integers, for example.
     789             :  *
     790             :  * Since we're storing a unique, sorted, set of integers, we actually encode
     791             :  * the *differences* between consecutive integers.  That way, clusters of
     792             :  * integers that are close to each other are packed efficiently, regardless
     793             :  * of their absolute values.
     794             :  *
     795             :  * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
     796             :  * how many integers are encoded in the codeword, and the encoded integers are
     797             :  * packed into the remaining 60 bits.  The selector allows for 16 different
     798             :  * ways of using the remaining 60 bits, called "modes".  The number of integers
     799             :  * packed into a single codeword in each mode is listed in the simple8b_modes
     800             :  * table below.  For example, consider the following codeword:
     801             :  *
     802             :  *      20-bit integer       20-bit integer       20-bit integer
     803             :  * 1101 00000000000000010010 01111010000100100000 00000000000000010100
     804             :  * ^
     805             :  * selector
     806             :  *
     807             :  * The selector 1101 is 13 in decimal.  From the modes table below, we see
     808             :  * that it means that the codeword encodes three 20-bit integers.  In decimal,
     809             :  * those integers are 18, 500000 and 20.  Because we encode deltas rather than
     810             :  * absolute values, the actual values that they represent are 18, 500018 and
     811             :  * 500038.
     812             :  *
     813             :  * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
     814             :  * (which means 240 or 120 consecutive integers, since we're encoding the
     815             :  * deltas between integers), without using the rest of the codeword bits
     816             :  * for anything.
     817             :  *
     818             :  * Simple-8b cannot encode integers larger than 60 bits.  Values larger than
     819             :  * that are always stored in the 'first' field of a leaf item, never in the
     820             :  * packed codeword.  If there is a sequence of integers that are more than
     821             :  * 2^60 apart, the codeword will go unused on those items.  To represent that,
     822             :  * we use a magic EMPTY_CODEWORD codeword value.
     823             :  */
     824             : static const struct simple8b_mode
     825             : {
     826             :     uint8       bits_per_int;
     827             :     uint8       num_ints;
     828             : }           simple8b_modes[17] =
     829             : 
     830             : {
     831             :     {0, 240},                   /* mode  0: 240 zeroes */
     832             :     {0, 120},                   /* mode  1: 120 zeroes */
     833             :     {1, 60},                    /* mode  2: sixty 1-bit integers */
     834             :     {2, 30},                    /* mode  3: thirty 2-bit integers */
     835             :     {3, 20},                    /* mode  4: twenty 3-bit integers */
     836             :     {4, 15},                    /* mode  5: fifteen 4-bit integers */
     837             :     {5, 12},                    /* mode  6: twelve 5-bit integers */
     838             :     {6, 10},                    /* mode  7: ten 6-bit integers */
     839             :     {7, 8},                     /* mode  8: eight 7-bit integers (four bits
     840             :                                  * are wasted) */
     841             :     {8, 7},                     /* mode  9: seven 8-bit integers (four bits
     842             :                                  * are wasted) */
     843             :     {10, 6},                    /* mode 10: six 10-bit integers */
     844             :     {12, 5},                    /* mode 11: five 12-bit integers */
     845             :     {15, 4},                    /* mode 12: four 15-bit integers */
     846             :     {20, 3},                    /* mode 13: three 20-bit integers */
     847             :     {30, 2},                    /* mode 14: two 30-bit integers */
     848             :     {60, 1},                    /* mode 15: one 60-bit integer */
     849             : 
     850             :     {0, 0}                      /* sentinel value */
     851             : };
     852             : 
     853             : /*
     854             :  * EMPTY_CODEWORD is a special value, used to indicate "no values".
     855             :  * It is used if the next value is too large to be encoded with Simple-8b.
     856             :  *
     857             :  * This value looks like a mode-0 codeword, but we can distinguish it
     858             :  * because a regular mode-0 codeword would have zeroes in the unused bits.
     859             :  */
     860             : #define EMPTY_CODEWORD      UINT64CONST(0x0FFFFFFFFFFFFFFF)
     861             : 
     862             : /*
     863             :  * Encode a number of integers into a Simple-8b codeword.
     864             :  *
     865             :  * (What we actually encode are deltas between successive integers.
     866             :  * "base" is the value before ints[0].)
     867             :  *
     868             :  * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
     869             :  * elements, ensuring that we can produce a full codeword.
     870             :  *
     871             :  * Returns the encoded codeword, and sets *num_encoded to the number of
     872             :  * input integers that were encoded.  That can be zero, if the first delta
     873             :  * is too large to be encoded.
     874             :  */
     875             : static uint64
     876    27093742 : simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
     877             : {
     878             :     int         selector;
     879             :     int         nints;
     880             :     int         bits;
     881             :     uint64      diff;
     882             :     uint64      last_val;
     883             :     uint64      codeword;
     884             :     int         i;
     885             : 
     886             :     Assert(ints[0] > base);
     887             : 
     888             :     /*
     889             :      * Select the "mode" to use for this codeword.
     890             :      *
     891             :      * In each iteration, check if the next value can be represented in the
     892             :      * current mode we're considering.  If it's too large, then step up the
     893             :      * mode to a wider one, and repeat.  If it fits, move on to the next
     894             :      * integer.  Repeat until the codeword is full, given the current mode.
     895             :      *
     896             :      * Note that we don't have any way to represent unused slots in the
     897             :      * codeword, so we require each codeword to be "full".  It is always
     898             :      * possible to produce a full codeword unless the very first delta is too
     899             :      * large to be encoded.  For example, if the first delta is small but the
     900             :      * second is too large to be encoded, we'll end up using the last "mode",
     901             :      * which has nints == 1.
     902             :      */
     903    27093742 :     selector = 0;
     904    27093742 :     nints = simple8b_modes[0].num_ints;
     905    27093742 :     bits = simple8b_modes[0].bits_per_int;
     906    27093742 :     diff = ints[0] - base - 1;
     907    27093742 :     last_val = ints[0];
     908    27093742 :     i = 0;                      /* number of deltas we have accepted */
     909             :     for (;;)
     910             :     {
     911  1356686722 :         if (diff >= (UINT64CONST(1) << bits))
     912             :         {
     913             :             /* too large, step up to next mode */
     914   297984988 :             selector++;
     915   297984988 :             nints = simple8b_modes[selector].num_ints;
     916   297984988 :             bits = simple8b_modes[selector].bits_per_int;
     917             :             /* we might already have accepted enough deltas for this mode */
     918   297984988 :             if (i >= nints)
     919    12999872 :                 break;
     920             :         }
     921             :         else
     922             :         {
     923             :             /* accept this delta; then done if codeword is full */
     924   393905244 :             i++;
     925   393905244 :             if (i >= nints)
     926    14093870 :                 break;
     927             :             /* examine next delta */
     928             :             Assert(ints[i] > last_val);
     929   379811374 :             diff = ints[i] - last_val - 1;
     930   379811374 :             last_val = ints[i];
     931             :         }
     932             :     }
     933             : 
     934    27093742 :     if (nints == 0)
     935             :     {
     936             :         /*
     937             :          * The first delta is too large to be encoded with Simple-8b.
     938             :          *
     939             :          * If there is at least one not-too-large integer in the input, we
     940             :          * will encode it using mode 15 (or a more compact mode).  Hence, we
     941             :          * can only get here if the *first* delta is >= 2^60.
     942             :          */
     943             :         Assert(i == 0);
     944           8 :         *num_encoded = 0;
     945           8 :         return EMPTY_CODEWORD;
     946             :     }
     947             : 
     948             :     /*
     949             :      * Encode the integers using the selected mode.  Note that we shift them
     950             :      * into the codeword in reverse order, so that they will come out in the
     951             :      * correct order in the decoder.
     952             :      */
     953    27093734 :     codeword = 0;
     954    27093734 :     if (bits > 0)
     955             :     {
     956   275002154 :         for (i = nints - 1; i > 0; i--)
     957             :         {
     958   248008018 :             diff = ints[i] - ints[i - 1] - 1;
     959   248008018 :             codeword |= diff;
     960   248008018 :             codeword <<= bits;
     961             :         }
     962    26994136 :         diff = ints[0] - base - 1;
     963    26994136 :         codeword |= diff;
     964             :     }
     965             : 
     966             :     /* add selector to the codeword, and return */
     967    27093734 :     codeword |= (uint64) selector << 60;
     968             : 
     969    27093734 :     *num_encoded = nints;
     970    27093734 :     return codeword;
     971             : }
     972             : 
     973             : /*
     974             :  * Decode a codeword into an array of integers.
     975             :  * Returns the number of integers decoded.
     976             :  */
     977             : static int
     978    27093742 : simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
     979             : {
     980    27093742 :     int         selector = (codeword >> 60);
     981    27093742 :     int         nints = simple8b_modes[selector].num_ints;
     982    27093742 :     int         bits = simple8b_modes[selector].bits_per_int;
     983    27093742 :     uint64      mask = (UINT64CONST(1) << bits) - 1;
     984             :     uint64      curr_value;
     985             : 
     986    27093742 :     if (codeword == EMPTY_CODEWORD)
     987           8 :         return 0;
     988             : 
     989    27093734 :     curr_value = base;
     990   325999408 :     for (int i = 0; i < nints; i++)
     991             :     {
     992   298905674 :         uint64      diff = codeword & mask;
     993             : 
     994   298905674 :         curr_value += 1 + diff;
     995   298905674 :         decoded[i] = curr_value;
     996   298905674 :         codeword >>= bits;
     997             :     }
     998    27093734 :     return nints;
     999             : }
    1000             : 
    1001             : /*
    1002             :  * This is very similar to simple8b_decode(), but instead of decoding all
    1003             :  * the values to an array, it just checks if the given "key" is part of
    1004             :  * the codeword.
    1005             :  */
    1006             : static bool
    1007     1600930 : simple8b_contains(uint64 codeword, uint64 key, uint64 base)
    1008             : {
    1009     1600930 :     int         selector = (codeword >> 60);
    1010     1600930 :     int         nints = simple8b_modes[selector].num_ints;
    1011     1600930 :     int         bits = simple8b_modes[selector].bits_per_int;
    1012             : 
    1013     1600930 :     if (codeword == EMPTY_CODEWORD)
    1014          16 :         return false;
    1015             : 
    1016     1600914 :     if (bits == 0)
    1017             :     {
    1018             :         /* Special handling for 0-bit cases. */
    1019      199102 :         return (key - base) <= nints;
    1020             :     }
    1021             :     else
    1022             :     {
    1023     1401812 :         uint64      mask = (UINT64CONST(1) << bits) - 1;
    1024             :         uint64      curr_value;
    1025             : 
    1026     1401812 :         curr_value = base;
    1027    10428554 :         for (int i = 0; i < nints; i++)
    1028             :         {
    1029     9711572 :             uint64      diff = codeword & mask;
    1030             : 
    1031     9711572 :             curr_value += 1 + diff;
    1032             : 
    1033     9711572 :             if (curr_value >= key)
    1034             :             {
    1035      684830 :                 if (curr_value == key)
    1036      101446 :                     return true;
    1037             :                 else
    1038      583384 :                     return false;
    1039             :             }
    1040             : 
    1041     9026742 :             codeword >>= bits;
    1042             :         }
    1043             :     }
    1044      716982 :     return false;
    1045             : }

Generated by: LCOV version 1.13