LCOV - code coverage report
Current view: top level - src/backend/access/brin - brin_minmax_multi.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 574 769 74.6 %
Date: 2021-12-03 03:09:03 Functions: 44 51 86.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * brin_minmax_multi.c
       3             :  *      Implementation of Multi Min/Max opclass for BRIN
       4             :  *
       5             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  *
       9             :  * Implements a variant of minmax opclass, where the summary is composed of
      10             :  * multiple smaller intervals. This allows us to handle outliers, which
      11             :  * usually make the simple minmax opclass inefficient.
      12             :  *
      13             :  * Consider for example page range with simple minmax interval [1000,2000],
      14             :  * and assume a new row gets inserted into the range with value 1000000.
      15             :  * Due to that the interval gets [1000,1000000]. I.e. the minmax interval
      16             :  * got 1000x wider and won't be useful to eliminate scan keys between 2001
      17             :  * and 1000000.
      18             :  *
      19             :  * With minmax-multi opclass, we may have [1000,2000] interval initially,
      20             :  * but after adding the new row we start tracking it as two intervals:
      21             :  *
      22             :  *   [1000,2000] and [1000000,1000000]
      23             :  *
      24             :  * This allows us to still eliminate the page range when the scan keys hit
      25             :  * the gap between 2000 and 1000000, making it useful in cases when the
      26             :  * simple minmax opclass gets inefficient.
      27             :  *
      28             :  * The number of intervals tracked per page range is somewhat flexible.
      29             :  * What is restricted is the number of values per page range, and the limit
      30             :  * is currently 32 (see values_per_range reloption). Collapsed intervals
      31             :  * (with equal minimum and maximum value) are stored as a single value,
      32             :  * while regular intervals require two values.
      33             :  *
      34             :  * When the number of values gets too high (by adding new values to the
      35             :  * summary), we merge some of the intervals to free space for more values.
      36             :  * This is done in a greedy way - we simply pick the two closest intervals,
      37             :  * merge them, and repeat this until the number of values to store gets
      38             :  * sufficiently low (below 50% of maximum values, but that is a mostly
      39             :  * arbitrary threshold and may be changed easily).
      40             :  *
      41             :  * To pick the closest intervals we use the "distance" support procedure,
      42             :  * which measures space between two ranges (i.e. the length of an interval).
      43             :  * The computed value may be an approximation - in the worst case we will
      44             :  * merge two ranges that are slightly less optimal at that step, but the
      45             :  * index should still produce correct results.
      46             :  *
      47             :  * The compaction (reducing the number of values) is fairly expensive, as
      48             :  * it requires calling the distance functions, sorting etc. So when building
      49             :  * the summary, we use a significantly larger buffer, and only enforce the
      50             :  * exact limit at the very end. This improves performance, and it also helps
      51             :  * with building better ranges (due to the greedy approach).
      52             :  *
      53             :  *
      54             :  * IDENTIFICATION
      55             :  *    src/backend/access/brin/brin_minmax_multi.c
      56             :  */
      57             : #include "postgres.h"
      58             : 
      59             : /* needed for PGSQL_AF_INET */
      60             : #include <sys/socket.h>
      61             : 
      62             : #include "access/genam.h"
      63             : #include "access/brin.h"
      64             : #include "access/brin_internal.h"
      65             : #include "access/brin_tuple.h"
      66             : #include "access/reloptions.h"
      67             : #include "access/stratnum.h"
      68             : #include "access/htup_details.h"
      69             : #include "catalog/pg_type.h"
      70             : #include "catalog/pg_am.h"
      71             : #include "catalog/pg_amop.h"
      72             : #include "utils/array.h"
      73             : #include "utils/builtins.h"
      74             : #include "utils/date.h"
      75             : #include "utils/datum.h"
      76             : #include "utils/float.h"
      77             : #include "utils/inet.h"
      78             : #include "utils/lsyscache.h"
      79             : #include "utils/memutils.h"
      80             : #include "utils/numeric.h"
      81             : #include "utils/pg_lsn.h"
      82             : #include "utils/rel.h"
      83             : #include "utils/syscache.h"
      84             : #include "utils/timestamp.h"
      85             : #include "utils/uuid.h"
      86             : 
      87             : /*
      88             :  * Additional SQL level support functions
      89             :  *
      90             :  * Procedure numbers must not use values reserved for BRIN itself; see
      91             :  * brin_internal.h.
      92             :  */
      93             : #define     MINMAX_MAX_PROCNUMS     1   /* maximum support procs we need */
      94             : #define     PROCNUM_DISTANCE        11  /* required, distance between values */
      95             : 
      96             : /*
      97             :  * Subtract this from procnum to obtain index in MinmaxMultiOpaque arrays
      98             :  * (Must be equal to minimum of private procnums).
      99             :  */
     100             : #define     PROCNUM_BASE            11
     101             : 
     102             : /*
     103             :  * Sizing the insert buffer - we use 10x the number of values specified
     104             :  * in the reloption, but we cap it to 8192 not to get too large. When
     105             :  * the buffer gets full, we reduce the number of values by half.
     106             :  */
     107             : #define     MINMAX_BUFFER_FACTOR            10
     108             : #define     MINMAX_BUFFER_MIN               256
     109             : #define     MINMAX_BUFFER_MAX               8192
     110             : #define     MINMAX_BUFFER_LOAD_FACTOR       0.5
     111             : 
     112             : typedef struct MinmaxMultiOpaque
     113             : {
     114             :     FmgrInfo    extra_procinfos[MINMAX_MAX_PROCNUMS];   /* indexed by (procnum - PROCNUM_BASE) */
     115             :     bool        extra_proc_missing[MINMAX_MAX_PROCNUMS];    /* same indexing as extra_procinfos */
     116             :     Oid         cached_subtype;
     117             :     FmgrInfo    strategy_procinfos[BTMaxStrategyNumber];
     118             : } MinmaxMultiOpaque;
     119             : 
     120             : /*
     121             :  * Storage type for BRIN's minmax-multi reloptions
     122             :  */
     123             : typedef struct MinMaxMultiOptions
     124             : {
     125             :     int32       vl_len_;        /* varlena header (do not touch directly!) */
     126             :     int         valuesPerRange; /* number of values per range; 0 means default */
     127             : } MinMaxMultiOptions;
     128             : 
     129             : #define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE        32
     130             : 
     131             : #define MinMaxMultiGetValuesPerRange(opts) \
     132             :         ((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
     133             :          ((MinMaxMultiOptions *) (opts))->valuesPerRange : \
     134             :          MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE)
     135             : 
     136             : #define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))
     137             : 
     138             : /*
     139             :  * The summary of minmax-multi indexes has two representations - Ranges for
     140             :  * convenient processing, and SerializedRanges for storage in bytea value.
     141             :  *
     142             :  * The Ranges struct stores the boundary values in a single array, but we
     143             :  * treat regular and single-point ranges differently to save space. For
     144             :  * regular ranges (with different boundary values) we have to store both
     145             :  * values, while for "single-point ranges" we only need to save one value.
     146             :  *
     147             :  * The 'values' array stores boundary values for regular ranges first (there
     148             :  * are 2*nranges values to store), and then the nvalues boundary values for
     149             :  * single-point ranges. That is, we have (2*nranges + nvalues) boundary
     150             :  * values in the array.
     151             :  *
     152             :  * +---------------------------------+-------------------------------+
     153             :  * | ranges (sorted pairs of values) | sorted values (single points) |
     154             :  * +---------------------------------+-------------------------------+
     155             :  *
     156             :  * This allows us to quickly add new values, and store outliers without
     157             :  * making the other ranges very wide.
     158             :  *
     159             :  * We never store more than maxvalues values (as set by values_per_range
     160             :  * reloption). If needed we merge some of the ranges.
     161             :  *
     162             :  * To minimize palloc overhead, we always allocate the full array with
     163             :  * space for maxvalues elements. This should be fine as long as the
     164             :  * maxvalues is reasonably small (64 seems fine), which is the case
     165             :  * thanks to values_per_range reloption being limited to 256.
     166             :  */
     167             : typedef struct Ranges
     168             : {
     169             :     /* Cache information that we need quite often. */
     170             :     Oid         typid;
     171             :     Oid         colloid;
     172             :     AttrNumber  attno;
     173             :     FmgrInfo   *cmp;
     174             : 
     175             :     /* (2*nranges + nvalues) <= maxvalues */
     176             :     int         nranges;        /* number of ranges in the array (stored) */
     177             :     int         nsorted;        /* number of sorted point values (<= nvalues) */
     178             :     int         nvalues;        /* number of point values (sorted + unsorted) */
     179             :     int         maxvalues;      /* maximum number of values (reloption) */
     180             : 
     181             :     /*
     182             :      * We simply add the values into a large buffer, without any expensive
     183             :      * steps (sorting, deduplication, ...). The buffer is a multiple of the
     184             :      * target number of values, so the compaction happens less often,
     185             :      * amortizing the costs. We keep the actual target and compact to the
     186             :      * requested number of values at the very end, before serializing to
     187             :      * on-disk representation.
     188             :      */
     189             :     /* requested number of values */
     190             :     int         target_maxvalues;
     191             : 
     192             :     /* values stored for this range - either raw values, or ranges */
     193             :     Datum       values[FLEXIBLE_ARRAY_MEMBER];
     194             : } Ranges;
     195             : 
     196             : /*
     197             :  * On-disk the summary is stored as a bytea value, with a simple header
     198             :  * with basic metadata, followed by the boundary values. It has a varlena
     199             :  * header, so can be treated as varlena directly.
     200             :  *
     201             :  * See range_serialize/range_deserialize for serialization details.
     202             :  */
     203             : typedef struct SerializedRanges
     204             : {
     205             :     /* varlena header (do not touch directly!) */
     206             :     int32       vl_len_;
     207             : 
     208             :     /* type of values stored in the data array */
     209             :     Oid         typid;
     210             : 
     211             :     /* (2*nranges + nvalues) <= maxvalues */
     212             :     int         nranges;        /* number of ranges in the array (stored) */
     213             :     int         nvalues;        /* number of values in the data array (all) */
     214             :     int         maxvalues;      /* maximum number of values (reloption) */
     215             : 
     216             :     /* the boundary values (see range_serialize/range_deserialize) */
     217             :     char        data[FLEXIBLE_ARRAY_MEMBER];
     218             : } SerializedRanges;
     219             : 
     220             : static SerializedRanges *range_serialize(Ranges *range);
     221             : 
     222             : static Ranges *range_deserialize(int maxvalues, SerializedRanges *range);
     223             : 
     224             : 
     225             : /*
     226             :  * Used to represent ranges expanded to make merging and combining easier.
     227             :  *
     228             :  * Each expanded range is essentially an interval, represented by min/max
     229             :  * values, along with a flag whether it's a collapsed range (in which case
     230             :  * the min and max values are equal). We have the flag to handle by-ref
     231             :  * data types - we can't simply compare the datums, and this saves some
     232             :  * calls to the type-specific comparator function.
     233             :  */
     234             : typedef struct ExpandedRange
     235             : {
     236             :     Datum       minval;         /* lower boundary */
     237             :     Datum       maxval;         /* upper boundary */
     238             :     bool        collapsed;      /* true if collapsed (minval == maxval) */
     239             : } ExpandedRange;
     240             : 
     241             : /*
     242             :  * Represents a distance between two ranges (identified by index into
     243             :  * an array of expanded ranges).
     244             :  */
     245             : typedef struct DistanceValue
     246             : {
     247             :     int         index;          /* index into the expanded ranges array */
     248             :     double      value;          /* the distance */
     249             : } DistanceValue;
     250             : 
     251             : 
     252             : /* Cache for support and strategy procedures. */
     253             : 
     254             : static FmgrInfo *minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno,
     255             :                                            uint16 procnum);
     256             : 
     257             : static FmgrInfo *minmax_multi_get_strategy_procinfo(BrinDesc *bdesc,
     258             :                                                     uint16 attno, Oid subtype,
     259             :                                                     uint16 strategynum);
     260             : 
     261             : typedef struct compare_context
     262             : {
     263             :     FmgrInfo   *cmpFn;          /* comparison procedure (callers pass Ranges->cmp) */
     264             :     Oid         colloid;        /* collation (callers pass Ranges->colloid) */
     265             : } compare_context;
     266             : 
     267             : static int  compare_values(const void *a, const void *b, void *arg);
     268             : 
     269             : 
     270             : #ifdef USE_ASSERT_CHECKING
     271             : /*
     272             :  * Check that each value precedes the next one according to the cmp
     273             :  * function (which should be BTLessStrategyNumber), i.e. strict ordering.
     274             :  */
     275             : static void
     276             : AssertArrayOrder(FmgrInfo *cmp, Oid colloid, Datum *values, int nvalues)
     277             : {
     278             :     int         i;
     279             :     Datum       lt;
     280             : 
     281             :     for (i = 0; i < (nvalues - 1); i++)
     282             :     {
     283             :         lt = FunctionCall2Coll(cmp, colloid, values[i], values[i + 1]);
     284             :         Assert(DatumGetBool(lt));
     285             :     }
     286             : }
     287             : #endif
     288             : 
     289             : /*
     290             :  * Comprehensive check of the Ranges structure.
     291             :  */
     292             : static void
     293      117384 : AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
     294             : {
     295             : #ifdef USE_ASSERT_CHECKING
     296             :     int         i;
     297             : 
     298             :     /* some basic sanity checks */
     299             :     Assert(ranges->nranges >= 0);
     300             :     Assert(ranges->nsorted >= 0);
     301             :     Assert(ranges->nvalues >= ranges->nsorted);
     302             :     Assert(ranges->maxvalues >= 2 * ranges->nranges + ranges->nvalues);
     303             :     Assert(ranges->typid != InvalidOid);
     304             : 
     305             :     /*
     306             :      * First the ranges - there are 2*nranges boundary values, and the values
     307             :      * have to be strictly ordered (equal values would mean the range is
     308             :      * collapsed, and should be stored as a point). This also guarantees that
     309             :      * the ranges do not overlap.
     310             :      */
     311             :     AssertArrayOrder(cmpFn, colloid, ranges->values, 2 * ranges->nranges);
     312             : 
     313             :     /* then the sorted part of the single-point values (nsorted values) */
     314             :     AssertArrayOrder(cmpFn, colloid, &ranges->values[2 * ranges->nranges],
     315             :                      ranges->nsorted);
     316             : 
     317             :     /*
     318             :      * Check that none of the values are covered by ranges (both sorted
     319             :      * and unsorted)
     320             :      */
     321             :     for (i = 0; i < ranges->nvalues; i++)
     322             :     {
     323             :         Datum       compar;
     324             :         int         start,
     325             :                     end;
     326             :         Datum       minvalue,
     327             :                     maxvalue;
     328             : 
     329             :         Datum       value = ranges->values[2 * ranges->nranges + i];
     330             : 
     331             :         if (ranges->nranges == 0)
     332             :             break;
     333             : 
     334             :         minvalue = ranges->values[0];
     335             :         maxvalue = ranges->values[2 * ranges->nranges - 1];
     336             : 
     337             :         /*
     338             :          * Is the value smaller than the minval of the first range? If yes,
     339             :          * it can't be covered by any range.
     340             :          */
     341             :         compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
     342             : 
     343             :         /* smaller than the smallest value in the first range */
     344             :         if (DatumGetBool(compar))
     345             :             continue;
     346             : 
     347             :         /*
     348             :          * Is the value greater than the maxval of the last range? If yes,
     349             :          * it can't be covered by any range.
     350             :          */
     351             :         compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
     352             : 
     353             :         /* larger than the largest value in the last range */
     354             :         if (DatumGetBool(compar))
     355             :             continue;
     356             : 
     357             :         start = 0;              /* first range */
     358             :         end = ranges->nranges - 1;   /* last range */
     359             :         while (true)
     360             :         {
     361             :             int         midpoint = (start + end) / 2;
     362             : 
     363             :             /* this means we ran out of ranges in the last step */
     364             :             if (start > end)
     365             :                 break;
     366             : 
     367             :             /* copy the min/max values from the ranges */
     368             :             minvalue = ranges->values[2 * midpoint];
     369             :             maxvalue = ranges->values[2 * midpoint + 1];
     370             : 
     371             :             /*
     372             :              * Is the value smaller than the minval? If yes, we'll continue in
     373             :              * the left half of the range array.
     374             :              */
     375             :             compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
     376             : 
     377             :             /* smaller than the smallest value in this range */
     378             :             if (DatumGetBool(compar))
     379             :             {
     380             :                 end = (midpoint - 1);
     381             :                 continue;
     382             :             }
     383             : 
     384             :             /*
     385             :              * Is the value greater than the maxval? If yes, we'll continue in
     386             :              * the right half of the range array.
     387             :              */
     388             :             compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
     389             : 
     390             :             /* larger than the largest value in this range */
     391             :             if (DatumGetBool(compar))
     392             :             {
     393             :                 start = (midpoint + 1);
     394             :                 continue;
     395             :             }
     396             : 
     397             :             /* the value falls into this range, which must not happen */
     398             :             Assert(false);
     399             :         }
     400             :     }
     401             : 
     402             :     /* and values in the unsorted part must not be in sorted part */
     403             :     for (i = ranges->nsorted; i < ranges->nvalues; i++)
     404             :     {
     405             :         compare_context cxt;
     406             :         Datum       value = ranges->values[2 * ranges->nranges + i];
     407             : 
     408             :         if (ranges->nsorted == 0)
     409             :             break;
     410             : 
     411             :         cxt.colloid = ranges->colloid;
     412             :         cxt.cmpFn = ranges->cmp;
     413             : 
     414             :         Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges],
     415             :                            ranges->nsorted, sizeof(Datum),
     416             :                            compare_values, (void *) &cxt) == NULL);
     417             :     }
     418             : #endif
     419      117384 : }
     420             : 
     421             : /*
     422             :  * Check that the expanded ranges (built when reducing the number of ranges
     423             :  * by combining some of them) are correctly sorted and do not overlap.
     424             :  */
     425             : static void
     426           0 : AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno,
     427             :                           Form_pg_attribute attr, ExpandedRange *ranges,
     428             :                           int nranges)
     429             : {
     430             : #ifdef USE_ASSERT_CHECKING
     431             :     int         i;
     432             :     FmgrInfo   *eq;
     433             :     FmgrInfo   *lt;
     434             : 
     435             :     eq = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
     436             :                                             BTEqualStrategyNumber);
     437             : 
     438             :     lt = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
     439             :                                             BTLessStrategyNumber);
     440             : 
     441             :     /*
     442             :      * Each range should be valid on its own, i.e. the boundary values must
     443             :      * satisfy (lower <= upper).
     444             :      */
     445             :     for (i = 0; i < nranges; i++)
     446             :     {
     447             :         Datum       r;
     448             :         Datum       minval = ranges[i].minval;
     449             :         Datum       maxval = ranges[i].maxval;
     450             : 
     451             :         if (ranges[i].collapsed)    /* collapsed: minval == maxval */
     452             :             r = FunctionCall2Coll(eq, colloid, minval, maxval);
     453             :         else                    /* non-collapsed: minval < maxval */
     454             :             r = FunctionCall2Coll(lt, colloid, minval, maxval);
     455             : 
     456             :         Assert(DatumGetBool(r));
     457             :     }
     458             : 
     459             :     /*
     460             :      * And the ranges should be ordered and must not overlap, i.e. upper <
     461             :      * lower for boundaries of consecutive ranges.
     462             :      */
     463             :     for (i = 0; i < nranges - 1; i++)
     464             :     {
     465             :         Datum       r;
     466             :         Datum       maxval = ranges[i].maxval;
     467             :         Datum       minval = ranges[i + 1].minval;
     468             : 
     469             :         r = FunctionCall2Coll(lt, colloid, maxval, minval);
     470             : 
     471             :         Assert(DatumGetBool(r));
     472             :     }
     473             : #endif
     474           0 : }
     475             : 
     476             : 
     477             : /*
     478             :  * minmax_multi_init
     479             :  *      Allocate a zeroed Ranges struct with room for maxvalues values.
     480             :  *
     481             :  * This is only the in-memory representation of the ranges, so we allocate
     482             :  * enough space for the maximum number of values (so as not to have to do
     483             :  * repallocs as the ranges grow).
     484             :  */
     485             : static Ranges *
     486       22508 : minmax_multi_init(int maxvalues)
     487             : {
     488             :     Size        len;
     489             :     Ranges     *ranges;
     490             : 
     491             :     Assert(maxvalues > 0);
     492             : 
     493       22508 :     len = offsetof(Ranges, values); /* fixed header */
     494       22508 :     len += maxvalues * sizeof(Datum);   /* Datum values */
     495             : 
     496       22508 :     ranges = (Ranges *) palloc0(len);
     497             : 
     498       22508 :     ranges->maxvalues = maxvalues;
     499             : 
     500       22508 :     return ranges;
     501             : }
     502             : 
     503             : 
     504             : /*
     505             :  * range_deduplicate_values
     506             :  *      Sort and deduplicate the values stored as simple points.
     507             :  *
     508             :  * This is meant to be a cheaper way of reducing the size of the ranges. It
     509             :  * does not touch the ranges, and only sorts the other values - it does not
     510             :  * call the distance functions, which may be quite expensive, etc.
     511             :  *
     512             :  * We do know the values are not duplicate with the ranges, because we check
     513             :  * that before adding a new value. Same for the sorted part of values.
     514             :  */
     515             : static void
     516        3136 : range_deduplicate_values(Ranges *range)
     517             : {
     518             :     int         i,
     519             :                 n;
     520             :     int         start;
     521             :     compare_context cxt;
     522             : 
     523             :     /*
     524             :      * If there are no unsorted values, we're done (this probably can't
     525             :      * happen, as we're adding values to unsorted part).
     526             :      */
     527        3136 :     if (range->nsorted == range->nvalues)
     528        2972 :         return;
     529             : 
     530             :     /* sort the values */
     531         164 :     cxt.colloid = range->colloid;
     532         164 :     cxt.cmpFn = range->cmp;
     533             : 
     534             :     /* the values start right after the ranges (which are always sorted) */
     535         164 :     start = 2 * range->nranges;
     536             : 
     537             :     /*
     538             :      * XXX This might do a merge sort, to leverage that the first part of the
     539             :      * array is already sorted. If the sorted part is large, it might be quite
     540             :      * a bit faster.
     541             :      */
     542         164 :     qsort_arg(&range->values[start],
     543         164 :               range->nvalues, sizeof(Datum),
     544             :               compare_values, (void *) &cxt);
     545             : 
     546         164 :     n = 1;
     547       52160 :     for (i = 1; i < range->nvalues; i++)
     548             :     {
     549             :         /* same as preceding value, so skip it */
     550       51996 :         if (compare_values(&range->values[start + i - 1],
     551       51996 :                            &range->values[start + i],
     552             :                            (void *) &cxt) == 0)
     553           0 :             continue;
     554             : 
     555       51996 :         range->values[start + n] = range->values[start + i];
     556             : 
     557       51996 :         n++;
     558             :     }
     559             : 
     560             :     /* now all the values are sorted (and unique) */
     561         164 :     range->nvalues = n;
     562         164 :     range->nsorted = n;
     563             : 
     564         164 :     AssertCheckRanges(range, range->cmp, range->colloid);
     565             : }
     566             : 
     567             : 
     568             : /*
     569             :  * range_serialize
     570             :  *    Serialize the in-memory representation into a compact varlena value.
     571             :  *
     572             :  * Simply copy the header and then also the individual values, as stored
     573             :  * in the in-memory value array.
     574             :  */
     575             : static SerializedRanges *
     576        2972 : range_serialize(Ranges *range)
     577             : {
     578             :     Size        len;
     579             :     int         nvalues;
     580             :     SerializedRanges *serialized;
     581             :     Oid         typid;
     582             :     int         typlen;
     583             :     bool        typbyval;
     584             : 
     585             :     int         i;
     586             :     char       *ptr;
     587             : 
     588             :     /* simple sanity checks */
     589             :     Assert(range->nranges >= 0);
     590             :     Assert(range->nsorted >= 0);
     591             :     Assert(range->nvalues >= 0);
     592             :     Assert(range->maxvalues > 0);
     593             :     Assert(range->target_maxvalues > 0);
     594             : 
     595             :     /* at this point the range should be compacted to the target size */
     596             :     Assert(2 * range->nranges + range->nvalues <= range->target_maxvalues);
     597             : 
     598             :     Assert(range->target_maxvalues <= range->maxvalues);
     599             : 
     600             :     /* range boundaries are always sorted */
     601             :     Assert(range->nvalues >= range->nsorted);
     602             : 
     603             :     /* deduplicate values, if there's unsorted part */
     604        2972 :     range_deduplicate_values(range);
     605             : 
     606             :     /* see how many Datum values we actually have */
     607        2972 :     nvalues = 2 * range->nranges + range->nvalues;
     608             : 
     609        2972 :     typid = range->typid;
     610        2972 :     typbyval = get_typbyval(typid);
     611        2972 :     typlen = get_typlen(typid);
     612             : 
     613             :     /* header is always needed */
     614        2972 :     len = offsetof(SerializedRanges, data);
     615             : 
     616             :     /*
     617             :      * The space needed depends on data type - for fixed-length data types
     618             :      * (by-value and some by-reference) it's pretty simple, just multiply
     619             :      * (attlen * nvalues) and we're done. For variable-length by-reference
     620             :      * types we need to actually walk all the values and sum the lengths.
     621             :      */
     622        2972 :     if (typlen == -1)           /* varlena */
     623             :     {
     624             :         int         i;
     625             : 
     626        2256 :         for (i = 0; i < nvalues; i++)
     627             :         {
     628        1812 :             len += VARSIZE_ANY(range->values[i]);
     629             :         }
     630             :     }
     631        2528 :     else if (typlen == -2)      /* cstring */
     632             :     {
     633             :         int         i;
     634             : 
     635           0 :         for (i = 0; i < nvalues; i++)
     636             :         {
     637             :             /* don't forget to include the null terminator ;-) */
     638           0 :             len += strlen(DatumGetCString(range->values[i])) + 1;
     639             :         }
     640             :     }
     641             :     else                        /* fixed-length types (even by-reference) */
     642             :     {
     643             :         Assert(typlen > 0);
     644        2528 :         len += nvalues * typlen;
     645             :     }
     646             : 
     647             :     /*
     648             :      * Allocate the serialized object, copy the basic information. The
     649             :      * serialized object is a varlena, so update the header.
     650             :      */
     651        2972 :     serialized = (SerializedRanges *) palloc0(len);
     652        2972 :     SET_VARSIZE(serialized, len);
     653             : 
     654        2972 :     serialized->typid = typid;
     655        2972 :     serialized->nranges = range->nranges;
     656        2972 :     serialized->nvalues = range->nvalues;
     657        2972 :     serialized->maxvalues = range->target_maxvalues;
     658             : 
     659             :     /*
     660             :      * And now copy also the boundary values (like the length calculation this
     661             :      * depends on the particular data type).
     662             :      */
     663        2972 :     ptr = serialized->data;      /* start of the serialized data */
     664             : 
     665       14228 :     for (i = 0; i < nvalues; i++)
     666             :     {
     667       11256 :         if (typbyval)           /* simple by-value data types */
     668             :         {
     669             :             Datum       tmp;
     670             : 
     671             :             /*
     672             :              * For byval types, we need to copy just the significant bytes -
     673             :              * we can't use memcpy directly, as that assumes little-endian
     674             :              * behavior.  store_att_byval does almost what we need, but it
     675             :              * requires a properly aligned buffer - the output buffer does not
     676             :              * guarantee that. So we simply use a local Datum variable (which
     677             :              * guarantees proper alignment), and then copy the value from it.
     678             :              */
     679        6140 :             store_att_byval(&tmp, range->values[i], typlen);
     680             : 
     681        6140 :             memcpy(ptr, &tmp, typlen);
     682        6140 :             ptr += typlen;
     683             :         }
     684        5116 :         else if (typlen > 0) /* fixed-length by-ref types */
     685             :         {
     686        3304 :             memcpy(ptr, DatumGetPointer(range->values[i]), typlen);
     687        3304 :             ptr += typlen;
     688             :         }
     689        1812 :         else if (typlen == -1)  /* varlena */
     690             :         {
     691        1812 :             int         tmp = VARSIZE_ANY(DatumGetPointer(range->values[i]));
     692             : 
     693        1812 :             memcpy(ptr, DatumGetPointer(range->values[i]), tmp);
     694        1812 :             ptr += tmp;
     695             :         }
     696           0 :         else if (typlen == -2)  /* cstring */
     697             :         {
     698           0 :             int         tmp = strlen(DatumGetCString(range->values[i])) + 1;
     699             : 
     700           0 :             memcpy(ptr, DatumGetCString(range->values[i]), tmp);
     701           0 :             ptr += tmp;
     702             :         }
     703             : 
     704             :         /* make sure we haven't overflowed the buffer end */
     705             :         Assert(ptr <= ((char *) serialized + len));
     706             :     }
     707             : 
     708             :     /* exact size */
     709             :     Assert(ptr == ((char *) serialized + len));
     710             : 
     711        2972 :     return serialized;
     712             : }
     713             : 
     714             : /*
     715             :  * range_deserialize
     716             :  *    Deserialize the compact varlena value into the in-memory representation.
     717             :  *
     718             :  * Copies the header fields and then unpacks the individual values from the
     719             :  * serialized data into the in-memory Datum array.
     720             :  */
     721             : static Ranges *
     722       19544 : range_deserialize(int maxvalues, SerializedRanges *serialized)
     723             : {
     724             :     int         i,
     725             :                 nvalues;
     726             :     char       *ptr,
     727             :                *dataptr;
     728             :     bool        typbyval;
     729             :     int         typlen;
     730             :     Size        datalen;
     731             : 
     732             :     Ranges     *range;
     733             : 
     734             :     Assert(serialized->nranges >= 0);
     735             :     Assert(serialized->nvalues >= 0);
     736             :     Assert(serialized->maxvalues > 0);
     737             : 
     738       19544 :     nvalues = 2 * serialized->nranges + serialized->nvalues;
     739             : 
     740             :     Assert(nvalues <= serialized->maxvalues);
     741             :     Assert(serialized->maxvalues <= maxvalues);
     742             : 
     743       19544 :     range = minmax_multi_init(maxvalues);
     744             : 
     745             :     /* copy the header info */
     746       19544 :     range->nranges = serialized->nranges;
     747       19544 :     range->nvalues = serialized->nvalues;
     748       19544 :     range->nsorted = serialized->nvalues;
     749       19544 :     range->maxvalues = maxvalues;
     750       19544 :     range->target_maxvalues = serialized->maxvalues;
     751             : 
     752       19544 :     range->typid = serialized->typid;
     753             : 
     754       19544 :     typbyval = get_typbyval(serialized->typid);
     755       19544 :     typlen = get_typlen(serialized->typid);
     756             : 
     757             :     /*
     758             :      * And now deconstruct the values into Datum array. We have to copy the
     759             :      * data because the serialized representation ignores alignment, and we
     760             :      * don't want to rely on it being kept around anyway.
     761             :      */
     762       19544 :     ptr = serialized->data;
     763             : 
     764             :     /*
     765             :      * We don't want to allocate many pieces, so we just allocate everything
     766             :      * in one chunk. How much space will we need?
     767             :      *
     768             :      * XXX We don't need to copy simple by-value data types.
     769             :      */
     770       19544 :     datalen = 0;
     771       19544 :     dataptr = NULL;
     772       42924 :     for (i = 0; (i < nvalues) && (!typbyval); i++)
     773             :     {
     774       23380 :         if (typlen > 0)          /* fixed-length by-ref types */
     775       12000 :             datalen += MAXALIGN(typlen);
     776       11380 :         else if (typlen == -1)  /* varlena */
     777             :         {
     778       11380 :             datalen += MAXALIGN(VARSIZE_ANY(DatumGetPointer(ptr)));
     779       11380 :             ptr += VARSIZE_ANY(DatumGetPointer(ptr));
     780             :         }
     781           0 :         else if (typlen == -2)  /* cstring */
     782             :         {
     783           0 :             Size        slen = strlen(DatumGetCString(ptr)) + 1;
     784             : 
     785           0 :             datalen += MAXALIGN(slen);
     786           0 :             ptr += slen;
     787             :         }
     788             :     }
     789             : 
     790       19544 :     if (datalen > 0)
     791        7128 :         dataptr = palloc(datalen);
     792             : 
     793             :     /*
     794             :      * Restore the source pointer (might have been modified when calculating
     795             :      * the space we need to allocate).
     796             :      */
     797       19544 :     ptr = serialized->data;
     798             : 
     799       80468 :     for (i = 0; i < nvalues; i++)
     800             :     {
     801       60924 :         if (typbyval)           /* simple by-value data types */
     802             :         {
     803       37544 :             Datum       v = 0;
     804             : 
     805       37544 :             memcpy(&v, ptr, typlen);
     806             : 
     807       37544 :             range->values[i] = fetch_att(&v, true, typlen);
     808       37544 :             ptr += typlen;
     809             :         }
     810       23380 :         else if (typlen > 0) /* fixed-length by-ref types */
     811             :         {
     812       12000 :             range->values[i] = PointerGetDatum(dataptr);
     813             : 
     814       12000 :             memcpy(dataptr, ptr, typlen);
     815       12000 :             dataptr += MAXALIGN(typlen);
     816             : 
     817       12000 :             ptr += typlen;
     818             :         }
     819       11380 :         else if (typlen == -1)  /* varlena */
     820             :         {
     821       11380 :             range->values[i] = PointerGetDatum(dataptr);
     822             : 
     823       11380 :             memcpy(dataptr, ptr, VARSIZE_ANY(ptr));
     824       11380 :             dataptr += MAXALIGN(VARSIZE_ANY(ptr));
     825       11380 :             ptr += VARSIZE_ANY(ptr);
     826             :         }
     827           0 :         else if (typlen == -2)  /* cstring */
     828             :         {
     829           0 :             Size        slen = strlen(ptr) + 1;
     830             : 
     831           0 :             range->values[i] = PointerGetDatum(dataptr);
     832             : 
     833           0 :             memcpy(dataptr, ptr, slen);
     834           0 :             dataptr += MAXALIGN(slen);
     835           0 :             ptr += slen;
     836             :         }
     837             : 
     838             :         /* make sure we haven't overflowed the buffer end */
     839             :         Assert(ptr <= ((char *) serialized + VARSIZE_ANY(serialized)));
     840             :     }
     841             : 
     842             :     /* should have consumed the whole input value exactly */
     843             :     Assert(ptr == ((char *) serialized + VARSIZE_ANY(serialized)));
     844             : 
     845             :     /* return the deserialized value */
     846       19544 :     return range;
     847             : }
     848             : 
     849             : /*
     850             :  * compare_expanded_ranges
     851             :  *    Compare the expanded ranges - first by minimum, then by maximum.
     852             :  *
     853             :  * We do guarantee that ranges in a single Ranges object do not overlap, so it
     854             :  * may seem strange that we don't order just by minimum. But when merging two
     855             :  * Ranges (which happens in the union function), the ranges may in fact
     856             :  * overlap. So we do compare both.
     857             :  */
     858             : static int
     859      409672 : compare_expanded_ranges(const void *a, const void *b, void *arg)
     860             : {
     861      409672 :     ExpandedRange *ra = (ExpandedRange *) a;
     862      409672 :     ExpandedRange *rb = (ExpandedRange *) b;
     863             :     Datum       r;
     864             : 
     865      409672 :     compare_context *cxt = (compare_context *) arg;
     866             : 
     867             :     /* first compare minvals */
     868      409672 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->minval, rb->minval);
     869             : 
     870      409672 :     if (DatumGetBool(r))
     871      299540 :         return -1;
     872             : 
     873      110132 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->minval, ra->minval);
     874             : 
     875      110132 :     if (DatumGetBool(r))
     876      108300 :         return 1;
     877             : 
     878             :     /* then compare maxvals */
     879        1832 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->maxval, rb->maxval);
     880             : 
     881        1832 :     if (DatumGetBool(r))
     882           0 :         return -1;
     883             : 
     884        1832 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->maxval, ra->maxval);
     885             : 
     886        1832 :     if (DatumGetBool(r))
     887           0 :         return 1;
     888             : 
     889        1832 :     return 0;
     890             : }
     891             : 
     892             : /*
     893             :  * compare_values
     894             :  *    Compare the values.
     895             :  */
     896             : static int
     897      707060 : compare_values(const void *a, const void *b, void *arg)
     898             : {
     899      707060 :     Datum      *da = (Datum *) a;
     900      707060 :     Datum      *db = (Datum *) b;
     901             :     Datum       r;
     902             : 
     903      707060 :     compare_context *cxt = (compare_context *) arg;
     904             : 
     905      707060 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *da, *db);
     906             : 
     907      707060 :     if (DatumGetBool(r))
     908      377980 :         return -1;
     909             : 
     910      329080 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *db, *da);
     911             : 
     912      329080 :     if (DatumGetBool(r))
     913      292012 :         return 1;
     914             : 
     915       37068 :     return 0;
     916             : }
     917             : 
     918             : /*
     919             :  * Check if the new value matches one of the existing ranges.
     920             :  */
     921             : static bool
     922       57064 : has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges,
     923             :                    Datum newval, AttrNumber attno, Oid typid)
     924             : {
     925             :     Datum       compar;
     926             : 
     927       57064 :     Datum       minvalue = ranges->values[0];
     928       57064 :     Datum       maxvalue = ranges->values[2 * ranges->nranges - 1];
     929             : 
     930             :     FmgrInfo   *cmpLessFn;
     931             :     FmgrInfo   *cmpGreaterFn;
     932             : 
     933             :     /* binary search on ranges */
     934             :     int         start,
     935             :                 end;
     936             : 
     937       57064 :     if (ranges->nranges == 0)
     938       18344 :         return false;
     939             : 
     940             :     /*
     941             :      * Otherwise, need to compare the new value with boundaries of all the
     942             :      * ranges. First check if it's less than the absolute minimum, which is
     943             :      * the first value in the array.
     944             :      */
     945       38720 :     cmpLessFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
     946             :                                                    BTLessStrategyNumber);
     947       38720 :     compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
     948             : 
     949             :     /* smaller than the smallest value in the range list */
     950       38720 :     if (DatumGetBool(compar))
     951           0 :         return false;
     952             : 
     953             :     /*
     954             :      * And now compare it to the existing maximum (last value in the data
     955             :      * array). But only if we haven't already ruled out a possible match in
     956             :      * the minvalue check.
     957             :      */
     958       38720 :     cmpGreaterFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
     959             :                                                       BTGreaterStrategyNumber);
     960       38720 :     compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
     961             : 
     962       38720 :     if (DatumGetBool(compar))
     963       38720 :         return false;
     964             : 
     965             :     /*
     966             :      * So we know it's in the general min/max, the question is whether it
     967             :      * falls in one of the ranges or gaps. We'll do a binary search on
     968             :      * individual ranges - for each range we check equality (value falls into
     969             :      * the range), and then check ranges either above or below the current
     970             :      * range.
     971             :      */
     972           0 :     start = 0;                  /* first range */
     973           0 :     end = (ranges->nranges - 1); /* last range */
     974             :     while (true)
     975           0 :     {
     976           0 :         int         midpoint = (start + end) / 2;
     977             : 
     978             :         /* this means we ran out of ranges in the last step */
     979           0 :         if (start > end)
     980           0 :             return false;
     981             : 
     982             :         /* copy the min/max values from the ranges */
     983           0 :         minvalue = ranges->values[2 * midpoint];
     984           0 :         maxvalue = ranges->values[2 * midpoint + 1];
     985             : 
     986             :         /*
     987             :          * Is the value smaller than the minval? If yes, we'll recurse to the
     988             :          * left side of range array.
     989             :          */
     990           0 :         compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
     991             : 
     992             :         /* smaller than the smallest value in this range */
     993           0 :         if (DatumGetBool(compar))
     994             :         {
     995           0 :             end = (midpoint - 1);
     996           0 :             continue;
     997             :         }
     998             : 
     999             :         /*
    1000             :          * Is the value greater than the maxval? If yes, we'll recurse to the
    1001             :          * right side of range array.
    1002             :          */
    1003           0 :         compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
    1004             : 
    1005             :         /* larger than the largest value in this range */
    1006           0 :         if (DatumGetBool(compar))
    1007             :         {
    1008           0 :             start = (midpoint + 1);
    1009           0 :             continue;
    1010             :         }
    1011             : 
    1012             :         /* hey, we found a matching range */
    1013           0 :         return true;
    1014             :     }
    1015             : 
    1016             :     return false;
    1017             : }
    1018             : 
    1019             : 
    1020             : /*
    1021             :  * range_contains_value
    1022             :  *      See if the new value is already contained in the range list.
    1023             :  *
    1024             :  * We first inspect the list of intervals. We use a small trick - we check
    1025             :  * the value against min/max of the whole range (min of the first interval,
    1026             :  * max of the last one) first, and only inspect the individual intervals if
    1027             :  * this passes.
    1028             :  *
    1029             :  * If the value matches none of the intervals, we check the exact values.
    1030             :  * The sorted ones are scanned linearly, or binary-searched when 16 or more.
    1031             :  *
    1032             :  * The last parameter (full) determines whether we need to search all the
    1033             :  * values, including the unsorted part. With full=false, the unsorted part
    1034             :  * is not searched, which may produce false negatives and duplicate values
    1035             :  * (in the unsorted part only), but when we're building the range that's
    1036             :  * fine - we'll deduplicate before serialization, and it can only happen
    1037             :  * if there already are unsorted values (so it was already modified).
    1038             :  *
    1039             :  * Serialized ranges don't have any unsorted values, so this can't cause
    1040             :  * false negatives during querying.
    1041             :  */
    1042             : static bool
    1043       57064 : range_contains_value(BrinDesc *bdesc, Oid colloid,
    1044             :                      AttrNumber attno, Form_pg_attribute attr,
    1045             :                      Ranges *ranges, Datum newval, bool full)
    1046             : {
    1047             :     int         i;
    1048             :     FmgrInfo   *cmpEqualFn;
    1049       57064 :     Oid         typid = attr->atttypid;
    1050             : 
    1051             :     /*
    1052             :      * First inspect the ranges, if there are any. We first check the whole
    1053             :      * range, and only when there's still a chance of getting a match we
    1054             :      * inspect the individual ranges.
    1055             :      */
    1056       57064 :     if (has_matching_range(bdesc, colloid, ranges, newval, attno, typid))
    1057           0 :         return true;
    1058             : 
    1059       57064 :     cmpEqualFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
    1060             :                                                     BTEqualStrategyNumber);
    1061             : 
    1062             :     /*
    1063             :      * There is no matching range, so let's inspect the sorted values.
    1064             :      *
    1065             :      * We do a sequential search for small numbers of values, and binary
    1066             :      * search once we have more than 16 values. This threshold is somewhat
    1067             :      * arbitrary, as it depends on how expensive the comparison function is.
    1068             :      *
    1069             :      * XXX If we use the threshold here, maybe we should do the same thing in
    1070             :      * has_matching_range? Or maybe we should do the bin search all the time?
    1071             :      *
    1072             :      * XXX We could use the same optimization as for ranges, to check if the
    1073             :      * value is between min/max, to maybe rule out all sorted values without
    1074             :      * having to inspect all of them.
    1075             :      */
    1076       57064 :     if (ranges->nsorted >= 16)
    1077             :     {
    1078             :         compare_context cxt;
    1079             : 
    1080       38720 :         cxt.colloid = ranges->colloid;
    1081       38720 :         cxt.cmpFn = ranges->cmp;
    1082             : 
    1083       38720 :         if (bsearch_arg(&newval, &ranges->values[2 * ranges->nranges],
    1084       38720 :                         ranges->nsorted, sizeof(Datum),
    1085             :                         compare_values, (void *) &cxt) != NULL)
    1086           0 :             return true;
    1087             :     }
    1088             :     else
    1089             :     {
    1090       33700 :         for (i = 2 * ranges->nranges; i < 2 * ranges->nranges + ranges->nsorted; i++)
    1091             :         {
    1092             :             Datum       compar;
    1093             : 
    1094       15396 :             compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
    1095             : 
    1096             :             /* found an exact match */
    1097       15396 :             if (DatumGetBool(compar))
    1098          40 :                 return true;
    1099             :         }
    1100             :     }
    1101             : 
    1102             :     /* If not asked to inspect the unsorted part, we're done. */
    1103       57024 :     if (!full)
    1104       57024 :         return false;
    1105             : 
    1106             :     /* Inspect the unsorted part. */
    1107           0 :     for (i = 2 * ranges->nranges + ranges->nsorted; i < 2 * ranges->nranges + ranges->nvalues; i++)
    1108             :     {
    1109             :         Datum       compar;
    1110             : 
    1111           0 :         compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
    1112             : 
    1113             :         /* found an exact match */
    1114           0 :         if (DatumGetBool(compar))
    1115           0 :             return true;
    1116             :     }
    1117             : 
    1118             :     /* the value is not covered by this BRIN tuple */
    1119           0 :     return false;
    1120             : }
    1121             : 
    1122             : /*
    1123             :  * Expand ranges from Ranges into ExpandedRange array. This expects the
    1124             :  * eranges to be pre-allocated and with the correct size - there needs to be
    1125             :  * (nranges + nvalues) elements.
    1126             :  *
    1127             :  * The order of expanded ranges is arbitrary. We do expand the ranges first,
    1128             :  * and this part is sorted. But then we expand the values, and this part may
    1129             :  * be unsorted.
    1130             :  */
    1131             : static void
    1132        3132 : fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges)
    1133             : {
    1134             :     int         idx;
    1135             :     int         i;
    1136             : 
    1137             :     /* Check that the output array has the right size. */
    1138             :     Assert(neranges == (ranges->nranges + ranges->nvalues));
    1139             : 
    1140        3132 :     idx = 0;
    1141        3296 :     for (i = 0; i < ranges->nranges; i++)
    1142             :     {
    1143         164 :         eranges[idx].minval = ranges->values[2 * i];
    1144         164 :         eranges[idx].maxval = ranges->values[2 * i + 1];
    1145         164 :         eranges[idx].collapsed = false;
    1146         164 :         idx++;
    1147             : 
    1148             :         Assert(idx <= neranges);
    1149             :     }
    1150             : 
    1151       73132 :     for (i = 0; i < ranges->nvalues; i++)
    1152             :     {
    1153       70000 :         eranges[idx].minval = ranges->values[2 * ranges->nranges + i];
    1154       70000 :         eranges[idx].maxval = ranges->values[2 * ranges->nranges + i];
    1155       70000 :         eranges[idx].collapsed = true;
    1156       70000 :         idx++;
    1157             : 
    1158             :         Assert(idx <= neranges);
    1159             :     }
    1160             : 
    1161             :     /* Did we produce the expected number of elements? */
    1162             :     Assert(idx == neranges);
    1163             : 
    1164        3132 :     return;
    1165             : }
    1166             : 
    1167             : /*
    1168             :  * Sort and deduplicate expanded ranges.
    1169             :  *
    1170             :  * The ranges may contain duplicates - we're simply appending values, without
    1171             :  * checking for duplicates etc. So maybe the deduplication will reduce the
    1172             :  * number of ranges enough, and we won't have to compute the distances etc.
    1173             :  *
    1174             :  * Returns the number of expanded ranges.
    1175             :  */
    1176             : static int
    1177        3132 : sort_expanded_ranges(FmgrInfo *cmp, Oid colloid,
    1178             :                      ExpandedRange *eranges, int neranges)
    1179             : {
    1180             :     int         n;
    1181             :     int         i;
    1182             :     compare_context cxt;
    1183             : 
    1184             :     Assert(neranges > 0);
    1185             : 
    1186             :     /* sort the values */
    1187        3132 :     cxt.colloid = colloid;
    1188        3132 :     cxt.cmpFn = cmp;
    1189             : 
    1190             :     /*
    1191             :      * XXX We do qsort on all the values, but we could also leverage the fact
    1192             :      * that some of the input data is already sorted (all the ranges and maybe
    1193             :      * some of the points) and do merge sort.
    1194             :      */
    1195        3132 :     qsort_arg(eranges, neranges, sizeof(ExpandedRange),
    1196             :               compare_expanded_ranges, (void *) &cxt);
    1197             : 
    1198             :     /*
    1199             :      * Deduplicate the ranges - simply compare each range to the preceding
    1200             :      * one, and skip the duplicate ones.
    1201             :      */
    1202        3132 :     n = 1;
    1203       70164 :     for (i = 1; i < neranges; i++)
    1204             :     {
    1205             :         /* if the current range is equal to the preceding one, do nothing */
    1206       67032 :         if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], (void *) &cxt))
    1207         812 :             continue;
    1208             : 
    1209             :         /* otherwise, copy it to n-th place (if not already there) */
    1210       66220 :         if (i != n)
    1211        1952 :             memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange));
    1212             : 
    1213       66220 :         n++;
    1214             :     }
    1215             : 
    1216             :     Assert((n > 0) && (n <= neranges));
    1217             : 
    1218        3132 :     return n;
    1219             : }
    1220             : 
    1221             : /*
    1222             :  * When combining multiple Range values (in union function), some of the
    1223             :  * ranges may overlap. We simply merge the overlapping ranges to fix that.
    1224             :  *
    1225             :  * XXX This assumes the expanded ranges were previously sorted (by minval
    1226             :  * and then maxval). We leverage this when detecting overlap.
    1227             :  */
    1228             : static int
    1229           0 : merge_overlapping_ranges(FmgrInfo *cmp, Oid colloid,
    1230             :                          ExpandedRange *eranges, int neranges)
    1231             : {
    1232             :     int         idx;
    1233             : 
    1234             :     /* Merge ranges (idx) and (idx+1) if they overlap. */
    1235           0 :     idx = 0;
    1236           0 :     while (idx < (neranges - 1))
    1237             :     {
    1238             :         Datum       r;
    1239             : 
    1240             :         /*
    1241             :          * comparing [?,maxval] vs. [minval,?] - the ranges overlap if (minval
    1242             :          * <= maxval)
    1243             :          */
    1244           0 :         r = FunctionCall2Coll(cmp, colloid,
    1245           0 :                               eranges[idx].maxval,
    1246           0 :                               eranges[idx + 1].minval);
    1247             : 
    1248             :         /*
    1249             :          * Nope, maxval < minval, so no overlap. And we know the ranges are
    1250             :          * ordered, so there are no more overlaps, because all the remaining
    1251             :          * ranges have greater or equal minval.
    1252             :          */
    1253           0 :         if (DatumGetBool(r))
    1254             :         {
    1255             :             /* proceed to the next range */
    1256           0 :             idx += 1;
    1257           0 :             continue;
    1258             :         }
    1259             : 
    1260             :         /*
    1261             :          * So ranges 'idx' and 'idx+1' do overlap, but we don't know if
    1262             :          * 'idx+1' is contained in 'idx', or if they overlap only partially.
    1263             :          * So compare the upper bounds and keep the larger one.
    1264             :          */
    1265           0 :         r = FunctionCall2Coll(cmp, colloid,
    1266           0 :                               eranges[idx].maxval,
    1267           0 :                               eranges[idx + 1].maxval);
    1268             : 
    1269           0 :         if (DatumGetBool(r))
    1270           0 :             eranges[idx].maxval = eranges[idx + 1].maxval;
    1271             : 
    1272             :         /*
    1273             :          * The range certainly is no longer collapsed (irrespectively of the
    1274             :          * previous state).
    1275             :          */
    1276           0 :         eranges[idx].collapsed = false;
    1277             : 
    1278             :         /*
    1279             :          * Now get rid of the (idx+1) range entirely by shifting the remaining
    1280             :          * ranges by 1. There are neranges elements, and we need to move
    1281             :          * elements from (idx+2). That means the number of elements to move is
    1282             :          * [neranges - (idx+2)].
    1283             :          */
    1284           0 :         memmove(&eranges[idx + 1], &eranges[idx + 2],
    1285           0 :                 (neranges - (idx + 2)) * sizeof(ExpandedRange));
    1286             : 
    1287             :         /*
    1288             :          * Decrease the number of ranges, and repeat (with the same range, as
    1289             :          * it might overlap with additional ranges thanks to the merge).
    1290             :          */
    1291           0 :         neranges--;
    1292             :     }
    1293             : 
    1294           0 :     return neranges;
    1295             : }
    1296             : 
    1297             : /*
    1298             :  * Simple comparator for distance values, comparing the double value.
    1299             :  * This is intentionally sorting the distances in descending order, i.e.
    1300             :  * the longer gaps will be at the front.
    1301             :  */
    1302             : static int
    1303       94644 : compare_distances(const void *a, const void *b)
    1304             : {
    1305       94644 :     DistanceValue *da = (DistanceValue *) a;
    1306       94644 :     DistanceValue *db = (DistanceValue *) b;
    1307             : 
    1308       94644 :     if (da->value < db->value)
    1309       22068 :         return 1;
    1310       72576 :     else if (da->value > db->value)
    1311       16448 :         return -1;
    1312             : 
    1313       56128 :     return 0;
    1314             : }
    1315             : 
    1316             : /*
    1317             :  * Given an array of expanded ranges, compute size of the gaps between each
    1318             :  * range.  For neranges there are (neranges-1) gaps.
    1319             :  *
    1320             :  * We simply call the "distance" function to compute the (max-min) for pairs
    1321             :  * of consecutive ranges. The function may be fairly expensive, so we do that
    1322             :  * just once (and then use it to pick as many ranges to merge as possible).
    1323             :  *
    1324             :  * See reduce_expanded_ranges for details.
    1325             :  */
    1326             : static DistanceValue *
    1327        3132 : build_distances(FmgrInfo *distanceFn, Oid colloid,
    1328             :                 ExpandedRange *eranges, int neranges)
    1329             : {
    1330             :     int         i;
    1331             :     int         ndistances;
    1332             :     DistanceValue *distances;
    1333             : 
    1334             :     Assert(neranges >= 2);
    1335             : 
    1336        3132 :     ndistances = (neranges - 1);
    1337        3132 :     distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances);
    1338             : 
    1339             :     /*
    1340             :      * Walk through the ranges once and compute the distance between the
    1341             :      * ranges so that we can sort them once.
    1342             :      */
    1343       69352 :     for (i = 0; i < ndistances; i++)
    1344             :     {
    1345             :         Datum       a1,
    1346             :                     a2,
    1347             :                     r;
    1348             : 
    1349       66220 :         a1 = eranges[i].maxval;
    1350       66220 :         a2 = eranges[i + 1].minval;
    1351             : 
    1352             :         /* compute length of the gap (between max/min) */
    1353       66220 :         r = FunctionCall2Coll(distanceFn, colloid, a1, a2);
    1354             : 
    1355             :         /* remember the index of the gap the distance is for */
    1356       66220 :         distances[i].index = i;
    1357       66220 :         distances[i].value = DatumGetFloat8(r);
    1358             :     }
    1359             : 
    1360             :     /*
    1361             :      * Sort the distances in descending order, so that the longest gaps are at
    1362             :      * the front.
    1363             :      */
    1364        3132 :     pg_qsort(distances, ndistances, sizeof(DistanceValue), compare_distances);
    1365             : 
    1366        3132 :     return distances;
    1367             : }
    1368             : 
    1369             : /*
    1370             :  * Builds expanded ranges for the existing ranges (and single-point ranges),
    1371             :  * and also the new value (which did not fit into the array).  This expanded
    1372             :  * representation makes the processing a bit easier, as it allows handling
    1373             :  * ranges and points the same way.
    1374             :  *
    1375             :  * We sort and deduplicate the expanded ranges - this is necessary, because
    1376             :  * the points may be unsorted. And moreover the two parts (ranges and
    1377             :  * points) are sorted on their own.
    1378             :  */
    1379             : static ExpandedRange *
    1380        3132 : build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges,
    1381             :                       int *nranges)
    1382             : {
    1383             :     int         neranges;
    1384             :     ExpandedRange *eranges;
    1385             : 
    1386             :     /* both ranges and points are expanded into a separate element */
    1387        3132 :     neranges = ranges->nranges + ranges->nvalues;
    1388             : 
    1389        3132 :     eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
    1390             : 
    1391             :     /* fill the expanded ranges */
    1392        3132 :     fill_expanded_ranges(eranges, neranges, ranges);
    1393             : 
    1394             :     /* sort and deduplicate the expanded ranges */
    1395        3132 :     neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges);
    1396             : 
    1397             :     /* remember how many ranges we built */
    1398        3132 :     *nranges = neranges;
    1399             : 
    1400        3132 :     return eranges;
    1401             : }
    1402             : 
    1403             : #ifdef USE_ASSERT_CHECKING
    1404             : /*
    1405             :  * Counts boundary values needed to store the ranges. Each single-point
    1406             :  * range is stored using a single value, each regular range needs two.
    1407             :  */
    1408             : static int
    1409             : count_values(ExpandedRange *cranges, int ncranges)
    1410             : {
    1411             :     int         i;
    1412             :     int         count;
    1413             : 
    1414             :     count = 0;
    1415             :     for (i = 0; i < ncranges; i++)
    1416             :     {
    1417             :         if (cranges[i].collapsed)
    1418             :             count += 1;
    1419             :         else
    1420             :             count += 2;
    1421             :     }
    1422             : 
    1423             :     return count;
    1424             : }
    1425             : #endif
    1426             : 
    1427             : /*
    1428             :  * reduce_expanded_ranges
    1429             :  *      reduce the ranges until the number of values is low enough
    1430             :  *
    1431             :  * Combines ranges until the number of boundary values drops below the
    1432             :  * threshold specified by max_values. This happens by merging enough
    1433             :  * ranges by the distance between them.
    1434             :  *
    1435             :  * Returns the number of result ranges.
    1436             :  *
    1437             :  * We simply use the global min/max and then add boundaries for enough
    1438             :  * largest gaps. Each gap adds 2 values, so we simply use (target/2-1)
    1439             :  * distances. Then we simply sort all the values - each two values are
    1440             :  * a boundary of a range (possibly collapsed).
    1441             :  *
    1442             :  * XXX Some of the ranges may be collapsed (i.e. the min/max values are
    1443             :  * equal), but we ignore that for now. We could repeat the process,
    1444             :  * adding a couple more gaps recursively.
    1445             :  *
    1446             :  * XXX The ranges to merge are selected solely using the distance. But
    1447             :  * that may not be the best strategy, for example when multiple gaps
    1448             :  * are of equal (or very similar) length.
    1449             :  *
    1450             :  * Consider for example points 1, 2, 3, .., 64, which have gaps of the
    1451             :  * same length 1 of course. In that case, we tend to pick the first
    1452             :  * gap of that length, which leads to this:
    1453             :  *
    1454             :  *    step 1:  [1, 2], 3, 4, 5, .., 64
    1455             :  *    step 2:  [1, 3], 4, 5,    .., 64
    1456             :  *    step 3:  [1, 4], 5,       .., 64
    1457             :  *    ...
    1458             :  *
    1459             :  * So in the end we'll have one "large" range and multiple small points.
    1460             :  * That may be fine, but it seems a bit strange and non-optimal. Maybe
    1461             :  * we should consider other things when picking ranges to merge - e.g.
    1462             :  * length of the ranges? Or perhaps randomize the choice of ranges, with
    1463             :  * probability inversely proportional to the distance (the gap lengths
    1464             :  * may be very close, but not exactly the same).
    1465             :  *
    1466             :  * XXX Or maybe we could just handle this by using random value as a
    1467             :  * tie-break, or by adding random noise to the actual distance.
    1468             :  */
    1469             : static int
    1470        3132 : reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
    1471             :                        DistanceValue *distances, int max_values,
    1472             :                        FmgrInfo *cmp, Oid colloid)
    1473             : {
    1474             :     int         i;
    1475             :     int         nvalues;
    1476             :     Datum      *values;
    1477             : 
    1478             :     compare_context cxt;
    1479             : 
    1480             :     /* total number of gaps between ranges */
    1481        3132 :     int         ndistances = (neranges - 1);
    1482             : 
    1483             :     /* number of gaps to keep */
    1484        3132 :     int         keep = (max_values / 2 - 1);
    1485             : 
    1486             :     /*
    1487             :      * Maybe we have a sufficiently low number of ranges already?
    1488             :      *
    1489             :      * XXX This should happen before we actually do the expensive stuff like
    1490             :      * sorting, so maybe this should be just an assert.
    1491             :      */
    1492        3132 :     if (keep >= ndistances)
    1493        2884 :         return neranges;
    1494             : 
    1495             :     /* sort the values */
    1496         248 :     cxt.colloid = colloid;
    1497         248 :     cxt.cmpFn = cmp;
    1498             : 
    1499             :     /* allocate space for the boundary values */
    1500         248 :     nvalues = 0;
    1501         248 :     values = (Datum *) palloc(sizeof(Datum) * max_values);
    1502             : 
    1503             :     /* add the global min/max values, from the first/last range */
    1504         248 :     values[nvalues++] = eranges[0].minval;
    1505         248 :     values[nvalues++] = eranges[neranges - 1].maxval;
    1506             : 
    1507             :     /* add boundary values for enough gaps */
    1508       14464 :     for (i = 0; i < keep; i++)
    1509             :     {
    1510             :         /* index of the gap between (index) and (index+1) ranges */
    1511       14216 :         int         index = distances[i].index;
    1512             : 
    1513             :         Assert((index >= 0) && ((index + 1) < neranges));
    1514             : 
    1515             :         /* add max from the preceding range, minval from the next one */
    1516       14216 :         values[nvalues++] = eranges[index].maxval;
    1517       14216 :         values[nvalues++] = eranges[index + 1].minval;
    1518             : 
    1519             :         Assert(nvalues <= max_values);
    1520             :     }
    1521             : 
    1522             :     /* We should have an even number of range values. */
    1523             :     Assert(nvalues % 2 == 0);
    1524             : 
    1525             :     /*
    1526             :      * Sort the values using the comparator function, and form ranges from the
    1527             :      * sorted result.
    1528             :      */
    1529         248 :     qsort_arg(values, nvalues, sizeof(Datum),
    1530             :               compare_values, (void *) &cxt);
    1531             : 
    1532             :     /* We have nvalues boundary values, which means nvalues/2 ranges. */
    1533       14712 :     for (i = 0; i < (nvalues / 2); i++)
    1534             :     {
    1535       14464 :         eranges[i].minval = values[2 * i];
    1536       14464 :         eranges[i].maxval = values[2 * i + 1];
    1537             : 
    1538             :         /* if the boundary values are the same, it's a collapsed range */
    1539       28928 :         eranges[i].collapsed = (compare_values(&values[2 * i],
    1540       14464 :                                                &values[2 * i + 1],
    1541       14464 :                                                &cxt) == 0);
    1542             :     }
    1543             : 
    1544         248 :     return (nvalues / 2);
    1545             : }
    1546             : 
    1547             : /*
    1548             :  * Store the boundary values from ExpandedRanges back into 'ranges' (using
    1549             :  * only the minimal number of values needed).
    1550             :  */
    1551             : static void
    1552        3132 : store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges)
    1553             : {
    1554             :     int         i;
    1555        3132 :     int         idx = 0;
    1556             : 
    1557             :     /* first copy in the regular ranges */
    1558        3132 :     ranges->nranges = 0;
    1559       26456 :     for (i = 0; i < neranges; i++)
    1560             :     {
    1561       23324 :         if (!eranges[i].collapsed)
    1562             :         {
    1563        1212 :             ranges->values[idx++] = eranges[i].minval;
    1564        1212 :             ranges->values[idx++] = eranges[i].maxval;
    1565        1212 :             ranges->nranges++;
    1566             :         }
    1567             :     }
    1568             : 
    1569             :     /* now copy in the collapsed ones */
    1570        3132 :     ranges->nvalues = 0;
    1571       26456 :     for (i = 0; i < neranges; i++)
    1572             :     {
    1573       23324 :         if (eranges[i].collapsed)
    1574             :         {
    1575       22112 :             ranges->values[idx++] = eranges[i].minval;
    1576       22112 :             ranges->nvalues++;
    1577             :         }
    1578             :     }
    1579             : 
    1580             :     /* all the values are sorted */
    1581        3132 :     ranges->nsorted = ranges->nvalues;
    1582             : 
    1583             :     Assert(count_values(eranges, neranges) == 2 * ranges->nranges + ranges->nvalues);
    1584             :     Assert(2 * ranges->nranges + ranges->nvalues <= ranges->maxvalues);
    1585        3132 : }
    1586             : 
    1587             : 
    1588             : /*
    1589             :  * Consider freeing space in the ranges. Checks if there's space for at least
    1590             :  * one new value, and performs compaction if needed.
    1591             :  *
    1592             :  * Returns true if the value was actually modified.
    1593             :  */
    1594             : static bool
    1595       57064 : ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid,
    1596             :                             AttrNumber attno, Form_pg_attribute attr,
    1597             :                             Ranges *range)
    1598             : {
    1599             :     MemoryContext ctx;
    1600             :     MemoryContext oldctx;
    1601             : 
    1602             :     FmgrInfo   *cmpFn,
    1603             :                *distanceFn;
    1604             : 
    1605             :     /* expanded ranges */
    1606             :     ExpandedRange *eranges;
    1607             :     int         neranges;
    1608             :     DistanceValue *distances;
    1609             : 
    1610             :     /*
    1611             :      * If there is free space in the buffer, we're done without having to
    1612             :      * modify anything.
    1613             :      */
    1614       57064 :     if (2 * range->nranges + range->nvalues < range->maxvalues)
    1615       56900 :         return false;
    1616             : 
    1617             :     /* we'll certainly need the comparator, so just look it up now */
    1618         164 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    1619             :                                                BTLessStrategyNumber);
    1620             : 
    1621             :     /* deduplicate values, if there's an unsorted part */
    1622         164 :     range_deduplicate_values(range);
    1623             : 
    1624             :     /*
    1625             :      * Did we free enough space by just the deduplication?
    1626             :      *
    1627             :      * We don't simply check against range->maxvalues again. The deduplication
    1628             :      * might have freed very little space (e.g. just one value), forcing us to
    1629             :      * do deduplication very often. In that case, it's better to do the
    1630             :      * compaction and free more space.
    1631             :      */
    1632         164 :     if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
    1633           0 :         return true;
    1634             : 
    1635             :     /*
    1636             :      * We need to combine some of the existing ranges, to reduce the number of
    1637             :      * values we have to store.
    1638             :      *
    1639             :      * The distanceFn calls (which may internally call e.g. numeric_le) may
    1640             :      * allocate quite a bit of memory, and we must not leak it (we might have
    1641             :      * to do this repeatedly, even for a single BRIN page range). Otherwise
    1642             :      * we'd have problems e.g. when building new indexes. So we use a memory
    1643             :      * context and make sure we free the memory at the end (so if we call the
    1644             :      * distance function many times, it might be an issue, but meh).
    1645             :      */
    1646         164 :     ctx = AllocSetContextCreate(CurrentMemoryContext,
    1647             :                                 "minmax-multi context",
    1648             :                                 ALLOCSET_DEFAULT_SIZES);
    1649             : 
    1650         164 :     oldctx = MemoryContextSwitchTo(ctx);
    1651             : 
    1652             :     /* build the expanded ranges */
    1653         164 :     eranges = build_expanded_ranges(cmpFn, colloid, range, &neranges);
    1654             : 
    1655             :     /* and we'll also need the 'distance' procedure */
    1656         164 :     distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
    1657             : 
    1658             :     /* build array of gap distances and sort them in descending order */
    1659         164 :     distances = build_distances(distanceFn, colloid, eranges, neranges);
    1660             : 
    1661             :     /*
    1662             :      * Combine ranges until we release at least 50% of the space. This
    1663             :      * threshold is somewhat arbitrary, perhaps needs tuning. We must not use
    1664             :      * too low or high value.
    1665             :      */
    1666         328 :     neranges = reduce_expanded_ranges(eranges, neranges, distances,
    1667         164 :                                       range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR,
    1668             :                                       cmpFn, colloid);
    1669             : 
    1670             :     /* Make sure we've sufficiently reduced the number of ranges. */
    1671             :     Assert(count_values(eranges, neranges) <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR);
    1672             : 
    1673             :     /* decompose the expanded ranges into regular ranges and single values */
    1674         164 :     store_expanded_ranges(range, eranges, neranges);
    1675             : 
    1676         164 :     MemoryContextSwitchTo(oldctx);
    1677         164 :     MemoryContextDelete(ctx);
    1678             : 
    1679             :     /* Did we break the ranges somehow? */
    1680         164 :     AssertCheckRanges(range, cmpFn, colloid);
    1681             : 
    1682         164 :     return true;
    1683             : }
    1684             : 
    1685             : /*
    1686             :  * range_add_value
    1687             :  *      Add the new value to the minmax-multi range.
    1688             :  */
    1689             : static bool
    1690       57064 : range_add_value(BrinDesc *bdesc, Oid colloid,
    1691             :                 AttrNumber attno, Form_pg_attribute attr,
    1692             :                 Ranges *ranges, Datum newval)
    1693             : {
    1694             :     FmgrInfo   *cmpFn;
    1695       57064 :     bool        modified = false;
    1696             : 
    1697             :     /* we'll certainly need the comparator, so just look it up now */
    1698       57064 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    1699             :                                                BTLessStrategyNumber);
    1700             : 
    1701             :     /* comprehensive checks of the input ranges */
    1702       57064 :     AssertCheckRanges(ranges, cmpFn, colloid);
    1703             : 
    1704             :     /*
    1705             :      * Make sure there's enough free space in the buffer. We only trigger this
    1706             :      * when the buffer is full, which means it had to be modified as we size
    1707             :      * it to be larger than what is stored on disk.
    1708             :      *
    1709             :      * This needs to happen before we check if the value is contained in the
    1710             :      * range, because the value might be in the unsorted part, and we don't
    1711             :      * check that in range_contains_value. The deduplication would then move
    1712             :      * it to the sorted part, and we'd add the value too, which violates the
    1713             :      * rule that we never have duplicates with the ranges or sorted values.
    1714             :      *
    1715             :      * We might also deduplicate and recheck if the value is contained, but
    1716             :      * that seems like overkill. We'd need to deduplicate anyway, so why not
    1717             :      * do it now.
    1718             :      */
    1719       57064 :     modified = ensure_free_space_in_buffer(bdesc, colloid,
    1720             :                                            attno, attr, ranges);
    1721             : 
    1722             :     /*
    1723             :      * Bail out if the value already is covered by the range.
    1724             :      *
    1725             :      * We could also add values until we hit values_per_range, and then do the
    1726             :      * deduplication in a batch, hoping for better efficiency. But that would
    1727             :      * mean we actually modify the range every time, which means having to
    1728             :      * serialize the value, which does palloc, walks the values, copies them,
    1729             :      * etc. Not exactly cheap.
    1730             :      *
    1731             :      * So instead we do the check, which should be fairly cheap - assuming the
    1732             :      * comparator function is not very expensive.
    1733             :      *
    1734             :      * This also implies the values array can't contain duplicate values.
    1735             :      */
    1736       57064 :     if (range_contains_value(bdesc, colloid, attno, attr, ranges, newval, false))
    1737          40 :         return modified;
    1738             : 
    1739             :     /* Make a copy of the value, if needed. */
    1740       57024 :     newval = datumCopy(newval, attr->attbyval, attr->attlen);
    1741             : 
    1742             :     /*
    1743             :      * If there's space in the values array, copy it in and we're done.
    1744             :      *
    1745             :      * We do want to keep the values sorted (to speed up searches), so we do a
    1746             :      * simple insertion sort. We could do something more elaborate, e.g. by
    1747             :      * sorting the values only now and then, but for small counts (e.g. when
    1748             :      * maxvalues is 64) this should be fine.
    1749             :      */
    1750       57024 :     ranges->values[2 * ranges->nranges + ranges->nvalues] = newval;
    1751       57024 :     ranges->nvalues++;
    1752             : 
    1753             :     /* If we added the first value, we can consider it as sorted. */
    1754       57024 :     if (ranges->nvalues == 1)
    1755        2964 :         ranges->nsorted = 1;
    1756             : 
    1757             :     /*
    1758             :      * Check we haven't broken the ordering of boundary values (checks both
    1759             :      * parts, but that doesn't hurt).
    1760             :      */
    1761       57024 :     AssertCheckRanges(ranges, cmpFn, colloid);
    1762             : 
    1763             :     /* Check the range contains the value we just added. */
    1764             :     Assert(range_contains_value(bdesc, colloid, attno, attr, ranges, newval, true));
    1765             : 
    1766             :     /* yep, we've modified the range */
    1767       57024 :     return true;
    1768             : }
    1769             : 
    1770             : /*
    1771             :  * Compact the data collected in 'ranges' during "batch mode", in place, so
    1772             :  * that it holds at most max_values boundary values. This is similar to
    1773             :  * reduce_expanded_ranges, but the input may be unsorted and have duplicates.
    1774             :  */
    1775             : static void
    1776        2972 : compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
    1777             : {
    1778             :     FmgrInfo   *cmpFn,
    1779             :                *distanceFn;
    1780             : 
    1781             :     /* expanded ranges */
    1782             :     ExpandedRange *eranges;
    1783             :     int         neranges;
    1784             :     DistanceValue *distances;
    1785             : 
    1786             :     MemoryContext ctx;
    1787             :     MemoryContext oldctx;
    1788             : 
    1789             :     /*
    1790             :      * Do we need to actually compactify anything?
    1791             :      *
    1792             :      * Compaction is needed in two cases: there are more values than
    1793             :      * max_values allows, or some of the values are still unsorted.
    1794             :      */
    1795        2972 :     if ((ranges->nranges * 2 + ranges->nvalues <= max_values) &&
    1796        2888 :         (ranges->nsorted == ranges->nvalues))
    1797           4 :         return;
    1798             : 
    1799             :     /* we'll certainly need the comparator, so just look it up now */
    1800        2968 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, ranges->attno, ranges->typid,
    1801             :                                                BTLessStrategyNumber);
    1802             : 
    1803             :     /* and we'll also need the 'distance' procedure */
    1804        2968 :     distanceFn = minmax_multi_get_procinfo(bdesc, ranges->attno, PROCNUM_DISTANCE);
    1805             : 
    1806             :     /*
    1807             :      * The distanceFn calls (which may internally call e.g. numeric_le) may
    1808             :      * allocate quite a bit of memory, and we must not leak it. Otherwise,
    1809             :      * we'd have problems e.g. when building indexes. So we create a local
    1810             :      * memory context and delete it just before leaving this function (once
    1811             :      * per compaction, not after every distanceFn call).
    1812             :      */
    1813        2968 :     ctx = AllocSetContextCreate(CurrentMemoryContext,
    1814             :                                 "minmax-multi context",
    1815             :                                 ALLOCSET_DEFAULT_SIZES);
    1816             : 
    1817        2968 :     oldctx = MemoryContextSwitchTo(ctx);
    1818             : 
    1819             :     /* build the expanded ranges */
    1820        2968 :     eranges = build_expanded_ranges(cmpFn, ranges->colloid, ranges, &neranges);
    1821             : 
    1822             :     /* build array of gap distances and sort them in ascending order */
    1823        2968 :     distances = build_distances(distanceFn, ranges->colloid,
    1824             :                                 eranges, neranges);
    1825             : 
    1826             :     /*
    1827             :      * Combine ranges until we get below max_values. We don't use any scale
    1828             :      * factor, because this is used during serialization, and we don't expect
    1829             :      * more tuples to be inserted anytime soon.
    1830             :      */
    1831        2968 :     neranges = reduce_expanded_ranges(eranges, neranges, distances,
    1832             :                                       max_values, cmpFn, ranges->colloid);
    1833             : 
    1834             :     Assert(count_values(eranges, neranges) <= max_values);
    1835             : 
    1836             :     /* transform back into regular ranges and single values */
    1837        2968 :     store_expanded_ranges(ranges, eranges, neranges);
    1838             : 
    1839             :     /* check all the range invariants */
    1840        2968 :     AssertCheckRanges(ranges, cmpFn, ranges->colloid);
    1841             : 
    1842        2968 :     MemoryContextSwitchTo(oldctx);
    1843        2968 :     MemoryContextDelete(ctx);
    1844             : }
    1845             : 
    1846             : Datum
    1847       12444 : brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS)
    1848             : {
    1849             :     BrinOpcInfo *result;
    1850             : 
    1851             :     /*
    1852             :      * One palloc0 holds the BrinOpcInfo plus the MinmaxMultiOpaque after it;
    1853             :      * zeroing leaves strategy_procinfos unset (fn_oid InvalidOid), set lazily.
    1854             :      */
    1855             : 
    1856       12444 :     result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
    1857             :                      sizeof(MinmaxMultiOpaque));
    1858       12444 :     result->oi_nstored = 1;
    1859       12444 :     result->oi_regular_nulls = true;
    1860       12444 :     result->oi_opaque = (MinmaxMultiOpaque *)
    1861       12444 :         MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
    1862       12444 :     result->oi_typcache[0] = lookup_type_cache(PG_BRIN_MINMAX_MULTI_SUMMARYOID, 0);
    1863             : 
    1864       12444 :     PG_RETURN_POINTER(result);
    1865             : }
    1866             : 
    1867             : /*
    1868             :  * Distance between two float4 range boundaries, as float8 (a2 - a1, NaN-aware).
    1869             :  */
    1870             : Datum
    1871         464 : brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
    1872             : {
    1873         464 :     float       a1 = PG_GETARG_FLOAT4(0);
    1874         464 :     float       a2 = PG_GETARG_FLOAT4(1);
    1875             : 
    1876             :     /* two NaNs are considered the same value, i.e. zero distance */
    1877         464 :     if (isnan(a1) && isnan(a2))
    1878           0 :         PG_RETURN_FLOAT8(0.0);
    1879             : 
    1880             :     /* exactly one NaN (both-NaN was handled above): infinite distance */
    1881         464 :     if (isnan(a1) || isnan(a2))
    1882           4 :         PG_RETURN_FLOAT8(get_float8_infinity());
    1883             : 
    1884             :     /*
    1885             :      * The arguments are range boundaries passed in order; a collapsed range
    1886             :      * (a single point) has equal boundaries, hence <= rather than <.
    1887             :      */
    1888             :     Assert(a1 <= a2);
    1889             : 
    1890         460 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1891             : }
    1892             : 
    1893             : /*
    1894             :  * Distance between two float8 range boundaries (a2 - a1, NaN-aware).
    1895             :  */
    1896             : Datum
    1897         696 : brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
    1898             : {
    1899         696 :     double      a1 = PG_GETARG_FLOAT8(0);
    1900         696 :     double      a2 = PG_GETARG_FLOAT8(1);
    1901             : 
    1902             :     /* two NaNs are considered the same value, i.e. zero distance */
    1903         696 :     if (isnan(a1) && isnan(a2))
    1904           0 :         PG_RETURN_FLOAT8(0.0);
    1905             : 
    1906             :     /* exactly one NaN (both-NaN was handled above): infinite distance */
    1907         696 :     if (isnan(a1) || isnan(a2))
    1908           4 :         PG_RETURN_FLOAT8(get_float8_infinity());
    1909             : 
    1910             :     /*
    1911             :      * The arguments are range boundaries passed in order; a collapsed range
    1912             :      * (a single point) has equal boundaries, hence <= rather than <.
    1913             :      */
    1914             :     Assert(a1 <= a2);
    1915             : 
    1916         692 :     PG_RETURN_FLOAT8(a2 - a1);
    1917             : }
    1918             : 
    1919             : /*
    1920             :  * Distance between two int2 range boundaries: a2 - a1, computed in float8.
    1921             :  */
    1922             : Datum
    1923         676 : brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
    1924             : {
    1925         676 :     int16       a1 = PG_GETARG_INT16(0);
    1926         676 :     int16       a2 = PG_GETARG_INT16(1);
    1927             : 
    1928             :     /*
    1929             :      * The arguments are range boundaries passed in order; a collapsed range
    1930             :      * (a single point) has equal boundaries, hence <= rather than <.
    1931             :      */
    1932             :     Assert(a1 <= a2);
    1933             : 
    1934         676 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1935             : }
    1936             : 
    1937             : /*
    1938             :  * Distance between two int4 range boundaries: a2 - a1, computed in float8.
    1939             :  */
    1940             : Datum
    1941       54316 : brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
    1942             : {
    1943       54316 :     int32       a1 = PG_GETARG_INT32(0);
    1944       54316 :     int32       a2 = PG_GETARG_INT32(1);
    1945             : 
    1946             :     /*
    1947             :      * The arguments are range boundaries passed in order; a collapsed range
    1948             :      * (a single point) has equal boundaries, hence <= rather than <.
    1949             :      */
    1950             :     Assert(a1 <= a2);
    1951             : 
    1952       54316 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1953             : }
    1954             : 
    1955             : /*
    1956             :  * Distance between two int8 range boundaries: a2 - a1, computed in float8.
    1957             :  */
    1958             : Datum
    1959         684 : brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
    1960             : {
    1961         684 :     int64       a1 = PG_GETARG_INT64(0);
    1962         684 :     int64       a2 = PG_GETARG_INT64(1);
    1963             : 
    1964             :     /*
    1965             :      * The arguments are range boundaries passed in order; a collapsed range
    1966             :      * (a single point) has equal boundaries, hence <= rather than <.
    1967             :      */
    1968             :     Assert(a1 <= a2);
    1969             : 
    1970         684 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1971             : }
    1972             : 
    1973             : /*
    1974             :  * Compute the distance between two tid values (by mapping each of them to a
    1975             :  * float8 position, block * MaxHeapTuplesPerPage + offset, and subtracting).
    1976             :  */
    1977             : Datum
    1978         684 : brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
    1979             : {
    1980             :     double      da1,
    1981             :                 da2;
    1982             : 
    1983         684 :     ItemPointer pa1 = (ItemPointer) PG_GETARG_DATUM(0);
    1984         684 :     ItemPointer pa2 = (ItemPointer) PG_GETARG_DATUM(1);
    1985             : 
    1986             :     /*
    1987             :      * We know the values are range boundaries, but the range may be collapsed
    1988             :      * (i.e. single points), with equal values.
    1989             :      */
    1990             :     Assert(ItemPointerCompare(pa1, pa2) <= 0);
    1991             : 
    1992             :     /*
    1993             :      * No-check variants: user-supplied values may have ip_posid == 0 (see
    1994             :      * ItemPointerCompare). Multiply in double so big block numbers can't wrap.
    1995             :      */
    1996         684 :     da1 = (double) ItemPointerGetBlockNumberNoCheck(pa1) * MaxHeapTuplesPerPage +
    1997         684 :         ItemPointerGetOffsetNumberNoCheck(pa1);
    1998             : 
    1999         684 :     da2 = (double) ItemPointerGetBlockNumberNoCheck(pa2) * MaxHeapTuplesPerPage +
    2000         684 :         ItemPointerGetOffsetNumberNoCheck(pa2);
    2001             : 
    2002         684 :     PG_RETURN_FLOAT8(da2 - da1);
    2003             : }
    2004             : 
    2005             : /*
    2006             :  * Compute the distance between two numeric values (a2 - a1, as float8).
    2007             :  */
    2008             : Datum
    2009         684 : brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
    2010             : {
    2011             :     Datum       d;
    2012         684 :     Datum       a1 = PG_GETARG_DATUM(0);
    2013         684 :     Datum       a2 = PG_GETARG_DATUM(1);
    2014             : 
    2015             :     /*
    2016             :      * We know the values are range boundaries, but the range may be collapsed
    2017             :      * (i.e. single points), with equal values.
    2018             :      */
    2019             :     Assert(DatumGetBool(DirectFunctionCall2(numeric_le, a1, a2)));
    2020             : 
    2021         684 :     d = DirectFunctionCall2(numeric_sub, a2, a1);   /* a2 - a1 */
    2022             :     /* numeric_float8 already yields a float8 Datum; return it unconverted */
    2023         684 :     PG_RETURN_DATUM(DirectFunctionCall1(numeric_float8, d));
    2024             : }
    2025             : 
    2026             : /*
    2027             :  * Compute the approximate distance between two UUID values.
    2028             :  *
    2029             :  * XXX We do not need a perfectly accurate value, so we approximate the
    2030             :  * deltas (which would have to be 128-bit integers) with a 64-bit float.
    2031             :  * The small inaccuracies do not matter in practice, in the worst case
    2032             :  * we'll decide to merge ranges that are not the closest ones.
    2033             :  */
    2034             : Datum
    2035         684 : brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS)
    2036             : {
    2037             :     int         i;
    2038         684 :     float8      delta = 0;
    2039             : 
    2040         684 :     Datum       a1 = PG_GETARG_DATUM(0);
    2041         684 :     Datum       a2 = PG_GETARG_DATUM(1);
    2042             : 
    2043         684 :     pg_uuid_t  *u1 = DatumGetUUIDP(a1);
    2044         684 :     pg_uuid_t  *u2 = DatumGetUUIDP(a2);
    2045             : 
    2046             :     /*
    2047             :      * The arguments are range boundaries passed in order; a collapsed range
    2048             :      * (a single point) has equal boundaries, hence uuid_le rather than uuid_lt.
    2049             :      */
    2050             :     Assert(DatumGetBool(DirectFunctionCall2(uuid_le, a1, a2)));
    2051             : 
    2052             :     /* fold byte deltas from last to first, so earlier bytes weigh more */
    2053       11628 :     for (i = UUID_LEN - 1; i >= 0; i--)
    2054             :     {
    2055       10944 :         delta += (int) u2->data[i] - (int) u1->data[i];
    2056       10944 :         delta /= 256;
    2057             :     }
    2058             : 
    2059             :     Assert(delta >= 0);
    2060             : 
    2061         684 :     PG_RETURN_FLOAT8(delta);
    2062             : }
    2063             : 
    2064             : /*
    2065             :  * Compute the distance between two dates (dateVal2 - dateVal1, as float8).
    2066             :  */
    2067             : Datum
    2068         684 : brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
    2069             : {
    2070         684 :     DateADT     dateVal1 = PG_GETARG_DATEADT(0);
    2071         684 :     DateADT     dateVal2 = PG_GETARG_DATEADT(1);
    2072             :     /* non-finite dates are reported as zero distance (unchanged behavior) */
    2073         684 :     if (DATE_NOT_FINITE(dateVal1) || DATE_NOT_FINITE(dateVal2))
    2074           0 :         PG_RETURN_FLOAT8(0);
    2075             :     /* second minus first, in float8: non-negative and free of int overflow */
    2076         684 :     PG_RETURN_FLOAT8((float8) dateVal2 - (float8) dateVal1);
    2077             : }
    2078             : 
    2079             : /*
    2080             :  * Compute the distance between two time (without tz) values, as float8.
    2081             :  *
    2082             :  * The TimeADT values are subtracted directly (tb - ta), then converted.
    2083             :  */
    2084             : Datum
    2085         676 : brin_minmax_multi_distance_time(PG_FUNCTION_ARGS)
    2086             : {
    2087         676 :     float8      delta = 0;
    2088             : 
    2089         676 :     TimeADT     ta = PG_GETARG_TIMEADT(0);
    2090         676 :     TimeADT     tb = PG_GETARG_TIMEADT(1);
    2091             : 
    2092         676 :     delta = (tb - ta);
    2093             : 
    2094             :     Assert(delta >= 0);
    2095             : 
    2096         676 :     PG_RETURN_FLOAT8(delta);
    2097             : }
    2098             : 
    2099             : /*
    2100             :  * Compute the approximate distance between two timetz values.
    2101             :  *
    2102             :  * Subtracts the time fields and adds the zone difference times USECS_PER_SEC.
    2103             :  */
    2104             : Datum
    2105         524 : brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS)
    2106             : {
    2107         524 :     float8      delta = 0;
    2108             : 
    2109         524 :     TimeTzADT  *ta = PG_GETARG_TIMETZADT_P(0);
    2110         524 :     TimeTzADT  *tb = PG_GETARG_TIMETZADT_P(1);
    2111             : 
    2112         524 :     delta = (tb->time - ta->time) + (tb->zone - ta->zone) * USECS_PER_SEC;
    2113             : 
    2114             :     Assert(delta >= 0);
    2115             : 
    2116         524 :     PG_RETURN_FLOAT8(delta);
    2117             : }
    2118             : 
    2119             : /*
    2120             :  * Compute the distance between two timestamp values (dt2 - dt1, as float8).
    2121             :  */
    2122             : Datum
    2123        1360 : brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
    2124             : {
    2125        1360 :     float8      delta = 0;
    2126             : 
    2127        1360 :     Timestamp   dt1 = PG_GETARG_TIMESTAMP(0);
    2128        1360 :     Timestamp   dt2 = PG_GETARG_TIMESTAMP(1);
    2129             :     /* non-finite timestamps are reported as zero distance (unchanged) */
    2130        1360 :     if (TIMESTAMP_NOT_FINITE(dt1) || TIMESTAMP_NOT_FINITE(dt2))
    2131           0 :         PG_RETURN_FLOAT8(0);
    2132             :     /* subtract in float8: the integer difference of extreme values may overflow */
    2133        1360 :     delta = (float8) dt2 - (float8) dt1;
    2134             : 
    2135             :     Assert(delta >= 0);
    2136             : 
    2137        1360 :     PG_RETURN_FLOAT8(delta);
    2138             : }
    2139             : 
    2140             : /*
    2141             :  * Compute the distance between two interval values (ib - ia), in days.
    2142             :  */
    2143             : Datum
    2144         684 : brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
    2145             : {
    2146         684 :     float8      delta = 0;
    2147             : 
    2148         684 :     Interval   *ia = PG_GETARG_INTERVAL_P(0);
    2149         684 :     Interval   *ib = PG_GETARG_INTERVAL_P(1);
    2151             : 
    2152             :     int64       dayfraction;
    2153             :     int64       days;
    2178             : 
    2179             :     /*
    2180             :      * Delta is (fractional) number of days between the intervals. Assume
    2181             :      * months have 30 days for consistency with interval_cmp_internal. We
    2182             :      * don't need to be exact, in the worst case we'll build a bit less
    2183             :      * efficient ranges. But we should not contradict interval_cmp.
                     :      *
                     :      * Each field is differenced separately and widened to int64 first, so
                     :      * no scratch Interval is allocated and extreme values can neither
                     :      * overflow nor raise an error in the middle of summarization.
    2184             :      */
    2185         684 :     dayfraction = (ib->time % USECS_PER_DAY) - (ia->time % USECS_PER_DAY);
    2186         684 :     days = (ib->time / USECS_PER_DAY) - (ia->time / USECS_PER_DAY);
    2187         684 :     days += ((int64) ib->month - (int64) ia->month) * INT64CONST(30);
    2188         684 :     days += (int64) ib->day - (int64) ia->day;
    2189             : 
    2190             :     /* convert to double precision */
    2191         684 :     delta = (double) days + dayfraction / (double) USECS_PER_DAY;
    2192             : 
    2193             :     Assert(delta >= 0);
    2194             : 
    2195         684 :     PG_RETURN_FLOAT8(delta);
    2196             : }
    2197             : 
    2198             : /*
    2199             :  * Compute the distance between two pg_lsn values (lsnb - lsna, as float8).
    2200             :  *
    2201             :  * NOTE(review): if XLogRecPtr is unsigned, swapped arguments would wrap
    2202             :  * around to a large value instead of tripping the Assert below - confirm.
    2203             :  */
    2204             : Datum
    2205         684 : brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS)
    2206             : {
    2207         684 :     float8      delta = 0;
    2208             : 
    2209         684 :     XLogRecPtr  lsna = PG_GETARG_LSN(0);
    2210         684 :     XLogRecPtr  lsnb = PG_GETARG_LSN(1);
    2211             : 
    2212         684 :     delta = (lsnb - lsna);
    2213             : 
    2214             :     Assert(delta >= 0);
    2215             : 
    2216         684 :     PG_RETURN_FLOAT8(delta);
    2217             : }
    2218             : 
    2219             : /*
    2220             :  * Compute the distance between two macaddr values, as float8.
    2221             :  *
    2222             :  * The six address bytes are folded from the last (f) to the first (a),
    2223             :  * dividing by 256 after each step, the same way it is done for UUID values.
    2224             :  */
    2225             : Datum
    2226         524 : brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS)
    2227             : {
    2228             :     float8      delta;
    2229             : 
    2230         524 :     macaddr    *a = PG_GETARG_MACADDR_P(0);
    2231         524 :     macaddr    *b = PG_GETARG_MACADDR_P(1);
    2232             : 
    2233         524 :     delta = ((float8) b->f - (float8) a->f);
    2234         524 :     delta /= 256;
    2235             : 
    2236         524 :     delta += ((float8) b->e - (float8) a->e);
    2237         524 :     delta /= 256;
    2238             : 
    2239         524 :     delta += ((float8) b->d - (float8) a->d);
    2240         524 :     delta /= 256;
    2241             : 
    2242         524 :     delta += ((float8) b->c - (float8) a->c);
    2243         524 :     delta /= 256;
    2244             : 
    2245         524 :     delta += ((float8) b->b - (float8) a->b);
    2246         524 :     delta /= 256;
    2247             : 
    2248         524 :     delta += ((float8) b->a - (float8) a->a);
    2249         524 :     delta /= 256;
    2250             : 
    2251             :     Assert(delta >= 0);
    2252             : 
    2253         524 :     PG_RETURN_FLOAT8(delta);
    2254             : }
    2255             : 
    2256             : /*
    2257             :  * Compute the distance between two macaddr8 values, as float8.
    2258             :  *
    2259             :  * The eight address bytes are folded from the last (h) to the first (a),
    2260             :  * dividing by 256 after each step, the same way it is done for UUID values.
    2261             :  */
    2262             : Datum
    2263         684 : brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS)
    2264             : {
    2265             :     float8      delta;
    2266             : 
    2267         684 :     macaddr8   *a = PG_GETARG_MACADDR8_P(0);
    2268         684 :     macaddr8   *b = PG_GETARG_MACADDR8_P(1);
    2269             : 
    2270         684 :     delta = ((float8) b->h - (float8) a->h);
    2271         684 :     delta /= 256;
    2272             : 
    2273         684 :     delta += ((float8) b->g - (float8) a->g);
    2274         684 :     delta /= 256;
    2275             : 
    2276         684 :     delta += ((float8) b->f - (float8) a->f);
    2277         684 :     delta /= 256;
    2278             : 
    2279         684 :     delta += ((float8) b->e - (float8) a->e);
    2280         684 :     delta /= 256;
    2281             : 
    2282         684 :     delta += ((float8) b->d - (float8) a->d);
    2283         684 :     delta /= 256;
    2284             : 
    2285         684 :     delta += ((float8) b->c - (float8) a->c);
    2286         684 :     delta /= 256;
    2287             : 
    2288         684 :     delta += ((float8) b->b - (float8) a->b);
    2289         684 :     delta /= 256;
    2290             : 
    2291         684 :     delta += ((float8) b->a - (float8) a->a);
    2292         684 :     delta /= 256;
    2293             : 
    2294             :     Assert(delta >= 0);
    2295             : 
    2296         684 :     PG_RETURN_FLOAT8(delta);
    2297             : }
    2298             : 
    2299             : /*
    2300             :  * Compute the distance between two inet values.
    2301             :  *
    2302             :  * The distance is defined as the difference between 32-bit/128-bit values,
    2303             :  * depending on the IP version. The distance is computed by subtracting
    2304             :  * the bytes and normalizing it to [0,1] range for each IP family.
    2305             :  * Addresses from different families are considered to be in maximum
    2306             :  * distance, which is 1.0.
    2307             :  *
    2308             :  * Each address is first masked with its own network mask length (bits).
    2309             :  */
    2310             : Datum
    2311        1512 : brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
    2312             : {
    2313             :     float8      delta;
    2314             :     int         i;
    2315             :     int         len;
    2316             :     unsigned char *addra,
    2317             :                *addrb;
    2318             : 
    2319        1512 :     inet       *ipa = PG_GETARG_INET_PP(0);
    2320        1512 :     inet       *ipb = PG_GETARG_INET_PP(1);
    2321             : 
    2322             :     int         lena,
    2323             :                 lenb;
    2324             : 
    2325             :     /*
    2326             :      * If the addresses are from different families, consider them to be in
    2327             :      * maximal possible distance (which is 1.0).
    2328             :      */
    2329        1512 :     if (ip_family(ipa) != ip_family(ipb))
    2330         120 :         PG_RETURN_FLOAT8(1.0);
    2331             : 
    2332        1392 :     addra = (unsigned char *) palloc(ip_addrsize(ipa));
    2333        1392 :     memcpy(addra, ip_addr(ipa), ip_addrsize(ipa));
    2334             : 
    2335        1392 :     addrb = (unsigned char *) palloc(ip_addrsize(ipb));
    2336        1392 :     memcpy(addrb, ip_addr(ipb), ip_addrsize(ipb));
    2337             : 
    2338             :     /*
    2339             :      * The length is calculated from the mask length, because we sort the
    2340             :      * addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the
    2341             :      * first range starts at A.B.C.0, which is before A.B.C.1). We don't want
    2342             :      * to produce a negative delta in this case, so we clear the address bits
    2343             :      * that lie beyond the mask length.
    2344             :      *
    2345             :      * Bytes entirely beyond the mask get a zero mask explicitly; shifting by
    2346             :      * (8 - nbits) with a negative nbits would be undefined behavior.
    2347             :      */
    2348        1392 :     lena = ip_bits(ipa);
    2349        1392 :     lenb = ip_bits(ipb);
    2350             : 
    2351        1392 :     len = ip_addrsize(ipa);
    2352             : 
    2353             :     /* apply the network mask to both addresses */
    2354       10512 :     for (i = 0; i < len; i++)
    2355             :     {
    2356             :         unsigned char mask;
    2357             :         int         nbits;
    2358             :         /* nbits = mask bits that fall into byte i; <= 0 means none do */
    2359        9120 :         nbits = lena - (i * 8);
    2360        9120 :         if (nbits < 8)
    2361             :         {
    2362        1084 :             mask = (nbits > 0) ? (0xFF << (8 - nbits)) : 0;
    2363        1084 :             addra[i] = (addra[i] & mask);
    2364             :         }
    2365             : 
    2366        9120 :         nbits = lenb - (i * 8);
    2367        9120 :         if (nbits < 8)
    2368             :         {
    2369        1084 :             mask = (nbits > 0) ? (0xFF << (8 - nbits)) : 0;
    2370        1084 :             addrb[i] = (addrb[i] & mask);
    2371             :         }
    2372             :     }
    2373             : 
    2374             :     /* Calculate the difference between the addresses. */
    2375        1392 :     delta = 0;
    2376       10512 :     for (i = len - 1; i >= 0; i--)
    2377             :     {
    2378        9120 :         unsigned char a = addra[i];
    2379        9120 :         unsigned char b = addrb[i];
    2380             : 
    2381        9120 :         delta += (float8) b - (float8) a;
    2382        9120 :         delta /= 256;
    2383             :     }
    2384             : 
    2385             :     Assert((delta >= 0) && (delta <= 1));
    2386             : 
    2387        1392 :     pfree(addra);
    2388        1392 :     pfree(addrb);
    2389             : 
    2390        1392 :     PG_RETURN_FLOAT8(delta);
    2391             : }
    2392             : 
    2393             : static void
    2394        2972 : brin_minmax_multi_serialize(BrinDesc *bdesc, Datum src, Datum *dst)
    2395             : {
    2396        2972 :     Ranges     *ranges = (Ranges *) DatumGetPointer(src);
    2397             :     SerializedRanges *s;
    2398             : 
    2399             :     /*
    2400             :      * In batch mode, we need to compress the accumulated values to the
    2401             :      * actually requested number of values/ranges.
    2402             :      */
    2403        2972 :     compactify_ranges(bdesc, ranges, ranges->target_maxvalues);
    2404             : 
    2405             :     /* At this point everything has to be fully sorted. */
    2406             :     Assert(ranges->nsorted == ranges->nvalues);
    2407             : 
    2408        2972 :     s = range_serialize(ranges);
    2409        2972 :     dst[0] = PointerGetDatum(s);
    2410        2972 : }
    2411             : 
    2412             : static int
    2413        2964 : brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts)
    2414             : {
    2415        2964 :     return MinMaxMultiGetValuesPerRange(opts);
    2416             : }
    2417             : 
    2418             : /*
    2419             :  * Examine the given index tuple (which contains the partial status of a
    2420             :  * certain page range) by comparing it to the given value that comes from
    2421             :  * another heap tuple.  If the new value is outside the min/max range
    2422             :  * specified by the existing tuple values, update the index tuple and return
    2423             :  * true.  Otherwise, return false and leave the index tuple unmodified.
    2424             :  */
    2425             : Datum
    2426       57064 : brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
    2427             : {
    2428       57064 :     BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    2429       57064 :     BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
    2430       57064 :     Datum       newval = PG_GETARG_DATUM(2);
    2431       57064 :     bool        isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3);
    2432       57064 :     MinMaxMultiOptions *opts = (MinMaxMultiOptions *) PG_GET_OPCLASS_OPTIONS();
    2433       57064 :     Oid         colloid = PG_GET_COLLATION();
    2434       57064 :     bool        modified = false;
    2435             :     Form_pg_attribute attr;
    2436             :     AttrNumber  attno;
    2437             :     Ranges     *ranges;
    2438       57064 :     SerializedRanges *serialized = NULL;
    2439             : 
    2440             :     Assert(!isnull);
    2441             : 
    2442       57064 :     attno = column->bv_attno;
    2443       57064 :     attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
    2444             : 
    2445             :     /* use the already deserialized value, if possible */
    2446       57064 :     ranges = (Ranges *) DatumGetPointer(column->bv_mem_value);
    2447             : 
    2448             :     /*
    2449             :      * If this is the first non-null value, we need to initialize the range
    2450             :      * list. Otherwise, just extract the existing range list from BrinValues.
    2451             :      *
    2452             :      * When starting with an empty range, we assume this is a batch mode and
    2453             :      * we use a larger buffer. The buffer size is derived from the BRIN range
    2454             :      * size, number of rows per page, with some sensible min/max values. A
    2455             :      * small buffer would be bad for performance, but a large buffer might
    2456             :      * require a lot of memory (because of keeping all the values).
    2457             :      */
    2458       57064 :     if (column->bv_allnulls)
    2459             :     {
    2460             :         MemoryContext oldctx;
    2461             : 
    2462             :         int         target_maxvalues;
    2463             :         int         maxvalues;
    2464        2964 :         BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
    2465             : 
    2466             :         /* what was specified as a reloption? */
    2467        2964 :         target_maxvalues = brin_minmax_multi_get_values(bdesc, opts);
    2468             : 
    2469             :         /*
    2470             :          * Determine the insert buffer size - we use 10x the target, capped to
    2471             :          * the maximum number of values in the heap range. This is more than
    2472             :          * enough, considering the actual number of rows per page is likely
    2473             :          * much lower, but meh.
    2474             :          */
    2475        2964 :         maxvalues = Min(target_maxvalues * MINMAX_BUFFER_FACTOR,
    2476             :                         MaxHeapTuplesPerPage * pagesPerRange);
    2477             : 
    2478             :         /* but always at least the original value */
    2479        2964 :         maxvalues = Max(maxvalues, target_maxvalues);
    2480             : 
    2481             :         /* always cap by MIN/MAX */
    2482        2964 :         maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
    2483        2964 :         maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
    2484             : 
    2485        2964 :         oldctx = MemoryContextSwitchTo(column->bv_context);
    2486        2964 :         ranges = minmax_multi_init(maxvalues);
    2487        2964 :         ranges->attno = attno;
    2488        2964 :         ranges->colloid = colloid;
    2489        2964 :         ranges->typid = attr->atttypid;
    2490        2964 :         ranges->target_maxvalues = target_maxvalues;
    2491             : 
    2492             :         /* we'll certainly need the comparator, so just look it up now */
    2493        2964 :         ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    2494             :                                                          BTLessStrategyNumber);
    2495             : 
    2496        2964 :         MemoryContextSwitchTo(oldctx);
    2497             : 
    2498        2964 :         column->bv_allnulls = false;
    2499        2964 :         modified = true;
    2500             : 
    2501        2964 :         column->bv_mem_value = PointerGetDatum(ranges);
    2502        2964 :         column->bv_serialize = brin_minmax_multi_serialize;
    2503             :     }
    2504       54100 :     else if (!ranges)
    2505             :     {
    2506             :         MemoryContext oldctx;
    2507             : 
    2508             :         int         maxvalues;
    2509           8 :         BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
    2510             : 
    2511           8 :         oldctx = MemoryContextSwitchTo(column->bv_context);
    2512             : 
    2513           8 :         serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
    2514             : 
    2515             :         /*
    2516             :          * Determine the insert buffer size - we use 10x the target, capped to
    2517             :          * the maximum number of values in the heap range. This is more than
    2518             :          * enough, considering the actual number of rows per page is likely
    2519             :          * much lower, but meh.
    2520             :          */
    2521           8 :         maxvalues = Min(serialized->maxvalues * MINMAX_BUFFER_FACTOR,
    2522             :                         MaxHeapTuplesPerPage * pagesPerRange);
    2523             : 
    2524             :         /* but always at least the original value */
    2525           8 :         maxvalues = Max(maxvalues, serialized->maxvalues);
    2526             : 
    2527             :         /* always cap by MIN/MAX */
    2528           8 :         maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
    2529           8 :         maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
    2530             : 
    2531           8 :         ranges = range_deserialize(maxvalues, serialized);
    2532             : 
    2533           8 :         ranges->attno = attno;
    2534           8 :         ranges->colloid = colloid;
    2535           8 :         ranges->typid = attr->atttypid;
    2536             : 
    2537             :         /* we'll certainly need the comparator, so just look it up now */
    2538           8 :         ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    2539             :                                                          BTLessStrategyNumber);
    2540             : 
    2541           8 :         column->bv_mem_value = PointerGetDatum(ranges);
    2542           8 :         column->bv_serialize = brin_minmax_multi_serialize;
    2543             : 
    2544           8 :         MemoryContextSwitchTo(oldctx);
    2545             :     }
    2546             : 
    2547             :     /*
    2548             :      * Try to add the new value to the range. We need to update the modified
    2549             :      * flag, so that we serialize the updated summary later.
    2550             :      */
    2551       57064 :     modified |= range_add_value(bdesc, colloid, attno, attr, ranges, newval);
    2552             : 
    2553             : 
    2554       57064 :     PG_RETURN_BOOL(modified);
    2555             : }
    2556             : 
    2557             : /*
    2558             :  * Given an index tuple corresponding to a certain page range and a scan key,
    2559             :  * return whether the scan key is consistent with the index tuple's min/max
    2560             :  * values.  Return true if so, false otherwise.
    2561             :  */
    2562             : Datum
    2563       19536 : brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
    2564             : {
    2565       19536 :     BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    2566       19536 :     BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
    2567       19536 :     ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
    2568       19536 :     int         nkeys = PG_GETARG_INT32(3);
    2569             : 
    2570       19536 :     Oid         colloid = PG_GET_COLLATION(),
    2571             :                 subtype;
    2572             :     AttrNumber  attno;
    2573             :     Datum       value;
    2574             :     FmgrInfo   *finfo;
    2575             :     SerializedRanges *serialized;
    2576             :     Ranges     *ranges;
    2577             :     int         keyno;
    2578             :     int         rangeno;
    2579             :     int         i;
    2580             : 
    2581       19536 :     attno = column->bv_attno;
    2582             : 
    2583       19536 :     serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
    2584       19536 :     ranges = range_deserialize(serialized->maxvalues, serialized);
    2585             : 
    2586             :     /* inspect the ranges, and for each one evaluate the scan keys */
    2587       19536 :     for (rangeno = 0; rangeno < ranges->nranges; rangeno++)
    2588             :     {
    2589           0 :         Datum       minval = ranges->values[2 * rangeno];
    2590           0 :         Datum       maxval = ranges->values[2 * rangeno + 1];
    2591             : 
    2592             :         /* assume the range is matching, and we'll try to prove otherwise */
    2593           0 :         bool        matching = true;
    2594             : 
    2595           0 :         for (keyno = 0; keyno < nkeys; keyno++)
    2596             :         {
    2597             :             Datum       matches;
    2598           0 :             ScanKey     key = keys[keyno];
    2599             : 
    2600             :             /* NULL keys are handled and filtered-out in bringetbitmap */
    2601             :             Assert(!(key->sk_flags & SK_ISNULL));
    2602             : 
    2603           0 :             attno = key->sk_attno;
    2604           0 :             subtype = key->sk_subtype;
    2605           0 :             value = key->sk_argument;
    2606           0 :             switch (key->sk_strategy)
    2607             :             {
    2608           0 :                 case BTLessStrategyNumber:
    2609             :                 case BTLessEqualStrategyNumber:
    2610           0 :                     finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2611           0 :                                                                key->sk_strategy);
    2612             :                     /* first value from the array */
    2613           0 :                     matches = FunctionCall2Coll(finfo, colloid, minval, value);
    2614           0 :                     break;
    2615             : 
    2616           0 :                 case BTEqualStrategyNumber:
    2617             :                     {
    2618             :                         Datum       compar;
    2619             :                         FmgrInfo   *cmpFn;
    2620             : 
    2621             :                         /* by default this range does not match */
    2622           0 :                         matches = false;
    2623             : 
    2624             :                         /*
    2625             :                          * Compare the scan key value with the boundaries
    2626             :                          * of this range. First check if it's less than
    2627             :                          * the range minimum, which is the first value of
    2628             :                          * the pair.
    2629             :                          */
    2630           0 :                         cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2631             :                                                                    BTGreaterStrategyNumber);
    2632           0 :                         compar = FunctionCall2Coll(cmpFn, colloid, minval, value);
    2633             : 
    2634             :                         /* smaller than the smallest value in this range */
    2635           0 :                         if (DatumGetBool(compar))
    2636           0 :                             break;
    2637             : 
    2638           0 :                         cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2639             :                                                                    BTLessStrategyNumber);
    2640           0 :                         compar = FunctionCall2Coll(cmpFn, colloid, maxval, value);
    2641             : 
    2642             :                         /* larger than the largest value in this range */
    2643           0 :                         if (DatumGetBool(compar))
    2644           0 :                             break;
    2645             : 
    2646             :                         /*
    2647             :                          * We haven't managed to eliminate this range, so
    2648             :                          * consider it matching.
    2649             :                          */
    2650           0 :                         matches = true;
    2651             : 
    2652           0 :                         break;
    2653             :                     }
    2654           0 :                 case BTGreaterEqualStrategyNumber:
    2655             :                 case BTGreaterStrategyNumber:
    2656           0 :                     finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2657           0 :                                                                key->sk_strategy);
    2658             :                     /* last value from the array */
    2659           0 :                     matches = FunctionCall2Coll(finfo, colloid, maxval, value);
    2660           0 :                     break;
    2661             : 
    2662           0 :                 default:
    2663             :                     /* shouldn't happen */
    2664           0 :                     elog(ERROR, "invalid strategy number %d", key->sk_strategy);
    2665             :                     matches = 0;
    2666             :                     break;
    2667             :             }
    2668             : 
    2669             :             /* the range has to match all the scan keys */
    2670           0 :             matching &= DatumGetBool(matches);
    2671             : 
    2672             :             /* once we find a non-matching key, we're done */
    2673           0 :             if (!matching)
    2674           0 :                 break;
    2675             :         }
    2676             : 
    2677             :         /*
    2678             :          * have we found a range matching all scan keys? if yes, we're done
    2679             :          */
    2680           0 :         if (matching)
    2681           0 :             PG_RETURN_DATUM(BoolGetDatum(true));
    2682             :     }
    2683             : 
    2684             :     /*
    2685             :      * And now inspect the values. We don't bother with doing a binary search
    2686             :      * here, because we're dealing with serialized / fully compacted ranges,
    2687             :      * so there should be only very few values.
    2688             :      */
    2689       31864 :     for (i = 0; i < ranges->nvalues; i++)
    2690             :     {
    2691       28008 :         Datum       val = ranges->values[2 * ranges->nranges + i];
    2692             : 
    2693             :         /* assume the value is matching, and we'll try to prove otherwise */
    2694       28008 :         bool        matching = true;
    2695             : 
    2696       43688 :         for (keyno = 0; keyno < nkeys; keyno++)
    2697             :         {
    2698             :             Datum       matches;
    2699       28008 :             ScanKey     key = keys[keyno];
    2700             : 
    2701             :             /* NULL keys are handled and filtered-out in bringetbitmap */
    2702       28008 :             if (key->sk_flags & SK_ISNULL)
    2703           0 :                 continue;
    2704             : 
    2705       28008 :             attno = key->sk_attno;
    2706       28008 :             subtype = key->sk_subtype;
    2707       28008 :             value = key->sk_argument;
    2708       28008 :             switch (key->sk_strategy)
    2709             :             {
    2710       28008 :                 case BTLessStrategyNumber:
    2711             :                 case BTLessEqualStrategyNumber:
    2712             :                 case BTEqualStrategyNumber:
    2713             :                 case BTGreaterEqualStrategyNumber:
    2714             :                 case BTGreaterStrategyNumber:
    2715             : 
    2716       28008 :                     finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2717       28008 :                                                                key->sk_strategy);
    2718       28008 :                     matches = FunctionCall2Coll(finfo, colloid, val, value);
    2719       28008 :                     break;
    2720             : 
    2721           0 :                 default:
    2722             :                     /* shouldn't happen */
    2723           0 :                     elog(ERROR, "invalid strategy number %d", key->sk_strategy);
    2724             :                     matches = 0;
    2725             :                     break;
    2726             :             }
    2727             : 
    2728             :             /* the value has to match all the scan keys */
    2729       28008 :             matching &= DatumGetBool(matches);
    2730             : 
    2731             :             /* once we find a non-matching key, we're done */
    2732       28008 :             if (!matching)
    2733       12328 :                 break;
    2734             :         }
    2735             : 
    2736             :         /* have we found a value matching all scan keys? if yes, we're done */
    2737       28008 :         if (matching)
    2738       15680 :             PG_RETURN_DATUM(BoolGetDatum(true));
    2739             :     }
    2740             : 
    2741        3856 :     PG_RETURN_DATUM(BoolGetDatum(false));
    2742             : }
    2743             : 
    2744             : /*
    2745             :  * Given two BrinValues, update the first of them as a union of the summary
    2746             :  * values contained in both.  The second one is untouched.
    2747             :  */
    2748             : Datum
    2749           0 : brin_minmax_multi_union(PG_FUNCTION_ARGS)
    2750             : {
    2751           0 :     BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    2752           0 :     BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
    2753           0 :     BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
    2754             : 
    2755           0 :     Oid         colloid = PG_GET_COLLATION();
    2756             :     SerializedRanges *serialized_a;
    2757             :     SerializedRanges *serialized_b;
    2758             :     Ranges     *ranges_a;
    2759             :     Ranges     *ranges_b;
    2760             :     AttrNumber  attno;
    2761             :     Form_pg_attribute attr;
    2762             :     ExpandedRange *eranges;
    2763             :     int         neranges;
    2764             :     FmgrInfo   *cmpFn,
    2765             :                *distanceFn;
    2766             :     DistanceValue *distances;
    2767             :     MemoryContext ctx;
    2768             :     MemoryContext oldctx;
    2769             : 
    2770             :     Assert(col_a->bv_attno == col_b->bv_attno);
    2771             :     Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
    2772             : 
    2773           0 :     attno = col_a->bv_attno;
    2774           0 :     attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
    2775             : 
    2776           0 :     serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]);
    2777           0 :     serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]);
    2778             : 
    2779           0 :     ranges_a = range_deserialize(serialized_a->maxvalues, serialized_a);
    2780           0 :     ranges_b = range_deserialize(serialized_b->maxvalues, serialized_b);
    2781             : 
    2782             :     /* make sure neither of the ranges is NULL */
    2783             :     Assert(ranges_a && ranges_b);
    2784             : 
    2785           0 :     neranges = (ranges_a->nranges + ranges_a->nvalues) +
    2786           0 :         (ranges_b->nranges + ranges_b->nvalues);
    2787             : 
    2788             :     /*
    2789             :      * The distanceFn calls (which may internally call e.g. numeric_le) may
    2790             :      * allocate quite a bit of memory, and we must not leak it. Otherwise,
    2791             :      * we'd have problems e.g. when building indexes. So we create a local
    2792             :      * memory context and make sure we free the memory before leaving this
    2793             :      * function (not after every call).
    2794             :      */
    2795           0 :     ctx = AllocSetContextCreate(CurrentMemoryContext,
    2796             :                                 "minmax-multi context",
    2797             :                                 ALLOCSET_DEFAULT_SIZES);
    2798             : 
    2799           0 :     oldctx = MemoryContextSwitchTo(ctx);
    2800             : 
    2801             :     /* allocate and fill */
    2802           0 :     eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
    2803             : 
    2804             :     /* fill the expanded ranges with entries for the first range */
    2805           0 :     fill_expanded_ranges(eranges, ranges_a->nranges + ranges_a->nvalues,
    2806             :                          ranges_a);
    2807             : 
    2808             :     /* and now append the entries for the second range */
    2809           0 :     fill_expanded_ranges(&eranges[ranges_a->nranges + ranges_a->nvalues],
    2810           0 :                          ranges_b->nranges + ranges_b->nvalues,
    2811             :                          ranges_b);
    2812             : 
    2813           0 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    2814             :                                                BTLessStrategyNumber);
    2815             : 
    2816             :     /* sort the expanded ranges */
    2817           0 :     neranges = sort_expanded_ranges(cmpFn, colloid, eranges, neranges);
    2818             : 
    2819             :     /*
    2820             :      * We've loaded two different lists of expanded ranges, so some of them
    2821             :      * may be overlapping. So walk through them and merge them.
    2822             :      */
    2823           0 :     neranges = merge_overlapping_ranges(cmpFn, colloid, eranges, neranges);
    2824             : 
    2825             :     /* check that the combined ranges are correct (no overlaps, ordering) */
    2826           0 :     AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
    2827             : 
    2828             :     /*
    2829             :      * If needed, reduce some of the ranges.
    2830             :      *
    2831             :      * XXX This may be fairly expensive, so maybe we should do it only when
    2832             :      * it's actually needed (when we have too many ranges).
    2833             :      */
    2834             : 
    2835             :     /* build array of gap distances and sort them in ascending order */
    2836           0 :     distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
    2837           0 :     distances = build_distances(distanceFn, colloid, eranges, neranges);
    2838             : 
    2839             :     /*
    2840             :      * See how many values would be needed to store the current ranges, and if
    2841             :      * needed combine as many of them to get below the threshold. The
    2842             :      * collapsed ranges will be stored as a single value.
    2843             :      *
    2844             :      * XXX This does not apply the load factor, as we don't expect to add more
    2845             :      * values to the range, so we prefer to keep as many ranges as possible.
    2846             :      *
    2847             :      * XXX Can the maxvalues be different in the two ranges? Perhaps we should
    2848             :      * use maximum of those?
    2849             :      */
    2850           0 :     neranges = reduce_expanded_ranges(eranges, neranges, distances,
    2851             :                                       ranges_a->maxvalues,
    2852             :                                       cmpFn, colloid);
    2853             : 
    2854             :     /* update the first range summary */
    2855           0 :     store_expanded_ranges(ranges_a, eranges, neranges);
    2856             : 
    2857           0 :     MemoryContextSwitchTo(oldctx);
    2858           0 :     MemoryContextDelete(ctx);
    2859             : 
    2860             :     /* cleanup and update the serialized value */
    2861           0 :     pfree(serialized_a);
    2862           0 :     col_a->bv_values[0] = PointerGetDatum(range_serialize(ranges_a));
    2863             : 
    2864           0 :     PG_RETURN_VOID();
    2865             : }
    2866             : 
    2867             : /*
    2868             :  * Cache and return minmax multi opclass support procedure
    2869             :  *
    2870             :  * Return the procedure corresponding to the given function support number
    2871             :  * or null if it does not exist.
    2872             :  */
    2873             : static FmgrInfo *
    2874        3132 : minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
    2875             : {
    2876             :     MinmaxMultiOpaque *opaque;
    2877        3132 :     uint16      basenum = procnum - PROCNUM_BASE;
    2878             : 
    2879             :     /*
    2880             :      * We cache these in the opaque struct, to avoid repetitive syscache
    2881             :      * lookups.
    2882             :      */
    2883        3132 :     opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
    2884             : 
    2885             :     /*
    2886             :      * If we already searched for this proc and didn't find it, don't bother
    2887             :      * searching again.
    2888             :      */
    2889        3132 :     if (opaque->extra_proc_missing[basenum])
    2890           0 :         return NULL;
    2891             : 
    2892        3132 :     if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
    2893             :     {
    2894         252 :         if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
    2895             :                                                 procnum)))
    2896             :         {
    2897         252 :             fmgr_info_copy(&opaque->extra_procinfos[basenum],
    2898             :                            index_getprocinfo(bdesc->bd_index, attno, procnum),
    2899             :                            bdesc->bd_context);
    2900             :         }
    2901             :         else
    2902             :         {
    2903           0 :             opaque->extra_proc_missing[basenum] = true;
    2904           0 :             return NULL;
    2905             :         }
    2906             :     }
    2907             : 
    2908        3132 :     return &opaque->extra_procinfos[basenum];
    2909             : }
    2910             : 
    2911             : /*
    2912             :  * Cache and return the procedure for the given strategy number and subtype;
    2913             :  * the AMOPSTRATEGY syscache is consulted only when the cached slot is empty.
    2914             :  * NOTE(review): the previous note said this function mirrors itself; presumably
    2915             :  * the plain minmax opclass counterpart was meant -- confirm and keep in sync.
    2916             :  */
    2917             : static FmgrInfo *
    2918      225680 : minmax_multi_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype,
    2919             :                                    uint16 strategynum)
    2920             : {
    2921             :     MinmaxMultiOpaque *opaque;
    2922             : 
    2923             :     Assert(strategynum >= 1 &&
    2924             :            strategynum <= BTMaxStrategyNumber);   /* 1-based; cache slots use strategynum - 1 */
    2925             : 
    2926      225680 :     opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
    2927             : 
    2928             :     /*
    2929             :      * We cache the procedures for the previous subtype in the opaque struct,
    2930             :      * to avoid repetitive syscache lookups.  If the subtype changed,
    2931             :      * invalidate all the cached entries.
    2932             :      */
    2933      225680 :     if (opaque->cached_subtype != subtype)
    2934             :     {
    2935             :         uint16      i;
    2936             : 
    2937        5064 :         for (i = 1; i <= BTMaxStrategyNumber; i++)
    2938        4220 :             opaque->strategy_procinfos[i - 1].fn_oid = InvalidOid;   /* InvalidOid marks a slot that must be looked up again */
    2939         844 :         opaque->cached_subtype = subtype;
    2940             :     }
    2941             : 
    2942      225680 :     if (opaque->strategy_procinfos[strategynum - 1].fn_oid == InvalidOid)
    2943             :     {
    2944             :         Form_pg_attribute attr;
    2945             :         HeapTuple   tuple;
    2946             :         Oid         opfamily,
    2947             :                     oprid;
    2948             :         bool        isNull;
    2949             : 
    2950        1100 :         opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
    2951        1100 :         attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
    2952        1100 :         tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
    2953        1100 :                                 ObjectIdGetDatum(attr->atttypid),
    2954             :                                 ObjectIdGetDatum(subtype),
    2955             :                                 Int16GetDatum(strategynum));
    2956        1100 :         if (!HeapTupleIsValid(tuple))
    2957           0 :             elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
    2958             :                  strategynum, attr->atttypid, subtype, opfamily);
    2959             : 
    2960        1100 :         oprid = DatumGetObjectId(SysCacheGetAttr(AMOPSTRATEGY, tuple,
    2961             :                                                  Anum_pg_amop_amopopr, &isNull));
    2962        1100 :         ReleaseSysCache(tuple);   /* oprid was copied out above, so the tuple is no longer needed */
    2963             :         Assert(!isNull && RegProcedureIsValid(oprid));
    2964             : 
    2965        1100 :         fmgr_info_cxt(get_opcode(oprid),
    2966        1100 :                       &opaque->strategy_procinfos[strategynum - 1],
    2967             :                       bdesc->bd_context);
    2968             :     }
    2969             : 
    2970      225680 :     return &opaque->strategy_procinfos[strategynum - 1];
    2971             : }
    2972             : 
    2973             : Datum
    2974         464 : brin_minmax_multi_options(PG_FUNCTION_ARGS)
    2975             : {   /* Initializes the local_relopts passed as argument 0 and adds the integer option values_per_range. */
    2976         464 :     local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
    2977             : 
    2978         464 :     init_local_reloptions(relopts, sizeof(MinMaxMultiOptions));
    2979             : 
    2980         464 :     add_local_int_reloption(relopts, "values_per_range", "desc",   /* NOTE(review): the desc string looks like a placeholder -- confirm */
    2981             :                             MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE, 8, 256,   /* presumably default, min, max; note PER_PAGE vs values_per_range naming -- confirm */
    2982             :                             offsetof(MinMaxMultiOptions, valuesPerRange));   /* parsed value lands in MinMaxMultiOptions.valuesPerRange */
    2983             : 
    2984         464 :     PG_RETURN_VOID();
    2985             : }
    2986             : 
    2987             : /*
    2988             :  * brin_minmax_multi_summary_in
    2989             :  *      - text input routine for type brin_minmax_multi_summary; always errors.
    2990             :  *
    2991             :  * brin_minmax_multi_summary is only used internally to represent summaries
    2992             :  * in BRIN minmax-multi indexes, so it has no operations of its own, and we
    2993             :  * disallow input too.
    2994             :  */
    2995             : Datum
    2996           0 : brin_minmax_multi_summary_in(PG_FUNCTION_ARGS)
    2997             : {
    2998             :     /*
    2999             :      * brin_minmax_multi_summary stores the data in binary form and parsing
    3000             :      * text input is not needed, so reject it with ERRCODE_FEATURE_NOT_SUPPORTED.
    3001             :      */
    3002           0 :     ereport(ERROR,
    3003             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    3004             :              errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
    3005             : 
    3006             :     PG_RETURN_VOID();           /* keep compiler quiet; presumably not reached after the ERROR */
    3007             : }
    3008             : 
    3009             : 
    3010             : /*
    3011             :  * brin_minmax_multi_summary_out
    3012             :  *      - output routine for type brin_minmax_multi_summary.
    3013             :  *
    3014             :  * BRIN minmax-multi summaries are serialized into a bytea value, but we print a
    3015             :  * readable text in braces: counts first, then ranges and values when non-empty.
    3016             :  */
    3017             : Datum
    3018           0 : brin_minmax_multi_summary_out(PG_FUNCTION_ARGS)
    3019             : {
    3020             :     int         i;
    3021             :     int         idx;   /* running position in ranges_deserialized->values, shared by both loops */
    3022             :     SerializedRanges *ranges;
    3023             :     Ranges     *ranges_deserialized;
    3024             :     StringInfoData str;
    3025             :     bool        isvarlena;
    3026             :     Oid         outfunc;
    3027             :     FmgrInfo    fmgrinfo;
    3028           0 :     ArrayBuildState *astate_values = NULL;
    3029             : 
    3030           0 :     initStringInfo(&str);
    3031           0 :     appendStringInfoChar(&str, '{');
    3032             : 
    3033             :     /*
    3034             :      * Detoast to get value with full 4B header (can't be stored in a toast
    3035             :      * table, but can use 1B header).
    3036             :      */
    3037           0 :     ranges = (SerializedRanges *) PG_DETOAST_DATUM(PG_GETARG_BYTEA_PP(0));
    3038             : 
    3039             :     /* look up the output function for the summarized data type */
    3040           0 :     getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena);
    3041           0 :     fmgr_info(outfunc, &fmgrinfo);
    3042             : 
    3043             :     /* deserialize the range info into easy-to-process pieces */
    3044           0 :     ranges_deserialized = range_deserialize(ranges->maxvalues, ranges);
    3045             : 
    3046           0 :     appendStringInfo(&str, "nranges: %d  nvalues: %d  maxvalues: %d",
    3047             :                      ranges_deserialized->nranges,
    3048             :                      ranges_deserialized->nvalues,
    3049             :                      ranges_deserialized->maxvalues);
    3050             : 
    3051             :     /* print ranges: each one consumes two consecutive entries of values[] */
    3052           0 :     idx = 0;
    3053           0 :     for (i = 0; i < ranges_deserialized->nranges; i++)
    3054             :     {
    3055             :         char       *a,
    3056             :                    *b;
    3057             :         text       *c;
    3058             :         StringInfoData str;   /* NOTE(review): shadows the function-level str; only the per-range text goes here */
    3059             : 
    3060           0 :         initStringInfo(&str);
    3061             : 
    3062           0 :         a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
    3063           0 :         b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
    3064             : 
    3065           0 :         appendStringInfo(&str, "%s ... %s", a, b);
    3066             : 
    3067           0 :         c = cstring_to_text(str.data);
    3068             : 
    3069           0 :         astate_values = accumArrayResult(astate_values,
    3070             :                                          PointerGetDatum(c),
    3071             :                                          false,
    3072             :                                          TEXTOID,
    3073             :                                          CurrentMemoryContext);
    3074             :     }
    3075             : 
    3076           0 :     if (ranges_deserialized->nranges > 0)
    3077             :     {
    3078             :         Oid         typoutput;
    3079             :         bool        typIsVarlena;
    3080             :         Datum       val;
    3081             :         char       *extval;
    3082             : 
    3083           0 :         getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
    3084             : 
    3085           0 :         val = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
    3086             : 
    3087           0 :         extval = OidOutputFunctionCall(typoutput, val);
    3088             : 
    3089           0 :         appendStringInfo(&str, " ranges: %s", extval);   /* this str is the function-level buffer again */
    3090             :     }
    3091             : 
    3092             :     /* print individual values: they follow the range entries in values[] (idx is not reset) */
    3093           0 :     astate_values = NULL;
    3094             : 
    3095           0 :     for (i = 0; i < ranges_deserialized->nvalues; i++)
    3096             :     {
    3097             :         Datum       a;
    3098             :         text       *b;
    3099             :         StringInfoData str;   /* NOTE(review): shadows the function-level str, as in the ranges loop */
    3100             : 
    3101           0 :         initStringInfo(&str);
    3102             : 
    3103           0 :         a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]);   /* NOTE(review): the ranges loop calls OutputFunctionCall on the same fmgrinfo -- consider unifying */
    3104             : 
    3105           0 :         appendStringInfoString(&str, DatumGetCString(a));
    3106             : 
    3107           0 :         b = cstring_to_text(str.data);
    3108             : 
    3109           0 :         astate_values = accumArrayResult(astate_values,
    3110             :                                          PointerGetDatum(b),
    3111             :                                          false,
    3112             :                                          TEXTOID,
    3113             :                                          CurrentMemoryContext);
    3114             :     }
    3115             : 
    3116           0 :     if (ranges_deserialized->nvalues > 0)
    3117             :     {
    3118             :         Oid         typoutput;
    3119             :         bool        typIsVarlena;
    3120             :         Datum       val;
    3121             :         char       *extval;
    3122             : 
    3123           0 :         getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
    3124             : 
    3125           0 :         val = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
    3126             : 
    3127           0 :         extval = OidOutputFunctionCall(typoutput, val);
    3128             : 
    3129           0 :         appendStringInfo(&str, " values: %s", extval);
    3130             :     }
    3131             : 
    3132             : 
    3133           0 :     appendStringInfoChar(&str, '}');
    3134             : 
    3135           0 :     PG_RETURN_CSTRING(str.data);
    3136             : }
    3137             : 
    3138             : /*
    3139             :  * brin_minmax_multi_summary_recv
    3140             :  *      - binary input routine for type brin_minmax_multi_summary; always errors.
    3141             :  */
    3142             : Datum
    3143           0 : brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS)
    3144             : {
    3145           0 :     ereport(ERROR,   /* same rejection and message as brin_minmax_multi_summary_in */
    3146             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    3147             :              errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
    3148             : 
    3149             :     PG_RETURN_VOID();           /* keep compiler quiet; presumably not reached after the ERROR */
    3150             : }
    3151             : 
    3152             : /*
    3153             :  * brin_minmax_multi_summary_send
    3154             :  *      - binary output routine for type brin_minmax_multi_summary.
    3155             :  *
    3156             :  * BRIN minmax-multi summaries are serialized in a bytea value (although
    3157             :  * the type is named differently), so we simply reuse the bytea send routine.
    3158             :  */
    3159             : Datum
    3160           0 : brin_minmax_multi_summary_send(PG_FUNCTION_ARGS)
    3161             : {
    3162           0 :     return byteasend(fcinfo);   /* forward the unchanged call info to byteasend */
    3163             : }

Generated by: LCOV version 1.14