LCOV - code coverage report
Current view: top level - src/backend/access/brin - brin_minmax_multi.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.9 % 753 692
Test Date: 2026-03-01 00:15:48 Functions: 94.1 % 51 48
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * brin_minmax_multi.c
       3              :  *      Implementation of Multi Min/Max opclass for BRIN
       4              :  *
       5              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       6              :  * Portions Copyright (c) 1994, Regents of the University of California
       7              :  *
       8              :  *
       9              :  * Implements a variant of minmax opclass, where the summary is composed of
      10              :  * multiple smaller intervals. This allows us to handle outliers, which
      11              :  * usually make the simple minmax opclass inefficient.
      12              :  *
      13              :  * Consider for example page range with simple minmax interval [1000,2000],
      14              :  * and assume a new row gets inserted into the range with value 1000000.
      15              :  * Due to that the interval gets [1000,1000000]. I.e. the minmax interval
      16              :  * got 1000x wider and won't be useful to eliminate scan keys between 2001
      17              :  * and 1000000.
      18              :  *
      19              :  * With minmax-multi opclass, we may have [1000,2000] interval initially,
      20              :  * but after adding the new row we start tracking it as two intervals:
      21              :  *
      22              :  *   [1000,2000] and [1000000,1000000]
      23              :  *
      24              :  * This allows us to still eliminate the page range when the scan keys hit
      25              :  * the gap between 2000 and 1000000, making it useful in cases when the
      26              :  * simple minmax opclass gets inefficient.
      27              :  *
      28              :  * The number of intervals tracked per page range is somewhat flexible.
      29              :  * What is restricted is the number of values per page range, and the limit
      30              :  * is currently 32 (see values_per_range reloption). Collapsed intervals
      31              :  * (with equal minimum and maximum value) are stored as a single value,
      32              :  * while regular intervals require two values.
      33              :  *
      34              :  * When the number of values gets too high (by adding new values to the
      35              :  * summary), we merge some of the intervals to free space for more values.
      36              :  * This is done in a greedy way - we simply pick the two closest intervals,
      37              :  * merge them, and repeat this until the number of values to store gets
      38              :  * sufficiently low (below 50% of maximum values, but that is a mostly
      39              :  * arbitrary threshold and may be changed easily).
      40              :  *
      41              :  * To pick the closest intervals we use the "distance" support procedure,
      42              :  * which measures space between two ranges (i.e. the length of an interval).
      43              :  * The computed value may be an approximation - in the worst case we will
      44              :  * merge two ranges that are slightly less optimal at that step, but the
      45              :  * index should still produce correct results.
      46              :  *
      47              :  * The compaction (reducing the number of values) is fairly expensive, as
      48              :  * it requires calling the distance functions, sorting etc. So when building
      49              :  * the summary, we use a significantly larger buffer, and only enforce the
      50              :  * exact limit at the very end. This improves performance, and it also helps
      51              :  * with building better ranges (due to the greedy approach).
      52              :  *
      53              :  *
      54              :  * IDENTIFICATION
      55              :  *    src/backend/access/brin/brin_minmax_multi.c
      56              :  */
      57              : #include "postgres.h"
      58              : 
      59              : /* needed for PGSQL_AF_INET */
      60              : #include <sys/socket.h>
      61              : 
      62              : #include "access/brin.h"
      63              : #include "access/brin_internal.h"
      64              : #include "access/brin_tuple.h"
      65              : #include "access/genam.h"
      66              : #include "access/htup_details.h"
      67              : #include "access/reloptions.h"
      68              : #include "access/stratnum.h"
      69              : #include "catalog/pg_am.h"
      70              : #include "catalog/pg_amop.h"
      71              : #include "catalog/pg_type.h"
      72              : #include "utils/array.h"
      73              : #include "utils/builtins.h"
      74              : #include "utils/date.h"
      75              : #include "utils/datum.h"
      76              : #include "utils/float.h"
      77              : #include "utils/inet.h"
      78              : #include "utils/lsyscache.h"
      79              : #include "utils/memutils.h"
      80              : #include "utils/pg_lsn.h"
      81              : #include "utils/rel.h"
      82              : #include "utils/syscache.h"
      83              : #include "utils/timestamp.h"
      84              : #include "utils/uuid.h"
      85              : 
      86              : /*
      87              :  * Additional SQL level support functions
      88              :  *
      89              :  * Procedure numbers must not use values reserved for BRIN itself; see
      90              :  * brin_internal.h.
      91              :  */
      92              : #define     MINMAX_MAX_PROCNUMS     1   /* maximum support procs we need */
      93              : #define     PROCNUM_DISTANCE        11  /* required, distance between values */
      94              : 
      95              : /*
      96              :  * Subtract this from procnum to obtain index in MinmaxMultiOpaque arrays
      97              :  * (Must be equal to minimum of private procnums).
      98              :  */
      99              : #define     PROCNUM_BASE            11
     100              : 
     101              : /*
     102              :  * Sizing the insert buffer - we use 10x the number of values specified
     103              :  * in the reloption, but we cap it to 8192 not to get too large. When
     104              :  * the buffer gets full, we reduce the number of values by half.
     105              :  */
     106              : #define     MINMAX_BUFFER_FACTOR            10
     107              : #define     MINMAX_BUFFER_MIN               256
     108              : #define     MINMAX_BUFFER_MAX               8192
     109              : #define     MINMAX_BUFFER_LOAD_FACTOR       0.5
     110              : 
     111              : typedef struct MinmaxMultiOpaque
     112              : {
     113              :     FmgrInfo    extra_procinfos[MINMAX_MAX_PROCNUMS];
     114              :     Oid         cached_subtype;
     115              :     FmgrInfo    strategy_procinfos[BTMaxStrategyNumber];
     116              : } MinmaxMultiOpaque;
     117              : 
     118              : /*
     119              :  * Storage type for BRIN's minmax reloptions
     120              :  */
     121              : typedef struct MinMaxMultiOptions
     122              : {
     123              :     int32       vl_len_;        /* varlena header (do not touch directly!) */
     124              :     int         valuesPerRange; /* number of values per range */
     125              : } MinMaxMultiOptions;
     126              : 
     127              : #define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE        32
     128              : 
     129              : #define MinMaxMultiGetValuesPerRange(opts) \
     130              :         ((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
     131              :          ((MinMaxMultiOptions *) (opts))->valuesPerRange : \
     132              :          MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE)
     133              : 
     134              : #define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))
     135              : 
     136              : /*
     137              :  * The summary of minmax-multi indexes has two representations - Ranges for
     138              :  * convenient processing, and SerializedRanges for storage in bytea value.
     139              :  *
     140              :  * The Ranges struct stores the boundary values in a single array, but we
     141              :  * treat regular and single-point ranges differently to save space. For
     142              :  * regular ranges (with different boundary values) we have to store both
     143              :  * the lower and upper bound of the range, while for "single-point ranges"
     144              :  * we only need to store a single value.
     145              :  *
     146              :  * The 'values' array stores boundary values for regular ranges first (there
     147              :  * are 2*nranges values to store), and then the nvalues boundary values for
     148              :  * single-point ranges. That is, we have (2*nranges + nvalues) boundary
     149              :  * values in the array.
     150              :  *
     151              :  * +-------------------------+----------------------------------+
     152              :  * | ranges (2 * nranges of) | single point values (nvalues of) |
     153              :  * +-------------------------+----------------------------------+
     154              :  *
     155              :  * This allows us to quickly add new values, and store outliers without
     156              :  * having to widen any of the existing range values.
     157              :  *
     158              :  * 'nsorted' denotes how many of 'nvalues' in the values[] array are sorted.
     159              :  * When nsorted == nvalues, all single point values are sorted.
     160              :  *
     161              :  * We never store more than maxvalues values (as set by values_per_range
     162              :  * reloption). If needed we merge some of the ranges.
     163              :  *
     164              :  * To minimize palloc overhead, we always allocate the full array with
     165              :  * space for maxvalues elements. This should be fine as long as the
     166              :  * maxvalues is reasonably small (64 seems fine), which is the case
     167              :  * thanks to values_per_range reloption being limited to 256.
     168              :  */
     169              : typedef struct Ranges
     170              : {
     171              :     /* Cache information that we need quite often. */
     172              :     Oid         typid;
     173              :     Oid         colloid;
     174              :     AttrNumber  attno;
     175              :     FmgrInfo   *cmp;
     176              : 
     177              :     /* (2*nranges + nvalues) <= maxvalues */
     178              :     int         nranges;        /* number of ranges in the values[] array */
     179              :     int         nsorted;        /* number of nvalues which are sorted */
     180              :     int         nvalues;        /* number of point values in values[] array */
     181              :     int         maxvalues;      /* number of elements in the values[] array */
     182              : 
     183              :     /*
     184              :      * We simply add the values into a large buffer, without any expensive
     185              :      * steps (sorting, deduplication, ...). The buffer is a multiple of the
     186              :      * target number of values, so the compaction happens less often,
     187              :      * amortizing the costs. We keep the actual target and compact to the
     188              :      * requested number of values at the very end, before serializing to
     189              :      * on-disk representation.
     190              :      */
     191              :     /* requested number of values */
     192              :     int         target_maxvalues;
     193              : 
     194              :     /* values stored for this range - either raw values, or ranges */
     195              :     Datum       values[FLEXIBLE_ARRAY_MEMBER];
     196              : } Ranges;
     197              : 
     198              : /*
     199              :  * On-disk the summary is stored as a bytea value, with a simple header
     200              :  * with basic metadata, followed by the boundary values. It has a varlena
     201              :  * header, so can be treated as varlena directly.
     202              :  *
     203              :  * See brin_range_serialize/brin_range_deserialize for serialization details.
     204              :  */
     205              : typedef struct SerializedRanges
     206              : {
     207              :     /* varlena header (do not touch directly!) */
     208              :     int32       vl_len_;
     209              : 
     210              :     /* type of values stored in the data array */
     211              :     Oid         typid;
     212              : 
     213              :     /* (2*nranges + nvalues) <= maxvalues */
     214              :     int         nranges;        /* number of ranges in the array (stored) */
     215              :     int         nvalues;        /* number of values in the data array (all) */
     216              :     int         maxvalues;      /* maximum number of values (reloption) */
     217              : 
     218              :     /* contains the actual data */
     219              :     char        data[FLEXIBLE_ARRAY_MEMBER];
     220              : } SerializedRanges;
     221              : 
     222              : static SerializedRanges *brin_range_serialize(Ranges *range);
     223              : 
     224              : static Ranges *brin_range_deserialize(int maxvalues,
     225              :                                       SerializedRanges *serialized);
     226              : 
     227              : 
     228              : /*
     229              :  * Used to represent ranges expanded to make merging and combining easier.
     230              :  *
     231              :  * Each expanded range is essentially an interval, represented by min/max
     232              :  * values, along with a flag whether it's a collapsed range (in which case
     233              :  * the min and max values are equal). We have the flag to handle by-ref
     234              :  * data types - we can't simply compare the datums, and this saves some
     235              :  * calls to the type-specific comparator function.
     236              :  */
     237              : typedef struct ExpandedRange
     238              : {
     239              :     Datum       minval;         /* lower boundary */
     240              :     Datum       maxval;         /* upper boundary */
     241              :     bool        collapsed;      /* true if minval==maxval */
     242              : } ExpandedRange;
     243              : 
     244              : /*
     245              :  * Represents a distance between two ranges (identified by index into
     246              :  * an array of expanded ranges).
     247              :  */
     248              : typedef struct DistanceValue
     249              : {
     250              :     int         index;
     251              :     double      value;
     252              : } DistanceValue;
     253              : 
     254              : 
     255              : /* Cache for support and strategy procedures. */
     256              : 
     257              : static FmgrInfo *minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno,
     258              :                                            uint16 procnum);
     259              : 
     260              : static FmgrInfo *minmax_multi_get_strategy_procinfo(BrinDesc *bdesc,
     261              :                                                     uint16 attno, Oid subtype,
     262              :                                                     uint16 strategynum);
     263              : 
     264              : typedef struct compare_context
     265              : {
     266              :     FmgrInfo   *cmpFn;
     267              :     Oid         colloid;
     268              : } compare_context;
     269              : 
     270              : static int  compare_values(const void *a, const void *b, void *arg);
     271              : 
     272              : 
     273              : #ifdef USE_ASSERT_CHECKING
     274              : /*
     275              :  * Check that the order of the array values is correct, using the cmp
     276              :  * function (which should be BTLessStrategyNumber).
     277              :  */
     278              : static void
     279              : AssertArrayOrder(FmgrInfo *cmp, Oid colloid, const Datum *values, int nvalues)
     280              : {
     281              :     int         i;
     282              :     Datum       lt;
     283              : 
     284              :     for (i = 0; i < (nvalues - 1); i++)
     285              :     {
     286              :         lt = FunctionCall2Coll(cmp, colloid, values[i], values[i + 1]);
     287              :         Assert(DatumGetBool(lt));
     288              :     }
     289              : }
     290              : #endif
     291              : 
     292              : /*
     293              :  * Comprehensive check of the Ranges structure.
     294              :  */
     295              : static void
     296       135914 : AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
     297              : {
     298              : #ifdef USE_ASSERT_CHECKING
     299              :     int         i;
     300              : 
     301              :     /* some basic sanity checks */
     302              :     Assert(ranges->nranges >= 0);
     303              :     Assert(ranges->nsorted >= 0);
     304              :     Assert(ranges->nvalues >= ranges->nsorted);
     305              :     Assert(ranges->maxvalues >= 2 * ranges->nranges + ranges->nvalues);
     306              :     Assert(ranges->typid != InvalidOid);
     307              : 
     308              :     /*
     309              :      * First the ranges - there are 2*nranges boundary values, and the values
     310              :      * have to be strictly ordered (equal values would mean the range is
     311              :      * collapsed, and should be stored as a point). This also guarantees that
     312              :      * the ranges do not overlap.
     313              :      */
     314              :     AssertArrayOrder(cmpFn, colloid, ranges->values, 2 * ranges->nranges);
     315              : 
     316              :     /* then the sorted prefix of the single-point values (nsorted of them) */
     317              :     AssertArrayOrder(cmpFn, colloid, &ranges->values[2 * ranges->nranges],
     318              :                      ranges->nsorted);
     319              : 
     320              :     /*
     321              :      * Check that none of the point values (both sorted and unsorted) is
     322              :      * covered by any of the ranges.
     323              :      */
     324              :     if (ranges->nranges > 0)
     325              :     {
     326              :         for (i = 0; i < ranges->nvalues; i++)
     327              :         {
     328              :             Datum       compar;
     329              :             int         start,
     330              :                         end;
     331              :             Datum       minvalue = ranges->values[0];
     332              :             Datum       maxvalue = ranges->values[2 * ranges->nranges - 1];
     333              :             Datum       value = ranges->values[2 * ranges->nranges + i];
     334              : 
     335              :             compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
     336              : 
     337              :             /*
     338              :              * If the value is smaller than the lower bound in the first range
     339              :              * then it cannot possibly be in any of the ranges.
     340              :              */
     341              :             if (DatumGetBool(compar))
     342              :                 continue;
     343              : 
     344              :             compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
     345              : 
     346              :             /*
     347              :              * Likewise, if the value is larger than the upper bound of the
     348              :              * final range, then it cannot possibly be inside any of the
     349              :              * ranges.
     350              :              */
     351              :             if (DatumGetBool(compar))
     352              :                 continue;
     353              : 
     354              :             /* bsearch the ranges to see if 'value' fits within any of them */
     355              :             start = 0;          /* first range */
     356              :             end = ranges->nranges - 1;   /* last range */
     357              :             while (true)
     358              :             {
     359              :                 int         midpoint = (start + end) / 2;
     360              : 
     361              :                 /* this means we ran out of ranges in the last step */
     362              :                 if (start > end)
     363              :                     break;
     364              : 
     365              :                 /* copy the min/max values from the ranges */
     366              :                 minvalue = ranges->values[2 * midpoint];
     367              :                 maxvalue = ranges->values[2 * midpoint + 1];
     368              : 
     369              :                 /*
     370              :                  * Is the value smaller than the minval? If yes, we'll recurse
     371              :                  * to the left side of range array.
     372              :                  */
     373              :                 compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
     374              : 
     375              :                 /* smaller than the smallest value in this range */
     376              :                 if (DatumGetBool(compar))
     377              :                 {
     378              :                     end = (midpoint - 1);
     379              :                     continue;
     380              :                 }
     381              : 
     382              :                 /*
     383              :                  * Is the value greater than the maxval? If yes, we'll recurse
     384              :                  * to the right side of range array.
     385              :                  */
     386              :                 compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
     387              : 
     388              :                 /* larger than the largest value in this range */
     389              :                 if (DatumGetBool(compar))
     390              :                 {
     391              :                     start = (midpoint + 1);
     392              :                     continue;
     393              :                 }
     394              : 
     395              :                 /* hey, we found a matching range */
     396              :                 Assert(false);
     397              :             }
     398              :         }
     399              :     }
     400              : 
     401              :     /* and values in the unsorted part must not be in the sorted part */
     402              :     if (ranges->nsorted > 0)
     403              :     {
     404              :         compare_context cxt;
     405              : 
     406              :         cxt.colloid = ranges->colloid;
     407              :         cxt.cmpFn = ranges->cmp;
     408              : 
     409              :         for (i = ranges->nsorted; i < ranges->nvalues; i++)
     410              :         {
     411              :             Datum       value = ranges->values[2 * ranges->nranges + i];
     412              : 
     413              :             Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges],
     414              :                                ranges->nsorted, sizeof(Datum),
     415              :                                compare_values, &cxt) == NULL);
     416              :         }
     417              :     }
     418              : #endif
     419       135914 : }
     420              : 
     421              : /*
     422              :  * Check that the expanded ranges (built when reducing the number of ranges
     423              :  * by combining some of them) are correctly sorted and do not overlap.
     424              :  */
     425              : static void
     426          258 : AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno,
     427              :                           Form_pg_attribute attr, ExpandedRange *ranges,
     428              :                           int nranges)
     429              : {
     430              : #ifdef USE_ASSERT_CHECKING
     431              :     int         i;
     432              :     FmgrInfo   *eq;
     433              :     FmgrInfo   *lt;
     434              : 
     435              :     eq = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
     436              :                                             BTEqualStrategyNumber);
     437              : 
     438              :     lt = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
     439              :                                             BTLessStrategyNumber);
     440              : 
     441              :     /*
     442              :      * Each range independently should be valid, i.e. its boundary values
     443              :      * must satisfy (lower <= upper).
     444              :      */
     445              :     for (i = 0; i < nranges; i++)
     446              :     {
     447              :         Datum       r;
     448              :         Datum       minval = ranges[i].minval;
     449              :         Datum       maxval = ranges[i].maxval;
     450              : 
     451              :         if (ranges[i].collapsed)    /* collapsed: minval == maxval */
     452              :             r = FunctionCall2Coll(eq, colloid, minval, maxval);
     453              :         else                    /* non-collapsed: minval < maxval */
     454              :             r = FunctionCall2Coll(lt, colloid, minval, maxval);
     455              : 
     456              :         Assert(DatumGetBool(r));
     457              :     }
     458              : 
     459              :     /*
     460              :      * And the ranges should be ordered and must not overlap, i.e. upper <
     461              :      * lower for boundaries of consecutive ranges.
     462              :      */
     463              :     for (i = 0; i < nranges - 1; i++)
     464              :     {
     465              :         Datum       r;
     466              :         Datum       maxval = ranges[i].maxval;
     467              :         Datum       minval = ranges[i + 1].minval;
     468              : 
     469              :         r = FunctionCall2Coll(lt, colloid, maxval, minval);
     470              : 
     471              :         Assert(DatumGetBool(r));
     472              :     }
     473              : #endif
     474          258 : }
     475              : 
     476              : 
     477              : /*
     478              :  * minmax_multi_init
     479              :  *      Initialize the deserialized range list, allocate all the memory.
     480              :  *
     481              :  * This is only in-memory representation of the ranges, so we allocate
     482              :  * enough space for the maximum number of values (so as not to have to do
     483              :  * repallocs as the ranges grow).
     484              :  */
     485              : static Ranges *
     486        25419 : minmax_multi_init(int maxvalues)
     487              : {
     488              :     Size        len;
     489              :     Ranges     *ranges;
     490              : 
     491              :     Assert(maxvalues > 0);
     492              : 
     493        25419 :     len = offsetof(Ranges, values); /* fixed header */
     494        25419 :     len += maxvalues * sizeof(Datum);   /* Datum values */
     495              : 
     496        25419 :     ranges = (Ranges *) palloc0(len);
     497              : 
     498        25419 :     ranges->maxvalues = maxvalues;
     499              : 
     500        25419 :     return ranges;
     501              : }
     502              : 
     503              : 
     504              : /*
     505              :  * range_deduplicate_values
     506              :  *      Sort and deduplicate the single-point values (the part after ranges).
     507              :  *
     508              :  * This is meant to be a cheaper way of reducing the size of the ranges. It
     509              :  * does not touch the ranges, and only sorts the other values - it does not
     510              :  * call the distance functions, which may be quite expensive, etc.
     511              :  *
     512              :  * We do know the values are not duplicate with the ranges, because we check
     513              :  * that before adding a new value. Same for the sorted part of values.
     514              :  */
     515              : static void
     516         9186 : range_deduplicate_values(Ranges *range)
     517              : {
     518              :     int         i,
     519              :                 n;
     520              :     int         start;
     521              :     compare_context cxt;
     522              : 
     523              :     /*
     524              :      * If there are no unsorted values, there is nothing to do. This is not
     525              :      * unusual, as brin_range_serialize calls us unconditionally.
     526              :      */
     527         9186 :     if (range->nsorted == range->nvalues)
     528         9063 :         return;
     529              : 
     530              :     /* comparison context, used for both sorting and deduplication */
     531          123 :     cxt.colloid = range->colloid;
     532          123 :     cxt.cmpFn = range->cmp;
     533              : 
     534              :     /* the values start right after the ranges (which are always sorted) */
     535          123 :     start = 2 * range->nranges;
     536              : 
     537              :     /*
     538              :      * XXX This might do a merge sort, to leverage that the first part of the
     539              :      * array is already sorted. If the sorted part is large, it might be quite
     540              :      * a bit faster.
     541              :      */
     542          123 :     qsort_arg(&range->values[start],
     543          123 :               range->nvalues, sizeof(Datum),
     544              :               compare_values, &cxt);
     545              : 
     546          123 :     n = 1;
     547        39120 :     for (i = 1; i < range->nvalues; i++)
     548              :     {
     549              :         /* duplicate of the preceding value, so skip it */
     550        38997 :         if (compare_values(&range->values[start + i - 1],
     551        38997 :                            &range->values[start + i],
     552              :                            &cxt) == 0)
     553            0 :             continue;
     554              : 
     555        38997 :         range->values[start + n] = range->values[start + i];
     556              : 
     557        38997 :         n++;
     558              :     }
     559              : 
     560              :     /* now all the values are sorted and unique */
     561          123 :     range->nvalues = n;
     562          123 :     range->nsorted = n;
     563              : 
     564          123 :     AssertCheckRanges(range, range->cmp, range->colloid);
     565              : }
     566              : 
     567              : 
     568              : /*
     569              :  * brin_range_serialize
     570              :  *    Serialize the in-memory representation into a compact varlena value.
     571              :  *
     572              :  * Deduplicate any unsorted values, then copy the header followed by the
     573              :  * individual values, in the order of the in-memory value array.
     574              :  */
     575              : static SerializedRanges *
     576         9063 : brin_range_serialize(Ranges *range)
     577              : {
     578              :     Size        len;
     579              :     int         nvalues;
     580              :     SerializedRanges *serialized;
     581              :     Oid         typid;
     582              :     int         typlen;
     583              :     bool        typbyval;
     584              : 
     585              :     char       *ptr;
     586              : 
     587              :     /* simple sanity checks */
     588              :     Assert(range->nranges >= 0);
     589              :     Assert(range->nsorted >= 0);
     590              :     Assert(range->nvalues >= 0);
     591              :     Assert(range->maxvalues > 0);
     592              :     Assert(range->target_maxvalues > 0);
     593              : 
     594              :     /* at this point the range should be compacted to the target size */
     595              :     Assert(2 * range->nranges + range->nvalues <= range->target_maxvalues);
     596              : 
     597              :     Assert(range->target_maxvalues <= range->maxvalues);
     598              : 
     599              :     /* the sorted part can't be larger than all the values */
     600              :     Assert(range->nvalues >= range->nsorted);
     601              : 
     602              :     /* deduplicate values, if there's unsorted part */
     603         9063 :     range_deduplicate_values(range);
     604              : 
     605              :     /* see how many Datum values we actually have */
     606         9063 :     nvalues = 2 * range->nranges + range->nvalues;
     607              : 
     608         9063 :     typid = range->typid;
     609         9063 :     typbyval = get_typbyval(typid);
     610         9063 :     typlen = get_typlen(typid);
     611              : 
     612              :     /* header is always needed */
     613         9063 :     len = offsetof(SerializedRanges, data);
     614              : 
     615              :     /*
     616              :      * The space needed depends on data type - for fixed-length data types
     617              :      * (by-value and some by-reference) it's pretty simple, just multiply
     618              :      * (typlen * nvalues) and we're done. For variable-length by-reference
     619              :      * types we need to actually walk all the values and sum the lengths.
     620              :      */
     621         9063 :     if (typlen == -1)           /* varlena */
     622              :     {
     623              :         int         i;
     624              : 
     625         5976 :         for (i = 0; i < nvalues; i++)
     626              :         {
     627         4683 :             len += VARSIZE_ANY(DatumGetPointer(range->values[i]));
     628              :         }
     629              :     }
     630         7770 :     else if (typlen == -2)      /* cstring */
     631              :     {
     632              :         int         i;
     633              : 
     634            0 :         for (i = 0; i < nvalues; i++)
     635              :         {
     636              :             /* don't forget to include the null terminator ;-) */
     637            0 :             len += strlen(DatumGetCString(range->values[i])) + 1;
     638              :         }
     639              :     }
     640              :     else                        /* fixed-length types (even by-reference) */
     641              :     {
     642              :         Assert(typlen > 0);
     643         7770 :         len += nvalues * typlen;
     644              :     }
     645              : 
     646              :     /*
     647              :      * Allocate the serialized object (a varlena, so set its size header) and
     648              :      * copy the basic information. Note maxvalues stores target_maxvalues.
     649              :      */
     650         9063 :     serialized = (SerializedRanges *) palloc0(len);
     651         9063 :     SET_VARSIZE(serialized, len);
     652              : 
     653         9063 :     serialized->typid = typid;
     654         9063 :     serialized->nranges = range->nranges;
     655         9063 :     serialized->nvalues = range->nvalues;
     656         9063 :     serialized->maxvalues = range->target_maxvalues;
     657              : 
     658              :     /*
     659              :      * And now copy also the values themselves (like the length calculation
     660              :      * this depends on the particular data type).
     661              :      */
     662         9063 :     ptr = serialized->data;      /* start of the serialized data */
     663              : 
     664        44803 :     for (int i = 0; i < nvalues; i++)
     665              :     {
     666        35740 :         if (typbyval)           /* simple by-value data types */
     667              :         {
     668              :             Datum       tmp;
     669              : 
     670              :             /*
     671              :              * For byval types, we need to copy just the significant bytes -
     672              :              * we can't use memcpy directly, as that assumes little-endian
     673              :              * behavior.  store_att_byval does almost what we need, but it
     674              :              * requires a properly aligned buffer - the output buffer does not
     675              :              * guarantee that. So we simply use a local Datum variable (which
     676              :              * guarantees proper alignment), and then copy the value from it.
     677              :              */
     678        22303 :             store_att_byval(&tmp, range->values[i], typlen);
     679              : 
     680        22303 :             memcpy(ptr, &tmp, typlen);
     681        22303 :             ptr += typlen;
     682              :         }
     683        13437 :         else if (typlen > 0) /* fixed-length by-ref types */
     684              :         {
     685         8754 :             memcpy(ptr, DatumGetPointer(range->values[i]), typlen);
     686         8754 :             ptr += typlen;
     687              :         }
     688         4683 :         else if (typlen == -1)  /* varlena */
     689              :         {
     690         4683 :             int         tmp = VARSIZE_ANY(DatumGetPointer(range->values[i]));
     691              : 
     692         4683 :             memcpy(ptr, DatumGetPointer(range->values[i]), tmp);
     693         4683 :             ptr += tmp;
     694              :         }
     695            0 :         else if (typlen == -2)  /* cstring */
     696              :         {
     697            0 :             int         tmp = strlen(DatumGetCString(range->values[i])) + 1;
     698              : 
     699            0 :             memcpy(ptr, DatumGetCString(range->values[i]), tmp);
     700            0 :             ptr += tmp;
     701              :         }
     702              : 
     703              :         /* make sure we haven't overrun the end of the output buffer */
     704              :         Assert(ptr <= ((char *) serialized + len));
     705              :     }
     706              : 
     707              :     /* we should have filled the buffer exactly */
     708              :     Assert(ptr == ((char *) serialized + len));
     709              : 
     710         9063 :     return serialized;
     711              : }
     712              : 
     713              : /*
     714              :  * brin_range_deserialize
     715              :  *    Deserialize a compact varlena value into the in-memory representation.
     716              :  *
     717              :  * Copy the header fields and rebuild the Datum array. By-reference values
     718              :  * are copied into one palloc'd chunk, each at a MAXALIGN'ed offset.
     719              :  */
     720              : static Ranges *
     721        22935 : brin_range_deserialize(int maxvalues, SerializedRanges *serialized)
     722              : {
     723              :     int         i,
     724              :                 nvalues;
     725              :     char       *ptr,
     726              :                *dataptr;
     727              :     bool        typbyval;
     728              :     int         typlen;
     729              :     Size        datalen;
     730              : 
     731              :     Ranges     *range;
     732              : 
     733              :     Assert(serialized->nranges >= 0);
     734              :     Assert(serialized->nvalues >= 0);
     735              :     Assert(serialized->maxvalues > 0);
     736              : 
     737        22935 :     nvalues = 2 * serialized->nranges + serialized->nvalues;
     738              : 
     739              :     Assert(nvalues <= serialized->maxvalues);
     740              :     Assert(serialized->maxvalues <= maxvalues);
     741              : 
     742        22935 :     range = minmax_multi_init(maxvalues);
     743              : 
     744              :     /* copy the header info (serialized values are all considered sorted) */
     745        22935 :     range->nranges = serialized->nranges;
     746        22935 :     range->nvalues = serialized->nvalues;
     747        22935 :     range->nsorted = serialized->nvalues;
     748        22935 :     range->maxvalues = maxvalues;
     749        22935 :     range->target_maxvalues = serialized->maxvalues;
     750              : 
     751        22935 :     range->typid = serialized->typid;
     752              : 
     753        22935 :     typbyval = get_typbyval(serialized->typid);
     754        22935 :     typlen = get_typlen(serialized->typid);
     755              : 
     756              :     /*
     757              :      * And now deconstruct the values into Datum array. We have to copy the
     758              :      * data because the serialized representation ignores alignment, and we
     759              :      * don't want to rely on it being kept around anyway.
     760              :      */
     761        22935 :     ptr = serialized->data;
     762              : 
     763              :     /*
     764              :      * We don't want to allocate many pieces, so we just allocate everything
     765              :      * in one chunk. How much space will we need?
     766              :      *
     767              :      * By-value types need no extra space, so the loop is skipped for them.
     768              :      */
     769        22935 :     datalen = 0;
     770        22935 :     dataptr = NULL;
     771        52814 :     for (i = 0; (i < nvalues) && (!typbyval); i++)
     772              :     {
     773        29879 :         if (typlen > 0)          /* fixed-length by-ref types */
     774        18065 :             datalen += MAXALIGN(typlen);
     775        11814 :         else if (typlen == -1)  /* varlena */
     776              :         {
     777        11814 :             datalen += MAXALIGN(VARSIZE_ANY(ptr));
     778        11814 :             ptr += VARSIZE_ANY(ptr);
     779              :         }
     780            0 :         else if (typlen == -2)  /* cstring */
     781              :         {
     782            0 :             Size        slen = strlen(ptr) + 1;
     783              : 
     784            0 :             datalen += MAXALIGN(slen);
     785            0 :             ptr += slen;
     786              :         }
     787              :     }
     788              : 
     789        22935 :     if (datalen > 0)
     790         8697 :         dataptr = palloc(datalen);
     791              : 
     792              :     /*
     793              :      * Restore the source pointer (might have been modified when calculating
     794              :      * the space we need to allocate).
     795              :      */
     796        22935 :     ptr = serialized->data;
     797              : 
     798       118593 :     for (i = 0; i < nvalues; i++)
     799              :     {
     800        95658 :         if (typbyval)           /* simple by-value data types */
     801              :         {
     802        65779 :             Datum       v = 0;
     803              : 
     804        65779 :             memcpy(&v, ptr, typlen);
     805              : 
     806        65779 :             range->values[i] = fetch_att(&v, true, typlen);
     807        65779 :             ptr += typlen;
     808              :         }
     809        29879 :         else if (typlen > 0) /* fixed-length by-ref types */
     810              :         {
     811        18065 :             range->values[i] = PointerGetDatum(dataptr);
     812              : 
     813        18065 :             memcpy(dataptr, ptr, typlen);
     814        18065 :             dataptr += MAXALIGN(typlen);
     815              : 
     816        18065 :             ptr += typlen;
     817              :         }
     818        11814 :         else if (typlen == -1)  /* varlena */
     819              :         {
     820        11814 :             range->values[i] = PointerGetDatum(dataptr);
     821              : 
     822        11814 :             memcpy(dataptr, ptr, VARSIZE_ANY(ptr));
     823        11814 :             dataptr += MAXALIGN(VARSIZE_ANY(ptr));
     824        11814 :             ptr += VARSIZE_ANY(ptr);
     825              :         }
     826            0 :         else if (typlen == -2)  /* cstring */
     827              :         {
     828            0 :             Size        slen = strlen(ptr) + 1;
     829              : 
     830            0 :             range->values[i] = PointerGetDatum(dataptr);
     831              : 
     832            0 :             memcpy(dataptr, ptr, slen);
     833            0 :             dataptr += MAXALIGN(slen);
     834            0 :             ptr += slen;
     835              :         }
     836              : 
     837              :         /* make sure we haven't read past the end of the serialized value */
     838              :         Assert(ptr <= ((char *) serialized + VARSIZE_ANY(serialized)));
     839              :     }
     840              : 
     841              :     /* should have consumed the whole input value exactly */
     842              :     Assert(ptr == ((char *) serialized + VARSIZE_ANY(serialized)));
     843              : 
     844              :     /* return the deserialized value */
     845        22935 :     return range;
     846              : }
     847              : 
     848              : /*
     849              :  * compare_expanded_ranges
     850              :  *    Compare the expanded ranges - first by minimum, then by maximum.
     851              :  *
     852              :  * We do guarantee that ranges in a single Ranges object do not overlap, so it
     853              :  * may seem strange that we don't order just by minimum. But when merging two
     854              :  * Ranges (which happens in the union function), the ranges may in fact
     855              :  * overlap. So we do compare both. Returns -1, 0 or 1, like a qsort comparator.
     856              :  */
     857              : static int
     858       418567 : compare_expanded_ranges(const void *a, const void *b, void *arg)
     859              : {
     860       418567 :     const ExpandedRange *ra = a;
     861       418567 :     const ExpandedRange *rb = b;
     862              :     Datum       r;
     863              : 
     864       418567 :     compare_context *cxt = (compare_context *) arg;
     865              : 
     866              :     /* first compare minvals (cmpFn returns a boolean, so try both orders) */
     867       418567 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->minval, rb->minval);
     868              : 
     869       418567 :     if (DatumGetBool(r))
     870       270305 :         return -1;
     871              : 
     872       148262 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->minval, ra->minval);
     873              : 
     874       148262 :     if (DatumGetBool(r))
     875       115996 :         return 1;
     876              : 
     877              :     /* minvals are equal, so compare maxvals the same way */
     878        32266 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->maxval, rb->maxval);
     879              : 
     880        32266 :     if (DatumGetBool(r))
     881            0 :         return -1;
     882              : 
     883        32266 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->maxval, ra->maxval);
     884              : 
     885        32266 :     if (DatumGetBool(r))
     886            0 :         return 1;
     887              : 
     888        32266 :     return 0;
     889              : }
     890              : 
     891              : /*
     892              :  * compare_values
     893              :  *    Comparator for qsort_arg/bsearch_arg; -1 if cmpFn(a, b), 1 if cmpFn(b, a).
     894              :  */
     895              : static int
     896       571152 : compare_values(const void *a, const void *b, void *arg)
     897              : {
     898       571152 :     const Datum *da = a;
     899       571152 :     const Datum *db = b;
     900              :     Datum       r;
     901              : 
     902       571152 :     compare_context *cxt = (compare_context *) arg;
     903              : 
     904       571152 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *da, *db);
     905              : 
     906       571152 :     if (DatumGetBool(r))
     907       303672 :         return -1;
     908              : 
     909       267480 :     r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *db, *da);
     910              : 
     911       267480 :     if (DatumGetBool(r))
     912       233865 :         return 1;
     913              : 
     914        33615 :     return 0;
     915              : }
     916              : 
     917              : /*
     918              :  * Check if the new value falls into one of the existing ranges.
     919              :  */
     920              : static bool
     921        70377 : has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges,
     922              :                    Datum newval, AttrNumber attno, Oid typid)
     923              : {
     924              :     Datum       compar;
     925              : 
     926              :     Datum       minvalue;
     927              :     Datum       maxvalue;
     928              : 
     929              :     FmgrInfo   *cmpLessFn;
     930              :     FmgrInfo   *cmpGreaterFn;
     931              : 
     932              :     /* binary search on ranges */
     933              :     int         start,
     934              :                 end;
     935              : 
     936        70377 :     if (ranges->nranges == 0)
     937        40902 :         return false;
     938              : 
     939        29475 :     minvalue = ranges->values[0];
     940        29475 :     maxvalue = ranges->values[2 * ranges->nranges - 1];
     941              : 
     942              :     /*
     943              :      * Otherwise, need to compare the new value with boundaries of all the
     944              :      * ranges. First check if it's less than the absolute minimum, which is
     945              :      * the first value in the array.
     946              :      */
     947        29475 :     cmpLessFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
     948              :                                                    BTLessStrategyNumber);
     949        29475 :     compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
     950              : 
     951              :     /* smaller than the smallest value in the range list */
     952        29475 :     if (DatumGetBool(compar))
     953            9 :         return false;
     954              : 
     955              :     /*
     956              :      * And now compare it to the existing maximum (last value in the data
     957              :      * array). But only if we haven't already ruled out a possible match in
     958              :      * the minvalue check.
     959              :      */
     960        29466 :     cmpGreaterFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
     961              :                                                       BTGreaterStrategyNumber);
     962        29466 :     compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
     963              : 
     964        29466 :     if (DatumGetBool(compar))
     965        29103 :         return false;
     966              : 
     967              :     /*
     968              :      * So we know it's in the general min/max, the question is whether it
     969              :      * falls in one of the ranges or gaps. We'll do a binary search on
     970              :      * individual ranges - for each range we check equality (value falls into
     971              :      * the range), and then check ranges either above or below the current
     972              :      * range.
     973              :      */
     974          363 :     start = 0;                  /* first range */
     975          363 :     end = (ranges->nranges - 1); /* last range */
     976              :     while (true)
     977          777 :     {
     978         1140 :         int         midpoint = (start + end) / 2;
     979              : 
     980              :         /* this means we ran out of ranges in the last step */
     981         1140 :         if (start > end)
     982          144 :             return false;
     983              : 
     984              :         /* copy the min/max values from the ranges */
     985          996 :         minvalue = ranges->values[2 * midpoint];
     986          996 :         maxvalue = ranges->values[2 * midpoint + 1];
     987              : 
     988              :         /*
     989              :          * Is the value smaller than the minval? If yes, we'll continue with
     990              :          * the left side of range array.
     991              :          */
     992          996 :         compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
     993              : 
     994              :         /* smaller than the smallest value in this range */
     995          996 :         if (DatumGetBool(compar))
     996              :         {
     997          291 :             end = (midpoint - 1);
     998          291 :             continue;
     999              :         }
    1000              : 
    1001              :         /*
    1002              :          * Is the value greater than the maxval? If yes, we'll continue with
    1003              :          * the right side of range array.
    1004              :          */
    1005          705 :         compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
    1006              : 
    1007              :         /* larger than the largest value in this range */
    1008          705 :         if (DatumGetBool(compar))
    1009              :         {
    1010          486 :             start = (midpoint + 1);
    1011          486 :             continue;
    1012              :         }
    1013              : 
    1014              :         /* hey, we found a matching range */
    1015          219 :         return true;
    1016              :     }
    1017              : 
    1018              :     return false;
    1019              : }
    1020              : 
    1021              : 
    1022              : /*
    1023              :  * range_contains_value
    1024              :  *      See if the new value is already contained in the range list.
    1025              :  *
    1026              :  * We first inspect the list of intervals. We use a small trick - we check
    1027              :  * the value against min/max of the whole range (min of the first interval,
    1028              :  * max of the last one) first, and only inspect the individual intervals if
    1029              :  * this passes.
    1030              :  *
    1031              :  * If the value matches none of the intervals, we check the exact values,
    1032              :  * using either a sequential scan or a binary search of the sorted part.
    1033              :  *
    1034              :  * The last parameter (full) determines whether we need to search all the
    1035              :  * values, including the unsorted part. With full=false, the unsorted part
    1036              :  * is not searched, which may produce false negatives and duplicate values
    1037              :  * (in the unsorted part only), but when we're building the range that's
    1038              :  * fine - we'll deduplicate before serialization, and it can only happen
    1039              :  * if there already are unsorted values (so it was already modified).
    1040              :  *
    1041              :  * Serialized ranges don't have any unsorted values, so this can't cause
    1042              :  * false negatives during querying.
    1043              :  */
    1044              : static bool
    1045        70377 : range_contains_value(BrinDesc *bdesc, Oid colloid,
    1046              :                      AttrNumber attno, Form_pg_attribute attr,
    1047              :                      Ranges *ranges, Datum newval, bool full)
    1048              : {
    1049              :     int         i;
    1050              :     FmgrInfo   *cmpEqualFn;
    1051        70377 :     Oid         typid = attr->atttypid;
    1052              : 
    1053              :     /*
    1054              :      * First inspect the ranges, if there are any. We first check the whole
    1055              :      * range, and only when there's still a chance of getting a match we
    1056              :      * inspect the individual ranges.
    1057              :      */
    1058        70377 :     if (has_matching_range(bdesc, colloid, ranges, newval, attno, typid))
    1059          219 :         return true;
    1060              : 
    1061        70158 :     cmpEqualFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
    1062              :                                                     BTEqualStrategyNumber);
    1063              : 
    1064              :     /*
    1065              :      * There is no matching range, so let's inspect the sorted values.
    1066              :      *
    1067              :      * We do a sequential search for small numbers of values, and binary
    1068              :      * search once we have at least 16 values. This threshold is somewhat
    1069              :      * arbitrary, as it depends on how expensive the comparison function is.
    1070              :      *
    1071              :      * XXX If we use the threshold here, maybe we should do the same thing in
    1072              :      * has_matching_range? Or maybe we should do the bin search all the time?
    1073              :      *
    1074              :      * XXX We could use the same optimization as for ranges, to check if the
    1075              :      * value is between min/max, to maybe rule out all sorted values without
    1076              :      * having to inspect all of them.
    1077              :      */
    1078        70158 :     if (ranges->nsorted >= 16)
    1079              :     {
    1080              :         compare_context cxt;
    1081              : 
    1082        29046 :         cxt.colloid = ranges->colloid;
    1083        29046 :         cxt.cmpFn = ranges->cmp;
    1084              : 
    1085        29046 :         if (bsearch_arg(&newval, &ranges->values[2 * ranges->nranges],
    1086        29046 :                         ranges->nsorted, sizeof(Datum),
    1087              :                         compare_values, &cxt) != NULL)
    1088            0 :             return true;
    1089              :     }
    1090              :     else
    1091              :     {
    1092        81714 :         for (i = 2 * ranges->nranges; i < 2 * ranges->nranges + ranges->nsorted; i++)
    1093              :         {
    1094              :             Datum       compar;
    1095              : 
    1096        48525 :             compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
    1097              : 
    1098              :             /* found an exact match */
    1099        48525 :             if (DatumGetBool(compar))
    1100         7923 :                 return true;
    1101              :         }
    1102              :     }
    1103              : 
    1104              :     /* If not asked to inspect the unsorted part, we're done. */
    1105        62235 :     if (!full)
    1106        62235 :         return false;
    1107              : 
    1108              :     /* Inspect the unsorted part (always sequentially, using equality). */
    1109            0 :     for (i = 2 * ranges->nranges + ranges->nsorted; i < 2 * ranges->nranges + ranges->nvalues; i++)
    1110              :     {
    1111              :         Datum       compar;
    1112              : 
    1113            0 :         compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
    1114              : 
    1115              :         /* found an exact match */
    1116            0 :         if (DatumGetBool(compar))
    1117            0 :             return true;
    1118              :     }
    1119              : 
    1120              :     /* the value is not covered by this BRIN tuple */
    1121            0 :     return false;
    1122              : }
    1123              : 
    1124              : /*
    1125              :  * Expand ranges from Ranges into ExpandedRange array. This expects the
    1126              :  * eranges to be pre-allocated and with the correct size - there needs to be
    1127              :  * (nranges + nvalues) elements.
    1128              :  *
    1129              :  * The result is not necessarily sorted. We do expand the ranges first, and
    1130              :  * this part is sorted. The individual values follow as collapsed ranges
    1131              :  * (minval == maxval), and this part may be unsorted.
    1132              :  */
    1133              : static void
    1134         3191 : fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges)
    1135              : {
    1136              :     int         idx;
    1137              :     int         i;
    1138              : 
    1139              :     /* Check that the output array has the right size. */
    1140              :     Assert(neranges == (ranges->nranges + ranges->nvalues));
    1141              : 
    1142         3191 :     idx = 0;
    1143         4325 :     for (i = 0; i < ranges->nranges; i++)
    1144              :     {
    1145         1134 :         eranges[idx].minval = ranges->values[2 * i];
    1146         1134 :         eranges[idx].maxval = ranges->values[2 * i + 1];
    1147         1134 :         eranges[idx].collapsed = false;
    1148         1134 :         idx++;
    1149              : 
    1150              :         Assert(idx <= neranges);
    1151              :     }
    1152              : 
    1153        78767 :     for (i = 0; i < ranges->nvalues; i++)
    1154              :     {
    1155        75576 :         eranges[idx].minval = ranges->values[2 * ranges->nranges + i];
    1156        75576 :         eranges[idx].maxval = ranges->values[2 * ranges->nranges + i];
    1157        75576 :         eranges[idx].collapsed = true;
    1158        75576 :         idx++;
    1159              : 
    1160              :         Assert(idx <= neranges);
    1161              :     }
    1162              : 
    1163              :     /* Did we produce the expected number of elements? */
    1164              :     Assert(idx == neranges);
    1165              : 
    1166         3191 :     return;
    1167              : }
    1168              : 
    1169              : /*
    1170              :  * Sort and deduplicate expanded ranges.
    1171              :  *
    1172              :  * The ranges may contain duplicates - we're simply appending values, without
    1173              :  * checking for duplicates etc. So maybe the deduplication will reduce the
    1174              :  * number of ranges enough, and we won't have to compute the distances etc.
    1175              :  *
    1176              :  * Returns the number of expanded ranges.
    1177              :  */
    1178              : static int
    1179         3185 : sort_expanded_ranges(FmgrInfo *cmp, Oid colloid,
    1180              :                      ExpandedRange *eranges, int neranges)
    1181              : {
    1182              :     int         n;
    1183              :     int         i;
    1184              :     compare_context cxt;
    1185              : 
    1186              :     Assert(neranges > 0);
    1187              : 
    1188              :     /* sort the values */
    1189         3185 :     cxt.colloid = colloid;
    1190         3185 :     cxt.cmpFn = cmp;
    1191              : 
    1192              :     /*
    1193              :      * XXX We do qsort on all the values, but we could also leverage the fact
    1194              :      * that some of the input data is already sorted (all the ranges and maybe
    1195              :      * some of the points) and do merge sort.
    1196              :      */
    1197         3185 :     qsort_arg(eranges, neranges, sizeof(ExpandedRange),
    1198              :               compare_expanded_ranges, &cxt);
    1199              : 
    1200              :     /*
    1201              :      * Deduplicate the ranges - simply compare each range to the preceding
    1202              :      * one, and skip the duplicate ones.
    1203              :      */
    1204         3185 :     n = 1;
    1205        76710 :     for (i = 1; i < neranges; i++)
    1206              :     {
    1207              :         /* if the current range is equal to the preceding one, do nothing */
    1208        73525 :         if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], &cxt))
    1209        14799 :             continue;
    1210              : 
    1211              :         /* otherwise, copy it to n-th place (if not already there) */
    1212        58726 :         if (i != n)
    1213         4702 :             memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange));
    1214              : 
    1215        58726 :         n++;
    1216              :     }
    1217              : 
    1218              :     Assert((n > 0) && (n <= neranges));
    1219              : 
    1220         3185 :     return n;
    1221              : }
    1222              : 
    1223              : /*
    1224              :  * When combining multiple Range values (in union function), some of the
    1225              :  * ranges may overlap. We simply merge the overlapping ranges to fix that.
    1226              :  *
    1227              :  * XXX This assumes the expanded ranges were previously sorted (by minval
    1228              :  * and then maxval). We leverage this when detecting overlap.
    1229              :  */
    1230              : static int
    1231            6 : merge_overlapping_ranges(FmgrInfo *cmp, Oid colloid,
    1232              :                          ExpandedRange *eranges, int neranges)
    1233              : {
    1234              :     int         idx;
    1235              : 
    1236              :     /* Merge ranges (idx) and (idx+1) if they overlap. */
    1237            6 :     idx = 0;
    1238           57 :     while (idx < (neranges - 1))
    1239              :     {
    1240              :         Datum       r;
    1241              : 
    1242              :         /*
    1243              :          * comparing [?,maxval] vs. [minval,?] - the ranges overlap if (minval
    1244              :          * <= maxval), i.e. unless maxval is strictly less than minval
    1245              :          */
    1246           51 :         r = FunctionCall2Coll(cmp, colloid,
    1247           51 :                               eranges[idx].maxval,
    1248           51 :                               eranges[idx + 1].minval);
    1249              : 
    1250              :         /*
    1251              :          * Nope, maxval < minval, so no overlap. And we know the ranges are
    1252              :          * ordered, so this range can't overlap any later one either, because
    1253              :          * all the remaining ranges have greater or equal minval.
    1254              :          */
    1255           51 :         if (DatumGetBool(r))
    1256              :         {
    1257              :             /* proceed to the next range */
    1258           51 :             idx += 1;
    1259           51 :             continue;
    1260              :         }
    1261              : 
    1262              :         /*
    1263              :          * So ranges 'idx' and 'idx+1' do overlap, but we don't know if
    1264              :          * 'idx+1' is contained in 'idx', or if they overlap only partially.
    1265              :          * So compare the upper bounds and keep the larger one.
    1266              :          */
    1267            0 :         r = FunctionCall2Coll(cmp, colloid,
    1268            0 :                               eranges[idx].maxval,
    1269            0 :                               eranges[idx + 1].maxval);
    1270              : 
    1271            0 :         if (DatumGetBool(r))
    1272            0 :             eranges[idx].maxval = eranges[idx + 1].maxval;
    1273              : 
    1274              :         /*
    1275              :          * The range certainly is no longer collapsed (irrespectively of the
    1276              :          * previous state).
    1277              :          */
    1278            0 :         eranges[idx].collapsed = false;
    1279              : 
    1280              :         /*
    1281              :          * Now get rid of the (idx+1) range entirely by shifting the remaining
    1282              :          * ranges by 1. There are neranges elements, and we need to move
    1283              :          * elements from (idx+2). That means the number of elements to move is
    1284              :          * [neranges - (idx+2)].
    1285              :          */
    1286            0 :         memmove(&eranges[idx + 1], &eranges[idx + 2],
    1287            0 :                 (neranges - (idx + 2)) * sizeof(ExpandedRange));
    1288              : 
    1289              :         /*
    1290              :          * Decrease the number of ranges, and repeat (with the same range, as
    1291              :          * it might overlap with additional ranges thanks to the merge).
    1292              :          */
    1293            0 :         neranges--;
    1294              :     }
    1295              : 
    1296            6 :     return neranges;
    1297              : }
    1298              : 
    1299              : /*
    1300              :  * Simple comparator for distance values, comparing the double value.
    1301              :  * This is intentionally sorting the distances in descending order, i.e.
    1302              :  * the longer gaps will be at the front.
    1303              :  */
    1304              : static int
    1305        86299 : compare_distances(const void *a, const void *b)
    1306              : {
    1307        86299 :     const DistanceValue *da = a;
    1308        86299 :     const DistanceValue *db = b;
    1309              : 
    1310        86299 :     if (da->value < db->value)
    1311        20497 :         return 1;
    1312        65802 :     else if (da->value > db->value)
    1313        13910 :         return -1;
    1314              : 
    1315        51892 :     return 0;
    1316              : }
    1317              : 
    1318              : /*
    1319              :  * Given an array of expanded ranges, compute size of the gaps between each
    1320              :  * range.  For neranges there are (neranges-1) gaps.
    1321              :  *
    1322              :  * We simply call the "distance" function to compute the (max-min) for pairs
    1323              :  * of consecutive ranges. The function may be fairly expensive, so we do that
    1324              :  * just once (and then use it to pick as many ranges to merge as possible).
    1325              :  *
    1326              :  * See reduce_expanded_ranges for details.
    1327              :  */
    1328              : static DistanceValue *
    1329         3185 : build_distances(FmgrInfo *distanceFn, Oid colloid,
    1330              :                 ExpandedRange *eranges, int neranges)
    1331              : {
    1332              :     int         i;
    1333              :     int         ndistances;
    1334              :     DistanceValue *distances;
    1335              : 
    1336              :     Assert(neranges > 0);
    1337              : 
    1338              :     /* If there's only a single range, there's no distance to calculate. */
    1339         3185 :     if (neranges == 1)
    1340            0 :         return NULL;
    1341              : 
    1342         3185 :     ndistances = (neranges - 1);
    1343         3185 :     distances = palloc0_array(DistanceValue, ndistances);
    1344              : 
    1345              :     /*
    1346              :      * Walk through the ranges once and compute the distance between the
    1347              :      * ranges so that we can sort them once.
    1348              :      */
    1349        61911 :     for (i = 0; i < ndistances; i++)
    1350              :     {
    1351              :         Datum       a1,
    1352              :                     a2,
    1353              :                     r;
    1354              : 
    1355        58726 :         a1 = eranges[i].maxval;
    1356        58726 :         a2 = eranges[i + 1].minval;
    1357              : 
    1358              :         /* compute length of the gap (between max/min) */
    1359        58726 :         r = FunctionCall2Coll(distanceFn, colloid, a1, a2);
    1360              : 
    1361              :         /* remember the index of the gap the distance is for */
    1362        58726 :         distances[i].index = i;
    1363        58726 :         distances[i].value = DatumGetFloat8(r);
    1364              :     }
    1365              : 
    1366              :     /*
    1367              :      * Sort the distances in descending order, so that the longest gaps are at
    1368              :      * the front.
    1369              :      */
    1370         3185 :     qsort(distances, ndistances, sizeof(DistanceValue), compare_distances);
    1371              : 
    1372         3185 :     return distances;
    1373              : }
    1374              : 
    1375              : /*
    1376              :  * Builds expanded ranges for the existing ranges (and single-point ranges),
    1377              :  * and also the new value (which did not fit into the array).  This expanded
    1378              :  * representation makes the processing a bit easier, as it allows handling
    1379              :  * ranges and points the same way.
    1380              :  *
    1381              :  * We sort and deduplicate the expanded ranges - this is necessary, because
    1382              :  * the points may be unsorted. And moreover the two parts (ranges and
    1383              :  * points) are sorted on their own.
    1384              :  */
    1385              : static ExpandedRange *
    1386         3179 : build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges,
    1387              :                       int *nranges)
    1388              : {
    1389              :     int         neranges;
    1390              :     ExpandedRange *eranges;
    1391              : 
    1392              :     /* both ranges and points are expanded into a separate element */
    1393         3179 :     neranges = ranges->nranges + ranges->nvalues;
    1394              : 
    1395         3179 :     eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
    1396              : 
    1397              :     /* fill the expanded ranges */
    1398         3179 :     fill_expanded_ranges(eranges, neranges, ranges);
    1399              : 
    1400              :     /* sort and deduplicate the expanded ranges */
    1401         3179 :     neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges);
    1402              : 
    1403              :     /* remember how many ranges we built */
    1404         3179 :     *nranges = neranges;
    1405              : 
    1406         3179 :     return eranges;
    1407              : }
    1408              : 
    1409              : #ifdef USE_ASSERT_CHECKING
    1410              : /*
    1411              :  * Counts boundary values needed to store the ranges. Each single-point
    1412              :  * range is stored using a single value, each regular range needs two.
    1413              :  */
    1414              : static int
    1415              : count_values(ExpandedRange *cranges, int ncranges)
    1416              : {
    1417              :     int         i;
    1418              :     int         count;
    1419              : 
    1420              :     count = 0;
    1421              :     for (i = 0; i < ncranges; i++)
    1422              :     {
    1423              :         if (cranges[i].collapsed)
    1424              :             count += 1;
    1425              :         else
    1426              :             count += 2;
    1427              :     }
    1428              : 
    1429              :     return count;
    1430              : }
    1431              : #endif
    1432              : 
    1433              : /*
    1434              :  * reduce_expanded_ranges
    1435              :  *      reduce the ranges until the number of values is low enough
    1436              :  *
    1437              :  * Combines ranges until the number of boundary values does not exceed the
    1438              :  * threshold specified by max_values. This happens by keeping the largest
    1439              :  * gaps and merging the ranges separated by the smaller ones.
    1440              :  *
    1441              :  * Returns the number of result ranges.
    1442              :  *
    1443              :  * We simply use the global min/max and then add boundaries for enough
    1444              :  * largest gaps. Each gap adds 2 values, so we simply use (target/2-1)
    1445              :  * distances. Then we simply sort all the values - each two values are
    1446              :  * a boundary of a range (possibly collapsed).
    1447              :  *
    1448              :  * XXX Some of the ranges may be collapsed (i.e. the min/max values are
    1449              :  * equal), but we ignore that for now. We could repeat the process,
    1450              :  * adding a couple more gaps recursively.
    1451              :  *
    1452              :  * XXX The ranges to merge are selected solely using the distance. But
    1453              :  * that may not be the best strategy, for example when multiple gaps
    1454              :  * are of equal (or very similar) length.
    1455              :  *
    1456              :  * Consider for example points 1, 2, 3, .., 64, which have gaps of the
    1457              :  * same length 1 of course. In that case, we tend to pick the first
    1458              :  * gap of that length, which leads to this:
    1459              :  *
    1460              :  *    step 1:  [1, 2], 3, 4, 5, .., 64
    1461              :  *    step 2:  [1, 3], 4, 5,    .., 64
    1462              :  *    step 3:  [1, 4], 5,       .., 64
    1463              :  *    ...
    1464              :  *
    1465              :  * So in the end we'll have one "large" range and multiple small points.
    1466              :  * That may be fine, but it seems a bit strange and non-optimal. Maybe
    1467              :  * we should consider other things when picking ranges to merge - e.g.
    1468              :  * length of the ranges? Or perhaps randomize the choice of ranges, with
    1469              :  * probability inversely proportional to the distance (the gap lengths
    1470              :  * may be very close, but not exactly the same).
    1471              :  *
    1472              :  * XXX Or maybe we could just handle this by using random value as a
    1473              :  * tie-break, or by adding random noise to the actual distance.
    1474              :  */
    1475              : static int
    1476         3185 : reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
    1477              :                        DistanceValue *distances, int max_values,
    1478              :                        FmgrInfo *cmp, Oid colloid)
    1479              : {
    1480              :     int         i;
    1481              :     int         nvalues;
    1482              :     Datum      *values;
    1483              : 
    1484              :     compare_context cxt;
    1485              : 
    1486              :     /* total number of gaps between ranges */
    1487         3185 :     int         ndistances = (neranges - 1);
    1488              : 
    1489              :     /* number of gaps to keep */
    1490         3185 :     int         keep = (max_values / 2 - 1);
    1491              : 
    1492              :     /*
    1493              :      * Maybe we have a sufficiently low number of ranges already?
    1494              :      *
    1495              :      * XXX This should happen before we actually do the expensive stuff like
    1496              :      * sorting, so maybe this should be just an assert.
    1497              :      */
    1498         3185 :     if (keep >= ndistances)
    1499         2762 :         return neranges;
    1500              : 
    1501              :     /* sort the values */
    1502          423 :     cxt.colloid = colloid;
    1503          423 :     cxt.cmpFn = cmp;
    1504              : 
    1505              :     /* allocate space for the boundary values */
    1506          423 :     nvalues = 0;
    1507          423 :     values = palloc_array(Datum, max_values);
    1508              : 
    1509              :     /* add the global min/max values, from the first/last range */
    1510          423 :     values[nvalues++] = eranges[0].minval;
    1511          423 :     values[nvalues++] = eranges[neranges - 1].maxval;
    1512              : 
    1513              :     /* add boundary values for enough gaps */
    1514        14640 :     for (i = 0; i < keep; i++)
    1515              :     {
    1516              :         /* index of the gap between (index) and (index+1) ranges */
    1517        14217 :         int         index = distances[i].index;
    1518              : 
    1519              :         Assert((index >= 0) && ((index + 1) < neranges));
    1520              : 
    1521              :         /* add max from the preceding range, minval from the next one */
    1522        14217 :         values[nvalues++] = eranges[index].maxval;
    1523        14217 :         values[nvalues++] = eranges[index + 1].minval;
    1524              : 
    1525              :         Assert(nvalues <= max_values);
    1526              :     }
    1527              : 
    1528              :     /* We should have an even number of range values. */
    1529              :     Assert(nvalues % 2 == 0);
    1530              : 
    1531              :     /*
    1532              :      * Sort the values using the comparator function, and form ranges from the
    1533              :      * sorted result.
    1534              :      */
    1535          423 :     qsort_arg(values, nvalues, sizeof(Datum),
    1536              :               compare_values, &cxt);
    1537              : 
    1538              :     /* We have nvalues boundary values, which means nvalues/2 ranges. */
    1539        15063 :     for (i = 0; i < (nvalues / 2); i++)
    1540              :     {
    1541        14640 :         eranges[i].minval = values[2 * i];
    1542        14640 :         eranges[i].maxval = values[2 * i + 1];
    1543              : 
    1544              :         /* if the boundary values are the same, it's a collapsed range */
    1545        29280 :         eranges[i].collapsed = (compare_values(&values[2 * i],
    1546        14640 :                                                &values[2 * i + 1],
    1547        14640 :                                                &cxt) == 0);
    1548              :     }
    1549              : 
    1550          423 :     return (nvalues / 2);
    1551              : }
    1552              : 
    1553              : /*
    1554              :  * Store the boundary values from ExpandedRanges back into 'ranges' (using
    1555              :  * only the minimal number of values needed).
    1556              :  */
    1557              : static void
    1558         3185 : store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges)
    1559              : {
    1560              :     int         i;
    1561         3185 :     int         idx = 0;
    1562              : 
    1563              :     /* first copy in the regular ranges */
    1564         3185 :     ranges->nranges = 0;
    1565        28178 :     for (i = 0; i < neranges; i++)
    1566              :     {
    1567        24993 :         if (!eranges[i].collapsed)
    1568              :         {
    1569         2208 :             ranges->values[idx++] = eranges[i].minval;
    1570         2208 :             ranges->values[idx++] = eranges[i].maxval;
    1571         2208 :             ranges->nranges++;
    1572              :         }
    1573              :     }
    1574              : 
    1575              :     /* now copy in the collapsed ones */
    1576         3185 :     ranges->nvalues = 0;
    1577        28178 :     for (i = 0; i < neranges; i++)
    1578              :     {
    1579        24993 :         if (eranges[i].collapsed)
    1580              :         {
    1581        22785 :             ranges->values[idx++] = eranges[i].minval;
    1582        22785 :             ranges->nvalues++;
    1583              :         }
    1584              :     }
    1585              : 
    1586              :     /* all the values are sorted */
    1587         3185 :     ranges->nsorted = ranges->nvalues;
    1588              : 
    1589              :     Assert(count_values(eranges, neranges) == 2 * ranges->nranges + ranges->nvalues);
    1590              :     Assert(2 * ranges->nranges + ranges->nvalues <= ranges->maxvalues);
    1591         3185 : }
    1592              : 
    1593              : 
    1594              : /*
    1595              :  * Consider freeing space in the ranges. Checks if there's space for at least
    1596              :  * one new value, and performs compaction if needed.
    1597              :  *
    1598              :  * Returns true if the value was actually modified.
    1599              :  */
    1600              : static bool
    1601        70377 : ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid,
    1602              :                             AttrNumber attno, Form_pg_attribute attr,
    1603              :                             Ranges *range)
    1604              : {
    1605              :     MemoryContext ctx;
    1606              :     MemoryContext oldctx;
    1607              : 
    1608              :     FmgrInfo   *cmpFn,
    1609              :                *distanceFn;
    1610              : 
    1611              :     /* expanded ranges */
    1612              :     ExpandedRange *eranges;
    1613              :     int         neranges;
    1614              :     DistanceValue *distances;
    1615              : 
    1616              :     /*
    1617              :      * If there is free space in the buffer, we're done without having to
    1618              :      * modify anything.
    1619              :      */
    1620        70377 :     if (2 * range->nranges + range->nvalues < range->maxvalues)
    1621        70254 :         return false;
    1622              : 
    1623              :     /* we'll certainly need the comparator, so just look it up now */
    1624          123 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    1625              :                                                BTLessStrategyNumber);
    1626              : 
    1627              :     /* deduplicate values, if there's an unsorted part */
    1628          123 :     range_deduplicate_values(range);
    1629              : 
    1630              :     /*
    1631              :      * Did we free enough space by just the deduplication?
    1632              :      *
    1633              :      * We don't simply check against range->maxvalues again. The deduplication
    1634              :      * might have freed very little space (e.g. just one value), forcing us to
    1635              :      * do deduplication very often. In that case, it's better to do the
    1636              :      * compaction and free more space.
    1637              :      */
    1638          123 :     if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
    1639            0 :         return true;
    1640              : 
    1641              :     /*
    1642              :      * We need to combine some of the existing ranges, to reduce the number of
    1643              :      * values we have to store.
    1644              :      *
    1645              :      * The distanceFn calls (which may internally call e.g. numeric_le) may
    1646              :      * allocate quite a bit of memory, and we must not leak it (we might have
    1647              :      * to do this repeatedly, even for a single BRIN page range). Otherwise
    1648              :      * we'd have problems e.g. when building new indexes. So we use a memory
    1649              :      * context and make sure we free the memory at the end (so if we call the
    1650              :      * distance function many times, it might be an issue, but meh).
    1651              :      */
    1652          123 :     ctx = AllocSetContextCreate(CurrentMemoryContext,
    1653              :                                 "minmax-multi context",
    1654              :                                 ALLOCSET_DEFAULT_SIZES);
    1655              : 
    1656          123 :     oldctx = MemoryContextSwitchTo(ctx);
    1657              : 
    1658              :     /* build the expanded ranges */
    1659          123 :     eranges = build_expanded_ranges(cmpFn, colloid, range, &neranges);
    1660              : 
    1661              :     /* Is the expanded representation of ranges correct? */
    1662          123 :     AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
    1663              : 
    1664              :     /* and we'll also need the 'distance' procedure */
    1665          123 :     distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
    1666              : 
    1667              :     /* build array of gap distances and sort them in descending order */
    1668          123 :     distances = build_distances(distanceFn, colloid, eranges, neranges);
    1669              : 
    1670              :     /*
    1671              :      * Combine ranges until we release at least 50% of the space. This
    1672              :      * threshold is somewhat arbitrary, perhaps needs tuning. We must not use
    1673              :      * too low or high value.
    1674              :      */
    1675          246 :     neranges = reduce_expanded_ranges(eranges, neranges, distances,
    1676          123 :                                       range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR,
    1677              :                                       cmpFn, colloid);
    1678              : 
    1679              :     /* Is the result of reducing expanded ranges correct? */
    1680          123 :     AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
    1681              : 
    1682              :     /* Make sure we've sufficiently reduced the number of ranges. */
    1683              :     Assert(count_values(eranges, neranges) <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR);
    1684              : 
    1685              :     /* decompose the expanded ranges into regular ranges and single values */
    1686          123 :     store_expanded_ranges(range, eranges, neranges);
    1687              : 
    1688          123 :     MemoryContextSwitchTo(oldctx);
    1689          123 :     MemoryContextDelete(ctx);
    1690              : 
    1691              :     /* Did we break the ranges somehow? */
    1692          123 :     AssertCheckRanges(range, cmpFn, colloid);
    1693              : 
    1694          123 :     return true;
    1695              : }
    1696              : 
    1697              : /*
    1698              :  * range_add_value
    1699              :  *      Add the new value to the minmax-multi range.
    1700              :  */
    1701              : static bool
    1702        70377 : range_add_value(BrinDesc *bdesc, Oid colloid,
    1703              :                 AttrNumber attno, Form_pg_attribute attr,
    1704              :                 Ranges *ranges, Datum newval)
    1705              : {
    1706              :     FmgrInfo   *cmpFn;
    1707        70377 :     bool        modified = false;
    1708              : 
    1709              :     /* we'll certainly need the comparator, so just look it up now */
    1710        70377 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    1711              :                                                BTLessStrategyNumber);
    1712              : 
    1713              :     /* comprehensive checks of the input ranges */
    1714        70377 :     AssertCheckRanges(ranges, cmpFn, colloid);
    1715              : 
    1716              :     /*
    1717              :      * Make sure there's enough free space in the buffer. We only trigger this
    1718              :      * when the buffer is full, which means it had to be modified as we size
    1719              :      * it to be larger than what is stored on disk.
    1720              :      *
    1721              :      * This needs to happen before we check if the value is contained in the
    1722              :      * range, because the value might be in the unsorted part, and we don't
    1723              :      * check that in range_contains_value. The deduplication would then move
    1724              :      * it to the sorted part, and we'd add the value too, which violates the
    1725              :      * rule that we never have duplicates with the ranges or sorted values.
    1726              :      *
    1727              :      * We might also deduplicate and recheck if the value is contained, but
    1728              :      * that seems like overkill. We'd need to deduplicate anyway, so why not
    1729              :      * do it now.
    1730              :      */
    1731        70377 :     modified = ensure_free_space_in_buffer(bdesc, colloid,
    1732              :                                            attno, attr, ranges);
    1733              : 
    1734              :     /*
    1735              :      * Bail out if the value already is covered by the range.
    1736              :      *
    1737              :      * We could also add values until we hit values_per_range, and then do the
    1738              :      * deduplication in a batch, hoping for better efficiency. But that would
    1739              :      * mean we actually modify the range every time, which means having to
    1740              :      * serialize the value, which does palloc, walks the values, copies them,
    1741              :      * etc. Not exactly cheap.
    1742              :      *
    1743              :      * So instead we do the check, which should be fairly cheap - assuming the
    1744              :      * comparator function is not very expensive.
    1745              :      *
    1746              :      * This also implies the values array can't contain duplicate values.
    1747              :      */
    1748        70377 :     if (range_contains_value(bdesc, colloid, attno, attr, ranges, newval, false))
    1749         8142 :         return modified;
    1750              : 
    1751              :     /* Make a copy of the value, if needed. */
    1752        62235 :     newval = datumCopy(newval, attr->attbyval, attr->attlen);
    1753              : 
    1754              :     /*
    1755              :      * If there's space in the values array, copy it in and we're done.
    1756              :      *
    1757              :      * We do want to keep the values sorted (to speed up searches), so we do a
    1758              :      * simple insertion sort. We could do something more elaborate, e.g. by
    1759              :      * sorting the values only now and then, but for small counts (e.g. when
    1760              :      * maxvalues is 64) this should be fine.
    1761              :      */
    1762        62235 :     ranges->values[2 * ranges->nranges + ranges->nvalues] = newval;
    1763        62235 :     ranges->nvalues++;
    1764              : 
    1765              :     /* If we added the first value, we can consider it as sorted. */
    1766        62235 :     if (ranges->nvalues == 1)
    1767         2484 :         ranges->nsorted = 1;
    1768              : 
    1769              :     /*
    1770              :      * Check we haven't broken the ordering of boundary values (checks both
    1771              :      * parts, but that doesn't hurt).
    1772              :      */
    1773        62235 :     AssertCheckRanges(ranges, cmpFn, colloid);
    1774              : 
    1775              :     /* Check the range contains the value we just added. */
    1776              :     Assert(range_contains_value(bdesc, colloid, attno, attr, ranges, newval, true));
    1777              : 
    1778              :     /* yep, we've modified the range */
    1779        62235 :     return true;
    1780              : }
    1781              : 
    1782              : /*
    1783              :  * Generate range representation of data collected during "batch mode".
    1784              :  * This is similar to reduce_expanded_ranges, except that we can't assume
    1785              :  * the values are sorted and there may be duplicate values.
    1786              :  */
    1787              : static void
    1788         9057 : compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
    1789              : {
    1790              :     FmgrInfo   *cmpFn,
    1791              :                *distanceFn;
    1792              : 
    1793              :     /* expanded ranges */
    1794              :     ExpandedRange *eranges;
    1795              :     int         neranges;
    1796              :     DistanceValue *distances;
    1797              : 
    1798              :     MemoryContext ctx;
    1799              :     MemoryContext oldctx;
    1800              : 
    1801              :     /*
    1802              :      * Do we need to actually compactify anything?
    1803              :      *
    1804              :      * There are two reasons why compaction may be needed - firstly, there may
    1805              :      * be too many values, or some of the values may be unsorted.
    1806              :      */
    1807         9057 :     if ((ranges->nranges * 2 + ranges->nvalues <= max_values) &&
    1808         8818 :         (ranges->nsorted == ranges->nvalues))
    1809         6001 :         return;
    1810              : 
    1811              :     /* we'll certainly need the comparator, so just look it up now */
    1812         3056 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, ranges->attno, ranges->typid,
    1813              :                                                BTLessStrategyNumber);
    1814              : 
    1815              :     /* and we'll also need the 'distance' procedure */
    1816         3056 :     distanceFn = minmax_multi_get_procinfo(bdesc, ranges->attno, PROCNUM_DISTANCE);
    1817              : 
    1818              :     /*
    1819              :      * The distanceFn calls (which may internally call e.g. numeric_le) may
    1820              :      * allocate quite a bit of memory, and we must not leak it. Otherwise,
    1821              :      * we'd have problems e.g. when building indexes. So we create a local
    1822              :      * memory context and make sure we free the memory before leaving this
    1823              :      * function (not after every call).
    1824              :      */
    1825         3056 :     ctx = AllocSetContextCreate(CurrentMemoryContext,
    1826              :                                 "minmax-multi context",
    1827              :                                 ALLOCSET_DEFAULT_SIZES);
    1828              : 
    1829         3056 :     oldctx = MemoryContextSwitchTo(ctx);
    1830              : 
    1831              :     /* build the expanded ranges */
    1832         3056 :     eranges = build_expanded_ranges(cmpFn, ranges->colloid, ranges, &neranges);
    1833              : 
    1834              :     /* build array of gap distances and sort them in ascending order */
    1835         3056 :     distances = build_distances(distanceFn, ranges->colloid,
    1836              :                                 eranges, neranges);
    1837              : 
    1838              :     /*
    1839              :      * Combine ranges until we get below max_values. We don't use any scale
    1840              :      * factor, because this is used during serialization, and we don't expect
    1841              :      * more tuples to be inserted anytime soon.
    1842              :      */
    1843         3056 :     neranges = reduce_expanded_ranges(eranges, neranges, distances,
    1844              :                                       max_values, cmpFn, ranges->colloid);
    1845              : 
    1846              :     Assert(count_values(eranges, neranges) <= max_values);
    1847              : 
    1848              :     /* transform back into regular ranges and single values */
    1849         3056 :     store_expanded_ranges(ranges, eranges, neranges);
    1850              : 
    1851              :     /* check all the range invariants */
    1852         3056 :     AssertCheckRanges(ranges, cmpFn, ranges->colloid);
    1853              : 
    1854         3056 :     MemoryContextSwitchTo(oldctx);
    1855         3056 :     MemoryContextDelete(ctx);
    1856              : }
    1857              : 
    1858              : Datum
    1859         9754 : brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS)
    1860              : {
    1861              :     BrinOpcInfo *result;
    1862              : 
    1863              :     /*
    1864              :      * opaque->strategy_procinfos is initialized lazily; here it is set to
    1865              :      * all-uninitialized by palloc0 which sets fn_oid to InvalidOid.
    1866              :      */
    1867              : 
    1868         9754 :     result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
    1869              :                      sizeof(MinmaxMultiOpaque));
    1870         9754 :     result->oi_nstored = 1;
    1871         9754 :     result->oi_regular_nulls = true;
    1872         9754 :     result->oi_opaque = (MinmaxMultiOpaque *)
    1873         9754 :         MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
    1874         9754 :     result->oi_typcache[0] = lookup_type_cache(PG_BRIN_MINMAX_MULTI_SUMMARYOID, 0);
    1875              : 
    1876         9754 :     PG_RETURN_POINTER(result);
    1877              : }
    1878              : 
    1879              : /*
    1880              :  * Compute the distance between two float4 values (plain subtraction).
    1881              :  */
    1882              : Datum
    1883          354 : brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
    1884              : {
    1885          354 :     float       a1 = PG_GETARG_FLOAT4(0);
    1886          354 :     float       a2 = PG_GETARG_FLOAT4(1);
    1887              : 
    1888              :     /* if both values are NaN, then we consider them the same */
    1889          354 :     if (isnan(a1) && isnan(a2))
    1890            0 :         PG_RETURN_FLOAT8(0.0);
    1891              : 
    1892              :     /* if one value is NaN, use infinite distance */
    1893          354 :     if (isnan(a1) || isnan(a2))
    1894            3 :         PG_RETURN_FLOAT8(get_float8_infinity());
    1895              : 
    1896              :     /*
    1897              :      * We know the values are range boundaries, but the range may be collapsed
    1898              :      * (i.e. single points), with equal values.
    1899              :      */
    1900              :     Assert(a1 <= a2);
    1901              : 
    1902          351 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1903              : }
    1904              : 
    1905              : /*
    1906              :  * Compute the distance between two float8 values (plain subtraction).
    1907              :  */
    1908              : Datum
    1909          528 : brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
    1910              : {
    1911          528 :     double      a1 = PG_GETARG_FLOAT8(0);
    1912          528 :     double      a2 = PG_GETARG_FLOAT8(1);
    1913              : 
    1914              :     /* if both values are NaN, then we consider them the same */
    1915          528 :     if (isnan(a1) && isnan(a2))
    1916            0 :         PG_RETURN_FLOAT8(0.0);
    1917              : 
    1918              :     /* if one value is NaN, use infinite distance */
    1919          528 :     if (isnan(a1) || isnan(a2))
    1920            3 :         PG_RETURN_FLOAT8(get_float8_infinity());
    1921              : 
    1922              :     /*
    1923              :      * We know the values are range boundaries, but the range may be collapsed
    1924              :      * (i.e. single points), with equal values.
    1925              :      */
    1926              :     Assert(a1 <= a2);
    1927              : 
    1928          525 :     PG_RETURN_FLOAT8(a2 - a1);
    1929              : }
    1930              : 
    1931              : /*
    1932              :  * Compute the distance between two int2 values (plain subtraction).
    1933              :  */
    1934              : Datum
    1935          513 : brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
    1936              : {
    1937          513 :     int16       a1 = PG_GETARG_INT16(0);
    1938          513 :     int16       a2 = PG_GETARG_INT16(1);
    1939              : 
    1940              :     /*
    1941              :      * We know the values are range boundaries, but the range may be collapsed
    1942              :      * (i.e. single points), with equal values.
    1943              :      */
    1944              :     Assert(a1 <= a2);
    1945              : 
    1946          513 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1947              : }
    1948              : 
    1949              : /*
    1950              :  * Compute the distance between two int4 values (plain subtraction).
    1951              :  */
    1952              : Datum
    1953        43389 : brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
    1954              : {
    1955        43389 :     int32       a1 = PG_GETARG_INT32(0);
    1956        43389 :     int32       a2 = PG_GETARG_INT32(1);
    1957              : 
    1958              :     /*
    1959              :      * We know the values are range boundaries, but the range may be collapsed
    1960              :      * (i.e. single points), with equal values.
    1961              :      */
    1962              :     Assert(a1 <= a2);
    1963              : 
    1964        43389 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1965              : }
    1966              : 
    1967              : /*
    1968              :  * Compute the distance between two int8 values (plain subtraction).
    1969              :  */
    1970              : Datum
    1971         5611 : brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
    1972              : {
    1973         5611 :     int64       a1 = PG_GETARG_INT64(0);
    1974         5611 :     int64       a2 = PG_GETARG_INT64(1);
    1975              : 
    1976              :     /*
    1977              :      * We know the values are range boundaries, but the range may be collapsed
    1978              :      * (i.e. single points), with equal values.
    1979              :      */
    1980              :     Assert(a1 <= a2);
    1981              : 
    1982         5611 :     PG_RETURN_FLOAT8((double) a2 - (double) a1);
    1983              : }
    1984              : 
    1985              : /*
    1986              :  * Compute the distance between two tid values (by mapping them to float8 and
    1987              :  * then subtracting them).
    1988              :  */
    1989              : Datum
    1990          519 : brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
    1991              : {
    1992              :     double      da1,
    1993              :                 da2;
    1994              : 
    1995          519 :     ItemPointer pa1 = (ItemPointer) PG_GETARG_POINTER(0);
    1996          519 :     ItemPointer pa2 = (ItemPointer) PG_GETARG_POINTER(1);
    1997              : 
    1998              :     /*
    1999              :      * We know the values are range boundaries, but the range may be collapsed
    2000              :      * (i.e. single points), with equal values.
    2001              :      */
    2002              :     Assert(ItemPointerCompare(pa1, pa2) <= 0);
    2003              : 
    2004              :     /*
    2005              :      * We use the no-check variants here, because user-supplied values may
    2006              :      * have (ip_posid == 0). See ItemPointerCompare.
    2007              :      */
    2008          519 :     da1 = ItemPointerGetBlockNumberNoCheck(pa1) * MaxHeapTuplesPerPage +
    2009          519 :         ItemPointerGetOffsetNumberNoCheck(pa1);
    2010              : 
    2011          519 :     da2 = ItemPointerGetBlockNumberNoCheck(pa2) * MaxHeapTuplesPerPage +
    2012          519 :         ItemPointerGetOffsetNumberNoCheck(pa2);
    2013              : 
    2014          519 :     PG_RETURN_FLOAT8(da2 - da1);
    2015              : }
    2016              : 
    2017              : /*
    2018              :  * Compute the distance between two numeric values (plain subtraction).
    2019              :  */
    2020              : Datum
    2021          519 : brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
    2022              : {
    2023              :     Datum       d;
    2024          519 :     Datum       a1 = PG_GETARG_DATUM(0);
    2025          519 :     Datum       a2 = PG_GETARG_DATUM(1);
    2026              : 
    2027              :     /*
    2028              :      * We know the values are range boundaries, but the range may be collapsed
    2029              :      * (i.e. single points), with equal values.
    2030              :      */
    2031              :     Assert(DatumGetBool(DirectFunctionCall2(numeric_le, a1, a2)));
    2032              : 
    2033          519 :     d = DirectFunctionCall2(numeric_sub, a2, a1);   /* a2 - a1 */
    2034              : 
    2035          519 :     PG_RETURN_DATUM(DirectFunctionCall1(numeric_float8, d));
    2036              : }
    2037              : 
    2038              : /*
    2039              :  * Compute the approximate distance between two UUID values.
    2040              :  *
    2041              :  * XXX We do not need a perfectly accurate value, so we approximate the
    2042              :  * deltas (which would have to be 128-bit integers) with a 64-bit float.
    2043              :  * The small inaccuracies do not matter in practice, in the worst case
    2044              :  * we'll decide to merge ranges that are not the closest ones.
    2045              :  */
    2046              : Datum
    2047          864 : brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS)
    2048              : {
    2049              :     int         i;
    2050          864 :     float8      delta = 0;
    2051              : 
    2052          864 :     Datum       a1 = PG_GETARG_DATUM(0);
    2053          864 :     Datum       a2 = PG_GETARG_DATUM(1);
    2054              : 
    2055          864 :     pg_uuid_t  *u1 = DatumGetUUIDP(a1);
    2056          864 :     pg_uuid_t  *u2 = DatumGetUUIDP(a2);
    2057              : 
    2058              :     /*
    2059              :      * We know the values are range boundaries, but the range may be collapsed
    2060              :      * (i.e. single points), with equal values.
    2061              :      */
    2062              :     Assert(DatumGetBool(DirectFunctionCall2(uuid_le, a1, a2)));
    2063              : 
    2064              :     /* compute approximate delta as a double precision value */
    2065        14688 :     for (i = UUID_LEN - 1; i >= 0; i--)
    2066              :     {
    2067        13824 :         delta += (int) u2->data[i] - (int) u1->data[i];
    2068        13824 :         delta /= 256;
    2069              :     }
    2070              : 
    2071              :     Assert(delta >= 0);
    2072              : 
    2073          864 :     PG_RETURN_FLOAT8(delta);
    2074              : }
    2075              : 
    2076              : /*
    2077              :  * Compute the approximate distance between two dates.
    2078              :  */
    2079              : Datum
    2080          819 : brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
    2081              : {
    2082          819 :     float8      delta = 0;
    2083          819 :     DateADT     dateVal1 = PG_GETARG_DATEADT(0);
    2084          819 :     DateADT     dateVal2 = PG_GETARG_DATEADT(1);
    2085              : 
    2086          819 :     delta = (float8) dateVal2 - (float8) dateVal1;
    2087              : 
    2088              :     Assert(delta >= 0);
    2089              : 
    2090          819 :     PG_RETURN_FLOAT8(delta);
    2091              : }
    2092              : 
    2093              : /*
    2094              :  * Compute the approximate distance between two time (without tz) values.
    2095              :  *
    2096              :  * TimeADT is just an int64, so we simply subtract the values directly.
    2097              :  */
    2098              : Datum
    2099          513 : brin_minmax_multi_distance_time(PG_FUNCTION_ARGS)
    2100              : {
    2101          513 :     float8      delta = 0;
    2102              : 
    2103          513 :     TimeADT     ta = PG_GETARG_TIMEADT(0);
    2104          513 :     TimeADT     tb = PG_GETARG_TIMEADT(1);
    2105              : 
    2106          513 :     delta = (tb - ta);
    2107              : 
    2108              :     Assert(delta >= 0);
    2109              : 
    2110          513 :     PG_RETURN_FLOAT8(delta);
    2111              : }
    2112              : 
    2113              : /*
    2114              :  * Compute the approximate distance between two timetz values.
    2115              :  *
    2116              :  * Simply subtracts the TimeADT (int64) values embedded in TimeTzADT.
    2117              :  */
    2118              : Datum
    2119          399 : brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS)
    2120              : {
    2121          399 :     float8      delta = 0;
    2122              : 
    2123          399 :     TimeTzADT  *ta = PG_GETARG_TIMETZADT_P(0);
    2124          399 :     TimeTzADT  *tb = PG_GETARG_TIMETZADT_P(1);
    2125              : 
    2126          399 :     delta = (tb->time - ta->time) + (tb->zone - ta->zone) * USECS_PER_SEC;
    2127              : 
    2128              :     Assert(delta >= 0);
    2129              : 
    2130          399 :     PG_RETURN_FLOAT8(delta);
    2131              : }
    2132              : 
    2133              : /*
    2134              :  * Compute the distance between two timestamp values.
    2135              :  */
    2136              : Datum
    2137         1332 : brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
    2138              : {
    2139         1332 :     float8      delta = 0;
    2140              : 
    2141         1332 :     Timestamp   dt1 = PG_GETARG_TIMESTAMP(0);
    2142         1332 :     Timestamp   dt2 = PG_GETARG_TIMESTAMP(1);
    2143              : 
    2144         1332 :     delta = (float8) dt2 - (float8) dt1;
    2145              : 
    2146              :     Assert(delta >= 0);
    2147              : 
    2148         1332 :     PG_RETURN_FLOAT8(delta);
    2149              : }
    2150              : 
    2151              : /*
    2152              :  * Compute the distance between two interval values.
    2153              :  */
    2154              : Datum
    2155          768 : brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
    2156              : {
    2157          768 :     float8      delta = 0;
    2158              : 
    2159          768 :     Interval   *ia = PG_GETARG_INTERVAL_P(0);
    2160          768 :     Interval   *ib = PG_GETARG_INTERVAL_P(1);
    2161              : 
    2162              :     int64       dayfraction;
    2163              :     int64       days;
    2164              : 
    2165              :     /*
    2166              :      * Delta is (fractional) number of days between the intervals. Assume
    2167              :      * months have 30 days for consistency with interval_cmp_internal. We
    2168              :      * don't need to be exact, in the worst case we'll build a bit less
    2169              :      * efficient ranges. But we should not contradict interval_cmp.
    2170              :      */
    2171          768 :     dayfraction = (ib->time % USECS_PER_DAY) - (ia->time % USECS_PER_DAY);
    2172          768 :     days = (ib->time / USECS_PER_DAY) - (ia->time / USECS_PER_DAY);
    2173          768 :     days += (int64) ib->day - (int64) ia->day;
    2174          768 :     days += ((int64) ib->month - (int64) ia->month) * INT64CONST(30);
    2175              : 
    2176              :     /* convert to double precision */
    2177          768 :     delta = (double) days + dayfraction / (double) USECS_PER_DAY;
    2178              : 
    2179              :     Assert(delta >= 0);
    2180              : 
    2181          768 :     PG_RETURN_FLOAT8(delta);
    2182              : }
    2183              : 
    2184              : /*
    2185              :  * Compute the distance between two pg_lsn values.
    2186              :  *
    2187              :  * LSN is just an int64 encoding position in the stream, so just subtract
    2188              :  * those int64 values directly.
    2189              :  */
    2190              : Datum
    2191          519 : brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS)
    2192              : {
    2193          519 :     float8      delta = 0;
    2194              : 
    2195          519 :     XLogRecPtr  lsna = PG_GETARG_LSN(0);
    2196          519 :     XLogRecPtr  lsnb = PG_GETARG_LSN(1);
    2197              : 
    2198          519 :     delta = (lsnb - lsna);
    2199              : 
    2200              :     Assert(delta >= 0);
    2201              : 
    2202          519 :     PG_RETURN_FLOAT8(delta);
    2203              : }
    2204              : 
    2205              : /*
    2206              :  * Compute the distance between two macaddr values.
    2207              :  *
    2208              :  * mac addresses are treated as 6 unsigned chars, so do the same thing we
    2209              :  * already do for UUID values.
    2210              :  */
    2211              : Datum
    2212          399 : brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS)
    2213              : {
    2214              :     float8      delta;
    2215              : 
    2216          399 :     macaddr    *a = PG_GETARG_MACADDR_P(0);
    2217          399 :     macaddr    *b = PG_GETARG_MACADDR_P(1);
    2218              : 
    2219          399 :     delta = ((float8) b->f - (float8) a->f);
    2220          399 :     delta /= 256;
    2221              : 
    2222          399 :     delta += ((float8) b->e - (float8) a->e);
    2223          399 :     delta /= 256;
    2224              : 
    2225          399 :     delta += ((float8) b->d - (float8) a->d);
    2226          399 :     delta /= 256;
    2227              : 
    2228          399 :     delta += ((float8) b->c - (float8) a->c);
    2229          399 :     delta /= 256;
    2230              : 
    2231          399 :     delta += ((float8) b->b - (float8) a->b);
    2232          399 :     delta /= 256;
    2233              : 
    2234          399 :     delta += ((float8) b->a - (float8) a->a);
    2235          399 :     delta /= 256;
    2236              : 
    2237              :     Assert(delta >= 0);
    2238              : 
    2239          399 :     PG_RETURN_FLOAT8(delta);
    2240              : }
    2241              : 
    2242              : /*
    2243              :  * Compute the distance between two macaddr8 values.
    2244              :  *
    2245              :  * macaddr8 addresses are 8 unsigned chars, so do the same thing we
    2246              :  * already do for UUID values.
    2247              :  */
    2248              : Datum
    2249          519 : brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS)
    2250              : {
    2251              :     float8      delta;
    2252              : 
    2253          519 :     macaddr8   *a = PG_GETARG_MACADDR8_P(0);
    2254          519 :     macaddr8   *b = PG_GETARG_MACADDR8_P(1);
    2255              : 
    2256          519 :     delta = ((float8) b->h - (float8) a->h);
    2257          519 :     delta /= 256;
    2258              : 
    2259          519 :     delta += ((float8) b->g - (float8) a->g);
    2260          519 :     delta /= 256;
    2261              : 
    2262          519 :     delta += ((float8) b->f - (float8) a->f);
    2263          519 :     delta /= 256;
    2264              : 
    2265          519 :     delta += ((float8) b->e - (float8) a->e);
    2266          519 :     delta /= 256;
    2267              : 
    2268          519 :     delta += ((float8) b->d - (float8) a->d);
    2269          519 :     delta /= 256;
    2270              : 
    2271          519 :     delta += ((float8) b->c - (float8) a->c);
    2272          519 :     delta /= 256;
    2273              : 
    2274          519 :     delta += ((float8) b->b - (float8) a->b);
    2275          519 :     delta /= 256;
    2276              : 
    2277          519 :     delta += ((float8) b->a - (float8) a->a);
    2278          519 :     delta /= 256;
    2279              : 
    2280              :     Assert(delta >= 0);
    2281              : 
    2282          519 :     PG_RETURN_FLOAT8(delta);
    2283              : }
    2284              : 
    2285              : /*
    2286              :  * Compute the distance between two inet values.
    2287              :  *
    2288              :  * The distance is defined as the difference between 32-bit/128-bit values,
    2289              :  * depending on the IP version. The distance is computed by subtracting
    2290              :  * the bytes and normalizing it to [0,1] range for each IP family.
    2291              :  * Addresses from different families are considered to be in maximum
    2292              :  * distance, which is 1.0.
    2293              :  *
    2294              :  * XXX Does this need to consider the mask (bits)?  For now, it's ignored.
    2295              :  */
    2296              : Datum
    2297         1161 : brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
    2298              : {
    2299              :     float8      delta;
    2300              :     int         i;
    2301              :     int         len;
    2302              :     unsigned char *addra,
    2303              :                *addrb;
    2304              : 
    2305         1161 :     inet       *ipa = PG_GETARG_INET_PP(0);
    2306         1161 :     inet       *ipb = PG_GETARG_INET_PP(1);
    2307              : 
    2308              :     int         lena,
    2309              :                 lenb;
    2310              : 
    2311              :     /*
    2312              :      * If the addresses are from different families, consider them to be in
    2313              :      * maximal possible distance (which is 1.0).
    2314              :      */
    2315         1161 :     if (ip_family(ipa) != ip_family(ipb))
    2316           96 :         PG_RETURN_FLOAT8(1.0);
    2317              : 
    2318         1065 :     addra = (unsigned char *) palloc(ip_addrsize(ipa));
    2319         1065 :     memcpy(addra, ip_addr(ipa), ip_addrsize(ipa));
    2320              : 
    2321         1065 :     addrb = (unsigned char *) palloc(ip_addrsize(ipb));
    2322         1065 :     memcpy(addrb, ip_addr(ipb), ip_addrsize(ipb));
    2323              : 
    2324              :     /*
    2325              :      * The length is calculated from the mask length, because we sort the
    2326              :      * addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the
    2327              :      * first range starts at A.B.C.0, which is before A.B.C.1). We don't want
    2328              :      * to produce a negative delta in this case, so we clear all the bits
    2329              :      * beyond the mask length in both addresses.
    2330              :      *
    2331              :      * Note that the masking loop below works at bit granularity, so partial
    2332              :      * bytes are handled too, not just whole bytes.
    2333              :      */
    2334         1065 :     lena = ip_bits(ipa);
    2335         1065 :     lenb = ip_bits(ipb);
    2336              : 
    2337         1065 :     len = ip_addrsize(ipa);
    2338              : 
    2339              :     /* apply the network mask to both addresses */
    2340         8061 :     for (i = 0; i < len; i++)
    2341              :     {
    2342              :         unsigned char mask;
    2343              :         int         nbits;
    2344              : 
    2345         6996 :         nbits = Max(0, lena - (i * 8));
    2346         6996 :         if (nbits < 8)
    2347              :         {
    2348          837 :             mask = (0xFF << (8 - nbits));
    2349          837 :             addra[i] = (addra[i] & mask);
    2350              :         }
    2351              : 
    2352         6996 :         nbits = Max(0, lenb - (i * 8));
    2353         6996 :         if (nbits < 8)
    2354              :         {
    2355          834 :             mask = (0xFF << (8 - nbits));
    2356          834 :             addrb[i] = (addrb[i] & mask);
    2357              :         }
    2358              :     }
    2359              : 
    2360              :     /* Calculate the difference between the addresses. */
    2361         1065 :     delta = 0;
    2362         8061 :     for (i = len - 1; i >= 0; i--)
    2363              :     {
    2364         6996 :         unsigned char a = addra[i];
    2365         6996 :         unsigned char b = addrb[i];
    2366              : 
    2367         6996 :         delta += (float8) b - (float8) a;
    2368         6996 :         delta /= 256;
    2369              :     }
    2370              : 
    2371              :     Assert((delta >= 0) && (delta <= 1));
    2372              : 
    2373         1065 :     pfree(addra);
    2374         1065 :     pfree(addrb);
    2375              : 
    2376         1065 :     PG_RETURN_FLOAT8(delta);
    2377              : }
    2378              : 
    2379              : static void
    2380         9057 : brin_minmax_multi_serialize(BrinDesc *bdesc, Datum src, Datum *dst)
    2381              : {
    2382         9057 :     Ranges     *ranges = (Ranges *) DatumGetPointer(src);
    2383              :     SerializedRanges *s;
    2384              : 
    2385              :     /*
    2386              :      * In batch mode, we need to compress the accumulated values to the
    2387              :      * actually requested number of values/ranges.
    2388              :      */
    2389         9057 :     compactify_ranges(bdesc, ranges, ranges->target_maxvalues);
    2390              : 
    2391              :     /* At this point everything has to be fully sorted. */
    2392              :     Assert(ranges->nsorted == ranges->nvalues);
    2393              : 
    2394         9057 :     s = brin_range_serialize(ranges);
    2395         9057 :     dst[0] = PointerGetDatum(s);
    2396         9057 : }
    2397              : 
    2398              : static int
    2399         2484 : brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts)
    2400              : {
    2401         2484 :     return MinMaxMultiGetValuesPerRange(opts);
    2402              : }
    2403              : 
    2404              : /*
    2405              :  * Examine the given index tuple (which contains the partial status of a
    2406              :  * certain page range) by comparing it to the given value that comes from
    2407              :  * another heap tuple.  If the new value is outside the min/max range
    2408              :  * specified by the existing tuple values, update the index tuple and return
    2409              :  * true.  Otherwise, return false and leave the index tuple unmodified.
    2410              :  */
    2411              : Datum
    2412        70377 : brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
    2413              : {
    2414        70377 :     BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    2415        70377 :     BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
    2416        70377 :     Datum       newval = PG_GETARG_DATUM(2);
    2417        70377 :     bool        isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
    2418        70377 :     MinMaxMultiOptions *opts = (MinMaxMultiOptions *) PG_GET_OPCLASS_OPTIONS();
    2419        70377 :     Oid         colloid = PG_GET_COLLATION();
    2420        70377 :     bool        modified = false;
    2421              :     Form_pg_attribute attr;
    2422              :     AttrNumber  attno;
    2423              :     Ranges     *ranges;
    2424        70377 :     SerializedRanges *serialized = NULL;
    2425              : 
    2426              :     Assert(!isnull);
    2427              : 
    2428        70377 :     attno = column->bv_attno;
    2429        70377 :     attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
    2430              : 
    2431              :     /* use the already deserialized value, if possible */
    2432        70377 :     ranges = (Ranges *) DatumGetPointer(column->bv_mem_value);
    2433              : 
    2434              :     /*
    2435              :      * If this is the first non-null value, we need to initialize the range
    2436              :      * list. Otherwise, just extract the existing range list from BrinValues.
    2437              :      *
    2438              :      * When starting with an empty range, we assume this is a batch mode and
    2439              :      * we use a larger buffer. The buffer size is derived from the BRIN range
    2440              :      * size, number of rows per page, with some sensible min/max values. A
    2441              :      * small buffer would be bad for performance, but a large buffer might
    2442              :      * require a lot of memory (because of keeping all the values).
    2443              :      */
    2444        70377 :     if (column->bv_allnulls)
    2445              :     {
    2446              :         MemoryContext oldctx;
    2447              : 
    2448              :         int         target_maxvalues;
    2449              :         int         maxvalues;
    2450         2484 :         BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
    2451              : 
    2452              :         /* what was specified as a reloption? */
    2453         2484 :         target_maxvalues = brin_minmax_multi_get_values(bdesc, opts);
    2454              : 
    2455              :         /*
    2456              :          * Determine the insert buffer size - we use 10x the target, capped to
    2457              :          * the maximum number of values in the heap range. This is more than
    2458              :          * enough, considering the actual number of rows per page is likely
    2459              :          * much lower, but meh.
    2460              :          */
    2461         2484 :         maxvalues = Min(target_maxvalues * MINMAX_BUFFER_FACTOR,
    2462              :                         MaxHeapTuplesPerPage * pagesPerRange);
    2463              : 
    2464              :         /* but always at least the original value */
    2465         2484 :         maxvalues = Max(maxvalues, target_maxvalues);
    2466              : 
    2467              :         /* always cap by MIN/MAX */
    2468         2484 :         maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
    2469         2484 :         maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
    2470              : 
    2471         2484 :         oldctx = MemoryContextSwitchTo(column->bv_context);
    2472         2484 :         ranges = minmax_multi_init(maxvalues);
    2473         2484 :         ranges->attno = attno;
    2474         2484 :         ranges->colloid = colloid;
    2475         2484 :         ranges->typid = attr->atttypid;
    2476         2484 :         ranges->target_maxvalues = target_maxvalues;
    2477              : 
    2478              :         /* we'll certainly need the comparator, so just look it up now */
    2479         2484 :         ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    2480              :                                                          BTLessStrategyNumber);
    2481              : 
    2482         2484 :         MemoryContextSwitchTo(oldctx);
    2483              : 
    2484         2484 :         column->bv_allnulls = false;
    2485         2484 :         modified = true;
    2486              : 
    2487         2484 :         column->bv_mem_value = PointerGetDatum(ranges);
    2488         2484 :         column->bv_serialize = brin_minmax_multi_serialize;
    2489              :     }
    2490        67893 :     else if (!ranges)
    2491              :     {
    2492              :         MemoryContext oldctx;
    2493              : 
    2494              :         int         maxvalues;
    2495         7110 :         BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
    2496              : 
    2497         7110 :         oldctx = MemoryContextSwitchTo(column->bv_context);
    2498              : 
    2499         7110 :         serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
    2500              : 
    2501              :         /*
    2502              :          * Determine the insert buffer size - we use 10x the target, capped to
    2503              :          * the maximum number of values in the heap range. This is more than
    2504              :          * enough, considering the actual number of rows per page is likely
    2505              :          * much lower, but meh.
    2506              :          */
    2507         7110 :         maxvalues = Min(serialized->maxvalues * MINMAX_BUFFER_FACTOR,
    2508              :                         MaxHeapTuplesPerPage * pagesPerRange);
    2509              : 
    2510              :         /* but always at least the original value */
    2511         7110 :         maxvalues = Max(maxvalues, serialized->maxvalues);
    2512              : 
    2513              :         /* always cap by MIN/MAX */
    2514         7110 :         maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
    2515         7110 :         maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
    2516              : 
    2517         7110 :         ranges = brin_range_deserialize(maxvalues, serialized);
    2518              : 
    2519         7110 :         ranges->attno = attno;
    2520         7110 :         ranges->colloid = colloid;
    2521         7110 :         ranges->typid = attr->atttypid;
    2522              : 
    2523              :         /* we'll certainly need the comparator, so just look it up now */
    2524         7110 :         ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    2525              :                                                          BTLessStrategyNumber);
    2526              : 
    2527         7110 :         column->bv_mem_value = PointerGetDatum(ranges);
    2528         7110 :         column->bv_serialize = brin_minmax_multi_serialize;
    2529              : 
    2530         7110 :         MemoryContextSwitchTo(oldctx);
    2531              :     }
    2532              : 
    2533              :     /*
    2534              :      * Try to add the new value to the range. We need to update the modified
    2535              :      * flag, so that we serialize the updated summary later.
    2536              :      */
    2537        70377 :     modified |= range_add_value(bdesc, colloid, attno, attr, ranges, newval);
    2538              : 
    2539              : 
    2540        70377 :     PG_RETURN_BOOL(modified);
    2541              : }
    2542              : 
    2543              : /*
    2544              :  * Given an index tuple corresponding to a certain page range and a scan key,
    2545              :  * return whether the scan key is consistent with the index tuple's min/max
    2546              :  * values.  Return true if so, false otherwise.
    2547              :  */
    2548              : Datum
    2549        15693 : brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
    2550              : {
    2551        15693 :     BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    2552        15693 :     BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
    2553        15693 :     ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
    2554        15693 :     int         nkeys = PG_GETARG_INT32(3);
    2555              : 
    2556        15693 :     Oid         colloid = PG_GET_COLLATION(),
    2557              :                 subtype;
    2558              :     AttrNumber  attno;
    2559              :     Datum       value;
    2560              :     FmgrInfo   *finfo;
    2561              :     SerializedRanges *serialized;
    2562              :     Ranges     *ranges;
    2563              :     int         keyno;
    2564              :     int         rangeno;
    2565              :     int         i;
    2566              : 
    2567        15693 :     attno = column->bv_attno;
    2568              : 
    2569        15693 :     serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
    2570        15693 :     ranges = brin_range_deserialize(serialized->maxvalues, serialized);
    2571              : 
    2572              :     /* inspect the ranges, and for each one evaluate the scan keys */
    2573        17004 :     for (rangeno = 0; rangeno < ranges->nranges; rangeno++)
    2574              :     {
    2575         1656 :         Datum       minval = ranges->values[2 * rangeno];
    2576         1656 :         Datum       maxval = ranges->values[2 * rangeno + 1];
    2577              : 
    2578              :         /* assume the range is matching, and we'll try to prove otherwise */
    2579         1656 :         bool        matching = true;
    2580              : 
    2581         2001 :         for (keyno = 0; keyno < nkeys; keyno++)
    2582              :         {
    2583              :             bool        matches;
    2584         1656 :             ScanKey     key = keys[keyno];
    2585              : 
    2586              :             /* NULL keys are handled and filtered-out in bringetbitmap */
    2587              :             Assert(!(key->sk_flags & SK_ISNULL));
    2588              : 
    2589         1656 :             attno = key->sk_attno;
    2590         1656 :             subtype = key->sk_subtype;
    2591         1656 :             value = key->sk_argument;
    2592         1656 :             switch (key->sk_strategy)
    2593              :             {
    2594          459 :                 case BTLessStrategyNumber:
    2595              :                 case BTLessEqualStrategyNumber:
    2596          459 :                     finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2597          459 :                                                                key->sk_strategy);
    2598              :                     /* first value from the array */
    2599          459 :                     matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, minval, value));
    2600          459 :                     break;
    2601              : 
    2602          495 :                 case BTEqualStrategyNumber:
    2603              :                     {
    2604              :                         Datum       compar;
    2605              :                         FmgrInfo   *cmpFn;
    2606              : 
    2607              :                         /* by default this range does not match */
    2608          495 :                         matches = false;
    2609              : 
    2610              :                         /*
    2611              :                          * Compare the scan key value with the boundaries
    2612              :                          * of this range. First check if it's less than the
    2613              :                          * range minimum (minval), then if it's greater than
    2614              :                          * the range maximum (maxval).
    2615              :                          */
    2616          495 :                         cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2617              :                                                                    BTGreaterStrategyNumber);
    2618          495 :                         compar = FunctionCall2Coll(cmpFn, colloid, minval, value);
    2619              : 
    2620              :                         /* smaller than the smallest value in this range */
    2621          495 :                         if (DatumGetBool(compar))
    2622           60 :                             break;
    2623              : 
    2624          435 :                         cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2625              :                                                                    BTLessStrategyNumber);
    2626          435 :                         compar = FunctionCall2Coll(cmpFn, colloid, maxval, value);
    2627              : 
    2628              :                         /* larger than the largest value in this range */
    2629          435 :                         if (DatumGetBool(compar))
    2630          411 :                             break;
    2631              : 
    2632              :                         /*
    2633              :                          * We haven't managed to eliminate this range, so
    2634              :                          * consider it matching.
    2635              :                          */
    2636           24 :                         matches = true;
    2637              : 
    2638           24 :                         break;
    2639              :                     }
    2640          702 :                 case BTGreaterEqualStrategyNumber:
    2641              :                 case BTGreaterStrategyNumber:
    2642          702 :                     finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2643          702 :                                                                key->sk_strategy);
    2644              :                     /* last value from the array */
    2645          702 :                     matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, maxval, value));
    2646          702 :                     break;
    2647              : 
    2648            0 :                 default:
    2649              :                     /* shouldn't happen */
    2650            0 :                     elog(ERROR, "invalid strategy number %d", key->sk_strategy);
    2651              :                     matches = false;
    2652              :                     break;
    2653              :             }
    2654              : 
    2655              :             /* the range has to match all the scan keys */
    2656         1656 :             matching &= matches;
    2657              : 
    2658              :             /* once we find a non-matching key, we're done */
    2659         1656 :             if (!matching)
    2660         1311 :                 break;
    2661              :         }
    2662              : 
    2663              :         /*
    2664              :          * have we found a range matching all scan keys? if yes, we're done
    2665              :          */
    2666         1656 :         if (matching)
    2667          345 :             PG_RETURN_BOOL(true);
    2668              :     }
    2669              : 
    2670              :     /*
    2671              :      * And now inspect the values. We don't bother with doing a binary search
    2672              :      * here, because we're dealing with serialized / fully compacted ranges,
    2673              :      * so there should be only very few values.
    2674              :      */
    2675        31138 :     for (i = 0; i < ranges->nvalues; i++)
    2676              :     {
    2677        27685 :         Datum       val = ranges->values[2 * ranges->nranges + i];
    2678              : 
    2679              :         /* assume the range is matching, and we'll try to prove otherwise */
    2680        27685 :         bool        matching = true;
    2681              : 
    2682        39580 :         for (keyno = 0; keyno < nkeys; keyno++)
    2683              :         {
    2684              :             bool        matches;
    2685        27685 :             ScanKey     key = keys[keyno];
    2686              : 
    2687              :             /* NULL keys are filtered out in bringetbitmap; skip any that remain */
    2688        27685 :             if (key->sk_flags & SK_ISNULL)
    2689            0 :                 continue;
    2690              : 
    2691        27685 :             attno = key->sk_attno;
    2692        27685 :             subtype = key->sk_subtype;
    2693        27685 :             value = key->sk_argument;
    2694        27685 :             switch (key->sk_strategy)
    2695              :             {
    2696        27685 :                 case BTLessStrategyNumber:
    2697              :                 case BTLessEqualStrategyNumber:
    2698              :                 case BTEqualStrategyNumber:
    2699              :                 case BTGreaterEqualStrategyNumber:
    2700              :                 case BTGreaterStrategyNumber:
    2701              : 
    2702        27685 :                     finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
    2703        27685 :                                                                key->sk_strategy);
    2704        27685 :                     matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, val, value));
    2705        27685 :                     break;
    2706              : 
    2707            0 :                 default:
    2708              :                     /* shouldn't happen */
    2709            0 :                     elog(ERROR, "invalid strategy number %d", key->sk_strategy);
    2710              :                     matches = false;
    2711              :                     break;
    2712              :             }
    2713              : 
    2714              :             /* the range has to match all the scan keys */
    2715        27685 :             matching &= matches;
    2716              : 
    2717              :             /* once we find a non-matching key, we're done */
    2718        27685 :             if (!matching)
    2719        15790 :                 break;
    2720              :         }
    2721              : 
    2722              :         /* have we found a range matching all scan keys? if yes, we're done */
    2723        27685 :         if (matching)
    2724        11895 :             PG_RETURN_BOOL(true);
    2725              :     }
    2726              : 
    2727         3453 :     PG_RETURN_BOOL(false);
    2728              : }
    2729              : 
    2730              : /*
    2731              :  * Given two BrinValues, update the first of them as a union of the summary
    2732              :  * values contained in both.  The second one is untouched.
    2733              :  */
    2734              : Datum
    2735            6 : brin_minmax_multi_union(PG_FUNCTION_ARGS)
    2736              : {
    2737            6 :     BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
    2738            6 :     BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
    2739            6 :     BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
    2740              : 
    2741            6 :     Oid         colloid = PG_GET_COLLATION();
    2742              :     SerializedRanges *serialized_a;
    2743              :     SerializedRanges *serialized_b;
    2744              :     Ranges     *ranges_a;
    2745              :     Ranges     *ranges_b;
    2746              :     AttrNumber  attno;
    2747              :     Form_pg_attribute attr;
    2748              :     ExpandedRange *eranges;
    2749              :     int         neranges;
    2750              :     FmgrInfo   *cmpFn,
    2751              :                *distanceFn;
    2752              :     DistanceValue *distances;
    2753              :     MemoryContext ctx;
    2754              :     MemoryContext oldctx;
    2755              : 
    2756              :     Assert(col_a->bv_attno == col_b->bv_attno);
    2757              :     Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
    2758              : 
    2759            6 :     attno = col_a->bv_attno;
    2760            6 :     attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
    2761              : 
    2762            6 :     serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]);
    2763            6 :     serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]);
    2764              : 
    2765            6 :     ranges_a = brin_range_deserialize(serialized_a->maxvalues, serialized_a);
    2766            6 :     ranges_b = brin_range_deserialize(serialized_b->maxvalues, serialized_b);
    2767              : 
    2768              :     /* make sure neither of the ranges is NULL */
    2769              :     Assert(ranges_a && ranges_b);
    2770              : 
    2771            6 :     neranges = (ranges_a->nranges + ranges_a->nvalues) +
    2772            6 :         (ranges_b->nranges + ranges_b->nvalues);
    2773              : 
    2774              :     /*
    2775              :      * The distanceFn calls (which may internally call e.g. numeric_le) may
    2776              :      * allocate quite a bit of memory, and we must not leak it. Otherwise,
    2777              :      * we'd have problems e.g. when building indexes. So we create a local
    2778              :      * memory context and make sure we free the memory before leaving this
    2779              :      * function (not after every call).
    2780              :      */
    2781            6 :     ctx = AllocSetContextCreate(CurrentMemoryContext,
    2782              :                                 "minmax-multi context",
    2783              :                                 ALLOCSET_DEFAULT_SIZES);
    2784              : 
    2785            6 :     oldctx = MemoryContextSwitchTo(ctx);
    2786              : 
    2787              :     /* allocate and fill */
    2788            6 :     eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
    2789              : 
    2790              :     /* fill the expanded ranges with entries for the first range */
    2791            6 :     fill_expanded_ranges(eranges, ranges_a->nranges + ranges_a->nvalues,
    2792              :                          ranges_a);
    2793              : 
    2794              :     /* and now add the entries for the second range */
    2795            6 :     fill_expanded_ranges(&eranges[ranges_a->nranges + ranges_a->nvalues],
    2796            6 :                          ranges_b->nranges + ranges_b->nvalues,
    2797              :                          ranges_b);
    2798              : 
    2799            6 :     cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
    2800              :                                                BTLessStrategyNumber);
    2801              : 
    2802              :     /* sort the expanded ranges */
    2803            6 :     neranges = sort_expanded_ranges(cmpFn, colloid, eranges, neranges);
    2804              : 
    2805              :     /*
    2806              :      * We've loaded two different lists of expanded ranges, so some of them
    2807              :      * may be overlapping. So walk through them and merge them.
    2808              :      */
    2809            6 :     neranges = merge_overlapping_ranges(cmpFn, colloid, eranges, neranges);
    2810              : 
    2811              :     /* check that the combined ranges are correct (no overlaps, ordering) */
    2812            6 :     AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
    2813              : 
    2814              :     /*
    2815              :      * If needed, reduce some of the ranges.
    2816              :      *
    2817              :      * XXX This may be fairly expensive, so maybe we should do it only when
    2818              :      * it's actually needed (when we have too many ranges).
    2819              :      */
    2820              : 
    2821              :     /* build array of gap distances and sort them in ascending order */
    2822            6 :     distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
    2823            6 :     distances = build_distances(distanceFn, colloid, eranges, neranges);
    2824              : 
    2825              :     /*
    2826              :      * See how many values would be needed to store the current ranges, and if
    2827              :      * needed combine as many of them as necessary to get below the threshold.
    2828              :      * The collapsed ranges will be stored as a single value.
    2829              :      *
    2830              :      * XXX This does not apply the load factor, as we don't expect to add more
    2831              :      * values to the range, so we prefer to keep as many ranges as possible.
    2832              :      *
    2833              :      * XXX Can the maxvalues be different in the two ranges? Perhaps we should
    2834              :      * use maximum of those?
    2835              :      */
    2836            6 :     neranges = reduce_expanded_ranges(eranges, neranges, distances,
    2837              :                                       ranges_a->maxvalues,
    2838              :                                       cmpFn, colloid);
    2839              : 
    2840              :     /* Is the result of reducing expanded ranges correct? */
    2841            6 :     AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
    2842              : 
    2843              :     /* update the first range summary */
    2844            6 :     store_expanded_ranges(ranges_a, eranges, neranges);
    2845              : 
    2846            6 :     MemoryContextSwitchTo(oldctx);
    2847            6 :     MemoryContextDelete(ctx);
    2848              : 
    2849              :     /* cleanup and update the serialized value */
    2850            6 :     pfree(serialized_a);
    2851            6 :     col_a->bv_values[0] = PointerGetDatum(brin_range_serialize(ranges_a));
    2852              : 
    2853            6 :     PG_RETURN_VOID();
    2854              : }
    2855              : 
    2856              : /*
    2857              :  * Cache and return minmax multi opclass support procedure
    2858              :  *
    2859              :  * Return the procedure corresponding to the given function support number.
    2860              :  * A missing procedure is reported as an error; NULL is never returned.
    2861              :  */
    2862              : static FmgrInfo *
    2863         3185 : minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
    2864              : {
    2865              :     MinmaxMultiOpaque *opaque;
    2866         3185 :     uint16      basenum = procnum - PROCNUM_BASE;   /* slot in extra_procinfos[] */
    2867              : 
    2868              :     /*
    2869              :      * Cached in the opaque struct to avoid repetitive syscache lookups; an
    2870              :      * entry with fn_oid == InvalidOid has not been looked up yet.
    2871              :      */
    2872         3185 :     opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
    2873              : 
    2874         3185 :     if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
    2875              :     {
    2876          295 :         if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
    2877              :                                                 procnum)))
    2878          295 :             fmgr_info_copy(&opaque->extra_procinfos[basenum],
    2879              :                            index_getprocinfo(bdesc->bd_index, attno, procnum),
    2880              :                            bdesc->bd_context);
    2881              :         else                /* the opclass lacks this support function */
    2882            0 :             ereport(ERROR,
    2883              :                     errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
    2884              :                     errmsg_internal("invalid opclass definition"),
    2885              :                     errdetail_internal("The operator class is missing support function %d for column %d.",
    2886              :                                        procnum, attno));
    2887              :     }
    2888              : 
    2889         3185 :     return &opaque->extra_procinfos[basenum];
    2890              : }
    2891              : 
    2892              : /*
    2893              :  * Cache and return the procedure for the given strategy.
    2894              :  *
    2895              :  * NOTE(review): presumably mirrors the plain minmax opclass equivalent (an
    2896              :  * earlier note named this very function) -- confirm; keep the two in sync.
    2897              :  */
    2898              : static FmgrInfo *
    2899       242031 : minmax_multi_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype,
    2900              :                                    uint16 strategynum)
    2901              : {
    2902              :     MinmaxMultiOpaque *opaque;
    2903              : 
    2904              :     Assert(strategynum >= 1 &&
    2905              :            strategynum <= BTMaxStrategyNumber);
    2906              : 
    2907       242031 :     opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
    2908              : 
    2909              :     /*
    2910              :      * We cache the procedures for the previous subtype in the opaque struct,
    2911              :      * to avoid repetitive syscache lookups.  If the subtype changed,
    2912              :      * invalidate all the cached entries.
    2913              :      */
    2914       242031 :     if (opaque->cached_subtype != subtype)
    2915              :     {
    2916              :         uint16      i;
    2917              : 
    2918         5892 :         for (i = 1; i <= BTMaxStrategyNumber; i++)
    2919         4910 :             opaque->strategy_procinfos[i - 1].fn_oid = InvalidOid;
    2920          982 :         opaque->cached_subtype = subtype;
    2921              :     }
    2922              : 
    2923       242031 :     if (opaque->strategy_procinfos[strategynum - 1].fn_oid == InvalidOid)
    2924              :     {                       /* no cached entry: look the operator up in the opfamily */
    2925              :         Form_pg_attribute attr;
    2926              :         HeapTuple   tuple;
    2927              :         Oid         opfamily,
    2928              :                     oprid;
    2929              : 
    2930         1439 :         opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
    2931         1439 :         attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
    2932         1439 :         tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
    2933              :                                 ObjectIdGetDatum(attr->atttypid),
    2934              :                                 ObjectIdGetDatum(subtype),
    2935              :                                 UInt16GetDatum(strategynum));
    2936         1439 :         if (!HeapTupleIsValid(tuple))
    2937            0 :             elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
    2938              :                  strategynum, attr->atttypid, subtype, opfamily);
    2939              : 
    2940         1439 :         oprid = DatumGetObjectId(SysCacheGetAttrNotNull(AMOPSTRATEGY, tuple,
    2941              :                                                         Anum_pg_amop_amopopr));
    2942         1439 :         ReleaseSysCache(tuple);     /* oprid has already been extracted */
    2943              :         Assert(RegProcedureIsValid(oprid));
    2944              : 
    2945         1439 :         fmgr_info_cxt(get_opcode(oprid),
    2946         1439 :                       &opaque->strategy_procinfos[strategynum - 1],
    2947              :                       bdesc->bd_context);
    2948              :     }
    2949              : 
    2950       242031 :     return &opaque->strategy_procinfos[strategynum - 1];
    2951              : }
    2952              : 
                        /*
                         * brin_minmax_multi_options
                         *      - declares the reloptions accepted by this opclass.
                         *
                         * Adds a single integer option, "values_per_range", stored in the
                         * valuesPerRange field of MinMaxMultiOptions.  The three numeric
                         * arguments are presumably default, minimum and maximum -- TODO confirm
                         * against add_local_int_reloption.  The description string "desc" looks
                         * like a placeholder.
                         */
    2953              : Datum
    2954          712 : brin_minmax_multi_options(PG_FUNCTION_ARGS)
    2955              : {
    2956          712 :     local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
    2957              : 
    2958          712 :     init_local_reloptions(relopts, sizeof(MinMaxMultiOptions));
    2959              : 
    2960          712 :     add_local_int_reloption(relopts, "values_per_range", "desc",
    2961              :                             MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE, 8, 256,
    2962              :                             offsetof(MinMaxMultiOptions, valuesPerRange));
    2963              : 
    2964          712 :     PG_RETURN_VOID();
    2965              : }
    2966              : 
    2967              : /*
    2968              :  * brin_minmax_multi_summary_in
    2969              :  *      - input routine for type brin_minmax_multi_summary.
    2970              :  *
    2971              :  * brin_minmax_multi_summary is only used internally to represent summaries
    2972              :  * in BRIN minmax-multi indexes, so it has no operations of its own, and we
    2973              :  * disallow input too: this function unconditionally raises an error.
    2974              :  */
    2975              : Datum
    2976            0 : brin_minmax_multi_summary_in(PG_FUNCTION_ARGS)
    2977              : {
    2978              :     /*
    2979              :      * brin_minmax_multi_summary stores the data in binary form and parsing
    2980              :      * text input is not needed, so disallow this.
    2981              :      */
    2982            0 :     ereport(ERROR,
    2983              :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    2984              :              errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
    2985              : 
    2986              :     PG_RETURN_VOID();           /* keep compiler quiet */
    2987              : }
    2988              : 
    2989              : 
    2990              : /*
    2991              :  * brin_minmax_multi_summary_out
    2992              :  *      - output routine for type brin_minmax_multi_summary.
    2993              :  *
    2994              :  * BRIN minmax-multi summaries are serialized into a bytea value, but we
    2995              :  * output readable text: {nranges nvalues maxvalues [ranges] [values]}.
    2996              :  */
    2997              : Datum
    2998          120 : brin_minmax_multi_summary_out(PG_FUNCTION_ARGS)
    2999              : {
    3000              :     int         i;
    3001              :     int         idx;            /* next unread position in values[] */
    3002              :     SerializedRanges *ranges;
    3003              :     Ranges     *ranges_deserialized;
    3004              :     StringInfoData str;
    3005              :     bool        isvarlena;      /* set by getTypeOutputInfo, otherwise unused */
    3006              :     Oid         outfunc;
    3007              :     FmgrInfo    fmgrinfo;
    3008          120 :     ArrayBuildState *astate_values = NULL;
    3009              : 
    3010          120 :     initStringInfo(&str);
    3011          120 :     appendStringInfoChar(&str, '{');
    3012              : 
    3013              :     /*
    3014              :      * Detoast to get value with full 4B header (can't be stored in a toast
    3015              :      * table, but can use 1B header).
    3016              :      */
    3017          120 :     ranges = (SerializedRanges *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
    3018              : 
    3019              :     /* lookup output func for the type */
    3020          120 :     getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena);
    3021          120 :     fmgr_info(outfunc, &fmgrinfo);
    3022              : 
    3023              :     /* deserialize the range info into easy-to-process pieces */
    3024          120 :     ranges_deserialized = brin_range_deserialize(ranges->maxvalues, ranges);
    3025              : 
    3026          120 :     appendStringInfo(&str, "nranges: %d  nvalues: %d  maxvalues: %d",
    3027              :                      ranges_deserialized->nranges,
    3028              :                      ranges_deserialized->nvalues,
    3029              :                      ranges_deserialized->maxvalues);
    3030              : 
    3031              :     /* format the ranges: each takes two consecutive entries of values[] */
    3032          120 :     idx = 0;
    3033          120 :     for (i = 0; i < ranges_deserialized->nranges; i++)
    3034              :     {
    3035              :         char       *a,
    3036              :                    *b;
    3037              :         text       *c;
    3038              :         StringInfoData buf;
    3039              : 
    3040            0 :         initStringInfo(&buf);
    3041              : 
    3042            0 :         a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
    3043            0 :         b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
    3044              : 
    3045            0 :         appendStringInfo(&buf, "%s ... %s", a, b);
    3046              : 
    3047            0 :         c = cstring_to_text_with_len(buf.data, buf.len);
    3048              : 
    3049            0 :         astate_values = accumArrayResult(astate_values,
    3050              :                                          PointerGetDatum(c),
    3051              :                                          false,
    3052              :                                          TEXTOID,
    3053              :                                          CurrentMemoryContext);
    3054              :     }
    3055              : 
    3056          120 :     if (ranges_deserialized->nranges > 0)
    3057              :     {                           /* print the accumulated text array via the anyarray output function */
    3058              :         Oid         typoutput;
    3059              :         bool        typIsVarlena;
    3060              :         Datum       val;
    3061              :         char       *extval;
    3062              : 
    3063            0 :         getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
    3064              : 
    3065            0 :         val = makeArrayResult(astate_values, CurrentMemoryContext);
    3066              : 
    3067            0 :         extval = OidOutputFunctionCall(typoutput, val);
    3068              : 
    3069            0 :         appendStringInfo(&str, " ranges: %s", extval);
    3070              :     }
    3071              : 
    3072              :     /* format the individual values, read from values[] after the range boundaries */
    3073          120 :     astate_values = NULL;       /* start a fresh array */
    3074              : 
    3075         1296 :     for (i = 0; i < ranges_deserialized->nvalues; i++)
    3076              :     {
    3077              :         Datum       a;
    3078              :         text       *b;
    3079              : 
    3080         1176 :         a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]);
    3081         1176 :         b = cstring_to_text(DatumGetCString(a));
    3082              : 
    3083         1176 :         astate_values = accumArrayResult(astate_values,
    3084              :                                          PointerGetDatum(b),
    3085              :                                          false,
    3086              :                                          TEXTOID,
    3087              :                                          CurrentMemoryContext);
    3088              :     }
    3089              : 
    3090          120 :     if (ranges_deserialized->nvalues > 0)
    3091              :     {                           /* same rendering as for the ranges array above */
    3092              :         Oid         typoutput;
    3093              :         bool        typIsVarlena;
    3094              :         Datum       val;
    3095              :         char       *extval;
    3096              : 
    3097          120 :         getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
    3098              : 
    3099          120 :         val = makeArrayResult(astate_values, CurrentMemoryContext);
    3100              : 
    3101          120 :         extval = OidOutputFunctionCall(typoutput, val);
    3102              : 
    3103          120 :         appendStringInfo(&str, " values: %s", extval);
    3104              :     }
    3105              : 
    3106              : 
    3107          120 :     appendStringInfoChar(&str, '}');
    3108              : 
    3109          120 :     PG_RETURN_CSTRING(str.data);
    3110              : }
    3111              : 
    3112              : /*
    3113              :  * brin_minmax_multi_summary_recv
    3114              :  *      - binary input routine for type brin_minmax_multi_summary.
    3115              :  */
    3116              : Datum
    3117            0 : brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS)
    3118              : {
    3119            0 :     ereport(ERROR,              /* binary input is rejected unconditionally */
    3120              :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    3121              :              errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
    3122              : 
    3123              :     PG_RETURN_VOID();           /* keep compiler quiet */
    3124              : }
    3125              : 
    3126              : /*
    3127              :  * brin_minmax_multi_summary_send
    3128              :  *      - binary output routine for type brin_minmax_multi_summary.
    3129              :  *
    3130              :  * BRIN minmax-multi summaries are serialized in a bytea value (although
    3131              :  * the type is named differently), so we simply delegate to byteasend().
    3132              :  */
    3133              : Datum
    3134            0 : brin_minmax_multi_summary_send(PG_FUNCTION_ARGS)
    3135              : {
    3136            0 :     return byteasend(fcinfo);
    3137              : }
        

Generated by: LCOV version 2.0-1