LCOV - code coverage report
Current view: top level - contrib/intarray - _int_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 79 98 80.6 %
Date: 2026-02-09 22:18:06 Functions: 13 16 81.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * _int_selfuncs.c
       4             :  *    Functions for selectivity estimation of intarray operators
       5             :  *
       6             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    contrib/intarray/_int_selfuncs.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "_int.h"
      18             : #include "access/htup_details.h"
      19             : #include "catalog/pg_operator.h"
      20             : #include "catalog/pg_statistic.h"
      21             : #include "catalog/pg_type.h"
      22             : #include "commands/extension.h"
      23             : #include "miscadmin.h"
      24             : #include "utils/fmgrprotos.h"
      25             : #include "utils/lsyscache.h"
      26             : #include "utils/selfuncs.h"
      27             : 
      28           4 : PG_FUNCTION_INFO_V1(_int_overlap_sel);
      29           4 : PG_FUNCTION_INFO_V1(_int_contains_sel);
      30           4 : PG_FUNCTION_INFO_V1(_int_contained_sel);
      31           2 : PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
      32           2 : PG_FUNCTION_INFO_V1(_int_contains_joinsel);
      33           2 : PG_FUNCTION_INFO_V1(_int_contained_joinsel);
      34           4 : PG_FUNCTION_INFO_V1(_int_matchsel);
      35             : 
      36             : 
      37             : static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
      38             :                                        int nmcelems, float4 minfreq);
      39             : static int  compare_val_int4(const void *a, const void *b);
      40             : 
      41             : /*
      42             :  * Wrappers around the default array selectivity estimation functions.
      43             :  *
      44             :  * The default array selectivity operators for the @>, && and @< operators
      45             :  * work fine for integer arrays. However, if we tried to just use arraycontsel
      46             :  * and arraycontjoinsel directly as the cost estimator functions for our
      47             :  * operators, they would not work as intended, because they look at the
      48             :  * operator's OID. Our operators behave exactly like the built-in anyarray
      49             :  * versions, but we must tell the cost estimator functions which built-in
      50             :  * operators they correspond to. These wrappers just replace the operator
      51             :  * OID with the corresponding built-in operator's OID, and call the built-in
      52             :  * function.
      53             :  */
      54             : 
      55             : Datum
      56          14 : _int_overlap_sel(PG_FUNCTION_ARGS)
      57             : {
      58          14 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      59             :                                         PG_GETARG_DATUM(0),
      60             :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      61             :                                         PG_GETARG_DATUM(2),
      62             :                                         PG_GETARG_DATUM(3)));
      63             : }
      64             : 
      65             : Datum
      66          56 : _int_contains_sel(PG_FUNCTION_ARGS)
      67             : {
      68          56 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      69             :                                         PG_GETARG_DATUM(0),
      70             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
      71             :                                         PG_GETARG_DATUM(2),
      72             :                                         PG_GETARG_DATUM(3)));
      73             : }
      74             : 
      75             : Datum
      76          14 : _int_contained_sel(PG_FUNCTION_ARGS)
      77             : {
      78          14 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      79             :                                         PG_GETARG_DATUM(0),
      80             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
      81             :                                         PG_GETARG_DATUM(2),
      82             :                                         PG_GETARG_DATUM(3)));
      83             : }
      84             : 
      85             : Datum
      86           0 : _int_overlap_joinsel(PG_FUNCTION_ARGS)
      87             : {
      88           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
      89             :                                         PG_GETARG_DATUM(0),
      90             :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      91             :                                         PG_GETARG_DATUM(2),
      92             :                                         PG_GETARG_DATUM(3),
      93             :                                         PG_GETARG_DATUM(4)));
      94             : }
      95             : 
      96             : Datum
      97           0 : _int_contains_joinsel(PG_FUNCTION_ARGS)
      98             : {
      99           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     100             :                                         PG_GETARG_DATUM(0),
     101             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
     102             :                                         PG_GETARG_DATUM(2),
     103             :                                         PG_GETARG_DATUM(3),
     104             :                                         PG_GETARG_DATUM(4)));
     105             : }
     106             : 
     107             : Datum
     108           0 : _int_contained_joinsel(PG_FUNCTION_ARGS)
     109             : {
     110           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     111             :                                         PG_GETARG_DATUM(0),
     112             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
     113             :                                         PG_GETARG_DATUM(2),
     114             :                                         PG_GETARG_DATUM(3),
     115             :                                         PG_GETARG_DATUM(4)));
     116             : }
     117             : 
     118             : 
     119             : /*
     120             :  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
     121             :  */
     122             : Datum
     123          98 : _int_matchsel(PG_FUNCTION_ARGS)
     124             : {
     125          98 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     126             : 
     127          98 :     List       *args = (List *) PG_GETARG_POINTER(2);
     128          98 :     int         varRelid = PG_GETARG_INT32(3);
     129             :     VariableStatData vardata;
     130             :     Node       *other;
     131             :     bool        varonleft;
     132             :     Selectivity selec;
     133             :     QUERYTYPE  *query;
     134          98 :     Datum      *mcelems = NULL;
     135          98 :     float4     *mcefreqs = NULL;
     136          98 :     int         nmcelems = 0;
     137          98 :     float4      minfreq = 0.0;
     138          98 :     float4      nullfrac = 0.0;
     139             :     AttStatsSlot sslot;
     140             : 
     141             :     /*
     142             :      * If expression is not "variable @@ something" or "something @@ variable"
     143             :      * then punt and return a default estimate.
     144             :      */
     145          98 :     if (!get_restriction_variable(root, args, varRelid,
     146             :                                   &vardata, &other, &varonleft))
     147           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     148             : 
     149             :     /*
     150             :      * Variable should be int[]. We don't support cases where variable is
     151             :      * query_int.
     152             :      */
     153          98 :     if (vardata.vartype != INT4ARRAYOID)
     154           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     155             : 
     156             :     /*
     157             :      * Can't do anything useful if the something is not a constant, either.
     158             :      */
     159          98 :     if (!IsA(other, Const))
     160             :     {
     161           0 :         ReleaseVariableStats(vardata);
     162           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     163             :     }
     164             : 
     165             :     /*
     166             :      * The "@@" operator is strict, so we can cope with NULL right away.
     167             :      */
     168          98 :     if (((Const *) other)->constisnull)
     169             :     {
     170           0 :         ReleaseVariableStats(vardata);
     171           0 :         PG_RETURN_FLOAT8(0.0);
     172             :     }
     173             : 
     174             :     /*
     175             :      * Verify that the Const is a query_int, else return a default estimate.
     176             :      * (This could only fail if someone attached this estimator to the wrong
     177             :      * operator.)
     178             :      */
     179          98 :     if (((Const *) other)->consttype !=
     180          98 :         get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int"))
     181             :     {
     182           0 :         ReleaseVariableStats(vardata);
     183           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     184             :     }
     185             : 
     186          98 :     query = DatumGetQueryTypeP(((Const *) other)->constvalue);
     187             : 
     188             :     /* Empty query matches nothing */
     189          98 :     if (query->size == 0)
     190             :     {
     191           0 :         ReleaseVariableStats(vardata);
     192           0 :         PG_RETURN_FLOAT8(0.0);
     193             :     }
     194             : 
     195             :     /*
     196             :      * Get the statistics for the intarray column.
     197             :      *
     198             :      * We're interested in the Most-Common-Elements list, and the NULL
     199             :      * fraction.
     200             :      */
     201          98 :     if (HeapTupleIsValid(vardata.statsTuple))
     202             :     {
     203             :         Form_pg_statistic stats;
     204             : 
     205          86 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     206          86 :         nullfrac = stats->stanullfrac;
     207             : 
     208             :         /*
     209             :          * For an int4 array, the default array type analyze function will
     210             :          * collect a Most Common Elements list, which is an array of int4s.
     211             :          */
     212          86 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
     213             :                              STATISTIC_KIND_MCELEM, InvalidOid,
     214             :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     215             :         {
     216             :             Assert(sslot.valuetype == INT4OID);
     217             : 
     218             :             /*
     219             :              * There should be three more Numbers than Values, because the
     220             :              * last three (for intarray) cells are taken for minimal, maximal
     221             :              * and nulls frequency. Punt if not.
     222             :              */
     223          86 :             if (sslot.nnumbers == sslot.nvalues + 3)
     224             :             {
     225             :                 /* Grab the minimal MCE frequency. */
     226          86 :                 minfreq = sslot.numbers[sslot.nvalues];
     227             : 
     228          86 :                 mcelems = sslot.values;
     229          86 :                 mcefreqs = sslot.numbers;
     230          86 :                 nmcelems = sslot.nvalues;
     231             :             }
     232             :         }
     233             :     }
     234             :     else
     235          12 :         memset(&sslot, 0, sizeof(sslot));
     236             : 
     237             :     /* Process the logical expression in the query, using the stats */
     238          98 :     selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
     239             :                                 mcelems, mcefreqs, nmcelems, minfreq);
     240             : 
     241             :     /* MCE stats count only non-null rows, so adjust for null rows. */
     242          98 :     selec *= (1.0 - nullfrac);
     243             : 
     244          98 :     free_attstatsslot(&sslot);
     245          98 :     ReleaseVariableStats(vardata);
     246             : 
     247          98 :     CLAMP_PROBABILITY(selec);
     248             : 
     249          98 :     PG_RETURN_FLOAT8((float8) selec);
     250             : }
     251             : 
     252             : /*
     253             :  * Estimate selectivity of single intquery operator
     254             :  */
     255             : static Selectivity
     256         434 : int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
     257             :                     int nmcelems, float4 minfreq)
     258             : {
     259             :     Selectivity selec;
     260             : 
     261             :     /* since this function recurses, it could be driven to stack overflow */
     262         434 :     check_stack_depth();
     263             : 
     264         434 :     if (item->type == VAL)
     265             :     {
     266             :         Datum      *searchres;
     267             : 
     268         238 :         if (mcelems == NULL)
     269          28 :             return (Selectivity) DEFAULT_EQ_SEL;
     270             : 
     271         210 :         searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
     272             :                                       sizeof(Datum), compare_val_int4);
     273         210 :         if (searchres)
     274             :         {
     275             :             /*
     276             :              * The element is in MCELEM.  Return precise selectivity (or at
     277             :              * least as precise as ANALYZE could find out).
     278             :              */
     279         182 :             selec = mcefreqs[searchres - mcelems];
     280             :         }
     281             :         else
     282             :         {
     283             :             /*
     284             :              * The element is not in MCELEM.  Estimate its frequency as half
     285             :              * that of the least-frequent MCE.  (We know it cannot be more
     286             :              * than minfreq, and it could be a great deal less.  Half seems
     287             :              * like a good compromise.)  For probably-historical reasons,
     288             :              * clamp to not more than DEFAULT_EQ_SEL.
     289             :              */
     290          28 :             selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
     291             :         }
     292             :     }
     293         196 :     else if (item->type == OPR)
     294             :     {
     295             :         /* Current query node is an operator */
     296             :         Selectivity s1,
     297             :                     s2;
     298             : 
     299         196 :         s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
     300             :                                  minfreq);
     301         196 :         switch (item->val)
     302             :         {
     303          56 :             case (int32) '!':
     304          56 :                 selec = 1.0 - s1;
     305          56 :                 break;
     306             : 
     307          84 :             case (int32) '&':
     308          84 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     309             :                                          nmcelems, minfreq);
     310          84 :                 selec = s1 * s2;
     311          84 :                 break;
     312             : 
     313          56 :             case (int32) '|':
     314          56 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     315             :                                          nmcelems, minfreq);
     316          56 :                 selec = s1 + s2 - s1 * s2;
     317          56 :                 break;
     318             : 
     319           0 :             default:
     320           0 :                 elog(ERROR, "unrecognized operator: %d", item->val);
     321             :                 selec = 0;      /* keep compiler quiet */
     322             :                 break;
     323             :         }
     324             :     }
     325             :     else
     326             :     {
     327           0 :         elog(ERROR, "unrecognized int query item type: %u", item->type);
     328             :         selec = 0;              /* keep compiler quiet */
     329             :     }
     330             : 
     331             :     /* Clamp intermediate results to stay sane despite roundoff error */
     332         406 :     CLAMP_PROBABILITY(selec);
     333             : 
     334         406 :     return selec;
     335             : }
     336             : 
     337             : /*
     338             :  * Comparison function for binary search in mcelem array.
     339             :  */
     340             : static int
     341        1494 : compare_val_int4(const void *a, const void *b)
     342             : {
     343        1494 :     int32       key = *(const int32 *) a;
     344        1494 :     int32       value = DatumGetInt32(*(const Datum *) b);
     345             : 
     346        1494 :     if (key < value)
     347         782 :         return -1;
     348         712 :     else if (key > value)
     349         530 :         return 1;
     350             :     else
     351         182 :         return 0;
     352             : }

Generated by: LCOV version 1.16