LCOV - code coverage report
Current view: top level - contrib/intarray - _int_selfuncs.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 79.8 % 99 79
Test Date: 2026-05-28 09:16:21 Functions: 81.2 % 16 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * _int_selfuncs.c
       4              :  *    Functions for selectivity estimation of intarray operators
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    contrib/intarray/_int_selfuncs.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "_int.h"
      18              : #include "access/htup_details.h"
      19              : #include "catalog/pg_operator.h"
      20              : #include "catalog/pg_statistic.h"
      21              : #include "catalog/pg_type.h"
      22              : #include "commands/extension.h"
      23              : #include "miscadmin.h"
      24              : #include "utils/fmgrprotos.h"
      25              : #include "utils/lsyscache.h"
      26              : #include "utils/selfuncs.h"
      27              : 
      /*
       * fmgr declarations for the SQL-callable functions defined in this file:
       * three restriction estimators (*_sel), three join estimators (*_joinsel)
       * and the @@ estimator (_int_matchsel).
       */
      28            2 : PG_FUNCTION_INFO_V1(_int_overlap_sel);
      29            2 : PG_FUNCTION_INFO_V1(_int_contains_sel);
      30            2 : PG_FUNCTION_INFO_V1(_int_contained_sel);
      31            1 : PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
      32            1 : PG_FUNCTION_INFO_V1(_int_contains_joinsel);
      33            1 : PG_FUNCTION_INFO_V1(_int_contained_joinsel);
      34            2 : PG_FUNCTION_INFO_V1(_int_matchsel);
      35              : 
      36              : 
      /*
       * File-local helpers used by _int_matchsel: the recursive estimator for a
       * query_int expression, and the bsearch comparator it uses to look up a
       * value in the Most-Common-Elements array.
       */
      37              : static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
      38              :                                        int nmcelems, float4 minfreq);
      39              : static int  compare_val_int4(const void *a, const void *b);
      40              : 
      41              : /*
      42              :  * Wrappers around the default array selectivity estimation functions.
      43              :  *
      44              :  * The default array selectivity estimators for the @>, && and <@ operators
      45              :  * work fine for integer arrays. However, if we tried to just use arraycontsel
      46              :  * and arraycontjoinsel directly as the cost estimator functions for our
      47              :  * operators, they would not work as intended, because they look at the
      48              :  * operator's OID. Our operators behave exactly like the built-in anyarray
      49              :  * versions, but we must tell the cost estimator functions which built-in
      50              :  * operators they correspond to. These wrappers just replace the operator
      51              :  * OID with the corresponding built-in operator's OID, and call the built-in
      52              :  * function.
      53              :  */
      54              : 
      /*
       * _int_overlap_sel -- restriction selectivity wrapper for the overlap case.
       *
       * Passes fmgr arguments 0, 2 and 3 through to arraycontsel unchanged and
       * supplies OID_ARRAY_OVERLAP_OP in place of argument 1 (the operator OID).
       * The Datum returned by arraycontsel is returned as-is.
       */
      55              : Datum
      56            7 : _int_overlap_sel(PG_FUNCTION_ARGS)
      57              : {
      58            7 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      59              :                                         PG_GETARG_DATUM(0),
      60              :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      61              :                                         PG_GETARG_DATUM(2),
      62              :                                         PG_GETARG_DATUM(3)));
      63              : }
      64              : 
      /*
       * _int_contains_sel -- restriction selectivity wrapper for the contains case.
       *
       * Passes fmgr arguments 0, 2 and 3 through to arraycontsel unchanged and
       * supplies OID_ARRAY_CONTAINS_OP in place of argument 1 (the operator OID).
       * The Datum returned by arraycontsel is returned as-is.
       */
      65              : Datum
      66           28 : _int_contains_sel(PG_FUNCTION_ARGS)
      67              : {
      68           28 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      69              :                                         PG_GETARG_DATUM(0),
      70              :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
      71              :                                         PG_GETARG_DATUM(2),
      72              :                                         PG_GETARG_DATUM(3)));
      73              : }
      74              : 
      /*
       * _int_contained_sel -- restriction selectivity wrapper for the contained
       * case.
       *
       * Passes fmgr arguments 0, 2 and 3 through to arraycontsel unchanged and
       * supplies OID_ARRAY_CONTAINED_OP in place of argument 1 (the operator OID).
       * The Datum returned by arraycontsel is returned as-is.
       */
      75              : Datum
      76            7 : _int_contained_sel(PG_FUNCTION_ARGS)
      77              : {
      78            7 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      79              :                                         PG_GETARG_DATUM(0),
      80              :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
      81              :                                         PG_GETARG_DATUM(2),
      82              :                                         PG_GETARG_DATUM(3)));
      83              : }
      84              : 
      /*
       * _int_overlap_joinsel -- join selectivity wrapper for the overlap case.
       *
       * Passes fmgr arguments 0, 2, 3 and 4 through to arraycontjoinsel unchanged
       * and supplies OID_ARRAY_OVERLAP_OP in place of argument 1 (the operator
       * OID).  The Datum returned by arraycontjoinsel is returned as-is.
       */
      85              : Datum
      86            0 : _int_overlap_joinsel(PG_FUNCTION_ARGS)
      87              : {
      88            0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
      89              :                                         PG_GETARG_DATUM(0),
      90              :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      91              :                                         PG_GETARG_DATUM(2),
      92              :                                         PG_GETARG_DATUM(3),
      93              :                                         PG_GETARG_DATUM(4)));
      94              : }
      95              : 
      /*
       * _int_contains_joinsel -- join selectivity wrapper for the contains case.
       *
       * Passes fmgr arguments 0, 2, 3 and 4 through to arraycontjoinsel unchanged
       * and supplies OID_ARRAY_CONTAINS_OP in place of argument 1 (the operator
       * OID).  The Datum returned by arraycontjoinsel is returned as-is.
       */
      96              : Datum
      97            0 : _int_contains_joinsel(PG_FUNCTION_ARGS)
      98              : {
      99            0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     100              :                                         PG_GETARG_DATUM(0),
     101              :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
     102              :                                         PG_GETARG_DATUM(2),
     103              :                                         PG_GETARG_DATUM(3),
     104              :                                         PG_GETARG_DATUM(4)));
     105              : }
     106              : 
     /*
      * _int_contained_joinsel -- join selectivity wrapper for the contained case.
      *
      * Passes fmgr arguments 0, 2, 3 and 4 through to arraycontjoinsel unchanged
      * and supplies OID_ARRAY_CONTAINED_OP in place of argument 1 (the operator
      * OID).  The Datum returned by arraycontjoinsel is returned as-is.
      */
     107              : Datum
     108            0 : _int_contained_joinsel(PG_FUNCTION_ARGS)
     109              : {
     110            0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     111              :                                         PG_GETARG_DATUM(0),
     112              :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
     113              :                                         PG_GETARG_DATUM(2),
     114              :                                         PG_GETARG_DATUM(3),
     115              :                                         PG_GETARG_DATUM(4)));
     116              : }
     117              : 
     118              : 
     119              : /*
     120              :  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
                      :  *
                      :  * Reads fmgr arguments 0 (PlannerInfo *root), 2 (List *args) and
                      :  * 3 (int varRelid); argument 1 is not read.
                      :  *
                      :  * Returns a float8 selectivity:
                      :  *  - DEFAULT_EQ_SEL if get_restriction_variable() fails, the variable is
                      :  *    not of type INT4ARRAYOID, the other operand is not a Const, or the
                      :  *    Const's type is not the "query_int" sibling type of this function;
                      :  *  - 0.0 if the Const is NULL or the query has no items;
                      :  *  - otherwise the estimate from int_query_opr_selec(), scaled by
                      :  *    (1 - stanullfrac) and clamped with CLAMP_PROBABILITY.
                      :  *
                      :  * Every exit taken after get_restriction_variable() succeeds calls
                      :  * ReleaseVariableStats() on vardata first.
     121              :  */
     122              : Datum
     123           49 : _int_matchsel(PG_FUNCTION_ARGS)
     124              : {
     125           49 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     126              : 
     127           49 :     List       *args = (List *) PG_GETARG_POINTER(2);
     128           49 :     int         varRelid = PG_GETARG_INT32(3);
     129              :     VariableStatData vardata;
     130              :     Node       *other;
     131              :     bool        varonleft;
     132              :     Selectivity selec;
     133              :     QUERYTYPE  *query;
     134           49 :     Datum      *mcelems = NULL;
     135           49 :     float4     *mcefreqs = NULL;
     136           49 :     int         nmcelems = 0;
     137           49 :     float4      minfreq = 0.0;
     138           49 :     float4      nullfrac = 0.0;
     139              :     AttStatsSlot sslot;
     140              : 
     141              :     /*
     142              :      * If expression is not "variable @@ something" or "something @@ variable"
     143              :      * then punt and return a default estimate.
     144              :      */
     145           49 :     if (!get_restriction_variable(root, args, varRelid,
     146              :                                   &vardata, &other, &varonleft))
     147            0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     148              : 
     149              :     /*
     150              :      * Variable should be int[]. We don't support cases where variable is
     151              :      * query_int.
     152              :      */
     153           49 :     if (vardata.vartype != INT4ARRAYOID)
     154              :     {
     155            0 :         ReleaseVariableStats(vardata);
     156            0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     157              :     }
     158              : 
     159              :     /*
     160              :      * Can't do anything useful if the something is not a constant, either.
     161              :      */
     162           49 :     if (!IsA(other, Const))
     163              :     {
     164            0 :         ReleaseVariableStats(vardata);
     165            0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     166              :     }
     167              : 
     168              :     /*
     169              :      * The "@@" operator is strict, so we can cope with NULL right away.
     170              :      */
     171           49 :     if (((Const *) other)->constisnull)
     172              :     {
     173            0 :         ReleaseVariableStats(vardata);
     174            0 :         PG_RETURN_FLOAT8(0.0);
     175              :     }
     176              : 
     177              :     /*
     178              :      * Verify that the Const is a query_int, else return a default estimate.
     179              :      * (This could only fail if someone attached this estimator to the wrong
     180              :      * operator.)
     181              :      */
     182           49 :     if (((Const *) other)->consttype !=
     183           49 :         get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int"))
     184              :     {
     185            0 :         ReleaseVariableStats(vardata);
     186            0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     187              :     }
     188              : 
     189           49 :     query = DatumGetQueryTypeP(((Const *) other)->constvalue);
     190              : 
     191              :     /* Empty query matches nothing */
     192           49 :     if (query->size == 0)
     193              :     {
     194            0 :         ReleaseVariableStats(vardata);
     195            0 :         PG_RETURN_FLOAT8(0.0);
     196              :     }
     197              : 
     198              :     /*
     199              :      * Get the statistics for the intarray column.
     200              :      *
     201              :      * We're interested in the Most-Common-Elements list, and the NULL
     202              :      * fraction.
                      :      *
                      :      * If there is no stats tuple, sslot is zeroed instead, so that the
                      :      * unconditional free_attstatsslot() call further down is handed an
                      :      * empty slot rather than uninitialized memory.
                      :      *
                      :      * NOTE(review): when a stats tuple exists but get_attstatsslot()
                      :      * returns false, sslot is still passed to free_attstatsslot(); this
                      :      * presumably relies on get_attstatsslot() leaving the slot in a
                      :      * releasable state even on failure -- confirm.
     203              :      */
     204           49 :     if (HeapTupleIsValid(vardata.statsTuple))
     205              :     {
     206              :         Form_pg_statistic stats;
     207              : 
     208           43 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     209           43 :         nullfrac = stats->stanullfrac;
     210              : 
     211              :         /*
     212              :          * For an int4 array, the default array type analyze function will
     213              :          * collect a Most Common Elements list, which is an array of int4s.
     214              :          */
     215           43 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
     216              :                              STATISTIC_KIND_MCELEM, InvalidOid,
     217              :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     218              :         {
     219              :             Assert(sslot.valuetype == INT4OID);
     220              : 
     221              :             /*
     222              :              * There should be three more Numbers than Values, because the
     223              :              * last three (for intarray) cells are taken for minimal, maximal
     224              :              * and nulls frequency. Punt if not.
                      :              *
                      :              * "Punt" here means leaving mcelems NULL, so that
                      :              * int_query_opr_selec() falls back to DEFAULT_EQ_SEL for every
                      :              * value in the query.
     225              :              */
     226           43 :             if (sslot.nnumbers == sslot.nvalues + 3)
     227              :             {
     228              :                 /* Grab the minimal MCE frequency, stored at numbers[nvalues]. */
     229           43 :                 minfreq = sslot.numbers[sslot.nvalues];
     230              : 
     231           43 :                 mcelems = sslot.values;
     232           43 :                 mcefreqs = sslot.numbers;
     233           43 :                 nmcelems = sslot.nvalues;
     234              :             }
     235              :         }
     236              :     }
     237              :     else
     238            6 :         memset(&sslot, 0, sizeof(sslot));
     239              : 
     240              :     /*
                      :      * Process the logical expression in the query, using the stats.
                      :      * Evaluation starts from the last item of the query's item array.
                      :      */
     241           49 :     selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
     242              :                                 mcelems, mcefreqs, nmcelems, minfreq);
     243              : 
     244              :     /* MCE stats count only non-null rows, so adjust for null rows. */
     245           49 :     selec *= (1.0 - nullfrac);
     246              : 
     247           49 :     free_attstatsslot(&sslot);
     248           49 :     ReleaseVariableStats(vardata);
     249              : 
     250           49 :     CLAMP_PROBABILITY(selec);
     251              : 
     252           49 :     PG_RETURN_FLOAT8((float8) selec);
     253              : }
     254              : 
     255              : /*
     256              :  * Estimate selectivity of single intquery operator
                      :  *
                      :  * item is the query node to estimate.  For a VAL node the estimate comes
                      :  * from the Most-Common-Elements data; for an OPR node the function recurses
                      :  * into the operand at item - 1 and, for the binary operators '&' and '|',
                      :  * also into the operand at item + item->left.
                      :  *
                      :  * mcelems and mcefreqs are parallel arrays of nmcelems entries (the
                      :  * frequency of mcelems[i] is mcefreqs[i]); mcelems may be NULL when no
                      :  * usable statistics were found.  minfreq is the lowest frequency among the
                      :  * most-common elements.
                      :  *
                      :  * The result is clamped with CLAMP_PROBABILITY, except for the early
                      :  * return of DEFAULT_EQ_SEL taken when mcelems is NULL.  An unrecognized
                      :  * item type or operator is reported with elog(ERROR).
     257              :  */
     258              : static Selectivity
     259          217 : int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
     260              :                     int nmcelems, float4 minfreq)
     261              : {
     262              :     Selectivity selec;
     263              : 
     264              :     /* since this function recurses, it could be driven to stack overflow */
     265          217 :     check_stack_depth();
     266              : 
     267          217 :     if (item->type == VAL)
     268              :     {
     269              :         Datum      *searchres;
     270              : 
     271          119 :         if (mcelems == NULL)
     272           14 :             return (Selectivity) DEFAULT_EQ_SEL;
     273              : 
     274          105 :         searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
     275              :                                       sizeof(Datum), compare_val_int4);
     276          105 :         if (searchres)
     277              :         {
     278              :             /*
     279              :              * The element is in MCELEM.  Return precise selectivity (or at
     280              :              * least as precise as ANALYZE could find out).
                      :              *
                      :              * NOTE(review): bsearch needs mcelems ordered consistently with
                      :              * compare_val_int4 (ascending int4); presumably the MCELEM slot
                      :              * is stored that way -- confirm.
     281              :              */
     282           91 :             selec = mcefreqs[searchres - mcelems];
     283              :         }
     284              :         else
     285              :         {
     286              :             /*
     287              :              * The element is not in MCELEM.  Estimate its frequency as half
     288              :              * that of the least-frequent MCE.  (We know it cannot be more
     289              :              * than minfreq, and it could be a great deal less.  Half seems
     290              :              * like a good compromise.)  For probably-historical reasons,
     291              :              * clamp to not more than DEFAULT_EQ_SEL.
     292              :              */
     293           14 :             selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
     294              :         }
     295              :     }
     296           98 :     else if (item->type == OPR)
     297              :     {
     298              :         /*
                      :          * Current query node is an operator.  s1 is always the operand at
                      :          * item - 1; s2 is only computed for the binary operators.  '&'
                      :          * multiplies the two estimates and '|' uses s1 + s2 - s1 * s2.
                      :          */
     299              :         Selectivity s1,
     300              :                     s2;
     301              : 
     302           98 :         s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
     303              :                                  minfreq);
     304           98 :         switch (item->val)
     305              :         {
     306           28 :             case (int32) '!':
     307           28 :                 selec = 1.0 - s1;
     308           28 :                 break;
     309              : 
     310           42 :             case (int32) '&':
     311           42 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     312              :                                          nmcelems, minfreq);
     313           42 :                 selec = s1 * s2;
     314           42 :                 break;
     315              : 
     316           28 :             case (int32) '|':
     317           28 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     318              :                                          nmcelems, minfreq);
     319           28 :                 selec = s1 + s2 - s1 * s2;
     320           28 :                 break;
     321              : 
     322            0 :             default:
     323            0 :                 elog(ERROR, "unrecognized operator: %d", item->val);
     324              :                 selec = 0;      /* keep compiler quiet */
     325              :                 break;
     326              :         }
     327              :     }
     328              :     else
     329              :     {
     330            0 :         elog(ERROR, "unrecognized int query item type: %u", item->type);
     331              :         selec = 0;              /* keep compiler quiet */
     332              :     }
     333              : 
     334              :     /* Clamp intermediate results to stay sane despite roundoff error */
     335          203 :     CLAMP_PROBABILITY(selec);
     336              : 
     337          203 :     return selec;
     338              : }
     339              : 
     340              : /*
     341              :  * Comparison function for binary search in mcelem array.
                      :  *
                      :  * a points to the int32 search key (int_query_opr_selec passes &item->val)
                      :  * and b points to a Datum element of the array, which is converted with
                      :  * DatumGetInt32.  The two arguments therefore have different types and must
                      :  * not be swapped.
                      :  *
                      :  * Returns -1, 1 or 0 when the key is less than, greater than or equal to
                      :  * the element.
     342              :  */
     343              : static int
     344          747 : compare_val_int4(const void *a, const void *b)
     345              : {
     346          747 :     int32       key = *(const int32 *) a;
     347          747 :     int32       value = DatumGetInt32(*(const Datum *) b);
     348              : 
     349          747 :     if (key < value)
     350          391 :         return -1;
     351          356 :     else if (key > value)
     352          265 :         return 1;
     353              :     else
     354           91 :         return 0;
     355              : }
        

Generated by: LCOV version 2.0-1