LCOV - code coverage report
Current view: top level - contrib/intarray - _int_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 72 90 80.0 %
Date: 2025-01-18 04:15:08 Functions: 13 16 81.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * _int_selfuncs.c
       4             :  *    Functions for selectivity estimation of intarray operators
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    contrib/intarray/_int_selfuncs.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "_int.h"
      18             : #include "access/htup_details.h"
      19             : #include "catalog/pg_operator.h"
      20             : #include "catalog/pg_statistic.h"
      21             : #include "catalog/pg_type.h"
      22             : #include "miscadmin.h"
      23             : #include "utils/fmgrprotos.h"
      24             : #include "utils/lsyscache.h"
      25             : #include "utils/selfuncs.h"
      26             : 
      27           4 : PG_FUNCTION_INFO_V1(_int_overlap_sel);
      28           4 : PG_FUNCTION_INFO_V1(_int_contains_sel);
      29           4 : PG_FUNCTION_INFO_V1(_int_contained_sel);
      30           2 : PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
      31           2 : PG_FUNCTION_INFO_V1(_int_contains_joinsel);
      32           2 : PG_FUNCTION_INFO_V1(_int_contained_joinsel);
      33           4 : PG_FUNCTION_INFO_V1(_int_matchsel);
      34             : 
      35             : 
      36             : static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
      37             :                                        int nmcelems, float4 minfreq);
      38             : static int  compare_val_int4(const void *a, const void *b);
      39             : 
      40             : /*
      41             :  * Wrappers around the default array selectivity estimation functions.
      42             :  *
      43             :  * The default array selectivity operators for the @>, && and @< operators
      44             :  * work fine for integer arrays. However, if we tried to just use arraycontsel
      45             :  * and arraycontjoinsel directly as the cost estimator functions for our
      46             :  * operators, they would not work as intended, because they look at the
      47             :  * operator's OID. Our operators behave exactly like the built-in anyarray
      48             :  * versions, but we must tell the cost estimator functions which built-in
      49             :  * operators they correspond to. These wrappers just replace the operator
      50             :  * OID with the corresponding built-in operator's OID, and call the built-in
      51             :  * function.
      52             :  */
      53             : 
      54             : Datum
      55          14 : _int_overlap_sel(PG_FUNCTION_ARGS)
      56             : {
      57          14 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      58             :                                         PG_GETARG_DATUM(0),
      59             :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      60             :                                         PG_GETARG_DATUM(2),
      61             :                                         PG_GETARG_DATUM(3)));
      62             : }
      63             : 
      64             : Datum
      65          56 : _int_contains_sel(PG_FUNCTION_ARGS)
      66             : {
      67          56 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      68             :                                         PG_GETARG_DATUM(0),
      69             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
      70             :                                         PG_GETARG_DATUM(2),
      71             :                                         PG_GETARG_DATUM(3)));
      72             : }
      73             : 
      74             : Datum
      75          14 : _int_contained_sel(PG_FUNCTION_ARGS)
      76             : {
      77          14 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      78             :                                         PG_GETARG_DATUM(0),
      79             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
      80             :                                         PG_GETARG_DATUM(2),
      81             :                                         PG_GETARG_DATUM(3)));
      82             : }
      83             : 
      84             : Datum
      85           0 : _int_overlap_joinsel(PG_FUNCTION_ARGS)
      86             : {
      87           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
      88             :                                         PG_GETARG_DATUM(0),
      89             :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      90             :                                         PG_GETARG_DATUM(2),
      91             :                                         PG_GETARG_DATUM(3),
      92             :                                         PG_GETARG_DATUM(4)));
      93             : }
      94             : 
      95             : Datum
      96           0 : _int_contains_joinsel(PG_FUNCTION_ARGS)
      97             : {
      98           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
      99             :                                         PG_GETARG_DATUM(0),
     100             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
     101             :                                         PG_GETARG_DATUM(2),
     102             :                                         PG_GETARG_DATUM(3),
     103             :                                         PG_GETARG_DATUM(4)));
     104             : }
     105             : 
     106             : Datum
     107           0 : _int_contained_joinsel(PG_FUNCTION_ARGS)
     108             : {
     109           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     110             :                                         PG_GETARG_DATUM(0),
     111             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
     112             :                                         PG_GETARG_DATUM(2),
     113             :                                         PG_GETARG_DATUM(3),
     114             :                                         PG_GETARG_DATUM(4)));
     115             : }
     116             : 
     117             : 
     118             : /*
     119             :  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
     120             :  */
     121             : Datum
     122          84 : _int_matchsel(PG_FUNCTION_ARGS)
     123             : {
     124          84 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     125             : 
     126          84 :     List       *args = (List *) PG_GETARG_POINTER(2);
     127          84 :     int         varRelid = PG_GETARG_INT32(3);
     128             :     VariableStatData vardata;
     129             :     Node       *other;
     130             :     bool        varonleft;
     131             :     Selectivity selec;
     132             :     QUERYTYPE  *query;
     133          84 :     Datum      *mcelems = NULL;
     134          84 :     float4     *mcefreqs = NULL;
     135          84 :     int         nmcelems = 0;
     136          84 :     float4      minfreq = 0.0;
     137          84 :     float4      nullfrac = 0.0;
     138             :     AttStatsSlot sslot;
     139             : 
     140             :     /*
     141             :      * If expression is not "variable @@ something" or "something @@ variable"
     142             :      * then punt and return a default estimate.
     143             :      */
     144          84 :     if (!get_restriction_variable(root, args, varRelid,
     145             :                                   &vardata, &other, &varonleft))
     146           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     147             : 
     148             :     /*
     149             :      * Variable should be int[]. We don't support cases where variable is
     150             :      * query_int.
     151             :      */
     152          84 :     if (vardata.vartype != INT4ARRAYOID)
     153           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     154             : 
     155             :     /*
     156             :      * Can't do anything useful if the something is not a constant, either.
     157             :      */
     158          84 :     if (!IsA(other, Const))
     159             :     {
     160           0 :         ReleaseVariableStats(vardata);
     161           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     162             :     }
     163             : 
     164             :     /*
     165             :      * The "@@" operator is strict, so we can cope with NULL right away.
     166             :      */
     167          84 :     if (((Const *) other)->constisnull)
     168             :     {
     169           0 :         ReleaseVariableStats(vardata);
     170           0 :         PG_RETURN_FLOAT8(0.0);
     171             :     }
     172             : 
     173             :     /* The caller made sure the const is a query, so get it now */
     174          84 :     query = DatumGetQueryTypeP(((Const *) other)->constvalue);
     175             : 
     176             :     /* Empty query matches nothing */
     177          84 :     if (query->size == 0)
     178             :     {
     179           0 :         ReleaseVariableStats(vardata);
     180           0 :         return (Selectivity) 0.0;
     181             :     }
     182             : 
     183             :     /*
     184             :      * Get the statistics for the intarray column.
     185             :      *
     186             :      * We're interested in the Most-Common-Elements list, and the NULL
     187             :      * fraction.
     188             :      */
     189          84 :     if (HeapTupleIsValid(vardata.statsTuple))
     190             :     {
     191             :         Form_pg_statistic stats;
     192             : 
     193          72 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     194          72 :         nullfrac = stats->stanullfrac;
     195             : 
     196             :         /*
     197             :          * For an int4 array, the default array type analyze function will
     198             :          * collect a Most Common Elements list, which is an array of int4s.
     199             :          */
     200          72 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
     201             :                              STATISTIC_KIND_MCELEM, InvalidOid,
     202             :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     203             :         {
     204             :             Assert(sslot.valuetype == INT4OID);
     205             : 
     206             :             /*
     207             :              * There should be three more Numbers than Values, because the
     208             :              * last three (for intarray) cells are taken for minimal, maximal
     209             :              * and nulls frequency. Punt if not.
     210             :              */
     211          72 :             if (sslot.nnumbers == sslot.nvalues + 3)
     212             :             {
     213             :                 /* Grab the lowest frequency. */
     214          72 :                 minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
     215             : 
     216          72 :                 mcelems = sslot.values;
     217          72 :                 mcefreqs = sslot.numbers;
     218          72 :                 nmcelems = sslot.nvalues;
     219             :             }
     220             :         }
     221             :     }
     222             :     else
     223          12 :         memset(&sslot, 0, sizeof(sslot));
     224             : 
     225             :     /* Process the logical expression in the query, using the stats */
     226          84 :     selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
     227             :                                 mcelems, mcefreqs, nmcelems, minfreq);
     228             : 
     229             :     /* MCE stats count only non-null rows, so adjust for null rows. */
     230          84 :     selec *= (1.0 - nullfrac);
     231             : 
     232          84 :     free_attstatsslot(&sslot);
     233          84 :     ReleaseVariableStats(vardata);
     234             : 
     235          84 :     CLAMP_PROBABILITY(selec);
     236             : 
     237          84 :     PG_RETURN_FLOAT8((float8) selec);
     238             : }
     239             : 
     240             : /*
     241             :  * Estimate selectivity of single intquery operator
     242             :  */
     243             : static Selectivity
     244         350 : int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
     245             :                     int nmcelems, float4 minfreq)
     246             : {
     247             :     Selectivity selec;
     248             : 
     249             :     /* since this function recurses, it could be driven to stack overflow */
     250         350 :     check_stack_depth();
     251             : 
     252         350 :     if (item->type == VAL)
     253             :     {
     254             :         Datum      *searchres;
     255             : 
     256         196 :         if (mcelems == NULL)
     257          28 :             return (Selectivity) DEFAULT_EQ_SEL;
     258             : 
     259         168 :         searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
     260             :                                       sizeof(Datum), compare_val_int4);
     261         168 :         if (searchres)
     262             :         {
     263             :             /*
     264             :              * The element is in MCELEM.  Return precise selectivity (or at
     265             :              * least as precise as ANALYZE could find out).
     266             :              */
     267         168 :             selec = mcefreqs[searchres - mcelems];
     268             :         }
     269             :         else
     270             :         {
     271             :             /*
     272             :              * The element is not in MCELEM.  Punt, but assume that the
     273             :              * selectivity cannot be more than minfreq / 2.
     274             :              */
     275           0 :             selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
     276             :         }
     277             :     }
     278         154 :     else if (item->type == OPR)
     279             :     {
     280             :         /* Current query node is an operator */
     281             :         Selectivity s1,
     282             :                     s2;
     283             : 
     284         154 :         s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
     285             :                                  minfreq);
     286         154 :         switch (item->val)
     287             :         {
     288          42 :             case (int32) '!':
     289          42 :                 selec = 1.0 - s1;
     290          42 :                 break;
     291             : 
     292          70 :             case (int32) '&':
     293          70 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     294             :                                          nmcelems, minfreq);
     295          70 :                 selec = s1 * s2;
     296          70 :                 break;
     297             : 
     298          42 :             case (int32) '|':
     299          42 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     300             :                                          nmcelems, minfreq);
     301          42 :                 selec = s1 + s2 - s1 * s2;
     302          42 :                 break;
     303             : 
     304           0 :             default:
     305           0 :                 elog(ERROR, "unrecognized operator: %d", item->val);
     306             :                 selec = 0;      /* keep compiler quiet */
     307             :                 break;
     308             :         }
     309             :     }
     310             :     else
     311             :     {
     312           0 :         elog(ERROR, "unrecognized int query item type: %u", item->type);
     313             :         selec = 0;              /* keep compiler quiet */
     314             :     }
     315             : 
     316             :     /* Clamp intermediate results to stay sane despite roundoff error */
     317         322 :     CLAMP_PROBABILITY(selec);
     318             : 
     319         322 :     return selec;
     320             : }
     321             : 
     322             : /*
     323             :  * Comparison function for binary search in mcelem array.
     324             :  */
     325             : static int
     326        1200 : compare_val_int4(const void *a, const void *b)
     327             : {
     328        1200 :     int32       key = *(int32 *) a;
     329        1200 :     const Datum *t = (const Datum *) b;
     330             : 
     331        1200 :     return key - DatumGetInt32(*t);
     332             : }

Generated by: LCOV version 1.14