LCOV - code coverage report
Current view: top level - contrib/intarray - _int_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 72 90 80.0 %
Date: 2024-04-26 00:11:43 Functions: 13 16 81.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * _int_selfuncs.c
       4             :  *    Functions for selectivity estimation of intarray operators
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    contrib/intarray/_int_selfuncs.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "_int.h"
      18             : #include "access/htup_details.h"
      19             : #include "catalog/pg_operator.h"
      20             : #include "catalog/pg_statistic.h"
      21             : #include "catalog/pg_type.h"
      22             : #include "miscadmin.h"
      23             : #include "utils/builtins.h"
      24             : #include "utils/lsyscache.h"
      25             : #include "utils/selfuncs.h"
      26             : #include "utils/syscache.h"
      27             : 
      28           4 : PG_FUNCTION_INFO_V1(_int_overlap_sel);
      29           4 : PG_FUNCTION_INFO_V1(_int_contains_sel);
      30           4 : PG_FUNCTION_INFO_V1(_int_contained_sel);
      31           2 : PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
      32           2 : PG_FUNCTION_INFO_V1(_int_contains_joinsel);
      33           2 : PG_FUNCTION_INFO_V1(_int_contained_joinsel);
      34           4 : PG_FUNCTION_INFO_V1(_int_matchsel);
      35             : 
      36             : 
      37             : static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
      38             :                                        int nmcelems, float4 minfreq);
      39             : static int  compare_val_int4(const void *a, const void *b);
      40             : 
      41             : /*
      42             :  * Wrappers around the default array selectivity estimation functions.
      43             :  *
      44             :  * The default array selectivity operators for the @>, && and @< operators
      45             :  * work fine for integer arrays. However, if we tried to just use arraycontsel
      46             :  * and arraycontjoinsel directly as the cost estimator functions for our
      47             :  * operators, they would not work as intended, because they look at the
      48             :  * operator's OID. Our operators behave exactly like the built-in anyarray
      49             :  * versions, but we must tell the cost estimator functions which built-in
      50             :  * operators they correspond to. These wrappers just replace the operator
      51             :  * OID with the corresponding built-in operator's OID, and call the built-in
      52             :  * function.
      53             :  */
      54             : 
      55             : Datum
      56          14 : _int_overlap_sel(PG_FUNCTION_ARGS)
      57             : {
      58          14 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      59             :                                         PG_GETARG_DATUM(0),
      60             :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      61             :                                         PG_GETARG_DATUM(2),
      62             :                                         PG_GETARG_DATUM(3)));
      63             : }
      64             : 
      65             : Datum
      66          56 : _int_contains_sel(PG_FUNCTION_ARGS)
      67             : {
      68          56 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      69             :                                         PG_GETARG_DATUM(0),
      70             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
      71             :                                         PG_GETARG_DATUM(2),
      72             :                                         PG_GETARG_DATUM(3)));
      73             : }
      74             : 
      75             : Datum
      76          14 : _int_contained_sel(PG_FUNCTION_ARGS)
      77             : {
      78          14 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      79             :                                         PG_GETARG_DATUM(0),
      80             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
      81             :                                         PG_GETARG_DATUM(2),
      82             :                                         PG_GETARG_DATUM(3)));
      83             : }
      84             : 
      85             : Datum
      86           0 : _int_overlap_joinsel(PG_FUNCTION_ARGS)
      87             : {
      88           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
      89             :                                         PG_GETARG_DATUM(0),
      90             :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      91             :                                         PG_GETARG_DATUM(2),
      92             :                                         PG_GETARG_DATUM(3),
      93             :                                         PG_GETARG_DATUM(4)));
      94             : }
      95             : 
      96             : Datum
      97           0 : _int_contains_joinsel(PG_FUNCTION_ARGS)
      98             : {
      99           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     100             :                                         PG_GETARG_DATUM(0),
     101             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
     102             :                                         PG_GETARG_DATUM(2),
     103             :                                         PG_GETARG_DATUM(3),
     104             :                                         PG_GETARG_DATUM(4)));
     105             : }
     106             : 
     107             : Datum
     108           0 : _int_contained_joinsel(PG_FUNCTION_ARGS)
     109             : {
     110           0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     111             :                                         PG_GETARG_DATUM(0),
     112             :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
     113             :                                         PG_GETARG_DATUM(2),
     114             :                                         PG_GETARG_DATUM(3),
     115             :                                         PG_GETARG_DATUM(4)));
     116             : }
     117             : 
     118             : 
     119             : /*
     120             :  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
     121             :  */
     122             : Datum
     123          84 : _int_matchsel(PG_FUNCTION_ARGS)
     124             : {
     125          84 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     126             : 
     127          84 :     List       *args = (List *) PG_GETARG_POINTER(2);
     128          84 :     int         varRelid = PG_GETARG_INT32(3);
     129             :     VariableStatData vardata;
     130             :     Node       *other;
     131             :     bool        varonleft;
     132             :     Selectivity selec;
     133             :     QUERYTYPE  *query;
     134          84 :     Datum      *mcelems = NULL;
     135          84 :     float4     *mcefreqs = NULL;
     136          84 :     int         nmcelems = 0;
     137          84 :     float4      minfreq = 0.0;
     138          84 :     float4      nullfrac = 0.0;
     139             :     AttStatsSlot sslot;
     140             : 
     141             :     /*
     142             :      * If expression is not "variable @@ something" or "something @@ variable"
     143             :      * then punt and return a default estimate.
     144             :      */
     145          84 :     if (!get_restriction_variable(root, args, varRelid,
     146             :                                   &vardata, &other, &varonleft))
     147           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     148             : 
     149             :     /*
     150             :      * Variable should be int[]. We don't support cases where variable is
     151             :      * query_int.
     152             :      */
     153          84 :     if (vardata.vartype != INT4ARRAYOID)
     154           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     155             : 
     156             :     /*
     157             :      * Can't do anything useful if the something is not a constant, either.
     158             :      */
     159          84 :     if (!IsA(other, Const))
     160             :     {
     161           0 :         ReleaseVariableStats(vardata);
     162           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     163             :     }
     164             : 
     165             :     /*
     166             :      * The "@@" operator is strict, so we can cope with NULL right away.
     167             :      */
     168          84 :     if (((Const *) other)->constisnull)
     169             :     {
     170           0 :         ReleaseVariableStats(vardata);
     171           0 :         PG_RETURN_FLOAT8(0.0);
     172             :     }
     173             : 
     174             :     /* The caller made sure the const is a query, so get it now */
     175          84 :     query = DatumGetQueryTypeP(((Const *) other)->constvalue);
     176             : 
     177             :     /* Empty query matches nothing */
     178          84 :     if (query->size == 0)
     179             :     {
     180           0 :         ReleaseVariableStats(vardata);
     181           0 :         return (Selectivity) 0.0;
     182             :     }
     183             : 
     184             :     /*
     185             :      * Get the statistics for the intarray column.
     186             :      *
     187             :      * We're interested in the Most-Common-Elements list, and the NULL
     188             :      * fraction.
     189             :      */
     190          84 :     if (HeapTupleIsValid(vardata.statsTuple))
     191             :     {
     192             :         Form_pg_statistic stats;
     193             : 
     194          72 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     195          72 :         nullfrac = stats->stanullfrac;
     196             : 
     197             :         /*
     198             :          * For an int4 array, the default array type analyze function will
     199             :          * collect a Most Common Elements list, which is an array of int4s.
     200             :          */
     201          72 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
     202             :                              STATISTIC_KIND_MCELEM, InvalidOid,
     203             :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     204             :         {
     205             :             Assert(sslot.valuetype == INT4OID);
     206             : 
     207             :             /*
     208             :              * There should be three more Numbers than Values, because the
     209             :              * last three (for intarray) cells are taken for minimal, maximal
     210             :              * and nulls frequency. Punt if not.
     211             :              */
     212          72 :             if (sslot.nnumbers == sslot.nvalues + 3)
     213             :             {
     214             :                 /* Grab the lowest frequency. */
     215          72 :                 minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
     216             : 
     217          72 :                 mcelems = sslot.values;
     218          72 :                 mcefreqs = sslot.numbers;
     219          72 :                 nmcelems = sslot.nvalues;
     220             :             }
     221             :         }
     222             :     }
     223             :     else
     224          12 :         memset(&sslot, 0, sizeof(sslot));
     225             : 
     226             :     /* Process the logical expression in the query, using the stats */
     227          84 :     selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
     228             :                                 mcelems, mcefreqs, nmcelems, minfreq);
     229             : 
     230             :     /* MCE stats count only non-null rows, so adjust for null rows. */
     231          84 :     selec *= (1.0 - nullfrac);
     232             : 
     233          84 :     free_attstatsslot(&sslot);
     234          84 :     ReleaseVariableStats(vardata);
     235             : 
     236          84 :     CLAMP_PROBABILITY(selec);
     237             : 
     238          84 :     PG_RETURN_FLOAT8((float8) selec);
     239             : }
     240             : 
     241             : /*
     242             :  * Estimate selectivity of single intquery operator
     243             :  */
     244             : static Selectivity
     245         350 : int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
     246             :                     int nmcelems, float4 minfreq)
     247             : {
     248             :     Selectivity selec;
     249             : 
     250             :     /* since this function recurses, it could be driven to stack overflow */
     251         350 :     check_stack_depth();
     252             : 
     253         350 :     if (item->type == VAL)
     254             :     {
     255             :         Datum      *searchres;
     256             : 
     257         196 :         if (mcelems == NULL)
     258          28 :             return (Selectivity) DEFAULT_EQ_SEL;
     259             : 
     260         168 :         searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
     261             :                                       sizeof(Datum), compare_val_int4);
     262         168 :         if (searchres)
     263             :         {
     264             :             /*
     265             :              * The element is in MCELEM.  Return precise selectivity (or at
     266             :              * least as precise as ANALYZE could find out).
     267             :              */
     268         168 :             selec = mcefreqs[searchres - mcelems];
     269             :         }
     270             :         else
     271             :         {
     272             :             /*
     273             :              * The element is not in MCELEM.  Punt, but assume that the
     274             :              * selectivity cannot be more than minfreq / 2.
     275             :              */
     276           0 :             selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
     277             :         }
     278             :     }
     279         154 :     else if (item->type == OPR)
     280             :     {
     281             :         /* Current query node is an operator */
     282             :         Selectivity s1,
     283             :                     s2;
     284             : 
     285         154 :         s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
     286             :                                  minfreq);
     287         154 :         switch (item->val)
     288             :         {
     289          42 :             case (int32) '!':
     290          42 :                 selec = 1.0 - s1;
     291          42 :                 break;
     292             : 
     293          70 :             case (int32) '&':
     294          70 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     295             :                                          nmcelems, minfreq);
     296          70 :                 selec = s1 * s2;
     297          70 :                 break;
     298             : 
     299          42 :             case (int32) '|':
     300          42 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     301             :                                          nmcelems, minfreq);
     302          42 :                 selec = s1 + s2 - s1 * s2;
     303          42 :                 break;
     304             : 
     305           0 :             default:
     306           0 :                 elog(ERROR, "unrecognized operator: %d", item->val);
     307             :                 selec = 0;      /* keep compiler quiet */
     308             :                 break;
     309             :         }
     310             :     }
     311             :     else
     312             :     {
     313           0 :         elog(ERROR, "unrecognized int query item type: %u", item->type);
     314             :         selec = 0;              /* keep compiler quiet */
     315             :     }
     316             : 
     317             :     /* Clamp intermediate results to stay sane despite roundoff error */
     318         322 :     CLAMP_PROBABILITY(selec);
     319             : 
     320         322 :     return selec;
     321             : }
     322             : 
     323             : /*
     324             :  * Comparison function for binary search in mcelem array.
     325             :  */
     326             : static int
     327        1200 : compare_val_int4(const void *a, const void *b)
     328             : {
     329        1200 :     int32       key = *(int32 *) a;
     330        1200 :     const Datum *t = (const Datum *) b;
     331             : 
     332        1200 :     return key - DatumGetInt32(*t);
     333             : }

Generated by: LCOV version 1.14