LCOV - code coverage report
Current view: top level - src/backend/utils/adt - network_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 12 270 4.4 %
Date: 2020-12-05 17:06:23 Functions: 1 14 7.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * network_selfuncs.c
       4             :  *    Functions for selectivity estimation of inet/cidr operators
       5             :  *
       6             :  * This module provides estimators for the subnet inclusion and overlap
       7             :  * operators.  Estimates are based on null fraction, most common values,
       8             :  * and histogram of inet/cidr columns.
       9             :  *
      10             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  *
      14             :  * IDENTIFICATION
      15             :  *    src/backend/utils/adt/network_selfuncs.c
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : #include "postgres.h"
      20             : 
      21             : #include <math.h>
      22             : 
      23             : #include "access/htup_details.h"
      24             : #include "catalog/pg_operator.h"
      25             : #include "catalog/pg_statistic.h"
      26             : #include "utils/builtins.h"
      27             : #include "utils/inet.h"
      28             : #include "utils/lsyscache.h"
      29             : #include "utils/selfuncs.h"
      30             : 
      31             : 
      32             : /* Default selectivity for the inet overlap operator */
      33             : #define DEFAULT_OVERLAP_SEL 0.01
      34             : 
      35             : /* Default selectivity for the various inclusion operators */
      36             : #define DEFAULT_INCLUSION_SEL 0.005
      37             : 
      38             : /* Default selectivity for specified operator */
      39             : #define DEFAULT_SEL(operator) \
      40             :     ((operator) == OID_INET_OVERLAP_OP ? \
      41             :      DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
      42             : 
      43             : /* Maximum number of items to consider in join selectivity calculations */
      44             : #define MAX_CONSIDERED_ELEMS 1024
      45             : 
      46             : static Selectivity networkjoinsel_inner(Oid operator,
      47             :                                         VariableStatData *vardata1, VariableStatData *vardata2);
      48             : static Selectivity networkjoinsel_semi(Oid operator,
      49             :                                        VariableStatData *vardata1, VariableStatData *vardata2);
      50             : static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
      51             : static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
      52             :                                        Datum constvalue, int opr_codenum);
      53             : static Selectivity inet_mcv_join_sel(Datum *mcv1_values,
      54             :                                      float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
      55             :                                      float4 *mcv2_numbers, int mcv2_nvalues, Oid operator);
      56             : static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers,
      57             :                                      int mcv_nvalues, Datum *hist_values, int hist_nvalues,
      58             :                                      int opr_codenum);
      59             : static Selectivity inet_hist_inclusion_join_sel(Datum *hist1_values,
      60             :                                                 int hist1_nvalues,
      61             :                                                 Datum *hist2_values, int hist2_nvalues,
      62             :                                                 int opr_codenum);
      63             : static Selectivity inet_semi_join_sel(Datum lhs_value,
      64             :                                       bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
      65             :                                       bool hist_exists, Datum *hist_values, int hist_nvalues,
      66             :                                       double hist_weight,
      67             :                                       FmgrInfo *proc, int opr_codenum);
      68             : static int  inet_opr_codenum(Oid operator);
      69             : static int  inet_inclusion_cmp(inet *left, inet *right, int opr_codenum);
      70             : static int  inet_masklen_inclusion_cmp(inet *left, inet *right,
      71             :                                        int opr_codenum);
      72             : static int  inet_hist_match_divider(inet *boundary, inet *query,
      73             :                                     int opr_codenum);
      74             : 
      75             : /*
      76             :  * Selectivity estimation for the subnet inclusion/overlap operators
      77             :  */
      78             : Datum
      79         600 : networksel(PG_FUNCTION_ARGS)
      80             : {
      81         600 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
      82         600 :     Oid         operator = PG_GETARG_OID(1);
      83         600 :     List       *args = (List *) PG_GETARG_POINTER(2);
      84         600 :     int         varRelid = PG_GETARG_INT32(3);
      85             :     VariableStatData vardata;
      86             :     Node       *other;
      87             :     bool        varonleft;
      88             :     Selectivity selec,
      89             :                 mcv_selec,
      90             :                 non_mcv_selec;
      91             :     Datum       constvalue;
      92             :     Form_pg_statistic stats;
      93             :     AttStatsSlot hslot;
      94             :     double      sumcommon,
      95             :                 nullfrac;
      96             :     FmgrInfo    proc;
      97             : 
      98             :     /*
      99             :      * If expression is not (variable op something) or (something op
     100             :      * variable), then punt and return a default estimate.
     101             :      */
     102         600 :     if (!get_restriction_variable(root, args, varRelid,
     103             :                                   &vardata, &other, &varonleft))
     104           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     105             : 
     106             :     /*
     107             :      * Can't do anything useful if the something is not a constant, either.
     108             :      */
     109         600 :     if (!IsA(other, Const))
     110             :     {
     111           0 :         ReleaseVariableStats(vardata);
     112           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     113             :     }
     114             : 
     115             :     /* All of the operators handled here are strict. */
     116         600 :     if (((Const *) other)->constisnull)
     117             :     {
     118           0 :         ReleaseVariableStats(vardata);
     119           0 :         PG_RETURN_FLOAT8(0.0);
     120             :     }
     121         600 :     constvalue = ((Const *) other)->constvalue;
     122             : 
     123             :     /* Otherwise, we need stats in order to produce a non-default estimate. */
     124         600 :     if (!HeapTupleIsValid(vardata.statsTuple))
     125             :     {
     126         600 :         ReleaseVariableStats(vardata);
     127         600 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     128             :     }
     129             : 
     130           0 :     stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     131           0 :     nullfrac = stats->stanullfrac;
     132             : 
     133             :     /*
     134             :      * If we have most-common-values info, add up the fractions of the MCV
     135             :      * entries that satisfy MCV OP CONST.  These fractions contribute directly
     136             :      * to the result selectivity.  Also add up the total fraction represented
     137             :      * by MCV entries.
     138             :      */
     139           0 :     fmgr_info(get_opcode(operator), &proc);
     140           0 :     mcv_selec = mcv_selectivity(&vardata, &proc, InvalidOid,
     141             :                                 constvalue, varonleft,
     142             :                                 &sumcommon);
     143             : 
     144             :     /*
     145             :      * If we have a histogram, use it to estimate the proportion of the
     146             :      * non-MCV population that satisfies the clause.  If we don't, apply the
     147             :      * default selectivity to that population.
     148             :      */
     149           0 :     if (get_attstatsslot(&hslot, vardata.statsTuple,
     150             :                          STATISTIC_KIND_HISTOGRAM, InvalidOid,
     151             :                          ATTSTATSSLOT_VALUES))
     152             :     {
     153           0 :         int         opr_codenum = inet_opr_codenum(operator);
     154             : 
     155             :         /* Commute if needed, so we can consider histogram to be on the left */
     156           0 :         if (!varonleft)
     157           0 :             opr_codenum = -opr_codenum;
     158           0 :         non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
     159             :                                             constvalue, opr_codenum);
     160             : 
     161           0 :         free_attstatsslot(&hslot);
     162             :     }
     163             :     else
     164           0 :         non_mcv_selec = DEFAULT_SEL(operator);
     165             : 
     166             :     /* Combine selectivities for MCV and non-MCV populations */
     167           0 :     selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;
     168             : 
     169             :     /* Result should be in range, but make sure... */
     170           0 :     CLAMP_PROBABILITY(selec);
     171             : 
     172           0 :     ReleaseVariableStats(vardata);
     173             : 
     174           0 :     PG_RETURN_FLOAT8(selec);
     175             : }
     176             : 
     177             : /*
     178             :  * Join selectivity estimation for the subnet inclusion/overlap operators
     179             :  *
     180             :  * This function has the same structure as eqjoinsel() in selfuncs.c.
     181             :  *
     182             :  * Throughout networkjoinsel and its subroutines, we have a performance issue
     183             :  * in that the amount of work to be done is O(N^2) in the length of the MCV
     184             :  * and histogram arrays.  To keep the runtime from getting out of hand when
     185             :  * large statistics targets have been set, we arbitrarily limit the number of
     186             :  * values considered to 1024 (MAX_CONSIDERED_ELEMS).  For the MCV arrays, this
     187             :  * is easy: just consider at most the first N elements.  (Since the MCVs are
     188             :  * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
     189             :  * For the histogram arrays, we decimate; that is consider only every k'th
     190             :  * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
     191             :  * elements are considered.  This should still give us a good random sample of
     192             :  * the non-MCV population.  Decimation is done on-the-fly in the loops that
     193             :  * iterate over the histogram arrays.
     194             :  */
     195             : Datum
     196           0 : networkjoinsel(PG_FUNCTION_ARGS)
     197             : {
     198           0 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     199           0 :     Oid         operator = PG_GETARG_OID(1);
     200           0 :     List       *args = (List *) PG_GETARG_POINTER(2);
     201             : #ifdef NOT_USED
     202             :     JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
     203             : #endif
     204           0 :     SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
     205             :     double      selec;
     206             :     VariableStatData vardata1;
     207             :     VariableStatData vardata2;
     208             :     bool        join_is_reversed;
     209             : 
     210           0 :     get_join_variables(root, args, sjinfo,
     211             :                        &vardata1, &vardata2, &join_is_reversed);
     212             : 
     213           0 :     switch (sjinfo->jointype)
     214             :     {
     215           0 :         case JOIN_INNER:
     216             :         case JOIN_LEFT:
     217             :         case JOIN_FULL:
     218             : 
     219             :             /*
     220             :              * Selectivity for left/full join is not exactly the same as inner
     221             :              * join, but we neglect the difference, as eqjoinsel does.
     222             :              */
     223           0 :             selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
     224           0 :             break;
     225           0 :         case JOIN_SEMI:
     226             :         case JOIN_ANTI:
     227             :             /* Here, it's important that we pass the outer var on the left. */
     228           0 :             if (!join_is_reversed)
     229           0 :                 selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
     230             :             else
     231           0 :                 selec = networkjoinsel_semi(get_commutator(operator),
     232             :                                             &vardata2, &vardata1);
     233           0 :             break;
     234           0 :         default:
     235             :             /* other values not expected here */
     236           0 :             elog(ERROR, "unrecognized join type: %d",
     237             :                  (int) sjinfo->jointype);
     238             :             selec = 0;          /* keep compiler quiet */
     239             :             break;
     240             :     }
     241             : 
     242           0 :     ReleaseVariableStats(vardata1);
     243           0 :     ReleaseVariableStats(vardata2);
     244             : 
     245           0 :     CLAMP_PROBABILITY(selec);
     246             : 
     247           0 :     PG_RETURN_FLOAT8((float8) selec);
     248             : }
     249             : 
     250             : /*
     251             :  * Inner join selectivity estimation for subnet inclusion/overlap operators
     252             :  *
     253             :  * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
     254             :  * selectivity for join using the subnet inclusion operators.  Unlike the
     255             :  * join selectivity function for the equality operator, eqjoinsel_inner(),
     256             :  * one to one matching of the values is not enough.  Network inclusion
     257             :  * operators are likely to match many to many, so we must check all pairs.
     258             :  * (Note: it might be possible to exploit understanding of the histogram's
     259             :  * btree ordering to reduce the work needed, but we don't currently try.)
     260             :  * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
     261             :  */
     262             : static Selectivity
     263           0 : networkjoinsel_inner(Oid operator,
     264             :                      VariableStatData *vardata1, VariableStatData *vardata2)
     265             : {
     266             :     Form_pg_statistic stats;
     267           0 :     double      nullfrac1 = 0.0,
     268           0 :                 nullfrac2 = 0.0;
     269           0 :     Selectivity selec = 0.0,
     270           0 :                 sumcommon1 = 0.0,
     271           0 :                 sumcommon2 = 0.0;
     272           0 :     bool        mcv1_exists = false,
     273           0 :                 mcv2_exists = false,
     274           0 :                 hist1_exists = false,
     275           0 :                 hist2_exists = false;
     276             :     int         opr_codenum;
     277           0 :     int         mcv1_length = 0,
     278           0 :                 mcv2_length = 0;
     279             :     AttStatsSlot mcv1_slot;
     280             :     AttStatsSlot mcv2_slot;
     281             :     AttStatsSlot hist1_slot;
     282             :     AttStatsSlot hist2_slot;
     283             : 
     284           0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     285             :     {
     286           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     287           0 :         nullfrac1 = stats->stanullfrac;
     288             : 
     289           0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     290             :                                        STATISTIC_KIND_MCV, InvalidOid,
     291             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     292           0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     293             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     294             :                                         ATTSTATSSLOT_VALUES);
     295             :         /* Arbitrarily limit number of MCVs considered */
     296           0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     297           0 :         if (mcv1_exists)
     298           0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     299             :     }
     300             :     else
     301             :     {
     302           0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     303           0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     304             :     }
     305             : 
     306           0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     307             :     {
     308           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     309           0 :         nullfrac2 = stats->stanullfrac;
     310             : 
     311           0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     312             :                                        STATISTIC_KIND_MCV, InvalidOid,
     313             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     314           0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     315             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     316             :                                         ATTSTATSSLOT_VALUES);
     317             :         /* Arbitrarily limit number of MCVs considered */
     318           0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     319           0 :         if (mcv2_exists)
     320           0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     321             :     }
     322             :     else
     323             :     {
     324           0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     325           0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     326             :     }
     327             : 
     328           0 :     opr_codenum = inet_opr_codenum(operator);
     329             : 
     330             :     /*
     331             :      * Calculate selectivity for MCV vs MCV matches.
     332             :      */
     333           0 :     if (mcv1_exists && mcv2_exists)
     334           0 :         selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers,
     335             :                                    mcv1_length,
     336             :                                    mcv2_slot.values, mcv2_slot.numbers,
     337             :                                    mcv2_length,
     338             :                                    operator);
     339             : 
     340             :     /*
     341             :      * Add in selectivities for MCV vs histogram matches, scaling according to
     342             :      * the fractions of the populations represented by the histograms. Note
     343             :      * that the second case needs to commute the operator.
     344             :      */
     345           0 :     if (mcv1_exists && hist2_exists)
     346           0 :         selec += (1.0 - nullfrac2 - sumcommon2) *
     347           0 :             inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length,
     348             :                               hist2_slot.values, hist2_slot.nvalues,
     349             :                               opr_codenum);
     350           0 :     if (mcv2_exists && hist1_exists)
     351           0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     352           0 :             inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length,
     353             :                               hist1_slot.values, hist1_slot.nvalues,
     354             :                               -opr_codenum);
     355             : 
     356             :     /*
     357             :      * Add in selectivity for histogram vs histogram matches, again scaling
     358             :      * appropriately.
     359             :      */
     360           0 :     if (hist1_exists && hist2_exists)
     361           0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     362           0 :             (1.0 - nullfrac2 - sumcommon2) *
     363           0 :             inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues,
     364             :                                          hist2_slot.values, hist2_slot.nvalues,
     365             :                                          opr_codenum);
     366             : 
     367             :     /*
     368             :      * If useful statistics are not available then use the default estimate.
     369             :      * We can apply null fractions if known, though.
     370             :      */
     371           0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     372           0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     373             : 
     374             :     /* Release stats. */
     375           0 :     free_attstatsslot(&mcv1_slot);
     376           0 :     free_attstatsslot(&mcv2_slot);
     377           0 :     free_attstatsslot(&hist1_slot);
     378           0 :     free_attstatsslot(&hist2_slot);
     379             : 
     380           0 :     return selec;
     381             : }
     382             : 
     383             : /*
     384             :  * Semi join selectivity estimation for subnet inclusion/overlap operators
     385             :  *
     386             :  * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
     387             :  * histogram selectivity for semi/anti join cases.
     388             :  */
     389             : static Selectivity
     390           0 : networkjoinsel_semi(Oid operator,
     391             :                     VariableStatData *vardata1, VariableStatData *vardata2)
     392             : {
     393             :     Form_pg_statistic stats;
     394           0 :     Selectivity selec = 0.0,
     395           0 :                 sumcommon1 = 0.0,
     396           0 :                 sumcommon2 = 0.0;
     397           0 :     double      nullfrac1 = 0.0,
     398           0 :                 nullfrac2 = 0.0,
     399           0 :                 hist2_weight = 0.0;
     400           0 :     bool        mcv1_exists = false,
     401           0 :                 mcv2_exists = false,
     402           0 :                 hist1_exists = false,
     403           0 :                 hist2_exists = false;
     404             :     int         opr_codenum;
     405             :     FmgrInfo    proc;
     406             :     int         i,
     407           0 :                 mcv1_length = 0,
     408           0 :                 mcv2_length = 0;
     409             :     AttStatsSlot mcv1_slot;
     410             :     AttStatsSlot mcv2_slot;
     411             :     AttStatsSlot hist1_slot;
     412             :     AttStatsSlot hist2_slot;
     413             : 
     414           0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     415             :     {
     416           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     417           0 :         nullfrac1 = stats->stanullfrac;
     418             : 
     419           0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     420             :                                        STATISTIC_KIND_MCV, InvalidOid,
     421             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     422           0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     423             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     424             :                                         ATTSTATSSLOT_VALUES);
     425             :         /* Arbitrarily limit number of MCVs considered */
     426           0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     427           0 :         if (mcv1_exists)
     428           0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     429             :     }
     430             :     else
     431             :     {
     432           0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     433           0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     434             :     }
     435             : 
     436           0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     437             :     {
     438           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     439           0 :         nullfrac2 = stats->stanullfrac;
     440             : 
     441           0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     442             :                                        STATISTIC_KIND_MCV, InvalidOid,
     443             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     444           0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     445             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     446             :                                         ATTSTATSSLOT_VALUES);
     447             :         /* Arbitrarily limit number of MCVs considered */
     448           0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     449           0 :         if (mcv2_exists)
     450           0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     451             :     }
     452             :     else
     453             :     {
     454           0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     455           0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     456             :     }
     457             : 
     458           0 :     opr_codenum = inet_opr_codenum(operator);
     459           0 :     fmgr_info(get_opcode(operator), &proc);
     460             : 
     461             :     /* Estimate number of input rows represented by RHS histogram. */
     462           0 :     if (hist2_exists && vardata2->rel)
     463           0 :         hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;
     464             : 
     465             :     /*
     466             :      * Consider each element of the LHS MCV list, matching it to whatever RHS
     467             :      * stats we have.  Scale according to the known frequency of the MCV.
     468             :      */
     469           0 :     if (mcv1_exists && (mcv2_exists || hist2_exists))
     470             :     {
     471           0 :         for (i = 0; i < mcv1_length; i++)
     472             :         {
     473           0 :             selec += mcv1_slot.numbers[i] *
     474           0 :                 inet_semi_join_sel(mcv1_slot.values[i],
     475             :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     476             :                                    hist2_exists,
     477             :                                    hist2_slot.values, hist2_slot.nvalues,
     478             :                                    hist2_weight,
     479             :                                    &proc, opr_codenum);
     480             :         }
     481             :     }
     482             : 
     483             :     /*
     484             :      * Consider each element of the LHS histogram, except for the first and
     485             :      * last elements, which we exclude on the grounds that they're outliers
     486             :      * and thus not very representative.  Scale on the assumption that each
     487             :      * such histogram element represents an equal share of the LHS histogram
     488             :      * population (which is a bit bogus, because the members of its bucket may
     489             :      * not all act the same with respect to the join clause, but it's hard to
     490             :      * do better).
     491             :      *
     492             :      * If there are too many histogram elements, decimate to limit runtime.
     493             :      */
     494           0 :     if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists))
     495             :     {
     496           0 :         double      hist_selec_sum = 0.0;
     497             :         int         k,
     498             :                     n;
     499             : 
     500           0 :         k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     501             : 
     502           0 :         n = 0;
     503           0 :         for (i = 1; i < hist1_slot.nvalues - 1; i += k)
     504             :         {
     505           0 :             hist_selec_sum +=
     506           0 :                 inet_semi_join_sel(hist1_slot.values[i],
     507             :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     508             :                                    hist2_exists,
     509             :                                    hist2_slot.values, hist2_slot.nvalues,
     510             :                                    hist2_weight,
     511             :                                    &proc, opr_codenum);
     512           0 :             n++;
     513             :         }
     514             : 
     515           0 :         selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
     516             :     }
     517             : 
     518             :     /*
     519             :      * If useful statistics are not available then use the default estimate.
     520             :      * We can apply null fractions if known, though.
     521             :      */
     522           0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     523           0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     524             : 
     525             :     /* Release stats. */
     526           0 :     free_attstatsslot(&mcv1_slot);
     527           0 :     free_attstatsslot(&mcv2_slot);
     528           0 :     free_attstatsslot(&hist1_slot);
     529           0 :     free_attstatsslot(&hist2_slot);
     530             : 
     531           0 :     return selec;
     532             : }
     533             : 
     534             : /*
     535             :  * Compute the fraction of a relation's population that is represented
     536             :  * by the MCV list.
     537             :  */
     538             : static Selectivity
     539           0 : mcv_population(float4 *mcv_numbers, int mcv_nvalues)
     540             : {
     541           0 :     Selectivity sumcommon = 0.0;
     542             :     int         i;
     543             : 
     544           0 :     for (i = 0; i < mcv_nvalues; i++)
     545             :     {
     546           0 :         sumcommon += mcv_numbers[i];
     547             :     }
     548             : 
     549           0 :     return sumcommon;
     550             : }
     551             : 
     552             : /*
     553             :  * Inet histogram vs single value selectivity estimation
     554             :  *
     555             :  * Estimate the fraction of the histogram population that satisfies
     556             :  * "value OPR CONST".  (The result needs to be scaled to reflect the
     557             :  * proportion of the total population represented by the histogram.)
     558             :  *
     559             :  * The histogram is originally for the inet btree comparison operators.
     560             :  * Only the common bits of the network part and the length of the network part
     561             :  * (masklen) are interesting for the subnet inclusion operators.  Fortunately,
     562             :  * btree comparison treats the network part as the major sort key.  Even so,
     563             :  * the length of the network part would not really be significant in the
     564             :  * histogram.  This would lead to big mistakes for data sets with uneven
     565             :  * masklen distribution.  To reduce this problem, comparisons with the left
     566             :  * and the right sides of the buckets are used together.
     567             :  *
     568             :  * Histogram bucket matches are calculated in two forms.  If the constant
     569             :  * matches both bucket endpoints the bucket is considered as fully matched.
     570             :  * The second form is to match the bucket partially; we recognize this when
     571             :  * the constant matches just one endpoint, or the two endpoints fall on
     572             :  * opposite sides of the constant.  (Note that when the constant matches an
     573             :  * interior histogram element, it gets credit for partial matches to the
     574             :  * buckets on both sides, while a match to a histogram endpoint gets credit
     575             :  * for only one partial match.  This is desirable.)
     576             :  *
     577             :  * The divider in the partial bucket match is imagined as the distance
     578             :  * between the decisive bits and the common bits of the addresses.  It will
     579             :  * be used as a power of two as it is the natural scale for the IP network
     580             :  * inclusion.  This partial bucket match divider calculation is an empirical
     581             :  * formula and subject to change with more experiment.
     582             :  *
     583             :  * For a partial match, we try to calculate dividers for both of the
     584             :  * boundaries.  If the address family of a boundary value does not match the
     585             :  * constant or comparison of the length of the network parts is not correct
     586             :  * for the operator, the divider for that boundary will not be taken into
     587             :  * account.  If both of the dividers are valid, the greater one will be used
     588             :  * to minimize the mistake in buckets that have disparate masklens.  This
     589             :  * calculation is unfair when dividers can be calculated for both of the
     590             :  * boundaries but they are far from each other; but it is not a common
     591             :  * situation as the boundaries are expected to share most of their significant
     592             :  * bits of their masklens.  The mistake would be greater, if we would use the
     593             :  * minimum instead of the maximum, and we don't know a sensible way to combine
     594             :  * them.
     595             :  *
     596             :  * For partial match in buckets that have different address families on the
     597             :  * left and right sides, only the boundary with the same address family is
     598             :  * taken into consideration.  This can cause more mistakes for these buckets
     599             :  * if the masklens of their boundaries are also disparate.  But this can only
     600             :  * happen in one bucket, since only two address families exist.  It seems a
     601             :  * better option than not considering these buckets at all.
     602             :  */
     603             : static Selectivity
     604           0 : inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue,
     605             :                     int opr_codenum)
     606             : {
     607           0 :     Selectivity match = 0.0;
     608             :     inet       *query,
     609             :                *left,
     610             :                *right;
     611             :     int         i,
     612             :                 k,
     613             :                 n;
     614             :     int         left_order,
     615             :                 right_order,
     616             :                 left_divider,
     617             :                 right_divider;
     618             : 
     619             :     /* guard against zero-divide below */
     620           0 :     if (nvalues <= 1)
     621           0 :         return 0.0;
     622             : 
     623             :     /* if there are too many histogram elements, decimate to limit runtime */
     624           0 :     k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;
     625             : 
     626           0 :     query = DatumGetInetPP(constvalue);
     627             : 
     628             :     /* "left" is the left boundary value of the current bucket ... */
     629           0 :     left = DatumGetInetPP(values[0]);
     630           0 :     left_order = inet_inclusion_cmp(left, query, opr_codenum);
     631             : 
     632           0 :     n = 0;
     633           0 :     for (i = k; i < nvalues; i += k)
     634             :     {
     635             :         /* ... and "right" is the right boundary value */
     636           0 :         right = DatumGetInetPP(values[i]);
     637           0 :         right_order = inet_inclusion_cmp(right, query, opr_codenum);
     638             : 
     639           0 :         if (left_order == 0 && right_order == 0)
     640             :         {
     641             :             /* The whole bucket matches, since both endpoints do. */
     642           0 :             match += 1.0;
     643             :         }
     644           0 :         else if ((left_order <= 0 && right_order >= 0) ||
     645           0 :                  (left_order >= 0 && right_order <= 0))
     646             :         {
     647             :             /* Partial bucket match. */
     648           0 :             left_divider = inet_hist_match_divider(left, query, opr_codenum);
     649           0 :             right_divider = inet_hist_match_divider(right, query, opr_codenum);
     650             : 
     651           0 :             if (left_divider >= 0 || right_divider >= 0)
     652           0 :                 match += 1.0 / pow(2.0, Max(left_divider, right_divider));
     653             :         }
     654             : 
     655             :         /* Shift the variables. */
     656           0 :         left = right;
     657           0 :         left_order = right_order;
     658             : 
     659             :         /* Count the number of buckets considered. */
     660           0 :         n++;
     661             :     }
     662             : 
     663           0 :     return match / n;
     664             : }
     665             : 
     666             : /*
     667             :  * Inet MCV vs MCV join selectivity estimation
     668             :  *
     669             :  * We simply add up the fractions of the populations that satisfy the clause.
     670             :  * The result is exact and does not need to be scaled further.
     671             :  */
     672             : static Selectivity
     673           0 : inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
     674             :                   Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
     675             :                   Oid operator)
     676             : {
     677           0 :     Selectivity selec = 0.0;
     678             :     FmgrInfo    proc;
     679             :     int         i,
     680             :                 j;
     681             : 
     682           0 :     fmgr_info(get_opcode(operator), &proc);
     683             : 
     684           0 :     for (i = 0; i < mcv1_nvalues; i++)
     685             :     {
     686           0 :         for (j = 0; j < mcv2_nvalues; j++)
     687           0 :             if (DatumGetBool(FunctionCall2(&proc,
     688             :                                            mcv1_values[i],
     689             :                                            mcv2_values[j])))
     690           0 :                 selec += mcv1_numbers[i] * mcv2_numbers[j];
     691             :     }
     692           0 :     return selec;
     693             : }
     694             : 
     695             : /*
     696             :  * Inet MCV vs histogram join selectivity estimation
     697             :  *
     698             :  * For each MCV on the lefthand side, estimate the fraction of the righthand's
     699             :  * histogram population that satisfies the join clause, and add those up,
     700             :  * scaling by the MCV's frequency.  The result still needs to be scaled
     701             :  * according to the fraction of the righthand's population represented by
     702             :  * the histogram.
     703             :  */
     704             : static Selectivity
     705           0 : inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
     706             :                   Datum *hist_values, int hist_nvalues,
     707             :                   int opr_codenum)
     708             : {
     709           0 :     Selectivity selec = 0.0;
     710             :     int         i;
     711             : 
     712             :     /*
     713             :      * We'll call inet_hist_value_selec with the histogram on the left, so we
     714             :      * must commute the operator.
     715             :      */
     716           0 :     opr_codenum = -opr_codenum;
     717             : 
     718           0 :     for (i = 0; i < mcv_nvalues; i++)
     719             :     {
     720           0 :         selec += mcv_numbers[i] *
     721           0 :             inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i],
     722             :                                 opr_codenum);
     723             :     }
     724           0 :     return selec;
     725             : }
     726             : 
     727             : /*
     728             :  * Inet histogram vs histogram join selectivity estimation
     729             :  *
     730             :  * Here, we take all values listed in the second histogram (except for the
     731             :  * first and last elements, which are excluded on the grounds of possibly
     732             :  * not being very representative) and treat them as a uniform sample of
     733             :  * the non-MCV population for that relation.  For each one, we apply
     734             :  * inet_hist_value_selec to see what fraction of the first histogram
     735             :  * it matches.
     736             :  *
     737             :  * We could alternatively do this the other way around using the operator's
     738             :  * commutator.  XXX would it be worthwhile to do it both ways and take the
     739             :  * average?  That would at least avoid non-commutative estimation results.
     740             :  */
     741             : static Selectivity
     742           0 : inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues,
     743             :                              Datum *hist2_values, int hist2_nvalues,
     744             :                              int opr_codenum)
     745             : {
     746           0 :     double      match = 0.0;
     747             :     int         i,
     748             :                 k,
     749             :                 n;
     750             : 
     751           0 :     if (hist2_nvalues <= 2)
     752           0 :         return 0.0;             /* no interior histogram elements */
     753             : 
     754             :     /* if there are too many histogram elements, decimate to limit runtime */
     755           0 :     k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     756             : 
     757           0 :     n = 0;
     758           0 :     for (i = 1; i < hist2_nvalues - 1; i += k)
     759             :     {
     760           0 :         match += inet_hist_value_sel(hist1_values, hist1_nvalues,
     761           0 :                                      hist2_values[i], opr_codenum);
     762           0 :         n++;
     763             :     }
     764             : 
     765           0 :     return match / n;
     766             : }
     767             : 
     768             : /*
     769             :  * Inet semi join selectivity estimation for one value
     770             :  *
     771             :  * The function calculates the probability that there is at least one row
     772             :  * in the RHS table that satisfies the "lhs_value op column" condition.
     773             :  * It is used in semi join estimation to check a sample from the left hand
     774             :  * side table.
     775             :  *
     776             :  * The MCV and histogram from the right hand side table should be provided as
     777             :  * arguments with the lhs_value from the left hand side table for the join.
     778             :  * hist_weight is the total number of rows represented by the histogram.
     779             :  * For example, if the table has 1000 rows, and 10% of the rows are in the MCV
     780             :  * list, and another 10% are NULLs, hist_weight would be 800.
     781             :  *
     782             :  * First, the lhs_value will be matched to the most common values.  If it
     783             :  * matches any of them, 1.0 will be returned, because then there is surely
     784             :  * a match.
     785             :  *
     786             :  * Otherwise, the histogram will be used to estimate the number of rows in
     787             :  * the second table that match the condition.  If the estimate is greater
     788             :  * than 1.0, 1.0 will be returned, because it means there is a greater chance
     789             :  * that the lhs_value will match more than one row in the table.  If it is
     790             :  * between 0.0 and 1.0, it will be returned as the probability.
     791             :  */
     792             : static Selectivity
     793           0 : inet_semi_join_sel(Datum lhs_value,
     794             :                    bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
     795             :                    bool hist_exists, Datum *hist_values, int hist_nvalues,
     796             :                    double hist_weight,
     797             :                    FmgrInfo *proc, int opr_codenum)
     798             : {
     799           0 :     if (mcv_exists)
     800             :     {
     801             :         int         i;
     802             : 
     803           0 :         for (i = 0; i < mcv_nvalues; i++)
     804             :         {
     805           0 :             if (DatumGetBool(FunctionCall2(proc,
     806             :                                            lhs_value,
     807             :                                            mcv_values[i])))
     808           0 :                 return 1.0;
     809             :         }
     810             :     }
     811             : 
     812           0 :     if (hist_exists && hist_weight > 0)
     813             :     {
     814             :         Selectivity hist_selec;
     815             : 
     816             :         /* Commute operator, since we're passing lhs_value on the right */
     817           0 :         hist_selec = inet_hist_value_sel(hist_values, hist_nvalues,
     818             :                                          lhs_value, -opr_codenum);
     819             : 
     820           0 :         if (hist_selec > 0)
     821           0 :             return Min(1.0, hist_weight * hist_selec);
     822             :     }
     823             : 
     824           0 :     return 0.0;
     825             : }
     826             : 
     827             : /*
     828             :  * Assign useful code numbers for the subnet inclusion/overlap operators
     829             :  *
     830             :  * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
     831             :  * on the exact codes assigned here; but many other places in this file
     832             :  * know that they can negate a code to obtain the code for the commutator
     833             :  * operator.
     834             :  */
     835             : static int
     836           0 : inet_opr_codenum(Oid operator)
     837             : {
     838           0 :     switch (operator)
     839             :     {
     840           0 :         case OID_INET_SUP_OP:
     841           0 :             return -2;
     842           0 :         case OID_INET_SUPEQ_OP:
     843           0 :             return -1;
     844           0 :         case OID_INET_OVERLAP_OP:
     845           0 :             return 0;
     846           0 :         case OID_INET_SUBEQ_OP:
     847           0 :             return 1;
     848           0 :         case OID_INET_SUB_OP:
     849           0 :             return 2;
     850           0 :         default:
     851           0 :             elog(ERROR, "unrecognized operator %u for inet selectivity",
     852             :                  operator);
     853             :     }
     854             :     return 0;                   /* unreached, but keep compiler quiet */
     855             : }
     856             : 
     857             : /*
     858             :  * Comparison function for the subnet inclusion/overlap operators
     859             :  *
     860             :  * If the comparison is okay for the specified inclusion operator, the return
     861             :  * value will be 0.  Otherwise the return value will be less than or greater
     862             :  * than 0 as appropriate for the operator.
     863             :  *
     864             :  * Comparison is compatible with the basic comparison function for the inet
     865             :  * type.  See network_cmp_internal() in network.c for the original.  Basic
     866             :  * comparison operators are implemented with the network_cmp_internal()
     867             :  * function.  It is possible to implement the subnet inclusion operators with
     868             :  * this function.
     869             :  *
     870             :  * Comparison is first on the common bits of the network part, then on the
     871             :  * length of the network part (masklen) as in the network_cmp_internal()
     872             :  * function.  Only the first part is in this function.  The second part is
     873             :  * separated to another function for reusability.  The difference between the
     874             :  * second part and the original network_cmp_internal() is that the inclusion
     875             :  * operator is considered while comparing the lengths of the network parts.
     876             :  * See the inet_masklen_inclusion_cmp() function below.
     877             :  */
     878             : static int
     879           0 : inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     880             : {
     881           0 :     if (ip_family(left) == ip_family(right))
     882             :     {
     883             :         int         order;
     884             : 
     885           0 :         order = bitncmp(ip_addr(left), ip_addr(right),
     886           0 :                         Min(ip_bits(left), ip_bits(right)));
     887           0 :         if (order != 0)
     888           0 :             return order;
     889             : 
     890           0 :         return inet_masklen_inclusion_cmp(left, right, opr_codenum);
     891             :     }
     892             : 
     893           0 :     return ip_family(left) - ip_family(right);
     894             : }
     895             : 
     896             : /*
     897             :  * Masklen comparison function for the subnet inclusion/overlap operators
     898             :  *
     899             :  * Compares the lengths of the network parts of the inputs.  If the comparison
     900             :  * is okay for the specified inclusion operator, the return value will be 0.
     901             :  * Otherwise the return value will be less than or greater than 0 as
     902             :  * appropriate for the operator.
     903             :  */
     904             : static int
     905           0 : inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     906             : {
     907             :     int         order;
     908             : 
     909           0 :     order = (int) ip_bits(left) - (int) ip_bits(right);
     910             : 
     911             :     /*
     912             :      * Return 0 if the operator would accept this combination of masklens.
     913             :      * Note that opr_codenum zero (overlaps) will accept all cases.
     914             :      */
     915           0 :     if ((order > 0 && opr_codenum >= 0) ||
     916           0 :         (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) ||
     917           0 :         (order < 0 && opr_codenum <= 0))
     918           0 :         return 0;
     919             : 
     920             :     /*
     921             :      * Otherwise, return a negative value for sup/supeq (notionally, the RHS
     922             :      * needs to have a larger masklen than it has, which would make it sort
     923             :      * later), or a positive value for sub/subeq (vice versa).
     924             :      */
     925           0 :     return opr_codenum;
     926             : }
     927             : 
     928             : /*
     929             :  * Inet histogram partial match divider calculation
     930             :  *
     931             :  * First the families and the lengths of the network parts are compared using
     932             :  * the subnet inclusion operator.  If those are acceptable for the operator,
     933             :  * the divider will be calculated using the masklens and the common bits of
     934             :  * the addresses.  -1 will be returned if it cannot be calculated.
     935             :  *
     936             :  * See commentary for inet_hist_value_sel() for some rationale for this.
     937             :  */
     938             : static int
     939           0 : inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
     940             : {
     941           0 :     if (ip_family(boundary) == ip_family(query) &&
     942           0 :         inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0)
     943             :     {
     944             :         int         min_bits,
     945             :                     decisive_bits;
     946             : 
     947           0 :         min_bits = Min(ip_bits(boundary), ip_bits(query));
     948             : 
     949             :         /*
     950             :          * Set decisive_bits to the masklen of the one that should contain the
     951             :          * other according to the operator.
     952             :          */
     953           0 :         if (opr_codenum < 0)
     954           0 :             decisive_bits = ip_bits(boundary);
     955           0 :         else if (opr_codenum > 0)
     956           0 :             decisive_bits = ip_bits(query);
     957             :         else
     958           0 :             decisive_bits = min_bits;
     959             : 
     960             :         /*
     961             :          * Now return the number of non-common decisive bits.  (This will be
     962             :          * zero if the boundary and query in fact match, else positive.)
     963             :          */
     964           0 :         if (min_bits > 0)
     965           0 :             return decisive_bits - bitncommon(ip_addr(boundary),
     966           0 :                                               ip_addr(query),
     967             :                                               min_bits);
     968           0 :         return decisive_bits;
     969             :     }
     970             : 
     971           0 :     return -1;
     972             : }

Generated by: LCOV version 1.13