LCOV - code coverage report
Current view: top level - src/backend/utils/adt - network_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta1 Lines: 12 261 4.6 %
Date: 2019-06-16 14:06:46 Functions: 1 14 7.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * network_selfuncs.c
       4             :  *    Functions for selectivity estimation of inet/cidr operators
       5             :  *
       6             :  * This module provides estimators for the subnet inclusion and overlap
       7             :  * operators.  Estimates are based on null fraction, most common values,
       8             :  * and histogram of inet/cidr columns.
       9             :  *
      10             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  *
      14             :  * IDENTIFICATION
      15             :  *    src/backend/utils/adt/network_selfuncs.c
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : #include "postgres.h"
      20             : 
      21             : #include <math.h>
      22             : 
      23             : #include "access/htup_details.h"
      24             : #include "catalog/pg_operator.h"
      25             : #include "catalog/pg_statistic.h"
      26             : #include "utils/builtins.h"
      27             : #include "utils/inet.h"
      28             : #include "utils/lsyscache.h"
      29             : #include "utils/selfuncs.h"
      30             : 
      31             : 
      32             : /* Default selectivity for the inet overlap operator */
      33             : #define DEFAULT_OVERLAP_SEL 0.01
      34             : 
      35             : /* Default selectivity for the various inclusion operators */
      36             : #define DEFAULT_INCLUSION_SEL 0.005
      37             : 
      38             : /* Default selectivity for specified operator */
      39             : #define DEFAULT_SEL(operator) \
      40             :     ((operator) == OID_INET_OVERLAP_OP ? \
      41             :      DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
      42             : 
      43             : /* Maximum number of items to consider in join selectivity calculations */
      44             : #define MAX_CONSIDERED_ELEMS 1024
      45             : 
      46             : static Selectivity networkjoinsel_inner(Oid operator,
      47             :                                         VariableStatData *vardata1, VariableStatData *vardata2);
      48             : static Selectivity networkjoinsel_semi(Oid operator,
      49             :                                        VariableStatData *vardata1, VariableStatData *vardata2);
      50             : static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
      51             : static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
      52             :                                        Datum constvalue, int opr_codenum);
      53             : static Selectivity inet_mcv_join_sel(Datum *mcv1_values,
      54             :                                      float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
      55             :                                      float4 *mcv2_numbers, int mcv2_nvalues, Oid operator);
      56             : static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers,
      57             :                                      int mcv_nvalues, Datum *hist_values, int hist_nvalues,
      58             :                                      int opr_codenum);
      59             : static Selectivity inet_hist_inclusion_join_sel(Datum *hist1_values,
      60             :                                                 int hist1_nvalues,
      61             :                                                 Datum *hist2_values, int hist2_nvalues,
      62             :                                                 int opr_codenum);
      63             : static Selectivity inet_semi_join_sel(Datum lhs_value,
      64             :                                       bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
      65             :                                       bool hist_exists, Datum *hist_values, int hist_nvalues,
      66             :                                       double hist_weight,
      67             :                                       FmgrInfo *proc, int opr_codenum);
      68             : static int  inet_opr_codenum(Oid operator);
      69             : static int  inet_inclusion_cmp(inet *left, inet *right, int opr_codenum);
      70             : static int  inet_masklen_inclusion_cmp(inet *left, inet *right,
      71             :                                        int opr_codenum);
      72             : static int  inet_hist_match_divider(inet *boundary, inet *query,
      73             :                                     int opr_codenum);
      74             : 
      75             : /*
      76             :  * Selectivity estimation for the subnet inclusion/overlap operators
      77             :  */
      78             : Datum
      79         600 : networksel(PG_FUNCTION_ARGS)
      80             : {
      81         600 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
      82         600 :     Oid         operator = PG_GETARG_OID(1);
      83         600 :     List       *args = (List *) PG_GETARG_POINTER(2);
      84         600 :     int         varRelid = PG_GETARG_INT32(3);
      85             :     VariableStatData vardata;
      86             :     Node       *other;
      87             :     bool        varonleft;
      88             :     Selectivity selec,
      89             :                 mcv_selec,
      90             :                 non_mcv_selec;
      91             :     Datum       constvalue;
      92             :     Form_pg_statistic stats;
      93             :     AttStatsSlot hslot;
      94             :     double      sumcommon,
      95             :                 nullfrac;
      96             :     FmgrInfo    proc;
      97             : 
      98             :     /*
      99             :      * If expression is not (variable op something) or (something op
     100             :      * variable), then punt and return a default estimate.
     101             :      */
     102         600 :     if (!get_restriction_variable(root, args, varRelid,
     103             :                                   &vardata, &other, &varonleft))
     104           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     105             : 
     106             :     /*
     107             :      * Can't do anything useful if the something is not a constant, either.
     108             :      */
     109         600 :     if (!IsA(other, Const))
     110             :     {
     111           0 :         ReleaseVariableStats(vardata);
     112           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     113             :     }
     114             : 
     115             :     /* All of the operators handled here are strict. */
     116         600 :     if (((Const *) other)->constisnull)
     117             :     {
     118           0 :         ReleaseVariableStats(vardata);
     119           0 :         PG_RETURN_FLOAT8(0.0);
     120             :     }
     121         600 :     constvalue = ((Const *) other)->constvalue;
     122             : 
     123             :     /* Otherwise, we need stats in order to produce a non-default estimate. */
     124         600 :     if (!HeapTupleIsValid(vardata.statsTuple))
     125             :     {
     126         600 :         ReleaseVariableStats(vardata);
     127         600 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     128             :     }
     129             : 
     130           0 :     stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     131           0 :     nullfrac = stats->stanullfrac;
     132             : 
     133             :     /*
     134             :      * If we have most-common-values info, add up the fractions of the MCV
     135             :      * entries that satisfy MCV OP CONST.  These fractions contribute directly
     136             :      * to the result selectivity.  Also add up the total fraction represented
     137             :      * by MCV entries.
     138             :      */
     139           0 :     fmgr_info(get_opcode(operator), &proc);
     140           0 :     mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
     141             :                                 &sumcommon);
     142             : 
     143             :     /*
     144             :      * If we have a histogram, use it to estimate the proportion of the
     145             :      * non-MCV population that satisfies the clause.  If we don't, apply the
     146             :      * default selectivity to that population.
     147             :      */
     148           0 :     if (get_attstatsslot(&hslot, vardata.statsTuple,
     149             :                          STATISTIC_KIND_HISTOGRAM, InvalidOid,
     150             :                          ATTSTATSSLOT_VALUES))
     151             :     {
     152           0 :         int         opr_codenum = inet_opr_codenum(operator);
     153             : 
     154             :         /* Commute if needed, so we can consider histogram to be on the left */
     155           0 :         if (!varonleft)
     156           0 :             opr_codenum = -opr_codenum;
     157           0 :         non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
     158             :                                             constvalue, opr_codenum);
     159             : 
     160           0 :         free_attstatsslot(&hslot);
     161             :     }
     162             :     else
     163           0 :         non_mcv_selec = DEFAULT_SEL(operator);
     164             : 
     165             :     /* Combine selectivities for MCV and non-MCV populations */
     166           0 :     selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;
     167             : 
     168             :     /* Result should be in range, but make sure... */
     169           0 :     CLAMP_PROBABILITY(selec);
     170             : 
     171           0 :     ReleaseVariableStats(vardata);
     172             : 
     173           0 :     PG_RETURN_FLOAT8(selec);
     174             : }
     175             : 
     176             : /*
     177             :  * Join selectivity estimation for the subnet inclusion/overlap operators
     178             :  *
     179             :  * This function has the same structure as eqjoinsel() in selfuncs.c.
     180             :  *
     181             :  * Throughout networkjoinsel and its subroutines, we have a performance issue
     182             :  * in that the amount of work to be done is O(N^2) in the length of the MCV
     183             :  * and histogram arrays.  To keep the runtime from getting out of hand when
     184             :  * large statistics targets have been set, we arbitrarily limit the number of
     185             :  * values considered to 1024 (MAX_CONSIDERED_ELEMS).  For the MCV arrays, this
     186             :  * is easy: just consider at most the first N elements.  (Since the MCVs are
     187             :  * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
     188             :  * For the histogram arrays, we decimate; that is consider only every k'th
     189             :  * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
     190             :  * elements are considered.  This should still give us a good random sample of
     191             :  * the non-MCV population.  Decimation is done on-the-fly in the loops that
     192             :  * iterate over the histogram arrays.
     193             :  */
     194             : Datum
     195           0 : networkjoinsel(PG_FUNCTION_ARGS)
     196             : {
     197           0 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     198           0 :     Oid         operator = PG_GETARG_OID(1);
     199           0 :     List       *args = (List *) PG_GETARG_POINTER(2);
     200             : #ifdef NOT_USED
     201             :     JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
     202             : #endif
     203           0 :     SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
     204             :     double      selec;
     205             :     VariableStatData vardata1;
     206             :     VariableStatData vardata2;
     207             :     bool        join_is_reversed;
     208             : 
     209           0 :     get_join_variables(root, args, sjinfo,
     210             :                        &vardata1, &vardata2, &join_is_reversed);
     211             : 
     212           0 :     switch (sjinfo->jointype)
     213             :     {
     214             :         case JOIN_INNER:
     215             :         case JOIN_LEFT:
     216             :         case JOIN_FULL:
     217             : 
     218             :             /*
     219             :              * Selectivity for left/full join is not exactly the same as inner
     220             :              * join, but we neglect the difference, as eqjoinsel does.
     221             :              */
     222           0 :             selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
     223           0 :             break;
     224             :         case JOIN_SEMI:
     225             :         case JOIN_ANTI:
     226             :             /* Here, it's important that we pass the outer var on the left. */
     227           0 :             if (!join_is_reversed)
     228           0 :                 selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
     229             :             else
     230           0 :                 selec = networkjoinsel_semi(get_commutator(operator),
     231             :                                             &vardata2, &vardata1);
     232           0 :             break;
     233             :         default:
     234             :             /* other values not expected here */
     235           0 :             elog(ERROR, "unrecognized join type: %d",
     236             :                  (int) sjinfo->jointype);
     237             :             selec = 0;          /* keep compiler quiet */
     238             :             break;
     239             :     }
     240             : 
     241           0 :     ReleaseVariableStats(vardata1);
     242           0 :     ReleaseVariableStats(vardata2);
     243             : 
     244           0 :     CLAMP_PROBABILITY(selec);
     245             : 
     246           0 :     PG_RETURN_FLOAT8((float8) selec);
     247             : }
     248             : 
     249             : /*
     250             :  * Inner join selectivity estimation for subnet inclusion/overlap operators
     251             :  *
     252             :  * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
     253             :  * selectivity for join using the subnet inclusion operators.  Unlike the
     254             :  * join selectivity function for the equality operator, eqjoinsel_inner(),
     255             :  * one to one matching of the values is not enough.  Network inclusion
     256             :  * operators are likely to match many to many, so we must check all pairs.
     257             :  * (Note: it might be possible to exploit understanding of the histogram's
     258             :  * btree ordering to reduce the work needed, but we don't currently try.)
     259             :  * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
     260             :  */
     261             : static Selectivity
     262           0 : networkjoinsel_inner(Oid operator,
     263             :                      VariableStatData *vardata1, VariableStatData *vardata2)
     264             : {
     265             :     Form_pg_statistic stats;
     266           0 :     double      nullfrac1 = 0.0,
     267           0 :                 nullfrac2 = 0.0;
     268           0 :     Selectivity selec = 0.0,
     269           0 :                 sumcommon1 = 0.0,
     270           0 :                 sumcommon2 = 0.0;
     271           0 :     bool        mcv1_exists = false,
     272           0 :                 mcv2_exists = false,
     273           0 :                 hist1_exists = false,
     274           0 :                 hist2_exists = false;
     275             :     int         opr_codenum;
     276           0 :     int         mcv1_length = 0,
     277           0 :                 mcv2_length = 0;
     278             :     AttStatsSlot mcv1_slot;
     279             :     AttStatsSlot mcv2_slot;
     280             :     AttStatsSlot hist1_slot;
     281             :     AttStatsSlot hist2_slot;
     282             : 
     283           0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     284             :     {
     285           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     286           0 :         nullfrac1 = stats->stanullfrac;
     287             : 
     288           0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     289             :                                        STATISTIC_KIND_MCV, InvalidOid,
     290             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     291           0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     292             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     293             :                                         ATTSTATSSLOT_VALUES);
     294             :         /* Arbitrarily limit number of MCVs considered */
     295           0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     296           0 :         if (mcv1_exists)
     297           0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     298             :     }
     299             :     else
     300             :     {
     301           0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     302           0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     303             :     }
     304             : 
     305           0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     306             :     {
     307           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     308           0 :         nullfrac2 = stats->stanullfrac;
     309             : 
     310           0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     311             :                                        STATISTIC_KIND_MCV, InvalidOid,
     312             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     313           0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     314             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     315             :                                         ATTSTATSSLOT_VALUES);
     316             :         /* Arbitrarily limit number of MCVs considered */
     317           0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     318           0 :         if (mcv2_exists)
     319           0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     320             :     }
     321             :     else
     322             :     {
     323           0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     324           0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     325             :     }
     326             : 
     327           0 :     opr_codenum = inet_opr_codenum(operator);
     328             : 
     329             :     /*
     330             :      * Calculate selectivity for MCV vs MCV matches.
     331             :      */
     332           0 :     if (mcv1_exists && mcv2_exists)
     333           0 :         selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers,
     334             :                                    mcv1_length,
     335             :                                    mcv2_slot.values, mcv2_slot.numbers,
     336             :                                    mcv2_length,
     337             :                                    operator);
     338             : 
     339             :     /*
     340             :      * Add in selectivities for MCV vs histogram matches, scaling according to
     341             :      * the fractions of the populations represented by the histograms. Note
     342             :      * that the second case needs to commute the operator.
     343             :      */
     344           0 :     if (mcv1_exists && hist2_exists)
     345           0 :         selec += (1.0 - nullfrac2 - sumcommon2) *
     346           0 :             inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length,
     347             :                               hist2_slot.values, hist2_slot.nvalues,
     348             :                               opr_codenum);
     349           0 :     if (mcv2_exists && hist1_exists)
     350           0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     351           0 :             inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length,
     352             :                               hist1_slot.values, hist1_slot.nvalues,
     353             :                               -opr_codenum);
     354             : 
     355             :     /*
     356             :      * Add in selectivity for histogram vs histogram matches, again scaling
     357             :      * appropriately.
     358             :      */
     359           0 :     if (hist1_exists && hist2_exists)
     360           0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     361           0 :             (1.0 - nullfrac2 - sumcommon2) *
     362           0 :             inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues,
     363             :                                          hist2_slot.values, hist2_slot.nvalues,
     364             :                                          opr_codenum);
     365             : 
     366             :     /*
     367             :      * If useful statistics are not available then use the default estimate.
     368             :      * We can apply null fractions if known, though.
     369             :      */
     370           0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     371           0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     372             : 
     373             :     /* Release stats. */
     374           0 :     free_attstatsslot(&mcv1_slot);
     375           0 :     free_attstatsslot(&mcv2_slot);
     376           0 :     free_attstatsslot(&hist1_slot);
     377           0 :     free_attstatsslot(&hist2_slot);
     378             : 
     379           0 :     return selec;
     380             : }
     381             : 
     382             : /*
     383             :  * Semi join selectivity estimation for subnet inclusion/overlap operators
     384             :  *
     385             :  * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
     386             :  * histogram selectivity for semi/anti join cases.
     387             :  */
     388             : static Selectivity
     389           0 : networkjoinsel_semi(Oid operator,
     390             :                     VariableStatData *vardata1, VariableStatData *vardata2)
     391             : {
     392             :     Form_pg_statistic stats;
     393           0 :     Selectivity selec = 0.0,
     394           0 :                 sumcommon1 = 0.0,
     395           0 :                 sumcommon2 = 0.0;
     396           0 :     double      nullfrac1 = 0.0,
     397           0 :                 nullfrac2 = 0.0,
     398           0 :                 hist2_weight = 0.0;
     399           0 :     bool        mcv1_exists = false,
     400           0 :                 mcv2_exists = false,
     401           0 :                 hist1_exists = false,
     402           0 :                 hist2_exists = false;
     403             :     int         opr_codenum;
     404             :     FmgrInfo    proc;
     405             :     int         i,
     406           0 :                 mcv1_length = 0,
     407           0 :                 mcv2_length = 0;
     408             :     AttStatsSlot mcv1_slot;
     409             :     AttStatsSlot mcv2_slot;
     410             :     AttStatsSlot hist1_slot;
     411             :     AttStatsSlot hist2_slot;
     412             : 
     413           0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     414             :     {
     415           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     416           0 :         nullfrac1 = stats->stanullfrac;
     417             : 
     418           0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     419             :                                        STATISTIC_KIND_MCV, InvalidOid,
     420             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     421           0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     422             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     423             :                                         ATTSTATSSLOT_VALUES);
     424             :         /* Arbitrarily limit number of MCVs considered */
     425           0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     426           0 :         if (mcv1_exists)
     427           0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     428             :     }
     429             :     else
     430             :     {
     431           0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     432           0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     433             :     }
     434             : 
     435           0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     436             :     {
     437           0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     438           0 :         nullfrac2 = stats->stanullfrac;
     439             : 
     440           0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     441             :                                        STATISTIC_KIND_MCV, InvalidOid,
     442             :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     443           0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     444             :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     445             :                                         ATTSTATSSLOT_VALUES);
     446             :         /* Arbitrarily limit number of MCVs considered */
     447           0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     448           0 :         if (mcv2_exists)
     449           0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     450             :     }
     451             :     else
     452             :     {
     453           0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     454           0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     455             :     }
     456             : 
     457           0 :     opr_codenum = inet_opr_codenum(operator);
     458           0 :     fmgr_info(get_opcode(operator), &proc);
     459             : 
     460             :     /* Estimate number of input rows represented by RHS histogram. */
     461           0 :     if (hist2_exists && vardata2->rel)
     462           0 :         hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;
     463             : 
     464             :     /*
     465             :      * Consider each element of the LHS MCV list, matching it to whatever RHS
     466             :      * stats we have.  Scale according to the known frequency of the MCV.
     467             :      */
     468           0 :     if (mcv1_exists && (mcv2_exists || hist2_exists))
     469             :     {
     470           0 :         for (i = 0; i < mcv1_length; i++)
     471             :         {
     472           0 :             selec += mcv1_slot.numbers[i] *
     473           0 :                 inet_semi_join_sel(mcv1_slot.values[i],
     474             :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     475             :                                    hist2_exists,
     476             :                                    hist2_slot.values, hist2_slot.nvalues,
     477             :                                    hist2_weight,
     478             :                                    &proc, opr_codenum);
     479             :         }
     480             :     }
     481             : 
     482             :     /*
     483             :      * Consider each element of the LHS histogram, except for the first and
     484             :      * last elements, which we exclude on the grounds that they're outliers
     485             :      * and thus not very representative.  Scale on the assumption that each
     486             :      * such histogram element represents an equal share of the LHS histogram
     487             :      * population (which is a bit bogus, because the members of its bucket may
     488             :      * not all act the same with respect to the join clause, but it's hard to
     489             :      * do better).
     490             :      *
     491             :      * If there are too many histogram elements, decimate to limit runtime.
     492             :      */
     493           0 :     if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists))
     494             :     {
     495           0 :         double      hist_selec_sum = 0.0;
     496             :         int         k,
     497             :                     n;
     498             : 
     499           0 :         k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     500             : 
     501           0 :         n = 0;
     502           0 :         for (i = 1; i < hist1_slot.nvalues - 1; i += k)
     503             :         {
     504           0 :             hist_selec_sum +=
     505           0 :                 inet_semi_join_sel(hist1_slot.values[i],
     506             :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     507             :                                    hist2_exists,
     508             :                                    hist2_slot.values, hist2_slot.nvalues,
     509             :                                    hist2_weight,
     510             :                                    &proc, opr_codenum);
     511           0 :             n++;
     512             :         }
     513             : 
     514           0 :         selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
     515             :     }
     516             : 
     517             :     /*
     518             :      * If useful statistics are not available then use the default estimate.
     519             :      * We can apply null fractions if known, though.
     520             :      */
     521           0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     522           0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     523             : 
     524             :     /* Release stats. */
     525           0 :     free_attstatsslot(&mcv1_slot);
     526           0 :     free_attstatsslot(&mcv2_slot);
     527           0 :     free_attstatsslot(&hist1_slot);
     528           0 :     free_attstatsslot(&hist2_slot);
     529             : 
     530           0 :     return selec;
     531             : }
     532             : 
     533             : /*
     534             :  * Compute the fraction of a relation's population that is represented
     535             :  * by the MCV list.
     536             :  */
     537             : static Selectivity
     538           0 : mcv_population(float4 *mcv_numbers, int mcv_nvalues)
     539             : {
     540           0 :     Selectivity sumcommon = 0.0;
     541             :     int         i;
     542             : 
     543           0 :     for (i = 0; i < mcv_nvalues; i++)
     544             :     {
     545           0 :         sumcommon += mcv_numbers[i];
     546             :     }
     547             : 
     548           0 :     return sumcommon;
     549             : }
     550             : 
     551             : /*
     552             :  * Inet histogram vs single value selectivity estimation
     553             :  *
     554             :  * Estimate the fraction of the histogram population that satisfies
     555             :  * "value OPR CONST".  (The result needs to be scaled to reflect the
     556             :  * proportion of the total population represented by the histogram.)
     557             :  *
     558             :  * The histogram is originally for the inet btree comparison operators.
     559             :  * Only the common bits of the network part and the length of the network part
     560             :  * (masklen) are interesting for the subnet inclusion operators.  Fortunately,
     561             :  * btree comparison treats the network part as the major sort key.  Even so,
     562             :  * the length of the network part would not really be significant in the
     563             :  * histogram.  This would lead to big mistakes for data sets with uneven
     564             :  * masklen distribution.  To reduce this problem, comparisons with the left
     565             :  * and the right sides of the buckets are used together.
     566             :  *
     567             :  * Histogram bucket matches are calculated in two forms.  If the constant
     568             :  * matches both bucket endpoints the bucket is considered as fully matched.
     569             :  * The second form is to match the bucket partially; we recognize this when
     570             :  * the constant matches just one endpoint, or the two endpoints fall on
     571             :  * opposite sides of the constant.  (Note that when the constant matches an
     572             :  * interior histogram element, it gets credit for partial matches to the
     573             :  * buckets on both sides, while a match to a histogram endpoint gets credit
     574             :  * for only one partial match.  This is desirable.)
     575             :  *
     576             :  * The divider in the partial bucket match is imagined as the distance
     577             :  * between the decisive bits and the common bits of the addresses.  It will
     578             :  * be used as a power of two as it is the natural scale for the IP network
     579             :  * inclusion.  This partial bucket match divider calculation is an empirical
     580             :  * formula and subject to change with more experiment.
     581             :  *
     582             :  * For a partial match, we try to calculate dividers for both of the
     583             :  * boundaries.  If the address family of a boundary value does not match the
     584             :  * constant or comparison of the length of the network parts is not correct
     585             :  * for the operator, the divider for that boundary will not be taken into
     586             :  * account.  If both of the dividers are valid, the greater one will be used
     587             :  * to minimize the mistake in buckets that have disparate masklens.  This
     588             :  * calculation is unfair when dividers can be calculated for both of the
     589             :  * boundaries but they are far from each other; but it is not a common
     590             :  * situation as the boundaries are expected to share most of their significant
     591             :  * bits of their masklens.  The mistake would be greater, if we would use the
     592             :  * minimum instead of the maximum, and we don't know a sensible way to combine
     593             :  * them.
     594             :  *
     595             :  * For partial match in buckets that have different address families on the
     596             :  * left and right sides, only the boundary with the same address family is
     597             :  * taken into consideration.  This can cause more mistakes for these buckets
     598             :  * if the masklens of their boundaries are also disparate.  But this can only
     599             :  * happen in one bucket, since only two address families exist.  It seems a
     600             :  * better option than not considering these buckets at all.
     601             :  */
     602             : static Selectivity
     603           0 : inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue,
     604             :                     int opr_codenum)
     605             : {
     606           0 :     Selectivity match = 0.0;
     607             :     inet       *query,
     608             :                *left,
     609             :                *right;
     610             :     int         i,
     611             :                 k,
     612             :                 n;
     613             :     int         left_order,
     614             :                 right_order,
     615             :                 left_divider,
     616             :                 right_divider;
     617             : 
     618             :     /* guard against zero-divide below */
     619           0 :     if (nvalues <= 1)
     620           0 :         return 0.0;
     621             : 
     622             :     /* if there are too many histogram elements, decimate to limit runtime */
     623           0 :     k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;
     624             : 
     625           0 :     query = DatumGetInetPP(constvalue);
     626             : 
     627             :     /* "left" is the left boundary value of the current bucket ... */
     628           0 :     left = DatumGetInetPP(values[0]);
     629           0 :     left_order = inet_inclusion_cmp(left, query, opr_codenum);
     630             : 
     631           0 :     n = 0;
     632           0 :     for (i = k; i < nvalues; i += k)
     633             :     {
     634             :         /* ... and "right" is the right boundary value */
     635           0 :         right = DatumGetInetPP(values[i]);
     636           0 :         right_order = inet_inclusion_cmp(right, query, opr_codenum);
     637             : 
     638           0 :         if (left_order == 0 && right_order == 0)
     639             :         {
     640             :             /* The whole bucket matches, since both endpoints do. */
     641           0 :             match += 1.0;
     642             :         }
     643           0 :         else if ((left_order <= 0 && right_order >= 0) ||
     644           0 :                  (left_order >= 0 && right_order <= 0))
     645             :         {
     646             :             /* Partial bucket match. */
     647           0 :             left_divider = inet_hist_match_divider(left, query, opr_codenum);
     648           0 :             right_divider = inet_hist_match_divider(right, query, opr_codenum);
     649             : 
     650           0 :             if (left_divider >= 0 || right_divider >= 0)
     651           0 :                 match += 1.0 / pow(2.0, Max(left_divider, right_divider));
     652             :         }
     653             : 
     654             :         /* Shift the variables. */
     655           0 :         left = right;
     656           0 :         left_order = right_order;
     657             : 
     658             :         /* Count the number of buckets considered. */
     659           0 :         n++;
     660             :     }
     661             : 
     662           0 :     return match / n;
     663             : }
     664             : 
     665             : /*
     666             :  * Inet MCV vs MCV join selectivity estimation
     667             :  *
     668             :  * We simply add up the fractions of the populations that satisfy the clause.
     669             :  * The result is exact and does not need to be scaled further.
     670             :  */
     671             : static Selectivity
     672           0 : inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
     673             :                   Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
     674             :                   Oid operator)
     675             : {
     676           0 :     Selectivity selec = 0.0;
     677             :     FmgrInfo    proc;
     678             :     int         i,
     679             :                 j;
     680             : 
     681           0 :     fmgr_info(get_opcode(operator), &proc);
     682             : 
     683           0 :     for (i = 0; i < mcv1_nvalues; i++)
     684             :     {
     685           0 :         for (j = 0; j < mcv2_nvalues; j++)
     686           0 :             if (DatumGetBool(FunctionCall2(&proc,
     687             :                                            mcv1_values[i],
     688             :                                            mcv2_values[j])))
     689           0 :                 selec += mcv1_numbers[i] * mcv2_numbers[j];
     690             :     }
     691           0 :     return selec;
     692             : }
     693             : 
     694             : /*
     695             :  * Inet MCV vs histogram join selectivity estimation
     696             :  *
     697             :  * For each MCV on the lefthand side, estimate the fraction of the righthand's
     698             :  * histogram population that satisfies the join clause, and add those up,
     699             :  * scaling by the MCV's frequency.  The result still needs to be scaled
     700             :  * according to the fraction of the righthand's population represented by
     701             :  * the histogram.
     702             :  */
     703             : static Selectivity
     704           0 : inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
     705             :                   Datum *hist_values, int hist_nvalues,
     706             :                   int opr_codenum)
     707             : {
     708           0 :     Selectivity selec = 0.0;
     709             :     int         i;
     710             : 
     711             :     /*
     712             :      * We'll call inet_hist_value_selec with the histogram on the left, so we
     713             :      * must commute the operator.
     714             :      */
     715           0 :     opr_codenum = -opr_codenum;
     716             : 
     717           0 :     for (i = 0; i < mcv_nvalues; i++)
     718             :     {
     719           0 :         selec += mcv_numbers[i] *
     720           0 :             inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i],
     721             :                                 opr_codenum);
     722             :     }
     723           0 :     return selec;
     724             : }
     725             : 
     726             : /*
     727             :  * Inet histogram vs histogram join selectivity estimation
     728             :  *
     729             :  * Here, we take all values listed in the second histogram (except for the
     730             :  * first and last elements, which are excluded on the grounds of possibly
     731             :  * not being very representative) and treat them as a uniform sample of
     732             :  * the non-MCV population for that relation.  For each one, we apply
     733             :  * inet_hist_value_selec to see what fraction of the first histogram
     734             :  * it matches.
     735             :  *
     736             :  * We could alternatively do this the other way around using the operator's
     737             :  * commutator.  XXX would it be worthwhile to do it both ways and take the
     738             :  * average?  That would at least avoid non-commutative estimation results.
     739             :  */
     740             : static Selectivity
     741           0 : inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues,
     742             :                              Datum *hist2_values, int hist2_nvalues,
     743             :                              int opr_codenum)
     744             : {
     745           0 :     double      match = 0.0;
     746             :     int         i,
     747             :                 k,
     748             :                 n;
     749             : 
     750           0 :     if (hist2_nvalues <= 2)
     751           0 :         return 0.0;             /* no interior histogram elements */
     752             : 
     753             :     /* if there are too many histogram elements, decimate to limit runtime */
     754           0 :     k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     755             : 
     756           0 :     n = 0;
     757           0 :     for (i = 1; i < hist2_nvalues - 1; i += k)
     758             :     {
     759           0 :         match += inet_hist_value_sel(hist1_values, hist1_nvalues,
     760           0 :                                      hist2_values[i], opr_codenum);
     761           0 :         n++;
     762             :     }
     763             : 
     764           0 :     return match / n;
     765             : }
     766             : 
     767             : /*
     768             :  * Inet semi join selectivity estimation for one value
     769             :  *
     770             :  * The function calculates the probability that there is at least one row
     771             :  * in the RHS table that satisfies the "lhs_value op column" condition.
     772             :  * It is used in semi join estimation to check a sample from the left hand
     773             :  * side table.
     774             :  *
     775             :  * The MCV and histogram from the right hand side table should be provided as
     776             :  * arguments with the lhs_value from the left hand side table for the join.
     777             :  * hist_weight is the total number of rows represented by the histogram.
     778             :  * For example, if the table has 1000 rows, and 10% of the rows are in the MCV
     779             :  * list, and another 10% are NULLs, hist_weight would be 800.
     780             :  *
     781             :  * First, the lhs_value will be matched to the most common values.  If it
     782             :  * matches any of them, 1.0 will be returned, because then there is surely
     783             :  * a match.
     784             :  *
     785             :  * Otherwise, the histogram will be used to estimate the number of rows in
     786             :  * the second table that match the condition.  If the estimate is greater
     787             :  * than 1.0, 1.0 will be returned, because it means there is a greater chance
     788             :  * that the lhs_value will match more than one row in the table.  If it is
     789             :  * between 0.0 and 1.0, it will be returned as the probability.
     790             :  */
     791             : static Selectivity
     792           0 : inet_semi_join_sel(Datum lhs_value,
     793             :                    bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
     794             :                    bool hist_exists, Datum *hist_values, int hist_nvalues,
     795             :                    double hist_weight,
     796             :                    FmgrInfo *proc, int opr_codenum)
     797             : {
     798           0 :     if (mcv_exists)
     799             :     {
     800             :         int         i;
     801             : 
     802           0 :         for (i = 0; i < mcv_nvalues; i++)
     803             :         {
     804           0 :             if (DatumGetBool(FunctionCall2(proc,
     805             :                                            lhs_value,
     806             :                                            mcv_values[i])))
     807           0 :                 return 1.0;
     808             :         }
     809             :     }
     810             : 
     811           0 :     if (hist_exists && hist_weight > 0)
     812             :     {
     813             :         Selectivity hist_selec;
     814             : 
     815             :         /* Commute operator, since we're passing lhs_value on the right */
     816           0 :         hist_selec = inet_hist_value_sel(hist_values, hist_nvalues,
     817             :                                          lhs_value, -opr_codenum);
     818             : 
     819           0 :         if (hist_selec > 0)
     820           0 :             return Min(1.0, hist_weight * hist_selec);
     821             :     }
     822             : 
     823           0 :     return 0.0;
     824             : }
     825             : 
     826             : /*
     827             :  * Assign useful code numbers for the subnet inclusion/overlap operators
     828             :  *
     829             :  * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
     830             :  * on the exact codes assigned here; but many other places in this file
     831             :  * know that they can negate a code to obtain the code for the commutator
     832             :  * operator.
     833             :  */
     834             : static int
     835           0 : inet_opr_codenum(Oid operator)
     836             : {
     837           0 :     switch (operator)
     838             :     {
     839             :         case OID_INET_SUP_OP:
     840           0 :             return -2;
     841             :         case OID_INET_SUPEQ_OP:
     842           0 :             return -1;
     843             :         case OID_INET_OVERLAP_OP:
     844           0 :             return 0;
     845             :         case OID_INET_SUBEQ_OP:
     846           0 :             return 1;
     847             :         case OID_INET_SUB_OP:
     848           0 :             return 2;
     849             :         default:
     850           0 :             elog(ERROR, "unrecognized operator %u for inet selectivity",
     851             :                  operator);
     852             :     }
     853             :     return 0;                   /* unreached, but keep compiler quiet */
     854             : }
     855             : 
     856             : /*
     857             :  * Comparison function for the subnet inclusion/overlap operators
     858             :  *
     859             :  * If the comparison is okay for the specified inclusion operator, the return
     860             :  * value will be 0.  Otherwise the return value will be less than or greater
     861             :  * than 0 as appropriate for the operator.
     862             :  *
     863             :  * Comparison is compatible with the basic comparison function for the inet
     864             :  * type.  See network_cmp_internal() in network.c for the original.  Basic
     865             :  * comparison operators are implemented with the network_cmp_internal()
     866             :  * function.  It is possible to implement the subnet inclusion operators with
     867             :  * this function.
     868             :  *
     869             :  * Comparison is first on the common bits of the network part, then on the
     870             :  * length of the network part (masklen) as in the network_cmp_internal()
     871             :  * function.  Only the first part is in this function.  The second part is
     872             :  * separated to another function for reusability.  The difference between the
     873             :  * second part and the original network_cmp_internal() is that the inclusion
     874             :  * operator is considered while comparing the lengths of the network parts.
     875             :  * See the inet_masklen_inclusion_cmp() function below.
     876             :  */
     877             : static int
     878           0 : inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     879             : {
     880           0 :     if (ip_family(left) == ip_family(right))
     881             :     {
     882             :         int         order;
     883             : 
     884           0 :         order = bitncmp(ip_addr(left), ip_addr(right),
     885           0 :                         Min(ip_bits(left), ip_bits(right)));
     886           0 :         if (order != 0)
     887           0 :             return order;
     888             : 
     889           0 :         return inet_masklen_inclusion_cmp(left, right, opr_codenum);
     890             :     }
     891             : 
     892           0 :     return ip_family(left) - ip_family(right);
     893             : }
     894             : 
     895             : /*
     896             :  * Masklen comparison function for the subnet inclusion/overlap operators
     897             :  *
     898             :  * Compares the lengths of the network parts of the inputs.  If the comparison
     899             :  * is okay for the specified inclusion operator, the return value will be 0.
     900             :  * Otherwise the return value will be less than or greater than 0 as
     901             :  * appropriate for the operator.
     902             :  */
     903             : static int
     904           0 : inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     905             : {
     906             :     int         order;
     907             : 
     908           0 :     order = (int) ip_bits(left) - (int) ip_bits(right);
     909             : 
     910             :     /*
     911             :      * Return 0 if the operator would accept this combination of masklens.
     912             :      * Note that opr_codenum zero (overlaps) will accept all cases.
     913             :      */
     914           0 :     if ((order > 0 && opr_codenum >= 0) ||
     915           0 :         (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) ||
     916           0 :         (order < 0 && opr_codenum <= 0))
     917           0 :         return 0;
     918             : 
     919             :     /*
     920             :      * Otherwise, return a negative value for sup/supeq (notionally, the RHS
     921             :      * needs to have a larger masklen than it has, which would make it sort
     922             :      * later), or a positive value for sub/subeq (vice versa).
     923             :      */
     924           0 :     return opr_codenum;
     925             : }
     926             : 
     927             : /*
     928             :  * Inet histogram partial match divider calculation
     929             :  *
     930             :  * First the families and the lengths of the network parts are compared using
     931             :  * the subnet inclusion operator.  If those are acceptable for the operator,
     932             :  * the divider will be calculated using the masklens and the common bits of
     933             :  * the addresses.  -1 will be returned if it cannot be calculated.
     934             :  *
     935             :  * See commentary for inet_hist_value_sel() for some rationale for this.
     936             :  */
     937             : static int
     938           0 : inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
     939             : {
     940           0 :     if (ip_family(boundary) == ip_family(query) &&
     941           0 :         inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0)
     942             :     {
     943             :         int         min_bits,
     944             :                     decisive_bits;
     945             : 
     946           0 :         min_bits = Min(ip_bits(boundary), ip_bits(query));
     947             : 
     948             :         /*
     949             :          * Set decisive_bits to the masklen of the one that should contain the
     950             :          * other according to the operator.
     951             :          */
     952           0 :         if (opr_codenum < 0)
     953           0 :             decisive_bits = ip_bits(boundary);
     954           0 :         else if (opr_codenum > 0)
     955           0 :             decisive_bits = ip_bits(query);
     956             :         else
     957           0 :             decisive_bits = min_bits;
     958             : 
     959             :         /*
     960             :          * Now return the number of non-common decisive bits.  (This will be
     961             :          * zero if the boundary and query in fact match, else positive.)
     962             :          */
     963           0 :         if (min_bits > 0)
     964           0 :             return decisive_bits - bitncommon(ip_addr(boundary),
     965           0 :                                               ip_addr(query),
     966             :                                               min_bits);
     967           0 :         return decisive_bits;
     968             :     }
     969             : 
     970           0 :     return -1;
     971             : }

Generated by: LCOV version 1.13