LCOV - PostgreSQL 19devel - src/backend/utils/adt/network

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - network_selfuncs.c (source / functions)		Coverage	Total	Hit
Test:	PostgreSQL 19devel	Lines:	9.2 %	271	25
Test Date:	2026-02-17 17:20:33	Functions:	14.3 %	14	2
Legend:	Lines: hit not hit

            Line data    Source code

       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * network_selfuncs.c
       4              :  *    Functions for selectivity estimation of inet/cidr operators
       5              :  *
       6              :  * This module provides estimators for the subnet inclusion and overlap
       7              :  * operators.  Estimates are based on null fraction, most common values,
       8              :  * and histogram of inet/cidr columns.
       9              :  *
      10              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      11              :  * Portions Copyright (c) 1994, Regents of the University of California
      12              :  *
      13              :  *
      14              :  * IDENTIFICATION
      15              :  *    src/backend/utils/adt/network_selfuncs.c
      16              :  *
      17              :  *-------------------------------------------------------------------------
      18              :  */
      19              : #include "postgres.h"
      20              : 
      21              : #include <math.h>
      22              : 
      23              : #include "access/htup_details.h"
      24              : #include "catalog/pg_operator.h"
      25              : #include "catalog/pg_statistic.h"
      26              : #include "utils/fmgrprotos.h"
      27              : #include "utils/inet.h"
      28              : #include "utils/lsyscache.h"
      29              : #include "utils/selfuncs.h"
      30              : 
      31              : 
      32              : /* Default selectivity for the inet overlap operator */
      33              : #define DEFAULT_OVERLAP_SEL 0.01
      34              : 
      35              : /* Default selectivity for the various inclusion operators */
      36              : #define DEFAULT_INCLUSION_SEL 0.005
      37              : 
      38              : /* Default selectivity for specified operator */
      39              : #define DEFAULT_SEL(operator) \
      40              :     ((operator) == OID_INET_OVERLAP_OP ? \
      41              :      DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
      42              : 
      43              : /* Maximum number of items to consider in join selectivity calculations */
      44              : #define MAX_CONSIDERED_ELEMS 1024
      45              : 
      46              : static Selectivity networkjoinsel_inner(Oid operator, int opr_codenum,
      47              :                                         VariableStatData *vardata1, VariableStatData *vardata2);
      48              : static Selectivity networkjoinsel_semi(Oid operator, int opr_codenum,
      49              :                                        VariableStatData *vardata1, VariableStatData *vardata2);
      50              : static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
      51              : static Selectivity inet_hist_value_sel(const Datum *values, int nvalues,
      52              :                                        Datum constvalue, int opr_codenum);
      53              : static Selectivity inet_mcv_join_sel(Datum *mcv1_values,
      54              :                                      float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
      55              :                                      float4 *mcv2_numbers, int mcv2_nvalues, Oid operator);
      56              : static Selectivity inet_mcv_hist_sel(const Datum *mcv_values, float4 *mcv_numbers,
      57              :                                      int mcv_nvalues, const Datum *hist_values, int hist_nvalues,
      58              :                                      int opr_codenum);
      59              : static Selectivity inet_hist_inclusion_join_sel(const Datum *hist1_values,
      60              :                                                 int hist1_nvalues,
      61              :                                                 const Datum *hist2_values, int hist2_nvalues,
      62              :                                                 int opr_codenum);
      63              : static Selectivity inet_semi_join_sel(Datum lhs_value,
      64              :                                       bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
      65              :                                       bool hist_exists, Datum *hist_values, int hist_nvalues,
      66              :                                       double hist_weight,
      67              :                                       FmgrInfo *proc, int opr_codenum);
      68              : static int  inet_opr_codenum(Oid operator);
      69              : static int  inet_inclusion_cmp(inet *left, inet *right, int opr_codenum);
      70              : static int  inet_masklen_inclusion_cmp(inet *left, inet *right,
      71              :                                        int opr_codenum);
      72              : static int  inet_hist_match_divider(inet *boundary, inet *query,
      73              :                                     int opr_codenum);
      74              : 
      75              : /*
      76              :  * Selectivity estimation for the subnet inclusion/overlap operators
      77              :  */
      78              : Datum
      79          450 : networksel(PG_FUNCTION_ARGS)
      80              : {
      81          450 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
      82          450 :     Oid         operator = PG_GETARG_OID(1);
      83          450 :     List       *args = (List *) PG_GETARG_POINTER(2);
      84          450 :     int         varRelid = PG_GETARG_INT32(3);
      85              :     int         opr_codenum;
      86              :     VariableStatData vardata;
      87              :     Node       *other;
      88              :     bool        varonleft;
      89              :     Selectivity selec,
      90              :                 mcv_selec,
      91              :                 non_mcv_selec;
      92              :     Datum       constvalue;
      93              :     Form_pg_statistic stats;
      94              :     AttStatsSlot hslot;
      95              :     double      sumcommon,
      96              :                 nullfrac;
      97              :     FmgrInfo    proc;
      98              : 
      99              :     /*
     100              :      * Before all else, verify that the operator is one of the ones supported
     101              :      * by this function, which in turn proves that the input datatypes are
     102              :      * what we expect.  Otherwise, attaching this selectivity function to some
     103              :      * unexpected operator could cause trouble.
     104              :      */
     105          450 :     opr_codenum = inet_opr_codenum(operator);
     106              : 
     107              :     /*
     108              :      * If expression is not (variable op something) or (something op
     109              :      * variable), then punt and return a default estimate.
     110              :      */
     111          450 :     if (!get_restriction_variable(root, args, varRelid,
     112              :                                   &vardata, &other, &varonleft))
     113            0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     114              : 
     115              :     /*
     116              :      * Can't do anything useful if the something is not a constant, either.
     117              :      */
     118          450 :     if (!IsA(other, Const))
     119              :     {
     120            0 :         ReleaseVariableStats(vardata);
     121            0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     122              :     }
     123              : 
     124              :     /* All of the operators handled here are strict. */
     125          450 :     if (((Const *) other)->constisnull)
     126              :     {
     127            0 :         ReleaseVariableStats(vardata);
     128            0 :         PG_RETURN_FLOAT8(0.0);
     129              :     }
     130          450 :     constvalue = ((Const *) other)->constvalue;
     131              : 
     132              :     /* Otherwise, we need stats in order to produce a non-default estimate. */
     133          450 :     if (!HeapTupleIsValid(vardata.statsTuple))
     134              :     {
     135          450 :         ReleaseVariableStats(vardata);
     136          450 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     137              :     }
     138              : 
     139            0 :     stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     140            0 :     nullfrac = stats->stanullfrac;
     141              : 
     142              :     /*
     143              :      * If we have most-common-values info, add up the fractions of the MCV
     144              :      * entries that satisfy MCV OP CONST.  These fractions contribute directly
     145              :      * to the result selectivity.  Also add up the total fraction represented
     146              :      * by MCV entries.
     147              :      */
     148            0 :     fmgr_info(get_opcode(operator), &proc);
     149            0 :     mcv_selec = mcv_selectivity(&vardata, &proc, InvalidOid,
     150              :                                 constvalue, varonleft,
     151              :                                 &sumcommon);
     152              : 
     153              :     /*
     154              :      * If we have a histogram, use it to estimate the proportion of the
     155              :      * non-MCV population that satisfies the clause.  If we don't, apply the
     156              :      * default selectivity to that population.
     157              :      */
     158            0 :     if (get_attstatsslot(&hslot, vardata.statsTuple,
     159              :                          STATISTIC_KIND_HISTOGRAM, InvalidOid,
     160              :                          ATTSTATSSLOT_VALUES))
     161              :     {
     162              :         int         h_codenum;
     163              : 
     164              :         /* Commute if needed, so we can consider histogram to be on the left */
     165            0 :         h_codenum = varonleft ? opr_codenum : -opr_codenum;
     166            0 :         non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
     167              :                                             constvalue, h_codenum);
     168              : 
     169            0 :         free_attstatsslot(&hslot);
     170              :     }
     171              :     else
     172            0 :         non_mcv_selec = DEFAULT_SEL(operator);
     173              : 
     174              :     /* Combine selectivities for MCV and non-MCV populations */
     175            0 :     selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;
     176              : 
     177              :     /* Result should be in range, but make sure... */
     178            0 :     CLAMP_PROBABILITY(selec);
     179              : 
     180            0 :     ReleaseVariableStats(vardata);
     181              : 
     182            0 :     PG_RETURN_FLOAT8(selec);
     183              : }
     184              : 
     185              : /*
     186              :  * Join selectivity estimation for the subnet inclusion/overlap operators
     187              :  *
     188              :  * This function has the same structure as eqjoinsel() in selfuncs.c.
     189              :  *
     190              :  * Throughout networkjoinsel and its subroutines, we have a performance issue
     191              :  * in that the amount of work to be done is O(N^2) in the length of the MCV
     192              :  * and histogram arrays.  To keep the runtime from getting out of hand when
     193              :  * large statistics targets have been set, we arbitrarily limit the number of
     194              :  * values considered to 1024 (MAX_CONSIDERED_ELEMS).  For the MCV arrays, this
     195              :  * is easy: just consider at most the first N elements.  (Since the MCVs are
     196              :  * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
     197              :  * For the histogram arrays, we decimate; that is consider only every k'th
     198              :  * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
     199              :  * elements are considered.  This should still give us a good random sample of
     200              :  * the non-MCV population.  Decimation is done on-the-fly in the loops that
     201              :  * iterate over the histogram arrays.
     202              :  */
     203              : Datum
     204            0 : networkjoinsel(PG_FUNCTION_ARGS)
     205              : {
     206            0 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     207            0 :     Oid         operator = PG_GETARG_OID(1);
     208            0 :     List       *args = (List *) PG_GETARG_POINTER(2);
     209              : #ifdef NOT_USED
     210              :     JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
     211              : #endif
     212            0 :     SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
     213              :     double      selec;
     214              :     int         opr_codenum;
     215              :     VariableStatData vardata1;
     216              :     VariableStatData vardata2;
     217              :     bool        join_is_reversed;
     218              : 
     219              :     /*
     220              :      * Before all else, verify that the operator is one of the ones supported
     221              :      * by this function, which in turn proves that the input datatypes are
     222              :      * what we expect.  Otherwise, attaching this selectivity function to some
     223              :      * unexpected operator could cause trouble.
     224              :      */
     225            0 :     opr_codenum = inet_opr_codenum(operator);
     226              : 
     227            0 :     get_join_variables(root, args, sjinfo,
     228              :                        &vardata1, &vardata2, &join_is_reversed);
     229              : 
     230            0 :     switch (sjinfo->jointype)
     231              :     {
     232            0 :         case JOIN_INNER:
     233              :         case JOIN_LEFT:
     234              :         case JOIN_FULL:
     235              : 
     236              :             /*
     237              :              * Selectivity for left/full join is not exactly the same as inner
     238              :              * join, but we neglect the difference, as eqjoinsel does.
     239              :              */
     240            0 :             selec = networkjoinsel_inner(operator, opr_codenum,
     241              :                                          &vardata1, &vardata2);
     242            0 :             break;
     243            0 :         case JOIN_SEMI:
     244              :         case JOIN_ANTI:
     245              :             /* Here, it's important that we pass the outer var on the left. */
     246            0 :             if (!join_is_reversed)
     247            0 :                 selec = networkjoinsel_semi(operator, opr_codenum,
     248              :                                             &vardata1, &vardata2);
     249              :             else
     250            0 :                 selec = networkjoinsel_semi(get_commutator(operator),
     251              :                                             -opr_codenum,
     252              :                                             &vardata2, &vardata1);
     253            0 :             break;
     254            0 :         default:
     255              :             /* other values not expected here */
     256            0 :             elog(ERROR, "unrecognized join type: %d",
     257              :                  (int) sjinfo->jointype);
     258              :             selec = 0;          /* keep compiler quiet */
     259              :             break;
     260              :     }
     261              : 
     262            0 :     ReleaseVariableStats(vardata1);
     263            0 :     ReleaseVariableStats(vardata2);
     264              : 
     265            0 :     CLAMP_PROBABILITY(selec);
     266              : 
     267            0 :     PG_RETURN_FLOAT8((float8) selec);
     268              : }
     269              : 
     270              : /*
     271              :  * Inner join selectivity estimation for subnet inclusion/overlap operators
     272              :  *
     273              :  * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
     274              :  * selectivity for join using the subnet inclusion operators.  Unlike the
     275              :  * join selectivity function for the equality operator, eqjoinsel_inner(),
     276              :  * one to one matching of the values is not enough.  Network inclusion
     277              :  * operators are likely to match many to many, so we must check all pairs.
     278              :  * (Note: it might be possible to exploit understanding of the histogram's
     279              :  * btree ordering to reduce the work needed, but we don't currently try.)
     280              :  * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
     281              :  */
     282              : static Selectivity
     283            0 : networkjoinsel_inner(Oid operator, int opr_codenum,
     284              :                      VariableStatData *vardata1, VariableStatData *vardata2)
     285              : {
     286              :     Form_pg_statistic stats;
     287            0 :     double      nullfrac1 = 0.0,
     288            0 :                 nullfrac2 = 0.0;
     289            0 :     Selectivity selec = 0.0,
     290            0 :                 sumcommon1 = 0.0,
     291            0 :                 sumcommon2 = 0.0;
     292            0 :     bool        mcv1_exists = false,
     293            0 :                 mcv2_exists = false,
     294            0 :                 hist1_exists = false,
     295            0 :                 hist2_exists = false;
     296            0 :     int         mcv1_length = 0,
     297            0 :                 mcv2_length = 0;
     298              :     AttStatsSlot mcv1_slot;
     299              :     AttStatsSlot mcv2_slot;
     300              :     AttStatsSlot hist1_slot;
     301              :     AttStatsSlot hist2_slot;
     302              : 
     303            0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     304              :     {
     305            0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     306            0 :         nullfrac1 = stats->stanullfrac;
     307              : 
     308            0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     309              :                                        STATISTIC_KIND_MCV, InvalidOid,
     310              :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     311            0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     312              :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     313              :                                         ATTSTATSSLOT_VALUES);
     314              :         /* Arbitrarily limit number of MCVs considered */
     315            0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     316            0 :         if (mcv1_exists)
     317            0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     318              :     }
     319              :     else
     320              :     {
     321            0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     322            0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     323              :     }
     324              : 
     325            0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     326              :     {
     327            0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     328            0 :         nullfrac2 = stats->stanullfrac;
     329              : 
     330            0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     331              :                                        STATISTIC_KIND_MCV, InvalidOid,
     332              :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     333            0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     334              :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     335              :                                         ATTSTATSSLOT_VALUES);
     336              :         /* Arbitrarily limit number of MCVs considered */
     337            0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     338            0 :         if (mcv2_exists)
     339            0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     340              :     }
     341              :     else
     342              :     {
     343            0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     344            0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     345              :     }
     346              : 
     347              :     /*
     348              :      * Calculate selectivity for MCV vs MCV matches.
     349              :      */
     350            0 :     if (mcv1_exists && mcv2_exists)
     351            0 :         selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers,
     352              :                                    mcv1_length,
     353              :                                    mcv2_slot.values, mcv2_slot.numbers,
     354              :                                    mcv2_length,
     355              :                                    operator);
     356              : 
     357              :     /*
     358              :      * Add in selectivities for MCV vs histogram matches, scaling according to
     359              :      * the fractions of the populations represented by the histograms. Note
     360              :      * that the second case needs to commute the operator.
     361              :      */
     362            0 :     if (mcv1_exists && hist2_exists)
     363            0 :         selec += (1.0 - nullfrac2 - sumcommon2) *
     364            0 :             inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length,
     365            0 :                               hist2_slot.values, hist2_slot.nvalues,
     366              :                               opr_codenum);
     367            0 :     if (mcv2_exists && hist1_exists)
     368            0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     369            0 :             inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length,
     370            0 :                               hist1_slot.values, hist1_slot.nvalues,
     371              :                               -opr_codenum);
     372              : 
     373              :     /*
     374              :      * Add in selectivity for histogram vs histogram matches, again scaling
     375              :      * appropriately.
     376              :      */
     377            0 :     if (hist1_exists && hist2_exists)
     378            0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     379            0 :             (1.0 - nullfrac2 - sumcommon2) *
     380            0 :             inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues,
     381            0 :                                          hist2_slot.values, hist2_slot.nvalues,
     382              :                                          opr_codenum);
     383              : 
     384              :     /*
     385              :      * If useful statistics are not available then use the default estimate.
     386              :      * We can apply null fractions if known, though.
     387              :      */
     388            0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     389            0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     390              : 
     391              :     /* Release stats. */
     392            0 :     free_attstatsslot(&mcv1_slot);
     393            0 :     free_attstatsslot(&mcv2_slot);
     394            0 :     free_attstatsslot(&hist1_slot);
     395            0 :     free_attstatsslot(&hist2_slot);
     396              : 
     397            0 :     return selec;
     398              : }
     399              : 
     400              : /*
     401              :  * Semi join selectivity estimation for subnet inclusion/overlap operators
     402              :  *
     403              :  * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
     404              :  * histogram selectivity for semi/anti join cases.
     405              :  */
     406              : static Selectivity
     407            0 : networkjoinsel_semi(Oid operator, int opr_codenum,
     408              :                     VariableStatData *vardata1, VariableStatData *vardata2)
     409              : {
     410              :     Form_pg_statistic stats;
     411            0 :     Selectivity selec = 0.0,
     412            0 :                 sumcommon1 = 0.0,
     413            0 :                 sumcommon2 = 0.0;
     414            0 :     double      nullfrac1 = 0.0,
     415            0 :                 nullfrac2 = 0.0,
     416            0 :                 hist2_weight = 0.0;
     417            0 :     bool        mcv1_exists = false,
     418            0 :                 mcv2_exists = false,
     419            0 :                 hist1_exists = false,
     420            0 :                 hist2_exists = false;
     421              :     FmgrInfo    proc;
     422              :     int         i,
     423            0 :                 mcv1_length = 0,
     424            0 :                 mcv2_length = 0;
     425              :     AttStatsSlot mcv1_slot;
     426              :     AttStatsSlot mcv2_slot;
     427              :     AttStatsSlot hist1_slot;
     428              :     AttStatsSlot hist2_slot;
     429              : 
     430            0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     431              :     {
     432            0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     433            0 :         nullfrac1 = stats->stanullfrac;
     434              : 
     435            0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     436              :                                        STATISTIC_KIND_MCV, InvalidOid,
     437              :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     438            0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     439              :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     440              :                                         ATTSTATSSLOT_VALUES);
     441              :         /* Arbitrarily limit number of MCVs considered */
     442            0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     443            0 :         if (mcv1_exists)
     444            0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     445              :     }
     446              :     else
     447              :     {
     448            0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     449            0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     450              :     }
     451              : 
     452            0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     453              :     {
     454            0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     455            0 :         nullfrac2 = stats->stanullfrac;
     456              : 
     457            0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     458              :                                        STATISTIC_KIND_MCV, InvalidOid,
     459              :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     460            0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     461              :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     462              :                                         ATTSTATSSLOT_VALUES);
     463              :         /* Arbitrarily limit number of MCVs considered */
     464            0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     465            0 :         if (mcv2_exists)
     466            0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     467              :     }
     468              :     else
     469              :     {
     470            0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     471            0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     472              :     }
     473              : 
     474            0 :     fmgr_info(get_opcode(operator), &proc);
     475              : 
     476              :     /* Estimate number of input rows represented by RHS histogram. */
     477            0 :     if (hist2_exists && vardata2->rel)
     478            0 :         hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;
     479              : 
     480              :     /*
     481              :      * Consider each element of the LHS MCV list, matching it to whatever RHS
     482              :      * stats we have.  Scale according to the known frequency of the MCV.
     483              :      */
     484            0 :     if (mcv1_exists && (mcv2_exists || hist2_exists))
     485              :     {
     486            0 :         for (i = 0; i < mcv1_length; i++)
     487              :         {
     488            0 :             selec += mcv1_slot.numbers[i] *
     489            0 :                 inet_semi_join_sel(mcv1_slot.values[i],
     490              :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     491              :                                    hist2_exists,
     492              :                                    hist2_slot.values, hist2_slot.nvalues,
     493              :                                    hist2_weight,
     494              :                                    &proc, opr_codenum);
     495              :         }
     496              :     }
     497              : 
     498              :     /*
     499              :      * Consider each element of the LHS histogram, except for the first and
     500              :      * last elements, which we exclude on the grounds that they're outliers
     501              :      * and thus not very representative.  Scale on the assumption that each
     502              :      * such histogram element represents an equal share of the LHS histogram
     503              :      * population (which is a bit bogus, because the members of its bucket may
     504              :      * not all act the same with respect to the join clause, but it's hard to
     505              :      * do better).
     506              :      *
     507              :      * If there are too many histogram elements, decimate to limit runtime.
     508              :      */
     509            0 :     if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists))
     510              :     {
     511            0 :         double      hist_selec_sum = 0.0;
     512              :         int         k,
     513              :                     n;
     514              : 
     515            0 :         k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     516              : 
     517            0 :         n = 0;
     518            0 :         for (i = 1; i < hist1_slot.nvalues - 1; i += k)
     519              :         {
     520            0 :             hist_selec_sum +=
     521            0 :                 inet_semi_join_sel(hist1_slot.values[i],
     522              :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     523              :                                    hist2_exists,
     524              :                                    hist2_slot.values, hist2_slot.nvalues,
     525              :                                    hist2_weight,
     526              :                                    &proc, opr_codenum);
     527            0 :             n++;
     528              :         }
     529              : 
     530            0 :         selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
     531              :     }
     532              : 
     533              :     /*
     534              :      * If useful statistics are not available then use the default estimate.
     535              :      * We can apply null fractions if known, though.
     536              :      */
     537            0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     538            0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     539              : 
     540              :     /* Release stats. */
     541            0 :     free_attstatsslot(&mcv1_slot);
     542            0 :     free_attstatsslot(&mcv2_slot);
     543            0 :     free_attstatsslot(&hist1_slot);
     544            0 :     free_attstatsslot(&hist2_slot);
     545              : 
     546            0 :     return selec;
     547              : }
     548              : 
     549              : /*
     550              :  * Compute the fraction of a relation's population that is represented
     551              :  * by the MCV list.
     552              :  */
     553              : static Selectivity
     554            0 : mcv_population(float4 *mcv_numbers, int mcv_nvalues)
     555              : {
     556            0 :     Selectivity sumcommon = 0.0;
     557              :     int         i;
     558              : 
     559            0 :     for (i = 0; i < mcv_nvalues; i++)
     560              :     {
     561            0 :         sumcommon += mcv_numbers[i];
     562              :     }
     563              : 
     564            0 :     return sumcommon;
     565              : }
     566              : 
     567              : /*
     568              :  * Inet histogram vs single value selectivity estimation
     569              :  *
     570              :  * Estimate the fraction of the histogram population that satisfies
     571              :  * "value OPR CONST".  (The result needs to be scaled to reflect the
     572              :  * proportion of the total population represented by the histogram.)
     573              :  *
     574              :  * The histogram is originally for the inet btree comparison operators.
     575              :  * Only the common bits of the network part and the length of the network part
     576              :  * (masklen) are interesting for the subnet inclusion operators.  Fortunately,
     577              :  * btree comparison treats the network part as the major sort key.  Even so,
     578              :  * the length of the network part would not really be significant in the
     579              :  * histogram.  This would lead to big mistakes for data sets with uneven
     580              :  * masklen distribution.  To reduce this problem, comparisons with the left
     581              :  * and the right sides of the buckets are used together.
     582              :  *
     583              :  * Histogram bucket matches are calculated in two forms.  If the constant
     584              :  * matches both bucket endpoints the bucket is considered as fully matched.
     585              :  * The second form is to match the bucket partially; we recognize this when
     586              :  * the constant matches just one endpoint, or the two endpoints fall on
     587              :  * opposite sides of the constant.  (Note that when the constant matches an
     588              :  * interior histogram element, it gets credit for partial matches to the
     589              :  * buckets on both sides, while a match to a histogram endpoint gets credit
     590              :  * for only one partial match.  This is desirable.)
     591              :  *
     592              :  * The divider in the partial bucket match is imagined as the distance
     593              :  * between the decisive bits and the common bits of the addresses.  It will
     594              :  * be used as a power of two as it is the natural scale for the IP network
     595              :  * inclusion.  This partial bucket match divider calculation is an empirical
     596              :  * formula and subject to change with more experiment.
     597              :  *
     598              :  * For a partial match, we try to calculate dividers for both of the
     599              :  * boundaries.  If the address family of a boundary value does not match the
     600              :  * constant or comparison of the length of the network parts is not correct
     601              :  * for the operator, the divider for that boundary will not be taken into
     602              :  * account.  If both of the dividers are valid, the greater one will be used
     603              :  * to minimize the mistake in buckets that have disparate masklens.  This
     604              :  * calculation is unfair when dividers can be calculated for both of the
     605              :  * boundaries but they are far from each other; but it is not a common
     606              :  * situation as the boundaries are expected to share most of their significant
     607              :  * bits of their masklens.  The mistake would be greater, if we would use the
     608              :  * minimum instead of the maximum, and we don't know a sensible way to combine
     609              :  * them.
     610              :  *
     611              :  * For partial match in buckets that have different address families on the
     612              :  * left and right sides, only the boundary with the same address family is
     613              :  * taken into consideration.  This can cause more mistakes for these buckets
     614              :  * if the masklens of their boundaries are also disparate.  But this can only
     615              :  * happen in one bucket, since only two address families exist.  It seems a
     616              :  * better option than not considering these buckets at all.
     617              :  */
     618              : static Selectivity
     619            0 : inet_hist_value_sel(const Datum *values, int nvalues, Datum constvalue,
     620              :                     int opr_codenum)
     621              : {
     622            0 :     Selectivity match = 0.0;
     623              :     inet       *query,
     624              :                *left,
     625              :                *right;
     626              :     int         i,
     627              :                 k,
     628              :                 n;
     629              :     int         left_order,
     630              :                 right_order,
     631              :                 left_divider,
     632              :                 right_divider;
     633              : 
     634              :     /* guard against zero-divide below */
     635            0 :     if (nvalues <= 1)
     636            0 :         return 0.0;
     637              : 
     638              :     /* if there are too many histogram elements, decimate to limit runtime */
     639            0 :     k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;
     640              : 
     641            0 :     query = DatumGetInetPP(constvalue);
     642              : 
     643              :     /* "left" is the left boundary value of the current bucket ... */
     644            0 :     left = DatumGetInetPP(values[0]);
     645            0 :     left_order = inet_inclusion_cmp(left, query, opr_codenum);
     646              : 
     647            0 :     n = 0;
     648            0 :     for (i = k; i < nvalues; i += k)
     649              :     {
     650              :         /* ... and "right" is the right boundary value */
     651            0 :         right = DatumGetInetPP(values[i]);
     652            0 :         right_order = inet_inclusion_cmp(right, query, opr_codenum);
     653              : 
     654            0 :         if (left_order == 0 && right_order == 0)
     655              :         {
     656              :             /* The whole bucket matches, since both endpoints do. */
     657            0 :             match += 1.0;
     658              :         }
     659            0 :         else if ((left_order <= 0 && right_order >= 0) ||
     660            0 :                  (left_order >= 0 && right_order <= 0))
     661              :         {
     662              :             /* Partial bucket match. */
     663            0 :             left_divider = inet_hist_match_divider(left, query, opr_codenum);
     664            0 :             right_divider = inet_hist_match_divider(right, query, opr_codenum);
     665              : 
     666            0 :             if (left_divider >= 0 || right_divider >= 0)
     667            0 :                 match += 1.0 / pow(2.0, Max(left_divider, right_divider));
     668              :         }
     669              : 
     670              :         /* Shift the variables. */
     671            0 :         left = right;
     672            0 :         left_order = right_order;
     673              : 
     674              :         /* Count the number of buckets considered. */
     675            0 :         n++;
     676              :     }
     677              : 
     678            0 :     return match / n;
     679              : }
     680              : 
     681              : /*
     682              :  * Inet MCV vs MCV join selectivity estimation
     683              :  *
     684              :  * We simply add up the fractions of the populations that satisfy the clause.
     685              :  * The result is exact and does not need to be scaled further.
     686              :  */
     687              : static Selectivity
     688            0 : inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
     689              :                   Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
     690              :                   Oid operator)
     691              : {
     692            0 :     Selectivity selec = 0.0;
     693              :     FmgrInfo    proc;
     694              :     int         i,
     695              :                 j;
     696              : 
     697            0 :     fmgr_info(get_opcode(operator), &proc);
     698              : 
     699            0 :     for (i = 0; i < mcv1_nvalues; i++)
     700              :     {
     701            0 :         for (j = 0; j < mcv2_nvalues; j++)
     702            0 :             if (DatumGetBool(FunctionCall2(&proc,
     703              :                                            mcv1_values[i],
     704              :                                            mcv2_values[j])))
     705            0 :                 selec += mcv1_numbers[i] * mcv2_numbers[j];
     706              :     }
     707            0 :     return selec;
     708              : }
     709              : 
     710              : /*
     711              :  * Inet MCV vs histogram join selectivity estimation
     712              :  *
     713              :  * For each MCV on the lefthand side, estimate the fraction of the righthand's
     714              :  * histogram population that satisfies the join clause, and add those up,
     715              :  * scaling by the MCV's frequency.  The result still needs to be scaled
     716              :  * according to the fraction of the righthand's population represented by
     717              :  * the histogram.
     718              :  */
     719              : static Selectivity
     720            0 : inet_mcv_hist_sel(const Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
     721              :                   const Datum *hist_values, int hist_nvalues,
     722              :                   int opr_codenum)
     723              : {
     724            0 :     Selectivity selec = 0.0;
     725              :     int         i;
     726              : 
     727              :     /*
     728              :      * We'll call inet_hist_value_selec with the histogram on the left, so we
     729              :      * must commute the operator.
     730              :      */
     731            0 :     opr_codenum = -opr_codenum;
     732              : 
     733            0 :     for (i = 0; i < mcv_nvalues; i++)
     734              :     {
     735            0 :         selec += mcv_numbers[i] *
     736            0 :             inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i],
     737              :                                 opr_codenum);
     738              :     }
     739            0 :     return selec;
     740              : }
     741              : 
     742              : /*
     743              :  * Inet histogram vs histogram join selectivity estimation
     744              :  *
     745              :  * Here, we take all values listed in the second histogram (except for the
     746              :  * first and last elements, which are excluded on the grounds of possibly
     747              :  * not being very representative) and treat them as a uniform sample of
     748              :  * the non-MCV population for that relation.  For each one, we apply
     749              :  * inet_hist_value_selec to see what fraction of the first histogram
     750              :  * it matches.
     751              :  *
     752              :  * We could alternatively do this the other way around using the operator's
     753              :  * commutator.  XXX would it be worthwhile to do it both ways and take the
     754              :  * average?  That would at least avoid non-commutative estimation results.
     755              :  */
     756              : static Selectivity
     757            0 : inet_hist_inclusion_join_sel(const Datum *hist1_values, int hist1_nvalues,
     758              :                              const Datum *hist2_values, int hist2_nvalues,
     759              :                              int opr_codenum)
     760              : {
     761            0 :     double      match = 0.0;
     762              :     int         i,
     763              :                 k,
     764              :                 n;
     765              : 
     766            0 :     if (hist2_nvalues <= 2)
     767            0 :         return 0.0;             /* no interior histogram elements */
     768              : 
     769              :     /* if there are too many histogram elements, decimate to limit runtime */
     770            0 :     k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     771              : 
     772            0 :     n = 0;
     773            0 :     for (i = 1; i < hist2_nvalues - 1; i += k)
     774              :     {
     775            0 :         match += inet_hist_value_sel(hist1_values, hist1_nvalues,
     776            0 :                                      hist2_values[i], opr_codenum);
     777            0 :         n++;
     778              :     }
     779              : 
     780            0 :     return match / n;
     781              : }
     782              : 
     783              : /*
     784              :  * Inet semi join selectivity estimation for one value
     785              :  *
     786              :  * The function calculates the probability that there is at least one row
     787              :  * in the RHS table that satisfies the "lhs_value op column" condition.
     788              :  * It is used in semi join estimation to check a sample from the left hand
     789              :  * side table.
     790              :  *
     791              :  * The MCV and histogram from the right hand side table should be provided as
     792              :  * arguments with the lhs_value from the left hand side table for the join.
     793              :  * hist_weight is the total number of rows represented by the histogram.
     794              :  * For example, if the table has 1000 rows, and 10% of the rows are in the MCV
     795              :  * list, and another 10% are NULLs, hist_weight would be 800.
     796              :  *
     797              :  * First, the lhs_value will be matched to the most common values.  If it
     798              :  * matches any of them, 1.0 will be returned, because then there is surely
     799              :  * a match.
     800              :  *
     801              :  * Otherwise, the histogram will be used to estimate the number of rows in
     802              :  * the second table that match the condition.  If the estimate is greater
     803              :  * than 1.0, 1.0 will be returned, because it means there is a greater chance
     804              :  * that the lhs_value will match more than one row in the table.  If it is
     805              :  * between 0.0 and 1.0, it will be returned as the probability.
     806              :  */
     807              : static Selectivity
     808            0 : inet_semi_join_sel(Datum lhs_value,
     809              :                    bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
     810              :                    bool hist_exists, Datum *hist_values, int hist_nvalues,
     811              :                    double hist_weight,
     812              :                    FmgrInfo *proc, int opr_codenum)
     813              : {
     814            0 :     if (mcv_exists)
     815              :     {
     816              :         int         i;
     817              : 
     818            0 :         for (i = 0; i < mcv_nvalues; i++)
     819              :         {
     820            0 :             if (DatumGetBool(FunctionCall2(proc,
     821              :                                            lhs_value,
     822              :                                            mcv_values[i])))
     823            0 :                 return 1.0;
     824              :         }
     825              :     }
     826              : 
     827            0 :     if (hist_exists && hist_weight > 0)
     828              :     {
     829              :         Selectivity hist_selec;
     830              : 
     831              :         /* Commute operator, since we're passing lhs_value on the right */
     832            0 :         hist_selec = inet_hist_value_sel(hist_values, hist_nvalues,
     833              :                                          lhs_value, -opr_codenum);
     834              : 
     835            0 :         if (hist_selec > 0)
     836            0 :             return Min(1.0, hist_weight * hist_selec);
     837              :     }
     838              : 
     839            0 :     return 0.0;
     840              : }
     841              : 
     842              : /*
     843              :  * Assign useful code numbers for the subnet inclusion/overlap operators
     844              :  *
     845              :  * This will throw an error if the operator is not one of the ones we
     846              :  * support in networksel() and networkjoinsel().
     847              :  *
     848              :  * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
     849              :  * on the exact codes assigned here; but many other places in this file
     850              :  * know that they can negate a code to obtain the code for the commutator
     851              :  * operator.
     852              :  */
     853              : static int
     854          450 : inet_opr_codenum(Oid operator)
     855              : {
     856          450 :     switch (operator)
     857              :     {
     858           60 :         case OID_INET_SUP_OP:
     859           60 :             return -2;
     860          108 :         case OID_INET_SUPEQ_OP:
     861          108 :             return -1;
     862          102 :         case OID_INET_OVERLAP_OP:
     863          102 :             return 0;
     864          108 :         case OID_INET_SUBEQ_OP:
     865          108 :             return 1;
     866           72 :         case OID_INET_SUB_OP:
     867           72 :             return 2;
     868            0 :         default:
     869            0 :             elog(ERROR, "unrecognized operator %u for inet selectivity",
     870              :                  operator);
     871              :     }
     872              :     return 0;                   /* unreached, but keep compiler quiet */
     873              : }
     874              : 
     875              : /*
     876              :  * Comparison function for the subnet inclusion/overlap operators
     877              :  *
     878              :  * If the comparison is okay for the specified inclusion operator, the return
     879              :  * value will be 0.  Otherwise the return value will be less than or greater
     880              :  * than 0 as appropriate for the operator.
     881              :  *
     882              :  * Comparison is compatible with the basic comparison function for the inet
     883              :  * type.  See network_cmp_internal() in network.c for the original.  Basic
     884              :  * comparison operators are implemented with the network_cmp_internal()
     885              :  * function.  It is possible to implement the subnet inclusion operators with
     886              :  * this function.
     887              :  *
     888              :  * Comparison is first on the common bits of the network part, then on the
     889              :  * length of the network part (masklen) as in the network_cmp_internal()
     890              :  * function.  Only the first part is in this function.  The second part is
     891              :  * separated to another function for reusability.  The difference between the
     892              :  * second part and the original network_cmp_internal() is that the inclusion
     893              :  * operator is considered while comparing the lengths of the network parts.
     894              :  * See the inet_masklen_inclusion_cmp() function below.
     895              :  */
     896              : static int
     897            0 : inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     898              : {
     899            0 :     if (ip_family(left) == ip_family(right))
     900              :     {
     901              :         int         order;
     902              : 
     903            0 :         order = bitncmp(ip_addr(left), ip_addr(right),
     904            0 :                         Min(ip_bits(left), ip_bits(right)));
     905            0 :         if (order != 0)
     906            0 :             return order;
     907              : 
     908            0 :         return inet_masklen_inclusion_cmp(left, right, opr_codenum);
     909              :     }
     910              : 
     911            0 :     return ip_family(left) - ip_family(right);
     912              : }
     913              : 
     914              : /*
     915              :  * Masklen comparison function for the subnet inclusion/overlap operators
     916              :  *
     917              :  * Compares the lengths of the network parts of the inputs.  If the comparison
     918              :  * is okay for the specified inclusion operator, the return value will be 0.
     919              :  * Otherwise the return value will be less than or greater than 0 as
     920              :  * appropriate for the operator.
     921              :  */
     922              : static int
     923            0 : inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     924              : {
     925              :     int         order;
     926              : 
     927            0 :     order = (int) ip_bits(left) - (int) ip_bits(right);
     928              : 
     929              :     /*
     930              :      * Return 0 if the operator would accept this combination of masklens.
     931              :      * Note that opr_codenum zero (overlaps) will accept all cases.
     932              :      */
     933            0 :     if ((order > 0 && opr_codenum >= 0) ||
     934            0 :         (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) ||
     935            0 :         (order < 0 && opr_codenum <= 0))
     936            0 :         return 0;
     937              : 
     938              :     /*
     939              :      * Otherwise, return a negative value for sup/supeq (notionally, the RHS
     940              :      * needs to have a larger masklen than it has, which would make it sort
     941              :      * later), or a positive value for sub/subeq (vice versa).
     942              :      */
     943            0 :     return opr_codenum;
     944              : }
     945              : 
     946              : /*
     947              :  * Inet histogram partial match divider calculation
     948              :  *
     949              :  * First the families and the lengths of the network parts are compared using
     950              :  * the subnet inclusion operator.  If those are acceptable for the operator,
     951              :  * the divider will be calculated using the masklens and the common bits of
     952              :  * the addresses.  -1 will be returned if it cannot be calculated.
     953              :  *
     954              :  * See commentary for inet_hist_value_sel() for some rationale for this.
     955              :  */
     956              : static int
     957            0 : inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
     958              : {
     959            0 :     if (ip_family(boundary) == ip_family(query) &&
     960            0 :         inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0)
     961              :     {
     962              :         int         min_bits,
     963              :                     decisive_bits;
     964              : 
     965            0 :         min_bits = Min(ip_bits(boundary), ip_bits(query));
     966              : 
     967              :         /*
     968              :          * Set decisive_bits to the masklen of the one that should contain the
     969              :          * other according to the operator.
     970              :          */
     971            0 :         if (opr_codenum < 0)
     972            0 :             decisive_bits = ip_bits(boundary);
     973            0 :         else if (opr_codenum > 0)
     974            0 :             decisive_bits = ip_bits(query);
     975              :         else
     976            0 :             decisive_bits = min_bits;
     977              : 
     978              :         /*
     979              :          * Now return the number of non-common decisive bits.  (This will be
     980              :          * zero if the boundary and query in fact match, else positive.)
     981              :          */
     982            0 :         if (min_bits > 0)
     983            0 :             return decisive_bits - bitncommon(ip_addr(boundary),
     984            0 :                                               ip_addr(query),
     985              :                                               min_bits);
     986            0 :         return decisive_bits;
     987              :     }
     988              : 
     989            0 :     return -1;
     990              : }

Generated by: LCOV version 2.0-1