LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like_support.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 382 514 74.3 %
Date: 2019-06-19 14:06:47 Functions: 29 40 72.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like_support.c
       4             :  *    Planner support functions for LIKE, regex, and related operators.
       5             :  *
       6             :  * These routines handle special optimization of operators that can be
       7             :  * used with index scans even though they are not known to the executor's
       8             :  * indexscan machinery.  The key idea is that these operators allow us
       9             :  * to derive approximate indexscan qual clauses, such that any tuples
      10             :  * that pass the operator clause itself must also satisfy the simpler
      11             :  * indexscan condition(s).  Then we can use the indexscan machinery
      12             :  * to avoid scanning as much of the table as we'd otherwise have to,
      13             :  * while applying the original operator as a qpqual condition to ensure
      14             :  * we deliver only the tuples we want.  (In essence, we're using a regular
      15             :  * index as if it were a lossy index.)
      16             :  *
      17             :  * An example of what we're doing is
      18             :  *          textfield LIKE 'abc%def'
      19             :  * from which we can generate the indexscanable conditions
      20             :  *          textfield >= 'abc' AND textfield < 'abd'
      21             :  * which allow efficient scanning of an index on textfield.
      22             :  * (In reality, character set and collation issues make the transformation
      23             :  * from LIKE to indexscan limits rather harder than one might think ...
      24             :  * but that's the basic idea.)
      25             :  *
      26             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      27             :  * Portions Copyright (c) 1994, Regents of the University of California
      28             :  *
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/backend/utils/adt/like_support.c
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres.h"
      36             : 
      37             : #include <math.h>
      38             : 
      39             : #include "access/htup_details.h"
      40             : #include "access/stratnum.h"
      41             : #include "catalog/pg_collation.h"
      42             : #include "catalog/pg_opfamily.h"
      43             : #include "catalog/pg_statistic.h"
      44             : #include "catalog/pg_type.h"
      45             : #include "mb/pg_wchar.h"
      46             : #include "nodes/makefuncs.h"
      47             : #include "nodes/nodeFuncs.h"
      48             : #include "nodes/supportnodes.h"
      49             : #include "utils/builtins.h"
      50             : #include "utils/datum.h"
      51             : #include "utils/lsyscache.h"
      52             : #include "utils/pg_locale.h"
      53             : #include "utils/selfuncs.h"
      54             : #include "utils/varlena.h"
      55             : 
      56             : 
      57             : typedef enum                /* which family of pattern operator a support request is for */
      58             : {
      59             :     Pattern_Type_Like,      /* passed by textlike_support() */
      60             :     Pattern_Type_Like_IC,   /* passed by texticlike_support(); "IC" presumably means case-insensitive */
      61             :     Pattern_Type_Regex,     /* passed by textregexeq_support() */
      62             :     Pattern_Type_Regex_IC,  /* passed by texticregexeq_support() */
      63             :     Pattern_Type_Prefix     /* NOTE(review): no user is visible in this part of the file */
      64             : } Pattern_Type;
      65             : 
      66             : typedef enum                /* outcome of pattern_fixed_prefix() */
      67             : {
      68             :     Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact    /* no fixed prefix / required leading prefix / pattern is an exact match */
      69             : } Pattern_Prefix_Status;
      70             : 
      71             : static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
      72             : static List *match_pattern_prefix(Node *leftop,
      73             :                                   Node *rightop,
      74             :                                   Pattern_Type ptype,
      75             :                                   Oid expr_coll,
      76             :                                   Oid opfamily,
      77             :                                   Oid indexcollation);
      78             : static double patternsel_common(PlannerInfo *root,
      79             :                                 Oid oprid,
      80             :                                 Oid opfuncid,
      81             :                                 List *args,
      82             :                                 int varRelid,
      83             :                                 Oid collation,
      84             :                                 Pattern_Type ptype,
      85             :                                 bool negate);
      86             : static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
      87             :                                                   Pattern_Type ptype,
      88             :                                                   Oid collation,
      89             :                                                   Const **prefix,
      90             :                                                   Selectivity *rest_selec);
      91             : static Selectivity prefix_selectivity(PlannerInfo *root,
      92             :                                       VariableStatData *vardata,
      93             :                                       Oid vartype, Oid opfamily, Const *prefixcon);
      94             : static Selectivity like_selectivity(const char *patt, int pattlen,
      95             :                                     bool case_insensitive);
      96             : static Selectivity regex_selectivity(const char *patt, int pattlen,
      97             :                                      bool case_insensitive,
      98             :                                      int fixed_prefix_len);
      99             : static int  pattern_char_isalpha(char c, bool is_multibyte,
     100             :                                  pg_locale_t locale, bool locale_is_c);
     101             : static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
     102             :                                   Oid collation);
     103             : static Datum string_to_datum(const char *str, Oid datatype);
     104             : static Const *string_to_const(const char *str, Oid datatype);
     105             : static Const *string_to_bytea_const(const char *str, size_t str_len);
     106             : 
     107             : 
     108             : /*
     109             :  * Planner support functions for LIKE, regex, and related operators
     110             :  */
     111             : Datum
     112        4226 : textlike_support(PG_FUNCTION_ARGS)   /* planner support entry point: forwards to like_regex_support() with Pattern_Type_Like */
     113             : {
     114        4226 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);    /* argument 0: the support request node */
     115             : 
     116        4226 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));    /* may be NULL, e.g. for a request type like_regex_support() does not handle */
     117             : }
     118             : 
     119             : Datum
     120          96 : texticlike_support(PG_FUNCTION_ARGS)   /* planner support entry point: forwards to like_regex_support() with Pattern_Type_Like_IC */
     121             : {
     122          96 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);    /* argument 0: the support request node */
     123             : 
     124          96 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));    /* may be NULL, e.g. for a request type like_regex_support() does not handle */
     125             : }
     126             : 
     127             : Datum
     128        6966 : textregexeq_support(PG_FUNCTION_ARGS)   /* planner support entry point: forwards to like_regex_support() with Pattern_Type_Regex */
     129             : {
     130        6966 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);    /* argument 0: the support request node */
     131             : 
     132        6966 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));    /* may be NULL, e.g. for a request type like_regex_support() does not handle */
     133             : }
     134             : 
     135             : Datum
     136          64 : texticregexeq_support(PG_FUNCTION_ARGS)   /* planner support entry point: forwards to like_regex_support() with Pattern_Type_Regex_IC */
     137             : {
     138          64 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);    /* argument 0: the support request node */
     139             : 
     140          64 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));    /* may be NULL, e.g. for a request type like_regex_support() does not handle */
     141             : }
     142             : 
     143             : /* Common code for the above: answers SupportRequestSelectivity and SupportRequestIndexCondition requests for the given pattern type; returns NULL for any other request type */
     144             : static Node *
     145       11352 : like_regex_support(Node *rawreq, Pattern_Type ptype)
     146             : {
     147       11352 :     Node       *ret = NULL;    /* stays NULL unless one of the branches below produces a result */
     148             : 
     149       11352 :     if (IsA(rawreq, SupportRequestSelectivity))
     150             :     {
     151             :         /*
     152             :          * Make a selectivity estimate for a function call, just as we'd do if
     153             :          * the call was via the corresponding operator.
     154             :          */
     155           0 :         SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
     156             :         Selectivity s1;
     157             : 
     158           0 :         if (req->is_join)
     159             :         {
     160             :             /*
     161             :              * For the moment we just punt.  If patternjoinsel is ever
     162             :              * improved to do better, this should be made to call it.
     163             :              */
     164           0 :             s1 = DEFAULT_MATCH_SEL;
     165             :         }
     166             :         else
     167             :         {
     168             :             /* Share code with operator restriction selectivity functions */
     169           0 :             s1 = patternsel_common(req->root,
     170             :                                    InvalidOid,    /* oprid: no operator OID here, the function OID is passed instead */
     171             :                                    req->funcid,
     172             :                                    req->args,
     173             :                                    req->varRelid,
     174             :                                    req->inputcollid,
     175             :                                    ptype,
     176             :                                    false);    /* negate */
     177             :         }
     178           0 :         req->selectivity = s1;
     179           0 :         ret = (Node *) req;    /* hand back the request node itself, with selectivity filled in */
     180             :     }
     181       11352 :     else if (IsA(rawreq, SupportRequestIndexCondition))
     182             :     {
     183             :         /* Try to convert operator/function call to index conditions */
     184        3058 :         SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
     185             : 
     186             :         /*
     187             :          * Currently we have no "reverse" match operators with the pattern on
     188             :          * the left, so we only need consider cases with the indexkey on the
     189             :          * left.
     190             :          */
     191        3058 :         if (req->indexarg != 0)
     192           0 :             return NULL;
     193             : 
     194        3058 :         if (is_opclause(req->node))
     195             :         {
     196        3058 :             OpExpr     *clause = (OpExpr *) req->node;
     197             : 
     198             :             Assert(list_length(clause->args) == 2);
     199        3058 :             ret = (Node *)
     200        6116 :                 match_pattern_prefix((Node *) linitial(clause->args),
     201        3058 :                                      (Node *) lsecond(clause->args),
     202             :                                      ptype,
     203             :                                      clause->inputcollid,
     204             :                                      req->opfamily,
     205             :                                      req->indexcollation);
     206             :         }
     207           0 :         else if (is_funcclause(req->node))   /* be paranoid */
     208             :         {
     209           0 :             FuncExpr   *clause = (FuncExpr *) req->node;
     210             : 
     211             :             Assert(list_length(clause->args) == 2);
     212           0 :             ret = (Node *)
     213           0 :                 match_pattern_prefix((Node *) linitial(clause->args),
     214           0 :                                      (Node *) lsecond(clause->args),
     215             :                                      ptype,
     216             :                                      clause->inputcollid,
     217             :                                      req->opfamily,
     218             :                                      req->indexcollation);
     219             :         }
     220             :     }
     221             : 
     222       11352 :     return ret;
     223             : }
     224             : 
     225             : /*
     226             :  * match_pattern_prefix
     227             :  *    Try to generate an indexqual for a LIKE or regex operator.
     228             :  *    Returns NIL when no index condition can be derived; otherwise a list holding a single "=" clause (exact match), or a ">=" clause optionally followed by a "<" clause (prefix match). */
     229             : static List *
     230        3058 : match_pattern_prefix(Node *leftop,
     231             :                      Node *rightop,
     232             :                      Pattern_Type ptype,
     233             :                      Oid expr_coll,
     234             :                      Oid opfamily,
     235             :                      Oid indexcollation)
     236             : {
     237             :     List       *result;
     238             :     Const      *patt;
     239             :     Const      *prefix;
     240             :     Pattern_Prefix_Status pstatus;
     241             :     Oid         ldatatype;
     242             :     Oid         rdatatype;
     243             :     Oid         oproid;
     244             :     Expr       *expr;
     245             :     FmgrInfo    ltproc;
     246             :     Const      *greaterstr;
     247             : 
     248             :     /*
     249             :      * Can't do anything with a non-constant or NULL pattern argument.
     250             :      *
     251             :      * Note that since we restrict ourselves to cases with a hard constant on
     252             :      * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
     253             :      * about verifying that.
     254             :      */
     255        6116 :     if (!IsA(rightop, Const) ||
     256        3058 :         ((Const *) rightop)->constisnull)
     257           0 :         return NIL;
     258        3058 :     patt = (Const *) rightop;
     259             : 
     260             :     /*
     261             :      * Not supported if the expression collation is nondeterministic.  The
     262             :      * optimized equality or prefix tests use bytewise comparisons, which is
     263             :      * not consistent with nondeterministic collations.  The actual
     264             :      * pattern-matching implementation functions will later error out that
     265             :      * pattern-matching is not supported with nondeterministic collations. (We
     266             :      * could also error out here, but by doing it later we get more precise
     267             :      * error messages.)  (It should be possible to support at least
     268             :      * Pattern_Prefix_Exact, but no point as long as the actual
     269             :      * pattern-matching implementations don't support it.)
     270             :      *
     271             :      * expr_coll is not set for a non-collation-aware data type such as bytea.
     272             :      */
     273        3058 :     if (expr_coll && !get_collation_isdeterministic(expr_coll))
     274           0 :         return NIL;
     275             : 
     276             :     /*
     277             :      * Try to extract a fixed prefix from the pattern.
     278             :      */
     279        3058 :     pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
     280             :                                    &prefix, NULL);    /* NULL rest_selec: presumably means no selectivity estimate is wanted here */
     281             : 
     282             :     /* fail if no fixed prefix */
     283        3058 :     if (pstatus == Pattern_Prefix_None)
     284         126 :         return NIL;
     285             : 
     286             :     /*
     287             :      * Must also check that index's opfamily supports the operators we will
     288             :      * want to apply.  (A hash index, for example, will not support ">=".)
     289             :      * Currently, only btree and spgist support the operators we need.
     290             :      *
     291             :      * Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a
     292             :      * hash index would work.  Currently it doesn't seem worth checking for
     293             :      * that, however.
     294             :      *
     295             :      * We insist on the opfamily being one of the specific ones we expect,
     296             :      * else we'd do the wrong thing if someone were to make a reverse-sort
     297             :      * opfamily with the same operators.
     298             :      *
     299             :      * The non-pattern opclasses will not sort the way we need in most non-C
     300             :      * locales.  We can use such an index anyway for an exact match (simple
     301             :      * equality), but not for prefix-match cases.  Note that here we are
     302             :      * looking at the index's collation, not the expression's collation --
     303             :      * this test is *not* dependent on the LIKE/regex operator's collation.
     304             :      *
     305             :      * While we're at it, identify the type the comparison constant(s) should
     306             :      * have, based on the opfamily.
     307             :      */
     308        2932 :     switch (opfamily)
     309             :     {
     310             :         case TEXT_BTREE_FAM_OID:
     311        3852 :             if (!(pstatus == Pattern_Prefix_Exact ||
     312         920 :                   lc_collate_is_c(indexcollation)))
     313           8 :                 return NIL;
     314        2924 :             rdatatype = TEXTOID;
     315        2924 :             break;
     316             : 
     317             :         case TEXT_PATTERN_BTREE_FAM_OID:
     318             :         case TEXT_SPGIST_FAM_OID:
     319           0 :             rdatatype = TEXTOID;
     320           0 :             break;
     321             : 
     322             :         case BPCHAR_BTREE_FAM_OID:
     323           0 :             if (!(pstatus == Pattern_Prefix_Exact ||
     324           0 :                   lc_collate_is_c(indexcollation)))
     325           0 :                 return NIL;
     326           0 :             rdatatype = BPCHAROID;
     327           0 :             break;
     328             : 
     329             :         case BPCHAR_PATTERN_BTREE_FAM_OID:
     330           0 :             rdatatype = BPCHAROID;
     331           0 :             break;
     332             : 
     333             :         case BYTEA_BTREE_FAM_OID:
     334           0 :             rdatatype = BYTEAOID;
     335           0 :             break;
     336             : 
     337             :         default:
     338           0 :             return NIL;
     339             :     }
     340             : 
     341             :     /* OK, prepare to create the indexqual(s) */
     342        2924 :     ldatatype = exprType(leftop);
     343             : 
     344             :     /*
     345             :      * If necessary, coerce the prefix constant to the right type.  The given
     346             :      * prefix constant is either text or bytea type, therefore the only case
     347             :      * where we need to do anything is when converting text to bpchar.  Those
     348             :      * two types are binary-compatible, so relabeling the Const node is
     349             :      * sufficient.
     350             :      */
     351        2924 :     if (prefix->consttype != rdatatype)
     352             :     {
     353             :         Assert(prefix->consttype == TEXTOID &&
     354             :                rdatatype == BPCHAROID);
     355           0 :         prefix->consttype = rdatatype;
     356             :     }
     357             : 
     358             :     /*
     359             :      * If we found an exact-match pattern, generate an "=" indexqual.
     360             :      */
     361        2924 :     if (pstatus == Pattern_Prefix_Exact)
     362             :     {
     363        2012 :         oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
     364             :                                      BTEqualStrategyNumber);
     365        2012 :         if (oproid == InvalidOid)
     366           0 :             elog(ERROR, "no = operator for opfamily %u", opfamily);
     367        2012 :         expr = make_opclause(oproid, BOOLOID, false,
     368             :                              (Expr *) leftop, (Expr *) prefix,
     369             :                              InvalidOid, indexcollation);
     370        2012 :         result = list_make1(expr);
     371        2012 :         return result;
     372             :     }
     373             : 
     374             :     /*
     375             :      * Otherwise, we have a nonempty required prefix of the values.
     376             :      *
     377             :      * We can always say "x >= prefix".
     378             :      */
     379         912 :     oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
     380             :                                  BTGreaterEqualStrategyNumber);
     381         912 :     if (oproid == InvalidOid)
     382           0 :         elog(ERROR, "no >= operator for opfamily %u", opfamily);
     383         912 :     expr = make_opclause(oproid, BOOLOID, false,
     384             :                          (Expr *) leftop, (Expr *) prefix,
     385             :                          InvalidOid, indexcollation);
     386         912 :     result = list_make1(expr);
     387             : 
     388             :     /*-------
     389             :      * If we can create a string larger than the prefix, we can say
     390             :      * "x < greaterstr".  NB: we rely on make_greater_string() to generate
     391             :      * a guaranteed-greater string, not just a probably-greater string.
     392             :      * In general this is only guaranteed in C locale, so we'd better be
     393             :      * using a C-locale index collation.
     394             :      *-------
     395             :      */
     396         912 :     oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
     397             :                                  BTLessStrategyNumber);
     398         912 :     if (oproid == InvalidOid)
     399           0 :         elog(ERROR, "no < operator for opfamily %u", opfamily);
     400         912 :     fmgr_info(get_opcode(oproid), &ltproc);
     401         912 :     greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
     402         912 :     if (greaterstr)    /* if no greater string was produced, only the ">=" clause is returned */
     403             :     {
     404         912 :         expr = make_opclause(oproid, BOOLOID, false,
     405             :                              (Expr *) leftop, (Expr *) greaterstr,
     406             :                              InvalidOid, indexcollation);
     407         912 :         result = lappend(result, expr);
     408             :     }
     409             : 
     410         912 :     return result;
     411             : }
     412             : 
     413             : 
     414             : /*
     415             :  * patternsel_common - generic code for pattern-match restriction selectivity.
     416             :  *
     417             :  * To support using this from either the operator or function paths, caller
     418             :  * may pass either operator OID or underlying function OID; we look up the
     419             :  * latter from the former if needed.  (We could just have patternsel() call
     420             :  * get_opcode(), but the work would be wasted if we don't have a need to
     421             :  * compare a fixed prefix to the pg_statistic data.)
     422             :  *
     423             :  * Note that oprid and/or opfuncid should be for the positive-match operator
     424             :  * even when negate is true.
     425             :  */
     426             : static double
     427        4470 : patternsel_common(PlannerInfo *root,
     428             :                   Oid oprid,
     429             :                   Oid opfuncid,
     430             :                   List *args,
     431             :                   int varRelid,
     432             :                   Oid collation,
     433             :                   Pattern_Type ptype,
     434             :                   bool negate)
     435             : {
     436             :     VariableStatData vardata;
     437             :     Node       *other;
     438             :     bool        varonleft;
     439             :     Datum       constval;
     440             :     Oid         consttype;
     441             :     Oid         vartype;
     442             :     Oid         opfamily;
     443             :     Pattern_Prefix_Status pstatus;
     444             :     Const      *patt;
     445        4470 :     Const      *prefix = NULL;
     446        4470 :     Selectivity rest_selec = 0;
     447        4470 :     double      nullfrac = 0.0;
     448             :     double      result;
     449             : 
     450             :     /*
     451             :      * Initialize result to the appropriate default estimate depending on
     452             :      * whether it's a match or not-match operator.
     453             :      */
     454        4470 :     if (negate)
     455         568 :         result = 1.0 - DEFAULT_MATCH_SEL;
     456             :     else
     457        3902 :         result = DEFAULT_MATCH_SEL;
     458             : 
     459             :     /*
     460             :      * If expression is not variable op constant, then punt and return the
     461             :      * default estimate.
     462             :      */
     463        4470 :     if (!get_restriction_variable(root, args, varRelid,
     464             :                                   &vardata, &other, &varonleft))
     465          64 :         return result;
     466        4406 :     if (!varonleft || !IsA(other, Const))
     467             :     {
     468           0 :         ReleaseVariableStats(vardata);
     469           0 :         return result;
     470             :     }
     471             : 
     472             :     /*
     473             :      * If the constant is NULL, assume operator is strict and return zero, ie,
     474             :      * operator will never return TRUE.  (It's zero even for a negator op.)
     475             :      */
     476        4406 :     if (((Const *) other)->constisnull)
     477             :     {
     478           0 :         ReleaseVariableStats(vardata);
     479           0 :         return 0.0;
     480             :     }
     481        4406 :     constval = ((Const *) other)->constvalue;
     482        4406 :     consttype = ((Const *) other)->consttype;
     483             : 
     484             :     /*
     485             :      * The right-hand const is type text or bytea for all supported operators.
     486             :      * We do not expect to see binary-compatible types here, since
     487             :      * const-folding should have relabeled the const to exactly match the
     488             :      * operator's declared type.
     489             :      */
     490        4406 :     if (consttype != TEXTOID && consttype != BYTEAOID)
     491             :     {
     492          24 :         ReleaseVariableStats(vardata);
     493          24 :         return result;
     494             :     }
     495             : 
     496             :     /*
     497             :      * Similarly, the exposed type of the left-hand side should be one of
     498             :      * those we know.  (Do not look at vardata.atttype, which might be
     499             :      * something binary-compatible but different.)  We can use it to choose
     500             :      * the index opfamily from which we must draw the comparison operators.
     501             :      *
     502             :      * NOTE: It would be more correct to use the PATTERN opfamilies than the
     503             :      * simple ones, but at the moment ANALYZE will not generate statistics for
     504             :      * the PATTERN operators.  But our results are so approximate anyway that
     505             :      * it probably hardly matters.
     506             :      */
     507        4382 :     vartype = vardata.vartype;
     508             : 
     509        4382 :     switch (vartype)
     510             :     {
     511             :         case TEXTOID:
     512             :         case NAMEOID:
     513        4350 :             opfamily = TEXT_BTREE_FAM_OID;
     514        4350 :             break;
     515             :         case BPCHAROID:
     516          24 :             opfamily = BPCHAR_BTREE_FAM_OID;
     517          24 :             break;
     518             :         case BYTEAOID:
     519           4 :             opfamily = BYTEA_BTREE_FAM_OID;
     520           4 :             break;
     521             :         default:
     522           4 :             ReleaseVariableStats(vardata);
     523           4 :             return result;
     524             :     }
     525             : 
     526             :     /*
     527             :      * Grab the nullfrac for use below.
     528             :      */
     529        4378 :     if (HeapTupleIsValid(vardata.statsTuple))
     530             :     {
     531             :         Form_pg_statistic stats;
     532             : 
     533        3114 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     534        3114 :         nullfrac = stats->stanullfrac;
     535             :     }
     536             : 
     537             :     /*
     538             :      * Pull out any fixed prefix implied by the pattern, and estimate the
     539             :      * fractional selectivity of the remainder of the pattern.  Unlike many
     540             :      * other selectivity estimators, we use the pattern operator's actual
     541             :      * collation for this step.  This is not because we expect the collation
     542             :      * to make a big difference in the selectivity estimate (it seldom would),
     543             :      * but because we want to be sure we cache compiled regexps under the
     544             :      * right cache key, so that they can be re-used at runtime.
     545             :      */
     546        4378 :     patt = (Const *) other;
     547        4378 :     pstatus = pattern_fixed_prefix(patt, ptype, collation,
     548             :                                    &prefix, &rest_selec);
     549             : 
     550             :     /*
     551             :      * If necessary, coerce the prefix constant to the right type.
     552             :      */
     553        4378 :     if (prefix && prefix->consttype != vartype)
     554             :     {
     555             :         char       *prefixstr;
     556             : 
     557        3404 :         switch (prefix->consttype)
     558             :         {
     559             :             case TEXTOID:
     560        3404 :                 prefixstr = TextDatumGetCString(prefix->constvalue);
     561        3404 :                 break;
     562             :             case BYTEAOID:
     563           0 :                 prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
     564             :                                                                 prefix->constvalue));
     565           0 :                 break;
     566             :             default:
     567           0 :                 elog(ERROR, "unrecognized consttype: %u",
     568             :                      prefix->consttype);
     569             :                 ReleaseVariableStats(vardata);
     570             :                 return result;
     571             :         }
     572        3404 :         prefix = string_to_const(prefixstr, vartype);
     573        3404 :         pfree(prefixstr);
     574             :     }
     575             : 
     576        4378 :     if (pstatus == Pattern_Prefix_Exact)
     577             :     {
     578             :         /*
     579             :          * Pattern specifies an exact match, so pretend operator is '='
     580             :          */
     581        2012 :         Oid         eqopr = get_opfamily_member(opfamily, vartype, vartype,
     582             :                                                 BTEqualStrategyNumber);
     583             : 
     584        2012 :         if (eqopr == InvalidOid)
     585           0 :             elog(ERROR, "no = operator for opfamily %u", opfamily);
     586        2012 :         result = var_eq_const(&vardata, eqopr, prefix->constvalue,
     587             :                               false, true, false);
     588             :     }
     589             :     else
     590             :     {
     591             :         /*
     592             :          * Not exact-match pattern.  If we have a sufficiently large
     593             :          * histogram, estimate selectivity for the histogram part of the
     594             :          * population by counting matches in the histogram.  If not, estimate
     595             :          * selectivity of the fixed prefix and remainder of pattern
     596             :          * separately, then combine the two to get an estimate of the
     597             :          * selectivity for the part of the column population represented by
     598             :          * the histogram.  (For small histograms, we combine these
     599             :          * approaches.)
     600             :          *
     601             :          * We then add up data for any most-common-values values; these are
     602             :          * not in the histogram population, and we can get exact answers for
     603             :          * them by applying the pattern operator, so there's no reason to
     604             :          * approximate.  (If the MCVs cover a significant part of the total
     605             :          * population, this gives us a big leg up in accuracy.)
     606             :          */
     607             :         Selectivity selec;
     608             :         int         hist_size;
     609             :         FmgrInfo    opproc;
     610             :         double      mcv_selec,
     611             :                     sumcommon;
     612             : 
     613             :         /* Try to use the histogram entries to get selectivity */
     614        2366 :         if (!OidIsValid(opfuncid))
     615        2366 :             opfuncid = get_opcode(oprid);
     616        2366 :         fmgr_info(opfuncid, &opproc);
     617             : 
     618        2366 :         selec = histogram_selectivity(&vardata, &opproc, constval, true,
     619             :                                       10, 1, &hist_size);
     620             : 
     621             :         /* If not at least 100 entries, use the heuristic method */
     622        2366 :         if (hist_size < 100)
     623             :         {
     624             :             Selectivity heursel;
     625             :             Selectivity prefixsel;
     626             : 
     627        1716 :             if (pstatus == Pattern_Prefix_Partial)
     628        1290 :                 prefixsel = prefix_selectivity(root, &vardata, vartype,
     629             :                                                opfamily, prefix);
     630             :             else
     631         426 :                 prefixsel = 1.0;
     632        1716 :             heursel = prefixsel * rest_selec;
     633             : 
     634        1716 :             if (selec < 0)       /* fewer than 10 histogram entries? */
     635        1596 :                 selec = heursel;
     636             :             else
     637             :             {
     638             :                 /*
     639             :                  * For histogram sizes from 10 to 100, we combine the
     640             :                  * histogram and heuristic selectivities, putting increasingly
     641             :                  * more trust in the histogram for larger sizes.
     642             :                  */
     643         120 :                 double      hist_weight = hist_size / 100.0;
     644             : 
     645         120 :                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
     646             :             }
     647             :         }
     648             : 
     649             :         /* In any case, don't believe extremely small or large estimates. */
     650        2366 :         if (selec < 0.0001)
     651         682 :             selec = 0.0001;
     652        1684 :         else if (selec > 0.9999)
     653          74 :             selec = 0.9999;
     654             : 
     655             :         /*
     656             :          * If we have most-common-values info, add up the fractions of the MCV
     657             :          * entries that satisfy MCV OP PATTERN.  These fractions contribute
     658             :          * directly to the result selectivity.  Also add up the total fraction
     659             :          * represented by MCV entries.
     660             :          */
     661        2366 :         mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
     662             :                                     &sumcommon);
     663             : 
     664             :         /*
     665             :          * Now merge the results from the MCV and histogram calculations,
     666             :          * realizing that the histogram covers only the non-null values that
     667             :          * are not listed in MCV.
     668             :          */
     669        2366 :         selec *= 1.0 - nullfrac - sumcommon;
     670        2366 :         selec += mcv_selec;
     671        2366 :         result = selec;
     672             :     }
     673             : 
     674             :     /* now adjust if we wanted not-match rather than match */
     675        4378 :     if (negate)
     676         492 :         result = 1.0 - result - nullfrac;
     677             : 
     678             :     /* result should be in range, but make sure... */
     679        4378 :     CLAMP_PROBABILITY(result);
     680             : 
     681        4378 :     if (prefix)
     682             :     {
     683        4018 :         pfree(DatumGetPointer(prefix->constvalue));
     684        4018 :         pfree(prefix);
     685             :     }
     686             : 
     687        4378 :     ReleaseVariableStats(vardata);
     688             : 
     689        4378 :     return result;
     690             : }
     691             : 
     692             : /*
     693             :  * patternsel - adapter between the SQL-callable estimators and patternsel_common: unpacks root, operator OID, argument list, varRelid and collation from fcinfo.
     694             :  */
     695             : static double
     696        4470 : patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     697             : {
     698        4470 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     699        4470 :     Oid         operator = PG_GETARG_OID(1);
     700        4470 :     List       *args = (List *) PG_GETARG_POINTER(2);
     701        4470 :     int         varRelid = PG_GETARG_INT32(3);
     702        4470 :     Oid         collation = PG_GET_COLLATION();
     703             : 
     704             :     /*
     705             :      * If this is for a NOT LIKE or similar operator, get the corresponding
     706             :      * positive-match operator and work with that (negate is still passed down).
     707             :      */
     708        4470 :     if (negate)
     709             :     {
     710         568 :         operator = get_negator(operator);
     711         568 :         if (!OidIsValid(operator))
     712           0 :             elog(ERROR, "patternsel called for operator without a negator");
     713             :     }
     714             : 
     715        4470 :     return patternsel_common(root,
     716             :                              operator,
     717             :                              InvalidOid,    /* NOTE(review): presumably the operator's function OID, left for the callee to look up -- confirm against patternsel_common's signature */
     718             :                              args,
     719             :                              varRelid,
     720             :                              collation,
     721             :                              ptype,
     722             :                              negate);
     723             : }
     724             : 
     725             : /*
     726             :  *      regexeqsel      - Restriction selectivity of a case-sensitive regular-expression match (Pattern_Type_Regex, not negated).
     727             :  */
     728             : Datum
     729        2392 : regexeqsel(PG_FUNCTION_ARGS)
     730             : {
     731        2392 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
     732             : }
     733             : 
     734             : /*
     735             :  *      icregexeqsel    - Restriction selectivity of a case-insensitive regex match (Pattern_Type_Regex_IC, not negated).
     736             :  */
     737             : Datum
     738          40 : icregexeqsel(PG_FUNCTION_ARGS)
     739             : {
     740          40 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
     741             : }
     742             : 
     743             : /*
     744             :  *      likesel         - Restriction selectivity of a LIKE pattern match (Pattern_Type_Like, not negated).
     745             :  */
     746             : Datum
     747        1406 : likesel(PG_FUNCTION_ARGS)
     748             : {
     749        1406 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
     750             : }
     751             : 
     752             : /*
     753             :  *      prefixsel           - Restriction selectivity of the prefix operator (Pattern_Type_Prefix, not negated).
     754             :  */
     755             : Datum
     756          20 : prefixsel(PG_FUNCTION_ARGS)
     757             : {
     758          20 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
     759             : }
     760             : 
     761             : /*
     762             :  *      iclikesel           - Restriction selectivity of an ILIKE pattern match
     763             :  *                            (Pattern_Type_Like_IC, not negated).
     764             :  */
     765             : Datum
     766          44 : iclikesel(PG_FUNCTION_ARGS)
     767             : {
     768          44 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
     769             : }
     770             : 
     771             : /*
     772             :  *      regexnesel      - Restriction selectivity of a case-sensitive regular-expression non-match (Pattern_Type_Regex, negated).
     773             :  */
     774             : Datum
     775         506 : regexnesel(PG_FUNCTION_ARGS)
     776             : {
     777         506 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
     778             : }
     779             : 
     780             : /*
     781             :  *      icregexnesel    - Restriction selectivity of a case-insensitive regex non-match (Pattern_Type_Regex_IC, negated).
     782             :  */
     783             : Datum
     784          12 : icregexnesel(PG_FUNCTION_ARGS)
     785             : {
     786          12 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
     787             : }
     788             : 
     789             : /*
     790             :  *      nlikesel        - Restriction selectivity of a LIKE pattern non-match (Pattern_Type_Like, negated).
     791             :  */
     792             : Datum
     793          42 : nlikesel(PG_FUNCTION_ARGS)
     794             : {
     795          42 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
     796             : }
     797             : 
     798             : /*
     799             :  *      icnlikesel      - Restriction selectivity of an ILIKE pattern non-match (Pattern_Type_Like_IC, negated).
     800             :  */
     801             : Datum
     802           8 : icnlikesel(PG_FUNCTION_ARGS)
     803             : {
     804           8 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
     805             : }
     806             : 
     807             : /*
     808             :  * patternjoinsel       - Generic code for pattern-match join selectivity; returns DEFAULT_MATCH_SEL, or its complement when negate is set.
     809             :  */
     810             : static double
     811           0 : patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     812             : {
     813             :     /* For the moment we just punt: fcinfo and ptype are not consulted. */
     814           0 :     return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
     815             : }
     816             : 
     817             : /*
     818             :  *      regexeqjoinsel  - Join selectivity of a case-sensitive regular-expression match (delegates to patternjoinsel, not negated).
     819             :  */
     820             : Datum
     821           0 : regexeqjoinsel(PG_FUNCTION_ARGS)
     822             : {
     823           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
     824             : }
     825             : 
     826             : /*
     827             :  *      icregexeqjoinsel    - Join selectivity of a case-insensitive regex match (delegates to patternjoinsel, not negated).
     828             :  */
     829             : Datum
     830           0 : icregexeqjoinsel(PG_FUNCTION_ARGS)
     831             : {
     832           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
     833             : }
     834             : 
     835             : /*
     836             :  *      likejoinsel         - Join selectivity of a LIKE pattern match (delegates to patternjoinsel, not negated).
     837             :  */
     838             : Datum
     839           0 : likejoinsel(PG_FUNCTION_ARGS)
     840             : {
     841           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
     842             : }
     843             : 
     844             : /*
     845             :  *      prefixjoinsel           - Join selectivity of the prefix operator (delegates to patternjoinsel, not negated).
     846             :  */
     847             : Datum
     848           0 : prefixjoinsel(PG_FUNCTION_ARGS)
     849             : {
     850           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
     851             : }
     852             : 
     853             : /*
     854             :  *      iclikejoinsel           - Join selectivity of an ILIKE pattern match (delegates to patternjoinsel, not negated).
     855             :  */
     856             : Datum
     857           0 : iclikejoinsel(PG_FUNCTION_ARGS)
     858             : {
     859           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
     860             : }
     861             : 
     862             : /*
     863             :  *      regexnejoinsel  - Join selectivity of a case-sensitive regex non-match (delegates to patternjoinsel, negated).
     864             :  */
     865             : Datum
     866           0 : regexnejoinsel(PG_FUNCTION_ARGS)
     867             : {
     868           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
     869             : }
     870             : 
     871             : /*
     872             :  *      icregexnejoinsel    - Join selectivity of a case-insensitive regex non-match (delegates to patternjoinsel, negated).
     873             :  */
     874             : Datum
     875           0 : icregexnejoinsel(PG_FUNCTION_ARGS)
     876             : {
     877           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
     878             : }
     879             : 
     880             : /*
     881             :  *      nlikejoinsel        - Join selectivity of a LIKE pattern non-match (delegates to patternjoinsel, negated).
     882             :  */
     883             : Datum
     884           0 : nlikejoinsel(PG_FUNCTION_ARGS)
     885             : {
     886           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
     887             : }
     888             : 
     889             : /*
     890             :  *      icnlikejoinsel      - Join selectivity of an ILIKE pattern non-match (delegates to patternjoinsel, negated).
     891             :  */
     892             : Datum
     893           0 : icnlikejoinsel(PG_FUNCTION_ARGS)
     894             : {
     895           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
     896             : }
     897             : 
     898             : 
     899             : /*-------------------------------------------------------------------------
     900             :  *
     901             :  * Pattern analysis functions
     902             :  *
     903             :  * These routines support analysis of LIKE and regular-expression patterns
     904             :  * by the planner/optimizer.  It's important that they agree with the
     905             :  * regular-expression code in backend/regex/ and the LIKE code in
     906             :  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
     907             :  * must be conservative: if we report a string longer than the true fixed
     908             :  * prefix, the query may produce actually wrong answers, rather than just
     909             :  * getting a bad selectivity estimate!
     910             :  *
     911             :  *-------------------------------------------------------------------------
     912             :  */
     913             : 
     914             : /*
     915             :  * Extract the fixed prefix, if any, for a pattern (LIKE/ILIKE variant below).
     916             :  *
     917             :  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
     918             :  *  or to NULL if no fixed prefix exists for the pattern.  (The LIKE code below always builds a Const, possibly holding an empty string.)
     919             :  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
     920             :  *  selectivity of the remainder of the pattern (without any fixed prefix).
     921             :  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
     922             :  *
     923             :  * The return value distinguishes no fixed prefix, a partial prefix,
     924             :  * or an exact-match-only pattern.
     925             :  */
     926             : 
     927             : static Pattern_Prefix_Status
     928        2388 : like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
     929             :                   Const **prefix_const, Selectivity *rest_selec)
     930             : {
     931             :     char       *match;
     932             :     char       *patt;
     933             :     int         pattlen;
     934        2388 :     Oid         typeid = patt_const->consttype;
     935             :     int         pos,
     936             :                 match_pos;
     937        2388 :     bool        is_multibyte = (pg_database_encoding_max_length() > 1);
     938        2388 :     pg_locale_t locale = 0;
     939        2388 :     bool        locale_is_c = false;
     940             : 
     941             :     /* the right-hand const must be of type text or bytea */
     942             :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
     943             : 
     944        2388 :     if (case_insensitive)
     945             :     {
     946          60 :         if (typeid == BYTEAOID)
     947           0 :             ereport(ERROR,
     948             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     949             :                      errmsg("case insensitive matching not supported on type bytea")));
     950             : 
     951             :         /* If case-insensitive, we need locale info for the isalpha test below */
     952          60 :         if (lc_ctype_is_c(collation))
     953          60 :             locale_is_c = true;
     954           0 :         else if (collation != DEFAULT_COLLATION_OID)
     955             :         {
     956           0 :             if (!OidIsValid(collation))
     957             :             {
     958             :                 /*
     959             :                  * This typically means that the parser could not resolve a
     960             :                  * conflict of implicit collations, so report it that way.
     961             :                  */
     962           0 :                 ereport(ERROR,
     963             :                         (errcode(ERRCODE_INDETERMINATE_COLLATION),
     964             :                          errmsg("could not determine which collation to use for ILIKE"),
     965             :                          errhint("Use the COLLATE clause to set the collation explicitly.")));
     966             :             }
     967           0 :             locale = pg_newlocale_from_collation(collation);
     968             :         }
     969             :     }
     970             : 
     971        2388 :     if (typeid != BYTEAOID)
     972             :     {
     973        2380 :         patt = TextDatumGetCString(patt_const->constvalue);
     974        2380 :         pattlen = strlen(patt);
     975             :     }
     976             :     else
     977             :     {
     978           8 :         bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
     979             : 
     980           8 :         pattlen = VARSIZE_ANY_EXHDR(bstr);
     981           8 :         patt = (char *) palloc(pattlen);
     982           8 :         memcpy(patt, VARDATA_ANY(bstr), pattlen);
     983             :         Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
     984             :     }
     985             : 
     986        2388 :     match = palloc(pattlen + 1);    /* prefix is at most pattlen bytes, plus terminator */
     987        2388 :     match_pos = 0;
     988       12792 :     for (pos = 0; pos < pattlen; pos++)
     989             :     {
     990             :         /* % and _ are wildcard characters in LIKE; the prefix ends at the first unescaped one */
     991       23892 :         if (patt[pos] == '%' ||
     992       11108 :             patt[pos] == '_')
     993             :             break;
     994             : 
     995             :         /* Backslash escapes the next character; a trailing backslash ends the scan */
     996       10424 :         if (patt[pos] == '\\')
     997             :         {
     998         116 :             pos++;
     999         116 :             if (pos >= pattlen)
    1000           0 :                 break;
    1001             :         }
    1002             : 
    1003             :         /* Stop if case-varying character (it's sort of a wildcard) */
    1004       10508 :         if (case_insensitive &&
    1005          84 :             pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
    1006          20 :             break;
    1007             : 
    1008       10404 :         match[match_pos++] = patt[pos];
    1009             :     }
    1010             : 
    1011        2388 :     match[match_pos] = '\0';
    1012             : 
    1013        2388 :     if (typeid != BYTEAOID)
    1014        2380 :         *prefix_const = string_to_const(match, typeid);
    1015             :     else
    1016           8 :         *prefix_const = string_to_bytea_const(match, match_pos);
    1017             : 
    1018        2388 :     if (rest_selec != NULL)     /* estimate only the part of the pattern after the prefix */
    1019        1484 :         *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
    1020             :                                        case_insensitive);
    1021             : 
    1022        2388 :     pfree(patt);
    1023        2388 :     pfree(match);
    1024             : 
    1025             :     /* in LIKE, an empty pattern is an exact match (pos == pattlen == 0) */
    1026        2388 :     if (pos == pattlen)
    1027           8 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1028             : 
    1029        2380 :     if (match_pos > 0)
    1030        2102 :         return Pattern_Prefix_Partial;
    1031             : 
    1032         278 :     return Pattern_Prefix_None;
    1033             : }
    1034             : /* regex_fixed_prefix - regex counterpart of like_fixed_prefix: obtains the prefix from regexp_fixed_prefix and sets *prefix_const to NULL when there is none. */
    1035             : static Pattern_Prefix_Status
    1036        5028 : regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
    1037             :                    Const **prefix_const, Selectivity *rest_selec)
    1038             : {
    1039        5028 :     Oid         typeid = patt_const->consttype;
    1040             :     char       *prefix;
    1041             :     bool        exact;
    1042             : 
    1043             :     /*
    1044             :      * Should be unnecessary, there are no bytea regex operators defined. As
    1045             :      * such, it should be noted that the rest of this function has *not* been
    1046             :      * made safe for binary (possibly NUL-containing) strings.
    1047             :      */
    1048        5028 :     if (typeid == BYTEAOID)
    1049           0 :         ereport(ERROR,
    1050             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1051             :                  errmsg("regular-expression matching not supported on type bytea")));
    1052             : 
    1053             :     /* Use the regexp machinery to extract the prefix, if any; it also reports whether the match is exact */
    1054        5028 :     prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
    1055             :                                  case_insensitive, collation,
    1056             :                                  &exact);
    1057             : 
    1058        5028 :     if (prefix == NULL)
    1059             :     {
    1060         402 :         *prefix_const = NULL;
    1061             : 
    1062         402 :         if (rest_selec != NULL)
    1063             :         {
    1064         360 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1065             : 
    1066         360 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1067             :                                             case_insensitive,
    1068             :                                             0);     /* no fixed prefix, so its length is zero */
    1069         360 :             pfree(patt);
    1070             :         }
    1071             : 
    1072         402 :         return Pattern_Prefix_None;
    1073             :     }
    1074             : 
    1075        4626 :     *prefix_const = string_to_const(prefix, typeid);
    1076             : 
    1077        4626 :     if (rest_selec != NULL)
    1078             :     {
    1079        2514 :         if (exact)
    1080             :         {
    1081             :             /* Exact match, so there's no additional selectivity */
    1082        2008 :             *rest_selec = 1.0;
    1083             :         }
    1084             :         else
    1085             :         {
    1086         506 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1087             : 
    1088         506 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1089             :                                             case_insensitive,
    1090         506 :                                             strlen(prefix));
    1091         506 :             pfree(patt);
    1092             :         }
    1093             :     }
    1094             : 
    1095        4626 :     pfree(prefix);      /* string_to_const was already called on it above */
    1096             : 
    1097        4626 :     if (exact)
    1098        4016 :         return Pattern_Prefix_Exact;    /* pattern specifies exact match */
    1099             :     else
    1100         610 :         return Pattern_Prefix_Partial;
    1101             : }
    1102             : /* pattern_fixed_prefix - dispatch on ptype to the matching prefix extractor; sets *prefix, and *rest_selec when rest_selec is not NULL, and returns the prefix status. */
    1103             : static Pattern_Prefix_Status
    1104        7436 : pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
    1105             :                      Const **prefix, Selectivity *rest_selec)
    1106             : {
    1107             :     Pattern_Prefix_Status result;
    1108             : 
    1109        7436 :     switch (ptype)
    1110             :     {
    1111             :         case Pattern_Type_Like:
    1112        2328 :             result = like_fixed_prefix(patt, false, collation,
    1113             :                                        prefix, rest_selec);
    1114        2328 :             break;
    1115             :         case Pattern_Type_Like_IC:
    1116          60 :             result = like_fixed_prefix(patt, true, collation,
    1117             :                                        prefix, rest_selec);
    1118          60 :             break;
    1119             :         case Pattern_Type_Regex:
    1120        4996 :             result = regex_fixed_prefix(patt, false, collation,
    1121             :                                         prefix, rest_selec);
    1122        4996 :             break;
    1123             :         case Pattern_Type_Regex_IC:
    1124          32 :             result = regex_fixed_prefix(patt, true, collation,
    1125             :                                         prefix, rest_selec);
    1126          32 :             break;
    1127             :         case Pattern_Type_Prefix:
    1128             :             /* The whole pattern is the prefix, so just copy the Const. */
    1129          20 :             result = Pattern_Prefix_Partial;
    1130             :             if (rest_selec != NULL)     /* optional output, as in the other cases */
    1131             :                 *rest_selec = 1.0;  /* all */
    1132             :             *prefix = makeConst(patt->consttype,
    1133             :                                 patt->consttypmod,
    1134             :                                 patt->constcollid,
    1135             :                                 patt->constlen,
    1136             :                                 datumCopy(patt->constvalue,
    1137             :                                           patt->constbyval,
    1138             :                                           patt->constlen),
    1139             :                                 patt->constisnull, patt->constbyval);
    1140          20 :             break;
    1141             :         default:
    1142           0 :             elog(ERROR, "unrecognized ptype: %d", (int) ptype);
    1143             :             result = Pattern_Prefix_None;   /* keep compiler quiet */
    1144             :             break;
    1145             :     }
    1146        7436 :     return result;
    1147             : }
    1148             : 
    1149             : /*
    1150             :  * Estimate the selectivity of a fixed prefix for a pattern match.
    1151             :  *
    1152             :  * A fixed prefix "foo" is estimated as the selectivity of the expression
    1153             :  * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
    1154             :  *
    1155             :  * The selectivity estimate is with respect to the portion of the column
    1156             :  * population represented by the histogram --- the caller must fold this
    1157             :  * together with info about MCVs and NULLs.
    1158             :  *
    1159             :  * We use the >= and < operators from the specified btree opfamily to do the
    1160             :  * estimation.  The given variable and Const must be of the associated
    1161             :  * datatype.
    1162             :  *
    1163             :  * XXX Note: we make use of the upper bound to estimate operator selectivity
    1164             :  * even if the locale is such that we cannot rely on the upper-bound string.
    1165             :  * The selectivity only needs to be approximately right anyway, so it seems
    1166             :  * more useful to use the upper-bound code than not.
    1167             :  */
    1168             : static Selectivity
    1169        1290 : prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
    1170             :                    Oid vartype, Oid opfamily, Const *prefixcon)
    1171             : {
    1172             :     Selectivity prefixsel;
    1173             :     Oid         cmpopr;
    1174             :     FmgrInfo    opproc;
    1175             :     AttStatsSlot sslot;
    1176             :     Const      *greaterstrcon;
    1177             :     Selectivity eq_sel;
    1178             : 
    1179        1290 :     cmpopr = get_opfamily_member(opfamily, vartype, vartype,
    1180             :                                  BTGreaterEqualStrategyNumber);
    1181        1290 :     if (cmpopr == InvalidOid)
    1182           0 :         elog(ERROR, "no >= operator for opfamily %u", opfamily);
    1183        1290 :     fmgr_info(get_opcode(cmpopr), &opproc);
    1184             : 
    1185        1290 :     prefixsel = ineq_histogram_selectivity(root, vardata,
    1186             :                                            &opproc, true, true,
    1187             :                                            prefixcon->constvalue,
    1188             :                                            prefixcon->consttype);
    1189             : 
    1190        1290 :     if (prefixsel < 0.0)
    1191             :     {
    1192             :         /* No histogram is present ... return a suitable default estimate */
    1193         810 :         return DEFAULT_MATCH_SEL;
    1194             :     }
    1195             : 
    1196             :     /*-------
    1197             :      * If we can create a string larger than the prefix, say
    1198             :      * "x < greaterstr".  We try to generate the string referencing the
    1199             :      * collation of the var's statistics, but if that's not available,
    1200             :      * use DEFAULT_COLLATION_OID.
    1201             :      *-------
    1202             :      */
    1203         960 :     if (HeapTupleIsValid(vardata->statsTuple) &&
    1204         480 :         get_attstatsslot(&sslot, vardata->statsTuple,
    1205             :                          STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
    1206             :          /* sslot.stacoll is set up */ ;
    1207             :     else
    1208           0 :         sslot.stacoll = DEFAULT_COLLATION_OID;
    1209         480 :     cmpopr = get_opfamily_member(opfamily, vartype, vartype,
    1210             :                                  BTLessStrategyNumber);
    1211         480 :     if (cmpopr == InvalidOid)
    1212           0 :         elog(ERROR, "no < operator for opfamily %u", opfamily);
    1213         480 :     fmgr_info(get_opcode(cmpopr), &opproc);
    1214         480 :     greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
    1215         480 :     if (greaterstrcon)
    1216             :     {
    1217             :         Selectivity topsel;
    1218             : 
    1219         480 :         topsel = ineq_histogram_selectivity(root, vardata,
    1220             :                                             &opproc, false, false,
    1221             :                                             greaterstrcon->constvalue,
    1222             :                                             greaterstrcon->consttype);
    1223             : 
    1224             :         /* ineq_histogram_selectivity worked before, it shouldn't fail now */
    1225             :         Assert(topsel >= 0.0);
    1226             : 
    1227             :         /*
    1228             :          * Merge the two selectivities in the same way as for a range query
    1229             :          * (see clauselist_selectivity()).  Note that we don't need to worry
    1230             :          * about double-exclusion of nulls, since ineq_histogram_selectivity
    1231             :          * doesn't count those anyway.
    1232             :          */
    1233         480 :         prefixsel = topsel + prefixsel - 1.0;
    1234             :     }
    1235             : 
    1236             :     /*
    1237             :      * If the prefix is long then the two bounding values might be too close
    1238             :      * together for the histogram to distinguish them usefully, resulting in a
    1239             :      * zero estimate (plus or minus roundoff error). To avoid returning a
    1240             :      * ridiculously small estimate, compute the estimated selectivity for
    1241             :      * "variable = 'foo'", and clamp to that. (Obviously, the resultant
    1242             :      * estimate should be at least that.)
    1243             :      *
    1244             :      * We apply this even if we couldn't make a greater string.  That case
    1245             :      * suggests that the prefix is near the maximum possible, and thus
    1246             :      * probably off the end of the histogram, and thus we probably got a very
    1247             :      * small estimate from the >= condition; so we still need to clamp.
    1248             :      */
    1249         480 :     cmpopr = get_opfamily_member(opfamily, vartype, vartype,
    1250             :                                  BTEqualStrategyNumber);
    1251         480 :     if (cmpopr == InvalidOid)
    1252           0 :         elog(ERROR, "no = operator for opfamily %u", opfamily);
    1253         480 :     eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
    1254             :                           false, true, false);
    1255             : 
    1256         480 :     prefixsel = Max(prefixsel, eq_sel);
    1257             : 
    1258         480 :     return prefixsel;
    1259             : }
    1260             : 
    1261             : 
    /*
     * Pattern selectivity estimation.
     *
     * The estimators below look at a pattern of a given type and guess what
     * fraction of strings it matches.  Any fixed prefix has already been
     * stripped by the time they are called, so what they see may be only a
     * fragment of the original pattern.
     *
     * For now the approach is very simplistic: each pattern element contributes
     * one multiplicative factor.  Fixed characters cut the selectivity a good
     * deal, character ranges cut it a little, and wildcards (% for LIKE, .* for
     * regex) raise it.
     */

    /* one literal character: about 1/5 */
    #define FIXED_CHAR_SEL          0.20
    /* one bracketed character class */
    #define CHAR_RANGE_SEL          0.25
    /* "any single character": not 1, since it won't match end-of-string */
    #define ANY_CHAR_SEL            0.9
    /* unrestricted wildcard */
    #define FULL_WILDCARD_SEL       5.0
    /* regex quantifier applied to the preceding item */
    #define PARTIAL_WILDCARD_SEL    2.0
    1277             : 
    1278             : static Selectivity
    1279        1484 : like_selectivity(const char *patt, int pattlen, bool case_insensitive)
    1280             : {
    1281        1484 :     Selectivity sel = 1.0;
    1282             :     int         pos;
    1283             : 
    1284             :     /* Skip any leading wildcard; it's already factored into initial sel */
    1285        2980 :     for (pos = 0; pos < pattlen; pos++)
    1286             :     {
    1287        1984 :         if (patt[pos] != '%' && patt[pos] != '_')
    1288         488 :             break;
    1289             :     }
    1290             : 
    1291        4254 :     for (; pos < pattlen; pos++)
    1292             :     {
    1293             :         /* % and _ are wildcard characters in LIKE */
    1294        2770 :         if (patt[pos] == '%')
    1295         416 :             sel *= FULL_WILDCARD_SEL;
    1296        2354 :         else if (patt[pos] == '_')
    1297          96 :             sel *= ANY_CHAR_SEL;
    1298        2258 :         else if (patt[pos] == '\\')
    1299             :         {
    1300             :             /* Backslash quotes the next character */
    1301          28 :             pos++;
    1302          28 :             if (pos >= pattlen)
    1303           0 :                 break;
    1304          28 :             sel *= FIXED_CHAR_SEL;
    1305             :         }
    1306             :         else
    1307        2230 :             sel *= FIXED_CHAR_SEL;
    1308             :     }
    1309             :     /* Could get sel > 1 if multiple wildcards */
    1310        1484 :     if (sel > 1.0)
    1311           0 :         sel = 1.0;
    1312        1484 :     return sel;
    1313             : }
    1314             : 
    1315             : static Selectivity
    1316         952 : regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
    1317             : {
    1318         952 :     Selectivity sel = 1.0;
    1319         952 :     int         paren_depth = 0;
    1320         952 :     int         paren_pos = 0;  /* dummy init to keep compiler quiet */
    1321             :     int         pos;
    1322             : 
    1323        7826 :     for (pos = 0; pos < pattlen; pos++)
    1324             :     {
    1325        6890 :         if (patt[pos] == '(')
    1326             :         {
    1327          74 :             if (paren_depth == 0)
    1328          70 :                 paren_pos = pos;    /* remember start of parenthesized item */
    1329          74 :             paren_depth++;
    1330             :         }
    1331        6816 :         else if (patt[pos] == ')' && paren_depth > 0)
    1332             :         {
    1333          74 :             paren_depth--;
    1334         148 :             if (paren_depth == 0)
    1335         140 :                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
    1336          70 :                                              pos - (paren_pos + 1),
    1337             :                                              case_insensitive);
    1338             :         }
    1339        6742 :         else if (patt[pos] == '|' && paren_depth == 0)
    1340             :         {
    1341             :             /*
    1342             :              * If unquoted | is present at paren level 0 in pattern, we have
    1343             :              * multiple alternatives; sum their probabilities.
    1344             :              */
    1345          32 :             sel += regex_selectivity_sub(patt + (pos + 1),
    1346          16 :                                          pattlen - (pos + 1),
    1347             :                                          case_insensitive);
    1348          16 :             break;              /* rest of pattern is now processed */
    1349             :         }
    1350        6726 :         else if (patt[pos] == '[')
    1351             :         {
    1352          56 :             bool        negclass = false;
    1353             : 
    1354          56 :             if (patt[++pos] == '^')
    1355             :             {
    1356           0 :                 negclass = true;
    1357           0 :                 pos++;
    1358             :             }
    1359          56 :             if (patt[pos] == ']')   /* ']' at start of class is not special */
    1360           0 :                 pos++;
    1361         312 :             while (pos < pattlen && patt[pos] != ']')
    1362         200 :                 pos++;
    1363          56 :             if (paren_depth == 0)
    1364          56 :                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
    1365             :         }
    1366        6670 :         else if (patt[pos] == '.')
    1367             :         {
    1368         216 :             if (paren_depth == 0)
    1369         160 :                 sel *= ANY_CHAR_SEL;
    1370             :         }
    1371       12732 :         else if (patt[pos] == '*' ||
    1372       12526 :                  patt[pos] == '?' ||
    1373        6248 :                  patt[pos] == '+')
    1374             :         {
    1375             :             /* Ought to be smarter about quantifiers... */
    1376         436 :             if (paren_depth == 0)
    1377         150 :                 sel *= PARTIAL_WILDCARD_SEL;
    1378             :         }
    1379        6236 :         else if (patt[pos] == '{')
    1380             :         {
    1381         284 :             while (pos < pattlen && patt[pos] != '}')
    1382         156 :                 pos++;
    1383          64 :             if (paren_depth == 0)
    1384          52 :                 sel *= PARTIAL_WILDCARD_SEL;
    1385             :         }
    1386        6172 :         else if (patt[pos] == '\\')
    1387             :         {
    1388             :             /* backslash quotes the next character */
    1389          40 :             pos++;
    1390          40 :             if (pos >= pattlen)
    1391           0 :                 break;
    1392          40 :             if (paren_depth == 0)
    1393          24 :                 sel *= FIXED_CHAR_SEL;
    1394             :         }
    1395             :         else
    1396             :         {
    1397        6132 :             if (paren_depth == 0)
    1398        5758 :                 sel *= FIXED_CHAR_SEL;
    1399             :         }
    1400             :     }
    1401             :     /* Could get sel > 1 if multiple wildcards */
    1402         952 :     if (sel > 1.0)
    1403          16 :         sel = 1.0;
    1404         952 :     return sel;
    1405             : }
    1406             : 
    1407             : static Selectivity
    1408         866 : regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
    1409             :                   int fixed_prefix_len)
    1410             : {
    1411             :     Selectivity sel;
    1412             : 
    1413             :     /* If patt doesn't end with $, consider it to have a trailing wildcard */
    1414         866 :     if (pattlen > 0 && patt[pattlen - 1] == '$' &&
    1415          42 :         (pattlen == 1 || patt[pattlen - 2] != '\\'))
    1416             :     {
    1417             :         /* has trailing $ */
    1418          42 :         sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
    1419             :     }
    1420             :     else
    1421             :     {
    1422             :         /* no trailing $ */
    1423         824 :         sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
    1424         824 :         sel *= FULL_WILDCARD_SEL;
    1425             :     }
    1426             : 
    1427             :     /* If there's a fixed prefix, discount its selectivity */
    1428         866 :     if (fixed_prefix_len > 0)
    1429         506 :         sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
    1430             : 
    1431             :     /* Make sure result stays in range */
    1432         866 :     CLAMP_PROBABILITY(sel);
    1433         866 :     return sel;
    1434             : }
    1435             : 
    1436             : /*
    1437             :  * Check whether char is a letter (and, hence, subject to case-folding)
    1438             :  *
    1439             :  * In multibyte character sets or with ICU, we can't use isalpha, and it does
    1440             :  * not seem worth trying to convert to wchar_t to use iswalpha.  Instead, just
    1441             :  * assume any multibyte char is potentially case-varying.
    1442             :  */
    1443             : static int
    1444          84 : pattern_char_isalpha(char c, bool is_multibyte,
    1445             :                      pg_locale_t locale, bool locale_is_c)
    1446             : {
    1447          84 :     if (locale_is_c)
    1448          84 :         return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    1449           0 :     else if (is_multibyte && IS_HIGHBIT_SET(c))
    1450           0 :         return true;
    1451           0 :     else if (locale && locale->provider == COLLPROVIDER_ICU)
    1452           0 :         return IS_HIGHBIT_SET(c) ? true : false;
    1453             : #ifdef HAVE_LOCALE_T
    1454           0 :     else if (locale && locale->provider == COLLPROVIDER_LIBC)
    1455           0 :         return isalpha_l((unsigned char) c, locale->info.lt);
    1456             : #endif
    1457             :     else
    1458           0 :         return isalpha((unsigned char) c);
    1459             : }
    1460             : 
    1461             : 
    /*
     * byte_increment
     *    Character incrementer used for bytea: bump the byte at *ptr by one.
     *
     * bytea has no multibyte characters to worry about, so only the single byte
     * that ptr points at is touched; len is not consulted.  Returns true after
     * incrementing, or false (leaving the byte alone) if it is already 255.
     */
    static bool
    byte_increment(unsigned char *ptr, int len)
    {
        if (*ptr < 255)
        {
            (*ptr)++;
            return true;
        }
        return false;
    }
    1474             : 
    1475             : /*
    1476             :  * Try to generate a string greater than the given string or any
    1477             :  * string it is a prefix of.  If successful, return a palloc'd string
    1478             :  * in the form of a Const node; else return NULL.
    1479             :  *
    1480             :  * The caller must provide the appropriate "less than" comparison function
    1481             :  * for testing the strings, along with the collation to use.
    1482             :  *
    1483             :  * The key requirement here is that given a prefix string, say "foo",
    1484             :  * we must be able to generate another string "fop" that is greater than
    1485             :  * all strings "foobar" starting with "foo".  We can test that we have
    1486             :  * generated a string greater than the prefix string, but in non-C collations
    1487             :  * that is not a bulletproof guarantee that an extension of the string might
    1488             :  * not sort after it; an example is that "foo " is less than "foo!", but it
    1489             :  * is not clear that a "dictionary" sort ordering will consider "foo!" less
    1490             :  * than "foo bar".  CAUTION: Therefore, this function should be used only for
    1491             :  * estimation purposes when working in a non-C collation.
    1492             :  *
    1493             :  * To try to catch most cases where an extended string might otherwise sort
    1494             :  * before the result value, we determine which of the strings "Z", "z", "y",
    1495             :  * and "9" is seen as largest by the collation, and append that to the given
    1496             :  * prefix before trying to find a string that compares as larger.
    1497             :  *
    1498             :  * To search for a greater string, we repeatedly "increment" the rightmost
    1499             :  * character, using an encoding-specific character incrementer function.
    1500             :  * When it's no longer possible to increment the last character, we truncate
    1501             :  * off that character and start incrementing the next-to-rightmost.
    1502             :  * For example, if "z" were the last character in the sort order, then we
    1503             :  * could produce "foo" as a string greater than "fonz".
    1504             :  *
    1505             :  * This could be rather slow in the worst case, but in most cases we
    1506             :  * won't have to try more than one or two strings before succeeding.
    1507             :  *
    1508             :  * Note that it's important for the character incrementer not to be too anal
    1509             :  * about producing every possible character code, since in some cases the only
    1510             :  * way to get a larger string is to increment a previous character position.
    1511             :  * So we don't want to spend too much time trying every possible character
    1512             :  * code at the last position.  A good rule of thumb is to be sure that we
    1513             :  * don't try more than 256*K values for a K-byte character (and definitely
    1514             :  * not 256^K, which is what an exhaustive search would approach).
    1515             :  */
    1516             : static Const *
    1517        1392 : make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
    1518             : {
    1519        1392 :     Oid         datatype = str_const->consttype;
    1520             :     char       *workstr;
    1521             :     int         len;
    1522             :     Datum       cmpstr;
    1523        1392 :     char       *cmptxt = NULL;
    1524             :     mbcharacter_incrementer charinc;
    1525             : 
    1526             :     /*
    1527             :      * Get a modifiable copy of the prefix string in C-string format, and set
    1528             :      * up the string we will compare to as a Datum.  In C locale this can just
    1529             :      * be the given prefix string, otherwise we need to add a suffix.  Type
    1530             :      * BYTEA sorts bytewise so it never needs a suffix either.
    1531             :      */
    1532        1392 :     if (datatype == BYTEAOID)
    1533             :     {
    1534           0 :         bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
    1535             : 
    1536           0 :         len = VARSIZE_ANY_EXHDR(bstr);
    1537           0 :         workstr = (char *) palloc(len);
    1538           0 :         memcpy(workstr, VARDATA_ANY(bstr), len);
    1539             :         Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
    1540           0 :         cmpstr = str_const->constvalue;
    1541             :     }
    1542             :     else
    1543             :     {
    1544        1392 :         if (datatype == NAMEOID)
    1545         480 :             workstr = DatumGetCString(DirectFunctionCall1(nameout,
    1546             :                                                           str_const->constvalue));
    1547             :         else
    1548         912 :             workstr = TextDatumGetCString(str_const->constvalue);
    1549        1392 :         len = strlen(workstr);
    1550        1392 :         if (lc_collate_is_c(collation) || len == 0)
    1551        1392 :             cmpstr = str_const->constvalue;
    1552             :         else
    1553             :         {
    1554             :             /* If first time through, determine the suffix to use */
    1555             :             static char suffixchar = 0;
    1556             :             static Oid  suffixcollation = 0;
    1557             : 
    1558           0 :             if (!suffixchar || suffixcollation != collation)
    1559             :             {
    1560             :                 char       *best;
    1561             : 
    1562           0 :                 best = "Z";
    1563           0 :                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
    1564           0 :                     best = "z";
    1565           0 :                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
    1566           0 :                     best = "y";
    1567           0 :                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
    1568           0 :                     best = "9";
    1569           0 :                 suffixchar = *best;
    1570           0 :                 suffixcollation = collation;
    1571             :             }
    1572             : 
    1573             :             /* And build the string to compare to */
    1574           0 :             if (datatype == NAMEOID)
    1575             :             {
    1576           0 :                 cmptxt = palloc(len + 2);
    1577           0 :                 memcpy(cmptxt, workstr, len);
    1578           0 :                 cmptxt[len] = suffixchar;
    1579           0 :                 cmptxt[len + 1] = '\0';
    1580           0 :                 cmpstr = PointerGetDatum(cmptxt);
    1581             :             }
    1582             :             else
    1583             :             {
    1584           0 :                 cmptxt = palloc(VARHDRSZ + len + 1);
    1585           0 :                 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
    1586           0 :                 memcpy(VARDATA(cmptxt), workstr, len);
    1587           0 :                 *(VARDATA(cmptxt) + len) = suffixchar;
    1588           0 :                 cmpstr = PointerGetDatum(cmptxt);
    1589             :             }
    1590             :         }
    1591             :     }
    1592             : 
    1593             :     /* Select appropriate character-incrementer function */
    1594        1392 :     if (datatype == BYTEAOID)
    1595           0 :         charinc = byte_increment;
    1596             :     else
    1597        1392 :         charinc = pg_database_encoding_character_incrementer();
    1598             : 
    1599             :     /* And search ... */
    1600        2784 :     while (len > 0)
    1601             :     {
    1602             :         int         charlen;
    1603             :         unsigned char *lastchar;
    1604             : 
    1605             :         /* Identify the last character --- for bytea, just the last byte */
    1606        1392 :         if (datatype == BYTEAOID)
    1607           0 :             charlen = 1;
    1608             :         else
    1609        1392 :             charlen = len - pg_mbcliplen(workstr, len, len - 1);
    1610        1392 :         lastchar = (unsigned char *) (workstr + len - charlen);
    1611             : 
    1612             :         /*
    1613             :          * Try to generate a larger string by incrementing the last character
    1614             :          * (for BYTEA, we treat each byte as a character).
    1615             :          *
    1616             :          * Note: the incrementer function is expected to return true if it's
    1617             :          * generated a valid-per-the-encoding new character, otherwise false.
    1618             :          * The contents of the character on false return are unspecified.
    1619             :          */
    1620        2784 :         while (charinc(lastchar, charlen))
    1621             :         {
    1622             :             Const      *workstr_const;
    1623             : 
    1624        1392 :             if (datatype == BYTEAOID)
    1625           0 :                 workstr_const = string_to_bytea_const(workstr, len);
    1626             :             else
    1627        1392 :                 workstr_const = string_to_const(workstr, datatype);
    1628             : 
    1629        1392 :             if (DatumGetBool(FunctionCall2Coll(ltproc,
    1630             :                                                collation,
    1631             :                                                cmpstr,
    1632             :                                                workstr_const->constvalue)))
    1633             :             {
    1634             :                 /* Successfully made a string larger than cmpstr */
    1635        1392 :                 if (cmptxt)
    1636           0 :                     pfree(cmptxt);
    1637        1392 :                 pfree(workstr);
    1638        1392 :                 return workstr_const;
    1639             :             }
    1640             : 
    1641             :             /* No good, release unusable value and try again */
    1642           0 :             pfree(DatumGetPointer(workstr_const->constvalue));
    1643           0 :             pfree(workstr_const);
    1644             :         }
    1645             : 
    1646             :         /*
    1647             :          * No luck here, so truncate off the last character and try to
    1648             :          * increment the next one.
    1649             :          */
    1650           0 :         len -= charlen;
    1651           0 :         workstr[len] = '\0';
    1652             :     }
    1653             : 
    1654             :     /* Failed... */
    1655           0 :     if (cmptxt)
    1656           0 :         pfree(cmptxt);
    1657           0 :     pfree(workstr);
    1658             : 
    1659           0 :     return NULL;
    1660             : }
    1661             : 
    1662             : /*
    1663             :  * Generate a Datum of the appropriate type from a C string.
    1664             :  * Note that all of the supported types are pass-by-ref, so the
    1665             :  * returned value should be pfree'd if no longer needed.
    1666             :  */
    1667             : static Datum
    1668       11802 : string_to_datum(const char *str, Oid datatype)
    1669             : {
    1670             :     Assert(str != NULL);
    1671             : 
    1672             :     /*
    1673             :      * We cheat a little by assuming that CStringGetTextDatum() will do for
    1674             :      * bpchar and varchar constants too...
    1675             :      */
    1676       11802 :     if (datatype == NAMEOID)
    1677        3884 :         return DirectFunctionCall1(namein, CStringGetDatum(str));
    1678        7918 :     else if (datatype == BYTEAOID)
    1679           0 :         return DirectFunctionCall1(byteain, CStringGetDatum(str));
    1680             :     else
    1681        7918 :         return CStringGetTextDatum(str);
    1682             : }
    1683             : 
    1684             : /*
    1685             :  * Generate a Const of the given type (text, varchar, bpchar, name or bytea) from a C string.
    1686             :  */
    1687             : static Const *
    1688       11802 : string_to_const(const char *str, Oid datatype)
    1689             : {
    1690       11802 :     Datum       conval = string_to_datum(str, datatype);
    1691             :     Oid         collation;
    1692             :     int         constlen;
    1693             : 
    1694             :     /*
    1695             :      * We only need to support a few datatypes here, so hard-wire each one's
    1696             :      * collation and length instead of incurring the expense of catalog lookups.
    1697             :      */
    1698       11802 :     switch (datatype)
    1699             :     {
    1700             :         case TEXTOID:
    1701             :         case VARCHAROID:
    1702             :         case BPCHAROID:
    1703        7918 :             collation = DEFAULT_COLLATION_OID;
    1704        7918 :             constlen = -1;
    1705        7918 :             break;
    1706             : 
    1707             :         case NAMEOID:
    1708        3884 :             collation = C_COLLATION_OID;
    1709        3884 :             constlen = NAMEDATALEN;
    1710        3884 :             break;
    1711             : 
    1712             :         case BYTEAOID:
    1713           0 :             collation = InvalidOid;
    1714           0 :             constlen = -1;
    1715           0 :             break;
    1716             : 
    1717             :         default:
    1718           0 :             elog(ERROR, "unexpected datatype in string_to_const: %u",
    1719             :                  datatype);
    1720             :             return NULL;    /* presumably unreachable after elog(ERROR); keeps compilers quiet */
    1721             :     }
    1722             : 
    1723       11802 :     return makeConst(datatype, -1, collation, constlen,
    1724             :                      conval, false, false);
    1725             : }
    1726             : 
    1727             : /*
    1728             :  * Generate a bytea Const from exactly str_len raw bytes at str, copied as-is.
    1729             :  */
    1730             : static Const *
    1731           8 : string_to_bytea_const(const char *str, size_t str_len)
    1732             : {
    1733           8 :     bytea      *bstr = palloc(VARHDRSZ + str_len);  /* header plus payload */
    1734             :     Datum       conval;
    1735             : 
    1736           8 :     memcpy(VARDATA(bstr), str, str_len);    /* length-driven copy, not strlen-based */
    1737           8 :     SET_VARSIZE(bstr, VARHDRSZ + str_len);  /* recorded size includes the header */
    1738           8 :     conval = PointerGetDatum(bstr);
    1739             : 
    1740           8 :     return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
    1741             : }

Generated by: LCOV version 1.13