LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like_support.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 464 560 82.9 %
Date: 2023-05-31 03:12:18 Functions: 32 41 78.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like_support.c
       4             :  *    Planner support functions for LIKE, regex, and related operators.
       5             :  *
       6             :  * These routines handle special optimization of operators that can be
       7             :  * used with index scans even though they are not known to the executor's
       8             :  * indexscan machinery.  The key idea is that these operators allow us
       9             :  * to derive approximate indexscan qual clauses, such that any tuples
      10             :  * that pass the operator clause itself must also satisfy the simpler
      11             :  * indexscan condition(s).  Then we can use the indexscan machinery
      12             :  * to avoid scanning as much of the table as we'd otherwise have to,
      13             :  * while applying the original operator as a qpqual condition to ensure
      14             :  * we deliver only the tuples we want.  (In essence, we're using a regular
      15             :  * index as if it were a lossy index.)
      16             :  *
      17             :  * An example of what we're doing is
      18             :  *          textfield LIKE 'abc%def'
      19             :  * from which we can generate the indexscanable conditions
      20             :  *          textfield >= 'abc' AND textfield < 'abd'
      21             :  * which allow efficient scanning of an index on textfield.
      22             :  * (In reality, character set and collation issues make the transformation
      23             :  * from LIKE to indexscan limits rather harder than one might think ...
      24             :  * but that's the basic idea.)
      25             :  *
      26             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      27             :  * Portions Copyright (c) 1994, Regents of the University of California
      28             :  *
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/backend/utils/adt/like_support.c
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres.h"
      36             : 
      37             : #include <math.h>
      38             : 
      39             : #include "access/htup_details.h"
      40             : #include "access/stratnum.h"
      41             : #include "catalog/pg_collation.h"
      42             : #include "catalog/pg_operator.h"
      43             : #include "catalog/pg_opfamily.h"
      44             : #include "catalog/pg_statistic.h"
      45             : #include "catalog/pg_type.h"
      46             : #include "mb/pg_wchar.h"
      47             : #include "miscadmin.h"
      48             : #include "nodes/makefuncs.h"
      49             : #include "nodes/nodeFuncs.h"
      50             : #include "nodes/supportnodes.h"
      51             : #include "utils/builtins.h"
      52             : #include "utils/datum.h"
      53             : #include "utils/lsyscache.h"
      54             : #include "utils/pg_locale.h"
      55             : #include "utils/selfuncs.h"
      56             : #include "utils/varlena.h"
      57             : 
      58             : 
      59             : typedef enum
      60             : {
      61             :     Pattern_Type_Like,
      62             :     Pattern_Type_Like_IC,
      63             :     Pattern_Type_Regex,
      64             :     Pattern_Type_Regex_IC,
      65             :     Pattern_Type_Prefix
      66             : } Pattern_Type;
      67             : 
      68             : typedef enum
      69             : {
      70             :     Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
      71             : } Pattern_Prefix_Status;
      72             : 
      73             : static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
      74             : static List *match_pattern_prefix(Node *leftop,
      75             :                                   Node *rightop,
      76             :                                   Pattern_Type ptype,
      77             :                                   Oid expr_coll,
      78             :                                   Oid opfamily,
      79             :                                   Oid indexcollation);
      80             : static double patternsel_common(PlannerInfo *root,
      81             :                                 Oid oprid,
      82             :                                 Oid opfuncid,
      83             :                                 List *args,
      84             :                                 int varRelid,
      85             :                                 Oid collation,
      86             :                                 Pattern_Type ptype,
      87             :                                 bool negate);
      88             : static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
      89             :                                                   Pattern_Type ptype,
      90             :                                                   Oid collation,
      91             :                                                   Const **prefix,
      92             :                                                   Selectivity *rest_selec);
      93             : static Selectivity prefix_selectivity(PlannerInfo *root,
      94             :                                       VariableStatData *vardata,
      95             :                                       Oid eqopr, Oid ltopr, Oid geopr,
      96             :                                       Oid collation,
      97             :                                       Const *prefixcon);
      98             : static Selectivity like_selectivity(const char *patt, int pattlen,
      99             :                                     bool case_insensitive);
     100             : static Selectivity regex_selectivity(const char *patt, int pattlen,
     101             :                                      bool case_insensitive,
     102             :                                      int fixed_prefix_len);
     103             : static int  pattern_char_isalpha(char c, bool is_multibyte,
     104             :                                  pg_locale_t locale, bool locale_is_c);
     105             : static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
     106             :                                   Oid collation);
     107             : static Datum string_to_datum(const char *str, Oid datatype);
     108             : static Const *string_to_const(const char *str, Oid datatype);
     109             : static Const *string_to_bytea_const(const char *str, size_t str_len);
     110             : 
     111             : 
     112             : /*
     113             :  * Planner support functions for LIKE, regex, and related operators
     114             :  */
     115             : Datum
     116        5832 : textlike_support(PG_FUNCTION_ARGS)
     117             : {
     118        5832 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     119             : 
     120        5832 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
     121             : }
     122             : 
     123             : Datum
     124         358 : texticlike_support(PG_FUNCTION_ARGS)
     125             : {
     126         358 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     127             : 
     128         358 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
     129             : }
     130             : 
     131             : Datum
     132       18710 : textregexeq_support(PG_FUNCTION_ARGS)
     133             : {
     134       18710 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     135             : 
     136       18710 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
     137             : }
     138             : 
     139             : Datum
     140         118 : texticregexeq_support(PG_FUNCTION_ARGS)
     141             : {
     142         118 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     143             : 
     144         118 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
     145             : }
     146             : 
     147             : Datum
     148         156 : text_starts_with_support(PG_FUNCTION_ARGS)
     149             : {
     150         156 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     151             : 
     152         156 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
     153             : }
     154             : 
     155             : /* Common code for the above */
     156             : static Node *
     157       25174 : like_regex_support(Node *rawreq, Pattern_Type ptype)
     158             : {
     159       25174 :     Node       *ret = NULL;
     160             : 
     161       25174 :     if (IsA(rawreq, SupportRequestSelectivity))
     162             :     {
     163             :         /*
     164             :          * Make a selectivity estimate for a function call, just as we'd do if
     165             :          * the call was via the corresponding operator.
     166             :          */
     167          24 :         SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
     168             :         Selectivity s1;
     169             : 
     170          24 :         if (req->is_join)
     171             :         {
     172             :             /*
     173             :              * For the moment we just punt.  If patternjoinsel is ever
     174             :              * improved to do better, this should be made to call it.
     175             :              */
     176           0 :             s1 = DEFAULT_MATCH_SEL;
     177             :         }
     178             :         else
     179             :         {
     180             :             /* Share code with operator restriction selectivity functions */
     181          24 :             s1 = patternsel_common(req->root,
     182             :                                    InvalidOid,
     183             :                                    req->funcid,
     184             :                                    req->args,
     185             :                                    req->varRelid,
     186             :                                    req->inputcollid,
     187             :                                    ptype,
     188             :                                    false);
     189             :         }
     190          24 :         req->selectivity = s1;
     191          24 :         ret = (Node *) req;
     192             :     }
     193       25150 :     else if (IsA(rawreq, SupportRequestIndexCondition))
     194             :     {
     195             :         /* Try to convert operator/function call to index conditions */
     196        6866 :         SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
     197             : 
     198             :         /*
     199             :          * Currently we have no "reverse" match operators with the pattern on
     200             :          * the left, so we only need consider cases with the indexkey on the
     201             :          * left.
     202             :          */
     203        6866 :         if (req->indexarg != 0)
     204           0 :             return NULL;
     205             : 
     206        6866 :         if (is_opclause(req->node))
     207             :         {
     208        6842 :             OpExpr     *clause = (OpExpr *) req->node;
     209             : 
     210             :             Assert(list_length(clause->args) == 2);
     211             :             ret = (Node *)
     212        6842 :                 match_pattern_prefix((Node *) linitial(clause->args),
     213        6842 :                                      (Node *) lsecond(clause->args),
     214             :                                      ptype,
     215             :                                      clause->inputcollid,
     216             :                                      req->opfamily,
     217             :                                      req->indexcollation);
     218             :         }
     219          24 :         else if (is_funcclause(req->node))   /* be paranoid */
     220             :         {
     221          24 :             FuncExpr   *clause = (FuncExpr *) req->node;
     222             : 
     223             :             Assert(list_length(clause->args) == 2);
     224             :             ret = (Node *)
     225          24 :                 match_pattern_prefix((Node *) linitial(clause->args),
     226          24 :                                      (Node *) lsecond(clause->args),
     227             :                                      ptype,
     228             :                                      clause->inputcollid,
     229             :                                      req->opfamily,
     230             :                                      req->indexcollation);
     231             :         }
     232             :     }
     233             : 
     234       25174 :     return ret;
     235             : }
     236             : 
     237             : /*
     238             :  * match_pattern_prefix
     239             :  *    Try to generate an indexqual for a LIKE or regex operator.
     240             :  */
     241             : static List *
     242        6866 : match_pattern_prefix(Node *leftop,
     243             :                      Node *rightop,
     244             :                      Pattern_Type ptype,
     245             :                      Oid expr_coll,
     246             :                      Oid opfamily,
     247             :                      Oid indexcollation)
     248             : {
     249             :     List       *result;
     250             :     Const      *patt;
     251             :     Const      *prefix;
     252             :     Pattern_Prefix_Status pstatus;
     253             :     Oid         ldatatype;
     254             :     Oid         rdatatype;
     255             :     Oid         eqopr;
     256             :     Oid         ltopr;
     257             :     Oid         geopr;
     258        6866 :     Oid         preopr = InvalidOid;
     259             :     bool        collation_aware;
     260             :     Expr       *expr;
     261             :     FmgrInfo    ltproc;
     262             :     Const      *greaterstr;
     263             : 
     264             :     /*
     265             :      * Can't do anything with a non-constant or NULL pattern argument.
     266             :      *
     267             :      * Note that since we restrict ourselves to cases with a hard constant on
     268             :      * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
     269             :      * about verifying that.
     270             :      */
     271        6866 :     if (!IsA(rightop, Const) ||
     272        6766 :         ((Const *) rightop)->constisnull)
     273         100 :         return NIL;
     274        6766 :     patt = (Const *) rightop;
     275             : 
     276             :     /*
     277             :      * Not supported if the expression collation is nondeterministic.  The
     278             :      * optimized equality or prefix tests use bytewise comparisons, which is
     279             :      * not consistent with nondeterministic collations.  The actual
     280             :      * pattern-matching implementation functions will later error out that
     281             :      * pattern-matching is not supported with nondeterministic collations. (We
     282             :      * could also error out here, but by doing it later we get more precise
     283             :      * error messages.)  (It should be possible to support at least
     284             :      * Pattern_Prefix_Exact, but no point as long as the actual
     285             :      * pattern-matching implementations don't support it.)
     286             :      *
     287             :      * expr_coll is not set for a non-collation-aware data type such as bytea.
     288             :      */
     289        6766 :     if (expr_coll && !get_collation_isdeterministic(expr_coll))
     290          12 :         return NIL;
     291             : 
     292             :     /*
     293             :      * Try to extract a fixed prefix from the pattern.
     294             :      */
     295        6754 :     pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
     296             :                                    &prefix, NULL);
     297             : 
     298             :     /* fail if no fixed prefix */
     299        6754 :     if (pstatus == Pattern_Prefix_None)
     300         298 :         return NIL;
     301             : 
     302             :     /*
     303             :      * Identify the operators we want to use, based on the type of the
     304             :      * left-hand argument.  Usually these are just the type's regular
     305             :      * comparison operators, but if we are considering one of the semi-legacy
     306             :      * "pattern" opclasses, use the "pattern" operators instead.  Those are
     307             :      * not collation-sensitive but always use C collation, as we want.  The
     308             :      * selected operators also determine the needed type of the prefix
     309             :      * constant.
     310             :      */
     311        6456 :     ldatatype = exprType(leftop);
     312        6456 :     switch (ldatatype)
     313             :     {
     314          68 :         case TEXTOID:
     315          68 :             if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
     316             :             {
     317           0 :                 eqopr = TextEqualOperator;
     318           0 :                 ltopr = TextPatternLessOperator;
     319           0 :                 geopr = TextPatternGreaterEqualOperator;
     320           0 :                 collation_aware = false;
     321             :             }
     322          68 :             else if (opfamily == TEXT_SPGIST_FAM_OID)
     323             :             {
     324          24 :                 eqopr = TextEqualOperator;
     325          24 :                 ltopr = TextPatternLessOperator;
     326          24 :                 geopr = TextPatternGreaterEqualOperator;
     327             :                 /* This opfamily has direct support for prefixing */
     328          24 :                 preopr = TextPrefixOperator;
     329          24 :                 collation_aware = false;
     330             :             }
     331             :             else
     332             :             {
     333          44 :                 eqopr = TextEqualOperator;
     334          44 :                 ltopr = TextLessOperator;
     335          44 :                 geopr = TextGreaterEqualOperator;
     336          44 :                 collation_aware = true;
     337             :             }
     338          68 :             rdatatype = TEXTOID;
     339          68 :             break;
     340        6364 :         case NAMEOID:
     341             : 
     342             :             /*
     343             :              * Note that here, we need the RHS type to be text, so that the
     344             :              * comparison value isn't improperly truncated to NAMEDATALEN.
     345             :              */
     346        6364 :             eqopr = NameEqualTextOperator;
     347        6364 :             ltopr = NameLessTextOperator;
     348        6364 :             geopr = NameGreaterEqualTextOperator;
     349        6364 :             collation_aware = true;
     350        6364 :             rdatatype = TEXTOID;
     351        6364 :             break;
     352          24 :         case BPCHAROID:
     353          24 :             if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
     354             :             {
     355           0 :                 eqopr = BpcharEqualOperator;
     356           0 :                 ltopr = BpcharPatternLessOperator;
     357           0 :                 geopr = BpcharPatternGreaterEqualOperator;
     358           0 :                 collation_aware = false;
     359             :             }
     360             :             else
     361             :             {
     362          24 :                 eqopr = BpcharEqualOperator;
     363          24 :                 ltopr = BpcharLessOperator;
     364          24 :                 geopr = BpcharGreaterEqualOperator;
     365          24 :                 collation_aware = true;
     366             :             }
     367          24 :             rdatatype = BPCHAROID;
     368          24 :             break;
     369           0 :         case BYTEAOID:
     370           0 :             eqopr = ByteaEqualOperator;
     371           0 :             ltopr = ByteaLessOperator;
     372           0 :             geopr = ByteaGreaterEqualOperator;
     373           0 :             collation_aware = false;
     374           0 :             rdatatype = BYTEAOID;
     375           0 :             break;
     376           0 :         default:
     377             :             /* Can't get here unless we're attached to the wrong operator */
     378           0 :             return NIL;
     379             :     }
     380             : 
     381             :     /*
     382             :      * If necessary, coerce the prefix constant to the right type.  The given
     383             :      * prefix constant is either text or bytea type, therefore the only case
     384             :      * where we need to do anything is when converting text to bpchar.  Those
     385             :      * two types are binary-compatible, so relabeling the Const node is
     386             :      * sufficient.
     387             :      */
     388        6456 :     if (prefix->consttype != rdatatype)
     389             :     {
     390             :         Assert(prefix->consttype == TEXTOID &&
     391             :                rdatatype == BPCHAROID);
     392          24 :         prefix->consttype = rdatatype;
     393             :     }
     394             : 
     395             :     /*
     396             :      * If we found an exact-match pattern, generate an "=" indexqual.
     397             :      *
     398             :      * Here and below, check to see whether the desired operator is actually
     399             :      * supported by the index opclass, and fail quietly if not.  This allows
     400             :      * us to not be concerned with specific opclasses (except for the legacy
     401             :      * "pattern" cases); any index that correctly implements the operators
     402             :      * will work.
     403             :      */
     404        6456 :     if (pstatus == Pattern_Prefix_Exact)
     405             :     {
     406        5258 :         if (!op_in_opfamily(eqopr, opfamily))
     407          12 :             return NIL;
     408        5246 :         expr = make_opclause(eqopr, BOOLOID, false,
     409             :                              (Expr *) leftop, (Expr *) prefix,
     410             :                              InvalidOid, indexcollation);
     411        5246 :         result = list_make1(expr);
     412        5246 :         return result;
     413             :     }
     414             : 
     415             :     /*
     416             :      * Otherwise, we have a nonempty required prefix of the values.  Some
     417             :      * opclasses support prefix checks directly, otherwise we'll try to
     418             :      * generate a range constraint.
     419             :      */
     420        1198 :     if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
     421             :     {
     422          24 :         expr = make_opclause(preopr, BOOLOID, false,
     423             :                              (Expr *) leftop, (Expr *) prefix,
     424             :                              InvalidOid, indexcollation);
     425          24 :         result = list_make1(expr);
     426          24 :         return result;
     427             :     }
     428             : 
     429             :     /*
     430             :      * Since we need a range constraint, it's only going to work reliably if
     431             :      * the index is collation-insensitive or has "C" collation.  Note that
     432             :      * here we are looking at the index's collation, not the expression's
     433             :      * collation -- this test is *not* dependent on the LIKE/regex operator's
     434             :      * collation.
     435             :      */
     436        1174 :     if (collation_aware &&
     437        1174 :         !lc_collate_is_c(indexcollation))
     438           8 :         return NIL;
     439             : 
     440             :     /*
     441             :      * We can always say "x >= prefix".
     442             :      */
     443        1166 :     if (!op_in_opfamily(geopr, opfamily))
     444          12 :         return NIL;
     445        1154 :     expr = make_opclause(geopr, BOOLOID, false,
     446             :                          (Expr *) leftop, (Expr *) prefix,
     447             :                          InvalidOid, indexcollation);
     448        1154 :     result = list_make1(expr);
     449             : 
     450             :     /*-------
     451             :      * If we can create a string larger than the prefix, we can say
     452             :      * "x < greaterstr".  NB: we rely on make_greater_string() to generate
     453             :      * a guaranteed-greater string, not just a probably-greater string.
     454             :      * In general this is only guaranteed in C locale, so we'd better be
     455             :      * using a C-locale index collation.
     456             :      *-------
     457             :      */
     458        1154 :     if (!op_in_opfamily(ltopr, opfamily))
     459           0 :         return result;
     460        1154 :     fmgr_info(get_opcode(ltopr), &ltproc);
     461        1154 :     greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
     462        1154 :     if (greaterstr)
     463             :     {
     464        1154 :         expr = make_opclause(ltopr, BOOLOID, false,
     465             :                              (Expr *) leftop, (Expr *) greaterstr,
     466             :                              InvalidOid, indexcollation);
     467        1154 :         result = lappend(result, expr);
     468             :     }
     469             : 
     470        1154 :     return result;
     471             : }
     472             : 
     473             : 
     474             : /*
     475             :  * patternsel_common - generic code for pattern-match restriction selectivity.
     476             :  *
     477             :  * To support using this from either the operator or function paths, caller
     478             :  * may pass either operator OID or underlying function OID; we look up the
     479             :  * latter from the former if needed.  (We could just have patternsel() call
     480             :  * get_opcode(), but the work would be wasted if we don't have a need to
     481             :  * compare a fixed prefix to the pg_statistic data.)
     482             :  *
     483             :  * Note that oprid and/or opfuncid should be for the positive-match operator
     484             :  * even when negate is true.
     485             :  */
     486             : static double
     487        9416 : patternsel_common(PlannerInfo *root,
     488             :                   Oid oprid,
     489             :                   Oid opfuncid,
     490             :                   List *args,
     491             :                   int varRelid,
     492             :                   Oid collation,
     493             :                   Pattern_Type ptype,
     494             :                   bool negate)
     495             : {
     496             :     VariableStatData vardata;
     497             :     Node       *other;
     498             :     bool        varonleft;
     499             :     Datum       constval;
     500             :     Oid         consttype;
     501             :     Oid         vartype;
     502             :     Oid         rdatatype;
     503             :     Oid         eqopr;
     504             :     Oid         ltopr;
     505             :     Oid         geopr;
     506             :     Pattern_Prefix_Status pstatus;
     507             :     Const      *patt;
     508        9416 :     Const      *prefix = NULL;
     509        9416 :     Selectivity rest_selec = 0;
     510        9416 :     double      nullfrac = 0.0;
     511             :     double      result;
     512             : 
     513             :     /*
     514             :      * Initialize result to the appropriate default estimate depending on
     515             :      * whether it's a match or not-match operator.
     516             :      */
     517        9416 :     if (negate)
     518         938 :         result = 1.0 - DEFAULT_MATCH_SEL;
     519             :     else
     520        8478 :         result = DEFAULT_MATCH_SEL;
     521             : 
     522             :     /*
     523             :      * If expression is not variable op constant, then punt and return the
     524             :      * default estimate.
     525             :      */
     526        9416 :     if (!get_restriction_variable(root, args, varRelid,
     527             :                                   &vardata, &other, &varonleft))
     528         136 :         return result;
     529        9280 :     if (!varonleft || !IsA(other, Const))
     530             :     {
     531          40 :         ReleaseVariableStats(vardata);
     532          40 :         return result;
     533             :     }
     534             : 
     535             :     /*
     536             :      * If the constant is NULL, assume operator is strict and return zero, ie,
     537             :      * operator will never return TRUE.  (It's zero even for a negator op.)
     538             :      */
     539        9240 :     if (((Const *) other)->constisnull)
     540             :     {
     541           0 :         ReleaseVariableStats(vardata);
     542           0 :         return 0.0;
     543             :     }
     544        9240 :     constval = ((Const *) other)->constvalue;
     545        9240 :     consttype = ((Const *) other)->consttype;
     546             : 
     547             :     /*
     548             :      * The right-hand const is type text or bytea for all supported operators.
     549             :      * We do not expect to see binary-compatible types here, since
     550             :      * const-folding should have relabeled the const to exactly match the
     551             :      * operator's declared type.
     552             :      */
     553        9240 :     if (consttype != TEXTOID && consttype != BYTEAOID)
     554             :     {
     555          24 :         ReleaseVariableStats(vardata);
     556          24 :         return result;
     557             :     }
     558             : 
     559             :     /*
     560             :      * Similarly, the exposed type of the left-hand side should be one of
     561             :      * those we know.  (Do not look at vardata.atttype, which might be
     562             :      * something binary-compatible but different.)  We can use it to identify
     563             :      * the comparison operators and the required type of the comparison
     564             :      * constant, much as in match_pattern_prefix().
     565             :      */
     566        9216 :     vartype = vardata.vartype;
     567             : 
     568        9216 :     switch (vartype)
     569             :     {
     570        1830 :         case TEXTOID:
     571        1830 :             eqopr = TextEqualOperator;
     572        1830 :             ltopr = TextLessOperator;
     573        1830 :             geopr = TextGreaterEqualOperator;
     574        1830 :             rdatatype = TEXTOID;
     575        1830 :             break;
     576        7292 :         case NAMEOID:
     577             : 
     578             :             /*
     579             :              * Note that here, we need the RHS type to be text, so that the
     580             :              * comparison value isn't improperly truncated to NAMEDATALEN.
     581             :              */
     582        7292 :             eqopr = NameEqualTextOperator;
     583        7292 :             ltopr = NameLessTextOperator;
     584        7292 :             geopr = NameGreaterEqualTextOperator;
     585        7292 :             rdatatype = TEXTOID;
     586        7292 :             break;
     587          84 :         case BPCHAROID:
     588          84 :             eqopr = BpcharEqualOperator;
     589          84 :             ltopr = BpcharLessOperator;
     590          84 :             geopr = BpcharGreaterEqualOperator;
     591          84 :             rdatatype = BPCHAROID;
     592          84 :             break;
     593           6 :         case BYTEAOID:
     594           6 :             eqopr = ByteaEqualOperator;
     595           6 :             ltopr = ByteaLessOperator;
     596           6 :             geopr = ByteaGreaterEqualOperator;
     597           6 :             rdatatype = BYTEAOID;
     598           6 :             break;
     599           4 :         default:
     600             :             /* Can't get here unless we're attached to the wrong operator */
     601           4 :             ReleaseVariableStats(vardata);
     602           4 :             return result;
     603             :     }
     604             : 
     605             :     /*
     606             :      * Grab the nullfrac for use below.
     607             :      */
     608        9212 :     if (HeapTupleIsValid(vardata.statsTuple))
     609             :     {
     610             :         Form_pg_statistic stats;
     611             : 
     612        6938 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     613        6938 :         nullfrac = stats->stanullfrac;
     614             :     }
     615             : 
     616             :     /*
     617             :      * Pull out any fixed prefix implied by the pattern, and estimate the
     618             :      * fractional selectivity of the remainder of the pattern.  Unlike many
     619             :      * other selectivity estimators, we use the pattern operator's actual
     620             :      * collation for this step.  This is not because we expect the collation
     621             :      * to make a big difference in the selectivity estimate (it seldom would),
     622             :      * but because we want to be sure we cache compiled regexps under the
     623             :      * right cache key, so that they can be re-used at runtime.
     624             :      */
     625        9212 :     patt = (Const *) other;
     626        9212 :     pstatus = pattern_fixed_prefix(patt, ptype, collation,
     627             :                                    &prefix, &rest_selec);
     628             : 
     629             :     /*
     630             :      * If necessary, coerce the prefix constant to the right type.  The only
     631             :      * case where we need to do anything is when converting text to bpchar.
     632             :      * Those two types are binary-compatible, so relabeling the Const node is
     633             :      * sufficient.
     634             :      */
     635        9188 :     if (prefix && prefix->consttype != rdatatype)
     636             :     {
     637             :         Assert(prefix->consttype == TEXTOID &&
     638             :                rdatatype == BPCHAROID);
     639          36 :         prefix->consttype = rdatatype;
     640             :     }
     641             : 
     642        9188 :     if (pstatus == Pattern_Prefix_Exact)
     643             :     {
     644             :         /*
     645             :          * Pattern specifies an exact match, so estimate as for '='
     646             :          */
     647        5418 :         result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
     648             :                               false, true, false);
     649             :     }
     650             :     else
     651             :     {
     652             :         /*
     653             :          * Not exact-match pattern.  If we have a sufficiently large
     654             :          * histogram, estimate selectivity for the histogram part of the
     655             :          * population by counting matches in the histogram.  If not, estimate
     656             :          * selectivity of the fixed prefix and remainder of pattern
     657             :          * separately, then combine the two to get an estimate of the
     658             :          * selectivity for the part of the column population represented by
     659             :          * the histogram.  (For small histograms, we combine these
     660             :          * approaches.)
     661             :          *
     662             :          * We then add up data for any most-common-values values; these are
     663             :          * not in the histogram population, and we can get exact answers for
     664             :          * them by applying the pattern operator, so there's no reason to
     665             :          * approximate.  (If the MCVs cover a significant part of the total
     666             :          * population, this gives us a big leg up in accuracy.)
     667             :          */
     668             :         Selectivity selec;
     669             :         int         hist_size;
     670             :         FmgrInfo    opproc;
     671             :         double      mcv_selec,
     672             :                     sumcommon;
     673             : 
     674             :         /* Try to use the histogram entries to get selectivity */
     675        3770 :         if (!OidIsValid(opfuncid))
     676        3746 :             opfuncid = get_opcode(oprid);
     677        3770 :         fmgr_info(opfuncid, &opproc);
     678             : 
     679        3770 :         selec = histogram_selectivity(&vardata, &opproc, collation,
     680             :                                       constval, true,
     681             :                                       10, 1, &hist_size);
     682             : 
     683             :         /* If not at least 100 entries, use the heuristic method */
     684        3770 :         if (hist_size < 100)
     685             :         {
     686             :             Selectivity heursel;
     687             :             Selectivity prefixsel;
     688             : 
     689        2532 :             if (pstatus == Pattern_Prefix_Partial)
     690        1830 :                 prefixsel = prefix_selectivity(root, &vardata,
     691             :                                                eqopr, ltopr, geopr,
     692             :                                                collation,
     693             :                                                prefix);
     694             :             else
     695         702 :                 prefixsel = 1.0;
     696        2532 :             heursel = prefixsel * rest_selec;
     697             : 
     698        2532 :             if (selec < 0)       /* fewer than 10 histogram entries? */
     699        2304 :                 selec = heursel;
     700             :             else
     701             :             {
     702             :                 /*
     703             :                  * For histogram sizes from 10 to 100, we combine the
     704             :                  * histogram and heuristic selectivities, putting increasingly
     705             :                  * more trust in the histogram for larger sizes.
     706             :                  */
     707         228 :                 double      hist_weight = hist_size / 100.0;
     708             : 
     709         228 :                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
     710             :             }
     711             :         }
     712             : 
     713             :         /* In any case, don't believe extremely small or large estimates. */
     714        3770 :         if (selec < 0.0001)
     715        1274 :             selec = 0.0001;
     716        2496 :         else if (selec > 0.9999)
     717         134 :             selec = 0.9999;
     718             : 
     719             :         /*
     720             :          * If we have most-common-values info, add up the fractions of the MCV
     721             :          * entries that satisfy MCV OP PATTERN.  These fractions contribute
     722             :          * directly to the result selectivity.  Also add up the total fraction
     723             :          * represented by MCV entries.
     724             :          */
     725        3770 :         mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
     726             :                                     constval, true,
     727             :                                     &sumcommon);
     728             : 
     729             :         /*
     730             :          * Now merge the results from the MCV and histogram calculations,
     731             :          * realizing that the histogram covers only the non-null values that
     732             :          * are not listed in MCV.
     733             :          */
     734        3770 :         selec *= 1.0 - nullfrac - sumcommon;
     735        3770 :         selec += mcv_selec;
     736        3770 :         result = selec;
     737             :     }
     738             : 
     739             :     /* now adjust if we wanted not-match rather than match */
     740        9188 :     if (negate)
     741         826 :         result = 1.0 - result - nullfrac;
     742             : 
     743             :     /* result should be in range, but make sure... */
     744        9188 :     CLAMP_PROBABILITY(result);
     745             : 
     746        9188 :     if (prefix)
     747             :     {
     748        8618 :         pfree(DatumGetPointer(prefix->constvalue));
     749        8618 :         pfree(prefix);
     750             :     }
     751             : 
     752        9188 :     ReleaseVariableStats(vardata);
     753             : 
     754        9188 :     return result;
     755             : }
     756             : 
     757             : /*
     758             :  * Fix impedance mismatch between SQL-callable functions and patternsel_common
     759             :  */
     760             : static double
     761        9392 : patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     762             : {
     763        9392 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     764        9392 :     Oid         operator = PG_GETARG_OID(1);
     765        9392 :     List       *args = (List *) PG_GETARG_POINTER(2);
     766        9392 :     int         varRelid = PG_GETARG_INT32(3);
     767        9392 :     Oid         collation = PG_GET_COLLATION();
     768             : 
     769             :     /*
     770             :      * If this is for a NOT LIKE or similar operator, get the corresponding
     771             :      * positive-match operator and work with that.
     772             :      */
     773        9392 :     if (negate)
     774             :     {
     775         938 :         operator = get_negator(operator);
     776         938 :         if (!OidIsValid(operator))
     777           0 :             elog(ERROR, "patternsel called for operator without a negator");
     778             :     }
     779             : 
     780        9392 :     return patternsel_common(root,
     781             :                              operator,
     782             :                              InvalidOid,
     783             :                              args,
     784             :                              varRelid,
     785             :                              collation,
     786             :                              ptype,
     787             :                              negate);
     788             : }
     789             : 
     790             : /*
     791             :  *      regexeqsel      - Selectivity of regular-expression pattern match.
     792             :  */
     793             : Datum
     794        6244 : regexeqsel(PG_FUNCTION_ARGS)
     795             : {
     796        6244 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
     797             : }
     798             : 
     799             : /*
     800             :  *      icregexeqsel    - Selectivity of case-insensitive regex match.
     801             :  */
     802             : Datum
     803          64 : icregexeqsel(PG_FUNCTION_ARGS)
     804             : {
     805          64 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
     806             : }
     807             : 
     808             : /*
     809             :  *      likesel         - Selectivity of LIKE pattern match.
     810             :  */
     811             : Datum
     812        1964 : likesel(PG_FUNCTION_ARGS)
     813             : {
     814        1964 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
     815             : }
     816             : 
     817             : /*
     818             :  *      prefixsel           - selectivity of prefix operator
     819             :  */
     820             : Datum
     821          54 : prefixsel(PG_FUNCTION_ARGS)
     822             : {
     823          54 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
     824             : }
     825             : 
     826             : /*
     827             :  *
     828             :  *      iclikesel           - Selectivity of ILIKE pattern match.
     829             :  */
     830             : Datum
     831         128 : iclikesel(PG_FUNCTION_ARGS)
     832             : {
     833         128 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
     834             : }
     835             : 
     836             : /*
     837             :  *      regexnesel      - Selectivity of regular-expression pattern non-match.
     838             :  */
     839             : Datum
     840         784 : regexnesel(PG_FUNCTION_ARGS)
     841             : {
     842         784 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
     843             : }
     844             : 
     845             : /*
     846             :  *      icregexnesel    - Selectivity of case-insensitive regex non-match.
     847             :  */
     848             : Datum
     849          16 : icregexnesel(PG_FUNCTION_ARGS)
     850             : {
     851          16 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
     852             : }
     853             : 
     854             : /*
     855             :  *      nlikesel        - Selectivity of LIKE pattern non-match.
     856             :  */
     857             : Datum
     858         130 : nlikesel(PG_FUNCTION_ARGS)
     859             : {
     860         130 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
     861             : }
     862             : 
     863             : /*
     864             :  *      icnlikesel      - Selectivity of ILIKE pattern non-match.
     865             :  */
     866             : Datum
     867           8 : icnlikesel(PG_FUNCTION_ARGS)
     868             : {
     869           8 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
     870             : }
     871             : 
     872             : /*
     873             :  * patternjoinsel       - Generic code for pattern-match join selectivity.
     874             :  */
     875             : static double
     876         182 : patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     877             : {
     878             :     /* For the moment we just punt. */
     879         182 :     return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
     880             : }
     881             : 
     882             : /*
     883             :  *      regexeqjoinsel  - Join selectivity of regular-expression pattern match.
     884             :  */
     885             : Datum
     886         182 : regexeqjoinsel(PG_FUNCTION_ARGS)
     887             : {
     888         182 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
     889             : }
     890             : 
     891             : /*
     892             :  *      icregexeqjoinsel    - Join selectivity of case-insensitive regex match.
     893             :  */
     894             : Datum
     895           0 : icregexeqjoinsel(PG_FUNCTION_ARGS)
     896             : {
     897           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
     898             : }
     899             : 
     900             : /*
     901             :  *      likejoinsel         - Join selectivity of LIKE pattern match.
     902             :  */
     903             : Datum
     904           0 : likejoinsel(PG_FUNCTION_ARGS)
     905             : {
     906           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
     907             : }
     908             : 
     909             : /*
     910             :  *      prefixjoinsel           - Join selectivity of prefix operator
     911             :  */
     912             : Datum
     913           0 : prefixjoinsel(PG_FUNCTION_ARGS)
     914             : {
     915           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
     916             : }
     917             : 
     918             : /*
     919             :  *      iclikejoinsel           - Join selectivity of ILIKE pattern match.
     920             :  */
     921             : Datum
     922           0 : iclikejoinsel(PG_FUNCTION_ARGS)
     923             : {
     924           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
     925             : }
     926             : 
     927             : /*
     928             :  *      regexnejoinsel  - Join selectivity of regex non-match.
     929             :  */
     930             : Datum
     931           0 : regexnejoinsel(PG_FUNCTION_ARGS)
     932             : {
     933           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
     934             : }
     935             : 
     936             : /*
     937             :  *      icregexnejoinsel    - Join selectivity of case-insensitive regex non-match.
     938             :  */
     939             : Datum
     940           0 : icregexnejoinsel(PG_FUNCTION_ARGS)
     941             : {
     942           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
     943             : }
     944             : 
     945             : /*
     946             :  *      nlikejoinsel        - Join selectivity of LIKE pattern non-match.
     947             :  */
     948             : Datum
     949           0 : nlikejoinsel(PG_FUNCTION_ARGS)
     950             : {
     951           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
     952             : }
     953             : 
     954             : /*
     955             :  *      icnlikejoinsel      - Join selectivity of ILIKE pattern non-match.
     956             :  */
     957             : Datum
     958           0 : icnlikejoinsel(PG_FUNCTION_ARGS)
     959             : {
     960           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
     961             : }
     962             : 
     963             : 
     964             : /*-------------------------------------------------------------------------
     965             :  *
     966             :  * Pattern analysis functions
     967             :  *
     968             :  * These routines support analysis of LIKE and regular-expression patterns
     969             :  * by the planner/optimizer.  It's important that they agree with the
     970             :  * regular-expression code in backend/regex/ and the LIKE code in
     971             :  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
     972             :  * must be conservative: if we report a string longer than the true fixed
     973             :  * prefix, the query may produce actually wrong answers, rather than just
     974             :  * getting a bad selectivity estimate!
     975             :  *
     976             :  *-------------------------------------------------------------------------
     977             :  */
     978             : 
     979             : /*
     980             :  * Extract the fixed prefix, if any, for a pattern.
     981             :  *
     982             :  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
     983             :  *  or to NULL if no fixed prefix exists for the pattern.
     984             :  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
     985             :  *  selectivity of the remainder of the pattern (without any fixed prefix).
     986             :  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
     987             :  *
     988             :  * The return value distinguishes no fixed prefix, a partial prefix,
     989             :  * or an exact-match-only pattern.
     990             :  */
     991             : 
     992             : static Pattern_Prefix_Status
     993        3314 : like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
     994             :                   Const **prefix_const, Selectivity *rest_selec)
     995             : {
     996             :     char       *match;
     997             :     char       *patt;
     998             :     int         pattlen;
     999        3314 :     Oid         typeid = patt_const->consttype;
    1000             :     int         pos,
    1001             :                 match_pos;
    1002        3314 :     bool        is_multibyte = (pg_database_encoding_max_length() > 1);
    1003        3314 :     pg_locale_t locale = 0;
    1004        3314 :     bool        locale_is_c = false;
    1005             : 
    1006             :     /* the right-hand const is type text or bytea */
    1007             :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
    1008             : 
    1009        3314 :     if (case_insensitive)
    1010             :     {
    1011         238 :         if (typeid == BYTEAOID)
    1012           0 :             ereport(ERROR,
    1013             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1014             :                      errmsg("case insensitive matching not supported on type bytea")));
    1015             : 
    1016         238 :         if (!OidIsValid(collation))
    1017             :         {
    1018             :             /*
    1019             :              * This typically means that the parser could not resolve a
    1020             :              * conflict of implicit collations, so report it that way.
    1021             :              */
    1022           0 :             ereport(ERROR,
    1023             :                     (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1024             :                      errmsg("could not determine which collation to use for ILIKE"),
    1025             :                      errhint("Use the COLLATE clause to set the collation explicitly.")));
    1026             :         }
    1027             : 
    1028             :         /* If case-insensitive, we need locale info */
    1029         238 :         if (lc_ctype_is_c(collation))
    1030         104 :             locale_is_c = true;
    1031             :         else
    1032         134 :             locale = pg_newlocale_from_collation(collation);
    1033             :     }
    1034             : 
    1035        3314 :     if (typeid != BYTEAOID)
    1036             :     {
    1037        3302 :         patt = TextDatumGetCString(patt_const->constvalue);
    1038        3302 :         pattlen = strlen(patt);
    1039             :     }
    1040             :     else
    1041             :     {
    1042          12 :         bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
    1043             : 
    1044          12 :         pattlen = VARSIZE_ANY_EXHDR(bstr);
    1045          12 :         patt = (char *) palloc(pattlen);
    1046          12 :         memcpy(patt, VARDATA_ANY(bstr), pattlen);
    1047             :         Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
    1048             :     }
    1049             : 
    1050        3314 :     match = palloc(pattlen + 1);
    1051        3314 :     match_pos = 0;
    1052       20726 :     for (pos = 0; pos < pattlen; pos++)
    1053             :     {
    1054             :         /* % and _ are wildcard characters in LIKE */
    1055       20638 :         if (patt[pos] == '%' ||
    1056       18558 :             patt[pos] == '_')
    1057             :             break;
    1058             : 
    1059             :         /* Backslash escapes the next character */
    1060       17586 :         if (patt[pos] == '\\')
    1061             :         {
    1062         262 :             pos++;
    1063         262 :             if (pos >= pattlen)
    1064           0 :                 break;
    1065             :         }
    1066             : 
    1067             :         /* Stop if case-varying character (it's sort of a wildcard) */
    1068       17856 :         if (case_insensitive &&
    1069         270 :             pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
    1070         174 :             break;
    1071             : 
    1072       17412 :         match[match_pos++] = patt[pos];
    1073             :     }
    1074             : 
    1075        3314 :     match[match_pos] = '\0';
    1076             : 
    1077        3314 :     if (typeid != BYTEAOID)
    1078        3302 :         *prefix_const = string_to_const(match, typeid);
    1079             :     else
    1080          12 :         *prefix_const = string_to_bytea_const(match, match_pos);
    1081             : 
    1082        3314 :     if (rest_selec != NULL)
    1083        2214 :         *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
    1084             :                                        case_insensitive);
    1085             : 
    1086        3314 :     pfree(patt);
    1087        3314 :     pfree(match);
    1088             : 
    1089             :     /* in LIKE, an empty pattern is an exact match! */
    1090        3314 :     if (pos == pattlen)
    1091          88 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1092             : 
    1093        3226 :     if (match_pos > 0)
    1094        2708 :         return Pattern_Prefix_Partial;
    1095             : 
    1096         518 :     return Pattern_Prefix_None;
    1097             : }
    1098             : 
    1099             : static Pattern_Prefix_Status
    1100       12550 : regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
    1101             :                    Const **prefix_const, Selectivity *rest_selec)
    1102             : {
    1103       12550 :     Oid         typeid = patt_const->consttype;
    1104             :     char       *prefix;
    1105             :     bool        exact;
    1106             : 
    1107             :     /*
    1108             :      * Should be unnecessary, there are no bytea regex operators defined. As
    1109             :      * such, it should be noted that the rest of this function has *not* been
    1110             :      * made safe for binary (possibly NULL containing) strings.
    1111             :      */
    1112       12550 :     if (typeid == BYTEAOID)
    1113           0 :         ereport(ERROR,
    1114             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1115             :                  errmsg("regular-expression matching not supported on type bytea")));
    1116             : 
    1117             :     /* Use the regexp machinery to extract the prefix, if any */
    1118       12550 :     prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
    1119             :                                  case_insensitive, collation,
    1120             :                                  &exact);
    1121             : 
    1122       12526 :     if (prefix == NULL)
    1123             :     {
    1124         692 :         *prefix_const = NULL;
    1125             : 
    1126         692 :         if (rest_selec != NULL)
    1127             :         {
    1128         570 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1129             : 
    1130         570 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1131             :                                             case_insensitive,
    1132             :                                             0);
    1133         570 :             pfree(patt);
    1134             :         }
    1135             : 
    1136         692 :         return Pattern_Prefix_None;
    1137             :     }
    1138             : 
    1139       11834 :     *prefix_const = string_to_const(prefix, typeid);
    1140             : 
    1141       11834 :     if (rest_selec != NULL)
    1142             :     {
    1143        6326 :         if (exact)
    1144             :         {
    1145             :             /* Exact match, so there's no additional selectivity */
    1146        5366 :             *rest_selec = 1.0;
    1147             :         }
    1148             :         else
    1149             :         {
    1150         960 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1151             : 
    1152        1920 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1153             :                                             case_insensitive,
    1154         960 :                                             strlen(prefix));
    1155         960 :             pfree(patt);
    1156             :         }
    1157             :     }
    1158             : 
    1159       11834 :     pfree(prefix);
    1160             : 
    1161       11834 :     if (exact)
    1162       10588 :         return Pattern_Prefix_Exact;    /* pattern specifies exact match */
    1163             :     else
    1164        1246 :         return Pattern_Prefix_Partial;
    1165             : }
    1166             : 
    1167             : static Pattern_Prefix_Status
    1168       15966 : pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
    1169             :                      Const **prefix, Selectivity *rest_selec)
    1170             : {
    1171             :     Pattern_Prefix_Status result;
    1172             : 
    1173       15966 :     switch (ptype)
    1174             :     {
    1175        3076 :         case Pattern_Type_Like:
    1176        3076 :             result = like_fixed_prefix(patt, false, collation,
    1177             :                                        prefix, rest_selec);
    1178        3076 :             break;
    1179         238 :         case Pattern_Type_Like_IC:
    1180         238 :             result = like_fixed_prefix(patt, true, collation,
    1181             :                                        prefix, rest_selec);
    1182         238 :             break;
    1183       12488 :         case Pattern_Type_Regex:
    1184       12488 :             result = regex_fixed_prefix(patt, false, collation,
    1185             :                                         prefix, rest_selec);
    1186       12464 :             break;
    1187          62 :         case Pattern_Type_Regex_IC:
    1188          62 :             result = regex_fixed_prefix(patt, true, collation,
    1189             :                                         prefix, rest_selec);
    1190          62 :             break;
    1191         102 :         case Pattern_Type_Prefix:
    1192             :             /* Prefix type work is trivial.  */
    1193         102 :             result = Pattern_Prefix_Partial;
    1194         102 :             *prefix = makeConst(patt->consttype,
    1195             :                                 patt->consttypmod,
    1196             :                                 patt->constcollid,
    1197             :                                 patt->constlen,
    1198             :                                 datumCopy(patt->constvalue,
    1199         102 :                                           patt->constbyval,
    1200             :                                           patt->constlen),
    1201         102 :                                 patt->constisnull,
    1202         102 :                                 patt->constbyval);
    1203         102 :             if (rest_selec != NULL)
    1204          78 :                 *rest_selec = 1.0;  /* all */
    1205         102 :             break;
    1206           0 :         default:
    1207           0 :             elog(ERROR, "unrecognized ptype: %d", (int) ptype);
    1208             :             result = Pattern_Prefix_None;   /* keep compiler quiet */
    1209             :             break;
    1210             :     }
    1211       15942 :     return result;
    1212             : }
    1213             : 
    1214             : /*
    1215             :  * Estimate the selectivity of a fixed prefix for a pattern match.
    1216             :  *
    1217             :  * A fixed prefix "foo" is estimated as the selectivity of the expression
    1218             :  * "variable >= 'foo' AND variable < 'fop'".
    1219             :  *
    1220             :  * The selectivity estimate is with respect to the portion of the column
    1221             :  * population represented by the histogram --- the caller must fold this
    1222             :  * together with info about MCVs and NULLs.
    1223             :  *
    1224             :  * We use the given comparison operators and collation to do the estimation.
    1225             :  * The given variable and Const must be of the associated datatype(s).
    1226             :  *
    1227             :  * XXX Note: we make use of the upper bound to estimate operator selectivity
    1228             :  * even if the locale is such that we cannot rely on the upper-bound string.
    1229             :  * The selectivity only needs to be approximately right anyway, so it seems
    1230             :  * more useful to use the upper-bound code than not.
    1231             :  */
    1232             : static Selectivity
    1233        1830 : prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
    1234             :                    Oid eqopr, Oid ltopr, Oid geopr,
    1235             :                    Oid collation,
    1236             :                    Const *prefixcon)
    1237             : {
    1238             :     Selectivity prefixsel;
    1239             :     FmgrInfo    opproc;
    1240             :     Const      *greaterstrcon;
    1241             :     Selectivity eq_sel;
    1242             : 
    1243             :     /* Estimate the selectivity of "x >= prefix" */
    1244        1830 :     fmgr_info(get_opcode(geopr), &opproc);
    1245             : 
    1246        1830 :     prefixsel = ineq_histogram_selectivity(root, vardata,
    1247             :                                            geopr, &opproc, true, true,
    1248             :                                            collation,
    1249             :                                            prefixcon->constvalue,
    1250             :                                            prefixcon->consttype);
    1251             : 
    1252        1830 :     if (prefixsel < 0.0)
    1253             :     {
    1254             :         /* No histogram is present ... return a suitable default estimate */
    1255        1052 :         return DEFAULT_MATCH_SEL;
    1256             :     }
    1257             : 
    1258             :     /*
    1259             :      * If we can create a string larger than the prefix, say "x < greaterstr".
    1260             :      */
    1261         778 :     fmgr_info(get_opcode(ltopr), &opproc);
    1262         778 :     greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
    1263         778 :     if (greaterstrcon)
    1264             :     {
    1265             :         Selectivity topsel;
    1266             : 
    1267         778 :         topsel = ineq_histogram_selectivity(root, vardata,
    1268             :                                             ltopr, &opproc, false, false,
    1269             :                                             collation,
    1270             :                                             greaterstrcon->constvalue,
    1271             :                                             greaterstrcon->consttype);
    1272             : 
    1273             :         /* ineq_histogram_selectivity worked before, it shouldn't fail now */
    1274             :         Assert(topsel >= 0.0);
    1275             : 
    1276             :         /*
    1277             :          * Merge the two selectivities in the same way as for a range query
    1278             :          * (see clauselist_selectivity()).  Note that we don't need to worry
    1279             :          * about double-exclusion of nulls, since ineq_histogram_selectivity
    1280             :          * doesn't count those anyway.
    1281             :          */
    1282         778 :         prefixsel = topsel + prefixsel - 1.0;
    1283             :     }
    1284             : 
    1285             :     /*
    1286             :      * If the prefix is long then the two bounding values might be too close
    1287             :      * together for the histogram to distinguish them usefully, resulting in a
    1288             :      * zero estimate (plus or minus roundoff error). To avoid returning a
    1289             :      * ridiculously small estimate, compute the estimated selectivity for
    1290             :      * "variable = 'foo'", and clamp to that. (Obviously, the resultant
    1291             :      * estimate should be at least that.)
    1292             :      *
    1293             :      * We apply this even if we couldn't make a greater string.  That case
    1294             :      * suggests that the prefix is near the maximum possible, and thus
    1295             :      * probably off the end of the histogram, and thus we probably got a very
    1296             :      * small estimate from the >= condition; so we still need to clamp.
    1297             :      */
    1298         778 :     eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
    1299             :                           false, true, false);
    1300             : 
    1301         778 :     prefixsel = Max(prefixsel, eq_sel);
    1302             : 
    1303         778 :     return prefixsel;
    1304             : }
    1305             : 
    1306             : 
    1307             : /*
    1308             :  * Estimate the selectivity of a pattern of the specified type.
    1309             :  * Note that any fixed prefix of the pattern will have been removed already,
    1310             :  * so actually we may be looking at just a fragment of the pattern.
    1311             :  *
    1312             :  * For now, we use a very simplistic approach: fixed characters reduce the
    1313             :  * selectivity a good deal, character ranges reduce it a little,
    1314             :  * wildcards (such as % for LIKE or .* for regex) increase it.
    1315             :  */
    1316             : 
    1317             : #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
    1318             : #define CHAR_RANGE_SEL  0.25
    1319             : #define ANY_CHAR_SEL    0.9     /* not 1, since it won't match end-of-string */
    1320             : #define FULL_WILDCARD_SEL 5.0
    1321             : #define PARTIAL_WILDCARD_SEL 2.0
    1322             : 
    1323             : static Selectivity
    1324        2214 : like_selectivity(const char *patt, int pattlen, bool case_insensitive)
    1325             : {
    1326        2214 :     Selectivity sel = 1.0;
    1327             :     int         pos;
    1328             : 
    1329             :     /* Skip any leading wildcard; it's already factored into initial sel */
    1330        4314 :     for (pos = 0; pos < pattlen; pos++)
    1331             :     {
    1332        2880 :         if (patt[pos] != '%' && patt[pos] != '_')
    1333         780 :             break;
    1334             :     }
    1335             : 
    1336        6498 :     for (; pos < pattlen; pos++)
    1337             :     {
    1338             :         /* % and _ are wildcard characters in LIKE */
    1339        4284 :         if (patt[pos] == '%')
    1340         676 :             sel *= FULL_WILDCARD_SEL;
    1341        3608 :         else if (patt[pos] == '_')
    1342         148 :             sel *= ANY_CHAR_SEL;
    1343        3460 :         else if (patt[pos] == '\\')
    1344             :         {
    1345             :             /* Backslash quotes the next character */
    1346          40 :             pos++;
    1347          40 :             if (pos >= pattlen)
    1348           0 :                 break;
    1349          40 :             sel *= FIXED_CHAR_SEL;
    1350             :         }
    1351             :         else
    1352        3420 :             sel *= FIXED_CHAR_SEL;
    1353             :     }
    1354             :     /* Could get sel > 1 if multiple wildcards */
    1355        2214 :     if (sel > 1.0)
    1356           0 :         sel = 1.0;
    1357        2214 :     return sel;
    1358             : }
    1359             : 
    1360             : static Selectivity
    1361        1816 : regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
    1362             : {
    1363        1816 :     Selectivity sel = 1.0;
    1364        1816 :     int         paren_depth = 0;
    1365        1816 :     int         paren_pos = 0;  /* dummy init to keep compiler quiet */
    1366             :     int         pos;
    1367             : 
    1368             :     /* since this function recurses, it could be driven to stack overflow */
    1369        1816 :     check_stack_depth();
    1370             : 
    1371       17800 :     for (pos = 0; pos < pattlen; pos++)
    1372             :     {
    1373       16002 :         if (patt[pos] == '(')
    1374             :         {
    1375         274 :             if (paren_depth == 0)
    1376         268 :                 paren_pos = pos;    /* remember start of parenthesized item */
    1377         274 :             paren_depth++;
    1378             :         }
    1379       15728 :         else if (patt[pos] == ')' && paren_depth > 0)
    1380             :         {
    1381         274 :             paren_depth--;
    1382         274 :             if (paren_depth == 0)
    1383         268 :                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
    1384         268 :                                              pos - (paren_pos + 1),
    1385             :                                              case_insensitive);
    1386             :         }
    1387       15454 :         else if (patt[pos] == '|' && paren_depth == 0)
    1388             :         {
    1389             :             /*
    1390             :              * If unquoted | is present at paren level 0 in pattern, we have
    1391             :              * multiple alternatives; sum their probabilities.
    1392             :              */
    1393          36 :             sel += regex_selectivity_sub(patt + (pos + 1),
    1394          18 :                                          pattlen - (pos + 1),
    1395             :                                          case_insensitive);
    1396          18 :             break;              /* rest of pattern is now processed */
    1397             :         }
    1398       15436 :         else if (patt[pos] == '[')
    1399             :         {
    1400          76 :             bool        negclass = false;
    1401             : 
    1402          76 :             if (patt[++pos] == '^')
    1403             :             {
    1404           0 :                 negclass = true;
    1405           0 :                 pos++;
    1406             :             }
    1407          76 :             if (patt[pos] == ']')   /* ']' at start of class is not special */
    1408           0 :                 pos++;
    1409         372 :             while (pos < pattlen && patt[pos] != ']')
    1410         296 :                 pos++;
    1411          76 :             if (paren_depth == 0)
    1412          76 :                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
    1413             :         }
    1414       15360 :         else if (patt[pos] == '.')
    1415             :         {
    1416         646 :             if (paren_depth == 0)
    1417         386 :                 sel *= ANY_CHAR_SEL;
    1418             :         }
    1419       14714 :         else if (patt[pos] == '*' ||
    1420       14148 :                  patt[pos] == '?' ||
    1421       14096 :                  patt[pos] == '+')
    1422             :         {
    1423             :             /* Ought to be smarter about quantifiers... */
    1424         632 :             if (paren_depth == 0)
    1425         362 :                 sel *= PARTIAL_WILDCARD_SEL;
    1426             :         }
    1427       14082 :         else if (patt[pos] == '{')
    1428             :         {
    1429         264 :             while (pos < pattlen && patt[pos] != '}')
    1430         188 :                 pos++;
    1431          76 :             if (paren_depth == 0)
    1432          64 :                 sel *= PARTIAL_WILDCARD_SEL;
    1433             :         }
    1434       14006 :         else if (patt[pos] == '\\')
    1435             :         {
    1436             :             /* backslash quotes the next character */
    1437         200 :             pos++;
    1438         200 :             if (pos >= pattlen)
    1439           0 :                 break;
    1440         200 :             if (paren_depth == 0)
    1441         104 :                 sel *= FIXED_CHAR_SEL;
    1442             :         }
    1443             :         else
    1444             :         {
    1445       13806 :             if (paren_depth == 0)
    1446       11992 :                 sel *= FIXED_CHAR_SEL;
    1447             :         }
    1448             :     }
    1449             :     /* Could get sel > 1 if multiple wildcards */
    1450        1816 :     if (sel > 1.0)
    1451          26 :         sel = 1.0;
    1452        1816 :     return sel;
    1453             : }
    1454             : 
    1455             : static Selectivity
    1456        1530 : regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
    1457             :                   int fixed_prefix_len)
    1458             : {
    1459             :     Selectivity sel;
    1460             : 
    1461             :     /* If patt doesn't end with $, consider it to have a trailing wildcard */
    1462        1530 :     if (pattlen > 0 && patt[pattlen - 1] == '$' &&
    1463         268 :         (pattlen == 1 || patt[pattlen - 2] != '\\'))
    1464             :     {
    1465             :         /* has trailing $ */
    1466         268 :         sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
    1467             :     }
    1468             :     else
    1469             :     {
    1470             :         /* no trailing $ */
    1471        1262 :         sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
    1472        1262 :         sel *= FULL_WILDCARD_SEL;
    1473             :     }
    1474             : 
    1475             :     /*
    1476             :      * If there's a fixed prefix, discount its selectivity.  We have to be
    1477             :      * careful here since a very long prefix could result in pow's result
    1478             :      * underflowing to zero (in which case "sel" probably has as well).
    1479             :      */
    1480        1530 :     if (fixed_prefix_len > 0)
    1481             :     {
    1482         960 :         double      prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
    1483             : 
    1484         960 :         if (prefixsel > 0.0)
    1485         960 :             sel /= prefixsel;
    1486             :     }
    1487             : 
    1488             :     /* Make sure result stays in range */
    1489        1530 :     CLAMP_PROBABILITY(sel);
    1490        1530 :     return sel;
    1491             : }
    1492             : 
    1493             : /*
    1494             :  * Check whether char is a letter (and, hence, subject to case-folding)
    1495             :  *
    1496             :  * In multibyte character sets or with ICU, we can't use isalpha, and it does
    1497             :  * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
    1498             :  * Instead, just assume any non-ASCII char is potentially case-varying, and
    1499             :  * hard-wire knowledge of which ASCII chars are letters.
    1500             :  */
    1501             : static int
    1502         270 : pattern_char_isalpha(char c, bool is_multibyte,
    1503             :                      pg_locale_t locale, bool locale_is_c)
    1504             : {
    1505         270 :     if (locale_is_c)
    1506         144 :         return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    1507         126 :     else if (is_multibyte && IS_HIGHBIT_SET(c))
    1508           0 :         return true;
    1509         126 :     else if (locale && locale->provider == COLLPROVIDER_ICU)
    1510         126 :         return IS_HIGHBIT_SET(c) ||
    1511         252 :             (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    1512             : #ifdef HAVE_LOCALE_T
    1513           0 :     else if (locale && locale->provider == COLLPROVIDER_LIBC)
    1514           0 :         return isalpha_l((unsigned char) c, locale->info.lt);
    1515             : #endif
    1516             :     else
    1517           0 :         return isalpha((unsigned char) c);
    1518             : }
    1519             : 
    1520             : 
    1521             : /*
    1522             :  * For bytea, the increment function need only increment the current byte
    1523             :  * (there are no multibyte characters to worry about).
    1524             :  */
    1525             : static bool
    1526           0 : byte_increment(unsigned char *ptr, int len)
    1527             : {
    1528           0 :     if (*ptr >= 255)
    1529           0 :         return false;
    1530           0 :     (*ptr)++;
    1531           0 :     return true;
    1532             : }
    1533             : 
    1534             : /*
    1535             :  * Try to generate a string greater than the given string or any
    1536             :  * string it is a prefix of.  If successful, return a palloc'd string
    1537             :  * in the form of a Const node; else return NULL.
    1538             :  *
    1539             :  * The caller must provide the appropriate "less than" comparison function
    1540             :  * for testing the strings, along with the collation to use.
    1541             :  *
    1542             :  * The key requirement here is that given a prefix string, say "foo",
    1543             :  * we must be able to generate another string "fop" that is greater than
    1544             :  * all strings "foobar" starting with "foo".  We can test that we have
    1545             :  * generated a string greater than the prefix string, but in non-C collations
    1546             :  * that is not a bulletproof guarantee that an extension of the string might
    1547             :  * not sort after it; an example is that "foo " is less than "foo!", but it
    1548             :  * is not clear that a "dictionary" sort ordering will consider "foo!" less
    1549             :  * than "foo bar".  CAUTION: Therefore, this function should be used only for
    1550             :  * estimation purposes when working in a non-C collation.
    1551             :  *
    1552             :  * To try to catch most cases where an extended string might otherwise sort
    1553             :  * before the result value, we determine which of the strings "Z", "z", "y",
    1554             :  * and "9" is seen as largest by the collation, and append that to the given
    1555             :  * prefix before trying to find a string that compares as larger.
    1556             :  *
    1557             :  * To search for a greater string, we repeatedly "increment" the rightmost
    1558             :  * character, using an encoding-specific character incrementer function.
    1559             :  * When it's no longer possible to increment the last character, we truncate
    1560             :  * off that character and start incrementing the next-to-rightmost.
    1561             :  * For example, if "z" were the last character in the sort order, then we
    1562             :  * could produce "foo" as a string greater than "fonz".
    1563             :  *
    1564             :  * This could be rather slow in the worst case, but in most cases we
    1565             :  * won't have to try more than one or two strings before succeeding.
    1566             :  *
    1567             :  * Note that it's important for the character incrementer not to be too anal
    1568             :  * about producing every possible character code, since in some cases the only
    1569             :  * way to get a larger string is to increment a previous character position.
    1570             :  * So we don't want to spend too much time trying every possible character
    1571             :  * code at the last position.  A good rule of thumb is to be sure that we
    1572             :  * don't try more than 256*K values for a K-byte character (and definitely
    1573             :  * not 256^K, which is what an exhaustive search would approach).
    1574             :  */
    1575             : static Const *
    1576        1932 : make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
    1577             : {
    1578        1932 :     Oid         datatype = str_const->consttype;
    1579             :     char       *workstr;
    1580             :     int         len;
    1581             :     Datum       cmpstr;
    1582        1932 :     char       *cmptxt = NULL;
    1583             :     mbcharacter_incrementer charinc;
    1584             : 
    1585             :     /*
    1586             :      * Get a modifiable copy of the prefix string in C-string format, and set
    1587             :      * up the string we will compare to as a Datum.  In C locale this can just
    1588             :      * be the given prefix string, otherwise we need to add a suffix.  Type
    1589             :      * BYTEA sorts bytewise so it never needs a suffix either.
    1590             :      */
    1591        1932 :     if (datatype == BYTEAOID)
    1592             :     {
    1593           0 :         bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
    1594             : 
    1595           0 :         len = VARSIZE_ANY_EXHDR(bstr);
    1596           0 :         workstr = (char *) palloc(len);
    1597           0 :         memcpy(workstr, VARDATA_ANY(bstr), len);
    1598             :         Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
    1599           0 :         cmpstr = str_const->constvalue;
    1600             :     }
    1601             :     else
    1602             :     {
    1603        1932 :         if (datatype == NAMEOID)
    1604           0 :             workstr = DatumGetCString(DirectFunctionCall1(nameout,
    1605             :                                                           str_const->constvalue));
    1606             :         else
    1607        1932 :             workstr = TextDatumGetCString(str_const->constvalue);
    1608        1932 :         len = strlen(workstr);
    1609        1932 :         if (lc_collate_is_c(collation) || len == 0)
    1610        1906 :             cmpstr = str_const->constvalue;
    1611             :         else
    1612             :         {
    1613             :             /* If first time through, determine the suffix to use */
    1614             :             static char suffixchar = 0;
    1615             :             static Oid  suffixcollation = 0;
    1616             : 
    1617          26 :             if (!suffixchar || suffixcollation != collation)
    1618             :             {
    1619             :                 char       *best;
    1620             : 
    1621           8 :                 best = "Z";
    1622           8 :                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
    1623           6 :                     best = "z";
    1624           8 :                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
    1625           0 :                     best = "y";
    1626           8 :                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
    1627           0 :                     best = "9";
    1628           8 :                 suffixchar = *best;
    1629           8 :                 suffixcollation = collation;
    1630             :             }
    1631             : 
    1632             :             /* And build the string to compare to */
    1633          26 :             if (datatype == NAMEOID)
    1634             :             {
    1635           0 :                 cmptxt = palloc(len + 2);
    1636           0 :                 memcpy(cmptxt, workstr, len);
    1637           0 :                 cmptxt[len] = suffixchar;
    1638           0 :                 cmptxt[len + 1] = '\0';
    1639           0 :                 cmpstr = PointerGetDatum(cmptxt);
    1640             :             }
    1641             :             else
    1642             :             {
    1643          26 :                 cmptxt = palloc(VARHDRSZ + len + 1);
    1644          26 :                 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
    1645          26 :                 memcpy(VARDATA(cmptxt), workstr, len);
    1646          26 :                 *(VARDATA(cmptxt) + len) = suffixchar;
    1647          26 :                 cmpstr = PointerGetDatum(cmptxt);
    1648             :             }
    1649             :         }
    1650             :     }
    1651             : 
    1652             :     /* Select appropriate character-incrementer function */
    1653        1932 :     if (datatype == BYTEAOID)
    1654           0 :         charinc = byte_increment;
    1655             :     else
    1656        1932 :         charinc = pg_database_encoding_character_incrementer();
    1657             : 
    1658             :     /* And search ... */
    1659        1932 :     while (len > 0)
    1660             :     {
    1661             :         int         charlen;
    1662             :         unsigned char *lastchar;
    1663             : 
    1664             :         /* Identify the last character --- for bytea, just the last byte */
    1665        1932 :         if (datatype == BYTEAOID)
    1666           0 :             charlen = 1;
    1667             :         else
    1668        1932 :             charlen = len - pg_mbcliplen(workstr, len, len - 1);
    1669        1932 :         lastchar = (unsigned char *) (workstr + len - charlen);
    1670             : 
    1671             :         /*
    1672             :          * Try to generate a larger string by incrementing the last character
    1673             :          * (for BYTEA, we treat each byte as a character).
    1674             :          *
    1675             :          * Note: the incrementer function is expected to return true if it's
    1676             :          * generated a valid-per-the-encoding new character, otherwise false.
    1677             :          * The contents of the character on false return are unspecified.
    1678             :          */
    1679        1932 :         while (charinc(lastchar, charlen))
    1680             :         {
    1681             :             Const      *workstr_const;
    1682             : 
    1683        1932 :             if (datatype == BYTEAOID)
    1684           0 :                 workstr_const = string_to_bytea_const(workstr, len);
    1685             :             else
    1686        1932 :                 workstr_const = string_to_const(workstr, datatype);
    1687             : 
    1688        1932 :             if (DatumGetBool(FunctionCall2Coll(ltproc,
    1689             :                                                collation,
    1690             :                                                cmpstr,
    1691             :                                                workstr_const->constvalue)))
    1692             :             {
    1693             :                 /* Successfully made a string larger than cmpstr */
    1694        1932 :                 if (cmptxt)
    1695          26 :                     pfree(cmptxt);
    1696        1932 :                 pfree(workstr);
    1697        1932 :                 return workstr_const;
    1698             :             }
    1699             : 
    1700             :             /* No good, release unusable value and try again */
    1701           0 :             pfree(DatumGetPointer(workstr_const->constvalue));
    1702           0 :             pfree(workstr_const);
    1703             :         }
    1704             : 
    1705             :         /*
    1706             :          * No luck here, so truncate off the last character and try to
    1707             :          * increment the next one.
    1708             :          */
    1709           0 :         len -= charlen;
    1710           0 :         workstr[len] = '\0';
    1711             :     }
    1712             : 
    1713             :     /* Failed... */
    1714           0 :     if (cmptxt)
    1715           0 :         pfree(cmptxt);
    1716           0 :     pfree(workstr);
    1717             : 
    1718           0 :     return NULL;
    1719             : }
    1720             : 
    1721             : /*
    1722             :  * Generate a Datum of the appropriate type from a C string.
    1723             :  * Note that all of the supported types are pass-by-ref, so the
    1724             :  * returned value should be pfree'd if no longer needed.
    1725             :  */
    1726             : static Datum
    1727       17068 : string_to_datum(const char *str, Oid datatype)
    1728             : {
    1729             :     Assert(str != NULL);
    1730             : 
    1731             :     /*
    1732             :      * We cheat a little by assuming that CStringGetTextDatum() will do for
    1733             :      * bpchar and varchar constants too...
    1734             :      */
    1735       17068 :     if (datatype == NAMEOID)
    1736           0 :         return DirectFunctionCall1(namein, CStringGetDatum(str));
    1737       17068 :     else if (datatype == BYTEAOID)
    1738           0 :         return DirectFunctionCall1(byteain, CStringGetDatum(str));
    1739             :     else
    1740       17068 :         return CStringGetTextDatum(str);
    1741             : }
    1742             : 
    1743             : /*
    1744             :  * Generate a Const node of the appropriate type from a C string.
    1745             :  */
    1746             : static Const *
    1747       17068 : string_to_const(const char *str, Oid datatype)
    1748             : {
    1749       17068 :     Datum       conval = string_to_datum(str, datatype);
    1750             :     Oid         collation;
    1751             :     int         constlen;
    1752             : 
    1753             :     /*
    1754             :      * We only need to support a few datatypes here, so hard-wire properties
    1755             :      * instead of incurring the expense of catalog lookups.
    1756             :      */
    1757       17068 :     switch (datatype)
    1758             :     {
    1759       17068 :         case TEXTOID:
    1760             :         case VARCHAROID:
    1761             :         case BPCHAROID:
    1762       17068 :             collation = DEFAULT_COLLATION_OID;
    1763       17068 :             constlen = -1;
    1764       17068 :             break;
    1765             : 
    1766           0 :         case NAMEOID:
    1767           0 :             collation = C_COLLATION_OID;
    1768           0 :             constlen = NAMEDATALEN;
    1769           0 :             break;
    1770             : 
    1771           0 :         case BYTEAOID:
    1772           0 :             collation = InvalidOid;
    1773           0 :             constlen = -1;
    1774           0 :             break;
    1775             : 
    1776           0 :         default:
    1777           0 :             elog(ERROR, "unexpected datatype in string_to_const: %u",
    1778             :                  datatype);
    1779             :             return NULL;
    1780             :     }
    1781             : 
    1782       17068 :     return makeConst(datatype, -1, collation, constlen,
    1783             :                      conval, false, false);
    1784             : }
    1785             : 
    1786             : /*
    1787             :  * Generate a Const node of bytea type from a binary C string and a length.
    1788             :  */
    1789             : static Const *
    1790          12 : string_to_bytea_const(const char *str, size_t str_len)
    1791             : {
    1792          12 :     bytea      *bstr = palloc(VARHDRSZ + str_len);
    1793             :     Datum       conval;
    1794             : 
    1795          12 :     memcpy(VARDATA(bstr), str, str_len);
    1796          12 :     SET_VARSIZE(bstr, VARHDRSZ + str_len);
    1797          12 :     conval = PointerGetDatum(bstr);
    1798             : 
    1799          12 :     return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
    1800             : }

Generated by: LCOV version 1.14