LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like_support.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 486 580 83.8 %
Date: 2025-12-18 01:18:31 Functions: 32 41 78.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like_support.c
       4             :  *    Planner support functions for LIKE, regex, and related operators.
       5             :  *
       6             :  * These routines handle special optimization of operators that can be
       7             :  * used with index scans even though they are not known to the executor's
       8             :  * indexscan machinery.  The key idea is that these operators allow us
       9             :  * to derive approximate indexscan qual clauses, such that any tuples
      10             :  * that pass the operator clause itself must also satisfy the simpler
      11             :  * indexscan condition(s).  Then we can use the indexscan machinery
      12             :  * to avoid scanning as much of the table as we'd otherwise have to,
      13             :  * while applying the original operator as a qpqual condition to ensure
      14             :  * we deliver only the tuples we want.  (In essence, we're using a regular
      15             :  * index as if it were a lossy index.)
      16             :  *
      17             :  * An example of what we're doing is
      18             :  *          textfield LIKE 'abc%def'
      19             :  * from which we can generate the indexscanable conditions
      20             :  *          textfield >= 'abc' AND textfield < 'abd'
      21             :  * which allow efficient scanning of an index on textfield.
      22             :  * (In reality, character set and collation issues make the transformation
      23             :  * from LIKE to indexscan limits rather harder than one might think ...
      24             :  * but that's the basic idea.)
      25             :  *
      26             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      27             :  * Portions Copyright (c) 1994, Regents of the University of California
      28             :  *
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/backend/utils/adt/like_support.c
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres.h"
      36             : 
      37             : #include <math.h>
      38             : 
      39             : #include "access/htup_details.h"
      40             : #include "catalog/pg_collation.h"
      41             : #include "catalog/pg_operator.h"
      42             : #include "catalog/pg_opfamily.h"
      43             : #include "catalog/pg_statistic.h"
      44             : #include "catalog/pg_type.h"
      45             : #include "mb/pg_wchar.h"
      46             : #include "miscadmin.h"
      47             : #include "nodes/makefuncs.h"
      48             : #include "nodes/nodeFuncs.h"
      49             : #include "nodes/supportnodes.h"
      50             : #include "utils/builtins.h"
      51             : #include "utils/datum.h"
      52             : #include "utils/lsyscache.h"
      53             : #include "utils/pg_locale.h"
      54             : #include "utils/selfuncs.h"
      55             : #include "utils/varlena.h"
      56             : 
      57             : 
      58             : typedef enum
      59             : {
      60             :     Pattern_Type_Like,
      61             :     Pattern_Type_Like_IC,
      62             :     Pattern_Type_Regex,
      63             :     Pattern_Type_Regex_IC,
      64             :     Pattern_Type_Prefix,
      65             : } Pattern_Type;
      66             : 
      67             : typedef enum
      68             : {
      69             :     Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact,
      70             : } Pattern_Prefix_Status;
      71             : 
      72             : static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
      73             : static List *match_pattern_prefix(Node *leftop,
      74             :                                   Node *rightop,
      75             :                                   Pattern_Type ptype,
      76             :                                   Oid expr_coll,
      77             :                                   Oid opfamily,
      78             :                                   Oid indexcollation);
      79             : static double patternsel_common(PlannerInfo *root,
      80             :                                 Oid oprid,
      81             :                                 Oid opfuncid,
      82             :                                 List *args,
      83             :                                 int varRelid,
      84             :                                 Oid collation,
      85             :                                 Pattern_Type ptype,
      86             :                                 bool negate);
      87             : static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
      88             :                                                   Pattern_Type ptype,
      89             :                                                   Oid collation,
      90             :                                                   Const **prefix,
      91             :                                                   Selectivity *rest_selec);
      92             : static Selectivity prefix_selectivity(PlannerInfo *root,
      93             :                                       VariableStatData *vardata,
      94             :                                       Oid eqopr, Oid ltopr, Oid geopr,
      95             :                                       Oid collation,
      96             :                                       Const *prefixcon);
      97             : static Selectivity like_selectivity(const char *patt, int pattlen,
      98             :                                     bool case_insensitive);
      99             : static Selectivity regex_selectivity(const char *patt, int pattlen,
     100             :                                      bool case_insensitive,
     101             :                                      int fixed_prefix_len);
     102             : static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
     103             :                                   Oid collation);
     104             : static Datum string_to_datum(const char *str, Oid datatype);
     105             : static Const *string_to_const(const char *str, Oid datatype);
     106             : static Const *string_to_bytea_const(const char *str, size_t str_len);
     107             : 
     108             : 
     109             : /*
     110             :  * Planner support functions for LIKE, regex, and related operators
     111             :  */
     112             : Datum
     113        5112 : textlike_support(PG_FUNCTION_ARGS)
     114             : {
     115        5112 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     116             : 
     117        5112 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
     118             : }
     119             : 
     120             : Datum
     121         358 : texticlike_support(PG_FUNCTION_ARGS)
     122             : {
     123         358 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     124             : 
     125         358 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
     126             : }
     127             : 
     128             : Datum
     129       23016 : textregexeq_support(PG_FUNCTION_ARGS)
     130             : {
     131       23016 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     132             : 
     133       23016 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
     134             : }
     135             : 
     136             : Datum
     137         118 : texticregexeq_support(PG_FUNCTION_ARGS)
     138             : {
     139         118 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     140             : 
     141         118 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
     142             : }
     143             : 
     144             : Datum
     145         156 : text_starts_with_support(PG_FUNCTION_ARGS)
     146             : {
     147         156 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     148             : 
     149         156 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
     150             : }
     151             : 
     152             : /* Common code for the above */
     153             : static Node *
     154       28760 : like_regex_support(Node *rawreq, Pattern_Type ptype)
     155             : {
     156       28760 :     Node       *ret = NULL;
     157             : 
     158       28760 :     if (IsA(rawreq, SupportRequestSelectivity))
     159             :     {
     160             :         /*
     161             :          * Make a selectivity estimate for a function call, just as we'd do if
     162             :          * the call was via the corresponding operator.
     163             :          */
     164          24 :         SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
     165             :         Selectivity s1;
     166             : 
     167          24 :         if (req->is_join)
     168             :         {
     169             :             /*
     170             :              * For the moment we just punt.  If patternjoinsel is ever
     171             :              * improved to do better, this should be made to call it.
     172             :              */
     173           0 :             s1 = DEFAULT_MATCH_SEL;
     174             :         }
     175             :         else
     176             :         {
     177             :             /* Share code with operator restriction selectivity functions */
     178          24 :             s1 = patternsel_common(req->root,
     179             :                                    InvalidOid,
     180             :                                    req->funcid,
     181             :                                    req->args,
     182             :                                    req->varRelid,
     183             :                                    req->inputcollid,
     184             :                                    ptype,
     185             :                                    false);
     186             :         }
     187          24 :         req->selectivity = s1;
     188          24 :         ret = (Node *) req;
     189             :     }
     190       28736 :     else if (IsA(rawreq, SupportRequestIndexCondition))
     191             :     {
     192             :         /* Try to convert operator/function call to index conditions */
     193        8142 :         SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
     194             : 
     195             :         /*
     196             :          * Currently we have no "reverse" match operators with the pattern on
     197             :          * the left, so we only need consider cases with the indexkey on the
     198             :          * left.
     199             :          */
     200        8142 :         if (req->indexarg != 0)
     201           0 :             return NULL;
     202             : 
     203        8142 :         if (is_opclause(req->node))
     204             :         {
     205        8118 :             OpExpr     *clause = (OpExpr *) req->node;
     206             : 
     207             :             Assert(list_length(clause->args) == 2);
     208             :             ret = (Node *)
     209        8118 :                 match_pattern_prefix((Node *) linitial(clause->args),
     210        8118 :                                      (Node *) lsecond(clause->args),
     211             :                                      ptype,
     212             :                                      clause->inputcollid,
     213             :                                      req->opfamily,
     214             :                                      req->indexcollation);
     215             :         }
     216          24 :         else if (is_funcclause(req->node))   /* be paranoid */
     217             :         {
     218          24 :             FuncExpr   *clause = (FuncExpr *) req->node;
     219             : 
     220             :             Assert(list_length(clause->args) == 2);
     221             :             ret = (Node *)
     222          24 :                 match_pattern_prefix((Node *) linitial(clause->args),
     223          24 :                                      (Node *) lsecond(clause->args),
     224             :                                      ptype,
     225             :                                      clause->inputcollid,
     226             :                                      req->opfamily,
     227             :                                      req->indexcollation);
     228             :         }
     229             :     }
     230             : 
     231       28760 :     return ret;
     232             : }
     233             : 
     234             : /*
     235             :  * match_pattern_prefix
     236             :  *    Try to generate an indexqual for a LIKE or regex operator.
     237             :  */
     238             : static List *
     239        8142 : match_pattern_prefix(Node *leftop,
     240             :                      Node *rightop,
     241             :                      Pattern_Type ptype,
     242             :                      Oid expr_coll,
     243             :                      Oid opfamily,
     244             :                      Oid indexcollation)
     245             : {
     246             :     List       *result;
     247             :     Const      *patt;
     248             :     Const      *prefix;
     249             :     Pattern_Prefix_Status pstatus;
     250             :     Oid         ldatatype;
     251             :     Oid         rdatatype;
     252             :     Oid         eqopr;
     253             :     Oid         ltopr;
     254             :     Oid         geopr;
     255        8142 :     Oid         preopr = InvalidOid;
     256             :     bool        collation_aware;
     257             :     Expr       *expr;
     258             :     FmgrInfo    ltproc;
     259             :     Const      *greaterstr;
     260             : 
     261             :     /*
     262             :      * Can't do anything with a non-constant or NULL pattern argument.
     263             :      *
     264             :      * Note that since we restrict ourselves to cases with a hard constant on
     265             :      * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
     266             :      * about verifying that.
     267             :      */
     268        8142 :     if (!IsA(rightop, Const) ||
     269        8094 :         ((Const *) rightop)->constisnull)
     270          48 :         return NIL;
     271        8094 :     patt = (Const *) rightop;
     272             : 
     273             :     /*
     274             :      * Try to extract a fixed prefix from the pattern.
     275             :      */
     276        8094 :     pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
     277             :                                    &prefix, NULL);
     278             : 
     279             :     /* fail if no fixed prefix */
     280        8094 :     if (pstatus == Pattern_Prefix_None)
     281         300 :         return NIL;
     282             : 
     283             :     /*
     284             :      * Identify the operators we want to use, based on the type of the
     285             :      * left-hand argument.  Usually these are just the type's regular
     286             :      * comparison operators, but if we are considering one of the semi-legacy
     287             :      * "pattern" opclasses, use the "pattern" operators instead.  Those are
     288             :      * not collation-sensitive but always use C collation, as we want.  The
     289             :      * selected operators also determine the needed type of the prefix
     290             :      * constant.
     291             :      */
     292        7794 :     ldatatype = exprType(leftop);
     293        7794 :     switch (ldatatype)
     294             :     {
     295          80 :         case TEXTOID:
     296          80 :             if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
     297             :             {
     298           0 :                 eqopr = TextEqualOperator;
     299           0 :                 ltopr = TextPatternLessOperator;
     300           0 :                 geopr = TextPatternGreaterEqualOperator;
     301           0 :                 collation_aware = false;
     302             :             }
     303          80 :             else if (opfamily == TEXT_SPGIST_FAM_OID)
     304             :             {
     305          24 :                 eqopr = TextEqualOperator;
     306          24 :                 ltopr = TextPatternLessOperator;
     307          24 :                 geopr = TextPatternGreaterEqualOperator;
     308             :                 /* This opfamily has direct support for prefixing */
     309          24 :                 preopr = TextPrefixOperator;
     310          24 :                 collation_aware = false;
     311             :             }
     312             :             else
     313             :             {
     314          56 :                 eqopr = TextEqualOperator;
     315          56 :                 ltopr = TextLessOperator;
     316          56 :                 geopr = TextGreaterEqualOperator;
     317          56 :                 collation_aware = true;
     318             :             }
     319          80 :             rdatatype = TEXTOID;
     320          80 :             break;
     321        7690 :         case NAMEOID:
     322             : 
     323             :             /*
     324             :              * Note that here, we need the RHS type to be text, so that the
     325             :              * comparison value isn't improperly truncated to NAMEDATALEN.
     326             :              */
     327        7690 :             eqopr = NameEqualTextOperator;
     328        7690 :             ltopr = NameLessTextOperator;
     329        7690 :             geopr = NameGreaterEqualTextOperator;
     330        7690 :             collation_aware = true;
     331        7690 :             rdatatype = TEXTOID;
     332        7690 :             break;
     333          24 :         case BPCHAROID:
     334          24 :             if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
     335             :             {
     336           0 :                 eqopr = BpcharEqualOperator;
     337           0 :                 ltopr = BpcharPatternLessOperator;
     338           0 :                 geopr = BpcharPatternGreaterEqualOperator;
     339           0 :                 collation_aware = false;
     340             :             }
     341             :             else
     342             :             {
     343          24 :                 eqopr = BpcharEqualOperator;
     344          24 :                 ltopr = BpcharLessOperator;
     345          24 :                 geopr = BpcharGreaterEqualOperator;
     346          24 :                 collation_aware = true;
     347             :             }
     348          24 :             rdatatype = BPCHAROID;
     349          24 :             break;
     350           0 :         case BYTEAOID:
     351           0 :             eqopr = ByteaEqualOperator;
     352           0 :             ltopr = ByteaLessOperator;
     353           0 :             geopr = ByteaGreaterEqualOperator;
     354           0 :             collation_aware = false;
     355           0 :             rdatatype = BYTEAOID;
     356           0 :             break;
     357           0 :         default:
     358             :             /* Can't get here unless we're attached to the wrong operator */
     359           0 :             return NIL;
     360             :     }
     361             : 
     362             :     /*
     363             :      * If necessary, coerce the prefix constant to the right type.  The given
     364             :      * prefix constant is either text or bytea type, therefore the only case
     365             :      * where we need to do anything is when converting text to bpchar.  Those
     366             :      * two types are binary-compatible, so relabeling the Const node is
     367             :      * sufficient.
     368             :      */
     369        7794 :     if (prefix->consttype != rdatatype)
     370             :     {
     371             :         Assert(prefix->consttype == TEXTOID &&
     372             :                rdatatype == BPCHAROID);
     373          24 :         prefix->consttype = rdatatype;
     374             :     }
     375             : 
     376             :     /*
     377             :      * If we found an exact-match pattern, generate an "=" indexqual.
     378             :      *
     379             :      * Here and below, check to see whether the desired operator is actually
     380             :      * supported by the index opclass, and fail quietly if not.  This allows
     381             :      * us to not be concerned with specific opclasses (except for the legacy
     382             :      * "pattern" cases); any index that correctly implements the operators
     383             :      * will work.
     384             :      */
     385        7794 :     if (pstatus == Pattern_Prefix_Exact)
     386             :     {
     387        6458 :         if (!op_in_opfamily(eqopr, opfamily))
     388          12 :             return NIL;
     389        6446 :         if (indexcollation != expr_coll)
     390        6392 :             return NIL;
     391          54 :         expr = make_opclause(eqopr, BOOLOID, false,
     392             :                              (Expr *) leftop, (Expr *) prefix,
     393             :                              InvalidOid, indexcollation);
     394          54 :         result = list_make1(expr);
     395          54 :         return result;
     396             :     }
     397             : 
     398             :     /*
     399             :      * Anything other than Pattern_Prefix_Exact is not supported if the
     400             :      * expression collation is nondeterministic.  The optimized equality or
     401             :      * prefix tests use bytewise comparisons, which is not consistent with
     402             :      * nondeterministic collations.
     403             :      *
     404             :      * expr_coll is not set for a non-collation-aware data type such as bytea.
     405             :      */
     406        1336 :     if (expr_coll && !get_collation_isdeterministic(expr_coll))
     407           6 :         return NIL;
     408             : 
     409             :     /*
     410             :      * Otherwise, we have a nonempty required prefix of the values.  Some
     411             :      * opclasses support prefix checks directly, otherwise we'll try to
     412             :      * generate a range constraint.
     413             :      */
     414        1330 :     if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
     415             :     {
     416          24 :         expr = make_opclause(preopr, BOOLOID, false,
     417             :                              (Expr *) leftop, (Expr *) prefix,
     418             :                              InvalidOid, indexcollation);
     419          24 :         result = list_make1(expr);
     420          24 :         return result;
     421             :     }
     422             : 
     423             :     /*
     424             :      * Since we need a range constraint, it's only going to work reliably if
     425             :      * the index is collation-insensitive or has "C" collation.  Note that
     426             :      * here we are looking at the index's collation, not the expression's
     427             :      * collation -- this test is *not* dependent on the LIKE/regex operator's
     428             :      * collation.
     429             :      */
     430        1306 :     if (collation_aware &&
     431        1306 :         !pg_newlocale_from_collation(indexcollation)->collate_is_c)
     432          14 :         return NIL;
     433             : 
     434             :     /*
     435             :      * We can always say "x >= prefix".
     436             :      */
     437        1292 :     if (!op_in_opfamily(geopr, opfamily))
     438          12 :         return NIL;
     439        1280 :     expr = make_opclause(geopr, BOOLOID, false,
     440             :                          (Expr *) leftop, (Expr *) prefix,
     441             :                          InvalidOid, indexcollation);
     442        1280 :     result = list_make1(expr);
     443             : 
     444             :     /*-------
     445             :      * If we can create a string larger than the prefix, we can say
     446             :      * "x < greaterstr".  NB: we rely on make_greater_string() to generate
     447             :      * a guaranteed-greater string, not just a probably-greater string.
     448             :      * In general this is only guaranteed in C locale, so we'd better be
     449             :      * using a C-locale index collation.
     450             :      *-------
     451             :      */
     452        1280 :     if (!op_in_opfamily(ltopr, opfamily))
     453           0 :         return result;
     454        1280 :     fmgr_info(get_opcode(ltopr), &ltproc);
     455        1280 :     greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
     456        1280 :     if (greaterstr)
     457             :     {
     458        1280 :         expr = make_opclause(ltopr, BOOLOID, false,
     459             :                              (Expr *) leftop, (Expr *) greaterstr,
     460             :                              InvalidOid, indexcollation);
     461        1280 :         result = lappend(result, expr);
     462             :     }
     463             : 
     464        1280 :     return result;
     465             : }
     466             : 
     467             : 
     468             : /*
     469             :  * patternsel_common - generic code for pattern-match restriction selectivity.
     470             :  *
     471             :  * To support using this from either the operator or function paths, caller
     472             :  * may pass either operator OID or underlying function OID; we look up the
     473             :  * latter from the former if needed.  (We could just have patternsel() call
     474             :  * get_opcode(), but the work would be wasted if we don't have a need to
     475             :  * compare a fixed prefix to the pg_statistic data.)
     476             :  *
     477             :  * Note that oprid and/or opfuncid should be for the positive-match operator
     478             :  * even when negate is true.
     479             :  */
     480             : static double
     481       12368 : patternsel_common(PlannerInfo *root,
     482             :                   Oid oprid,
     483             :                   Oid opfuncid,
     484             :                   List *args,
     485             :                   int varRelid,
     486             :                   Oid collation,
     487             :                   Pattern_Type ptype,
     488             :                   bool negate)
     489             : {
     490             :     VariableStatData vardata;
     491             :     Node       *other;
     492             :     bool        varonleft;
     493             :     Datum       constval;
     494             :     Oid         consttype;
     495             :     Oid         vartype;
     496             :     Oid         rdatatype;
     497             :     Oid         eqopr;
     498             :     Oid         ltopr;
     499             :     Oid         geopr;
     500             :     Pattern_Prefix_Status pstatus;
     501             :     Const      *patt;
     502       12368 :     Const      *prefix = NULL;
     503       12368 :     Selectivity rest_selec = 0;
     504       12368 :     double      nullfrac = 0.0;
     505             :     double      result;
     506             : 
     507             :     /*
     508             :      * Initialize result to the appropriate default estimate depending on
     509             :      * whether it's a match or not-match operator.
     510             :      */
     511       12368 :     if (negate)
     512        2846 :         result = 1.0 - DEFAULT_MATCH_SEL;
     513             :     else
     514        9522 :         result = DEFAULT_MATCH_SEL;
     515             : 
     516             :     /*
     517             :      * If expression is not variable op constant, then punt and return the
     518             :      * default estimate.
     519             :      */
     520       12368 :     if (!get_restriction_variable(root, args, varRelid,
     521             :                                   &vardata, &other, &varonleft))
     522         444 :         return result;
     523       11924 :     if (!varonleft || !IsA(other, Const))
     524             :     {
     525          50 :         ReleaseVariableStats(vardata);
     526          50 :         return result;
     527             :     }
     528             : 
     529             :     /*
     530             :      * If the constant is NULL, assume operator is strict and return zero, ie,
     531             :      * operator will never return TRUE.  (It's zero even for a negator op.)
     532             :      */
     533       11874 :     if (((Const *) other)->constisnull)
     534             :     {
     535           0 :         ReleaseVariableStats(vardata);
     536           0 :         return 0.0;
     537             :     }
     538       11874 :     constval = ((Const *) other)->constvalue;
     539       11874 :     consttype = ((Const *) other)->consttype;
     540             : 
     541             :     /*
     542             :      * The right-hand const is type text or bytea for all supported operators.
     543             :      * We do not expect to see binary-compatible types here, since
     544             :      * const-folding should have relabeled the const to exactly match the
     545             :      * operator's declared type.
     546             :      */
     547       11874 :     if (consttype != TEXTOID && consttype != BYTEAOID)
     548             :     {
     549          24 :         ReleaseVariableStats(vardata);
     550          24 :         return result;
     551             :     }
     552             : 
     553             :     /*
     554             :      * Similarly, the exposed type of the left-hand side should be one of
     555             :      * those we know.  (Do not look at vardata.atttype, which might be
     556             :      * something binary-compatible but different.)  We can use it to identify
     557             :      * the comparison operators and the required type of the comparison
     558             :      * constant, much as in match_pattern_prefix().
     559             :      */
     560       11850 :     vartype = vardata.vartype;
     561             : 
     562       11850 :     switch (vartype)
     563             :     {
     564        1586 :         case TEXTOID:
     565        1586 :             eqopr = TextEqualOperator;
     566        1586 :             ltopr = TextLessOperator;
     567        1586 :             geopr = TextGreaterEqualOperator;
     568        1586 :             rdatatype = TEXTOID;
     569        1586 :             break;
     570       10170 :         case NAMEOID:
     571             : 
     572             :             /*
     573             :              * Note that here, we need the RHS type to be text, so that the
     574             :              * comparison value isn't improperly truncated to NAMEDATALEN.
     575             :              */
     576       10170 :             eqopr = NameEqualTextOperator;
     577       10170 :             ltopr = NameLessTextOperator;
     578       10170 :             geopr = NameGreaterEqualTextOperator;
     579       10170 :             rdatatype = TEXTOID;
     580       10170 :             break;
     581          84 :         case BPCHAROID:
     582          84 :             eqopr = BpcharEqualOperator;
     583          84 :             ltopr = BpcharLessOperator;
     584          84 :             geopr = BpcharGreaterEqualOperator;
     585          84 :             rdatatype = BPCHAROID;
     586          84 :             break;
     587           6 :         case BYTEAOID:
     588           6 :             eqopr = ByteaEqualOperator;
     589           6 :             ltopr = ByteaLessOperator;
     590           6 :             geopr = ByteaGreaterEqualOperator;
     591           6 :             rdatatype = BYTEAOID;
     592           6 :             break;
     593           4 :         default:
     594             :             /* Can't get here unless we're attached to the wrong operator */
     595           4 :             ReleaseVariableStats(vardata);
     596           4 :             return result;
     597             :     }
     598             : 
     599             :     /*
     600             :      * Grab the nullfrac for use below.
     601             :      */
     602       11846 :     if (HeapTupleIsValid(vardata.statsTuple))
     603             :     {
     604             :         Form_pg_statistic stats;
     605             : 
     606        9756 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     607        9756 :         nullfrac = stats->stanullfrac;
     608             :     }
     609             : 
     610             :     /*
     611             :      * Pull out any fixed prefix implied by the pattern, and estimate the
     612             :      * fractional selectivity of the remainder of the pattern.  Unlike many
     613             :      * other selectivity estimators, we use the pattern operator's actual
     614             :      * collation for this step.  This is not because we expect the collation
     615             :      * to make a big difference in the selectivity estimate (it seldom would),
     616             :      * but because we want to be sure we cache compiled regexps under the
     617             :      * right cache key, so that they can be re-used at runtime.
     618             :      */
     619       11846 :     patt = (Const *) other;
     620       11846 :     pstatus = pattern_fixed_prefix(patt, ptype, collation,
     621             :                                    &prefix, &rest_selec);
     622             : 
     623             :     /*
     624             :      * If necessary, coerce the prefix constant to the right type.  The only
     625             :      * case where we need to do anything is when converting text to bpchar.
     626             :      * Those two types are binary-compatible, so relabeling the Const node is
     627             :      * sufficient.
     628             :      */
     629       11822 :     if (prefix && prefix->consttype != rdatatype)
     630             :     {
     631             :         Assert(prefix->consttype == TEXTOID &&
     632             :                rdatatype == BPCHAROID);
     633          36 :         prefix->consttype = rdatatype;
     634             :     }
     635             : 
     636       11822 :     if (pstatus == Pattern_Prefix_Exact)
     637             :     {
     638             :         /*
     639             :          * Pattern specifies an exact match, so estimate as for '='
     640             :          */
     641        6666 :         result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
     642             :                               false, true, false);
     643             :     }
     644             :     else
     645             :     {
     646             :         /*
     647             :          * Not exact-match pattern.  If we have a sufficiently large
     648             :          * histogram, estimate selectivity for the histogram part of the
     649             :          * population by counting matches in the histogram.  If not, estimate
     650             :          * selectivity of the fixed prefix and remainder of pattern
     651             :          * separately, then combine the two to get an estimate of the
     652             :          * selectivity for the part of the column population represented by
     653             :          * the histogram.  (For small histograms, we combine these
     654             :          * approaches.)
     655             :          *
     656             :          * We then add up data for any most-common-values values; these are
     657             :          * not in the histogram population, and we can get exact answers for
     658             :          * them by applying the pattern operator, so there's no reason to
     659             :          * approximate.  (If the MCVs cover a significant part of the total
     660             :          * population, this gives us a big leg up in accuracy.)
     661             :          */
     662             :         Selectivity selec;
     663             :         int         hist_size;
     664             :         FmgrInfo    opproc;
     665             :         double      mcv_selec,
     666             :                     sumcommon;
     667             : 
     668             :         /* Try to use the histogram entries to get selectivity */
     669        5156 :         if (!OidIsValid(opfuncid))
     670        5132 :             opfuncid = get_opcode(oprid);
     671        5156 :         fmgr_info(opfuncid, &opproc);
     672             : 
     673        5156 :         selec = histogram_selectivity(&vardata, &opproc, collation,
     674             :                                       constval, true,
     675             :                                       10, 1, &hist_size);
     676             : 
     677             :         /* If not at least 100 entries, use the heuristic method */
     678        5156 :         if (hist_size < 100)
     679             :         {
     680             :             Selectivity heursel;
     681             :             Selectivity prefixsel;
     682             : 
     683        3778 :             if (pstatus == Pattern_Prefix_Partial)
     684        3006 :                 prefixsel = prefix_selectivity(root, &vardata,
     685             :                                                eqopr, ltopr, geopr,
     686             :                                                collation,
     687             :                                                prefix);
     688             :             else
     689         772 :                 prefixsel = 1.0;
     690        3778 :             heursel = prefixsel * rest_selec;
     691             : 
     692        3778 :             if (selec < 0)       /* fewer than 10 histogram entries? */
     693        3360 :                 selec = heursel;
     694             :             else
     695             :             {
     696             :                 /*
     697             :                  * For histogram sizes from 10 to 100, we combine the
     698             :                  * histogram and heuristic selectivities, putting increasingly
     699             :                  * more trust in the histogram for larger sizes.
     700             :                  */
     701         418 :                 double      hist_weight = hist_size / 100.0;
     702             : 
     703         418 :                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
     704             :             }
     705             :         }
     706             : 
     707             :         /* In any case, don't believe extremely small or large estimates. */
     708        5156 :         if (selec < 0.0001)
     709        1470 :             selec = 0.0001;
     710        3686 :         else if (selec > 0.9999)
     711         132 :             selec = 0.9999;
     712             : 
     713             :         /*
     714             :          * If we have most-common-values info, add up the fractions of the MCV
     715             :          * entries that satisfy MCV OP PATTERN.  These fractions contribute
     716             :          * directly to the result selectivity.  Also add up the total fraction
     717             :          * represented by MCV entries.
     718             :          */
     719        5156 :         mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
     720             :                                     constval, true,
     721             :                                     &sumcommon);
     722             : 
     723             :         /*
     724             :          * Now merge the results from the MCV and histogram calculations,
     725             :          * realizing that the histogram covers only the non-null values that
     726             :          * are not listed in MCV.
     727             :          */
     728        5156 :         selec *= 1.0 - nullfrac - sumcommon;
     729        5156 :         selec += mcv_selec;
     730        5156 :         result = selec;
     731             :     }
     732             : 
     733             :     /* now adjust if we wanted not-match rather than match */
     734       11822 :     if (negate)
     735        2390 :         result = 1.0 - result - nullfrac;
     736             : 
     737             :     /* result should be in range, but make sure... */
     738       11822 :     CLAMP_PROBABILITY(result);
     739             : 
     740       11822 :     if (prefix)
     741             :     {
     742       11196 :         pfree(DatumGetPointer(prefix->constvalue));
     743       11196 :         pfree(prefix);
     744             :     }
     745             : 
     746       11822 :     ReleaseVariableStats(vardata);
     747             : 
     748       11822 :     return result;
     749             : }
     750             : 
     751             : /*
     752             :  * Fix impedance mismatch between SQL-callable functions and patternsel_common
     753             :  */
     754             : static double
     755       12344 : patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     756             : {
     757       12344 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     758       12344 :     Oid         operator = PG_GETARG_OID(1);
     759       12344 :     List       *args = (List *) PG_GETARG_POINTER(2);
     760       12344 :     int         varRelid = PG_GETARG_INT32(3);
     761       12344 :     Oid         collation = PG_GET_COLLATION();
     762             : 
     763             :     /*
     764             :      * If this is for a NOT LIKE or similar operator, get the corresponding
     765             :      * positive-match operator and work with that.
     766             :      */
     767       12344 :     if (negate)
     768             :     {
     769        2846 :         operator = get_negator(operator);
     770        2846 :         if (!OidIsValid(operator))
     771           0 :             elog(ERROR, "patternsel called for operator without a negator");
     772             :     }
     773             : 
     774       12344 :     return patternsel_common(root,
     775             :                              operator,
     776             :                              InvalidOid,
     777             :                              args,
     778             :                              varRelid,
     779             :                              collation,
     780             :                              ptype,
     781             :                              negate);
     782             : }
     783             : 
     784             : /*
     785             :  *      regexeqsel      - Selectivity of regular-expression pattern match.
     786             :  */
     787             : Datum
     788        7680 : regexeqsel(PG_FUNCTION_ARGS)
     789             : {
     790        7680 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
     791             : }
     792             : 
     793             : /*
     794             :  *      icregexeqsel    - Selectivity of case-insensitive regex match.
     795             :  */
     796             : Datum
     797          64 : icregexeqsel(PG_FUNCTION_ARGS)
     798             : {
     799          64 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
     800             : }
     801             : 
     802             : /*
     803             :  *      likesel         - Selectivity of LIKE pattern match.
     804             :  */
     805             : Datum
     806        1572 : likesel(PG_FUNCTION_ARGS)
     807             : {
     808        1572 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
     809             : }
     810             : 
     811             : /*
     812             :  *      prefixsel           - selectivity of prefix operator
     813             :  */
     814             : Datum
     815          54 : prefixsel(PG_FUNCTION_ARGS)
     816             : {
     817          54 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
     818             : }
     819             : 
     820             : /*
     821             :  *
     822             :  *      iclikesel           - Selectivity of ILIKE pattern match.
     823             :  */
     824             : Datum
     825         128 : iclikesel(PG_FUNCTION_ARGS)
     826             : {
     827         128 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
     828             : }
     829             : 
     830             : /*
     831             :  *      regexnesel      - Selectivity of regular-expression pattern non-match.
     832             :  */
     833             : Datum
     834        2686 : regexnesel(PG_FUNCTION_ARGS)
     835             : {
     836        2686 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
     837             : }
     838             : 
     839             : /*
     840             :  *      icregexnesel    - Selectivity of case-insensitive regex non-match.
     841             :  */
     842             : Datum
     843          16 : icregexnesel(PG_FUNCTION_ARGS)
     844             : {
     845          16 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
     846             : }
     847             : 
     848             : /*
     849             :  *      nlikesel        - Selectivity of LIKE pattern non-match.
     850             :  */
     851             : Datum
     852         136 : nlikesel(PG_FUNCTION_ARGS)
     853             : {
     854         136 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
     855             : }
     856             : 
     857             : /*
     858             :  *      icnlikesel      - Selectivity of ILIKE pattern non-match.
     859             :  */
     860             : Datum
     861           8 : icnlikesel(PG_FUNCTION_ARGS)
     862             : {
     863           8 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
     864             : }
     865             : 
     866             : /*
     867             :  * patternjoinsel       - Generic code for pattern-match join selectivity.
     868             :  */
     869             : static double
     870         236 : patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     871             : {
     872             :     /* For the moment we just punt. */
     873         236 :     return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
     874             : }
     875             : 
     876             : /*
     877             :  *      regexeqjoinsel  - Join selectivity of regular-expression pattern match.
     878             :  */
     879             : Datum
     880         236 : regexeqjoinsel(PG_FUNCTION_ARGS)
     881             : {
     882         236 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
     883             : }
     884             : 
     885             : /*
     886             :  *      icregexeqjoinsel    - Join selectivity of case-insensitive regex match.
     887             :  */
     888             : Datum
     889           0 : icregexeqjoinsel(PG_FUNCTION_ARGS)
     890             : {
     891           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
     892             : }
     893             : 
     894             : /*
     895             :  *      likejoinsel         - Join selectivity of LIKE pattern match.
     896             :  */
     897             : Datum
     898           0 : likejoinsel(PG_FUNCTION_ARGS)
     899             : {
     900           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
     901             : }
     902             : 
     903             : /*
     904             :  *      prefixjoinsel           - Join selectivity of prefix operator
     905             :  */
     906             : Datum
     907           0 : prefixjoinsel(PG_FUNCTION_ARGS)
     908             : {
     909           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
     910             : }
     911             : 
     912             : /*
     913             :  *      iclikejoinsel           - Join selectivity of ILIKE pattern match.
     914             :  */
     915             : Datum
     916           0 : iclikejoinsel(PG_FUNCTION_ARGS)
     917             : {
     918           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
     919             : }
     920             : 
     921             : /*
     922             :  *      regexnejoinsel  - Join selectivity of regex non-match.
     923             :  */
     924             : Datum
     925           0 : regexnejoinsel(PG_FUNCTION_ARGS)
     926             : {
     927           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
     928             : }
     929             : 
     930             : /*
     931             :  *      icregexnejoinsel    - Join selectivity of case-insensitive regex non-match.
     932             :  */
     933             : Datum
     934           0 : icregexnejoinsel(PG_FUNCTION_ARGS)
     935             : {
     936           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
     937             : }
     938             : 
     939             : /*
     940             :  *      nlikejoinsel        - Join selectivity of LIKE pattern non-match.
     941             :  */
     942             : Datum
     943           0 : nlikejoinsel(PG_FUNCTION_ARGS)
     944             : {
     945           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
     946             : }
     947             : 
     948             : /*
     949             :  *      icnlikejoinsel      - Join selectivity of ILIKE pattern non-match.
     950             :  */
     951             : Datum
     952           0 : icnlikejoinsel(PG_FUNCTION_ARGS)
     953             : {
     954           0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
     955             : }
     956             : 
     957             : 
     958             : /*-------------------------------------------------------------------------
     959             :  *
     960             :  * Pattern analysis functions
     961             :  *
     962             :  * These routines support analysis of LIKE and regular-expression patterns
     963             :  * by the planner/optimizer.  It's important that they agree with the
     964             :  * regular-expression code in backend/regex/ and the LIKE code in
     965             :  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
     966             :  * must be conservative: if we report a string longer than the true fixed
     967             :  * prefix, the query may produce actually wrong answers, rather than just
     968             :  * getting a bad selectivity estimate!
     969             :  *
     970             :  *-------------------------------------------------------------------------
     971             :  */
     972             : 
     973             : /*
     974             :  * Extract the fixed prefix, if any, for a pattern.
     975             :  *
     976             :  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
     977             :  *  or to NULL if no fixed prefix exists for the pattern.
     978             :  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
     979             :  *  selectivity of the remainder of the pattern (without any fixed prefix).
     980             :  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
     981             :  *
     982             :  * The return value distinguishes no fixed prefix, a partial prefix,
     983             :  * or an exact-match-only pattern.
     984             :  */
     985             : 
     986             : static Pattern_Prefix_Status
     987        2736 : like_fixed_prefix(Const *patt_const, Const **prefix_const,
     988             :                   Selectivity *rest_selec)
     989             : {
     990             :     char       *match;
     991             :     char       *patt;
     992             :     int         pattlen;
     993        2736 :     Oid         typeid = patt_const->consttype;
     994             :     int         pos,
     995             :                 match_pos;
     996             : 
     997             :     /* the right-hand const is type text or bytea */
     998             :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
     999             : 
    1000        2736 :     if (typeid != BYTEAOID)
    1001             :     {
    1002        2724 :         patt = TextDatumGetCString(patt_const->constvalue);
    1003        2724 :         pattlen = strlen(patt);
    1004             :     }
    1005             :     else
    1006             :     {
    1007          12 :         bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
    1008             : 
    1009          12 :         pattlen = VARSIZE_ANY_EXHDR(bstr);
    1010          12 :         patt = (char *) palloc(pattlen);
    1011          12 :         memcpy(patt, VARDATA_ANY(bstr), pattlen);
    1012             :         Assert(bstr == DatumGetPointer(patt_const->constvalue));
    1013             :     }
    1014             : 
    1015        2736 :     match = palloc(pattlen + 1);
    1016        2736 :     match_pos = 0;
    1017       15628 :     for (pos = 0; pos < pattlen; pos++)
    1018             :     {
    1019             :         /* % and _ are wildcard characters in LIKE */
    1020       15522 :         if (patt[pos] == '%' ||
    1021       13978 :             patt[pos] == '_')
    1022             :             break;
    1023             : 
    1024             :         /* Backslash escapes the next character */
    1025       12892 :         if (patt[pos] == '\\')
    1026             :         {
    1027         274 :             pos++;
    1028         274 :             if (pos >= pattlen)
    1029           0 :                 break;
    1030             :         }
    1031             : 
    1032       12892 :         match[match_pos++] = patt[pos];
    1033             :     }
    1034             : 
    1035        2736 :     match[match_pos] = '\0';
    1036             : 
    1037        2736 :     if (typeid != BYTEAOID)
    1038        2724 :         *prefix_const = string_to_const(match, typeid);
    1039             :     else
    1040          12 :         *prefix_const = string_to_bytea_const(match, match_pos);
    1041             : 
    1042        2736 :     if (rest_selec != NULL)
    1043        1708 :         *rest_selec = like_selectivity(&patt[pos], pattlen - pos, false);
    1044             : 
    1045        2736 :     pfree(patt);
    1046        2736 :     pfree(match);
    1047             : 
    1048             :     /* in LIKE, an empty pattern is an exact match! */
    1049        2736 :     if (pos == pattlen)
    1050         106 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1051             : 
    1052        2630 :     if (match_pos > 0)
    1053        2300 :         return Pattern_Prefix_Partial;
    1054             : 
    1055         330 :     return Pattern_Prefix_None;
    1056             : }
    1057             : 
    1058             : /*
    1059             :  * Case-insensitive variant of like_fixed_prefix().  Multibyte and
    1060             :  * locale-aware for detecting cased characters.
    1061             :  */
    1062             : static Pattern_Prefix_Status
    1063         238 : like_fixed_prefix_ci(Const *patt_const, Oid collation, Const **prefix_const,
    1064             :                      Selectivity *rest_selec)
    1065             : {
    1066         238 :     text       *val = DatumGetTextPP(patt_const->constvalue);
    1067         238 :     Oid         typeid = patt_const->consttype;
    1068         238 :     int         nbytes = VARSIZE_ANY_EXHDR(val);
    1069             :     int         wpos;
    1070             :     pg_wchar   *wpatt;
    1071             :     int         wpattlen;
    1072             :     pg_wchar   *wmatch;
    1073         238 :     int         wmatch_pos = 0;
    1074             :     char       *match;
    1075             :     int         match_mblen;
    1076         238 :     pg_locale_t locale = 0;
    1077             : 
    1078             :     /* the right-hand const is type text or bytea */
    1079             :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
    1080             : 
    1081         238 :     if (typeid == BYTEAOID)
    1082           0 :         ereport(ERROR,
    1083             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1084             :                  errmsg("case insensitive matching not supported on type bytea")));
    1085             : 
    1086         238 :     if (!OidIsValid(collation))
    1087             :     {
    1088             :         /*
    1089             :          * This typically means that the parser could not resolve a conflict
    1090             :          * of implicit collations, so report it that way.
    1091             :          */
    1092           0 :         ereport(ERROR,
    1093             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1094             :                  errmsg("could not determine which collation to use for ILIKE"),
    1095             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1096             :     }
    1097             : 
    1098         238 :     locale = pg_newlocale_from_collation(collation);
    1099             : 
    1100         238 :     wpatt = palloc((nbytes + 1) * sizeof(pg_wchar));
    1101         238 :     wpattlen = pg_mb2wchar_with_len(VARDATA_ANY(val), wpatt, nbytes);
    1102             : 
    1103         238 :     wmatch = palloc((nbytes + 1) * sizeof(pg_wchar));
    1104         334 :     for (wpos = 0; wpos < wpattlen; wpos++)
    1105             :     {
    1106             :         /* % and _ are wildcard characters in LIKE */
    1107         334 :         if (wpatt[wpos] == '%' ||
    1108         270 :             wpatt[wpos] == '_')
    1109             :             break;
    1110             : 
    1111             :         /* Backslash escapes the next character */
    1112         270 :         if (wpatt[wpos] == '\\')
    1113             :         {
    1114           0 :             wpos++;
    1115           0 :             if (wpos >= wpattlen)
    1116           0 :                 break;
    1117             :         }
    1118             : 
    1119             :         /*
    1120             :          * For ILIKE, stop if it's a case-varying character (it's sort of a
    1121             :          * wildcard).
    1122             :          */
    1123         270 :         if (pg_iswcased(wpatt[wpos], locale))
    1124         174 :             break;
    1125             : 
    1126          96 :         wmatch[wmatch_pos++] = wpatt[wpos];
    1127             :     }
    1128             : 
    1129         238 :     wmatch[wmatch_pos] = '\0';
    1130             : 
    1131         238 :     match = palloc(pg_database_encoding_max_length() * wmatch_pos + 1);
    1132         238 :     match_mblen = pg_wchar2mb_with_len(wmatch, match, wmatch_pos);
    1133         238 :     match[match_mblen] = '\0';
    1134         238 :     pfree(wmatch);
    1135             : 
    1136         238 :     *prefix_const = string_to_const(match, TEXTOID);
    1137         238 :     pfree(match);
    1138             : 
    1139         238 :     if (rest_selec != NULL)
    1140             :     {
    1141         120 :         int         wrestlen = wpattlen - wmatch_pos;
    1142             :         char       *rest;
    1143             :         int         rest_mblen;
    1144             : 
    1145         120 :         rest = palloc(pg_database_encoding_max_length() * wrestlen + 1);
    1146         120 :         rest_mblen = pg_wchar2mb_with_len(&wpatt[wmatch_pos], rest, wrestlen);
    1147             : 
    1148         120 :         *rest_selec = like_selectivity(rest, rest_mblen, true);
    1149         120 :         pfree(rest);
    1150             :     }
    1151             : 
    1152         238 :     pfree(wpatt);
    1153             : 
    1154             :     /* in LIKE, an empty pattern is an exact match! */
    1155         238 :     if (wpos == wpattlen)
    1156           0 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1157             : 
    1158         238 :     if (wmatch_pos > 0)
    1159          48 :         return Pattern_Prefix_Partial;
    1160             : 
    1161         190 :     return Pattern_Prefix_None;
    1162             : }
    1163             : 
    1164             : static Pattern_Prefix_Status
    1165       16864 : regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
    1166             :                    Const **prefix_const, Selectivity *rest_selec)
    1167             : {
    1168       16864 :     Oid         typeid = patt_const->consttype;
    1169             :     char       *prefix;
    1170             :     bool        exact;
    1171             : 
    1172             :     /*
    1173             :      * Should be unnecessary, there are no bytea regex operators defined. As
    1174             :      * such, it should be noted that the rest of this function has *not* been
    1175             :      * made safe for binary (possibly NULL containing) strings.
    1176             :      */
    1177       16864 :     if (typeid == BYTEAOID)
    1178           0 :         ereport(ERROR,
    1179             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1180             :                  errmsg("regular-expression matching not supported on type bytea")));
    1181             : 
    1182             :     /* Use the regexp machinery to extract the prefix, if any */
    1183       16864 :     prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
    1184             :                                  case_insensitive, collation,
    1185             :                                  &exact);
    1186             : 
    1187       16840 :     if (prefix == NULL)
    1188             :     {
    1189         762 :         *prefix_const = NULL;
    1190             : 
    1191         762 :         if (rest_selec != NULL)
    1192             :         {
    1193         626 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1194             : 
    1195         626 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1196             :                                             case_insensitive,
    1197             :                                             0);
    1198         626 :             pfree(patt);
    1199             :         }
    1200             : 
    1201         762 :         return Pattern_Prefix_None;
    1202             :     }
    1203             : 
    1204       16078 :     *prefix_const = string_to_const(prefix, typeid);
    1205             : 
    1206       16078 :     if (rest_selec != NULL)
    1207             :     {
    1208        9290 :         if (exact)
    1209             :         {
    1210             :             /* Exact match, so there's no additional selectivity */
    1211        6602 :             *rest_selec = 1.0;
    1212             :         }
    1213             :         else
    1214             :         {
    1215        2688 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1216             : 
    1217        5376 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1218             :                                             case_insensitive,
    1219        2688 :                                             strlen(prefix));
    1220        2688 :             pfree(patt);
    1221             :         }
    1222             :     }
    1223             : 
    1224       16078 :     pfree(prefix);
    1225             : 
    1226       16078 :     if (exact)
    1227       13018 :         return Pattern_Prefix_Exact;    /* pattern specifies exact match */
    1228             :     else
    1229        3060 :         return Pattern_Prefix_Partial;
    1230             : }
    1231             : 
    1232             : static Pattern_Prefix_Status
    1233       19940 : pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
    1234             :                      Const **prefix, Selectivity *rest_selec)
    1235             : {
    1236             :     Pattern_Prefix_Status result;
    1237             : 
    1238       19940 :     switch (ptype)
    1239             :     {
    1240        2736 :         case Pattern_Type_Like:
    1241        2736 :             result = like_fixed_prefix(patt, prefix, rest_selec);
    1242        2736 :             break;
    1243         238 :         case Pattern_Type_Like_IC:
    1244         238 :             result = like_fixed_prefix_ci(patt, collation, prefix,
    1245             :                                           rest_selec);
    1246         238 :             break;
    1247       16802 :         case Pattern_Type_Regex:
    1248       16802 :             result = regex_fixed_prefix(patt, false, collation,
    1249             :                                         prefix, rest_selec);
    1250       16778 :             break;
    1251          62 :         case Pattern_Type_Regex_IC:
    1252          62 :             result = regex_fixed_prefix(patt, true, collation,
    1253             :                                         prefix, rest_selec);
    1254          62 :             break;
    1255         102 :         case Pattern_Type_Prefix:
    1256             :             /* Prefix type work is trivial.  */
    1257         102 :             result = Pattern_Prefix_Partial;
    1258         102 :             *prefix = makeConst(patt->consttype,
    1259             :                                 patt->consttypmod,
    1260             :                                 patt->constcollid,
    1261             :                                 patt->constlen,
    1262             :                                 datumCopy(patt->constvalue,
    1263         102 :                                           patt->constbyval,
    1264             :                                           patt->constlen),
    1265         102 :                                 patt->constisnull,
    1266         102 :                                 patt->constbyval);
    1267         102 :             if (rest_selec != NULL)
    1268          78 :                 *rest_selec = 1.0;  /* all */
    1269         102 :             break;
    1270           0 :         default:
    1271           0 :             elog(ERROR, "unrecognized ptype: %d", (int) ptype);
    1272             :             result = Pattern_Prefix_None;   /* keep compiler quiet */
    1273             :             break;
    1274             :     }
    1275       19916 :     return result;
    1276             : }
    1277             : 
    1278             : /*
    1279             :  * Estimate the selectivity of a fixed prefix for a pattern match.
    1280             :  *
    1281             :  * A fixed prefix "foo" is estimated as the selectivity of the expression
    1282             :  * "variable >= 'foo' AND variable < 'fop'".
    1283             :  *
    1284             :  * The selectivity estimate is with respect to the portion of the column
    1285             :  * population represented by the histogram --- the caller must fold this
    1286             :  * together with info about MCVs and NULLs.
    1287             :  *
    1288             :  * We use the given comparison operators and collation to do the estimation.
    1289             :  * The given variable and Const must be of the associated datatype(s).
    1290             :  *
    1291             :  * XXX Note: we make use of the upper bound to estimate operator selectivity
    1292             :  * even if the locale is such that we cannot rely on the upper-bound string.
    1293             :  * The selectivity only needs to be approximately right anyway, so it seems
    1294             :  * more useful to use the upper-bound code than not.
    1295             :  */
    1296             : static Selectivity
    1297        3006 : prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
    1298             :                    Oid eqopr, Oid ltopr, Oid geopr,
    1299             :                    Oid collation,
    1300             :                    Const *prefixcon)
    1301             : {
    1302             :     Selectivity prefixsel;
    1303             :     FmgrInfo    opproc;
    1304             :     Const      *greaterstrcon;
    1305             :     Selectivity eq_sel;
    1306             : 
    1307             :     /* Estimate the selectivity of "x >= prefix" */
    1308        3006 :     fmgr_info(get_opcode(geopr), &opproc);
    1309             : 
    1310        3006 :     prefixsel = ineq_histogram_selectivity(root, vardata,
    1311             :                                            geopr, &opproc, true, true,
    1312             :                                            collation,
    1313             :                                            prefixcon->constvalue,
    1314             :                                            prefixcon->consttype);
    1315             : 
    1316        3006 :     if (prefixsel < 0.0)
    1317             :     {
    1318             :         /* No histogram is present ... return a suitable default estimate */
    1319         702 :         return DEFAULT_MATCH_SEL;
    1320             :     }
    1321             : 
    1322             :     /*
    1323             :      * If we can create a string larger than the prefix, say "x < greaterstr".
    1324             :      */
    1325        2304 :     fmgr_info(get_opcode(ltopr), &opproc);
    1326        2304 :     greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
    1327        2304 :     if (greaterstrcon)
    1328             :     {
    1329             :         Selectivity topsel;
    1330             : 
    1331        2304 :         topsel = ineq_histogram_selectivity(root, vardata,
    1332             :                                             ltopr, &opproc, false, false,
    1333             :                                             collation,
    1334             :                                             greaterstrcon->constvalue,
    1335             :                                             greaterstrcon->consttype);
    1336             : 
    1337             :         /* ineq_histogram_selectivity worked before, it shouldn't fail now */
    1338             :         Assert(topsel >= 0.0);
    1339             : 
    1340             :         /*
    1341             :          * Merge the two selectivities in the same way as for a range query
    1342             :          * (see clauselist_selectivity()).  Note that we don't need to worry
    1343             :          * about double-exclusion of nulls, since ineq_histogram_selectivity
    1344             :          * doesn't count those anyway.
    1345             :          */
    1346        2304 :         prefixsel = topsel + prefixsel - 1.0;
    1347             :     }
    1348             : 
    1349             :     /*
    1350             :      * If the prefix is long then the two bounding values might be too close
    1351             :      * together for the histogram to distinguish them usefully, resulting in a
    1352             :      * zero estimate (plus or minus roundoff error). To avoid returning a
    1353             :      * ridiculously small estimate, compute the estimated selectivity for
    1354             :      * "variable = 'foo'", and clamp to that. (Obviously, the resultant
    1355             :      * estimate should be at least that.)
    1356             :      *
    1357             :      * We apply this even if we couldn't make a greater string.  That case
    1358             :      * suggests that the prefix is near the maximum possible, and thus
    1359             :      * probably off the end of the histogram, and thus we probably got a very
    1360             :      * small estimate from the >= condition; so we still need to clamp.
    1361             :      */
    1362        2304 :     eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
    1363             :                           false, true, false);
    1364             : 
    1365        2304 :     prefixsel = Max(prefixsel, eq_sel);
    1366             : 
    1367        2304 :     return prefixsel;
    1368             : }
    1369             : 
    1370             : 
    1371             : /*
    1372             :  * Estimate the selectivity of a pattern of the specified type.
    1373             :  * Note that any fixed prefix of the pattern will have been removed already,
    1374             :  * so actually we may be looking at just a fragment of the pattern.
    1375             :  *
    1376             :  * For now, we use a very simplistic approach: fixed characters reduce the
    1377             :  * selectivity a good deal, character ranges reduce it a little,
    1378             :  * wildcards (such as % for LIKE or .* for regex) increase it.
    1379             :  */
    1380             : 
    1381             : #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
    1382             : #define CHAR_RANGE_SEL  0.25
    1383             : #define ANY_CHAR_SEL    0.9     /* not 1, since it won't match end-of-string */
    1384             : #define FULL_WILDCARD_SEL 5.0
    1385             : #define PARTIAL_WILDCARD_SEL 2.0
    1386             : 
    1387             : static Selectivity
    1388        1828 : like_selectivity(const char *patt, int pattlen, bool case_insensitive)
    1389             : {
    1390        1828 :     Selectivity sel = 1.0;
    1391             :     int         pos;
    1392             : 
    1393             :     /* Skip any leading wildcard; it's already factored into initial sel */
    1394        3530 :     for (pos = 0; pos < pattlen; pos++)
    1395             :     {
    1396        2588 :         if (patt[pos] != '%' && patt[pos] != '_')
    1397         886 :             break;
    1398             :     }
    1399             : 
    1400        7294 :     for (; pos < pattlen; pos++)
    1401             :     {
    1402             :         /* % and _ are wildcard characters in LIKE */
    1403        5466 :         if (patt[pos] == '%')
    1404         782 :             sel *= FULL_WILDCARD_SEL;
    1405        4684 :         else if (patt[pos] == '_')
    1406         192 :             sel *= ANY_CHAR_SEL;
    1407        4492 :         else if (patt[pos] == '\\')
    1408             :         {
    1409             :             /* Backslash quotes the next character */
    1410          40 :             pos++;
    1411          40 :             if (pos >= pattlen)
    1412           0 :                 break;
    1413          40 :             sel *= FIXED_CHAR_SEL;
    1414             :         }
    1415             :         else
    1416        4452 :             sel *= FIXED_CHAR_SEL;
    1417             :     }
    1418             :     /* Could get sel > 1 if multiple wildcards */
    1419        1828 :     if (sel > 1.0)
    1420           0 :         sel = 1.0;
    1421        1828 :     return sel;
    1422             : }
    1423             : 
    1424             : static Selectivity
    1425        3746 : regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
    1426             : {
    1427        3746 :     Selectivity sel = 1.0;
    1428        3746 :     int         paren_depth = 0;
    1429        3746 :     int         paren_pos = 0;  /* dummy init to keep compiler quiet */
    1430             :     int         pos;
    1431             : 
    1432             :     /* since this function recurses, it could be driven to stack overflow */
    1433        3746 :     check_stack_depth();
    1434             : 
    1435       40706 :     for (pos = 0; pos < pattlen; pos++)
    1436             :     {
    1437       36984 :         if (patt[pos] == '(')
    1438             :         {
    1439         438 :             if (paren_depth == 0)
    1440         414 :                 paren_pos = pos;    /* remember start of parenthesized item */
    1441         438 :             paren_depth++;
    1442             :         }
    1443       36546 :         else if (patt[pos] == ')' && paren_depth > 0)
    1444             :         {
    1445         432 :             paren_depth--;
    1446         432 :             if (paren_depth == 0)
    1447         408 :                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
    1448         408 :                                              pos - (paren_pos + 1),
    1449             :                                              case_insensitive);
    1450             :         }
    1451       36114 :         else if (patt[pos] == '|' && paren_depth == 0)
    1452             :         {
    1453             :             /*
    1454             :              * If unquoted | is present at paren level 0 in pattern, we have
    1455             :              * multiple alternatives; sum their probabilities.
    1456             :              */
    1457          48 :             sel += regex_selectivity_sub(patt + (pos + 1),
    1458          24 :                                          pattlen - (pos + 1),
    1459             :                                          case_insensitive);
    1460          24 :             break;              /* rest of pattern is now processed */
    1461             :         }
    1462       36090 :         else if (patt[pos] == '[')
    1463             :         {
    1464         252 :             bool        negclass = false;
    1465             : 
    1466         252 :             if (patt[++pos] == '^')
    1467             :             {
    1468          60 :                 negclass = true;
    1469          60 :                 pos++;
    1470             :             }
    1471         252 :             if (patt[pos] == ']')   /* ']' at start of class is not special */
    1472          24 :                 pos++;
    1473        1316 :             while (pos < pattlen && patt[pos] != ']')
    1474        1064 :                 pos++;
    1475         252 :             if (paren_depth == 0)
    1476         162 :                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
    1477             :         }
    1478       35838 :         else if (patt[pos] == '.')
    1479             :         {
    1480         926 :             if (paren_depth == 0)
    1481         526 :                 sel *= ANY_CHAR_SEL;
    1482             :         }
    1483       34912 :         else if (patt[pos] == '*' ||
    1484       34100 :                  patt[pos] == '?' ||
    1485       33934 :                  patt[pos] == '+')
    1486             :         {
    1487             :             /* Ought to be smarter about quantifiers... */
    1488         992 :             if (paren_depth == 0)
    1489         534 :                 sel *= PARTIAL_WILDCARD_SEL;
    1490             :         }
    1491       33920 :         else if (patt[pos] == '{')
    1492             :         {
    1493         264 :             while (pos < pattlen && patt[pos] != '}')
    1494         188 :                 pos++;
    1495          76 :             if (paren_depth == 0)
    1496          64 :                 sel *= PARTIAL_WILDCARD_SEL;
    1497             :         }
    1498       33844 :         else if (patt[pos] == '\\')
    1499             :         {
    1500             :             /* backslash quotes the next character */
    1501         296 :             pos++;
    1502         296 :             if (pos >= pattlen)
    1503           0 :                 break;
    1504         296 :             if (paren_depth == 0)
    1505         152 :                 sel *= FIXED_CHAR_SEL;
    1506             :         }
    1507             :         else
    1508             :         {
    1509       33548 :             if (paren_depth == 0)
    1510       30772 :                 sel *= FIXED_CHAR_SEL;
    1511             :         }
    1512             :     }
    1513             :     /* Could get sel > 1 if multiple wildcards */
    1514        3746 :     if (sel > 1.0)
    1515          26 :         sel = 1.0;
    1516        3746 :     return sel;
    1517             : }
    1518             : 
    1519             : static Selectivity
    1520        3314 : regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
    1521             :                   int fixed_prefix_len)
    1522             : {
    1523             :     Selectivity sel;
    1524             : 
    1525             :     /* If patt doesn't end with $, consider it to have a trailing wildcard */
    1526        3314 :     if (pattlen > 0 && patt[pattlen - 1] == '$' &&
    1527         414 :         (pattlen == 1 || patt[pattlen - 2] != '\\'))
    1528             :     {
    1529             :         /* has trailing $ */
    1530         414 :         sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
    1531             :     }
    1532             :     else
    1533             :     {
    1534             :         /* no trailing $ */
    1535        2900 :         sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
    1536        2900 :         sel *= FULL_WILDCARD_SEL;
    1537             :     }
    1538             : 
    1539             :     /*
    1540             :      * If there's a fixed prefix, discount its selectivity.  We have to be
    1541             :      * careful here since a very long prefix could result in pow's result
    1542             :      * underflowing to zero (in which case "sel" probably has as well).
    1543             :      */
    1544        3314 :     if (fixed_prefix_len > 0)
    1545             :     {
    1546        2688 :         double      prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
    1547             : 
    1548        2688 :         if (prefixsel > 0.0)
    1549        2688 :             sel /= prefixsel;
    1550             :     }
    1551             : 
    1552             :     /* Make sure result stays in range */
    1553        3314 :     CLAMP_PROBABILITY(sel);
    1554        3314 :     return sel;
    1555             : }
    1556             : 
    1557             : 
    1558             : /*
    1559             :  * For bytea, the increment function need only increment the current byte
    1560             :  * (there are no multibyte characters to worry about).
    1561             :  */
    1562             : static bool
    1563           0 : byte_increment(unsigned char *ptr, int len)
    1564             : {
    1565           0 :     if (*ptr >= 255)
    1566           0 :         return false;
    1567           0 :     (*ptr)++;
    1568           0 :     return true;
    1569             : }
    1570             : 
    1571             : /*
    1572             :  * Try to generate a string greater than the given string or any
    1573             :  * string it is a prefix of.  If successful, return a palloc'd string
    1574             :  * in the form of a Const node; else return NULL.
    1575             :  *
    1576             :  * The caller must provide the appropriate "less than" comparison function
    1577             :  * for testing the strings, along with the collation to use.
    1578             :  *
    1579             :  * The key requirement here is that given a prefix string, say "foo",
    1580             :  * we must be able to generate another string "fop" that is greater than
    1581             :  * all strings "foobar" starting with "foo".  We can test that we have
    1582             :  * generated a string greater than the prefix string, but in non-C collations
    1583             :  * that is not a bulletproof guarantee that an extension of the string might
    1584             :  * not sort after it; an example is that "foo " is less than "foo!", but it
    1585             :  * is not clear that a "dictionary" sort ordering will consider "foo!" less
    1586             :  * than "foo bar".  CAUTION: Therefore, this function should be used only for
    1587             :  * estimation purposes when working in a non-C collation.
    1588             :  *
    1589             :  * To try to catch most cases where an extended string might otherwise sort
    1590             :  * before the result value, we determine which of the strings "Z", "z", "y",
    1591             :  * and "9" is seen as largest by the collation, and append that to the given
    1592             :  * prefix before trying to find a string that compares as larger.
    1593             :  *
    1594             :  * To search for a greater string, we repeatedly "increment" the rightmost
    1595             :  * character, using an encoding-specific character incrementer function.
    1596             :  * When it's no longer possible to increment the last character, we truncate
    1597             :  * off that character and start incrementing the next-to-rightmost.
    1598             :  * For example, if "z" were the last character in the sort order, then we
    1599             :  * could produce "foo" as a string greater than "fonz".
    1600             :  *
    1601             :  * This could be rather slow in the worst case, but in most cases we
    1602             :  * won't have to try more than one or two strings before succeeding.
    1603             :  *
    1604             :  * Note that it's important for the character incrementer not to be too anal
    1605             :  * about producing every possible character code, since in some cases the only
    1606             :  * way to get a larger string is to increment a previous character position.
    1607             :  * So we don't want to spend too much time trying every possible character
    1608             :  * code at the last position.  A good rule of thumb is to be sure that we
    1609             :  * don't try more than 256*K values for a K-byte character (and definitely
    1610             :  * not 256^K, which is what an exhaustive search would approach).
    1611             :  */
    1612             : static Const *
    1613        3584 : make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
    1614             : {
    1615        3584 :     Oid         datatype = str_const->consttype;
    1616             :     char       *workstr;
    1617             :     int         len;
    1618             :     Datum       cmpstr;
    1619        3584 :     char       *cmptxt = NULL;
    1620             :     mbcharacter_incrementer charinc;
    1621             : 
    1622             :     /*
    1623             :      * Get a modifiable copy of the prefix string in C-string format, and set
    1624             :      * up the string we will compare to as a Datum.  In C locale this can just
    1625             :      * be the given prefix string, otherwise we need to add a suffix.  Type
    1626             :      * BYTEA sorts bytewise so it never needs a suffix either.
    1627             :      */
    1628        3584 :     if (datatype == BYTEAOID)
    1629             :     {
    1630           0 :         bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
    1631             : 
    1632           0 :         len = VARSIZE_ANY_EXHDR(bstr);
    1633           0 :         workstr = (char *) palloc(len);
    1634           0 :         memcpy(workstr, VARDATA_ANY(bstr), len);
    1635             :         Assert(bstr == DatumGetPointer(str_const->constvalue));
    1636           0 :         cmpstr = str_const->constvalue;
    1637             :     }
    1638             :     else
    1639             :     {
    1640        3584 :         if (datatype == NAMEOID)
    1641           0 :             workstr = DatumGetCString(DirectFunctionCall1(nameout,
    1642             :                                                           str_const->constvalue));
    1643             :         else
    1644        3584 :             workstr = TextDatumGetCString(str_const->constvalue);
    1645        3584 :         len = strlen(workstr);
    1646        3584 :         if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
    1647        3558 :             cmpstr = str_const->constvalue;
    1648             :         else
    1649             :         {
    1650             :             /* If first time through, determine the suffix to use */
    1651             :             static char suffixchar = 0;
    1652             :             static Oid  suffixcollation = 0;
    1653             : 
    1654          26 :             if (!suffixchar || suffixcollation != collation)
    1655             :             {
    1656             :                 char       *best;
    1657             : 
    1658           6 :                 best = "Z";
    1659           6 :                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
    1660           0 :                     best = "z";
    1661           6 :                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
    1662           0 :                     best = "y";
    1663           6 :                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
    1664           0 :                     best = "9";
    1665           6 :                 suffixchar = *best;
    1666           6 :                 suffixcollation = collation;
    1667             :             }
    1668             : 
    1669             :             /* And build the string to compare to */
    1670          26 :             if (datatype == NAMEOID)
    1671             :             {
    1672           0 :                 cmptxt = palloc(len + 2);
    1673           0 :                 memcpy(cmptxt, workstr, len);
    1674           0 :                 cmptxt[len] = suffixchar;
    1675           0 :                 cmptxt[len + 1] = '\0';
    1676           0 :                 cmpstr = PointerGetDatum(cmptxt);
    1677             :             }
    1678             :             else
    1679             :             {
    1680          26 :                 cmptxt = palloc(VARHDRSZ + len + 1);
    1681          26 :                 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
    1682          26 :                 memcpy(VARDATA(cmptxt), workstr, len);
    1683          26 :                 *(VARDATA(cmptxt) + len) = suffixchar;
    1684          26 :                 cmpstr = PointerGetDatum(cmptxt);
    1685             :             }
    1686             :         }
    1687             :     }
    1688             : 
    1689             :     /* Select appropriate character-incrementer function */
    1690        3584 :     if (datatype == BYTEAOID)
    1691           0 :         charinc = byte_increment;
    1692             :     else
    1693        3584 :         charinc = pg_database_encoding_character_incrementer();
    1694             : 
    1695             :     /* And search ... */
    1696        3584 :     while (len > 0)
    1697             :     {
    1698             :         int         charlen;
    1699             :         unsigned char *lastchar;
    1700             : 
    1701             :         /* Identify the last character --- for bytea, just the last byte */
    1702        3584 :         if (datatype == BYTEAOID)
    1703           0 :             charlen = 1;
    1704             :         else
    1705        3584 :             charlen = len - pg_mbcliplen(workstr, len, len - 1);
    1706        3584 :         lastchar = (unsigned char *) (workstr + len - charlen);
    1707             : 
    1708             :         /*
    1709             :          * Try to generate a larger string by incrementing the last character
    1710             :          * (for BYTEA, we treat each byte as a character).
    1711             :          *
    1712             :          * Note: the incrementer function is expected to return true if it's
    1713             :          * generated a valid-per-the-encoding new character, otherwise false.
    1714             :          * The contents of the character on false return are unspecified.
    1715             :          */
    1716        3584 :         while (charinc(lastchar, charlen))
    1717             :         {
    1718             :             Const      *workstr_const;
    1719             : 
    1720        3584 :             if (datatype == BYTEAOID)
    1721           0 :                 workstr_const = string_to_bytea_const(workstr, len);
    1722             :             else
    1723        3584 :                 workstr_const = string_to_const(workstr, datatype);
    1724             : 
    1725        3584 :             if (DatumGetBool(FunctionCall2Coll(ltproc,
    1726             :                                                collation,
    1727             :                                                cmpstr,
    1728             :                                                workstr_const->constvalue)))
    1729             :             {
    1730             :                 /* Successfully made a string larger than cmpstr */
    1731        3584 :                 if (cmptxt)
    1732          26 :                     pfree(cmptxt);
    1733        3584 :                 pfree(workstr);
    1734        3584 :                 return workstr_const;
    1735             :             }
    1736             : 
    1737             :             /* No good, release unusable value and try again */
    1738           0 :             pfree(DatumGetPointer(workstr_const->constvalue));
    1739           0 :             pfree(workstr_const);
    1740             :         }
    1741             : 
    1742             :         /*
    1743             :          * No luck here, so truncate off the last character and try to
    1744             :          * increment the next one.
    1745             :          */
    1746           0 :         len -= charlen;
    1747           0 :         workstr[len] = '\0';
    1748             :     }
    1749             : 
    1750             :     /* Failed... */
    1751           0 :     if (cmptxt)
    1752           0 :         pfree(cmptxt);
    1753           0 :     pfree(workstr);
    1754             : 
    1755           0 :     return NULL;
    1756             : }
    1757             : 
    1758             : /*
    1759             :  * Generate a Datum of the appropriate type from a C string.
    1760             :  * Note that all of the supported types are pass-by-ref, so the
    1761             :  * returned value should be pfree'd if no longer needed.
    1762             :  */
    1763             : static Datum
    1764       22624 : string_to_datum(const char *str, Oid datatype)
    1765             : {
    1766             :     Assert(str != NULL);
    1767             : 
    1768             :     /*
    1769             :      * We cheat a little by assuming that CStringGetTextDatum() will do for
    1770             :      * bpchar and varchar constants too...
    1771             :      */
    1772       22624 :     if (datatype == NAMEOID)
    1773           0 :         return DirectFunctionCall1(namein, CStringGetDatum(str));
    1774       22624 :     else if (datatype == BYTEAOID)
    1775           0 :         return DirectFunctionCall1(byteain, CStringGetDatum(str));
    1776             :     else
    1777       22624 :         return CStringGetTextDatum(str);
    1778             : }
    1779             : 
    1780             : /*
    1781             :  * Generate a Const node of the appropriate type from a C string.
    1782             :  */
    1783             : static Const *
    1784       22624 : string_to_const(const char *str, Oid datatype)
    1785             : {
    1786       22624 :     Datum       conval = string_to_datum(str, datatype);
    1787             :     Oid         collation;
    1788             :     int         constlen;
    1789             : 
    1790             :     /*
    1791             :      * We only need to support a few datatypes here, so hard-wire properties
    1792             :      * instead of incurring the expense of catalog lookups.
    1793             :      */
    1794       22624 :     switch (datatype)
    1795             :     {
    1796       22624 :         case TEXTOID:
    1797             :         case VARCHAROID:
    1798             :         case BPCHAROID:
    1799       22624 :             collation = DEFAULT_COLLATION_OID;
    1800       22624 :             constlen = -1;
    1801       22624 :             break;
    1802             : 
    1803           0 :         case NAMEOID:
    1804           0 :             collation = C_COLLATION_OID;
    1805           0 :             constlen = NAMEDATALEN;
    1806           0 :             break;
    1807             : 
    1808           0 :         case BYTEAOID:
    1809           0 :             collation = InvalidOid;
    1810           0 :             constlen = -1;
    1811           0 :             break;
    1812             : 
    1813           0 :         default:
    1814           0 :             elog(ERROR, "unexpected datatype in string_to_const: %u",
    1815             :                  datatype);
    1816             :             return NULL;
    1817             :     }
    1818             : 
    1819       22624 :     return makeConst(datatype, -1, collation, constlen,
    1820             :                      conval, false, false);
    1821             : }
    1822             : 
    1823             : /*
    1824             :  * Generate a Const node of bytea type from a binary C string and a length.
    1825             :  */
    1826             : static Const *
    1827          12 : string_to_bytea_const(const char *str, size_t str_len)
    1828             : {
    1829          12 :     bytea      *bstr = palloc(VARHDRSZ + str_len);
    1830             :     Datum       conval;
    1831             : 
    1832          12 :     memcpy(VARDATA(bstr), str, str_len);
    1833          12 :     SET_VARSIZE(bstr, VARHDRSZ + str_len);
    1834          12 :     conval = PointerGetDatum(bstr);
    1835             : 
    1836          12 :     return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
    1837             : }

Generated by: LCOV version 1.16