LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like_support.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 83.8 % 580 486
Test Date: 2026-02-17 17:20:33 Functions: 78.0 % 41 32
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * like_support.c
       4              :  *    Planner support functions for LIKE, regex, and related operators.
       5              :  *
       6              :  * These routines handle special optimization of operators that can be
       7              :  * used with index scans even though they are not known to the executor's
       8              :  * indexscan machinery.  The key idea is that these operators allow us
       9              :  * to derive approximate indexscan qual clauses, such that any tuples
      10              :  * that pass the operator clause itself must also satisfy the simpler
      11              :  * indexscan condition(s).  Then we can use the indexscan machinery
      12              :  * to avoid scanning as much of the table as we'd otherwise have to,
      13              :  * while applying the original operator as a qpqual condition to ensure
      14              :  * we deliver only the tuples we want.  (In essence, we're using a regular
      15              :  * index as if it were a lossy index.)
      16              :  *
      17              :  * An example of what we're doing is
      18              :  *          textfield LIKE 'abc%def'
      19              :  * from which we can generate the index-scannable conditions
      20              :  *          textfield >= 'abc' AND textfield < 'abd'
      21              :  * which allow efficient scanning of an index on textfield.
      22              :  * (In reality, character set and collation issues make the transformation
      23              :  * from LIKE to indexscan limits rather harder than one might think ...
      24              :  * but that's the basic idea.)
      25              :  *
      26              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      27              :  * Portions Copyright (c) 1994, Regents of the University of California
      28              :  *
      29              :  *
      30              :  * IDENTIFICATION
      31              :  *    src/backend/utils/adt/like_support.c
      32              :  *
      33              :  *-------------------------------------------------------------------------
      34              :  */
      35              : #include "postgres.h"
      36              : 
      37              : #include <math.h>
      38              : 
      39              : #include "access/htup_details.h"
      40              : #include "catalog/pg_collation.h"
      41              : #include "catalog/pg_operator.h"
      42              : #include "catalog/pg_opfamily.h"
      43              : #include "catalog/pg_statistic.h"
      44              : #include "catalog/pg_type.h"
      45              : #include "mb/pg_wchar.h"
      46              : #include "miscadmin.h"
      47              : #include "nodes/makefuncs.h"
      48              : #include "nodes/nodeFuncs.h"
      49              : #include "nodes/supportnodes.h"
      50              : #include "utils/builtins.h"
      51              : #include "utils/datum.h"
      52              : #include "utils/lsyscache.h"
      53              : #include "utils/pg_locale.h"
      54              : #include "utils/selfuncs.h"
      55              : #include "utils/varlena.h"
      56              : 
      57              : 
      58              : typedef enum
      59              : {
      60              :     Pattern_Type_Like,
      61              :     Pattern_Type_Like_IC,
      62              :     Pattern_Type_Regex,
      63              :     Pattern_Type_Regex_IC,
      64              :     Pattern_Type_Prefix,
      65              : } Pattern_Type;
      66              : 
      67              : typedef enum
      68              : {
      69              :     Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact,
      70              : } Pattern_Prefix_Status;
      71              : 
      72              : static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
      73              : static List *match_pattern_prefix(Node *leftop,
      74              :                                   Node *rightop,
      75              :                                   Pattern_Type ptype,
      76              :                                   Oid expr_coll,
      77              :                                   Oid opfamily,
      78              :                                   Oid indexcollation);
      79              : static double patternsel_common(PlannerInfo *root,
      80              :                                 Oid oprid,
      81              :                                 Oid opfuncid,
      82              :                                 List *args,
      83              :                                 int varRelid,
      84              :                                 Oid collation,
      85              :                                 Pattern_Type ptype,
      86              :                                 bool negate);
      87              : static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
      88              :                                                   Pattern_Type ptype,
      89              :                                                   Oid collation,
      90              :                                                   Const **prefix,
      91              :                                                   Selectivity *rest_selec);
      92              : static Selectivity prefix_selectivity(PlannerInfo *root,
      93              :                                       VariableStatData *vardata,
      94              :                                       Oid eqopr, Oid ltopr, Oid geopr,
      95              :                                       Oid collation,
      96              :                                       Const *prefixcon);
      97              : static Selectivity like_selectivity(const char *patt, int pattlen,
      98              :                                     bool case_insensitive);
      99              : static Selectivity regex_selectivity(const char *patt, int pattlen,
     100              :                                      bool case_insensitive,
     101              :                                      int fixed_prefix_len);
     102              : static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
     103              :                                   Oid collation);
     104              : static Datum string_to_datum(const char *str, Oid datatype);
     105              : static Const *string_to_const(const char *str, Oid datatype);
     106              : static Const *string_to_bytea_const(const char *str, size_t str_len);
     107              : 
     108              : 
     109              : /*
     110              :  * Planner support functions for LIKE, regex, and related operators
     111              :  */
     112              : Datum
     113         2600 : textlike_support(PG_FUNCTION_ARGS)
     114              : {
     115         2600 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     116              : 
     117         2600 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
     118              : }
     119              : 
     120              : Datum
     121          176 : texticlike_support(PG_FUNCTION_ARGS)
     122              : {
     123          176 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     124              : 
     125          176 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
     126              : }
     127              : 
     128              : Datum
     129        11507 : textregexeq_support(PG_FUNCTION_ARGS)
     130              : {
     131        11507 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     132              : 
     133        11507 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
     134              : }
     135              : 
     136              : Datum
     137           59 : texticregexeq_support(PG_FUNCTION_ARGS)
     138              : {
     139           59 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     140              : 
     141           59 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
     142              : }
     143              : 
     144              : Datum
     145           78 : text_starts_with_support(PG_FUNCTION_ARGS)
     146              : {
     147           78 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     148              : 
     149           78 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
     150              : }
     151              : 
     152              : /* Common code for the above */
     153              : static Node *
     154        14420 : like_regex_support(Node *rawreq, Pattern_Type ptype)
     155              : {
     156        14420 :     Node       *ret = NULL;
     157              : 
     158        14420 :     if (IsA(rawreq, SupportRequestSelectivity))
     159              :     {
     160              :         /*
     161              :          * Make a selectivity estimate for a function call, just as we'd do if
     162              :          * the call was via the corresponding operator.
     163              :          */
     164           12 :         SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
     165              :         Selectivity s1;
     166              : 
     167           12 :         if (req->is_join)
     168              :         {
     169              :             /*
     170              :              * For the moment we just punt.  If patternjoinsel is ever
     171              :              * improved to do better, this should be made to call it.
     172              :              */
     173            0 :             s1 = DEFAULT_MATCH_SEL;
     174              :         }
     175              :         else
     176              :         {
     177              :             /* Share code with operator restriction selectivity functions */
     178           12 :             s1 = patternsel_common(req->root,
     179              :                                    InvalidOid,
     180              :                                    req->funcid,
     181              :                                    req->args,
     182              :                                    req->varRelid,
     183              :                                    req->inputcollid,
     184              :                                    ptype,
     185              :                                    false);
     186              :         }
     187           12 :         req->selectivity = s1;
     188           12 :         ret = (Node *) req;
     189              :     }
     190        14408 :     else if (IsA(rawreq, SupportRequestIndexCondition))
     191              :     {
     192              :         /* Try to convert operator/function call to index conditions */
     193         4069 :         SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
     194              : 
     195              :         /*
     196              :          * Currently we have no "reverse" match operators with the pattern on
     197              :          * the left, so we only need consider cases with the indexkey on the
     198              :          * left.
     199              :          */
     200         4069 :         if (req->indexarg != 0)
     201            0 :             return NULL;
     202              : 
     203         4069 :         if (is_opclause(req->node))
     204              :         {
     205         4057 :             OpExpr     *clause = (OpExpr *) req->node;
     206              : 
     207              :             Assert(list_length(clause->args) == 2);
     208              :             ret = (Node *)
     209         4057 :                 match_pattern_prefix((Node *) linitial(clause->args),
     210         4057 :                                      (Node *) lsecond(clause->args),
     211              :                                      ptype,
     212              :                                      clause->inputcollid,
     213              :                                      req->opfamily,
     214              :                                      req->indexcollation);
     215              :         }
     216           12 :         else if (is_funcclause(req->node))   /* be paranoid */
     217              :         {
     218           12 :             FuncExpr   *clause = (FuncExpr *) req->node;
     219              : 
     220              :             Assert(list_length(clause->args) == 2);
     221              :             ret = (Node *)
     222           12 :                 match_pattern_prefix((Node *) linitial(clause->args),
     223           12 :                                      (Node *) lsecond(clause->args),
     224              :                                      ptype,
     225              :                                      clause->inputcollid,
     226              :                                      req->opfamily,
     227              :                                      req->indexcollation);
     228              :         }
     229              :     }
     230              : 
     231        14420 :     return ret;
     232              : }
     233              : 
     234              : /*
     235              :  * match_pattern_prefix
     236              :  *    Try to generate an indexqual for a LIKE or regex operator.
     237              :  */
     238              : static List *
     239         4069 : match_pattern_prefix(Node *leftop,
     240              :                      Node *rightop,
     241              :                      Pattern_Type ptype,
     242              :                      Oid expr_coll,
     243              :                      Oid opfamily,
     244              :                      Oid indexcollation)
     245              : {
     246              :     List       *result;
     247              :     Const      *patt;
     248              :     Const      *prefix;
     249              :     Pattern_Prefix_Status pstatus;
     250              :     Oid         ldatatype;
     251              :     Oid         rdatatype;
     252              :     Oid         eqopr;
     253              :     Oid         ltopr;
     254              :     Oid         geopr;
     255         4069 :     Oid         preopr = InvalidOid;
     256              :     bool        collation_aware;
     257              :     Expr       *expr;
     258              :     FmgrInfo    ltproc;
     259              :     Const      *greaterstr;
     260              : 
     261              :     /*
     262              :      * Can't do anything with a non-constant or NULL pattern argument.
     263              :      *
     264              :      * Note that since we restrict ourselves to cases with a hard constant on
     265              :      * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
     266              :      * about verifying that.
     267              :      */
     268         4069 :     if (!IsA(rightop, Const) ||
     269         4045 :         ((Const *) rightop)->constisnull)
     270           24 :         return NIL;
     271         4045 :     patt = (Const *) rightop;
     272              : 
     273              :     /*
     274              :      * Try to extract a fixed prefix from the pattern.
     275              :      */
     276         4045 :     pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
     277              :                                    &prefix, NULL);
     278              : 
     279              :     /* fail if no fixed prefix */
     280         4045 :     if (pstatus == Pattern_Prefix_None)
     281          149 :         return NIL;
     282              : 
     283              :     /*
     284              :      * Identify the operators we want to use, based on the type of the
     285              :      * left-hand argument.  Usually these are just the type's regular
     286              :      * comparison operators, but if we are considering one of the semi-legacy
     287              :      * "pattern" opclasses, use the "pattern" operators instead.  Those are
     288              :      * not collation-sensitive but always use C collation, as we want.  The
     289              :      * selected operators also determine the needed type of the prefix
     290              :      * constant.
     291              :      */
     292         3896 :     ldatatype = exprType(leftop);
     293         3896 :     switch (ldatatype)
     294              :     {
     295           40 :         case TEXTOID:
     296           40 :             if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
     297              :             {
     298            0 :                 eqopr = TextEqualOperator;
     299            0 :                 ltopr = TextPatternLessOperator;
     300            0 :                 geopr = TextPatternGreaterEqualOperator;
     301            0 :                 collation_aware = false;
     302              :             }
     303           40 :             else if (opfamily == TEXT_SPGIST_FAM_OID)
     304              :             {
     305           12 :                 eqopr = TextEqualOperator;
     306           12 :                 ltopr = TextPatternLessOperator;
     307           12 :                 geopr = TextPatternGreaterEqualOperator;
     308              :                 /* This opfamily has direct support for prefixing */
     309           12 :                 preopr = TextPrefixOperator;
     310           12 :                 collation_aware = false;
     311              :             }
     312              :             else
     313              :             {
     314           28 :                 eqopr = TextEqualOperator;
     315           28 :                 ltopr = TextLessOperator;
     316           28 :                 geopr = TextGreaterEqualOperator;
     317           28 :                 collation_aware = true;
     318              :             }
     319           40 :             rdatatype = TEXTOID;
     320           40 :             break;
     321         3844 :         case NAMEOID:
     322              : 
     323              :             /*
     324              :              * Note that here, we need the RHS type to be text, so that the
     325              :              * comparison value isn't improperly truncated to NAMEDATALEN.
     326              :              */
     327         3844 :             eqopr = NameEqualTextOperator;
     328         3844 :             ltopr = NameLessTextOperator;
     329         3844 :             geopr = NameGreaterEqualTextOperator;
     330         3844 :             collation_aware = true;
     331         3844 :             rdatatype = TEXTOID;
     332         3844 :             break;
     333           12 :         case BPCHAROID:
     334           12 :             if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
     335              :             {
     336            0 :                 eqopr = BpcharEqualOperator;
     337            0 :                 ltopr = BpcharPatternLessOperator;
     338            0 :                 geopr = BpcharPatternGreaterEqualOperator;
     339            0 :                 collation_aware = false;
     340              :             }
     341              :             else
     342              :             {
     343           12 :                 eqopr = BpcharEqualOperator;
     344           12 :                 ltopr = BpcharLessOperator;
     345           12 :                 geopr = BpcharGreaterEqualOperator;
     346           12 :                 collation_aware = true;
     347              :             }
     348           12 :             rdatatype = BPCHAROID;
     349           12 :             break;
     350            0 :         case BYTEAOID:
     351            0 :             eqopr = ByteaEqualOperator;
     352            0 :             ltopr = ByteaLessOperator;
     353            0 :             geopr = ByteaGreaterEqualOperator;
     354            0 :             collation_aware = false;
     355            0 :             rdatatype = BYTEAOID;
     356            0 :             break;
     357            0 :         default:
     358              :             /* Can't get here unless we're attached to the wrong operator */
     359            0 :             return NIL;
     360              :     }
     361              : 
     362              :     /*
     363              :      * If necessary, coerce the prefix constant to the right type.  The given
     364              :      * prefix constant is either text or bytea type, therefore the only case
     365              :      * where we need to do anything is when converting text to bpchar.  Those
     366              :      * two types are binary-compatible, so relabeling the Const node is
     367              :      * sufficient.
     368              :      */
     369         3896 :     if (prefix->consttype != rdatatype)
     370              :     {
     371              :         Assert(prefix->consttype == TEXTOID &&
     372              :                rdatatype == BPCHAROID);
     373           12 :         prefix->consttype = rdatatype;
     374              :     }
     375              : 
     376              :     /*
     377              :      * If we found an exact-match pattern, generate an "=" indexqual.
     378              :      *
     379              :      * Here and below, check to see whether the desired operator is actually
     380              :      * supported by the index opclass, and fail quietly if not.  This allows
     381              :      * us to not be concerned with specific opclasses (except for the legacy
     382              :      * "pattern" cases); any index that correctly implements the operators
     383              :      * will work.
     384              :      */
     385         3896 :     if (pstatus == Pattern_Prefix_Exact)
     386              :     {
     387         3228 :         if (!op_in_opfamily(eqopr, opfamily))
     388            6 :             return NIL;
     389         3222 :         if (indexcollation != expr_coll)
     390         3195 :             return NIL;
     391           27 :         expr = make_opclause(eqopr, BOOLOID, false,
     392              :                              (Expr *) leftop, (Expr *) prefix,
     393              :                              InvalidOid, indexcollation);
     394           27 :         result = list_make1(expr);
     395           27 :         return result;
     396              :     }
     397              : 
     398              :     /*
     399              :      * Anything other than Pattern_Prefix_Exact is not supported if the
     400              :      * expression collation is nondeterministic.  The optimized equality or
     401              :      * prefix tests use bytewise comparisons, which is not consistent with
     402              :      * nondeterministic collations.
     403              :      *
     404              :      * expr_coll is not set for a non-collation-aware data type such as bytea.
     405              :      */
     406          668 :     if (expr_coll && !get_collation_isdeterministic(expr_coll))
     407            3 :         return NIL;
     408              : 
     409              :     /*
     410              :      * Otherwise, we have a nonempty required prefix of the values.  Some
     411              :      * opclasses support prefix checks directly, otherwise we'll try to
     412              :      * generate a range constraint.
     413              :      */
     414          665 :     if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
     415              :     {
     416           12 :         expr = make_opclause(preopr, BOOLOID, false,
     417              :                              (Expr *) leftop, (Expr *) prefix,
     418              :                              InvalidOid, indexcollation);
     419           12 :         result = list_make1(expr);
     420           12 :         return result;
     421              :     }
     422              : 
     423              :     /*
     424              :      * Since we need a range constraint, it's only going to work reliably if
     425              :      * the index is collation-insensitive or has "C" collation.  Note that
     426              :      * here we are looking at the index's collation, not the expression's
     427              :      * collation -- this test is *not* dependent on the LIKE/regex operator's
     428              :      * collation.
     429              :      */
     430          653 :     if (collation_aware &&
     431          653 :         !pg_newlocale_from_collation(indexcollation)->collate_is_c)
     432            7 :         return NIL;
     433              : 
     434              :     /*
     435              :      * We can always say "x >= prefix".
     436              :      */
     437          646 :     if (!op_in_opfamily(geopr, opfamily))
     438            6 :         return NIL;
     439          640 :     expr = make_opclause(geopr, BOOLOID, false,
     440              :                          (Expr *) leftop, (Expr *) prefix,
     441              :                          InvalidOid, indexcollation);
     442          640 :     result = list_make1(expr);
     443              : 
     444              :     /*-------
     445              :      * If we can create a string larger than the prefix, we can say
     446              :      * "x < greaterstr".  NB: we rely on make_greater_string() to generate
     447              :      * a guaranteed-greater string, not just a probably-greater string.
     448              :      * In general this is only guaranteed in C locale, so we'd better be
     449              :      * using a C-locale index collation.
     450              :      *-------
     451              :      */
     452          640 :     if (!op_in_opfamily(ltopr, opfamily))
     453            0 :         return result;
     454          640 :     fmgr_info(get_opcode(ltopr), &ltproc);
     455          640 :     greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
     456          640 :     if (greaterstr)
     457              :     {
     458          640 :         expr = make_opclause(ltopr, BOOLOID, false,
     459              :                              (Expr *) leftop, (Expr *) greaterstr,
     460              :                              InvalidOid, indexcollation);
     461          640 :         result = lappend(result, expr);
     462              :     }
     463              : 
     464          640 :     return result;
     465              : }
     466              : 
     467              : 
     468              : /*
     469              :  * patternsel_common - generic code for pattern-match restriction selectivity.
     470              :  *
     471              :  * To support using this from either the operator or function paths, caller
     472              :  * may pass either operator OID or underlying function OID; we look up the
     473              :  * latter from the former if needed.  (We could just have patternsel() call
     474              :  * get_opcode(), but the work would be wasted if we don't have a need to
     475              :  * compare a fixed prefix to the pg_statistic data.)
     476              :  *
     477              :  * Note that oprid and/or opfuncid should be for the positive-match operator
     478              :  * even when negate is true.
     479              :  */
     480              : static double
     481         6187 : patternsel_common(PlannerInfo *root,
     482              :                   Oid oprid,
     483              :                   Oid opfuncid,
     484              :                   List *args,
     485              :                   int varRelid,
     486              :                   Oid collation,
     487              :                   Pattern_Type ptype,
     488              :                   bool negate)
     489              : {
     490              :     VariableStatData vardata;
     491              :     Node       *other;
     492              :     bool        varonleft;
     493              :     Datum       constval;
     494              :     Oid         consttype;
     495              :     Oid         vartype;
     496              :     Oid         rdatatype;
     497              :     Oid         eqopr;
     498              :     Oid         ltopr;
     499              :     Oid         geopr;
     500              :     Pattern_Prefix_Status pstatus;
     501              :     Const      *patt;
     502         6187 :     Const      *prefix = NULL;
     503         6187 :     Selectivity rest_selec = 0;
     504         6187 :     double      nullfrac = 0.0;
     505              :     double      result;
     506              : 
     507              :     /*
     508              :      * Initialize result to the appropriate default estimate depending on
     509              :      * whether it's a match or not-match operator.
     510              :      */
     511         6187 :     if (negate)
     512         1423 :         result = 1.0 - DEFAULT_MATCH_SEL;
     513              :     else
     514         4764 :         result = DEFAULT_MATCH_SEL;
     515              : 
     516              :     /*
     517              :      * If expression is not variable op constant, then punt and return the
     518              :      * default estimate.
     519              :      */
     520         6187 :     if (!get_restriction_variable(root, args, varRelid,
     521              :                                   &vardata, &other, &varonleft))
     522          222 :         return result;
     523         5965 :     if (!varonleft || !IsA(other, Const))
     524              :     {
     525           25 :         ReleaseVariableStats(vardata);
     526           25 :         return result;
     527              :     }
     528              : 
     529              :     /*
     530              :      * If the constant is NULL, assume operator is strict and return zero, ie,
     531              :      * operator will never return TRUE.  (It's zero even for a negator op.)
     532              :      */
     533         5940 :     if (((Const *) other)->constisnull)
     534              :     {
     535            0 :         ReleaseVariableStats(vardata);
     536            0 :         return 0.0;
     537              :     }
     538         5940 :     constval = ((Const *) other)->constvalue;
     539         5940 :     consttype = ((Const *) other)->consttype;
     540              : 
     541              :     /*
     542              :      * The right-hand const is type text or bytea for all supported operators.
     543              :      * We do not expect to see binary-compatible types here, since
     544              :      * const-folding should have relabeled the const to exactly match the
     545              :      * operator's declared type.
     546              :      */
     547         5940 :     if (consttype != TEXTOID && consttype != BYTEAOID)
     548              :     {
     549           12 :         ReleaseVariableStats(vardata);
     550           12 :         return result;
     551              :     }
     552              : 
     553              :     /*
     554              :      * Similarly, the exposed type of the left-hand side should be one of
     555              :      * those we know.  (Do not look at vardata.atttype, which might be
     556              :      * something binary-compatible but different.)  We can use it to identify
     557              :      * the comparison operators and the required type of the comparison
     558              :      * constant, much as in match_pattern_prefix().
     559              :      */
     560         5928 :     vartype = vardata.vartype;
     561              : 
     562         5928 :     switch (vartype)
     563              :     {
     564          798 :         case TEXTOID:
     565          798 :             eqopr = TextEqualOperator;
     566          798 :             ltopr = TextLessOperator;
     567          798 :             geopr = TextGreaterEqualOperator;
     568          798 :             rdatatype = TEXTOID;
     569          798 :             break;
     570         5083 :         case NAMEOID:
     571              : 
     572              :             /*
     573              :              * Note that here, we need the RHS type to be text, so that the
     574              :              * comparison value isn't improperly truncated to NAMEDATALEN.
     575              :              */
     576         5083 :             eqopr = NameEqualTextOperator;
     577         5083 :             ltopr = NameLessTextOperator;
     578         5083 :             geopr = NameGreaterEqualTextOperator;
     579         5083 :             rdatatype = TEXTOID;
     580         5083 :             break;
     581           42 :         case BPCHAROID:
     582           42 :             eqopr = BpcharEqualOperator;
     583           42 :             ltopr = BpcharLessOperator;
     584           42 :             geopr = BpcharGreaterEqualOperator;
     585           42 :             rdatatype = BPCHAROID;
     586           42 :             break;
     587            3 :         case BYTEAOID:
     588            3 :             eqopr = ByteaEqualOperator;
     589            3 :             ltopr = ByteaLessOperator;
     590            3 :             geopr = ByteaGreaterEqualOperator;
     591            3 :             rdatatype = BYTEAOID;
     592            3 :             break;
     593            2 :         default:
     594              :             /* Can't get here unless we're attached to the wrong operator */
     595            2 :             ReleaseVariableStats(vardata);
     596            2 :             return result;
     597              :     }
     598              : 
     599              :     /*
     600              :      * Grab the nullfrac for use below.
     601              :      */
     602         5926 :     if (HeapTupleIsValid(vardata.statsTuple))
     603              :     {
     604              :         Form_pg_statistic stats;
     605              : 
     606         4876 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     607         4876 :         nullfrac = stats->stanullfrac;
     608              :     }
     609              : 
     610              :     /*
     611              :      * Pull out any fixed prefix implied by the pattern, and estimate the
     612              :      * fractional selectivity of the remainder of the pattern.  Unlike many
     613              :      * other selectivity estimators, we use the pattern operator's actual
     614              :      * collation for this step.  This is not because we expect the collation
     615              :      * to make a big difference in the selectivity estimate (it seldom would),
     616              :      * but because we want to be sure we cache compiled regexps under the
     617              :      * right cache key, so that they can be re-used at runtime.
     618              :      */
     619         5926 :     patt = (Const *) other;
     620         5926 :     pstatus = pattern_fixed_prefix(patt, ptype, collation,
     621              :                                    &prefix, &rest_selec);
     622              : 
     623              :     /*
     624              :      * If necessary, coerce the prefix constant to the right type.  The only
     625              :      * case where we need to do anything is when converting text to bpchar.
     626              :      * Those two types are binary-compatible, so relabeling the Const node is
     627              :      * sufficient.
     628              :      */
     629         5914 :     if (prefix && prefix->consttype != rdatatype)
     630              :     {
     631              :         Assert(prefix->consttype == TEXTOID &&
     632              :                rdatatype == BPCHAROID);
     633           18 :         prefix->consttype = rdatatype;
     634              :     }
     635              : 
     636         5914 :     if (pstatus == Pattern_Prefix_Exact)
     637              :     {
     638              :         /*
     639              :          * Pattern specifies an exact match, so estimate as for '='
     640              :          */
     641         3332 :         result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
     642              :                               false, true, false);
     643              :     }
     644              :     else
     645              :     {
     646              :         /*
     647              :          * Not exact-match pattern.  If we have a sufficiently large
     648              :          * histogram, estimate selectivity for the histogram part of the
     649              :          * population by counting matches in the histogram.  If not, estimate
     650              :          * selectivity of the fixed prefix and remainder of pattern
     651              :          * separately, then combine the two to get an estimate of the
     652              :          * selectivity for the part of the column population represented by
     653              :          * the histogram.  (For small histograms, we combine these
     654              :          * approaches.)
     655              :          *
     656              :          * We then add up data for any most-common-values values; these are
     657              :          * not in the histogram population, and we can get exact answers for
     658              :          * them by applying the pattern operator, so there's no reason to
     659              :          * approximate.  (If the MCVs cover a significant part of the total
     660              :          * population, this gives us a big leg up in accuracy.)
     661              :          */
     662              :         Selectivity selec;
     663              :         int         hist_size;
     664              :         FmgrInfo    opproc;
     665              :         double      mcv_selec,
     666              :                     sumcommon;
     667              : 
     668              :         /* Try to use the histogram entries to get selectivity */
     669         2582 :         if (!OidIsValid(opfuncid))
     670         2570 :             opfuncid = get_opcode(oprid);
     671         2582 :         fmgr_info(opfuncid, &opproc);
     672              : 
     673         2582 :         selec = histogram_selectivity(&vardata, &opproc, collation,
     674              :                                       constval, true,
     675              :                                       10, 1, &hist_size);
     676              : 
     677              :         /* If not at least 100 entries, use the heuristic method */
     678         2582 :         if (hist_size < 100)
     679              :         {
     680              :             Selectivity heursel;
     681              :             Selectivity prefixsel;
     682              : 
     683         1914 :             if (pstatus == Pattern_Prefix_Partial)
     684         1524 :                 prefixsel = prefix_selectivity(root, &vardata,
     685              :                                                eqopr, ltopr, geopr,
     686              :                                                collation,
     687              :                                                prefix);
     688              :             else
     689          390 :                 prefixsel = 1.0;
     690         1914 :             heursel = prefixsel * rest_selec;
     691              : 
     692         1914 :             if (selec < 0)       /* fewer than 10 histogram entries? */
     693         1707 :                 selec = heursel;
     694              :             else
     695              :             {
     696              :                 /*
     697              :                  * For histogram sizes from 10 to 100, we combine the
     698              :                  * histogram and heuristic selectivities, putting increasingly
     699              :                  * more trust in the histogram for larger sizes.
     700              :                  */
     701          207 :                 double      hist_weight = hist_size / 100.0;
     702              : 
     703          207 :                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
     704              :             }
     705              :         }
     706              : 
     707              :         /* In any case, don't believe extremely small or large estimates. */
     708         2582 :         if (selec < 0.0001)
     709          743 :             selec = 0.0001;
     710         1839 :         else if (selec > 0.9999)
     711           65 :             selec = 0.9999;
     712              : 
     713              :         /*
     714              :          * If we have most-common-values info, add up the fractions of the MCV
     715              :          * entries that satisfy MCV OP PATTERN.  These fractions contribute
     716              :          * directly to the result selectivity.  Also add up the total fraction
     717              :          * represented by MCV entries.
     718              :          */
     719         2582 :         mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
     720              :                                     constval, true,
     721              :                                     &sumcommon);
     722              : 
     723              :         /*
     724              :          * Now merge the results from the MCV and histogram calculations,
     725              :          * realizing that the histogram covers only the non-null values that
     726              :          * are not listed in MCV.
     727              :          */
     728         2582 :         selec *= 1.0 - nullfrac - sumcommon;
     729         2582 :         selec += mcv_selec;
     730         2582 :         result = selec;
     731              :     }
     732              : 
     733              :     /* now adjust if we wanted not-match rather than match */
     734         5914 :     if (negate)
     735         1195 :         result = 1.0 - result - nullfrac;
     736              : 
     737              :     /* result should be in range, but make sure... */
     738         5914 :     CLAMP_PROBABILITY(result);
     739              : 
     740         5914 :     if (prefix)
     741              :     {
     742         5598 :         pfree(DatumGetPointer(prefix->constvalue));
     743         5598 :         pfree(prefix);
     744              :     }
     745              : 
     746         5914 :     ReleaseVariableStats(vardata);
     747              : 
     748         5914 :     return result;
     749              : }
     750              : 
     751              : /*
     752              :  * Fix impedance mismatch between SQL-callable functions and patternsel_common
                      :  *
                      :  * Unpacks the fmgr arguments (argument 0: planner root, 1: operator OID,
                      :  * 2: argument list, 3: varRelid) plus the call's collation, and forwards
                      :  * them to patternsel_common() together with ptype and negate.  InvalidOid
                      :  * is passed in the slot right after the operator OID (presumably the
                      :  * operator's function OID, which patternsel_common can look up itself --
                      :  * confirm against its signature).
                      :  *
                      :  * Raises an ERROR if negate is true and the operator has no negator.
     753              :  */
     754              : static double
     755         6175 : patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     756              : {
     757         6175 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     758         6175 :     Oid         operator = PG_GETARG_OID(1);
     759         6175 :     List       *args = (List *) PG_GETARG_POINTER(2);
     760         6175 :     int         varRelid = PG_GETARG_INT32(3);
     761         6175 :     Oid         collation = PG_GET_COLLATION();
     762              : 
     763              :     /*
     764              :      * If this is for a NOT LIKE or similar operator, get the corresponding
     765              :      * positive-match operator and work with that.
                      :      * The negate flag itself is still passed on unchanged below.
     766              :      */
     767         6175 :     if (negate)
     768              :     {
     769         1423 :         operator = get_negator(operator);
     770         1423 :         if (!OidIsValid(operator))
     771            0 :             elog(ERROR, "patternsel called for operator without a negator");
     772              :     }
     773              : 
     774         6175 :     return patternsel_common(root,
     775              :                              operator,
     776              :                              InvalidOid,
     777              :                              args,
     778              :                              varRelid,
     779              :                              collation,
     780              :                              ptype,
     781              :                              negate);
     782              : }
     783              : 
     784              : /*
     785              :  *      regexeqsel      - Selectivity of regular-expression pattern match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Regex and
                      :  * negate = false, and returns the result as a float8.
     786              :  */
     787              : Datum
     788         3842 : regexeqsel(PG_FUNCTION_ARGS)
     789              : {
     790         3842 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
     791              : }
     792              : 
     793              : /*
     794              :  *      icregexeqsel    - Selectivity of case-insensitive regex match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Regex_IC and
                      :  * negate = false, and returns the result as a float8.
     795              :  */
     796              : Datum
     797           32 : icregexeqsel(PG_FUNCTION_ARGS)
     798              : {
     799           32 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
     800              : }
     801              : 
     802              : /*
     803              :  *      likesel         - Selectivity of LIKE pattern match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Like and
                      :  * negate = false, and returns the result as a float8.
     804              :  */
     805              : Datum
     806          788 : likesel(PG_FUNCTION_ARGS)
     807              : {
     808          788 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
     809              : }
     810              : 
     811              : /*
     812              :  *      prefixsel           - selectivity of prefix operator
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Prefix and
                      :  * negate = false, and returns the result as a float8.
     813              :  */
     814              : Datum
     815           27 : prefixsel(PG_FUNCTION_ARGS)
     816              : {
     817           27 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
     818              : }
     819              : 
     820              : /*
     821              :  *
     822              :  *      iclikesel           - Selectivity of ILIKE pattern match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Like_IC and
                      :  * negate = false, and returns the result as a float8.
     823              :  */
     824              : Datum
     825           63 : iclikesel(PG_FUNCTION_ARGS)
     826              : {
     827           63 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
     828              : }
     829              : 
     830              : /*
     831              :  *      regexnesel      - Selectivity of regular-expression pattern non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Regex and
                      :  * negate = true, and returns the result as a float8.
     832              :  */
     833              : Datum
     834         1343 : regexnesel(PG_FUNCTION_ARGS)
     835              : {
     836         1343 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
     837              : }
     838              : 
     839              : /*
     840              :  *      icregexnesel    - Selectivity of case-insensitive regex non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Regex_IC and
                      :  * negate = true, and returns the result as a float8.
     841              :  */
     842              : Datum
     843            8 : icregexnesel(PG_FUNCTION_ARGS)
     844              : {
     845            8 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
     846              : }
     847              : 
     848              : /*
     849              :  *      nlikesel        - Selectivity of LIKE pattern non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Like and
                      :  * negate = true, and returns the result as a float8.
     850              :  */
     851              : Datum
     852           68 : nlikesel(PG_FUNCTION_ARGS)
     853              : {
     854           68 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
     855              : }
     856              : 
     857              : /*
     858              :  *      icnlikesel      - Selectivity of ILIKE pattern non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternsel() with Pattern_Type_Like_IC and
                      :  * negate = true, and returns the result as a float8.
     859              :  */
     860              : Datum
     861            4 : icnlikesel(PG_FUNCTION_ARGS)
     862              : {
     863            4 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
     864              : }
     865              : 
     866              : /*
     867              :  * patternjoinsel       - Generic code for pattern-match join selectivity.
                      :  *
                      :  * Currently returns a fixed default: DEFAULT_MATCH_SEL, or
                      :  * 1.0 - DEFAULT_MATCH_SEL when negate is true.  Neither the fmgr arguments
                      :  * nor ptype are examined.
     868              :  */
     869              : static double
     870          118 : patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     871              : {
     872              :     /* For the moment we just punt. */
     873          118 :     return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
     874              : }
     875              : 
     876              : /*
     877              :  *      regexeqjoinsel  - Join selectivity of regular-expression pattern match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Regex and
                      :  * negate = false, and returns the result as a float8.
     878              :  */
     879              : Datum
     880          118 : regexeqjoinsel(PG_FUNCTION_ARGS)
     881              : {
     882          118 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
     883              : }
     884              : 
     885              : /*
     886              :  *      icregexeqjoinsel    - Join selectivity of case-insensitive regex match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Regex_IC and
                      :  * negate = false, and returns the result as a float8.
     887              :  */
     888              : Datum
     889            0 : icregexeqjoinsel(PG_FUNCTION_ARGS)
     890              : {
     891            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
     892              : }
     893              : 
     894              : /*
     895              :  *      likejoinsel         - Join selectivity of LIKE pattern match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Like and
                      :  * negate = false, and returns the result as a float8.
     896              :  */
     897              : Datum
     898            0 : likejoinsel(PG_FUNCTION_ARGS)
     899              : {
     900            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
     901              : }
     902              : 
     903              : /*
     904              :  *      prefixjoinsel           - Join selectivity of prefix operator
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Prefix and
                      :  * negate = false, and returns the result as a float8.
     905              :  */
     906              : Datum
     907            0 : prefixjoinsel(PG_FUNCTION_ARGS)
     908              : {
     909            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
     910              : }
     911              : 
     912              : /*
     913              :  *      iclikejoinsel           - Join selectivity of ILIKE pattern match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Like_IC and
                      :  * negate = false, and returns the result as a float8.
     914              :  */
     915              : Datum
     916            0 : iclikejoinsel(PG_FUNCTION_ARGS)
     917              : {
     918            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
     919              : }
     920              : 
     921              : /*
     922              :  *      regexnejoinsel  - Join selectivity of regex non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Regex and
                      :  * negate = true, and returns the result as a float8.
     923              :  */
     924              : Datum
     925            0 : regexnejoinsel(PG_FUNCTION_ARGS)
     926              : {
     927            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
     928              : }
     929              : 
     930              : /*
     931              :  *      icregexnejoinsel    - Join selectivity of case-insensitive regex non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Regex_IC and
                      :  * negate = true, and returns the result as a float8.
     932              :  */
     933              : Datum
     934            0 : icregexnejoinsel(PG_FUNCTION_ARGS)
     935              : {
     936            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
     937              : }
     938              : 
     939              : /*
     940              :  *      nlikejoinsel        - Join selectivity of LIKE pattern non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Like and
                      :  * negate = true, and returns the result as a float8.
     941              :  */
     942              : Datum
     943            0 : nlikejoinsel(PG_FUNCTION_ARGS)
     944              : {
     945            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
     946              : }
     947              : 
     948              : /*
     949              :  *      icnlikejoinsel      - Join selectivity of ILIKE pattern non-match.
                      :  *
                      :  * Wrapper: passes fcinfo to patternjoinsel() with Pattern_Type_Like_IC and
                      :  * negate = true, and returns the result as a float8.
     950              :  */
     951              : Datum
     952            0 : icnlikejoinsel(PG_FUNCTION_ARGS)
     953              : {
     954            0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
     955              : }
     956              : 
     957              : 
     958              : /*-------------------------------------------------------------------------
     959              :  *
     960              :  * Pattern analysis functions
     961              :  *
     962              :  * These routines support analysis of LIKE and regular-expression patterns
     963              :  * by the planner/optimizer.  It's important that they agree with the
     964              :  * regular-expression code in backend/regex/ and the LIKE code in
     965              :  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
     966              :  * must be conservative: if we report a string longer than the true fixed
     967              :  * prefix, the query may produce actually wrong answers, rather than just
     968              :  * getting a bad selectivity estimate!
     969              :  *
     970              :  *-------------------------------------------------------------------------
     971              :  */
     972              : 
     973              : /*
     974              :  * Extract the fixed prefix, if any, for a pattern.
     975              :  *
     976              :  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
     977              :  *  or to NULL if no fixed prefix exists for the pattern.
     978              :  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
     979              :  *  selectivity of the remainder of the pattern (without any fixed prefix).
     980              :  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
     981              :  *
     982              :  * The return value distinguishes no fixed prefix, a partial prefix,
     983              :  * or an exact-match-only pattern.
                      :  *
                      :  * like_fixed_prefix() handles LIKE patterns: it copies pattern bytes into
                      :  * the prefix until the first unescaped '%' or '_'.  A backslash causes the
                      :  * following byte to be copied literally.  A backslash that is the last
                      :  * byte of the pattern ends the scan without adding to the prefix; since
                      :  * the scan position then equals the pattern length, the result is
                      :  * Pattern_Prefix_Exact.
                      :  *
                      :  * Note that this function always stores a Const in *prefix_const (built
                      :  * from a possibly-empty match string); Pattern_Prefix_None is signalled
                      :  * only through the return value.
     984              :  */
     985              : 
     986              : static Pattern_Prefix_Status
     987         1370 : like_fixed_prefix(Const *patt_const, Const **prefix_const,
     988              :                   Selectivity *rest_selec)
     989              : {
     990              :     char       *match;
     991              :     char       *patt;
     992              :     int         pattlen;
     993         1370 :     Oid         typeid = patt_const->consttype;
     994              :     int         pos,
     995              :                 match_pos;
     996              : 
     997              :     /* the right-hand const is type text or bytea */
     998              :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
     999              : 
    1000         1370 :     if (typeid != BYTEAOID)
    1001              :     {
    1002         1364 :         patt = TextDatumGetCString(patt_const->constvalue);
    1003         1364 :         pattlen = strlen(patt);
    1004              :     }
    1005              :     else
    1006              :     {
    1007            6 :         bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
    1008              : 
    1009            6 :         pattlen = VARSIZE_ANY_EXHDR(bstr);
    1010            6 :         patt = (char *) palloc(pattlen);
    1011            6 :         memcpy(patt, VARDATA_ANY(bstr), pattlen);
    1012              :         Assert(bstr == DatumGetPointer(patt_const->constvalue));
    1013              :     }
    1014              : 
    1015         1370 :     match = palloc(pattlen + 1);
    1016         1370 :     match_pos = 0;
    1017         7826 :     for (pos = 0; pos < pattlen; pos++)
    1018              :     {
    1019              :         /* % and _ are wildcard characters in LIKE */
    1020         7773 :         if (patt[pos] == '%' ||
    1021         6999 :             patt[pos] == '_')
    1022              :             break;
    1023              : 
    1024              :         /* Backslash escapes the next character */
    1025         6456 :         if (patt[pos] == '\\')
    1026              :         {
    1027          137 :             pos++;
    1028          137 :             if (pos >= pattlen)
    1029            0 :                 break;
    1030              :         }
    1031              : 
    1032         6456 :         match[match_pos++] = patt[pos];
    1033              :     }
    1034              : 
    1035         1370 :     match[match_pos] = '\0';
    1036              : 
    1037         1370 :     if (typeid != BYTEAOID)
    1038         1364 :         *prefix_const = string_to_const(match, typeid);
    1039              :     else
    1040            6 :         *prefix_const = string_to_bytea_const(match, match_pos);
    1041              : 
    1042         1370 :     if (rest_selec != NULL)
    1043          856 :         *rest_selec = like_selectivity(&patt[pos], pattlen - pos, false);
    1044              : 
    1045         1370 :     pfree(patt);
    1046         1370 :     pfree(match);
    1047              : 
    1048              :     /* in LIKE, an empty pattern is an exact match! */
    1049         1370 :     if (pos == pattlen)
    1050           53 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1051              : 
    1052         1317 :     if (match_pos > 0)
    1053         1151 :         return Pattern_Prefix_Partial;
    1054              : 
    1055          166 :     return Pattern_Prefix_None;
    1056              : }
    1057              : 
    1058              : /*
    1059              :  * Case-insensitive variant of like_fixed_prefix().  Multibyte and
    1060              :  * locale-aware for detecting cased characters.
                      :  *
                      :  * The pattern is converted to wide characters and scanned as in
                      :  * like_fixed_prefix(), except that the scan also stops at the first
                      :  * character pg_iswcased() reports as cased for the collation's locale.
                      :  * The prefix Const is always built as TEXTOID and is always stored in
                      :  * *prefix_const, even when the collected prefix is empty.
                      :  *
                      :  * Raises an ERROR for a bytea pattern or when collation is not a valid OID.
                      :  *
                      :  * NOTE(review): the remainder handed to like_selectivity() starts at
                      :  * wpatt[wmatch_pos], whereas like_fixed_prefix() uses patt[pos].  After a
                      :  * backslash escape wpos has advanced further than wmatch_pos, so the two
                      :  * indexes can differ here.  This only feeds the rest_selec estimate, but
                      :  * confirm that wmatch_pos (rather than wpos) is intended.
    1061              :  */
    1062              : static Pattern_Prefix_Status
    1063          117 : like_fixed_prefix_ci(Const *patt_const, Oid collation, Const **prefix_const,
    1064              :                      Selectivity *rest_selec)
    1065              : {
    1066          117 :     text       *val = DatumGetTextPP(patt_const->constvalue);
    1067          117 :     Oid         typeid = patt_const->consttype;
    1068          117 :     int         nbytes = VARSIZE_ANY_EXHDR(val);
    1069              :     int         wpos;
    1070              :     pg_wchar   *wpatt;
    1071              :     int         wpattlen;
    1072              :     pg_wchar   *wmatch;
    1073          117 :     int         wmatch_pos = 0;
    1074              :     char       *match;
    1075              :     int         match_mblen;
    1076          117 :     pg_locale_t locale = 0;
    1077              : 
    1078              :     /* the right-hand const is type text or bytea */
    1079              :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
    1080              : 
    1081          117 :     if (typeid == BYTEAOID)
    1082            0 :         ereport(ERROR,
    1083              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1084              :                  errmsg("case insensitive matching not supported on type bytea")));
    1085              : 
    1086          117 :     if (!OidIsValid(collation))
    1087              :     {
    1088              :         /*
    1089              :          * This typically means that the parser could not resolve a conflict
    1090              :          * of implicit collations, so report it that way.
    1091              :          */
    1092            0 :         ereport(ERROR,
    1093              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1094              :                  errmsg("could not determine which collation to use for ILIKE"),
    1095              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1096              :     }
    1097              : 
    1098          117 :     locale = pg_newlocale_from_collation(collation);
    1099              : 
    1100          117 :     wpatt = palloc((nbytes + 1) * sizeof(pg_wchar));
    1101          117 :     wpattlen = pg_mb2wchar_with_len(VARDATA_ANY(val), wpatt, nbytes);
    1102              : 
    1103          117 :     wmatch = palloc((nbytes + 1) * sizeof(pg_wchar));
    1104          165 :     for (wpos = 0; wpos < wpattlen; wpos++)
    1105              :     {
    1106              :         /* % and _ are wildcard characters in LIKE */
    1107          165 :         if (wpatt[wpos] == '%' ||
    1108          133 :             wpatt[wpos] == '_')
    1109              :             break;
    1110              : 
    1111              :         /* Backslash escapes the next character */
    1112          133 :         if (wpatt[wpos] == '\\')
    1113              :         {
    1114            0 :             wpos++;
    1115            0 :             if (wpos >= wpattlen)
    1116            0 :                 break;
    1117              :         }
    1118              : 
    1119              :         /*
    1120              :          * For ILIKE, stop if it's a case-varying character (it's sort of a
    1121              :          * wildcard).
    1122              :          */
    1123          133 :         if (pg_iswcased(wpatt[wpos], locale))
    1124           85 :             break;
    1125              : 
    1126           48 :         wmatch[wmatch_pos++] = wpatt[wpos];
    1127              :     }
    1128              : 
    1129          117 :     wmatch[wmatch_pos] = '\0';
    1130              : 
    1131          117 :     match = palloc(pg_database_encoding_max_length() * wmatch_pos + 1);
    1132          117 :     match_mblen = pg_wchar2mb_with_len(wmatch, match, wmatch_pos);
    1133          117 :     match[match_mblen] = '\0';
    1134          117 :     pfree(wmatch);
    1135              : 
    1136          117 :     *prefix_const = string_to_const(match, TEXTOID);
    1137          117 :     pfree(match);
    1138              : 
    1139          117 :     if (rest_selec != NULL)
    1140              :     {
    1141           59 :         int         wrestlen = wpattlen - wmatch_pos;
    1142              :         char       *rest;
    1143              :         int         rest_mblen;
    1144              : 
    1145           59 :         rest = palloc(pg_database_encoding_max_length() * wrestlen + 1);
    1146           59 :         rest_mblen = pg_wchar2mb_with_len(&wpatt[wmatch_pos], rest, wrestlen);
    1147              : 
    1148           59 :         *rest_selec = like_selectivity(rest, rest_mblen, true);
    1149           59 :         pfree(rest);
    1150              :     }
    1151              : 
    1152          117 :     pfree(wpatt);
    1153              : 
    1154              :     /* in LIKE, an empty pattern is an exact match! */
    1155          117 :     if (wpos == wpattlen)
    1156            0 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1157              : 
    1158          117 :     if (wmatch_pos > 0)
    1159           24 :         return Pattern_Prefix_Partial;
    1160              : 
    1161           93 :     return Pattern_Prefix_None;
    1162              : }
    1163              : /*
                      :  * regex_fixed_prefix
                      :  *    Extract the fixed prefix, if any, of a regular-expression pattern.
                      :  *
                      :  * patt_const is the pattern constant; case_insensitive and collation are
                      :  * passed through to regexp_fixed_prefix(), which does the actual work.
                      :  *
                      :  * On return, *prefix_const is a Const of the pattern's type built from the
                      :  * prefix, or NULL if no prefix was found.  If rest_selec is not NULL,
                      :  * *rest_selec receives a selectivity estimate for the rest of the pattern:
                      :  * 1.0 when the prefix is reported as an exact match, otherwise whatever
                      :  * regex_selectivity() computes.
                      :  *
                      :  * Returns Pattern_Prefix_None when there is no prefix, Pattern_Prefix_Exact
                      :  * when regexp_fixed_prefix() sets "exact", else Pattern_Prefix_Partial.
                      :  * Raises ERROR if the pattern is of type bytea.
                      :  */
    1164              : static Pattern_Prefix_Status
    1165         8433 : regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
    1166              :                    Const **prefix_const, Selectivity *rest_selec)
    1167              : {
    1168         8433 :     Oid         typeid = patt_const->consttype;
    1169              :     char       *prefix;
    1170              :     bool        exact;
    1171              : 
    1172              :     /*
    1173              :      * Should be unnecessary, there are no bytea regex operators defined. As
    1174              :      * such, it should be noted that the rest of this function has *not* been
    1175              :      * made safe for binary (possibly NUL-containing) strings: it measures
                      :      * both the pattern and the prefix with strlen().
    1176              :      */
    1177         8433 :     if (typeid == BYTEAOID)
    1178            0 :         ereport(ERROR,
    1179              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1180              :                  errmsg("regular-expression matching not supported on type bytea")));
    1181              : 
    1182              :     /* Use the regexp machinery to extract the prefix, if any */
    1183         8433 :     prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
    1184              :                                  case_insensitive, collation,
    1185              :                                  &exact);
    1186              : 
    1187         8421 :     if (prefix == NULL)
    1188              :     {
    1189          384 :         *prefix_const = NULL;
    1190              :         /* No prefix: rate the whole pattern, with no prefix to discount */
    1191          384 :         if (rest_selec != NULL)
    1192              :         {
    1193          316 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1194              : 
    1195          316 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1196              :                                             case_insensitive,
    1197              :                                             0);
    1198          316 :             pfree(patt);
    1199              :         }
    1200              : 
    1201          384 :         return Pattern_Prefix_None;
    1202              :     }
    1203              : 
    1204         8037 :     *prefix_const = string_to_const(prefix, typeid);
    1205              : 
    1206         8037 :     if (rest_selec != NULL)
    1207              :     {
    1208         4644 :         if (exact)
    1209              :         {
    1210              :             /* Exact match, so there's no additional selectivity */
    1211         3300 :             *rest_selec = 1.0;
    1212              :         }
    1213              :         else
    1214              :         {
    1215         1344 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1216              :             /* strlen(prefix) is the byte count regex_selectivity discounts */
    1217         2688 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1218              :                                             case_insensitive,
    1219         1344 :                                             strlen(prefix));
    1220         1344 :             pfree(patt);
    1221              :         }
    1222              :     }
    1223              : 
    1224         8037 :     pfree(prefix);
    1225              : 
    1226         8037 :     if (exact)
    1227         6507 :         return Pattern_Prefix_Exact;    /* pattern specifies exact match */
    1228              :     else
    1229         1530 :         return Pattern_Prefix_Partial;
    1230              : }
    1231              : /*
                      :  * pattern_fixed_prefix
                      :  *    Extract the fixed prefix of a pattern, dispatching on the pattern type.
                      :  *
                      :  * Pattern_Type_Like goes to like_fixed_prefix() and Pattern_Type_Like_IC to
                      :  * like_fixed_prefix_ci(); Pattern_Type_Regex and Pattern_Type_Regex_IC both
                      :  * go to regex_fixed_prefix(), with case_insensitive false and true
                      :  * respectively.  collation is handed to all of these except
                      :  * like_fixed_prefix().
                      :  *
                      :  * For Pattern_Type_Prefix the pattern itself is the prefix: *prefix becomes a
                      :  * new Const carrying a datumCopy of the pattern value, *rest_selec (when
                      :  * rest_selec is not NULL) is set to 1.0, and the result is
                      :  * Pattern_Prefix_Partial.
                      :  *
                      :  * Any other ptype raises ERROR.  Otherwise the status produced by the
                      :  * selected routine is returned unchanged.
                      :  */
    1232              : static Pattern_Prefix_Status
    1233         9971 : pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
    1234              :                      Const **prefix, Selectivity *rest_selec)
    1235              : {
    1236              :     Pattern_Prefix_Status result;
    1237              : 
    1238         9971 :     switch (ptype)
    1239              :     {
    1240         1370 :         case Pattern_Type_Like:
    1241         1370 :             result = like_fixed_prefix(patt, prefix, rest_selec);
    1242         1370 :             break;
    1243          117 :         case Pattern_Type_Like_IC:
    1244          117 :             result = like_fixed_prefix_ci(patt, collation, prefix,
    1245              :                                           rest_selec);
    1246          117 :             break;
    1247         8402 :         case Pattern_Type_Regex:
    1248         8402 :             result = regex_fixed_prefix(patt, false, collation,
    1249              :                                         prefix, rest_selec);
    1250         8390 :             break;
    1251           31 :         case Pattern_Type_Regex_IC:
    1252           31 :             result = regex_fixed_prefix(patt, true, collation,
    1253              :                                         prefix, rest_selec);
    1254           31 :             break;
    1255           51 :         case Pattern_Type_Prefix:
    1256              :             /* Prefix type work is trivial: the pattern is the prefix. */
    1257           51 :             result = Pattern_Prefix_Partial;
    1258           51 :             *prefix = makeConst(patt->consttype,
    1259              :                                 patt->consttypmod,
    1260              :                                 patt->constcollid,
    1261              :                                 patt->constlen,
    1262              :                                 datumCopy(patt->constvalue,
    1263           51 :                                           patt->constbyval,
    1264              :                                           patt->constlen),
    1265           51 :                                 patt->constisnull,
    1266           51 :                                 patt->constbyval);
    1267           51 :             if (rest_selec != NULL)
    1268           39 :                 *rest_selec = 1.0;  /* all */
    1269           51 :             break;
    1270            0 :         default:
    1271            0 :             elog(ERROR, "unrecognized ptype: %d", (int) ptype);
    1272              :             result = Pattern_Prefix_None;   /* keep compiler quiet */
    1273              :             break;
    1274              :     }
    1275         9959 :     return result;
    1276              : }
    1277              : 
    1278              : /*
    1279              :  * Estimate the selectivity of a fixed prefix for a pattern match.
    1280              :  *
    1281              :  * A fixed prefix "foo" is estimated as the selectivity of the expression
    1282              :  * "variable >= 'foo' AND variable < 'fop'".
    1283              :  *
    1284              :  * The selectivity estimate is with respect to the portion of the column
    1285              :  * population represented by the histogram --- the caller must fold this
    1286              :  * together with info about MCVs and NULLs.
    1287              :  *
    1288              :  * We use the given comparison operators and collation to do the estimation.
    1289              :  * The given variable and Const must be of the associated datatype(s).
    1290              :  *
    1291              :  * XXX Note: we make use of the upper bound to estimate operator selectivity
    1292              :  * even if the locale is such that we cannot rely on the upper-bound string.
    1293              :  * The selectivity only needs to be approximately right anyway, so it seems
    1294              :  * more useful to use the upper-bound code than not.
                      :  *
                      :  * geopr is used for the "x >= prefix" estimate, ltopr for the
                      :  * "x < greaterstr" estimate and for building the greater string, and eqopr
                      :  * for the "x = prefix" estimate that serves as a lower clamp.  prefixcon is
                      :  * the fixed prefix.
                      :  *
                      :  * Returns DEFAULT_MATCH_SEL when ineq_histogram_selectivity() yields a
                      :  * negative value for the ">=" condition (no histogram).  Otherwise returns
                      :  * the range estimate, or just the ">=" estimate if make_greater_string()
                      :  * returns NULL, raised to at least the "=" estimate.
    1295              :  */
    1296              : static Selectivity
    1297         1524 : prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
    1298              :                    Oid eqopr, Oid ltopr, Oid geopr,
    1299              :                    Oid collation,
    1300              :                    Const *prefixcon)
    1301              : {
    1302              :     Selectivity prefixsel;
    1303              :     FmgrInfo    opproc;
    1304              :     Const      *greaterstrcon;
    1305              :     Selectivity eq_sel;
    1306              : 
    1307              :     /* Estimate the selectivity of "x >= prefix" */
    1308         1524 :     fmgr_info(get_opcode(geopr), &opproc);
    1309              : 
    1310         1524 :     prefixsel = ineq_histogram_selectivity(root, vardata,
    1311              :                                            geopr, &opproc, true, true,
    1312              :                                            collation,
    1313              :                                            prefixcon->constvalue,
    1314              :                                            prefixcon->consttype);
    1315              : 
    1316         1524 :     if (prefixsel < 0.0)
    1317              :     {
    1318              :         /* No histogram is present ... return a suitable default estimate */
    1319          352 :         return DEFAULT_MATCH_SEL;
    1320              :     }
    1321              : 
    1322              :     /*
    1323              :      * If we can create a string larger than the prefix, say "x < greaterstr".
                      :      * opproc is reloaded with the "<" operator's function, which both
                      :      * make_greater_string() and the second histogram probe use.
    1324              :      */
    1325         1172 :     fmgr_info(get_opcode(ltopr), &opproc);
    1326         1172 :     greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
    1327         1172 :     if (greaterstrcon)
    1328              :     {
    1329              :         Selectivity topsel;
    1330              : 
    1331         1172 :         topsel = ineq_histogram_selectivity(root, vardata,
    1332              :                                             ltopr, &opproc, false, false,
    1333              :                                             collation,
    1334              :                                             greaterstrcon->constvalue,
    1335              :                                             greaterstrcon->consttype);
    1336              : 
    1337              :         /* ineq_histogram_selectivity worked before, it shouldn't fail now */
    1338              :         Assert(topsel >= 0.0);
    1339              : 
    1340              :         /*
    1341              :          * Merge the two selectivities in the same way as for a range query
    1342              :          * (see clauselist_selectivity()).  Note that we don't need to worry
    1343              :          * about double-exclusion of nulls, since ineq_histogram_selectivity
    1344              :          * doesn't count those anyway.
    1345              :          */
    1346         1172 :         prefixsel = topsel + prefixsel - 1.0;
    1347              :     }
    1348              : 
    1349              :     /*
    1350              :      * If the prefix is long then the two bounding values might be too close
    1351              :      * together for the histogram to distinguish them usefully, resulting in a
    1352              :      * zero estimate (plus or minus roundoff error). To avoid returning a
    1353              :      * ridiculously small estimate, compute the estimated selectivity for
    1354              :      * "variable = 'foo'", and clamp to that. (Obviously, the resultant
    1355              :      * estimate should be at least that.)
    1356              :      *
    1357              :      * We apply this even if we couldn't make a greater string.  That case
    1358              :      * suggests that the prefix is near the maximum possible, and thus
    1359              :      * probably off the end of the histogram, and thus we probably got a very
    1360              :      * small estimate from the >= condition; so we still need to clamp.
    1361              :      */
    1362         1172 :     eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
    1363              :                           false, true, false);
    1364              : 
    1365         1172 :     prefixsel = Max(prefixsel, eq_sel);
    1366              : 
    1367         1172 :     return prefixsel;
    1368              : }
    1369              : 
    1370              : 
    1371              : /*
    1372              :  * Estimate the selectivity of a pattern of the specified type.
    1373              :  * Note that any fixed prefix of the pattern will have been removed already,
    1374              :  * so actually we may be looking at just a fragment of the pattern.
    1375              :  *
    1376              :  * For now, we use a very simplistic approach: fixed characters reduce the
    1377              :  * selectivity a good deal, character ranges reduce it a little,
    1378              :  * wildcards (such as % for LIKE or .* for regex) increase it.
                      :  *
                      :  * The constants below are multiplied together, one factor per pattern
                      :  * element.  The wildcard factors are greater than 1, so the product can
                      :  * exceed 1; each estimator limits its result to at most 1.0 before
                      :  * returning.
    1379              :  */
    1380              : 
    1381              : #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
    1382              : #define CHAR_RANGE_SEL  0.25    /* regex [...]; a negated [^...] uses 1 - this */
    1383              : #define ANY_CHAR_SEL    0.9     /* not 1, since it won't match end-of-string */
    1384              : #define FULL_WILDCARD_SEL 5.0   /* LIKE %, or a regex lacking a trailing $ */
    1385              : #define PARTIAL_WILDCARD_SEL 2.0    /* regex quantifiers: * ? + and {...} */
    1386              : /*
                      :  * like_selectivity
                      :  *    Heuristic selectivity for the first pattlen bytes of LIKE pattern patt.
                      :  *
                      :  * Leading '%' and '_' are skipped.  After that, each '%' multiplies the
                      :  * estimate by FULL_WILDCARD_SEL, each '_' by ANY_CHAR_SEL, and every other
                      :  * byte by FIXED_CHAR_SEL.  A backslash makes the following byte count as a
                      :  * fixed character whatever it is; a backslash that is the last byte of the
                      :  * pattern ends the scan without contributing.  The result is capped at 1.0.
                      :  *
                      :  * The scan is byte by byte, so a multibyte character contributes one
                      :  * FIXED_CHAR_SEL factor per byte.  case_insensitive is not used here.
                      :  */
    1387              : static Selectivity
    1388          915 : like_selectivity(const char *patt, int pattlen, bool case_insensitive)
    1389              : {
    1390          915 :     Selectivity sel = 1.0;
    1391              :     int         pos;
    1392              : 
    1393              :     /* Skip any leading wildcard; it's already factored into initial sel */
    1394         1768 :     for (pos = 0; pos < pattlen; pos++)
    1395              :     {
    1396         1296 :         if (patt[pos] != '%' && patt[pos] != '_')
    1397          443 :             break;
    1398              :     }
    1399              :     /* Rate everything after the leading wildcards, one byte at a time */
    1400         3639 :     for (; pos < pattlen; pos++)
    1401              :     {
    1402              :         /* % and _ are wildcard characters in LIKE */
    1403         2724 :         if (patt[pos] == '%')
    1404          391 :             sel *= FULL_WILDCARD_SEL;
    1405         2333 :         else if (patt[pos] == '_')
    1406           93 :             sel *= ANY_CHAR_SEL;
    1407         2240 :         else if (patt[pos] == '\\')
    1408              :         {
    1409              :             /* Backslash quotes the next character */
    1410           20 :             pos++;
    1411           20 :             if (pos >= pattlen)
    1412            0 :                 break;
    1413           20 :             sel *= FIXED_CHAR_SEL;
    1414              :         }
    1415              :         else
    1416         2220 :             sel *= FIXED_CHAR_SEL;
    1417              :     }
    1418              :     /* Could get sel > 1 if multiple wildcards */
    1419          915 :     if (sel > 1.0)
    1420            0 :         sel = 1.0;
    1421          915 :     return sel;
    1422              : }
    1423              : /*
                      :  * regex_selectivity_sub
                      :  *    Heuristic selectivity for the first pattlen bytes of regex patt.
                      :  *
                      :  * The pattern is scanned byte by byte while a running product "sel" is
                      :  * maintained:
                      :  *   - A parenthesized group is rated by a recursive call on its contents,
                      :  *     made when the ')' that returns paren_depth to 0 is reached.  While
                      :  *     paren_depth > 0 nothing else is multiplied in (bracket expressions,
                      :  *     '{...}' and backslash escapes are still skipped over), so a group
                      :  *     that is never closed contributes nothing.
                      :  *   - '|' at paren level 0 adds the rating of the rest of the pattern to
                      :  *     the product accumulated so far, and ends the scan.
                      :  *   - A bracket expression counts CHAR_RANGE_SEL, or 1 - CHAR_RANGE_SEL
                      :  *     when it starts with '^'.
                      :  *   - '.' counts ANY_CHAR_SEL; '*', '?', '+' and '{...}' each count
                      :  *     PARTIAL_WILDCARD_SEL.
                      :  *   - Any other byte counts FIXED_CHAR_SEL; that includes a byte quoted by
                      :  *     a backslash and a ')' seen while no group is open.
                      :  * The result is capped at 1.0.
                      :  *
                      :  * case_insensitive is only passed down to the recursive calls.
                      :  *
                      :  * NOTE(review): the '[' branch reads patt[pos] after advancing pos without
                      :  * first checking pos < pattlen, so for input ending in '[' or "[^" it looks
                      :  * at bytes beyond pattlen.  This appears to rely on the buffer extending
                      :  * past pattlen (a NUL terminator or the remainder of an enclosing pattern)
                      :  * -- confirm against the callers.
                      :  */
    1424              : static Selectivity
    1425         1876 : regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
    1426              : {
    1427         1876 :     Selectivity sel = 1.0;
    1428         1876 :     int         paren_depth = 0;
    1429         1876 :     int         paren_pos = 0;  /* dummy init to keep compiler quiet */
    1430              :     int         pos;
    1431              : 
    1432              :     /* since this function recurses, it could be driven to stack overflow */
    1433         1876 :     check_stack_depth();
    1434              : 
    1435        20397 :     for (pos = 0; pos < pattlen; pos++)
    1436              :     {
    1437        18533 :         if (patt[pos] == '(')
    1438              :         {
    1439          219 :             if (paren_depth == 0)
    1440          207 :                 paren_pos = pos;    /* remember start of parenthesized item */
    1441          219 :             paren_depth++;
    1442              :         }
    1443        18314 :         else if (patt[pos] == ')' && paren_depth > 0)
    1444              :         {
    1445          216 :             paren_depth--;
    1446          216 :             if (paren_depth == 0)   /* outermost group closed: rate its contents */
    1447          204 :                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
    1448          204 :                                              pos - (paren_pos + 1),
    1449              :                                              case_insensitive);
    1450              :         }
    1451        18098 :         else if (patt[pos] == '|' && paren_depth == 0)
    1452              :         {
    1453              :             /*
    1454              :              * If unquoted | is present at paren level 0 in pattern, we have
    1455              :              * multiple alternatives; sum their probabilities.
    1456              :              */
    1457           24 :             sel += regex_selectivity_sub(patt + (pos + 1),
    1458           12 :                                          pattlen - (pos + 1),
    1459              :                                          case_insensitive);
    1460           12 :             break;              /* rest of pattern is now processed */
    1461              :         }
    1462        18086 :         else if (patt[pos] == '[')
    1463              :         {
    1464          126 :             bool        negclass = false;
    1465              : 
    1466          126 :             if (patt[++pos] == '^') /* negated bracket expression */
    1467              :             {
    1468           30 :                 negclass = true;
    1469           30 :                 pos++;
    1470              :             }
    1471          126 :             if (patt[pos] == ']')   /* ']' at start of class is not special */
    1472           12 :                 pos++;
    1473          658 :             while (pos < pattlen && patt[pos] != ']')
    1474          532 :                 pos++;
    1475          126 :             if (paren_depth == 0)
    1476           81 :                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
    1477              :         }
    1478        17960 :         else if (patt[pos] == '.')
    1479              :         {
    1480          463 :             if (paren_depth == 0)
    1481          263 :                 sel *= ANY_CHAR_SEL;
    1482              :         }
    1483        17497 :         else if (patt[pos] == '*' ||
    1484        17091 :                  patt[pos] == '?' ||
    1485        17008 :                  patt[pos] == '+')
    1486              :         {
    1487              :             /* Ought to be smarter about quantifiers... */
    1488          496 :             if (paren_depth == 0)
    1489          267 :                 sel *= PARTIAL_WILDCARD_SEL;
    1490              :         }
    1491        17001 :         else if (patt[pos] == '{')  /* skip ahead to the matching '}' */
    1492              :         {
    1493          132 :             while (pos < pattlen && patt[pos] != '}')
    1494           94 :                 pos++;
    1495           38 :             if (paren_depth == 0)
    1496           32 :                 sel *= PARTIAL_WILDCARD_SEL;
    1497              :         }
    1498        16963 :         else if (patt[pos] == '\\')
    1499              :         {
    1500              :             /* backslash quotes the next character */
    1501          148 :             pos++;
    1502          148 :             if (pos >= pattlen)
    1503            0 :                 break;
    1504          148 :             if (paren_depth == 0)
    1505           76 :                 sel *= FIXED_CHAR_SEL;
    1506              :         }
    1507              :         else
    1508              :         {
    1509        16815 :             if (paren_depth == 0)
    1510        15427 :                 sel *= FIXED_CHAR_SEL;
    1511              :         }
    1512              :     }
    1513              :     /* Could get sel > 1 if multiple wildcards */
    1514         1876 :     if (sel > 1.0)
    1515           13 :         sel = 1.0;
    1516         1876 :     return sel;
    1517              : }
    1518              : /*
                      :  * regex_selectivity
                      :  *    Heuristic selectivity of a regex pattern, with its fixed prefix
                      :  *    discounted.
                      :  *
                      :  * patt/pattlen give the pattern to rate; fixed_prefix_len is the length in
                      :  * bytes of the fixed prefix whose contribution is to be divided back out
                      :  * (0 if there is none).
                      :  *
                      :  * A pattern whose last byte is '$' not preceded by a backslash is rated
                      :  * without that '$'.  Any other pattern is considered to have a trailing
                      :  * wildcard, so its rating is multiplied by FULL_WILDCARD_SEL.  The rating is
                      :  * then divided by FIXED_CHAR_SEL raised to fixed_prefix_len and finally put
                      :  * through CLAMP_PROBABILITY.
                      :  *
                      :  * NOTE(review): only the single byte before '$' is tested for a backslash,
                      :  * so a pattern ending in an escaped backslash followed by '$' is taken to
                      :  * have no trailing '$' -- confirm whether that imprecision matters.
                      :  */
    1519              : static Selectivity
    1520         1660 : regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
    1521              :                   int fixed_prefix_len)
    1522              : {
    1523              :     Selectivity sel;
    1524              : 
    1525              :     /* If patt doesn't end with $, consider it to have a trailing wildcard */
    1526         1660 :     if (pattlen > 0 && patt[pattlen - 1] == '$' &&
    1527          207 :         (pattlen == 1 || patt[pattlen - 2] != '\\'))
    1528              :     {
    1529              :         /* has trailing $ */
    1530          207 :         sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
    1531              :     }
    1532              :     else
    1533              :     {
    1534              :         /* no trailing $ */
    1535         1453 :         sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
    1536         1453 :         sel *= FULL_WILDCARD_SEL;
    1537              :     }
    1538              : 
    1539              :     /*
    1540              :      * If there's a fixed prefix, discount its selectivity.  We have to be
    1541              :      * careful here since a very long prefix could result in pow's result
    1542              :      * underflowing to zero (in which case "sel" probably has as well).
                      :      * When that happens the division is skipped rather than dividing by
                      :      * zero.
    1543              :      */
    1544         1660 :     if (fixed_prefix_len > 0)
    1545              :     {
    1546         1344 :         double      prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
    1547              : 
    1548         1344 :         if (prefixsel > 0.0)
    1549         1344 :             sel /= prefixsel;
    1550              :     }
    1551              : 
    1552              :     /* Make sure result stays in range */
    1553         1660 :     CLAMP_PROBABILITY(sel);
    1554         1660 :     return sel;
    1555              : }
    1556              : 
    1557              : 
    1558              : /*
    1559              :  * For bytea, the increment function need only increment the current byte
    1560              :  * (there are no multibyte characters to worry about).
                      :  *
                      :  * Adds one to *ptr in place and returns true, or returns false and leaves
                      :  * *ptr untouched when it already holds 255.  len is not used; the parameter
                      :  * is there so that the function can be assigned to an
                      :  * mbcharacter_incrementer variable, as make_greater_string() does.
    1561              :  */
    1562              : static bool
    1563            0 : byte_increment(unsigned char *ptr, int len)
    1564              : {
    1565            0 :     if (*ptr >= 255)
    1566            0 :         return false;
    1567            0 :     (*ptr)++;
    1568            0 :     return true;
    1569              : }
    1570              : 
    1571              : /*
    1572              :  * Try to generate a string greater than the given string or any
    1573              :  * string it is a prefix of.  If successful, return a palloc'd string
    1574              :  * in the form of a Const node; else return NULL.
    1575              :  *
    1576              :  * The caller must provide the appropriate "less than" comparison function
    1577              :  * for testing the strings, along with the collation to use.
    1578              :  *
    1579              :  * The key requirement here is that given a prefix string, say "foo",
    1580              :  * we must be able to generate another string "fop" that is greater than
    1581              :  * all strings "foobar" starting with "foo".  We can test that we have
    1582              :  * generated a string greater than the prefix string, but in non-C collations
    1583              :  * that is not a bulletproof guarantee that an extension of the string might
    1584              :  * not sort after it; an example is that "foo " is less than "foo!", but it
    1585              :  * is not clear that a "dictionary" sort ordering will consider "foo!" less
    1586              :  * than "foo bar".  CAUTION: Therefore, this function should be used only for
    1587              :  * estimation purposes when working in a non-C collation.
    1588              :  *
    1589              :  * To try to catch most cases where an extended string might otherwise sort
    1590              :  * before the result value, we determine which of the strings "Z", "z", "y",
    1591              :  * and "9" is seen as largest by the collation, and append that to the given
    1592              :  * prefix before trying to find a string that compares as larger.
    1593              :  *
    1594              :  * To search for a greater string, we repeatedly "increment" the rightmost
    1595              :  * character, using an encoding-specific character incrementer function.
    1596              :  * When it's no longer possible to increment the last character, we truncate
    1597              :  * off that character and start incrementing the next-to-rightmost.
    1598              :  * For example, if "z" were the last character in the sort order, then we
    1599              :  * could produce "foo" as a string greater than "fonz".
    1600              :  *
    1601              :  * This could be rather slow in the worst case, but in most cases we
    1602              :  * won't have to try more than one or two strings before succeeding.
    1603              :  *
    1604              :  * Note that it's important for the character incrementer not to be too anal
    1605              :  * about producing every possible character code, since in some cases the only
    1606              :  * way to get a larger string is to increment a previous character position.
    1607              :  * So we don't want to spend too much time trying every possible character
    1608              :  * code at the last position.  A good rule of thumb is to be sure that we
    1609              :  * don't try more than 256*K values for a K-byte character (and definitely
    1610              :  * not 256^K, which is what an exhaustive search would approach).
    1611              :  */
    1612              : static Const *
    1613         1812 : make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
    1614              : {
    1615         1812 :     Oid         datatype = str_const->consttype;
    1616              :     char       *workstr;
    1617              :     int         len;
    1618              :     Datum       cmpstr;
    1619         1812 :     char       *cmptxt = NULL;
    1620              :     mbcharacter_incrementer charinc;
    1621              : 
    1622              :     /*
    1623              :      * Get a modifiable copy of the prefix string in C-string format, and set
    1624              :      * up the string we will compare to as a Datum.  In C locale this can just
    1625              :      * be the given prefix string, otherwise we need to add a suffix.  Type
    1626              :      * BYTEA sorts bytewise so it never needs a suffix either.
    1627              :      */
    1628         1812 :     if (datatype == BYTEAOID)
    1629              :     {
    1630            0 :         bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
    1631              : 
    1632            0 :         len = VARSIZE_ANY_EXHDR(bstr);
    1633            0 :         workstr = (char *) palloc(len);
    1634            0 :         memcpy(workstr, VARDATA_ANY(bstr), len);
    1635              :         Assert(bstr == DatumGetPointer(str_const->constvalue));
    1636            0 :         cmpstr = str_const->constvalue;
    1637              :     }
    1638              :     else
    1639              :     {
    1640         1812 :         if (datatype == NAMEOID)
    1641            0 :             workstr = DatumGetCString(DirectFunctionCall1(nameout,
    1642              :                                                           str_const->constvalue));
    1643              :         else
    1644         1812 :             workstr = TextDatumGetCString(str_const->constvalue);
    1645         1812 :         len = strlen(workstr);
    1646         1812 :         if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
    1647         1799 :             cmpstr = str_const->constvalue;
    1648              :         else
    1649              :         {
    1650              :             /* If first time through, determine the suffix to use */
    1651              :             static char suffixchar = 0;
    1652              :             static Oid  suffixcollation = 0;
    1653              : 
    1654           13 :             if (!suffixchar || suffixcollation != collation)
    1655              :             {
    1656              :                 char       *best;
    1657              : 
    1658            3 :                 best = "Z";
    1659            3 :                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
    1660            0 :                     best = "z";
    1661            3 :                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
    1662            0 :                     best = "y";
    1663            3 :                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
    1664            0 :                     best = "9";
    1665            3 :                 suffixchar = *best;
    1666            3 :                 suffixcollation = collation;
    1667              :             }
    1668              : 
    1669              :             /* And build the string to compare to */
    1670           13 :             if (datatype == NAMEOID)
    1671              :             {
    1672            0 :                 cmptxt = palloc(len + 2);
    1673            0 :                 memcpy(cmptxt, workstr, len);
    1674            0 :                 cmptxt[len] = suffixchar;
    1675            0 :                 cmptxt[len + 1] = '\0';
    1676            0 :                 cmpstr = PointerGetDatum(cmptxt);
    1677              :             }
    1678              :             else
    1679              :             {
    1680           13 :                 cmptxt = palloc(VARHDRSZ + len + 1);
    1681           13 :                 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
    1682           13 :                 memcpy(VARDATA(cmptxt), workstr, len);
    1683           13 :                 *(VARDATA(cmptxt) + len) = suffixchar;
    1684           13 :                 cmpstr = PointerGetDatum(cmptxt);
    1685              :             }
    1686              :         }
    1687              :     }
    1688              : 
    1689              :     /* Select appropriate character-incrementer function */
    1690         1812 :     if (datatype == BYTEAOID)
    1691            0 :         charinc = byte_increment;
    1692              :     else
    1693         1812 :         charinc = pg_database_encoding_character_incrementer();
    1694              : 
    1695              :     /* And search ... */
    1696         1812 :     while (len > 0)
    1697              :     {
    1698              :         int         charlen;
    1699              :         unsigned char *lastchar;
    1700              : 
    1701              :         /* Identify the last character --- for bytea, just the last byte */
    1702         1812 :         if (datatype == BYTEAOID)
    1703            0 :             charlen = 1;
    1704              :         else
    1705         1812 :             charlen = len - pg_mbcliplen(workstr, len, len - 1);
    1706         1812 :         lastchar = (unsigned char *) (workstr + len - charlen);
    1707              : 
    1708              :         /*
    1709              :          * Try to generate a larger string by incrementing the last character
    1710              :          * (for BYTEA, we treat each byte as a character).
    1711              :          *
    1712              :          * Note: the incrementer function is expected to return true if it's
    1713              :          * generated a valid-per-the-encoding new character, otherwise false.
    1714              :          * The contents of the character on false return are unspecified.
    1715              :          */
    1716         1812 :         while (charinc(lastchar, charlen))
    1717              :         {
    1718              :             Const      *workstr_const;
    1719              : 
    1720         1812 :             if (datatype == BYTEAOID)
    1721            0 :                 workstr_const = string_to_bytea_const(workstr, len);
    1722              :             else
    1723         1812 :                 workstr_const = string_to_const(workstr, datatype);
    1724              : 
    1725         1812 :             if (DatumGetBool(FunctionCall2Coll(ltproc,
    1726              :                                                collation,
    1727              :                                                cmpstr,
    1728              :                                                workstr_const->constvalue)))
    1729              :             {
    1730              :                 /* Successfully made a string larger than cmpstr */
    1731         1812 :                 if (cmptxt)
    1732           13 :                     pfree(cmptxt);
    1733         1812 :                 pfree(workstr);
    1734         1812 :                 return workstr_const;
    1735              :             }
    1736              : 
    1737              :             /* No good, release unusable value and try again */
    1738            0 :             pfree(DatumGetPointer(workstr_const->constvalue));
    1739            0 :             pfree(workstr_const);
    1740              :         }
    1741              : 
    1742              :         /*
    1743              :          * No luck here, so truncate off the last character and try to
    1744              :          * increment the next one.
    1745              :          */
    1746            0 :         len -= charlen;
    1747            0 :         workstr[len] = '\0';
    1748              :     }
    1749              : 
    1750              :     /* Failed... */
    1751            0 :     if (cmptxt)
    1752            0 :         pfree(cmptxt);
    1753            0 :     pfree(workstr);
    1754              : 
    1755            0 :     return NULL;
    1756              : }
    1757              : 
    1758              : /*
    1759              :  * Generate a Datum of the appropriate type from a C string.
    1760              :  * Note that all of the supported types are pass-by-ref, so the
    1761              :  * returned value should be pfree'd if no longer needed.
                      :  *
                      :  * str must be a non-NULL C string; the Assert below is the only
                      :  * check made here.
                      :  *
                      :  * datatype selects the conversion: NAMEOID is handed to namein,
                      :  * BYTEAOID is handed to byteain, and every other OID, recognized
                      :  * or not, takes the CStringGetTextDatum() path.  No error is
                      :  * raised in this function for an unexpected datatype.
                      :  *
                      :  * NOTE(review): byteain presumably parses str as bytea input text
                      :  * rather than copying raw bytes; string_to_bytea_const() in this
                      :  * file is the length-counted alternative.  Confirm before using
                      :  * this path for binary data.
    1762              :  */
    1763              : static Datum
    1764        11330 : string_to_datum(const char *str, Oid datatype)
    1765              : {
    1766              :     Assert(str != NULL);
    1767              : 
    1768              :     /*
    1769              :      * We cheat a little by assuming that CStringGetTextDatum() will do for
    1770              :      * bpchar and varchar constants too...
                      :      *
                      :      * Only name and bytea are special-cased; the final else branch
                      :      * is the catch-all for everything else.
    1771              :      */
    1772        11330 :     if (datatype == NAMEOID)
    1773            0 :         return DirectFunctionCall1(namein, CStringGetDatum(str));
    1774        11330 :     else if (datatype == BYTEAOID)
    1775            0 :         return DirectFunctionCall1(byteain, CStringGetDatum(str));
    1776              :     else
    1777        11330 :         return CStringGetTextDatum(str);
    1778              : }
    1779              : 
    1780              : /*
    1781              :  * Generate a Const node of the appropriate type from a C string.
                      :  *
                      :  * The value Datum comes from string_to_datum(); the collation and
                      :  * constlen passed to makeConst() are hard-wired per type in the
                      :  * switch below.  Supported datatypes are TEXTOID, VARCHAROID,
                      :  * BPCHAROID, NAMEOID and BYTEAOID; any other OID reaches the
                      :  * default case and elog(ERROR).
                      :  *
                      :  * Note that the Datum is built before the datatype is validated,
                      :  * so an unsupported OID first goes through the text branch of
                      :  * string_to_datum() and only then hits the default case.
                      :  *
                      :  * NOTE(review): the remaining makeConst() arguments are presumably
                      :  * typmod = -1 (second argument) and constisnull = false,
                      :  * constbyval = false (last two); confirm against makeConst().
    1782              :  */
    1783              : static Const *
    1784        11330 : string_to_const(const char *str, Oid datatype)
    1785              : {
    1786        11330 :     Datum       conval = string_to_datum(str, datatype);
    1787              :     Oid         collation;
    1788              :     int         constlen;
    1789              : 
    1790              :     /*
    1791              :      * We only need to support a few datatypes here, so hard-wire properties
    1792              :      * instead of incurring the expense of catalog lookups.
                      :      *
                      :      * Each case sets both collation and constlen; the default case
                      :      * does not, but it never reaches the makeConst() call.
    1793              :      */
    1794        11330 :     switch (datatype)
    1795              :     {
    1796        11330 :         case TEXTOID:
    1797              :         case VARCHAROID:
    1798              :         case BPCHAROID:
    1799        11330 :             collation = DEFAULT_COLLATION_OID;
    1800        11330 :             constlen = -1;  /* presumably "variable length" -- confirm */
    1801        11330 :             break;
    1802              : 
    1803            0 :         case NAMEOID:
    1804            0 :             collation = C_COLLATION_OID;
    1805            0 :             constlen = NAMEDATALEN; /* name consts use NAMEDATALEN */
    1806            0 :             break;
    1807              : 
    1808            0 :         case BYTEAOID:
    1809            0 :             collation = InvalidOid; /* no collation for bytea */
    1810            0 :             constlen = -1;
    1811            0 :             break;
    1812              : 
    1813            0 :         default:
    1814            0 :             elog(ERROR, "unexpected datatype in string_to_const: %u",
    1815              :                  datatype);
    1816              :             return NULL;    /* presumably only to keep compilers quiet */
    1817              :     }
    1818              : 
    1819        11330 :     return makeConst(datatype, -1, collation, constlen,
    1820              :                      conval, false, false);
    1821              : }
    1822              : 
    1823              : /*
    1824              :  * Generate a Const node of bytea type from a binary C string and a length.
                      :  *
                      :  * Exactly str_len bytes are copied from str with memcpy(), so str
                      :  * need not be NUL-terminated and embedded zero bytes are kept.
                      :  * The value lives in a new palloc() buffer of VARHDRSZ + str_len
                      :  * bytes whose size field is set to that same total.
                      :  *
                      :  * The makeConst() arguments (-1, InvalidOid, -1, ..., false, false)
                      :  * are the same ones string_to_const() passes for BYTEAOID.
    1825              :  */
    1826              : static Const *
    1827            6 : string_to_bytea_const(const char *str, size_t str_len)
    1828              : {
    1829            6 :     bytea      *bstr = palloc(VARHDRSZ + str_len); /* header + payload */
    1830              :     Datum       conval;
    1831              : 
    1832            6 :     memcpy(VARDATA(bstr), str, str_len);    /* payload goes after the header */
    1833            6 :     SET_VARSIZE(bstr, VARHDRSZ + str_len);  /* total size, header included */
    1834            6 :     conval = PointerGetDatum(bstr);
    1835              : 
    1836            6 :     return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
    1837              : }
        

Generated by: LCOV version 2.0-1