LCOV - code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 1528 1876 81.4 %
Date: 2019-06-19 14:06:47 Functions: 124 144 86.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/tuptoaster.h"
      21             : #include "catalog/pg_collation.h"
      22             : #include "catalog/pg_type.h"
      23             : #include "common/int.h"
      24             : #include "lib/hyperloglog.h"
      25             : #include "libpq/pqformat.h"
      26             : #include "miscadmin.h"
      27             : #include "parser/scansup.h"
      28             : #include "port/pg_bswap.h"
      29             : #include "regex/regex.h"
      30             : #include "utils/builtins.h"
      31             : #include "utils/bytea.h"
      32             : #include "utils/hashutils.h"
      33             : #include "utils/lsyscache.h"
      34             : #include "utils/memutils.h"
      35             : #include "utils/pg_locale.h"
      36             : #include "utils/sortsupport.h"
      37             : #include "utils/varlena.h"
      38             : 
      39             : 
      40             : /* GUC variable */
      41             : int         bytea_output = BYTEA_OUTPUT_HEX;
      42             : 
      43             : typedef struct varlena unknown;
      44             : typedef struct varlena VarString;
      45             : 
      46             : /*
      47             :  * Working state shared by the text_position_* functions (setup, next, get_match_*, cleanup).
      48             :  */
      49             : typedef struct
      50             : {
      51             :     bool        is_multibyte;   /* true if multibyte encoding */
      52             :     bool        is_multibyte_char_in_char;  /* NOTE(review): undocumented here; confirm meaning where text_position_setup assigns it */
      53             : 
      54             :     char       *str1;           /* haystack string */
      55             :     char       *str2;           /* needle string */
      56             :     int         len1;           /* length of str1 in bytes */
      57             :     int         len2;           /* length of str2 in bytes */
      58             : 
      59             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      60             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      61             :     int         skiptable[256]; /* skip distance for given mismatched char */
      62             : 
      63             :     char       *last_match;     /* pointer to last match in 'str1' */
      64             : 
      65             :     /*
      66             :      * Sometimes we need to convert the byte position of a match to a
      67             :      * character position.  These store the last position that was converted,
      68             :      * so that on the next call, we can continue from that point, rather than
      69             :      * count characters from the very beginning.
      70             :      */
      71             :     char       *refpoint;       /* pointer within original haystack string */
      72             :     int         refpos;         /* 0-based character offset of the same point */
      73             : } TextPositionState;
      74             : 
      75             : typedef struct
      76             : {
      77             :     char       *buf1;           /* 1st string, or abbreviation original string
      78             :                                  * buf */
      79             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      80             :     int         buflen1;        /* NOTE(review): presumably allocated size of buf1 -- confirm */
      81             :     int         buflen2;        /* NOTE(review): presumably allocated size of buf2 -- confirm */
      82             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      83             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      84             :     int         last_returned;  /* Last comparison result (cache) */
      85             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      86             :     bool        collate_c;      /* NOTE(review): presumably true when sorting with the "C" collation -- confirm */
      87             :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
      88             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      89             :     hyperLogLogState full_card; /* Full key cardinality state */
      90             :     double      prop_card;      /* Required cardinality proportion */
      91             :     pg_locale_t locale;         /* NOTE(review): presumably the locale used by the *_locale comparators -- confirm */
      92             : } VarStringSortSupport;
      93             : 
      94             : /*
      95             :  * This should be large enough that most strings will fit, but small enough
      96             :  * that we feel comfortable putting it on the stack
      97             :  */
      98             : #define TEXTBUFLEN      1024
      99             : 
     100             : #define DatumGetUnknownP(X)         ((unknown *) PG_DETOAST_DATUM(X))
     101             : #define DatumGetUnknownPCopy(X)     ((unknown *) PG_DETOAST_DATUM_COPY(X))
     102             : #define PG_GETARG_UNKNOWN_P(n)      DatumGetUnknownP(PG_GETARG_DATUM(n))
     103             : #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
     104             : #define PG_RETURN_UNKNOWN_P(x)      PG_RETURN_POINTER(x)
     105             : 
     106             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
     107             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
     108             : 
     109             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
     110             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
     111             : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
     112             : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
     113             : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
     114             : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
     115             : static int  varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
     116             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
     117             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     118             : static int32 text_length(Datum str);
     119             : static text *text_catenate(text *t1, text *t2);
     120             : static text *text_substring(Datum str,
     121             :                             int32 start,
     122             :                             int32 length,
     123             :                             bool length_not_specified);
     124             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     125             : static int  text_position(text *t1, text *t2, Oid collid);
     126             : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
     127             : static bool text_position_next(TextPositionState *state);
     128             : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
     129             : static char *text_position_get_match_ptr(TextPositionState *state);
     130             : static int  text_position_get_match_pos(TextPositionState *state);
     131             : static void text_position_cleanup(TextPositionState *state);
     132             : static void check_collation_set(Oid collid);
     133             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     134             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     135             : static bytea *bytea_substring(Datum str,
     136             :                               int S,
     137             :                               int L,
     138             :                               bool length_not_specified);
     139             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     140             : static void appendStringInfoText(StringInfo str, const text *t);
     141             : static Datum text_to_array_internal(PG_FUNCTION_ARGS);
     142             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     143             :                                     const char *fldsep, const char *null_string);
     144             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
     145             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     146             :                                      int *value);
     147             : static const char *text_format_parse_format(const char *start_ptr,
     148             :                                             const char *end_ptr,
     149             :                                             int *argpos, int *widthpos,
     150             :                                             int *flags, int *width);
     151             : static void text_format_string_conversion(StringInfo buf, char conversion,
     152             :                                           FmgrInfo *typOutputInfo,
     153             :                                           Datum value, bool isNull,
     154             :                                           int flags, int width);
     155             : static void text_format_append_string(StringInfo buf, const char *str,
     156             :                                       int flags, int width);
     157             : 
     158             : 
     159             : /*****************************************************************************
     160             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     161             :  *****************************************************************************/
     162             : 
     163             : /*
     164             :  * cstring_to_text
     165             :  *
     166             :  * Create a text value from a null-terminated C string.
     167             :  *
     168             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     169             :  */
     170             : text *
     171     4832028 : cstring_to_text(const char *s)
     172             : {
     173     4832028 :     return cstring_to_text_with_len(s, strlen(s));
     174             : }
     175             : 
     176             : /*
     177             :  * cstring_to_text_with_len
     178             :  *
     179             :  * Same as cstring_to_text except the caller specifies the string length;
     180             :  * the string need not be null_terminated.
     181             :  */
     182             : text *
     183     8007736 : cstring_to_text_with_len(const char *s, int len)
     184             : {
     185     8007736 :     text       *result = (text *) palloc(len + VARHDRSZ);
     186             : 
     187     8007736 :     SET_VARSIZE(result, len + VARHDRSZ);
     188     8007736 :     memcpy(VARDATA(result), s, len);
     189             : 
     190     8007736 :     return result;
     191             : }
     192             : 
     193             : /*
     194             :  * text_to_cstring
     195             :  *
     196             :  * Create a palloc'd, null-terminated C string from a text value.
     197             :  *
     198             :  * We support being passed a compressed or toasted text value.
     199             :  * This is a bit bogus since such values shouldn't really be referred to as
     200             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     201             :  * case here, we'd need another routine that did, anyway.
     202             :  */
     203             : char *
     204     4100884 : text_to_cstring(const text *t)
     205             : {
     206             :     /* must cast away the const, unfortunately */
     207     4100884 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
     208     4100884 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
     209             :     char       *result;
     210             : 
     211     4100884 :     result = (char *) palloc(len + 1);
     212     4100884 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     213     4100884 :     result[len] = '\0';
     214             : 
     215     4100884 :     if (tunpacked != t)
     216       57160 :         pfree(tunpacked);
     217             : 
     218     4100884 :     return result;
     219             : }
     220             : 
     221             : /*
     222             :  * text_to_cstring_buffer
     223             :  *
     224             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     225             :  *
     226             :  * The text string is truncated if necessary to fit.  The result is
     227             :  * guaranteed null-terminated (unless dst_len == 0).
     228             :  *
     229             :  * We support being passed a compressed or toasted text value.
     230             :  * This is a bit bogus since such values shouldn't really be referred to as
     231             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     232             :  * case here, we'd need another routine that did, anyway.
     233             :  */
     234             : void
     235         390 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     236             : {
     237             :     /* must cast away the const, unfortunately */
     238         390 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
     239         390 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     240             : 
     241         390 :     if (dst_len > 0)
     242             :     {
     243         390 :         dst_len--;
     244         390 :         if (dst_len >= src_len)
     245         390 :             dst_len = src_len;
     246             :         else                    /* ensure truncation is encoding-safe */
     247           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     248         390 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
     249         390 :         dst[dst_len] = '\0';
     250             :     }
     251             : 
     252         390 :     if (srcunpacked != src)
     253           0 :         pfree(srcunpacked);
     254         390 : }
     255             : 
     256             : 
     257             : /*****************************************************************************
     258             :  *   USER I/O ROUTINES                                                       *
     259             :  *****************************************************************************/
     260             : 
     261             : 
     262             : #define VAL(CH)         ((CH) - '0')
     263             : #define DIG(VAL)        ((VAL) + '0')
     264             : 
     265             : /*
     266             :  *      byteain         - converts from printable representation of byte array
     267             :  *
     268             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     269             :  *      converted to internal form.  '\' must be passed as '\\'.
     270             :  *      ereport(ERROR, ...) if bad form.
     271             :  *
     272             :  *      BUGS:
     273             :  *              The input is scanned twice.
     274             :  *              The error checking of input is minimal.
     275             :  */
     276             : Datum
     277        9994 : byteain(PG_FUNCTION_ARGS)
     278             : {
     279        9994 :     char       *inputText = PG_GETARG_CSTRING(0);
     280             :     char       *tp;
     281             :     char       *rp;
     282             :     int         bc;
     283             :     bytea      *result;
     284             : 
     285             :     /* Recognize hex input */
     286        9994 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     287             :     {
     288          44 :         size_t      len = strlen(inputText);
     289             : 
     290          44 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     291          44 :         result = palloc(bc);
     292          44 :         bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
     293          36 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     294             : 
     295          36 :         PG_RETURN_BYTEA_P(result);
     296             :     }
     297             : 
     298             :     /* Else, it's the traditional escaped style */
     299      198950 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
     300             :     {
     301      189004 :         if (tp[0] != '\\')
     302      188362 :             tp++;
     303        1284 :         else if ((tp[0] == '\\') &&
     304        1922 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     305        1914 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     306        1276 :                  (tp[3] >= '0' && tp[3] <= '7'))
     307         638 :             tp += 4;
     308           8 :         else if ((tp[0] == '\\') &&
     309           4 :                  (tp[1] == '\\'))
     310           0 :             tp += 2;
     311             :         else
     312             :         {
     313             :             /*
     314             :              * one backslash, not followed by another or ### valid octal
     315             :              */
     316           4 :             ereport(ERROR,
     317             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     318             :                      errmsg("invalid input syntax for type %s", "bytea")));
     319             :         }
     320             :     }
     321             : 
     322        9946 :     bc += VARHDRSZ;
     323             : 
     324        9946 :     result = (bytea *) palloc(bc);
     325        9946 :     SET_VARSIZE(result, bc);
     326             : 
     327        9946 :     tp = inputText;
     328        9946 :     rp = VARDATA(result);
     329      208884 :     while (*tp != '\0')
     330             :     {
     331      188992 :         if (tp[0] != '\\')
     332      188354 :             *rp++ = *tp++;
     333        1276 :         else if ((tp[0] == '\\') &&
     334        1914 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     335        1914 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     336        1276 :                  (tp[3] >= '0' && tp[3] <= '7'))
     337             :         {
     338         638 :             bc = VAL(tp[1]);
     339         638 :             bc <<= 3;
     340         638 :             bc += VAL(tp[2]);
     341         638 :             bc <<= 3;
     342         638 :             *rp++ = bc + VAL(tp[3]);
     343             : 
     344         638 :             tp += 4;
     345             :         }
     346           0 :         else if ((tp[0] == '\\') &&
     347           0 :                  (tp[1] == '\\'))
     348             :         {
     349           0 :             *rp++ = '\\';
     350           0 :             tp += 2;
     351             :         }
     352             :         else
     353             :         {
     354             :             /*
     355             :              * We should never get here. The first pass should not allow it.
     356             :              */
     357           0 :             ereport(ERROR,
     358             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     359             :                      errmsg("invalid input syntax for type %s", "bytea")));
     360             :         }
     361             :     }
     362             : 
     363        9946 :     PG_RETURN_BYTEA_P(result);
     364             : }
     365             : 
     366             : /*
     367             :  *      byteaout        - converts to printable representation of byte array
     368             :  *
     369             :  *      In the traditional escaped format, non-printable characters are
     370             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     371             :  */
     372             : Datum
     373        4864 : byteaout(PG_FUNCTION_ARGS)
     374             : {
     375        4864 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     376             :     char       *result;
     377             :     char       *rp;
     378             : 
     379        4864 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     380             :     {
     381             :         /* Print hex format */
     382        4658 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     383        4658 :         *rp++ = '\\';
     384        4658 :         *rp++ = 'x';
     385        4658 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     386             :     }
     387         206 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     388             :     {
     389             :         /* Print traditional escaped format */
     390             :         char       *vp;
     391             :         int         len;
     392             :         int         i;
     393             : 
     394         206 :         len = 1;                /* empty string has 1 char */
     395         206 :         vp = VARDATA_ANY(vlena);
     396        2060 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     397             :         {
     398        1854 :             if (*vp == '\\')
     399           0 :                 len += 2;
     400        1854 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     401         328 :                 len += 4;
     402             :             else
     403        1526 :                 len++;
     404             :         }
     405         206 :         rp = result = (char *) palloc(len);
     406         206 :         vp = VARDATA_ANY(vlena);
     407        2060 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     408             :         {
     409        1854 :             if (*vp == '\\')
     410             :             {
     411           0 :                 *rp++ = '\\';
     412           0 :                 *rp++ = '\\';
     413             :             }
     414        1854 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     415         328 :             {
     416             :                 int         val;    /* holds unprintable chars */
     417             : 
     418         328 :                 val = *vp;
     419         328 :                 rp[0] = '\\';
     420         328 :                 rp[3] = DIG(val & 07);
     421         328 :                 val >>= 3;
     422         328 :                 rp[2] = DIG(val & 07);
     423         328 :                 val >>= 3;
     424         328 :                 rp[1] = DIG(val & 03);
     425         328 :                 rp += 4;
     426             :             }
     427             :             else
     428        1526 :                 *rp++ = *vp;
     429             :         }
     430             :     }
     431             :     else
     432             :     {
     433           0 :         elog(ERROR, "unrecognized bytea_output setting: %d",
     434             :              bytea_output);
     435             :         rp = result = NULL;     /* keep compiler quiet */
     436             :     }
     437        4864 :     *rp = '\0';
     438        4864 :     PG_RETURN_CSTRING(result);
     439             : }
     440             : 
     441             : /*
     442             :  *      bytearecv           - converts external binary format to bytea
     443             :  */
     444             : Datum
     445         696 : bytearecv(PG_FUNCTION_ARGS)
     446             : {
     447         696 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     448             :     bytea      *result;
     449             :     int         nbytes;
     450             : 
     451         696 :     nbytes = buf->len - buf->cursor;
     452         696 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     453         696 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     454         696 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     455         696 :     PG_RETURN_BYTEA_P(result);
     456             : }
     457             : 
     458             : /*
     459             :  *      byteasend           - converts bytea to binary format
     460             :  *
     461             :  * This is a special case: just copy the input...
     462             :  */
     463             : Datum
     464        3760 : byteasend(PG_FUNCTION_ARGS)
     465             : {
     466        3760 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     467             : 
     468        3760 :     PG_RETURN_BYTEA_P(vlena);
     469             : }
     470             : 
     471             : Datum
     472          28 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     473             : {
     474             :     StringInfo  state;
     475             : 
     476          28 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     477             : 
     478             :     /* Append the value unless null. */
     479          28 :     if (!PG_ARGISNULL(1))
     480             :     {
     481          28 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     482             : 
     483             :         /* On the first time through, we ignore the delimiter. */
     484          28 :         if (state == NULL)
     485          16 :             state = makeStringAggState(fcinfo);
     486          12 :         else if (!PG_ARGISNULL(2))
     487             :         {
     488           8 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     489             : 
     490           8 :             appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
     491             :         }
     492             : 
     493          28 :         appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
     494             :     }
     495             : 
     496             :     /*
     497             :      * The transition type for string_agg() is declared to be "internal",
     498             :      * which is a pass-by-value type the same size as a pointer.
     499             :      */
     500          28 :     PG_RETURN_POINTER(state);
     501             : }
     502             : 
     503             : Datum
     504          20 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     505             : {
     506             :     StringInfo  state;
     507             : 
     508             :     /* cannot be called directly because of internal-type argument */
     509             :     Assert(AggCheckCallContext(fcinfo, NULL));
     510             : 
     511          20 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     512             : 
     513          20 :     if (state != NULL)
     514             :     {
     515             :         bytea      *result;
     516             : 
     517          16 :         result = (bytea *) palloc(state->len + VARHDRSZ);
     518          16 :         SET_VARSIZE(result, state->len + VARHDRSZ);
     519          16 :         memcpy(VARDATA(result), state->data, state->len);
     520          16 :         PG_RETURN_BYTEA_P(result);
     521             :     }
     522             :     else
     523           4 :         PG_RETURN_NULL();
     524             : }
     525             : 
     526             : /*
     527             :  *      textin          - converts "..." to internal representation
     528             :  */
     529             : Datum
     530     3871780 : textin(PG_FUNCTION_ARGS)
     531             : {
     532     3871780 :     char       *inputText = PG_GETARG_CSTRING(0);
     533             : 
     534     3871780 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     535             : }
     536             : 
     537             : /*
     538             :  *      textout         - converts internal representation to "..."
     539             :  */
     540             : Datum
     541     1725570 : textout(PG_FUNCTION_ARGS)
     542             : {
     543     1725570 :     Datum       txt = PG_GETARG_DATUM(0);
     544             : 
     545     1725570 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     546             : }
     547             : 
     548             : /*
     549             :  *      textrecv            - converts external binary format to text
     550             :  */
     551             : Datum
     552          12 : textrecv(PG_FUNCTION_ARGS)
     553             : {
     554          12 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     555             :     text       *result;
     556             :     char       *str;
     557             :     int         nbytes;
     558             : 
     559          12 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     560             : 
     561          12 :     result = cstring_to_text_with_len(str, nbytes);
     562          12 :     pfree(str);
     563          12 :     PG_RETURN_TEXT_P(result);
     564             : }
     565             : 
     566             : /*
     567             :  *      textsend            - converts text to binary format
     568             :  */
     569             : Datum
     570        3324 : textsend(PG_FUNCTION_ARGS)
     571             : {
     572        3324 :     text       *t = PG_GETARG_TEXT_PP(0);
     573             :     StringInfoData buf;
     574             : 
     575        3324 :     pq_begintypsend(&buf);
     576        3324 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
     577        3324 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     578             : }
     579             : 
     580             : 
     581             : /*
     582             :  *      unknownin           - converts "..." to internal representation
     583             :  */
     584             : Datum
     585           0 : unknownin(PG_FUNCTION_ARGS)
     586             : {
     587           0 :     char       *str = PG_GETARG_CSTRING(0);
     588             : 
     589             :     /* representation is same as cstring */
     590           0 :     PG_RETURN_CSTRING(pstrdup(str));
     591             : }
     592             : 
     593             : /*
     594             :  *      unknownout          - converts internal representation to "..."
     595             :  */
     596             : Datum
     597         318 : unknownout(PG_FUNCTION_ARGS)
     598             : {
     599             :     /* representation is same as cstring */
     600         318 :     char       *str = PG_GETARG_CSTRING(0);
     601             : 
     602         318 :     PG_RETURN_CSTRING(pstrdup(str));
     603             : }
     604             : 
     605             : /*
     606             :  *      unknownrecv         - converts external binary format to unknown
     607             :  */
     608             : Datum
     609           0 : unknownrecv(PG_FUNCTION_ARGS)
     610             : {
     611           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     612             :     char       *str;
     613             :     int         nbytes;
     614             : 
     615           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     616             :     /* representation is same as cstring */
     617           0 :     PG_RETURN_CSTRING(str);
     618             : }
     619             : 
     620             : /*
     621             :  *      unknownsend         - converts unknown to binary format
     622             :  */
     623             : Datum
     624           0 : unknownsend(PG_FUNCTION_ARGS)
     625             : {
     626             :     /* representation is same as cstring */
     627           0 :     char       *str = PG_GETARG_CSTRING(0);
     628             :     StringInfoData buf;
     629             : 
     630           0 :     pq_begintypsend(&buf);
     631           0 :     pq_sendtext(&buf, str, strlen(str));
     632           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     633             : }
     634             : 
     635             : 
     636             : /* ========== PUBLIC ROUTINES ========== */
     637             : 
     638             : /*
     639             :  * textlen -
     640             :  *    returns the logical length of a text*
     641             :  *     (which is less than the VARSIZE of the text*)
     642             :  */
     643             : Datum
     644      202544 : textlen(PG_FUNCTION_ARGS)
     645             : {
     646      202544 :     Datum       str = PG_GETARG_DATUM(0);
     647             : 
     648             :     /* try to avoid decompressing argument */
     649      202544 :     PG_RETURN_INT32(text_length(str));
     650             : }
     651             : 
     652             : /*
     653             :  * text_length -
     654             :  *  Does the real work for textlen()
     655             :  *
     656             :  *  This is broken out so it can be called directly by other string processing
     657             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     658             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     659             :  *  in some cases.
     660             :  */
     661             : static int32
     662      202552 : text_length(Datum str)
     663             : {
     664             :     /* fastpath when max encoding length is one */
     665      202552 :     if (pg_database_encoding_max_length() == 1)
     666          24 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     667             :     else
     668             :     {
     669      202528 :         text       *t = DatumGetTextPP(str);
     670             : 
     671      202528 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
     672             :                                              VARSIZE_ANY_EXHDR(t)));
     673             :     }
     674             : }
     675             : 
     676             : /*
     677             :  * textoctetlen -
     678             :  *    returns the physical length of a text*
     679             :  *     (which is less than the VARSIZE of the text*)
     680             :  */
     681             : Datum
     682          46 : textoctetlen(PG_FUNCTION_ARGS)
     683             : {
     684          46 :     Datum       str = PG_GETARG_DATUM(0);
     685             : 
     686             :     /* We need not detoast the input at all */
     687          46 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     688             : }
     689             : 
     690             : /*
     691             :  * textcat -
     692             :  *    takes two text* and returns a text* that is the concatenation of
     693             :  *    the two.
     694             :  *
     695             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     696             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     697             :  * Allocate space for output in all cases.
     698             :  * XXX - thomas 1997-07-10
     699             :  */
     700             : Datum
     701     1011968 : textcat(PG_FUNCTION_ARGS)
     702             : {
     703     1011968 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     704     1011968 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     705             : 
     706     1011968 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     707             : }
     708             : 
     709             : /*
     710             :  * text_catenate
     711             :  *  Guts of textcat(), broken out so it can be used by other functions
     712             :  *
     713             :  * Arguments can be in short-header form, but not compressed or out-of-line
     714             :  */
     715             : static text *
     716     1012032 : text_catenate(text *t1, text *t2)
     717             : {
     718             :     text       *result;
     719             :     int         len1,
     720             :                 len2,
     721             :                 len;
     722             :     char       *ptr;
     723             : 
     724     1012032 :     len1 = VARSIZE_ANY_EXHDR(t1);
     725     1012032 :     len2 = VARSIZE_ANY_EXHDR(t2);
     726             : 
     727             :     /* paranoia ... probably should throw error instead? */
     728     1012032 :     if (len1 < 0)
     729           0 :         len1 = 0;
     730     1012032 :     if (len2 < 0)
     731           0 :         len2 = 0;
     732             : 
     733     1012032 :     len = len1 + len2 + VARHDRSZ;
     734     1012032 :     result = (text *) palloc(len);
     735             : 
     736             :     /* Set size of result string... */
     737     1012032 :     SET_VARSIZE(result, len);
     738             : 
     739             :     /* Fill data field of result string... */
     740     1012032 :     ptr = VARDATA(result);
     741     1012032 :     if (len1 > 0)
     742     1011520 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     743     1012032 :     if (len2 > 0)
     744     1011928 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     745             : 
     746     1012032 :     return result;
     747             : }
     748             : 
     749             : /*
     750             :  * charlen_to_bytelen()
     751             :  *  Compute the number of bytes occupied by n characters starting at *p
     752             :  *
     753             :  * The caller must ensure that there actually are n characters;
     754             :  * the string need not be null-terminated.
     755             :  */
     756             : static int
     757        1162 : charlen_to_bytelen(const char *p, int n)
     758             : {
     759        1162 :     if (pg_database_encoding_max_length() == 1)
     760             :     {
     761             :         /* Optimization for single-byte encodings */
     762           0 :         return n;
     763             :     }
     764             :     else
     765             :     {
     766             :         const char *s;
     767             : 
     768     5669088 :         for (s = p; n > 0; n--)
     769     5667926 :             s += pg_mblen(s);
     770             : 
     771        1162 :         return s - p;
     772             :     }
     773             : }
     774             : 
     775             : /*
     776             :  * text_substr()
     777             :  * Return a substring starting at the specified position.
     778             :  * - thomas 1997-12-31
     779             :  *
     780             :  * Input:
     781             :  *  - string
     782             :  *  - starting position (is one-based)
     783             :  *  - string length
     784             :  *
     785             :  * If the starting position is zero or less, then return from the start of the string
     786             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     787             :  * If the length is negative, an error is raised; if it is omitted, return the remaining string.
     788             :  *
     789             :  * Added multibyte support.
     790             :  * - Tatsuo Ishii 1998-4-21
     791             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     792             :  * Formerly returned the entire string; now returns a portion.
     793             :  * - Thomas Lockhart 1998-12-10
     794             :  * Now uses faster TOAST-slicing interface
     795             :  * - John Gray 2002-02-22
     796             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     797             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     798             :  * error; if E < 1, return '', not entire string). Fixed MB-related bug where
     799             :  * garbage characters were sometimes returned when S > LC and S < LC + 4.
     800             :  * - Joe Conway 2002-08-10
     801             :  */
     802             : Datum
     803       69242 : text_substr(PG_FUNCTION_ARGS)
     804             : {
     805       69242 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     806             :                                     PG_GETARG_INT32(1),
     807             :                                     PG_GETARG_INT32(2),
     808             :                                     false));
     809             : }
     810             : 
     811             : /*
     812             :  * text_substr_no_len -
     813             :  *    Wrapper to avoid opr_sanity failure due to
     814             :  *    one function accepting a different number of args.
     815             :  */
     816             : Datum
     817          26 : text_substr_no_len(PG_FUNCTION_ARGS)
     818             : {
     819          26 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     820             :                                     PG_GETARG_INT32(1),
     821             :                                     -1, true));
     822             : }
     823             : 
     824             : /*
     825             :  * text_substring -
     826             :  *  Does the real work for text_substr() and text_substr_no_len()
     827             :  *
     828             :  *  This is broken out so it can be called directly by other string processing
     829             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     830             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     831             :  *  of it in some cases.
     832             :  *
     833             :  *  The result is always a freshly palloc'd datum.
     834             :  */
     835             : static text *
     836       94488 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     837             : {
     838       94488 :     int32       eml = pg_database_encoding_max_length();
     839       94488 :     int32       S = start;      /* start position */
     840             :     int32       S1;             /* adjusted start position */
     841             :     int32       L1;             /* adjusted substring length */
     842             : 
     843             :     /* life is easy if the encoding max length is 1 */
     844       94488 :     if (eml == 1)
     845             :     {
     846          12 :         S1 = Max(S, 1);
     847             : 
     848          12 :         if (length_not_specified)   /* special case - get length to end of
     849             :                                      * string */
     850           0 :             L1 = -1;
     851             :         else
     852             :         {
     853             :             /* end position */
     854          12 :             int         E = S + length;
     855             : 
     856             :             /*
     857             :              * A negative value for L is the only way for the end position to
     858             :              * be before the start. SQL99 says to throw an error.
     859             :              */
     860          12 :             if (E < S)
     861           0 :                 ereport(ERROR,
     862             :                         (errcode(ERRCODE_SUBSTRING_ERROR),
     863             :                          errmsg("negative substring length not allowed")));
     864             : 
     865             :             /*
     866             :              * A zero or negative value for the end position can happen if the
     867             :              * start was negative or one. SQL99 says to return a zero-length
     868             :              * string.
     869             :              */
     870          12 :             if (E < 1)
     871           0 :                 return cstring_to_text("");
     872             : 
     873          12 :             L1 = E - S1;
     874             :         }
     875             : 
     876             :         /*
     877             :          * If the start position is past the end of the string, SQL99 says to
     878             :          * return a zero-length string -- DatumGetTextPSlice() will do
     879             :          * that for us. Convert to a zero-based starting position.
     880             :          */
     881          12 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     882             :     }
     883       94476 :     else if (eml > 1)
     884             :     {
     885             :         /*
     886             :          * When encoding max length is > 1, we can't get LC without
     887             :          * detoasting, so we'll grab a conservatively large slice now and go
     888             :          * back later to do the right thing
     889             :          */
     890             :         int32       slice_start;
     891             :         int32       slice_size;
     892             :         int32       slice_strlen;
     893             :         text       *slice;
     894             :         int32       E1;
     895             :         int32       i;
     896             :         char       *p;
     897             :         char       *s;
     898             :         text       *ret;
     899             : 
     900             :         /*
     901             :          * if S is past the end of the string, the tuple toaster will return a
     902             :          * zero-length string to us
     903             :          */
     904       94476 :         S1 = Max(S, 1);
     905             : 
     906             :         /*
     907             :          * We need to start at position zero because there is no way to know
     908             :          * in advance which byte offset corresponds to the supplied start
     909             :          * position.
     910             :          */
     911       94476 :         slice_start = 0;
     912             : 
     913       94476 :         if (length_not_specified)   /* special case - get length to end of
     914             :                                      * string */
     915          58 :             slice_size = L1 = -1;
     916             :         else
     917             :         {
     918       94418 :             int         E = S + length;
     919             : 
     920             :             /*
     921             :              * A negative value for L is the only way for the end position to
     922             :              * be before the start. SQL99 says to throw an error.
     923             :              */
     924       94418 :             if (E < S)
     925           4 :                 ereport(ERROR,
     926             :                         (errcode(ERRCODE_SUBSTRING_ERROR),
     927             :                          errmsg("negative substring length not allowed")));
     928             : 
     929             :             /*
     930             :              * A zero or negative value for the end position can happen if the
     931             :              * start was negative or one. SQL99 says to return a zero-length
     932             :              * string.
     933             :              */
     934       94414 :             if (E < 1)
     935           0 :                 return cstring_to_text("");
     936             : 
     937             :             /*
     938             :              * if E is past the end of the string, the tuple toaster will
     939             :              * truncate the length for us
     940             :              */
     941       94414 :             L1 = E - S1;
     942             : 
     943             :             /*
     944             :              * Total slice size in bytes can't be any longer than the start
     945             :              * position plus substring length times the encoding max length.
     946             :              */
     947       94414 :             slice_size = (S1 + L1) * eml;
     948             :         }
     949             : 
     950             :         /*
     951             :          * If we're working with an untoasted source, no need to do an extra
     952             :          * copying step.
     953             :          */
     954      188920 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
     955       94448 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
     956          68 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
     957             :         else
     958       94404 :             slice = (text *) DatumGetPointer(str);
     959             : 
     960             :         /* see if we got back an empty string */
     961       94472 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
     962             :         {
     963           0 :             if (slice != (text *) DatumGetPointer(str))
     964           0 :                 pfree(slice);
     965           0 :             return cstring_to_text("");
     966             :         }
     967             : 
     968             :         /* Now we can get the actual length of the slice in MB characters */
     969      283416 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
     970      283416 :                                             VARSIZE_ANY_EXHDR(slice));
     971             : 
     972             :         /*
     973             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
     974             :          * says to return a zero-length string.
     975             :          */
     976       94472 :         if (S1 > slice_strlen)
     977             :         {
     978          20 :             if (slice != (text *) DatumGetPointer(str))
     979           0 :                 pfree(slice);
     980          20 :             return cstring_to_text("");
     981             :         }
     982             : 
     983             :         /*
     984             :          * Adjust L1 and E1 now that we know the slice string length. Again
     985             :          * remember that S1 is one based, and slice_start is zero based.
     986             :          */
     987       94452 :         if (L1 > -1)
     988       94414 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
     989             :         else
     990          38 :             E1 = slice_start + 1 + slice_strlen;
     991             : 
     992             :         /*
     993             :          * Find the start position in the slice; remember S1 is not zero based
     994             :          */
     995       94452 :         p = VARDATA_ANY(slice);
     996     3294458 :         for (i = 0; i < S1 - 1; i++)
     997     3200006 :             p += pg_mblen(p);
     998             : 
     999             :         /* hang onto a pointer to our start position */
    1000       94452 :         s = p;
    1001             : 
    1002             :         /*
    1003             :          * Count the actual bytes used by the substring of the requested
    1004             :          * length.
    1005             :          */
    1006     1641434 :         for (i = S1; i < E1; i++)
    1007     1546982 :             p += pg_mblen(p);
    1008             : 
    1009       94452 :         ret = (text *) palloc(VARHDRSZ + (p - s));
    1010       94452 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
    1011       94452 :         memcpy(VARDATA(ret), s, (p - s));
    1012             : 
    1013       94452 :         if (slice != (text *) DatumGetPointer(str))
    1014          68 :             pfree(slice);
    1015             : 
    1016       94452 :         return ret;
    1017             :     }
    1018             :     else
    1019           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
    1020             : 
    1021             :     /* not reached: suppress compiler warning */
    1022             :     return NULL;
    1023             : }
    1024             : 
    1025             : /*
    1026             :  * textoverlay
    1027             :  *  Replace specified substring of first string with second
    1028             :  *
    1029             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1030             :  * This code is a direct implementation of what the standard says.
    1031             :  */
    1032             : Datum
    1033          24 : textoverlay(PG_FUNCTION_ARGS)
    1034             : {
    1035          24 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1036          24 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1037          24 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1038          24 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1039             : 
    1040          24 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1041             : }
    1042             : 
    1043             : Datum
    1044           8 : textoverlay_no_len(PG_FUNCTION_ARGS)
    1045             : {
    1046           8 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1047           8 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1048           8 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1049             :     int         sl;
    1050             : 
    1051           8 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
    1052           8 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1053             : }
    1054             : 
    1055             : static text *
    1056          32 : text_overlay(text *t1, text *t2, int sp, int sl)
    1057             : {
    1058             :     text       *result;
    1059             :     text       *s1;
    1060             :     text       *s2;
    1061             :     int         sp_pl_sl;
    1062             : 
    1063             :     /*
    1064             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    1065             :      * "substring length" error because that's what should be expected
    1066             :      * according to the spec's definition of OVERLAY().
    1067             :      */
    1068          32 :     if (sp <= 0)
    1069           0 :         ereport(ERROR,
    1070             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1071             :                  errmsg("negative substring length not allowed")));
    1072          32 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    1073           0 :         ereport(ERROR,
    1074             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1075             :                  errmsg("integer out of range")));
    1076             : 
    1077          32 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
    1078          32 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    1079          32 :     result = text_catenate(s1, t2);
    1080          32 :     result = text_catenate(result, s2);
    1081             : 
    1082          32 :     return result;
    1083             : }
    1084             : 
    1085             : /*
    1086             :  * textpos -
    1087             :  *    Return the position of the specified substring.
    1088             :  *    Implements the SQL POSITION() function.
    1089             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1090             :  * - thomas 1997-07-27
    1091             :  */
    1092             : Datum
    1093          80 : textpos(PG_FUNCTION_ARGS)
    1094             : {
    1095          80 :     text       *str = PG_GETARG_TEXT_PP(0);
    1096          80 :     text       *search_str = PG_GETARG_TEXT_PP(1);
    1097             : 
    1098          80 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
    1099             : }
    1100             : 
    1101             : /*
    1102             :  * text_position -
    1103             :  *  Does the real work for textpos()
    1104             :  *
    1105             :  * Inputs:
    1106             :  *      t1 - string to be searched
    1107             :  *      t2 - pattern to match within t1
    1108             :  * Result:
    1109             :  *      Character index of the first matched char, starting from 1,
    1110             :  *      or 0 if no match.
    1111             :  *
    1112             :  *  This is broken out so it can be called directly by other string processing
    1113             :  *  functions.
    1114             :  */
    1115             : static int
    1116          80 : text_position(text *t1, text *t2, Oid collid)
    1117             : {
    1118             :     TextPositionState state;
    1119             :     int         result;
    1120             : 
    1121          80 :     if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1)
    1122          16 :         return 0;
    1123             : 
    1124          64 :     text_position_setup(t1, t2, collid, &state);
    1125          64 :     if (!text_position_next(&state))
    1126          22 :         result = 0;
    1127             :     else
    1128          42 :         result = text_position_get_match_pos(&state);
    1129          64 :     text_position_cleanup(&state);
    1130          64 :     return result;
    1131             : }
    1132             : 
    1133             : 
    1134             : /*
    1135             :  * text_position_setup, text_position_next, text_position_cleanup -
    1136             :  *  Component steps of text_position()
    1137             :  *
    1138             :  * These are broken out so that a string can be efficiently searched for
    1139             :  * multiple occurrences of the same pattern.  text_position_next may be
    1140             :  * called multiple times, and it advances to the next match on each call.
    1141             :  * text_position_get_match_ptr() and text_position_get_match_pos() return
    1142             :  * a pointer or 1-based character position of the last match, respectively.
    1143             :  *
    1144             :  * The "state" variable is normally just a local variable in the caller.
    1145             :  *
    1146             :  * NOTE: text_position_next skips over the matched portion.  For example,
    1147             :  * searching for "xx" in "xxx" returns only one match, not two.
    1148             :  */
    1149             : 
    1150             : static void
    1151        1086 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
    1152             : {
    1153        1086 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
    1154        1086 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
    1155        1086 :     pg_locale_t mylocale = 0;
    1156             : 
    1157        1086 :     check_collation_set(collid);
    1158             : 
    1159        1086 :     if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
    1160           0 :         mylocale = pg_newlocale_from_collation(collid);
    1161             : 
    1162        1086 :     if (mylocale && !mylocale->deterministic)
    1163           0 :         ereport(ERROR,
    1164             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1165             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1166             : 
    1167             :     Assert(len1 > 0);
    1168             :     Assert(len2 > 0);
    1169             : 
    1170             :     /*
    1171             :      * Even with a multi-byte encoding, we perform the search using the raw
    1172             :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
    1173             :      * because in UTF-8 the byte sequence of one character cannot contain
    1174             :      * another character.  For other multi-byte encodings, we do the search
    1175             :      * initially as a simple byte search, ignoring multibyte issues, but
    1176             :      * verify afterwards that the match we found is at a character boundary,
    1177             :      * and continue the search if it was a false match.
    1178             :      */
    1179        1086 :     if (pg_database_encoding_max_length() == 1)
    1180             :     {
    1181          24 :         state->is_multibyte = false;
    1182          24 :         state->is_multibyte_char_in_char = false;
    1183             :     }
    1184        1062 :     else if (GetDatabaseEncoding() == PG_UTF8)
    1185             :     {
    1186        1062 :         state->is_multibyte = true;
    1187        1062 :         state->is_multibyte_char_in_char = false;
    1188             :     }
    1189             :     else
    1190             :     {
    1191           0 :         state->is_multibyte = true;
    1192           0 :         state->is_multibyte_char_in_char = true;
    1193             :     }
    1194             : 
    1195        1086 :     state->str1 = VARDATA_ANY(t1);
    1196        1086 :     state->str2 = VARDATA_ANY(t2);
    1197        1086 :     state->len1 = len1;
    1198        1086 :     state->len2 = len2;
    1199        1086 :     state->last_match = NULL;
    1200        1086 :     state->refpoint = state->str1;
    1201        1086 :     state->refpos = 0;
    1202             : 
    1203             :     /*
    1204             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1205             :      * notes we use the terminology that the "haystack" is the string to be
    1206             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1207             :      *
    1208             :      * If the needle is empty or bigger than the haystack then there is no
    1209             :      * point in wasting cycles initializing the table.  We also choose not to
    1210             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1211             :      * save anything in that case.
    1212             :      */
    1213        1086 :     if (len1 >= len2 && len2 > 1)
    1214             :     {
    1215         984 :         int         searchlength = len1 - len2;
    1216             :         int         skiptablemask;
    1217             :         int         last;
    1218             :         int         i;
    1219         984 :         const char *str2 = state->str2;
    1220             : 
    1221             :         /*
    1222             :          * First we must determine how much of the skip table to use.  The
    1223             :          * declaration of TextPositionState allows up to 256 elements, but for
    1224             :          * short search problems we don't really want to have to initialize so
    1225             :          * many elements --- it would take too long in comparison to the
    1226             :          * actual search time.  So we choose a useful skip table size based on
    1227             :          * the haystack length minus the needle length.  The closer the needle
    1228             :          * length is to the haystack length the less useful skipping becomes.
    1229             :          *
    1230             :          * Note: since we use bit-masking to select table elements, the skip
    1231             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1232             :          */
    1233         984 :         if (searchlength < 16)
    1234          36 :             skiptablemask = 3;
    1235         948 :         else if (searchlength < 64)
    1236           4 :             skiptablemask = 7;
    1237         944 :         else if (searchlength < 128)
    1238           2 :             skiptablemask = 15;
    1239         942 :         else if (searchlength < 512)
    1240         784 :             skiptablemask = 31;
    1241         158 :         else if (searchlength < 2048)
    1242          88 :             skiptablemask = 63;
    1243          70 :         else if (searchlength < 4096)
    1244          16 :             skiptablemask = 127;
    1245             :         else
    1246          54 :             skiptablemask = 255;
    1247         984 :         state->skiptablemask = skiptablemask;
    1248             : 
    1249             :         /*
    1250             :          * Initialize the skip table.  We set all elements to the needle
    1251             :          * length, since this is the correct skip distance for any character
    1252             :          * not found in the needle.
    1253             :          */
    1254       47784 :         for (i = 0; i <= skiptablemask; i++)
    1255       46800 :             state->skiptable[i] = len2;
    1256             : 
    1257             :         /*
    1258             :          * Now examine the needle.  For each character except the last one,
    1259             :          * set the corresponding table element to the appropriate skip
    1260             :          * distance.  Note that when two characters share the same skip table
    1261             :          * entry, the one later in the needle must determine the skip
    1262             :          * distance.
    1263             :          */
    1264         984 :         last = len2 - 1;
    1265             : 
    1266       12808 :         for (i = 0; i < last; i++)
    1267       11824 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1268             :     }
    1269        1086 : }
    1270             : 
    1271             : /*
    1272             :  * Advance to the next match, starting right after the previous match (or
    1273             :  * at the beginning of the haystack, on first call).  Returns true and
    1274             :  * records the match in state->last_match if one is found, else false.
    1275             :  */
    1276             : static bool
    1277        4106 : text_position_next(TextPositionState *state)
    1278             : {
    1279        4106 :     int         needle_len = state->len2;
    1280             :     char       *start_ptr;
    1281             :     char       *matchptr;
    1282             : 
    1283        4106 :     if (needle_len <= 0)
    1284           0 :         return false;           /* result for empty pattern */
    1285             : 
    1286             :     /* Resume right after the previous match, so matches never overlap. */
    1287        4106 :     if (state->last_match)
    1288        3020 :         start_ptr = state->last_match + needle_len;
    1289             :     else
    1290        1086 :         start_ptr = state->str1;
    1291             : 
    1292             : retry:
    1293        4106 :     matchptr = text_position_next_internal(start_ptr, state);
    1294             : 
    1295        4106 :     if (!matchptr)
    1296        1036 :         return false;
    1297             : 
    1298             :     /*
    1299             :      * Found a match for the byte sequence.  If this is a multibyte encoding,
    1300             :      * where one character's byte sequence can appear inside a longer
    1301             :      * multi-byte character, we need to verify that the match was at a
    1302             :      * character boundary, not in the middle of a multi-byte character.
    1303             :      */
    1304        3070 :     if (state->is_multibyte_char_in_char)
    1305             :     {
    1306             :         /* Walk one character at a time, until we reach the match. */
    1307             : 
    1308             :         /* The search should never move backwards. */
    1309             :         Assert(state->refpoint <= matchptr);
    1310             : 
    1311           0 :         while (state->refpoint < matchptr)
    1312             :         {
    1313             :             /* Step to the next character, keeping refpos in sync. */
    1314           0 :             state->refpoint += pg_mblen(state->refpoint);
    1315           0 :             state->refpos++;
    1316             : 
    1317             :             /*
    1318             :              * If we stepped over the match's start position, then it was a
    1319             :              * false positive, where the byte sequence appeared in the middle
    1320             :              * of a multi-byte character.  Skip it, and continue the search at
    1321             :              * the next character boundary.
    1322             :              */
    1323           0 :             if (state->refpoint > matchptr)
    1324             :             {
    1325           0 :                 start_ptr = state->refpoint;
    1326           0 :                 goto retry;
    1327             :             }
    1328             :         }
    1329             :     }
    1330             : 
    1331        3070 :     state->last_match = matchptr;
    1332        3070 :     return true;
    1333             : }
    1334             : 
    1335             : /*
    1336             :  * Subroutine of text_position_next().  This searches for the raw byte
    1337             :  * sequence, ignoring any multi-byte encoding issues.  Returns a pointer to
    1338             :  * the first match at or after 'start_ptr', or NULL if no match is found.
    1339             :  */
    1340             : static char *
    1341        4106 : text_position_next_internal(char *start_ptr, TextPositionState *state)
    1342             : {
    1343        4106 :     int         haystack_len = state->len1;
    1344        4106 :     int         needle_len = state->len2;
    1345        4106 :     int         skiptablemask = state->skiptablemask;
    1346        4106 :     const char *haystack = state->str1;
    1347        4106 :     const char *needle = state->str2;
    1348        4106 :     const char *haystack_end = &haystack[haystack_len];
    1349             :     const char *hptr;
    1350             : 
    1351             :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
    1352             : 
    1353        4106 :     if (needle_len == 1)
    1354             :     {
    1355             :         /* No point in using B-M-H for a one-byte needle; scan linearly */
    1356         338 :         char        nchar = *needle;
    1357             : 
    1358         338 :         hptr = start_ptr;
    1359        3456 :         while (hptr < haystack_end)
    1360             :         {
    1361        3050 :             if (*hptr == nchar)
    1362         270 :                 return (char *) hptr;
    1363        2780 :             hptr++;
    1364             :         }
    1365             :     }
    1366             :     else
    1367             :     {
    1368        3768 :         const char *needle_last = &needle[needle_len - 1];
    1369             : 
    1370             :         /* Point hptr at where the needle's last byte would land */
    1371        3768 :         hptr = start_ptr + needle_len - 1;
    1372      103558 :         while (hptr < haystack_end)
    1373             :         {
    1374             :             /* Match the needle scanning *backward* */
    1375             :             const char *nptr;
    1376             :             const char *p;
    1377             : 
    1378       98822 :             nptr = needle_last;
    1379       98822 :             p = hptr;
    1380      240416 :             while (*nptr == *p)
    1381             :             {
    1382             :                 /* Matched it all?  If so, return pointer to match start */
    1383       45572 :                 if (nptr == needle)
    1384        2800 :                     return (char *) p;
    1385       42772 :                 nptr--, p--;
    1386             :             }
    1387             : 
    1388             :             /*
    1389             :              * No match, so use the haystack char at hptr to decide how far to
    1390             :              * advance.  If the needle had any occurrence of that character
    1391             :              * (or more precisely, one sharing the same skiptable entry)
    1392             :              * before its last character, then we advance far enough to align
    1393             :              * the last such needle character with that haystack position.
    1394             :              * Otherwise we can advance by the whole needle length.
    1395             :              */
    1396       96022 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1397             :         }
    1398             :     }
    1399             : 
    1400        1036 :     return 0;                   /* not found: null pointer result */
    1401             : }
    1402             : 
    1403             : /*
    1404             :  * Return a pointer to the current match.
    1405             :  *
    1406             :  * The returned pointer is state->last_match, which points to the start of
    1407             :  * the match within the original haystack string.
    1408             :  */
    1409             : static char *
    1410        3028 : text_position_get_match_ptr(TextPositionState *state)
    1411             : {
    1412        3028 :     return state->last_match;
    1413             : }
    1414             : 
    1415             : /*
    1416             :  * Return the offset of the current match.
    1417             :  *
    1418             :  * The offset is in characters (not bytes), 1-based.
    1419             :  */
    1420             : static int
    1421          42 : text_position_get_match_pos(TextPositionState *state)
    1422             : {
    1423          42 :     if (!state->is_multibyte)
    1424           0 :         return state->last_match - state->str1 + 1;
    1425             :     else
    1426             :     {
    1427             :         /* Convert byte position to char position, resuming at refpoint. */
    1428         144 :         while (state->refpoint < state->last_match)
    1429             :         {
    1430          60 :             state->refpoint += pg_mblen(state->refpoint);
    1431          60 :             state->refpos++;
    1432             :         }
    1433             :         Assert(state->refpoint == state->last_match);
    1434          42 :         return state->refpos + 1;
    1435             :     }
    1436             : }
    1437             : 
    1438             : static void
    1439        1086 : text_position_cleanup(TextPositionState *state)
    1440             : {
    1441             :     /* Intentionally empty: no cleanup is needed at present. */
    1442        1086 : }
    1443             : 
    1444             : static void
    1445     9035504 : check_collation_set(Oid collid)
    1446             : {
    1447     9035504 :     if (!OidIsValid(collid))
    1448             :     {
    1449             :         /*
    1450             :          * An invalid collation OID typically means that the parser could not
    1451             :          * resolve a conflict of implicit collations, so report it that way.
    1452             :          */
    1453           8 :         ereport(ERROR,
    1454             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1455             :                  errmsg("could not determine which collation to use for string comparison"),
    1456             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1457             :     }
    1458     9035496 : }
    1459             : 
    1460             : /* varstr_cmp()
    1461             :  * Compare two strings given as pointer + byte length, under collation collid.
    1462             :  * Errors out if collid is invalid.  In the non-C-locale case the inputs may
    1463             :  *  be copied to null-terminated (on Windows with UTF-8: UTF-16) buffers.
    1464             :  * Returns an integer less than, equal to, or greater than zero, indicating
    1465             :  * whether arg1 is less than, equal to, or greater than arg2.
    1466             :  */
    1467             : int
    1468     7125652 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
    1469             : {
    1470             :     int         result;
    1471             : 
    1472     7125652 :     check_collation_set(collid);
    1473             : 
    1474             :     /*
    1475             :      * Unfortunately, there is no strncoll(), so in the non-C locale case we
    1476             :      * have to do some memory copying.  This turns out to be significantly
    1477             :      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
    1478             :      * optimize relatively-short strings by avoiding palloc/pfree overhead.
    1479             :      */
    1480     7125648 :     if (lc_collate_is_c(collid))
    1481             :     {
    1482     2354330 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1483     2354330 :         if ((result == 0) && (len1 != len2))
    1484       51100 :             result = (len1 < len2) ? -1 : 1;
    1485             :     }
    1486             :     else
    1487             :     {
    1488             :         char        a1buf[TEXTBUFLEN];
    1489             :         char        a2buf[TEXTBUFLEN];
    1490             :         char       *a1p,
    1491             :                    *a2p;
    1492     4771318 :         pg_locale_t mylocale = 0;
    1493             : 
    1494     4771318 :         if (collid != DEFAULT_COLLATION_OID)
    1495           0 :             mylocale = pg_newlocale_from_collation(collid);
    1496             : 
    1497             :         /*
    1498             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1499             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1500             :          * memcmp() followed by strcoll() is only trivially slower than
    1501             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1502             :          * very often, and if it does - for example, because there are many
    1503             :          * equal strings in the input - then we win big by avoiding expensive
    1504             :          * collation-aware comparisons.
    1505             :          */
    1506     4771318 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1507     2629950 :             return 0;
    1508             : 
    1509             : #ifdef WIN32
    1510             :         /* Win32 does not have UTF-8, so we need to map to UTF-16 */
    1511             :         if (GetDatabaseEncoding() == PG_UTF8
    1512             :             && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
    1513             :         {
    1514             :             int         a1len;
    1515             :             int         a2len;
    1516             :             int         r;
    1517             : 
    1518             :             if (len1 >= TEXTBUFLEN / 2)
    1519             :             {
    1520             :                 a1len = len1 * 2 + 2;
    1521             :                 a1p = palloc(a1len);
    1522             :             }
    1523             :             else
    1524             :             {
    1525             :                 a1len = TEXTBUFLEN;
    1526             :                 a1p = a1buf;
    1527             :             }
    1528             :             if (len2 >= TEXTBUFLEN / 2)
    1529             :             {
    1530             :                 a2len = len2 * 2 + 2;
    1531             :                 a2p = palloc(a2len);
    1532             :             }
    1533             :             else
    1534             :             {
    1535             :                 a2len = TEXTBUFLEN;
    1536             :                 a2p = a2buf;
    1537             :             }
    1538             : 
    1539             :             /* MultiByteToWideChar() does not work for zero-length input */
    1540             :             if (len1 == 0)
    1541             :                 r = 0;
    1542             :             else
    1543             :             {
    1544             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1545             :                                         (LPWSTR) a1p, a1len / 2);
    1546             :                 if (!r)
    1547             :                     ereport(ERROR,
    1548             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1549             :                                     GetLastError())));
    1550             :             }
    1551             :             ((LPWSTR) a1p)[r] = 0;
    1552             : 
    1553             :             if (len2 == 0)
    1554             :                 r = 0;
    1555             :             else
    1556             :             {
    1557             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1558             :                                         (LPWSTR) a2p, a2len / 2);
    1559             :                 if (!r)
    1560             :                     ereport(ERROR,
    1561             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1562             :                                     GetLastError())));
    1563             :             }
    1564             :             ((LPWSTR) a2p)[r] = 0;
    1565             : 
    1566             :             errno = 0;
    1567             : #ifdef HAVE_LOCALE_T
    1568             :             if (mylocale)
    1569             :                 result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
    1570             :             else
    1571             : #endif
    1572             :                 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
    1573             :             if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw
    1574             :                                          * headers */
    1575             :                 ereport(ERROR,
    1576             :                         (errmsg("could not compare Unicode strings: %m")));
    1577             : 
    1578             :             /* Deterministic collations break ties by bytewise comparison. */
    1579             :             if (result == 0 &&
    1580             :                 (!mylocale || mylocale->deterministic))
    1581             :             {
    1582             :                 result = memcmp(arg1, arg2, Min(len1, len2));
    1583             :                 if ((result == 0) && (len1 != len2))
    1584             :                     result = (len1 < len2) ? -1 : 1;
    1585             :             }
    1586             : 
    1587             :             if (a1p != a1buf)
    1588             :                 pfree(a1p);
    1589             :             if (a2p != a2buf)
    1590             :                 pfree(a2p);
    1591             : 
    1592             :             return result;
    1593             :         }
    1594             : #endif                          /* WIN32 */
    1595             :         /* Use the stack buffers unless an input is too long for them. */
    1596     2141368 :         if (len1 >= TEXTBUFLEN)
    1597         200 :             a1p = (char *) palloc(len1 + 1);
    1598             :         else
    1599     2141168 :             a1p = a1buf;
    1600     2141368 :         if (len2 >= TEXTBUFLEN)
    1601          72 :             a2p = (char *) palloc(len2 + 1);
    1602             :         else
    1603     2141296 :             a2p = a2buf;
    1604             :         /* Make null-terminated copies of both inputs. */
    1605     2141368 :         memcpy(a1p, arg1, len1);
    1606     2141368 :         a1p[len1] = '\0';
    1607     2141368 :         memcpy(a2p, arg2, len2);
    1608     2141368 :         a2p[len2] = '\0';
    1609             : 
    1610     2141368 :         if (mylocale)
    1611             :         {
    1612           0 :             if (mylocale->provider == COLLPROVIDER_ICU)
    1613             :             {
    1614             : #ifdef USE_ICU
    1615             : #ifdef HAVE_UCOL_STRCOLLUTF8
    1616             :                 if (GetDatabaseEncoding() == PG_UTF8)
    1617             :                 {
    1618             :                     UErrorCode  status;
    1619             : 
    1620             :                     status = U_ZERO_ERROR;
    1621             :                     result = ucol_strcollUTF8(mylocale->info.icu.ucol,
    1622             :                                               arg1, len1,
    1623             :                                               arg2, len2,
    1624             :                                               &status);
    1625             :                     if (U_FAILURE(status))
    1626             :                         ereport(ERROR,
    1627             :                                 (errmsg("collation failed: %s", u_errorName(status))));
    1628             :                 }
    1629             :                 else
    1630             : #endif
    1631             :                 {
    1632             :                     int32_t     ulen1,
    1633             :                                 ulen2;
    1634             :                     UChar      *uchar1,
    1635             :                                *uchar2;
    1636             : 
    1637             :                     ulen1 = icu_to_uchar(&uchar1, arg1, len1);
    1638             :                     ulen2 = icu_to_uchar(&uchar2, arg2, len2);
    1639             : 
    1640             :                     result = ucol_strcoll(mylocale->info.icu.ucol,
    1641             :                                           uchar1, ulen1,
    1642             :                                           uchar2, ulen2);
    1643             : 
    1644             :                     pfree(uchar1);
    1645             :                     pfree(uchar2);
    1646             :                 }
    1647             : #else                           /* not USE_ICU */
    1648             :                 /* shouldn't happen */
    1649           0 :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1650             : #endif                          /* not USE_ICU */
    1651             :             }
    1652             :             else
    1653             :             {
    1654             : #ifdef HAVE_LOCALE_T
    1655           0 :                 result = strcoll_l(a1p, a2p, mylocale->info.lt);
    1656             : #else
    1657             :                 /* shouldn't happen */
    1658             :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1659             : #endif
    1660             :             }
    1661             :         }
    1662             :         else
    1663     2141368 :             result = strcoll(a1p, a2p);
    1664             : 
    1665             :         /* Deterministic collations break ties by bytewise comparison. */
    1666     2141368 :         if (result == 0 &&
    1667           0 :             (!mylocale || mylocale->deterministic))
    1668           0 :             result = strcmp(a1p, a2p);
    1669             : 
    1670     2141368 :         if (a1p != a1buf)
    1671         200 :             pfree(a1p);
    1672     2141368 :         if (a2p != a2buf)
    1673          72 :             pfree(a2p);
    1674             :     }
    1675             : 
    1676     4495698 :     return result;
    1677             : }
    1678             : 
    1679             : /* text_cmp()
    1680             :  * Internal comparison function for text strings.
    1681             :  * Returns the varstr_cmp() result: less than, equal to, or greater than 0.
    1682             :  */
    1683             : static int
    1684     6064362 : text_cmp(text *arg1, text *arg2, Oid collid)
    1685             : {
    1686             :     char       *a1p,
    1687             :                *a2p;
    1688             :     int         len1,
    1689             :                 len2;
    1690             : 
    1691     6064362 :     a1p = VARDATA_ANY(arg1);
    1692     6064362 :     a2p = VARDATA_ANY(arg2);
    1693             : 
    1694     6064362 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1695     6064362 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1696             : 
    1697     6064362 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1698             : }
    1699             : 
    1700             : /*
    1701             :  * Comparison functions for text strings.
    1702             :  *
    1703             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1704             :  * be careful to free working copies of toasted datums.  Most places don't
    1705             :  * need to be so careful.
    1706             :  */
    1707             : 
    1708             : Datum
    1709     1581878 : texteq(PG_FUNCTION_ARGS)
    1710             : {
    1711     1581878 :     Oid         collid = PG_GET_COLLATION();
    1712             :     bool        result;
    1713             : 
    1714     1581878 :     check_collation_set(collid);
    1715             :     /* C, default, and deterministic collations take the bitwise path. */
    1716     1581878 :     if (lc_collate_is_c(collid) ||
    1717           0 :         collid == DEFAULT_COLLATION_OID ||
    1718           0 :         pg_newlocale_from_collation(collid)->deterministic)
    1719     1581878 :     {
    1720     1581878 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1721     1581878 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1722             :         Size        len1,
    1723             :                     len2;
    1724             : 
    1725             :         /*
    1726             :          * Since we only care about equality or not-equality, we can avoid all
    1727             :          * the expense of strcoll() here, and just do bitwise comparison.  In
    1728             :          * fact, we don't even have to do a bitwise comparison if we can show
    1729             :          * the lengths of the strings are unequal; which might save us from
    1730             :          * having to detoast one or both values.
    1731             :          */
    1732     1581878 :         len1 = toast_raw_datum_size(arg1);
    1733     1581878 :         len2 = toast_raw_datum_size(arg2);
    1734     1581878 :         if (len1 != len2)
    1735      269124 :             result = false;
    1736             :         else
    1737             :         {
    1738     1312754 :             text       *targ1 = DatumGetTextPP(arg1);
    1739     1312754 :             text       *targ2 = DatumGetTextPP(arg2);
    1740             :             /* The raw size counts the header, hence "- VARHDRSZ". */
    1741     1312754 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1742             :                              len1 - VARHDRSZ) == 0);
    1743             : 
    1744     1312754 :             PG_FREE_IF_COPY(targ1, 0);
    1745     1312754 :             PG_FREE_IF_COPY(targ2, 1);
    1746             :         }
    1747             :     }
    1748             :     else
    1749             :     {
    1750           0 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1751           0 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1752             :         /* Nondeterministic collation: needs a collation-aware comparison. */
    1753           0 :         result = (text_cmp(arg1, arg2, collid) == 0);
    1754             : 
    1755           0 :         PG_FREE_IF_COPY(arg1, 0);
    1756           0 :         PG_FREE_IF_COPY(arg2, 1);
    1757             :     }
    1758             : 
    1759     1581878 :     PG_RETURN_BOOL(result);
    1760             : }
    1761             : 
    1762             : Datum
    1763        9832 : textne(PG_FUNCTION_ARGS)
    1764             : {
    1765        9832 :     Oid         collid = PG_GET_COLLATION();
    1766             :     bool        result;
    1767             : 
    1768        9832 :     check_collation_set(collid);
    1769             :     /* Same structure as texteq(), with the sense of the result inverted. */
    1770        9832 :     if (lc_collate_is_c(collid) ||
    1771           0 :         collid == DEFAULT_COLLATION_OID ||
    1772           0 :         pg_newlocale_from_collation(collid)->deterministic)
    1773        9832 :     {
    1774        9832 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1775        9832 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1776             :         Size        len1,
    1777             :                     len2;
    1778             : 
    1779             :         /* Unequal raw sizes prove inequality; see comment in texteq() */
    1780        9832 :         len1 = toast_raw_datum_size(arg1);
    1781        9832 :         len2 = toast_raw_datum_size(arg2);
    1782        9832 :         if (len1 != len2)
    1783         506 :             result = true;
    1784             :         else
    1785             :         {
    1786        9326 :             text       *targ1 = DatumGetTextPP(arg1);
    1787        9326 :             text       *targ2 = DatumGetTextPP(arg2);
    1788             :             /* The raw size counts the header, hence "- VARHDRSZ". */
    1789        9326 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1790             :                              len1 - VARHDRSZ) != 0);
    1791             : 
    1792        9326 :             PG_FREE_IF_COPY(targ1, 0);
    1793        9326 :             PG_FREE_IF_COPY(targ2, 1);
    1794             :         }
    1795             :     }
    1796             :     else
    1797             :     {
    1798           0 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1799           0 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1800             :         /* Nondeterministic collation: needs a collation-aware comparison. */
    1801           0 :         result = (text_cmp(arg1, arg2, collid) != 0);
    1802             : 
    1803           0 :         PG_FREE_IF_COPY(arg1, 0);
    1804           0 :         PG_FREE_IF_COPY(arg2, 1);
    1805             :     }
    1806             : 
    1807        9832 :     PG_RETURN_BOOL(result);
    1808             : }
    1809             : 
    1810             : Datum
    1811       73590 : text_lt(PG_FUNCTION_ARGS)
    1812             : {
    1813       73590 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1814       73590 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1815             :     bool        result;
    1816             : 
    1817       73590 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1818             : 
    1819       73586 :     PG_FREE_IF_COPY(arg1, 0);
    1820       73586 :     PG_FREE_IF_COPY(arg2, 1);
    1821             : 
    1822       73586 :     PG_RETURN_BOOL(result);
    1823             : }
    1824             : 
    1825             : Datum
    1826       55538 : text_le(PG_FUNCTION_ARGS)
    1827             : {
    1828       55538 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1829       55538 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1830             :     bool        result;
    1831             : 
    1832       55538 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1833             : 
    1834       55538 :     PG_FREE_IF_COPY(arg1, 0);
    1835       55538 :     PG_FREE_IF_COPY(arg2, 1);
    1836             : 
    1837       55538 :     PG_RETURN_BOOL(result);
    1838             : }
    1839             : 
    1840             : Datum
    1841       34634 : text_gt(PG_FUNCTION_ARGS)
    1842             : {
    1843       34634 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1844       34634 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1845             :     bool        result;
    1846             : 
    1847       34634 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1848             : 
    1849       34634 :     PG_FREE_IF_COPY(arg1, 0);
    1850       34634 :     PG_FREE_IF_COPY(arg2, 1);
    1851             : 
    1852       34634 :     PG_RETURN_BOOL(result);
    1853             : }
    1854             : 
    1855             : Datum
    1856       40204 : text_ge(PG_FUNCTION_ARGS)
    1857             : {
    1858       40204 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1859       40204 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1860             :     bool        result;
    1861             : 
    1862       40204 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1863             : 
    1864       40204 :     PG_FREE_IF_COPY(arg1, 0);
    1865       40204 :     PG_FREE_IF_COPY(arg2, 1);
    1866             : 
    1867       40204 :     PG_RETURN_BOOL(result);
    1868             : }
    1869             : 
    1870             : Datum
    1871       25132 : text_starts_with(PG_FUNCTION_ARGS)
    1872             : {
    1873       25132 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1874       25132 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1875       25132 :     Oid         collid = PG_GET_COLLATION();
    1876       25132 :     pg_locale_t mylocale = 0;
    1877             :     bool        result;
    1878             :     Size        len1,
    1879             :                 len2;
    1880             : 
    1881       25132 :     check_collation_set(collid);
    1882             : 
    1883       25132 :     if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
    1884           0 :         mylocale = pg_newlocale_from_collation(collid);
    1885             : 
    1886       25132 :     if (mylocale && !mylocale->deterministic)
    1887           0 :         ereport(ERROR,
    1888             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1889             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1890             : 
    1891       25132 :     len1 = toast_raw_datum_size(arg1);
    1892       25132 :     len2 = toast_raw_datum_size(arg2);
    1893       25132 :     if (len2 > len1)
    1894           0 :         result = false;
    1895             :     else
    1896             :     {
    1897       25132 :         text       *targ1 = text_substring(arg1, 1, len2, false);
    1898       25132 :         text       *targ2 = DatumGetTextPP(arg2);
    1899             : 
    1900       50264 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1901       50264 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
    1902             : 
    1903       25132 :         PG_FREE_IF_COPY(targ1, 0);
    1904       25132 :         PG_FREE_IF_COPY(targ2, 1);
    1905             :     }
    1906             : 
    1907       25132 :     PG_RETURN_BOOL(result);
    1908             : }
    1909             : 
    1910             : Datum
    1911     5776242 : bttextcmp(PG_FUNCTION_ARGS)
    1912             : {
    1913     5776242 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1914     5776242 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1915             :     int32       result;
    1916             : 
    1917     5776242 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1918             : 
    1919     5776242 :     PG_FREE_IF_COPY(arg1, 0);
    1920     5776242 :     PG_FREE_IF_COPY(arg2, 1);
    1921             : 
    1922     5776242 :     PG_RETURN_INT32(result);
    1923             : }
    1924             : 
    1925             : Datum
    1926       51284 : bttextsortsupport(PG_FUNCTION_ARGS)
    1927             : {
    1928       51284 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1929       51284 :     Oid         collid = ssup->ssup_collation;
    1930             :     MemoryContext oldcontext;
    1931             : 
    1932       51284 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1933             : 
    1934             :     /* Use generic string SortSupport */
    1935       51284 :     varstr_sortsupport(ssup, TEXTOID, collid);
    1936             : 
    1937       51280 :     MemoryContextSwitchTo(oldcontext);
    1938             : 
    1939       51280 :     PG_RETURN_VOID();
    1940             : }
    1941             : 
    1942             : /*
    1943             :  * Generic sortsupport interface for character type's operator classes.
    1944             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    1945             :  * trailing spaces before comparison).
    1946             :  *
    1947             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    1948             :  * same representation.  Callers that always use the C collation (e.g.
    1949             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    1950             :  * this will not work with any other collation, though.
    1951             :  */
    1952             : void
    1953       96282 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
    1954             : {
    1955       96282 :     bool        abbreviate = ssup->abbreviate;
    1956       96282 :     bool        collate_c = false;
    1957             :     VarStringSortSupport *sss;
    1958       96282 :     pg_locale_t locale = 0;
    1959             : 
    1960       96282 :     check_collation_set(collid);
    1961             : 
    1962             :     /*
    1963             :      * If possible, set ssup->comparator to a function which can be used to
    1964             :      * directly compare two datums.  If we can do this, we'll avoid the
    1965             :      * overhead of a trip through the fmgr layer for every comparison, which
    1966             :      * can be substantial.
    1967             :      *
    1968             :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
    1969             :      * which uses strcoll() to perform comparisons.  We use that for the
    1970             :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
    1971             :      * LC_COLLATE = C, we can make things quite a bit faster with
    1972             :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
    1973             :      * memcmp() rather than strcoll().
    1974             :      */
    1975       96278 :     if (lc_collate_is_c(collid))
    1976             :     {
    1977       68220 :         if (typid == BPCHAROID)
    1978          12 :             ssup->comparator = bpcharfastcmp_c;
    1979       68208 :         else if (typid == NAMEOID)
    1980             :         {
    1981       44066 :             ssup->comparator = namefastcmp_c;
    1982             :             /* Not supporting abbreviation with type NAME, for now */
    1983       44066 :             abbreviate = false;
    1984             :         }
    1985             :         else
    1986       24142 :             ssup->comparator = varstrfastcmp_c;
    1987             : 
    1988       68220 :         collate_c = true;
    1989             :     }
    1990             :     else
    1991             :     {
    1992             :         /*
    1993             :          * We need a collation-sensitive comparison.  To make things faster,
    1994             :          * we'll figure out the collation based on the locale id and cache the
    1995             :          * result.
    1996             :          */
    1997       28058 :         if (collid != DEFAULT_COLLATION_OID)
    1998           0 :             locale = pg_newlocale_from_collation(collid);
    1999             : 
    2000             :         /*
    2001             :          * There is a further exception on Windows.  When the database
    2002             :          * encoding is UTF-8 and we are not using the C collation, complex
    2003             :          * hacks are required.  We don't currently have a comparator that
    2004             :          * handles that case, so we fall back on the slow method of having the
    2005             :          * sort code invoke bttextcmp() (in the case of text) via the fmgr
    2006             :          * trampoline.  ICU locales work just the same on Windows, however.
    2007             :          */
    2008             : #ifdef WIN32
    2009             :         if (GetDatabaseEncoding() == PG_UTF8 &&
    2010             :             !(locale && locale->provider == COLLPROVIDER_ICU))
    2011             :             return;
    2012             : #endif
    2013             : 
    2014             :         /*
    2015             :          * We use varlenafastcmp_locale except for type NAME.
    2016             :          */
    2017       28058 :         if (typid == NAMEOID)
    2018             :         {
    2019           0 :             ssup->comparator = namefastcmp_locale;
    2020             :             /* Not supporting abbreviation with type NAME, for now */
    2021           0 :             abbreviate = false;
    2022             :         }
    2023             :         else
    2024       28058 :             ssup->comparator = varlenafastcmp_locale;
    2025             :     }
    2026             : 
    2027             :     /*
    2028             :      * Unfortunately, it seems that abbreviation for non-C collations is
    2029             :      * broken on many common platforms; testing of multiple versions of glibc
    2030             :      * reveals that, for many locales, strcoll() and strxfrm() do not return
    2031             :      * consistent results, which is fatal to this optimization.  While no
    2032             :      * other libc other than Cygwin has so far been shown to have a problem,
    2033             :      * we take the conservative course of action for right now and disable
    2034             :      * this categorically.  (Users who are certain this isn't a problem on
    2035             :      * their system can define TRUST_STRXFRM.)
    2036             :      *
    2037             :      * Even apart from the risk of broken locales, it's possible that there
    2038             :      * are platforms where the use of abbreviated keys should be disabled at
    2039             :      * compile time.  Having only 4 byte datums could make worst-case
    2040             :      * performance drastically more likely, for example.  Moreover, macOS's
    2041             :      * strxfrm() implementation is known to not effectively concentrate a
    2042             :      * significant amount of entropy from the original string in earlier
    2043             :      * transformed blobs.  It's possible that other supported platforms are
    2044             :      * similarly encumbered.  So, if we ever get past disabling this
    2045             :      * categorically, we may still want or need to disable it for particular
    2046             :      * platforms.
    2047             :      */
    2048             : #ifndef TRUST_STRXFRM
    2049       96278 :     if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
    2050       28058 :         abbreviate = false;
    2051             : #endif
    2052             : 
    2053             :     /*
    2054             :      * If we're using abbreviated keys, or if we're using a locale-aware
    2055             :      * comparison, we need to initialize a StringSortSupport object.  Both
    2056             :      * cases will make use of the temporary buffers we initialize here for
    2057             :      * scratch space (and to detect requirement for BpChar semantics from
    2058             :      * caller), and the abbreviation case requires additional state.
    2059             :      */
    2060       96278 :     if (abbreviate || !collate_c)
    2061             :     {
    2062       29272 :         sss = palloc(sizeof(VarStringSortSupport));
    2063       29272 :         sss->buf1 = palloc(TEXTBUFLEN);
    2064       29272 :         sss->buflen1 = TEXTBUFLEN;
    2065       29272 :         sss->buf2 = palloc(TEXTBUFLEN);
    2066       29272 :         sss->buflen2 = TEXTBUFLEN;
    2067             :         /* Start with invalid values */
    2068       29272 :         sss->last_len1 = -1;
    2069       29272 :         sss->last_len2 = -1;
    2070             :         /* Initialize */
    2071       29272 :         sss->last_returned = 0;
    2072       29272 :         sss->locale = locale;
    2073             : 
    2074             :         /*
    2075             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    2076             :          * constantly keep track of the variety of data that buf1 and buf2
    2077             :          * currently contain.
    2078             :          *
    2079             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    2080             :          * conversions and comparisons are batched into two distinct phases,
    2081             :          * but the correctness of caching cannot hinge upon this.  For
    2082             :          * comparison caching, buffer state is only trusted if cache_blob is
    2083             :          * found set to false, whereas strxfrm() caching only trusts the state
    2084             :          * when cache_blob is found set to true.
    2085             :          *
    2086             :          * Arbitrarily initialize cache_blob to true.
    2087             :          */
    2088       29272 :         sss->cache_blob = true;
    2089       29272 :         sss->collate_c = collate_c;
    2090       29272 :         sss->typid = typid;
    2091       29272 :         ssup->ssup_extra = sss;
    2092             : 
    2093             :         /*
    2094             :          * If possible, plan to use the abbreviated keys optimization.  The
    2095             :          * core code may switch back to authoritative comparator should
    2096             :          * abbreviation be aborted.
    2097             :          */
    2098       29272 :         if (abbreviate)
    2099             :         {
    2100        1214 :             sss->prop_card = 0.20;
    2101        1214 :             initHyperLogLog(&sss->abbr_card, 10);
    2102        1214 :             initHyperLogLog(&sss->full_card, 10);
    2103        1214 :             ssup->abbrev_full_comparator = ssup->comparator;
    2104        1214 :             ssup->comparator = varstrcmp_abbrev;
    2105        1214 :             ssup->abbrev_converter = varstr_abbrev_convert;
    2106        1214 :             ssup->abbrev_abort = varstr_abbrev_abort;
    2107             :         }
    2108             :     }
    2109       96278 : }
    2110             : 
    2111             : /*
    2112             :  * sortsupport comparison func (for C locale case)
    2113             :  */
    2114             : static int
    2115    51630868 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2116             : {
    2117    51630868 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2118    51630868 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2119             :     char       *a1p,
    2120             :                *a2p;
    2121             :     int         len1,
    2122             :                 len2,
    2123             :                 result;
    2124             : 
    2125    51630868 :     a1p = VARDATA_ANY(arg1);
    2126    51630868 :     a2p = VARDATA_ANY(arg2);
    2127             : 
    2128    51630868 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2129    51630868 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2130             : 
    2131    51630868 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2132    51630868 :     if ((result == 0) && (len1 != len2))
    2133     1046308 :         result = (len1 < len2) ? -1 : 1;
    2134             : 
    2135             :     /* We can't afford to leak memory here. */
    2136    51630868 :     if (PointerGetDatum(arg1) != x)
    2137           0 :         pfree(arg1);
    2138    51630868 :     if (PointerGetDatum(arg2) != y)
    2139           0 :         pfree(arg2);
    2140             : 
    2141    51630868 :     return result;
    2142             : }
    2143             : 
    2144             : /*
    2145             :  * sortsupport comparison func (for BpChar C locale case)
    2146             :  *
    2147             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    2148             :  * varstr_sortsupport BpChar case, modeled on
    2149             :  * internal_bpchar_pattern_compare().
    2150             :  */
    2151             : static int
    2152          16 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2153             : {
    2154          16 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2155          16 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2156             :     char       *a1p,
    2157             :                *a2p;
    2158             :     int         len1,
    2159             :                 len2,
    2160             :                 result;
    2161             : 
    2162          16 :     a1p = VARDATA_ANY(arg1);
    2163          16 :     a2p = VARDATA_ANY(arg2);
    2164             : 
    2165          16 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2166          16 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2167             : 
    2168          16 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2169          16 :     if ((result == 0) && (len1 != len2))
    2170           0 :         result = (len1 < len2) ? -1 : 1;
    2171             : 
    2172             :     /* We can't afford to leak memory here. */
    2173          16 :     if (PointerGetDatum(arg1) != x)
    2174           0 :         pfree(arg1);
    2175          16 :     if (PointerGetDatum(arg2) != y)
    2176           0 :         pfree(arg2);
    2177             : 
    2178          16 :     return result;
    2179             : }
    2180             : 
    2181             : /*
    2182             :  * sortsupport comparison func (for NAME C locale case)
    2183             :  */
    2184             : static int
    2185    65224724 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
    2186             : {
    2187    65224724 :     Name        arg1 = DatumGetName(x);
    2188    65224724 :     Name        arg2 = DatumGetName(y);
    2189             : 
    2190    65224724 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
    2191             : }
    2192             : 
    2193             : /*
    2194             :  * sortsupport comparison func (for locale case with all varlena types)
    2195             :  */
    2196             : static int
    2197    28869422 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2198             : {
    2199    28869422 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2200    28869422 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2201             :     char       *a1p,
    2202             :                *a2p;
    2203             :     int         len1,
    2204             :                 len2,
    2205             :                 result;
    2206             : 
    2207    28869422 :     a1p = VARDATA_ANY(arg1);
    2208    28869422 :     a2p = VARDATA_ANY(arg2);
    2209             : 
    2210    28869422 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2211    28869422 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2212             : 
    2213    28869422 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
    2214             : 
    2215             :     /* We can't afford to leak memory here. */
    2216    28869422 :     if (PointerGetDatum(arg1) != x)
    2217           0 :         pfree(arg1);
    2218    28869422 :     if (PointerGetDatum(arg2) != y)
    2219           0 :         pfree(arg2);
    2220             : 
    2221    28869422 :     return result;
    2222             : }
    2223             : 
    2224             : /*
    2225             :  * sortsupport comparison func (for locale case with NAME type)
    2226             :  */
    2227             : static int
    2228           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2229             : {
    2230           0 :     Name        arg1 = DatumGetName(x);
    2231           0 :     Name        arg2 = DatumGetName(y);
    2232             : 
    2233           0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
    2234           0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
    2235             :                                 ssup);
    2236             : }
    2237             : 
    2238             : /*
    2239             :  * sortsupport comparison func for locale cases
    2240             :  */
    2241             : static int
    2242    28869422 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
    2243             : {
    2244    28869422 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2245             :     int         result;
    2246             :     bool        arg1_match;
    2247             : 
    2248             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2249    28869422 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2250             :     {
    2251             :         /*
    2252             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2253             :          * last_len2.  Existing contents of buffers might still be used by
    2254             :          * next call.
    2255             :          *
    2256             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2257             :          * even though that implies that the memcmp() will usually be
    2258             :          * performed for BpChar callers (though multibyte characters could
    2259             :          * still prevent that from occurring).  The memcmp() is still very
    2260             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2261             :          * (not limited to padding), so we need make no distinction between
    2262             :          * padding space characters and "real" space characters.
    2263             :          */
    2264    12537158 :         return 0;
    2265             :     }
    2266             : 
    2267    16332264 :     if (sss->typid == BPCHAROID)
    2268             :     {
    2269             :         /* Get true number of bytes, ignoring trailing spaces */
    2270       58852 :         len1 = bpchartruelen(a1p, len1);
    2271       58852 :         len2 = bpchartruelen(a2p, len2);
    2272             :     }
    2273             : 
    2274    16332264 :     if (len1 >= sss->buflen1)
    2275             :     {
    2276           0 :         pfree(sss->buf1);
    2277           0 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2278           0 :         sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
    2279             :     }
    2280    16332264 :     if (len2 >= sss->buflen2)
    2281             :     {
    2282           0 :         pfree(sss->buf2);
    2283           0 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2284           0 :         sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
    2285             :     }
    2286             : 
    2287             :     /*
    2288             :      * We're likely to be asked to compare the same strings repeatedly, and
    2289             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2290             :      * comparisons, even though in general there is no reason to think that
    2291             :      * that will work out (every string datum may be unique).  Caching does
    2292             :      * not slow things down measurably when it doesn't work out, and can speed
    2293             :      * things up by rather a lot when it does.  In part, this is because the
    2294             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2295             :      * when the last comparison's result cannot be reused.
    2296             :      */
    2297    16332264 :     arg1_match = true;
    2298    16332264 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2299             :     {
    2300    12116966 :         arg1_match = false;
    2301    12116966 :         memcpy(sss->buf1, a1p, len1);
    2302    12116966 :         sss->buf1[len1] = '\0';
    2303    12116966 :         sss->last_len1 = len1;
    2304             :     }
    2305             : 
    2306             :     /*
    2307             :      * If we're comparing the same two strings as last time, we can return the
    2308             :      * same answer without calling strcoll() again.  This is more likely than
    2309             :      * it seems (at least with moderate to low cardinality sets), because
    2310             :      * quicksort compares the same pivot against many values.
    2311             :      */
    2312    16332264 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2313             :     {
    2314     2547784 :         memcpy(sss->buf2, a2p, len2);
    2315     2547784 :         sss->buf2[len2] = '\0';
    2316     2547784 :         sss->last_len2 = len2;
    2317             :     }
    2318    13784480 :     else if (arg1_match && !sss->cache_blob)
    2319             :     {
    2320             :         /* Use result cached following last actual strcoll() call */
    2321     3327978 :         return sss->last_returned;
    2322             :     }
    2323             : 
    2324    13004286 :     if (sss->locale)
    2325             :     {
    2326           0 :         if (sss->locale->provider == COLLPROVIDER_ICU)
    2327             :         {
    2328             : #ifdef USE_ICU
    2329             : #ifdef HAVE_UCOL_STRCOLLUTF8
    2330             :             if (GetDatabaseEncoding() == PG_UTF8)
    2331             :             {
    2332             :                 UErrorCode  status;
    2333             : 
    2334             :                 status = U_ZERO_ERROR;
    2335             :                 result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
    2336             :                                           a1p, len1,
    2337             :                                           a2p, len2,
    2338             :                                           &status);
    2339             :                 if (U_FAILURE(status))
    2340             :                     ereport(ERROR,
    2341             :                             (errmsg("collation failed: %s", u_errorName(status))));
    2342             :             }
    2343             :             else
    2344             : #endif
    2345             :             {
    2346             :                 int32_t     ulen1,
    2347             :                             ulen2;
    2348             :                 UChar      *uchar1,
    2349             :                            *uchar2;
    2350             : 
    2351             :                 ulen1 = icu_to_uchar(&uchar1, a1p, len1);
    2352             :                 ulen2 = icu_to_uchar(&uchar2, a2p, len2);
    2353             : 
    2354             :                 result = ucol_strcoll(sss->locale->info.icu.ucol,
    2355             :                                       uchar1, ulen1,
    2356             :                                       uchar2, ulen2);
    2357             : 
    2358             :                 pfree(uchar1);
    2359             :                 pfree(uchar2);
    2360             :             }
    2361             : #else                           /* not USE_ICU */
    2362             :             /* shouldn't happen */
    2363           0 :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2364             : #endif                          /* not USE_ICU */
    2365             :         }
    2366             :         else
    2367             :         {
    2368             : #ifdef HAVE_LOCALE_T
    2369           0 :             result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
    2370             : #else
    2371             :             /* shouldn't happen */
    2372             :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2373             : #endif
    2374             :         }
    2375             :     }
    2376             :     else
    2377    13004286 :         result = strcoll(sss->buf1, sss->buf2);
    2378             : 
    2379             :     /* Break tie if necessary. */
    2380    13004286 :     if (result == 0 &&
    2381           0 :         (!sss->locale || sss->locale->deterministic))
    2382           0 :         result = strcmp(sss->buf1, sss->buf2);
    2383             : 
    2384             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2385    13004286 :     sss->cache_blob = false;
    2386    13004286 :     sss->last_returned = result;
    2387    13004286 :     return result;
    2388             : }
    2389             : 
    2390             : /*
    2391             :  * Abbreviated key comparison func: three-way compares x and y as plain Datum values, returning 1, 0 or -1 (ssup is not consulted)
    2392             :  */
    2393             : static int
    2394     3183246 : varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
    2395             : {
    2396             :     /*
    2397             :      * When 0 is returned, the core system will call varstrfastcmp_c()
    2398             :      * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale().  Even a
    2399             :      * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
    2400             :      * authoritatively, for the same reason that there is a strcoll()
    2401             :      * tie-breaker call to strcmp() in varstr_cmp().
    2402             :      */
    2403     3183246 :     if (x > y)
    2404     1443392 :         return 1;
    2405     1739854 :     else if (x == y)
    2406      421540 :         return 0;               /* tie at the abbreviated level only; not authoritative */
    2407             :     else
    2408     1318314 :         return -1;
    2409             : }
    2410             : 
    2411             : /*
    2412             :  * Conversion routine for sortsupport: turns 'original' into its abbreviated
    2413             :  * key and returns it.  We pack the leading sizeof(Datum) bytes of a strxfrm()
    2414             :  * (or ICU sort key) blob into a Datum, byteswapped on little-endian machines
    2415             :  * so it compares as an unsigned integer.  With the "C" collation (incl. bytea)
    2416             :  * we memcpy() from the original instead.  Also updates sss's HyperLogLog state.
    2417             :  */
    2418             : static Datum
    2419      330020 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2420             : {
    2421      330020 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2422      330020 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2423      330020 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2424             : 
    2425             :     /* working state */
    2426             :     Datum       res;            /* abbreviated key under construction */
    2427             :     char       *pres;           /* byte-wise view of res */
    2428             :     int         len;            /* payload length in bytes (sans trailing spaces for bpchar) */
    2429             :     uint32      hash;           /* scratch hash fed to HyperLogLog */
    2430             : 
    2431      330020 :     pres = (char *) &res;
    2432             :     /* memset(), so any non-overwritten bytes are NUL */
    2433      330020 :     memset(pres, 0, sizeof(Datum));
    2434      330020 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2435             : 
    2436             :     /* Get number of bytes, ignoring trailing spaces */
    2437      330020 :     if (sss->typid == BPCHAROID)
    2438           0 :         len = bpchartruelen(authoritative_data, len);
    2439             : 
    2440             :     /*
    2441             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2442             :      * abbreviate keys.  The full comparator for the C locale is always
    2443             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2444             :      * always force the C collation -- bytea isn't a collatable type, but this
    2445             :      * approach is convenient) to use strxfrm().  This is because bytea
    2446             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2447             :      *
    2448             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2449             :      * strings because varstrcmp_abbrev() need not make a distinction between
    2450             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2451             :      * authoritative representation.  Hopefully a comparison at or past one
    2452             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2453             :      * without consulting the authoritative representation; specifically, some
    2454             :      * later non-NUL byte in the longer string can resolve the comparison
    2455             :      * against a subsequent terminating NUL in the shorter string.  There will
    2456             :      * usually be what is effectively a "length-wise" resolution there and
    2457             :      * then.
    2458             :      *
    2459             :      * If that doesn't work out -- if all bytes in the longer string
    2460             :      * positioned at or past the offset of the smaller string's (first)
    2461             :      * terminating NUL are actually representative of NUL bytes in the
    2462             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2463             :      * towards the end of the longer string iff it happens to still be small)
    2464             :      * -- then an authoritative tie-breaker will happen, and do the right
    2465             :      * thing: explicitly consider string length.
    2466             :      */
    2467      330020 :     if (sss->collate_c)
    2468      330020 :         memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
    2469             :     else
    2470             :     {
    2471             :         Size        bsize;
    2472             : #ifdef USE_ICU
    2473             :         int32_t     ulen = -1;
    2474             :         UChar      *uchar = NULL;
    2475             : #endif
    2476             : 
    2477             :         /*
    2478             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2479             :          * analogs.
    2480             :          */
    2481             : 
    2482             :         /* By convention, we use buffer 1 to store and NUL-terminate; grow if needed */
    2483           0 :         if (len >= sss->buflen1)
    2484             :         {
    2485           0 :             pfree(sss->buf1);
    2486           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2487           0 :             sss->buf1 = palloc(sss->buflen1);
    2488             :         }
    2489             : 
    2490             :         /* Might be able to reuse strxfrm() blob from last call */
    2491           0 :         if (sss->last_len1 == len && sss->cache_blob &&
    2492           0 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2493             :         {
    2494           0 :             memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
    2495             :             /* No change affecting cardinality, so no hashing required */
    2496           0 :             goto done;
    2497             :         }
    2498             : 
    2499           0 :         memcpy(sss->buf1, authoritative_data, len);
    2500             : 
    2501             :         /*
    2502             :          * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
    2503             :          * necessary for ICU, but doesn't hurt.
    2504             :          */
    2505           0 :         sss->buf1[len] = '\0';
    2506           0 :         sss->last_len1 = len;
    2507             : 
    2508             : #ifdef USE_ICU
    2509             :         /* When using ICU and not UTF8, convert string to UChar. */
    2510             :         if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
    2511             :             GetDatabaseEncoding() != PG_UTF8)
    2512             :             ulen = icu_to_uchar(&uchar, sss->buf1, len);
    2513             : #endif
    2514             : 
    2515             :         /*
    2516             :          * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
    2517             :          * and try again.  Both of these functions have the result buffer
    2518             :          * content undefined if the result did not fit, so we need to retry
    2519             :          * until everything fits, even though we only need the first few bytes
    2520             :          * in the end.  When using ucol_nextSortKeyPart(), however, we only
    2521             :          * ask for as many bytes as we actually need.
    2522             :          */
    2523             :         for (;;)
    2524             :         {
    2525             : #ifdef USE_ICU
    2526             :             if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
    2527             :             {
    2528             :                 /*
    2529             :                  * When using UTF8, use the iteration interface so we only
    2530             :                  * need to produce as many bytes as we actually need.
    2531             :                  */
    2532             :                 if (GetDatabaseEncoding() == PG_UTF8)
    2533             :                 {
    2534             :                     UCharIterator iter;
    2535             :                     uint32_t    state[2];
    2536             :                     UErrorCode  status;
    2537             : 
    2538             :                     uiter_setUTF8(&iter, sss->buf1, len);
    2539             :                     state[0] = state[1] = 0;    /* won't need that again */
    2540             :                     status = U_ZERO_ERROR;
    2541             :                     bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
    2542             :                                                  &iter,
    2543             :                                                  state,
    2544             :                                                  (uint8_t *) sss->buf2,
    2545             :                                                  Min(sizeof(Datum), sss->buflen2),
    2546             :                                                  &status);
    2547             :                     if (U_FAILURE(status))
    2548             :                         ereport(ERROR,
    2549             :                                 (errmsg("sort key generation failed: %s",
    2550             :                                         u_errorName(status))));
    2551             :                 }
    2552             :                 else
    2553             :                     bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
    2554             :                                             uchar, ulen,
    2555             :                                             (uint8_t *) sss->buf2, sss->buflen2);
    2556             :             }
    2557             :             else
    2558             : #endif
    2559             : #ifdef HAVE_LOCALE_T
    2560           0 :             if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
    2561           0 :                 bsize = strxfrm_l(sss->buf2, sss->buf1,
    2562           0 :                                   sss->buflen2, sss->locale->info.lt);
    2563             :             else
    2564             : #endif
    2565           0 :                 bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
    2566             : 
    2567           0 :             sss->last_len2 = bsize;     /* lets the blob-reuse path above know how much of buf2 is valid */
    2568           0 :             if (bsize < sss->buflen2)
    2569           0 :                 break;          /* blob fit in buf2; stop retrying */
    2570             : 
    2571             :             /*
    2572             :              * Grow buffer and retry.
    2573             :              */
    2574           0 :             pfree(sss->buf2);
    2575           0 :             sss->buflen2 = Max(bsize + 1,
    2576             :                                Min(sss->buflen2 * 2, MaxAllocSize));
    2577           0 :             sss->buf2 = palloc(sss->buflen2);
    2578             :         }
    2579             : 
    2580             :         /*
    2581             :          * Every Datum byte is always compared.  This is safe because the
    2582             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2583             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2584             :          * logically representing termination.
    2585             :          *
    2586             :          * (Actually, even if there were NUL bytes in the blob it would be
    2587             :          * okay.  See remarks on bytea case above.)
    2588             :          */
    2589           0 :         memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
    2590             : 
    2591             : #ifdef USE_ICU
    2592             :         if (uchar)
    2593             :             pfree(uchar);
    2594             : #endif
    2595             :     }
    2596             : 
    2597             :     /*
    2598             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2599             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2600             :      * the worst case, where we do many string transformations for no saving
    2601             :      * in full strcoll()-based comparisons.  These statistics are used by
    2602             :      * varstr_abbrev_abort().
    2603             :      *
    2604             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    2605             :      * in order to compensate for cases where differences are past
    2606             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    2607             :      */
    2608      330020 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2609             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2610             : 
    2611      330020 :     if (len > PG_CACHE_LINE_SIZE)
    2612           6 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2613             : 
    2614      330020 :     addHyperLogLog(&sss->full_card, hash);
    2615             : 
    2616             :     /* Hash abbreviated key */
    2617             : #if SIZEOF_DATUM == 8
    2618             :     {
    2619             :         uint32      lohalf,
    2620             :                     hihalf;
    2621             : 
    2622      330020 :         lohalf = (uint32) res;
    2623      330020 :         hihalf = (uint32) (res >> 32);
    2624      330020 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2625             :     }
    2626             : #else                           /* SIZEOF_DATUM != 8 */
    2627             :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2628             : #endif
    2629             : 
    2630      330020 :     addHyperLogLog(&sss->abbr_card, hash);
    2631             : 
    2632             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2633      330020 :     sss->cache_blob = true;
    2634             : done:                           /* blob-reuse path lands here, skipping the hashing above */
    2635             : 
    2636             :     /*
    2637             :      * Byteswap on little-endian machines.
    2638             :      *
    2639             :      * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
    2640             :      * comparator) works correctly on all platforms.  If we didn't do this,
    2641             :      * the comparator would have to call memcmp() with a pair of pointers to
    2642             :      * the first byte of each abbreviated key, which is slower.
    2643             :      */
    2644      330020 :     res = DatumBigEndianToNative(res);
    2645             : 
    2646             :     /* Don't leak memory here */
    2647      330020 :     if (PointerGetDatum(authoritative) != original)
    2648           0 :         pfree(authoritative);
    2649             : 
    2650      330020 :     return res;
    2651             : }
    2652             : 
    2653             : /*
    2654             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2655             :  * heuristic rules.  Given memtupcount (the tuple count so far), returns true if
    2656             :  * the abbreviation optimization should be aborted, false if it should continue.
    2657             :  */
    2658             : static bool
    2659         892 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2660             : {
    2661         892 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2662             :     double      abbrev_distinct,
    2663             :                 key_distinct;
    2664             : 
    2665             :     Assert(ssup->abbreviate);
    2666             : 
    2667             :     /* Have a little patience: never abort before 100 tuples */
    2668         892 :     if (memtupcount < 100)
    2669         402 :         return false;
    2670             : 
    2671         490 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2672         490 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2673             : 
    2674             :     /*
    2675             :      * Clamp cardinality estimates to at least one distinct value.  While
    2676             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2677             :      * that might misrepresent costs if we failed to clamp.
    2678             :      */
    2679         490 :     if (abbrev_distinct <= 1.0)
    2680           0 :         abbrev_distinct = 1.0;
    2681             : 
    2682         490 :     if (key_distinct <= 1.0)
    2683           0 :         key_distinct = 1.0;
    2684             : 
    2685             :     /*
    2686             :      * In the worst case all abbreviated keys are identical, while at the same
    2687             :      * time there are differences within full key strings not captured in
    2688             :      * abbreviations.
    2689             :      */
    2690             : #ifdef TRACE_SORT
    2691         490 :     if (trace_sort)
    2692             :     {
    2693           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2694             : 
    2695           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2696             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2697             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2698             :              sss->prop_card);
    2699             :     }
    2700             : #endif
    2701             : 
    2702             :     /*
    2703             :      * If the number of distinct abbreviated keys approximately matches the
    2704             :      * number of distinct authoritative original keys, that's reason enough to
    2705             :      * proceed.  We can win even with a very low cardinality set if most
    2706             :      * tie-breakers only memcmp().  This is by far the most important
    2707             :      * consideration.
    2708             :      *
    2709             :      * While comparisons that are resolved at the abbreviated key level are
    2710             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2711             :      * those two outcomes are so much cheaper than a full strcoll() once
    2712             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2713             :      * cardinality against the overall size of the set in order to more
    2714             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2715             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2716             :      * resolution are equivalent.
    2717             :      */
    2718         490 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2719             :     {
    2720             :         /*
    2721             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2722             :          * aggressively for next call.
    2723             :          *
    2724             :          * This is useful because the number of comparisons required on
    2725             :          * average increases at a linearithmic rate, and at roughly 10,000
    2726             :          * tuples that factor will start to dominate over the linear costs of
    2727             :          * string transformation (this is a conservative estimate).  The decay
    2728             :          * rate is chosen to be a little less aggressive than halving -- which
    2729             :          * (since we're called at points at which memtupcount has doubled)
    2730             :          * would never see the cost model actually abort past the first call
    2731             :          * following a decay.  This decay rate is mostly a precaution against
    2732             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2733             :          * full key cardinality.  The decay also serves to prevent a marginal
    2734             :          * case from being aborted too late, when too much has already been
    2735             :          * invested in string transformation.
    2736             :          *
    2737             :          * It's possible for sets of several million distinct strings with
    2738             :          * mere tens of thousands of distinct abbreviated keys to still
    2739             :          * benefit very significantly.  This will generally occur provided
    2740             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2741             :          * set's full keys. If it isn't so, we hope to catch that early and
    2742             :          * abort.  If it isn't caught early, by the time the problem is
    2743             :          * apparent it's probably not worth aborting.
    2744             :          */
    2745         490 :         if (memtupcount > 10000)
    2746           0 :             sss->prop_card *= 0.65;     /* lower the required ratio for the next call */
    2747             : 
    2748         490 :         return false;
    2749             :     }
    2750             : 
    2751             :     /*
    2752             :      * Abort abbreviation strategy.
    2753             :      *
    2754             :      * The worst case, where all abbreviated keys are identical while all
    2755             :      * original strings differ will typically only see a regression of about
    2756             :      * 10% in execution time for small to medium sized lists of strings.
    2757             :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
    2758             :      * often expect very large improvements, particularly with sets of strings
    2759             :      * of moderately high to high abbreviated cardinality.  There is little to
    2760             :      * lose but much to gain, which our strategy reflects.
    2761             :      */
    2762             : #ifdef TRACE_SORT
    2763           0 :     if (trace_sort)
    2764           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2765             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2766             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2767             : #endif
    2768             : 
    2769           0 :     return true;
    2770             : }
    2771             : 
    2772             : Datum
    2773       43082 : text_larger(PG_FUNCTION_ARGS)   /* returns whichever text argument text_cmp() ranks greater */
    2774             : {
    2775       43082 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2776       43082 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2777             :     text       *result;
    2778             : 
    2779       43082 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);    /* compared under the call's collation; a tie yields arg2 */
    2780             : 
    2781       43082 :     PG_RETURN_TEXT_P(result);
    2782             : }
    2783             : 
    2784             : Datum
    2785       41072 : text_smaller(PG_FUNCTION_ARGS)  /* returns whichever text argument text_cmp() ranks lesser */
    2786             : {
    2787       41072 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2788       41072 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2789             :     text       *result;
    2790             : 
    2791       41072 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);    /* compared under the call's collation; a tie yields arg2 */
    2792             : 
    2793       41072 :     PG_RETURN_TEXT_P(result);
    2794             : }
    2795             : 
    2796             : 
    2797             : /*
    2798             :  * Cross-type comparison functions for types text and name.
    2799             :  */
    2800             : 
    2801             : Datum
    2802       94664 : nameeqtext(PG_FUNCTION_ARGS)    /* equality test: Name (arg 0) = text (arg 1) */
    2803             : {
    2804       94664 :     Name        arg1 = PG_GETARG_NAME(0);
    2805       94664 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2806       94664 :     size_t      len1 = strlen(NameStr(*arg1));
    2807       94664 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2808       94664 :     Oid         collid = PG_GET_COLLATION();
    2809             :     bool        result;
    2810             : 
    2811       94664 :     check_collation_set(collid);
    2812             : 
    2813       94664 :     if (collid == C_COLLATION_OID)      /* C collation: equal iff same length and same bytes */
    2814      179146 :         result = (len1 == len2 &&
    2815       84482 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2816             :     else                                /* any other collation: equal iff varstr_cmp() returns 0 */
    2817           0 :         result = (varstr_cmp(NameStr(*arg1), len1,
    2818           0 :                              VARDATA_ANY(arg2), len2,
    2819             :                              collid) == 0);
    2820             : 
    2821       94664 :     PG_FREE_IF_COPY(arg2, 1);
    2822             : 
    2823       94664 :     PG_RETURN_BOOL(result);
    2824             : }
    2825             : 
    2826             : Datum
    2827         260 : texteqname(PG_FUNCTION_ARGS)    /* equality test: text (arg 0) = Name (arg 1) */
    2828             : {
    2829         260 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2830         260 :     Name        arg2 = PG_GETARG_NAME(1);
    2831         260 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2832         260 :     size_t      len2 = strlen(NameStr(*arg2));
    2833         260 :     Oid         collid = PG_GET_COLLATION();
    2834             :     bool        result;
    2835             : 
    2836         260 :     check_collation_set(collid);
    2837             : 
    2838         260 :     if (collid == C_COLLATION_OID)      /* C collation: equal iff same length and same bytes */
    2839         380 :         result = (len1 == len2 &&
    2840         120 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2841             :     else                                /* any other collation: equal iff varstr_cmp() returns 0 */
    2842           0 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
    2843           0 :                              NameStr(*arg2), len2,
    2844             :                              collid) == 0);
    2845             : 
    2846         260 :     PG_FREE_IF_COPY(arg1, 0);
    2847             : 
    2848         260 :     PG_RETURN_BOOL(result);
    2849             : }
    2850             : 
    2851             : Datum
    2852           0 : namenetext(PG_FUNCTION_ARGS)    /* inequality test: Name (arg 0) <> text (arg 1) */
    2853             : {
    2854           0 :     Name        arg1 = PG_GETARG_NAME(0);
    2855           0 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2856           0 :     size_t      len1 = strlen(NameStr(*arg1));
    2857           0 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2858           0 :     Oid         collid = PG_GET_COLLATION();
    2859             :     bool        result;
    2860             : 
    2861           0 :     check_collation_set(collid);
    2862             : 
    2863           0 :     if (collid == C_COLLATION_OID)      /* C collation: unequal unless same length and same bytes */
    2864           0 :         result = !(len1 == len2 &&
    2865           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2866             :     else                                /* any other collation: unequal iff varstr_cmp() is nonzero */
    2867           0 :         result = !(varstr_cmp(NameStr(*arg1), len1,
    2868           0 :                               VARDATA_ANY(arg2), len2,
    2869             :                               collid) == 0);
    2870             : 
    2871           0 :     PG_FREE_IF_COPY(arg2, 1);
    2872             : 
    2873           0 :     PG_RETURN_BOOL(result);
    2874             : }
    2875             : 
    2876             : Datum
    2877           0 : textnename(PG_FUNCTION_ARGS)    /* inequality test: text (arg 0) <> Name (arg 1) */
    2878             : {
    2879           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2880           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2881           0 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2882           0 :     size_t      len2 = strlen(NameStr(*arg2));
    2883           0 :     Oid         collid = PG_GET_COLLATION();
    2884             :     bool        result;
    2885             : 
    2886           0 :     check_collation_set(collid);
    2887             : 
    2888           0 :     if (collid == C_COLLATION_OID)      /* C collation: unequal unless same length and same bytes */
    2889           0 :         result = !(len1 == len2 &&
    2890           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2891             :     else                                /* any other collation: unequal iff varstr_cmp() is nonzero */
    2892           0 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
    2893           0 :                               NameStr(*arg2), len2,
    2894             :                               collid) == 0);
    2895             : 
    2896           0 :     PG_FREE_IF_COPY(arg1, 0);
    2897             : 
    2898           0 :     PG_RETURN_BOOL(result);
    2899             : }
    2900             : 
    2901             : Datum
    2902       67420 : btnametextcmp(PG_FUNCTION_ARGS) /* three-way comparison of Name (arg 0) against text (arg 1) */
    2903             : {
    2904       67420 :     Name        arg1 = PG_GETARG_NAME(0);
    2905       67420 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2906             :     int32       result;
    2907             : 
    2908      337100 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),        /* Name length is taken with strlen() */
    2909      269680 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
    2910             :                         PG_GET_COLLATION());
    2911             : 
    2912       67420 :     PG_FREE_IF_COPY(arg2, 1);
    2913             : 
    2914       67420 :     PG_RETURN_INT32(result);
    2915             : }
    2916             : 
    2917             : Datum
    2918           0 : bttextnamecmp(PG_FUNCTION_ARGS) /* three-way comparison of text (arg 0) against Name (arg 1) */
    2919             : {
    2920           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2921           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2922             :     int32       result;
    2923             : 
    2924           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
    2925           0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),        /* Name length is taken with strlen() */
    2926             :                         PG_GET_COLLATION());
    2927             : 
    2928           0 :     PG_FREE_IF_COPY(arg1, 0);
    2929             : 
    2930           0 :     PG_RETURN_INT32(result);
    2931             : }
    2932             : 
    2933             : #define CmpCall(cmpfunc) \
    2934             :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
    2935             :                                           PG_GET_COLLATION(), \
    2936             :                                           PG_GETARG_DATUM(0), \
    2937             :                                           PG_GETARG_DATUM(1)))
    2938             : 
    2939             : Datum
    2940       21342 : namelttext(PG_FUNCTION_ARGS)
    2941             : {
    2942       21342 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
    2943             : }
    2944             : 
    2945             : Datum
    2946           0 : nameletext(PG_FUNCTION_ARGS)
    2947             : {
    2948           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
    2949             : }
    2950             : 
    2951             : Datum
    2952           0 : namegttext(PG_FUNCTION_ARGS)
    2953             : {
    2954           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
    2955             : }
    2956             : 
    2957             : Datum
    2958       20492 : namegetext(PG_FUNCTION_ARGS)
    2959             : {
    2960       20492 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
    2961             : }
    2962             : 
    2963             : Datum
    2964           0 : textltname(PG_FUNCTION_ARGS)
    2965             : {
    2966           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
    2967             : }
    2968             : 
    2969             : Datum
    2970           0 : textlename(PG_FUNCTION_ARGS)
    2971             : {
    2972           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
    2973             : }
    2974             : 
    2975             : Datum
    2976           0 : textgtname(PG_FUNCTION_ARGS)
    2977             : {
    2978           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
    2979             : }
    2980             : 
    2981             : Datum
    2982           0 : textgename(PG_FUNCTION_ARGS)
    2983             : {
    2984           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
    2985             : }
    2986             : 
    2987             : #undef CmpCall
    2988             : 
    2989             : 
    2990             : /*
    2991             :  * The following operators support character-by-character comparison
    2992             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    2993             :  * Note that the regular texteq/textne comparison operators, and regular
    2994             :  * support functions 1 and 2 with "C" collation are assumed to be
    2995             :  * compatible with these!
    2996             :  */
    2997             : 
    2998             : static int
    2999      100640 : internal_text_pattern_compare(text *arg1, text *arg2, Oid collid)
    3000             : {
    3001             :     int         result;
    3002             :     int         len1,
    3003             :                 len2;
    3004             : 
    3005      100640 :     check_collation_set(collid);
    3006             : 
    3007             :     /*
    3008             :      * XXX We cannot use a text_pattern_ops index for nondeterministic
    3009             :      * collations, because these operators intentionally ignore the collation.
    3010             :      * However, the planner has no way to know that, so it might choose such
    3011             :      * an index for an "=" clause, which would lead to wrong results.  This
    3012             :      * check here doesn't prevent choosing the index, but it will at least
    3013             :      * error out if the index is chosen.  A text_pattern_ops index on a column
    3014             :      * with nondeterministic collation is pretty useless anyway, since LIKE
    3015             :      * etc. won't work there either.  A future possibility would be to
    3016             :      * annotate the operator class or its members in the catalog to avoid the
    3017             :      * index.  Another alternative is to stay away from the *_pattern_ops
    3018             :      * operator classes and prefer creating LIKE-supporting indexes with
    3019             :      * COLLATE "C".
    3020             :      */
    3021      100640 :     if (!get_collation_isdeterministic(collid))
    3022           0 :         ereport(ERROR,
    3023             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    3024             :                  errmsg("nondeterministic collations are not supported for operator class \"%s\"",
    3025             :                         "text_pattern_ops")));
    3026             : 
    3027      100640 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3028      100640 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3029             : 
    3030      100640 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3031      100640 :     if (result != 0)
    3032      100604 :         return result;
    3033          36 :     else if (len1 < len2)
    3034           0 :         return -1;
    3035          36 :     else if (len1 > len2)
    3036          12 :         return 1;
    3037             :     else
    3038          24 :         return 0;
    3039             : }
    3040             : 
    3041             : 
    3042             : Datum
    3043       25608 : text_pattern_lt(PG_FUNCTION_ARGS)
    3044             : {
    3045       25608 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3046       25608 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3047             :     int         result;
    3048             : 
    3049       25608 :     result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
    3050             : 
    3051       25608 :     PG_FREE_IF_COPY(arg1, 0);
    3052       25608 :     PG_FREE_IF_COPY(arg2, 1);
    3053             : 
    3054       25608 :     PG_RETURN_BOOL(result < 0);
    3055             : }
    3056             : 
    3057             : 
    3058             : Datum
    3059       25008 : text_pattern_le(PG_FUNCTION_ARGS)
    3060             : {
    3061       25008 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3062       25008 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3063             :     int         result;
    3064             : 
    3065       25008 :     result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
    3066             : 
    3067       25008 :     PG_FREE_IF_COPY(arg1, 0);
    3068       25008 :     PG_FREE_IF_COPY(arg2, 1);
    3069             : 
    3070       25008 :     PG_RETURN_BOOL(result <= 0);
    3071             : }
    3072             : 
    3073             : 
    3074             : Datum
    3075       25008 : text_pattern_ge(PG_FUNCTION_ARGS)
    3076             : {
    3077       25008 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3078       25008 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3079             :     int         result;
    3080             : 
    3081       25008 :     result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
    3082             : 
    3083       25008 :     PG_FREE_IF_COPY(arg1, 0);
    3084       25008 :     PG_FREE_IF_COPY(arg2, 1);
    3085             : 
    3086       25008 :     PG_RETURN_BOOL(result >= 0);
    3087             : }
    3088             : 
    3089             : 
    3090             : Datum
    3091       25008 : text_pattern_gt(PG_FUNCTION_ARGS)
    3092             : {
    3093       25008 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3094       25008 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3095             :     int         result;
    3096             : 
    3097       25008 :     result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
    3098             : 
    3099       25008 :     PG_FREE_IF_COPY(arg1, 0);
    3100       25008 :     PG_FREE_IF_COPY(arg2, 1);
    3101             : 
    3102       25008 :     PG_RETURN_BOOL(result > 0);
    3103             : }
    3104             : 
    3105             : 
    3106             : Datum
    3107           8 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    3108             : {
    3109           8 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3110           8 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3111             :     int         result;
    3112             : 
    3113           8 :     result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
    3114             : 
    3115           8 :     PG_FREE_IF_COPY(arg1, 0);
    3116           8 :     PG_FREE_IF_COPY(arg2, 1);
    3117             : 
    3118           8 :     PG_RETURN_INT32(result);
    3119             : }
    3120             : 
    3121             : 
    3122             : Datum
    3123          78 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    3124             : {
    3125          78 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    3126          78 :     Oid         collid = ssup->ssup_collation;
    3127             :     MemoryContext oldcontext;
    3128             : 
    3129          78 :     check_collation_set(collid);
    3130             : 
    3131          78 :     if (!get_collation_isdeterministic(collid))
    3132           0 :         ereport(ERROR,
    3133             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    3134             :                  errmsg("nondeterministic collations are not supported for operator class \"%s\"",
    3135             :                         "text_pattern_ops")));
    3136             : 
    3137          78 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    3138             : 
    3139             :     /* Use generic string SortSupport, forcing "C" collation */
    3140          78 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
    3141             : 
    3142          78 :     MemoryContextSwitchTo(oldcontext);
    3143             : 
    3144          78 :     PG_RETURN_VOID();
    3145             : }
    3146             : 
    3147             : 
    3148             : /*-------------------------------------------------------------
    3149             :  * byteaoctetlen
    3150             :  *
    3151             :  * get the number of bytes contained in an instance of type 'bytea'
    3152             :  *-------------------------------------------------------------
    3153             :  */
    3154             : Datum
    3155          22 : byteaoctetlen(PG_FUNCTION_ARGS)
    3156             : {
    3157          22 :     Datum       str = PG_GETARG_DATUM(0);
    3158             : 
    3159             :     /* We need not detoast the input at all */
    3160          22 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    3161             : }
    3162             : 
    3163             : /*
    3164             :  * byteacat -
    3165             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    3166             :  *    the two.
    3167             :  *
    3168             :  * Cloned from textcat and modified as required.
    3169             :  */
    3170             : Datum
    3171           0 : byteacat(PG_FUNCTION_ARGS)
    3172             : {
    3173           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3174           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3175             : 
    3176           0 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    3177             : }
    3178             : 
    3179             : /*
    3180             :  * bytea_catenate
    3181             :  *  Guts of byteacat(), broken out so it can be used by other functions
    3182             :  *
    3183             :  * Arguments can be in short-header form, but not compressed or out-of-line
    3184             :  */
    3185             : static bytea *
    3186          24 : bytea_catenate(bytea *t1, bytea *t2)
    3187             : {
    3188             :     bytea      *result;
    3189             :     int         len1,
    3190             :                 len2,
    3191             :                 len;
    3192             :     char       *ptr;
    3193             : 
    3194          24 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3195          24 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3196             : 
    3197             :     /* paranoia ... probably should throw error instead? */
    3198          24 :     if (len1 < 0)
    3199           0 :         len1 = 0;
    3200          24 :     if (len2 < 0)
    3201           0 :         len2 = 0;
    3202             : 
    3203          24 :     len = len1 + len2 + VARHDRSZ;
    3204          24 :     result = (bytea *) palloc(len);
    3205             : 
    3206             :     /* Set size of result string... */
    3207          24 :     SET_VARSIZE(result, len);
    3208             : 
    3209             :     /* Fill data field of result string... */
    3210          24 :     ptr = VARDATA(result);
    3211          24 :     if (len1 > 0)
    3212          24 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    3213          24 :     if (len2 > 0)
    3214          12 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    3215             : 
    3216          24 :     return result;
    3217             : }
    3218             : 
    3219             : #define PG_STR_GET_BYTEA(str_) \
    3220             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    3221             : 
    3222             : /*
    3223             :  * bytea_substr()
    3224             :  * Return a substring starting at the specified position.
    3225             :  * Cloned from text_substr and modified as required.
    3226             :  *
    3227             :  * Input:
    3228             :  *  - string
    3229             :  *  - starting position (is one-based)
    3230             :  *  - string length (optional)
    3231             :  *
    3232             :  * If the starting position is zero or less, then return from the start of the string
    3233             :  * adjusting the length to be consistent with the "negative start" per SQL.
    3234             :  * If the length is less than zero, an ERROR is thrown. If no third argument
    3235             :  * (length) is provided, the length to the end of the string is assumed.
    3236             :  */
    3237             : Datum
    3238          36 : bytea_substr(PG_FUNCTION_ARGS)
    3239             : {
    3240          36 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3241             :                                       PG_GETARG_INT32(1),
    3242             :                                       PG_GETARG_INT32(2),
    3243             :                                       false));
    3244             : }
    3245             : 
    3246             : /*
    3247             :  * bytea_substr_no_len -
    3248             :  *    Wrapper to avoid opr_sanity failure due to
    3249             :  *    one function accepting a different number of args.
    3250             :  */
    3251             : Datum
    3252          16 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    3253             : {
    3254          16 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3255             :                                       PG_GETARG_INT32(1),
    3256             :                                       -1,
    3257             :                                       true));
    3258             : }
    3259             : 
    3260             : static bytea *
    3261          76 : bytea_substring(Datum str,
    3262             :                 int S,
    3263             :                 int L,
    3264             :                 bool length_not_specified)
    3265             : {
    3266             :     int         S1;             /* adjusted start position */
    3267             :     int         L1;             /* adjusted substring length */
    3268             : 
    3269          76 :     S1 = Max(S, 1);
    3270             : 
    3271          76 :     if (length_not_specified)
    3272             :     {
    3273             :         /*
    3274             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    3275             :          * end of the string if we pass it a negative value for length.
    3276             :          */
    3277          28 :         L1 = -1;
    3278             :     }
    3279             :     else
    3280             :     {
    3281             :         /* end position */
    3282          48 :         int         E = S + L;
    3283             : 
    3284             :         /*
    3285             :          * A negative value for L is the only way for the end position to be
    3286             :          * before the start. SQL99 says to throw an error.
    3287             :          */
    3288          48 :         if (E < S)
    3289           4 :             ereport(ERROR,
    3290             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
    3291             :                      errmsg("negative substring length not allowed")));
    3292             : 
    3293             :         /*
    3294             :          * A zero or negative value for the end position can happen if the
    3295             :          * start was negative or one. SQL99 says to return a zero-length
    3296             :          * string.
    3297             :          */
    3298          44 :         if (E < 1)
    3299           0 :             return PG_STR_GET_BYTEA("");
    3300             : 
    3301          44 :         L1 = E - S1;
    3302             :     }
    3303             : 
    3304             :     /*
    3305             :      * If the start position is past the end of the string, SQL99 says to
    3306             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    3307             :      * us. Convert to zero-based starting position
    3308             :      */
    3309          72 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    3310             : }
    3311             : 
    3312             : /*
    3313             :  * byteaoverlay
    3314             :  *  Replace specified substring of first string with second
    3315             :  *
    3316             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    3317             :  * This code is a direct implementation of what the standard says.
    3318             :  */
    3319             : Datum
    3320           4 : byteaoverlay(PG_FUNCTION_ARGS)
    3321             : {
    3322           4 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3323           4 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3324           4 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3325           4 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    3326             : 
    3327           4 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3328             : }
    3329             : 
    3330             : Datum
    3331           8 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    3332             : {
    3333           8 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3334           8 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3335           8 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3336             :     int         sl;
    3337             : 
    3338           8 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    3339           8 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3340             : }
    3341             : 
    3342             : static bytea *
    3343          12 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    3344             : {
    3345             :     bytea      *result;
    3346             :     bytea      *s1;
    3347             :     bytea      *s2;
    3348             :     int         sp_pl_sl;
    3349             : 
    3350             :     /*
    3351             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    3352             :      * "substring length" error because that's what should be expected
    3353             :      * according to the spec's definition of OVERLAY().
    3354             :      */
    3355          12 :     if (sp <= 0)
    3356           0 :         ereport(ERROR,
    3357             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3358             :                  errmsg("negative substring length not allowed")));
    3359          12 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    3360           0 :         ereport(ERROR,
    3361             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    3362             :                  errmsg("integer out of range")));
    3363             : 
    3364          12 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    3365          12 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    3366          12 :     result = bytea_catenate(s1, t2);
    3367          12 :     result = bytea_catenate(result, s2);
    3368             : 
    3369          12 :     return result;
    3370             : }
    3371             : 
    3372             : /*
    3373             :  * byteapos -
    3374             :  *    Return the position of the specified substring.
    3375             :  *    Implements the SQL POSITION() function.
    3376             :  * Cloned from textpos and modified as required.
    3377             :  */
    3378             : Datum
    3379           0 : byteapos(PG_FUNCTION_ARGS)
    3380             : {
    3381           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3382           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3383             :     int         pos;
    3384             :     int         px,
    3385             :                 p;
    3386             :     int         len1,
    3387             :                 len2;
    3388             :     char       *p1,
    3389             :                *p2;
    3390             : 
    3391           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3392           0 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3393             : 
    3394           0 :     if (len2 <= 0)
    3395           0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    3396             : 
    3397           0 :     p1 = VARDATA_ANY(t1);
    3398           0 :     p2 = VARDATA_ANY(t2);
    3399             : 
    3400           0 :     pos = 0;
    3401           0 :     px = (len1 - len2);
    3402           0 :     for (p = 0; p <= px; p++)
    3403             :     {
    3404           0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    3405             :         {
    3406           0 :             pos = p + 1;
    3407           0 :             break;
    3408             :         };
    3409           0 :         p1++;
    3410             :     };
    3411             : 
    3412           0 :     PG_RETURN_INT32(pos);
    3413             : }
    3414             : 
    3415             : /*-------------------------------------------------------------
    3416             :  * byteaGetByte
    3417             :  *
    3418             :  * this routine treats "bytea" as an array of bytes.
    3419             :  * It returns the Nth byte (a number between 0 and 255).
    3420             :  *-------------------------------------------------------------
    3421             :  */
    3422             : Datum
    3423           0 : byteaGetByte(PG_FUNCTION_ARGS)
    3424             : {
    3425           0 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3426           0 :     int32       n = PG_GETARG_INT32(1);
    3427             :     int         len;
    3428             :     int         byte;
    3429             : 
    3430           0 :     len = VARSIZE_ANY_EXHDR(v);
    3431             : 
    3432           0 :     if (n < 0 || n >= len)
    3433           0 :         ereport(ERROR,
    3434             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3435             :                  errmsg("index %d out of valid range, 0..%d",
    3436             :                         n, len - 1)));
    3437             : 
    3438           0 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3439             : 
    3440           0 :     PG_RETURN_INT32(byte);
    3441             : }
    3442             : 
    3443             : /*-------------------------------------------------------------
    3444             :  * byteaGetBit
    3445             :  *
    3446             :  * This routine treats a "bytea" type like an array of bits.
    3447             :  * It returns the value of the Nth bit (0 or 1).
    3448             :  *
    3449             :  *-------------------------------------------------------------
    3450             :  */
    3451             : Datum
    3452           0 : byteaGetBit(PG_FUNCTION_ARGS)
    3453             : {
    3454           0 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3455           0 :     int32       n = PG_GETARG_INT32(1);
    3456             :     int         byteNo,
    3457             :                 bitNo;
    3458             :     int         len;
    3459             :     int         byte;
    3460             : 
    3461           0 :     len = VARSIZE_ANY_EXHDR(v);
    3462             : 
    3463           0 :     if (n < 0 || n >= len * 8)
    3464           0 :         ereport(ERROR,
    3465             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3466             :                  errmsg("index %d out of valid range, 0..%d",
    3467             :                         n, len * 8 - 1)));
    3468             : 
    3469           0 :     byteNo = n / 8;
    3470           0 :     bitNo = n % 8;
    3471             : 
    3472           0 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3473             : 
    3474           0 :     if (byte & (1 << bitNo))
    3475           0 :         PG_RETURN_INT32(1);
    3476             :     else
    3477           0 :         PG_RETURN_INT32(0);
    3478             : }
    3479             : 
    3480             : /*-------------------------------------------------------------
    3481             :  * byteaSetByte
    3482             :  *
    3483             :  * Given an instance of type 'bytea' creates a new one with
    3484             :  * the Nth byte set to the given value.
    3485             :  *
    3486             :  *-------------------------------------------------------------
    3487             :  */
    3488             : Datum
    3489           0 : byteaSetByte(PG_FUNCTION_ARGS)
    3490             : {
    3491           0 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3492           0 :     int32       n = PG_GETARG_INT32(1);
    3493           0 :     int32       newByte = PG_GETARG_INT32(2);
    3494             :     int         len;
    3495             : 
    3496           0 :     len = VARSIZE(res) - VARHDRSZ;
    3497             : 
    3498           0 :     if (n < 0 || n >= len)
    3499           0 :         ereport(ERROR,
    3500             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3501             :                  errmsg("index %d out of valid range, 0..%d",
    3502             :                         n, len - 1)));
    3503             : 
    3504             :     /*
    3505             :      * Now set the byte.
    3506             :      */
    3507           0 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3508             : 
    3509           0 :     PG_RETURN_BYTEA_P(res);
    3510             : }
    3511             : 
    3512             : /*-------------------------------------------------------------
    3513             :  * byteaSetBit
    3514             :  *
    3515             :  * Given an instance of type 'bytea' creates a new one with
    3516             :  * the Nth bit set to the given value.
    3517             :  *
    3518             :  *-------------------------------------------------------------
    3519             :  */
    3520             : Datum
    3521           0 : byteaSetBit(PG_FUNCTION_ARGS)
    3522             : {
    3523           0 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3524           0 :     int32       n = PG_GETARG_INT32(1);
    3525           0 :     int32       newBit = PG_GETARG_INT32(2);
    3526             :     int         len;
    3527             :     int         oldByte,
    3528             :                 newByte;
    3529             :     int         byteNo,
    3530             :                 bitNo;
    3531             : 
    3532           0 :     len = VARSIZE(res) - VARHDRSZ;
    3533             : 
    3534           0 :     if (n < 0 || n >= len * 8)
    3535           0 :         ereport(ERROR,
    3536             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3537             :                  errmsg("index %d out of valid range, 0..%d",
    3538             :                         n, len * 8 - 1)));
    3539             : 
    3540           0 :     byteNo = n / 8;
    3541           0 :     bitNo = n % 8;
    3542             : 
    3543             :     /*
    3544             :      * sanity check!
    3545             :      */
    3546           0 :     if (newBit != 0 && newBit != 1)
    3547           0 :         ereport(ERROR,
    3548             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3549             :                  errmsg("new bit must be 0 or 1")));
    3550             : 
    3551             :     /*
    3552             :      * Update the byte.
    3553             :      */
    3554           0 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3555             : 
    3556           0 :     if (newBit == 0)
    3557           0 :         newByte = oldByte & (~(1 << bitNo));
    3558             :     else
    3559           0 :         newByte = oldByte | (1 << bitNo);
    3560             : 
    3561           0 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3562             : 
    3563           0 :     PG_RETURN_BYTEA_P(res);
    3564             : }
    3565             : 
    3566             : 
    3567             : /* text_name()
    3568             :  * Converts a text type to a Name type.
    3569             :  */
    3570             : Datum
    3571         826 : text_name(PG_FUNCTION_ARGS)
    3572             : {
    3573         826 :     text       *s = PG_GETARG_TEXT_PP(0);
    3574             :     Name        result;
    3575             :     int         len;
    3576             : 
    3577         826 :     len = VARSIZE_ANY_EXHDR(s);
    3578             : 
    3579             :     /* Truncate oversize input */
    3580         826 :     if (len >= NAMEDATALEN)
    3581           4 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3582             : 
    3583             :     /* We use palloc0 here to ensure result is zero-padded */
    3584         826 :     result = (Name) palloc0(NAMEDATALEN);
    3585         826 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
    3586             : 
    3587         826 :     PG_RETURN_NAME(result);
    3588             : }
    3589             : 
    3590             : /* name_text()
    3591             :  * Converts a Name type to a text type.
    3592             :  */
    3593             : Datum
    3594      488336 : name_text(PG_FUNCTION_ARGS)
    3595             : {
    3596      488336 :     Name        s = PG_GETARG_NAME(0);
    3597             : 
    3598      488336 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3599             : }
    3600             : 
    3601             : 
    3602             : /*
    3603             :  * textToQualifiedNameList - convert a text object to list of names
    3604             :  *
    3605             :  * This implements the input parsing needed by nextval() and other
    3606             :  * functions that take a text parameter representing a qualified name.
    3607             :  * We split the name at dots, downcase if not double-quoted, and
    3608             :  * truncate names if they're too long.
    3609             :  */
    3610             : List *
    3611         788 : textToQualifiedNameList(text *textval)
    3612             : {
    3613             :     char       *rawname;
    3614         788 :     List       *result = NIL;
    3615             :     List       *namelist;
    3616             :     ListCell   *l;
    3617             : 
    3618             :     /* Convert to C string (handles possible detoasting). */
    3619             :     /* Note we rely on being able to modify rawname below. */
    3620         788 :     rawname = text_to_cstring(textval);
    3621             : 
    3622         788 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3623           0 :         ereport(ERROR,
    3624             :                 (errcode(ERRCODE_INVALID_NAME),
    3625             :                  errmsg("invalid name syntax")));
    3626             : 
    3627         788 :     if (namelist == NIL)
    3628           0 :         ereport(ERROR,
    3629             :                 (errcode(ERRCODE_INVALID_NAME),
    3630             :                  errmsg("invalid name syntax")));
    3631             : 
    3632        1652 :     foreach(l, namelist)
    3633             :     {
    3634         864 :         char       *curname = (char *) lfirst(l);
    3635             : 
    3636         864 :         result = lappend(result, makeString(pstrdup(curname)));
    3637             :     }
    3638             : 
    3639         788 :     pfree(rawname);
    3640         788 :     list_free(namelist);
    3641             : 
    3642         788 :     return result;
    3643             : }
    3644             : 
    3645             : /*
    3646             :  * SplitIdentifierString --- parse a string containing identifiers
    3647             :  *
    3648             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3649             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3650             :  * important to avoid memory leaks, so the API is designed to minimize the
    3651             :  * amount of stuff that needs to be allocated and freed.
    3652             :  *
    3653             :  * Inputs:
    3654             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3655             :  *             been modified to contain the separated identifiers.
    3656             :  *  separator: the separator punctuation expected between identifiers
    3657             :  *             (typically '.' or ',').  Whitespace may also appear around
    3658             :  *             identifiers.
    3659             :  * Outputs:
    3660             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3661             :  *            rawstring.  Caller should list_free() this even on error return.
    3662             :  *
    3663             :  * Returns true if okay, false if there is a syntax error in the string.
    3664             :  *
    3665             :  * Note that an empty string is considered okay here, though not in
    3666             :  * textToQualifiedNameList.
    3667             :  */
    3668             : bool
    3669       70198 : SplitIdentifierString(char *rawstring, char separator,
    3670             :                       List **namelist)
    3671             : {
    3672       70198 :     char       *nextp = rawstring;
    3673       70198 :     bool        done = false;
    3674             : 
    3675       70198 :     *namelist = NIL;
    3676             : 
    3677      140396 :     while (scanner_isspace(*nextp))
    3678           0 :         nextp++;                /* skip leading whitespace */
    3679             : 
    3680       70198 :     if (*nextp == '\0')
    3681        8640 :         return true;            /* allow empty string */
    3682             : 
    3683             :     /* At the top of the loop, we are at start of a new identifier. */
    3684             :     do
    3685             :     {
    3686             :         char       *curname;
    3687             :         char       *endp;
    3688             : 
    3689       97472 :         if (*nextp == '"')
    3690             :         {
    3691             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3692       13406 :             curname = nextp + 1;
    3693             :             for (;;)
    3694             :             {
    3695       13410 :                 endp = strchr(nextp + 1, '"');
    3696       13408 :                 if (endp == NULL)
    3697           0 :                     return false;   /* mismatched quotes */
    3698       13408 :                 if (endp[1] != '"')
    3699       13406 :                     break;      /* found end of quoted name */
    3700             :                 /* Collapse adjacent quotes into one quote, and look again */
    3701           2 :                 memmove(endp, endp + 1, strlen(endp));
    3702           2 :                 nextp = endp;
    3703             :             }
    3704             :             /* endp now points at the terminating quote */
    3705       13406 :             nextp = endp + 1;
    3706             :         }
    3707             :         else
    3708             :         {
    3709             :             /* Unquoted name --- extends to separator or whitespace */
    3710             :             char       *downname;
    3711             :             int         len;
    3712             : 
    3713       84066 :             curname = nextp;
    3714     1442310 :             while (*nextp && *nextp != separator &&
    3715      637090 :                    !scanner_isspace(*nextp))
    3716      637088 :                 nextp++;
    3717       84066 :             endp = nextp;
    3718       84066 :             if (curname == nextp)
    3719           0 :                 return false;   /* empty unquoted name not allowed */
    3720             : 
    3721             :             /*
    3722             :              * Downcase the identifier, using same code as main lexer does.
    3723             :              *
    3724             :              * XXX because we want to overwrite the input in-place, we cannot
    3725             :              * support a downcasing transformation that increases the string
    3726             :              * length.  This is not a problem given the current implementation
    3727             :              * of downcase_truncate_identifier, but we'll probably have to do
    3728             :              * something about this someday.
    3729             :              */
    3730       84066 :             len = endp - curname;
    3731       84066 :             downname = downcase_truncate_identifier(curname, len, false);
    3732             :             Assert(strlen(downname) <= len);
    3733       84066 :             strncpy(curname, downname, len);    /* strncpy is required here */
    3734       84066 :             pfree(downname);
    3735             :         }
    3736             : 
    3737      194946 :         while (scanner_isspace(*nextp))
    3738           2 :             nextp++;            /* skip trailing whitespace */
    3739             : 
    3740       97472 :         if (*nextp == separator)
    3741             :         {
    3742       35914 :             nextp++;
    3743       91630 :             while (scanner_isspace(*nextp))
    3744       19802 :                 nextp++;        /* skip leading whitespace for next */
    3745             :             /* we expect another name, so done remains false */
    3746             :         }
    3747       61558 :         else if (*nextp == '\0')
    3748       61556 :             done = true;
    3749             :         else
    3750           2 :             return false;       /* invalid syntax */
    3751             : 
    3752             :         /* Now safe to overwrite separator with a null */
    3753       97470 :         *endp = '\0';
    3754             : 
    3755             :         /* Truncate name if it's overlength */
    3756       97470 :         truncate_identifier(curname, strlen(curname), false);
    3757             : 
    3758             :         /*
    3759             :          * Finished isolating current name --- add it to list
    3760             :          */
    3761       97470 :         *namelist = lappend(*namelist, curname);
    3762             : 
    3763             :         /* Loop back if we didn't reach end of string */
    3764       97470 :     } while (!done);
    3765             : 
    3766       61556 :     return true;
    3767             : }
    3768             : 
    3769             : 
    3770             : /*
    3771             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3772             :  *
    3773             :  * This works fine on file names too; the function name is historical.
    3774             :  *
    3775             :  * This is similar to SplitIdentifierString, except that the parsing
    3776             :  * rules are meant to handle pathnames instead of identifiers: there is
    3777             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3778             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3779             :  * last, the returned strings are separately palloc'd rather than being
    3780             :  * pointers into rawstring --- but we still scribble on rawstring.
    3781             :  *
    3782             :  * Inputs:
    3783             :  *  rawstring: the input string; must be modifiable!
    3784             :  *  separator: the separator punctuation expected between directories
    3785             :  *             (typically ',' or ';').  Whitespace may also appear around
    3786             :  *             directories.
    3787             :  * Outputs:
    3788             :  *  namelist: filled with a palloc'd list of directory names.
    3789             :  *            Caller should list_free_deep() this even on error return.
    3790             :  *
    3791             :  * Returns true if okay, false if there is a syntax error in the string.
    3792             :  *
    3793             :  * Note that an empty string is considered okay here.
    3794             :  */
    3795             : bool
    3796         580 : SplitDirectoriesString(char *rawstring, char separator,
    3797             :                        List **namelist)
    3798             : {
    3799         580 :     char       *nextp = rawstring;
    3800         580 :     bool        done = false;
    3801             : 
    3802         580 :     *namelist = NIL;
    3803             : 
    3804        1160 :     while (scanner_isspace(*nextp))
    3805           0 :         nextp++;                /* skip leading whitespace */
    3806             : 
    3807         580 :     if (*nextp == '\0')
    3808           2 :         return true;            /* allow empty string */
    3809             : 
    3810             :     /* At the top of the loop, we are at start of a new directory. */
    3811             :     do
    3812             :     {
    3813             :         char       *curname;
    3814             :         char       *endp;
    3815             : 
    3816         578 :         if (*nextp == '"')
    3817             :         {
    3818             :             /* Quoted name --- collapse quote-quote pairs */
    3819           0 :             curname = nextp + 1;
    3820             :             for (;;)
    3821             :             {
    3822           0 :                 endp = strchr(nextp + 1, '"');
    3823           0 :                 if (endp == NULL)
    3824           0 :                     return false;   /* mismatched quotes */
    3825           0 :                 if (endp[1] != '"')
    3826           0 :                     break;      /* found end of quoted name */
    3827             :                 /* Collapse adjacent quotes into one quote, and look again */
    3828           0 :                 memmove(endp, endp + 1, strlen(endp));
    3829           0 :                 nextp = endp;
    3830             :             }
    3831             :             /* endp now points at the terminating quote */
    3832           0 :             nextp = endp + 1;
    3833             :         }
    3834             :         else
    3835             :         {
    3836             :             /* Unquoted name --- extends to separator or end of string */
    3837         578 :             curname = endp = nextp;
    3838       11096 :             while (*nextp && *nextp != separator)
    3839             :             {
    3840             :                 /* trailing whitespace should not be included in name */
    3841        9940 :                 if (!scanner_isspace(*nextp))
    3842        9940 :                     endp = nextp + 1;
    3843        9940 :                 nextp++;
    3844             :             }
    3845         578 :             if (curname == endp)
    3846           0 :                 return false;   /* empty unquoted name not allowed */
    3847             :         }
    3848             : 
    3849        1156 :         while (scanner_isspace(*nextp))
    3850           0 :             nextp++;            /* skip trailing whitespace */
    3851             : 
    3852         578 :         if (*nextp == separator)
    3853             :         {
    3854           0 :             nextp++;
    3855           0 :             while (scanner_isspace(*nextp))
    3856           0 :                 nextp++;        /* skip leading whitespace for next */
    3857             :             /* we expect another name, so done remains false */
    3858             :         }
    3859         578 :         else if (*nextp == '\0')
    3860         578 :             done = true;
    3861             :         else
    3862           0 :             return false;       /* invalid syntax */
    3863             : 
    3864             :         /* Now safe to overwrite separator with a null */
    3865         578 :         *endp = '\0';
    3866             : 
    3867             :         /* Truncate path if it's overlength */
    3868         578 :         if (strlen(curname) >= MAXPGPATH)
    3869           0 :             curname[MAXPGPATH - 1] = '\0';
    3870             : 
    3871             :         /*
    3872             :          * Finished isolating current name --- add it to list
    3873             :          */
    3874         578 :         curname = pstrdup(curname);
    3875         578 :         canonicalize_path(curname);
    3876         578 :         *namelist = lappend(*namelist, curname);
    3877             : 
    3878             :         /* Loop back if we didn't reach end of string */
    3879         578 :     } while (!done);
    3880             : 
    3881         578 :     return true;
    3882             : }
    3883             : 
    3884             : 
    3885             : /*
    3886             :  * SplitGUCList --- parse a string containing identifiers or file names
    3887             :  *
    3888             :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
    3889             :  * presuming whether the elements will be taken as identifiers or file names.
    3890             :  * We assume the input has already been through flatten_set_variable_args(),
    3891             :  * so that we need never downcase (if appropriate, that was done already).
    3892             :  * Nor do we ever truncate, since we don't know the correct max length.
    3893             :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
    3894             :  * because any embedded whitespace should have led to double-quoting).
    3895             :  * Otherwise the API is identical to SplitIdentifierString.
    3896             :  *
    3897             :  * XXX it's annoying to have so many copies of this string-splitting logic.
    3898             :  * However, it's not clear that having one function with a bunch of option
    3899             :  * flags would be much better.
    3900             :  *
    3901             :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
    3902             :  * Be sure to update that if you have to change this.
    3903             :  *
    3904             :  * Inputs:
    3905             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3906             :  *             been modified to contain the separated identifiers.
    3907             :  *  separator: the separator punctuation expected between identifiers
    3908             :  *             (typically '.' or ',').  Whitespace may also appear around
    3909             :  *             identifiers.
    3910             :  * Outputs:
    3911             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3912             :  *            rawstring.  Caller should list_free() this even on error return.
    3913             :  *
    3914             :  * Returns true if okay, false if there is a syntax error in the string.
    3915             :  */
    3916             : bool
    3917           8 : SplitGUCList(char *rawstring, char separator,
    3918             :              List **namelist)
    3919             : {
    3920           8 :     char       *nextp = rawstring;
    3921           8 :     bool        done = false;
    3922             : 
    3923           8 :     *namelist = NIL;
    3924             : 
    3925          16 :     while (scanner_isspace(*nextp))
    3926           0 :         nextp++;                /* skip leading whitespace */
    3927             : 
    3928           8 :     if (*nextp == '\0')
    3929           0 :         return true;            /* allow empty string */
    3930             : 
    3931             :     /* At the top of the loop, we are at start of a new identifier. */
    3932             :     do
    3933             :     {
    3934             :         char       *curname;
    3935             :         char       *endp;
    3936             : 
    3937          20 :         if (*nextp == '"')
    3938             :         {
    3939             :             /* Quoted name --- collapse quote-quote pairs */
    3940          16 :             curname = nextp + 1;
    3941             :             for (;;)
    3942             :             {
    3943          24 :                 endp = strchr(nextp + 1, '"');
    3944          20 :                 if (endp == NULL)
    3945           0 :                     return false;   /* mismatched quotes */
    3946          20 :                 if (endp[1] != '"')
    3947          16 :                     break;      /* found end of quoted name */
    3948             :                 /* Collapse adjacent quotes into one quote, and look again */
    3949           4 :                 memmove(endp, endp + 1, strlen(endp));
    3950           4 :                 nextp = endp;
    3951             :             }
    3952             :             /* endp now points at the terminating quote */
    3953          16 :             nextp = endp + 1;
    3954             :         }
    3955             :         else
    3956             :         {
    3957             :             /* Unquoted name --- extends to separator or whitespace */
    3958           4 :             curname = nextp;
    3959          88 :             while (*nextp && *nextp != separator &&
    3960          40 :                    !scanner_isspace(*nextp))
    3961          40 :                 nextp++;
    3962           4 :             endp = nextp;
    3963           4 :             if (curname == nextp)
    3964           0 :                 return false;   /* empty unquoted name not allowed */
    3965             :         }
    3966             : 
    3967          40 :         while (scanner_isspace(*nextp))
    3968           0 :             nextp++;            /* skip trailing whitespace */
    3969             : 
    3970          20 :         if (*nextp == separator)
    3971             :         {
    3972          12 :             nextp++;
    3973          36 :             while (scanner_isspace(*nextp))
    3974          12 :                 nextp++;        /* skip leading whitespace for next */
    3975             :             /* we expect another name, so done remains false */
    3976             :         }
    3977           8 :         else if (*nextp == '\0')
    3978           8 :             done = true;
    3979             :         else
    3980           0 :             return false;       /* invalid syntax */
    3981             : 
    3982             :         /* Now safe to overwrite separator with a null */
    3983          20 :         *endp = '\0';
    3984             : 
    3985             :         /*
    3986             :          * Finished isolating current name --- add it to list
    3987             :          */
    3988          20 :         *namelist = lappend(*namelist, curname);
    3989             : 
    3990             :         /* Loop back if we didn't reach end of string */
    3991          20 :     } while (!done);
    3992             : 
    3993           8 :     return true;
    3994             : }
    3995             : 
    3996             : 
    3997             : /*****************************************************************************
    3998             :  *  Comparison Functions used for bytea
    3999             :  *
    4000             :  * Note: btree indexes need these routines not to leak memory; therefore,
    4001             :  * be careful to free working copies of toasted datums.  Most places don't
    4002             :  * need to be so careful.
    4003             :  *****************************************************************************/
    4004             : 
    4005             : Datum
    4006        7954 : byteaeq(PG_FUNCTION_ARGS)
    4007             : {
    4008        7954 :     Datum       arg1 = PG_GETARG_DATUM(0);
    4009        7954 :     Datum       arg2 = PG_GETARG_DATUM(1);
    4010             :     bool        result;
    4011             :     Size        len1,
    4012             :                 len2;
    4013             : 
    4014             :     /*
    4015             :      * We can use a fast path for unequal lengths, which might save us from
    4016             :      * having to detoast one or both values.
    4017             :      */
    4018        7954 :     len1 = toast_raw_datum_size(arg1);
    4019        7954 :     len2 = toast_raw_datum_size(arg2);
    4020        7954 :     if (len1 != len2)
    4021        4304 :         result = false;
    4022             :     else
    4023             :     {
    4024        3650 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    4025        3650 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    4026             : 
    4027        3650 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    4028             :                          len1 - VARHDRSZ) == 0);
    4029             : 
    4030        3650 :         PG_FREE_IF_COPY(barg1, 0);
    4031        3650 :         PG_FREE_IF_COPY(barg2, 1);
    4032             :     }
    4033             : 
    4034        7954 :     PG_RETURN_BOOL(result);
    4035             : }
    4036             : 
    4037             : Datum
    4038         528 : byteane(PG_FUNCTION_ARGS)
    4039             : {
    4040         528 :     Datum       arg1 = PG_GETARG_DATUM(0);
    4041         528 :     Datum       arg2 = PG_GETARG_DATUM(1);
    4042             :     bool        result;
    4043             :     Size        len1,
    4044             :                 len2;
    4045             : 
    4046             :     /*
    4047             :      * We can use a fast path for unequal lengths, which might save us from
    4048             :      * having to detoast one or both values.
    4049             :      */
    4050         528 :     len1 = toast_raw_datum_size(arg1);
    4051         528 :     len2 = toast_raw_datum_size(arg2);
    4052         528 :     if (len1 != len2)
    4053           0 :         result = true;
    4054             :     else
    4055             :     {
    4056         528 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    4057         528 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    4058             : 
    4059         528 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    4060             :                          len1 - VARHDRSZ) != 0);
    4061             : 
    4062         528 :         PG_FREE_IF_COPY(barg1, 0);
    4063         528 :         PG_FREE_IF_COPY(barg2, 1);
    4064             :     }
    4065             : 
    4066         528 :     PG_RETURN_BOOL(result);
    4067             : }
    4068             : 
    4069             : Datum
    4070        7300 : bytealt(PG_FUNCTION_ARGS)
    4071             : {
    4072        7300 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4073        7300 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4074             :     int         len1,
    4075             :                 len2;
    4076             :     int         cmp;
    4077             : 
    4078        7300 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4079        7300 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4080             : 
    4081        7300 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4082             : 
    4083        7300 :     PG_FREE_IF_COPY(arg1, 0);
    4084        7300 :     PG_FREE_IF_COPY(arg2, 1);
    4085             : 
    4086        7300 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    4087             : }
    4088             : 
    4089             : Datum
    4090        5556 : byteale(PG_FUNCTION_ARGS)
    4091             : {
    4092        5556 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4093        5556 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4094             :     int         len1,
    4095             :                 len2;
    4096             :     int         cmp;
    4097             : 
    4098        5556 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4099        5556 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4100             : 
    4101        5556 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4102             : 
    4103        5556 :     PG_FREE_IF_COPY(arg1, 0);
    4104        5556 :     PG_FREE_IF_COPY(arg2, 1);
    4105             : 
    4106        5556 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    4107             : }
    4108             : 
    4109             : Datum
    4110        5214 : byteagt(PG_FUNCTION_ARGS)
    4111             : {
    4112        5214 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4113        5214 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4114             :     int         len1,
    4115             :                 len2;
    4116             :     int         cmp;
    4117             : 
    4118        5214 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4119        5214 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4120             : 
    4121        5214 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4122             : 
    4123        5214 :     PG_FREE_IF_COPY(arg1, 0);
    4124        5214 :     PG_FREE_IF_COPY(arg2, 1);
    4125             : 
    4126        5214 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    4127             : }
    4128             : 
    4129             : Datum
    4130        4394 : byteage(PG_FUNCTION_ARGS)
    4131             : {
    4132        4394 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4133        4394 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4134             :     int         len1,
    4135             :                 len2;
    4136             :     int         cmp;
    4137             : 
    4138        4394 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4139        4394 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4140             : 
    4141        4394 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4142             : 
    4143        4394 :     PG_FREE_IF_COPY(arg1, 0);
    4144        4394 :     PG_FREE_IF_COPY(arg2, 1);
    4145             : 
    4146        4394 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    4147             : }
    4148             : 
    4149             : Datum
    4150       87402 : byteacmp(PG_FUNCTION_ARGS)
    4151             : {
    4152       87402 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4153       87402 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4154             :     int         len1,
    4155             :                 len2;
    4156             :     int         cmp;
    4157             : 
    4158       87402 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4159       87402 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4160             : 
    4161       87402 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4162       87402 :     if ((cmp == 0) && (len1 != len2))
    4163       14500 :         cmp = (len1 < len2) ? -1 : 1;
    4164             : 
    4165       87402 :     PG_FREE_IF_COPY(arg1, 0);
    4166       87402 :     PG_FREE_IF_COPY(arg2, 1);
    4167             : 
    4168       87402 :     PG_RETURN_INT32(cmp);
    4169             : }
    4170             : 
    4171             : Datum
    4172          86 : bytea_sortsupport(PG_FUNCTION_ARGS)
    4173             : {
    4174          86 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    4175             :     MemoryContext oldcontext;
    4176             : 
    4177          86 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    4178             : 
    4179             :     /* Use generic string SortSupport, forcing "C" collation */
    4180          86 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
    4181             : 
    4182          86 :     MemoryContextSwitchTo(oldcontext);
    4183             : 
    4184          86 :     PG_RETURN_VOID();
    4185             : }
    4186             : 
    4187             : /*
    4188             :  * appendStringInfoText
    4189             :  *
    4190             :  * Append a text to str.
    4191             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
    4192             :  */
    4193             : static void
    4194     1230954 : appendStringInfoText(StringInfo str, const text *t)
    4195             : {
    4196     1230954 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
    4197     1230954 : }
    4198             : 
    4199             : /*
    4200             :  * replace_text
    4201             :  * replace all occurrences of 'old_sub_str' in 'orig_str'
    4202             :  * with 'new_sub_str' to form 'new_str'
    4203             :  *
    4204             :  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
    4205             :  * otherwise returns 'new_str'
    4206             :  */
    4207             : Datum
    4208         966 : replace_text(PG_FUNCTION_ARGS)
    4209             : {
    4210         966 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    4211         966 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    4212         966 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    4213             :     int         src_text_len;
    4214             :     int         from_sub_text_len;
    4215             :     TextPositionState state;
    4216             :     text       *ret_text;
    4217             :     int         chunk_len;
    4218             :     char       *curr_ptr;
    4219             :     char       *start_ptr;
    4220             :     StringInfoData str;
    4221             :     bool        found;
    4222             : 
    4223         966 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4224         966 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
    4225             : 
    4226             :     /* Return unmodified source string if empty source or pattern */
    4227         966 :     if (src_text_len < 1 || from_sub_text_len < 1)
    4228             :     {
    4229           0 :         PG_RETURN_TEXT_P(src_text);
    4230             :     }
    4231             : 
    4232         966 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
    4233             : 
    4234         966 :     found = text_position_next(&state);
    4235             : 
    4236             :     /* When the from_sub_text is not found, there is nothing to do. */
    4237         966 :     if (!found)
    4238             :     {
    4239         762 :         text_position_cleanup(&state);
    4240         762 :         PG_RETURN_TEXT_P(src_text);
    4241             :     }
    4242         204 :     curr_ptr = text_position_get_match_ptr(&state);
    4243         204 :     start_ptr = VARDATA_ANY(src_text);
    4244             : 
    4245         204 :     initStringInfo(&str);
    4246             : 
    4247             :     do
    4248             :     {
    4249        2800 :         CHECK_FOR_INTERRUPTS();
    4250             : 
    4251             :         /* copy the data skipped over by last text_position_next() */
    4252        2800 :         chunk_len = curr_ptr - start_ptr;
    4253        2800 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4254             : 
    4255        2800 :         appendStringInfoText(&str, to_sub_text);
    4256             : 
    4257        2800 :         start_ptr = curr_ptr + from_sub_text_len;
    4258             : 
    4259        2800 :         found = text_position_next(&state);
    4260        2800 :         if (found)
    4261        2596 :             curr_ptr = text_position_get_match_ptr(&state);
    4262             :     }
    4263        2800 :     while (found);
    4264             : 
    4265             :     /* copy trailing data */
    4266         204 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4267         204 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4268             : 
    4269         204 :     text_position_cleanup(&state);
    4270             : 
    4271         204 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    4272         204 :     pfree(str.data);
    4273             : 
    4274         204 :     PG_RETURN_TEXT_P(ret_text);
    4275             : }
    4276             : 
    4277             : /*
    4278             :  * check_replace_text_has_escape_char
    4279             :  *
    4280             :  * check whether replace_text contains escape char.
    4281             :  */
    4282             : static bool
    4283         988 : check_replace_text_has_escape_char(const text *replace_text)
    4284             : {
    4285         988 :     const char *p = VARDATA_ANY(replace_text);
    4286         988 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4287             : 
    4288         988 :     if (pg_database_encoding_max_length() == 1)
    4289             :     {
    4290          12 :         for (; p < p_end; p++)
    4291             :         {
    4292           0 :             if (*p == '\\')
    4293           0 :                 return true;
    4294             :         }
    4295             :     }
    4296             :     else
    4297             :     {
    4298        3050 :         for (; p < p_end; p += pg_mblen(p))
    4299             :         {
    4300        2182 :             if (*p == '\\')
    4301         108 :                 return true;
    4302             :         }
    4303             :     }
    4304             : 
    4305         880 :     return false;
    4306             : }
    4307             : 
    4308             : /*
    4309             :  * appendStringInfoRegexpSubstr
    4310             :  *
    4311             :  * Append replace_text to str, substituting regexp back references for
    4312             :  * \n escapes.  start_ptr is the start of the match in the source string,
    4313             :  * at logical character position data_pos.
    4314             :  */
    4315             : static void
    4316          60 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    4317             :                              regmatch_t *pmatch,
    4318             :                              char *start_ptr, int data_pos)
    4319             : {
    4320          60 :     const char *p = VARDATA_ANY(replace_text);
    4321          60 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4322          60 :     int         eml = pg_database_encoding_max_length();
    4323             : 
    4324             :     for (;;)
    4325         122 :     {
    4326         182 :         const char *chunk_start = p;
    4327             :         int         so;
    4328             :         int         eo;
    4329             : 
    4330             :         /* Find next escape char. */
    4331         182 :         if (eml == 1)
    4332             :         {
    4333           0 :             for (; p < p_end && *p != '\\'; p++)
    4334             :                  /* nothing */ ;
    4335             :         }
    4336             :         else
    4337             :         {
    4338         182 :             for (; p < p_end && *p != '\\'; p += pg_mblen(p))
    4339             :                  /* nothing */ ;
    4340             :         }
    4341             : 
    4342             :         /* Copy the text we just scanned over, if any. */
    4343         182 :         if (p > chunk_start)
    4344          72 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    4345             : 
    4346             :         /* Done if at end of string, else advance over escape char. */
    4347         182 :         if (p >= p_end)
    4348          60 :             break;
    4349         122 :         p++;
    4350             : 
    4351         122 :         if (p >= p_end)
    4352             :         {
    4353             :             /* Escape at very end of input.  Treat same as unexpected char */
    4354           0 :             appendStringInfoChar(str, '\\');
    4355           0 :             break;
    4356             :         }
    4357             : 
    4358         122 :         if (*p >= '1' && *p <= '9')
    4359         104 :         {
    4360             :             /* Use the back reference of regexp. */
    4361         104 :             int         idx = *p - '0';
    4362             : 
    4363         104 :             so = pmatch[idx].rm_so;
    4364         104 :             eo = pmatch[idx].rm_eo;
    4365         104 :             p++;
    4366             :         }
    4367          18 :         else if (*p == '&')
    4368             :         {
    4369             :             /* Use the entire matched string. */
    4370           0 :             so = pmatch[0].rm_so;
    4371           0 :             eo = pmatch[0].rm_eo;
    4372           0 :             p++;
    4373             :         }
    4374          18 :         else if (*p == '\\')
    4375             :         {
    4376             :             /* \\ means transfer one \ to output. */
    4377          18 :             appendStringInfoChar(str, '\\');
    4378          18 :             p++;
    4379          18 :             continue;
    4380             :         }
    4381             :         else
    4382             :         {
    4383             :             /*
    4384             :              * If escape char is not followed by any expected char, just treat
    4385             :              * it as ordinary data to copy.  (XXX would it be better to throw
    4386             :              * an error?)
    4387             :              */
    4388           0 :             appendStringInfoChar(str, '\\');
    4389           0 :             continue;
    4390             :         }
    4391             : 
    4392         104 :         if (so != -1 && eo != -1)
    4393             :         {
    4394             :             /*
    4395             :              * Copy the text that is back reference of regexp.  Note so and eo
    4396             :              * are counted in characters not bytes.
    4397             :              */
    4398             :             char       *chunk_start;
    4399             :             int         chunk_len;
    4400             : 
    4401             :             Assert(so >= data_pos);
    4402         104 :             chunk_start = start_ptr;
    4403         104 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
    4404         104 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
    4405         104 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    4406             :         }
    4407             :     }
    4408          60 : }
    4409             : 
    4410             : #define REGEXP_REPLACE_BACKREF_CNT      10
    4411             : 
    4412             : /*
    4413             :  * replace_text_regexp
    4414             :  *
    4415             :  * replace text that matches to regexp in src_text to replace_text.
    4416             :  *
    4417             :  * Note: to avoid having to include regex.h in builtins.h, we declare
    4418             :  * the regexp argument as void *, but really it's regex_t *.
    4419             :  */
    4420             : text *
    4421         988 : replace_text_regexp(text *src_text, void *regexp,
    4422             :                     text *replace_text, bool glob)
    4423             : {
    4424             :     text       *ret_text;
    4425         988 :     regex_t    *re = (regex_t *) regexp;
    4426         988 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4427             :     StringInfoData buf;
    4428             :     regmatch_t  pmatch[REGEXP_REPLACE_BACKREF_CNT];
    4429             :     pg_wchar   *data;
    4430             :     size_t      data_len;
    4431             :     int         search_start;
    4432             :     int         data_pos;
    4433             :     char       *start_ptr;
    4434             :     bool        have_escape;
    4435             : 
    4436         988 :     initStringInfo(&buf);
    4437             : 
    4438             :     /* Convert data string to wide characters. */
    4439         988 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
    4440         988 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    4441             : 
    4442             :     /* Check whether replace_text has escape char. */
    4443         988 :     have_escape = check_replace_text_has_escape_char(replace_text);
    4444             : 
    4445             :     /* start_ptr points to the data_pos'th character of src_text */
    4446         988 :     start_ptr = (char *) VARDATA_ANY(src_text);
    4447         988 :     data_pos = 0;
    4448             : 
    4449         988 :     search_start = 0;
    4450        2314 :     while (search_start <= data_len)
    4451             :     {
    4452             :         int         regexec_result;
    4453             : 
    4454        1322 :         CHECK_FOR_INTERRUPTS();
    4455             : 
    4456        1322 :         regexec_result = pg_regexec(re,
    4457             :                                     data,
    4458             :                                     data_len,
    4459             :                                     search_start,
    4460             :                                     NULL,   /* no details */
    4461             :                                     REGEXP_REPLACE_BACKREF_CNT,
    4462             :                                     pmatch,
    4463             :                                     0);
    4464             : 
    4465        1322 :         if (regexec_result == REG_NOMATCH)
    4466         814 :             break;
    4467             : 
    4468         508 :         if (regexec_result != REG_OKAY)
    4469             :         {
    4470             :             char        errMsg[100];
    4471             : 
    4472           0 :             CHECK_FOR_INTERRUPTS();
    4473           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    4474           0 :             ereport(ERROR,
    4475             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    4476             :                      errmsg("regular expression failed: %s", errMsg)));
    4477             :         }
    4478             : 
    4479             :         /*
    4480             :          * Copy the text to the left of the match position.  Note we are given
    4481             :          * character not byte indexes.
    4482             :          */
    4483         508 :         if (pmatch[0].rm_so - data_pos > 0)
    4484             :         {
    4485             :             int         chunk_len;
    4486             : 
    4487         446 :             chunk_len = charlen_to_bytelen(start_ptr,
    4488         446 :                                            pmatch[0].rm_so - data_pos);
    4489         446 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4490             : 
    4491             :             /*
    4492             :              * Advance start_ptr over that text, to avoid multiple rescans of
    4493             :              * it if the replace_text contains multiple back-references.
    4494             :              */
    4495         446 :             start_ptr += chunk_len;
    4496         446 :             data_pos = pmatch[0].rm_so;
    4497             :         }
    4498             : 
    4499             :         /*
    4500             :          * Copy the replace_text. Process back references when the
    4501             :          * replace_text has escape characters.
    4502             :          */
    4503         508 :         if (have_escape)
    4504          60 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    4505             :                                          start_ptr, data_pos);
    4506             :         else
    4507         448 :             appendStringInfoText(&buf, replace_text);
    4508             : 
    4509             :         /* Advance start_ptr and data_pos over the matched text. */
    4510         508 :         start_ptr += charlen_to_bytelen(start_ptr,
    4511         508 :                                         pmatch[0].rm_eo - data_pos);
    4512         508 :         data_pos = pmatch[0].rm_eo;
    4513             : 
    4514             :         /*
    4515             :          * When global option is off, replace the first instance only.
    4516             :          */
    4517         508 :         if (!glob)
    4518         170 :             break;
    4519             : 
    4520             :         /*
    4521             :          * Advance search position.  Normally we start the next search at the
    4522             :          * end of the previous match; but if the match was of zero length, we
    4523             :          * have to advance by one character, or we'd just find the same match
    4524             :          * again.
    4525             :          */
    4526         338 :         search_start = data_pos;
    4527         338 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4528           8 :             search_start++;
    4529             :     }
    4530             : 
    4531             :     /*
    4532             :      * Copy the text to the right of the last match.
    4533             :      */
    4534         988 :     if (data_pos < data_len)
    4535             :     {
    4536             :         int         chunk_len;
    4537             : 
    4538         894 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4539         894 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4540             :     }
    4541             : 
    4542         988 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4543         988 :     pfree(buf.data);
    4544         988 :     pfree(data);
    4545             : 
    4546         988 :     return ret_text;
    4547             : }
    4548             : 
    4549             : /*
    4550             :  * split_text
    4551             :  * parse input string
    4552             :  * return ord item (1 based)
    4553             :  * based on provided field separator
    4554             :  */
    4555             : Datum
    4556          20 : split_text(PG_FUNCTION_ARGS)
    4557             : {
    4558          20 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4559          20 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4560          20 :     int         fldnum = PG_GETARG_INT32(2);
    4561             :     int         inputstring_len;
    4562             :     int         fldsep_len;
    4563             :     TextPositionState state;
    4564             :     char       *start_ptr;
    4565             :     char       *end_ptr;
    4566             :     text       *result_text;
    4567             :     bool        found;
    4568             : 
    4569             :     /* field number is 1 based */
    4570          20 :     if (fldnum < 1)
    4571           4 :         ereport(ERROR,
    4572             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4573             :                  errmsg("field position must be greater than zero")));
    4574             : 
    4575          16 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4576          16 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4577             : 
    4578             :     /* return empty string for empty input string */
    4579          16 :     if (inputstring_len < 1)
    4580           0 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4581             : 
    4582             :     /* empty field separator */
    4583          16 :     if (fldsep_len < 1)
    4584             :     {
    4585           0 :         text_position_cleanup(&state);
    4586             :         /* if first field, return input string, else empty string */
    4587           0 :         if (fldnum == 1)
    4588           0 :             PG_RETURN_TEXT_P(inputstring);
    4589             :         else
    4590           0 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4591             :     }
    4592             : 
    4593          16 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4594             : 
    4595             :     /* identify bounds of first field */
    4596          16 :     start_ptr = VARDATA_ANY(inputstring);
    4597          16 :     found = text_position_next(&state);
    4598             : 
    4599             :     /* special case if fldsep not found at all */
    4600          16 :     if (!found)
    4601             :     {
    4602           0 :         text_position_cleanup(&state);
    4603             :         /* if field 1 requested, return input string, else empty string */
    4604           0 :         if (fldnum == 1)
    4605           0 :             PG_RETURN_TEXT_P(inputstring);
    4606             :         else
    4607           0 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4608             :     }
    4609          16 :     end_ptr = text_position_get_match_ptr(&state);
    4610             : 
    4611          44 :     while (found && --fldnum > 0)
    4612             :     {
    4613             :         /* identify bounds of next field */
    4614          12 :         start_ptr = end_ptr + fldsep_len;
    4615          12 :         found = text_position_next(&state);
    4616          12 :         if (found)
    4617           4 :             end_ptr = text_position_get_match_ptr(&state);
    4618             :     }
    4619             : 
    4620          16 :     text_position_cleanup(&state);
    4621             : 
    4622          16 :     if (fldnum > 0)
    4623             :     {
    4624             :         /* N'th field separator not found */
    4625             :         /* if last field requested, return it, else empty string */
    4626           8 :         if (fldnum == 1)
    4627             :         {
    4628           4 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);
    4629             : 
    4630           4 :             result_text = cstring_to_text_with_len(start_ptr,
    4631             :                                                    inputstring_len - last_len);
    4632             :         }
    4633             :         else
    4634           4 :             result_text = cstring_to_text("");
    4635             :     }
    4636             :     else
    4637             :     {
    4638             :         /* non-last field requested */
    4639           8 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
    4640             :     }
    4641             : 
    4642          16 :     PG_RETURN_TEXT_P(result_text);
    4643             : }
    4644             : 
    4645             : /*
    4646             :  * Convenience function to return true when two text params are equal.
    4647             :  */
    4648             : static bool
    4649          56 : text_isequal(text *txt1, text *txt2, Oid collid)
    4650             : {
    4651          56 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
    4652             :                                                 collid,
    4653             :                                                 PointerGetDatum(txt1),
    4654             :                                                 PointerGetDatum(txt2)));
    4655             : }
    4656             : 
    4657             : /*
    4658             :  * text_to_array
    4659             :  * parse input string and return text array of elements,
    4660             :  * based on provided field separator
    4661             :  */
    4662             : Datum
    4663          56 : text_to_array(PG_FUNCTION_ARGS)
    4664             : {
    4665          56 :     return text_to_array_internal(fcinfo);
    4666             : }
    4667             : 
    4668             : /*
    4669             :  * text_to_array_null
    4670             :  * parse input string and return text array of elements,
    4671             :  * based on provided field separator and null string
    4672             :  *
    4673             :  * This is a separate entry point only to prevent the regression tests from
    4674             :  * complaining about different argument sets for the same internal function.
    4675             :  */
    4676             : Datum
    4677          16 : text_to_array_null(PG_FUNCTION_ARGS)
    4678             : {
    4679          16 :     return text_to_array_internal(fcinfo);
    4680             : }
    4681             : 
    4682             : /*
    4683             :  * common code for text_to_array and text_to_array_null functions
    4684             :  *
    4685             :  * These are not strict so we have to test for null inputs explicitly.
    4686             :  */
    4687             : static Datum
    4688          72 : text_to_array_internal(PG_FUNCTION_ARGS)
    4689             : {
    4690             :     text       *inputstring;
    4691             :     text       *fldsep;
    4692             :     text       *null_string;
    4693             :     int         inputstring_len;
    4694             :     int         fldsep_len;
    4695             :     char       *start_ptr;
    4696             :     text       *result_text;
    4697             :     bool        is_null;
    4698          72 :     ArrayBuildState *astate = NULL;
    4699             : 
    4700             :     /* when input string is NULL, then result is NULL too */
    4701          72 :     if (PG_ARGISNULL(0))
    4702           4 :         PG_RETURN_NULL();
    4703             : 
    4704          68 :     inputstring = PG_GETARG_TEXT_PP(0);
    4705             : 
    4706             :     /* fldsep can be NULL */
    4707          68 :     if (!PG_ARGISNULL(1))
    4708          64 :         fldsep = PG_GETARG_TEXT_PP(1);
    4709             :     else
    4710           4 :         fldsep = NULL;
    4711             : 
    4712             :     /* null_string can be NULL or omitted */
    4713          68 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4714          16 :         null_string = PG_GETARG_TEXT_PP(2);
    4715             :     else
    4716          52 :         null_string = NULL;
    4717             : 
    4718          68 :     if (fldsep != NULL)
    4719             :     {
    4720             :         /*
    4721             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4722             :          * to search for occurrences of fldsep.
    4723             :          */
    4724             :         TextPositionState state;
    4725             : 
    4726          64 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4727          64 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4728             : 
    4729             :         /* return empty array for empty input string */
    4730          64 :         if (inputstring_len < 1)
    4731          28 :             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4732             : 
    4733             :         /*
    4734             :          * empty field separator: return the input string as a one-element
    4735             :          * array
    4736             :          */
    4737          60 :         if (fldsep_len < 1)
    4738             :         {
    4739             :             Datum       elems[1];
    4740             :             bool        nulls[1];
    4741             :             int         dims[1];
    4742             :             int         lbs[1];
    4743             : 
    4744             :             /* single element can be a NULL too */
    4745          20 :             is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
    4746             : 
    4747          20 :             elems[0] = PointerGetDatum(inputstring);
    4748          20 :             nulls[0] = is_null;
    4749          20 :             dims[0] = 1;
    4750          20 :             lbs[0] = 1;
    4751             :             /* XXX: this hardcodes assumptions about the text type */
    4752          20 :             PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
    4753             :                                                      1, dims, lbs,
    4754             :                                                      TEXTOID, -1, false, 'i'));
    4755             :         }
    4756             : 
    4757          40 :         text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4758             : 
    4759          40 :         start_ptr = VARDATA_ANY(inputstring);
    4760             : 
    4761             :         for (;;)
    4762         208 :         {
    4763             :             bool        found;
    4764             :             char       *end_ptr;
    4765             :             int         chunk_len;
    4766             : 
    4767         248 :             CHECK_FOR_INTERRUPTS();
    4768             : 
    4769         248 :             found = text_position_next(&state);
    4770         248 :             if (!found)
    4771             :             {
    4772             :                 /* fetch last field */
    4773          40 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4774          40 :                 end_ptr = NULL; /* not used, but some compilers complain */
    4775             :             }
    4776             :             else
    4777             :             {
    4778             :                 /* fetch non-last field */
    4779         208 :                 end_ptr = text_position_get_match_ptr(&state);
    4780         208 :                 chunk_len = end_ptr - start_ptr;
    4781             :             }
    4782             : 
    4783             :             /* must build a temp text datum to pass to accumArrayResult */
    4784         248 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4785         248 :             is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
    4786             : 
    4787             :             /* stash away this field */
    4788         248 :             astate = accumArrayResult(astate,
    4789             :                                       PointerGetDatum(result_text),
    4790             :                                       is_null,
    4791             :                                       TEXTOID,
    4792             :                                       CurrentMemoryContext);
    4793             : 
    4794         248 :             pfree(result_text);
    4795             : 
    4796         248 :             if (!found)
    4797          40 :                 break;
    4798             : 
    4799         208 :             start_ptr = end_ptr + fldsep_len;
    4800             :         }
    4801             : 
    4802          40 :         text_position_cleanup(&state);
    4803             :     }
    4804             :     else
    4805             :     {
    4806             :         /*
    4807             :          * When fldsep is NULL, each character in the inputstring becomes an
    4808             :          * element in the result array.  The separator is effectively the
    4809             :          * space between characters.
    4810             :          */
    4811           4 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4812             : 
    4813             :         /* return empty array for empty input string */
    4814           4 :         if (inputstring_len < 1)
    4815           0 :             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4816             : 
    4817           4 :         start_ptr = VARDATA_ANY(inputstring);
    4818             : 
    4819          28 :         while (inputstring_len > 0)
    4820             :         {
    4821          20 :             int         chunk_len = pg_mblen(start_ptr);
    4822             : 
    4823          20 :             CHECK_FOR_INTERRUPTS();
    4824             : 
    4825             :             /* must build a temp text datum to pass to accumArrayResult */
    4826          20 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4827          20 :             is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
    4828             : 
    4829             :             /* stash away this field */
    4830          20 :             astate = accumArrayResult(astate,
    4831             :                                       PointerGetDatum(result_text),
    4832             :                                       is_null,
    4833             :                                       TEXTOID,
    4834             :                                       CurrentMemoryContext);
    4835             : 
    4836          20 :             pfree(result_text);
    4837             : 
    4838          20 :             start_ptr += chunk_len;
    4839          20 :             inputstring_len -= chunk_len;
    4840             :         }
    4841             :     }
    4842             : 
    4843          44 :     PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
    4844             :                                           CurrentMemoryContext));
    4845             : }
    4846             : 
    4847             : /*
    4848             :  * array_to_text
    4849             :  * Concatenate the output-function (Cstring) representation of the elements of the
    4850             :  * array in argument 0, placing the field separator from argument 1 between them.
    4851             :  */
    4852             : Datum
    4853       34592 : array_to_text(PG_FUNCTION_ARGS)
    4854             : {
    4855       34592 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    4856       34592 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4857             : 
    4858       34592 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));  /* NULL null_string: null elements are skipped */
    4859             : }
    4860             : 
    4861             : /*
    4862             :  * array_to_text_null
    4863             :  * Concatenate the Cstring representation of the elements of the array (argument 0),
    4864             :  * using the provided field separator (argument 1) and null string (argument 2).
    4865             :  * Returns SQL NULL if the array or the separator is NULL; a NULL null string is allowed.
    4866             :  * This version is not strict so we have to test for null inputs explicitly.
    4867             :  */
    4868             : Datum
    4869           8 : array_to_text_null(PG_FUNCTION_ARGS)
    4870             : {
    4871             :     ArrayType  *v;
    4872             :     char       *fldsep;
    4873             :     char       *null_string;
    4874             : 
    4875             :     /* returns NULL when first or second parameter is NULL */
    4876           8 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    4877           0 :         PG_RETURN_NULL();
    4878             : 
    4879           8 :     v = PG_GETARG_ARRAYTYPE_P(0);
    4880           8 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4881             : 
    4882             :     /* NULL null string is passed through as a null pointer, which makes the worker skip null elements */
    4883           8 :     if (!PG_ARGISNULL(2))
    4884           4 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    4885             :     else
    4886           4 :         null_string = NULL;
    4887             : 
    4888           8 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    4889             : }
    4890             : 
    4891             : /*
    4892             :  * common code for array_to_text and array_to_text_null functions (concat_internal also calls it for VARIADIC arrays): returns a new text holding the elements' output strings joined by fldsep; element-type lookup data is cached in fcinfo->flinfo->fn_extra
    4893             :  */
    4894             : static text *
    4895       34612 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    4896             :                        const char *fldsep, const char *null_string)  /* null_string == NULL: null elements are skipped */
    4897             : {
    4898             :     text       *result;
    4899             :     int         nitems,
    4900             :                *dims,
    4901             :                 ndims;
    4902             :     Oid         element_type;
    4903             :     int         typlen;
    4904             :     bool        typbyval;
    4905             :     char        typalign;
    4906             :     StringInfoData buf;
    4907       34612 :     bool        printed = false;  /* has anything been appended yet? decides whether fldsep is emitted */
    4908             :     char       *p;  /* cursor over the array's element data */
    4909             :     bits8      *bitmap;  /* from ARR_NULLBITMAP(v); tested for NULL before each use */
    4910             :     int         bitmask;  /* bit of *bitmap that corresponds to the current element */
    4911             :     int         i;
    4912             :     ArrayMetaState *my_extra;
    4913             : 
    4914       34612 :     ndims = ARR_NDIM(v);
    4915       34612 :     dims = ARR_DIMS(v);
    4916       34612 :     nitems = ArrayGetNItems(ndims, dims);
    4917             : 
    4918             :     /* if there are no elements, return an empty string */
    4919       34612 :     if (nitems == 0)
    4920       20280 :         return cstring_to_text_with_len("", 0);
    4921             : 
    4922       14332 :     element_type = ARR_ELEMTYPE(v);
    4923       14332 :     initStringInfo(&buf);
    4924             : 
    4925             :     /*
    4926             :      * We arrange to look up info about element type, including its output
    4927             :      * conversion proc, only once per series of calls, assuming the element
    4928             :      * type doesn't change underneath us.
    4929             :      */
    4930       14332 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4931       14332 :     if (my_extra == NULL)
    4932             :     {
    4933         920 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4934             :                                                       sizeof(ArrayMetaState));
    4935         920 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4936         920 :         my_extra->element_type = ~element_type;  /* deliberately different from element_type, so the lookup below runs */
    4937             :     }
    4938             : 
    4939       14332 :     if (my_extra->element_type != element_type)
    4940             :     {
    4941             :         /*
    4942             :          * Get info about element type, including its output conversion proc
    4943             :          */
    4944         920 :         get_type_io_data(element_type, IOFunc_output,
    4945             :                          &my_extra->typlen, &my_extra->typbyval,
    4946             :                          &my_extra->typalign, &my_extra->typdelim,
    4947             :                          &my_extra->typioparam, &my_extra->typiofunc);
    4948         920 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    4949         920 :                       fcinfo->flinfo->fn_mcxt);
    4950         920 :         my_extra->element_type = element_type;  /* remember which type the cached data describes */
    4951             :     }
    4952       14332 :     typlen = my_extra->typlen;
    4953       14332 :     typbyval = my_extra->typbyval;
    4954       14332 :     typalign = my_extra->typalign;
    4955             : 
    4956       14332 :     p = ARR_DATA_PTR(v);
    4957       14332 :     bitmap = ARR_NULLBITMAP(v);
    4958       14332 :     bitmask = 1;
    4959             : 
    4960       49034 :     for (i = 0; i < nitems; i++)
    4961             :     {
    4962             :         Datum       itemvalue;
    4963             :         char       *value;
    4964             : 
    4965             :         /* Get source element, checking for NULL (a clear bit in the bitmap marks a null element) */
    4966       34702 :         if (bitmap && (*bitmap & bitmask) == 0)
    4967             :         {
    4968             :             /* if null_string is NULL, we just ignore null elements; p is not advanced in this branch */
    4969          24 :             if (null_string != NULL)
    4970             :             {
    4971           4 :                 if (printed)
    4972           4 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
    4973             :                 else
    4974           0 :                     appendStringInfoString(&buf, null_string);
    4975           4 :                 printed = true;
    4976             :             }
    4977             :         }
    4978             :         else
    4979             :         {
    4980       34690 :             itemvalue = fetch_att(p, typbyval, typlen);
    4981             : 
    4982       34690 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);  /* element rendered by its type's output function */
    4983             : 
    4984       34690 :             if (printed)
    4985       20358 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    4986             :             else
    4987       14332 :                 appendStringInfoString(&buf, value);
    4988       34690 :             printed = true;
    4989             : 
    4990       34690 :             p = att_addlength_pointer(p, typlen, p);  /* step over this element's data ... */
    4991       34690 :             p = (char *) att_align_nominal(p, typalign);  /* ... and align for the next element */
    4992             :         }
    4993             : 
    4994             :         /* advance bitmap pointer if any */
    4995       34702 :         if (bitmap)
    4996             :         {
    4997          72 :             bitmask <<= 1;
    4998          72 :             if (bitmask == 0x100)  /* all 8 bits of the current bitmap byte have been used */
    4999             :             {
    5000           0 :                 bitmap++;
    5001           0 :                 bitmask = 1;
    5002             :             }
    5003             :         }
    5004             :     }
    5005             : 
    5006       14332 :     result = cstring_to_text_with_len(buf.data, buf.len);
    5007       14332 :     pfree(buf.data);  /* the working buffer is no longer needed once copied into result */
    5008             : 
    5009       14332 :     return result;
    5010             : }
    5011             : 
    5012             : #define HEXBASE 16  /* radix used by to_hex32 and to_hex64 */
    5013             : /*
    5014             :  * Convert an int32 to a string containing a base 16 (hex) representation of
    5015             :  * the number.  The argument is cast to uint32 first, so no sign is ever printed.
    5016             :  */
    5017             : Datum
    5018        1324 : to_hex32(PG_FUNCTION_ARGS)
    5019             : {
    5020        1324 :     uint32      value = (uint32) PG_GETARG_INT32(0);
    5021             :     char       *ptr;
    5022        1324 :     const char *digits = "0123456789abcdef";
    5023             :     char        buf[32];        /* bigger than needed, but reasonable */
    5024             : 
    5025        1324 :     ptr = buf + sizeof(buf) - 1;  /* digits are written backwards, starting just before the terminator */
    5026        1324 :     *ptr = '\0';
    5027             : 
    5028             :     do  /* do-while: at least one digit is produced, so zero yields "0" */
    5029             :     {
    5030        2472 :         *--ptr = digits[value % HEXBASE];
    5031        2472 :         value /= HEXBASE;
    5032        2472 :     } while (ptr > buf && value);  /* stop when the value is exhausted or the buffer start is reached */
    5033             : 
    5034        1324 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));  /* ptr points at the most significant digit */
    5035             : }
    5036             : 
    5037             : /*
    5038             :  * Convert an int64 to a string containing a base 16 (hex) representation of
    5039             :  * the number.  The argument is cast to uint64 first, so no sign is ever printed.
    5040             :  */
    5041             : Datum
    5042           4 : to_hex64(PG_FUNCTION_ARGS)
    5043             : {
    5044           4 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    5045             :     char       *ptr;
    5046           4 :     const char *digits = "0123456789abcdef";
    5047             :     char        buf[32];        /* bigger than needed, but reasonable */
    5048             : 
    5049           4 :     ptr = buf + sizeof(buf) - 1;  /* digits are written backwards, starting just before the terminator */
    5050           4 :     *ptr = '\0';
    5051             : 
    5052             :     do  /* do-while: at least one digit is produced, so zero yields "0" */
    5053             :     {
    5054          32 :         *--ptr = digits[value % HEXBASE];
    5055          32 :         value /= HEXBASE;
    5056          32 :     } while (ptr > buf && value);  /* stop when the value is exhausted or the buffer start is reached */
    5057             : 
    5058           4 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));  /* ptr points at the most significant digit */
    5059             : }
    5060             : 
    5061             : /*
    5062             :  * Return the size of the datum passed as argument 0, possibly compressed
    5063             :  * Dispatches on the argument type's typlen: -1 uses toast_datum_size(), -2 uses strlen() + 1, any other value is returned as-is.
    5064             :  * Works on any data type
    5065             :  */
    5066             : Datum
    5067         102 : pg_column_size(PG_FUNCTION_ARGS)
    5068             : {
    5069         102 :     Datum       value = PG_GETARG_DATUM(0);
    5070             :     int32       result;
    5071             :     int         typlen;
    5072             : 
    5073             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5074         102 :     if (fcinfo->flinfo->fn_extra == NULL)
    5075             :     {
    5076             :         /* Lookup the datatype of the supplied argument */
    5077         102 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5078             : 
    5079         102 :         typlen = get_typlen(argtypeid);
    5080         102 :         if (typlen == 0)        /* should not happen */
    5081           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5082             : 
    5083         102 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5084             :                                                       sizeof(int));
    5085         102 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5086             :     }
    5087             :     else
    5088           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);  /* later calls reuse the cached typlen */
    5089             : 
    5090         102 :     if (typlen == -1)
    5091             :     {
    5092             :         /* varlena type, possibly toasted */
    5093         102 :         result = toast_datum_size(value);
    5094             :     }
    5095           0 :     else if (typlen == -2)
    5096             :     {
    5097             :         /* cstring: string length plus one for the terminating NUL */
    5098           0 :         result = strlen(DatumGetCString(value)) + 1;
    5099             :     }
    5100             :     else
    5101             :     {
    5102             :         /* ordinary fixed-width type */
    5103           0 :         result = typlen;
    5104             :     }
    5105             : 
    5106         102 :     PG_RETURN_INT32(result);
    5107             : }
    5108             : 
    5109             : /*
    5110             :  * string_agg - Concatenates values and returns string.
    5111             :  *
    5112             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    5113             :  *
    5114             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    5115             :  * actually used at all, and on subsequent calls the delimiter precedes
    5116             :  * the associated value.
    5117             :  */
    5118             : 
    5119             : /* subroutine to initialize state: returns a new StringInfo allocated in the aggregate's memory context, or raises an error when not called in an aggregate context */
    5120             : static StringInfo
    5121         578 : makeStringAggState(FunctionCallInfo fcinfo)
    5122             : {
    5123             :     StringInfo  state;
    5124             :     MemoryContext aggcontext;
    5125             :     MemoryContext oldcontext;
    5126             : 
    5127         578 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    5128             :     {
    5129             :         /* cannot be called directly because of internal-type argument */
    5130           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    5131             :     }
    5132             : 
    5133             :     /*
    5134             :      * Create state in aggregate context.  It'll stay there across subsequent
    5135             :      * calls.
    5136             :      */
    5137         578 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    5138         578 :     state = makeStringInfo();
    5139         578 :     MemoryContextSwitchTo(oldcontext);  /* restore the caller's memory context */
    5140             : 
    5141         578 :     return state;
    5142             : }
    5143             : 
    5144             : Datum
    5145      614166 : string_agg_transfn(PG_FUNCTION_ARGS)  /* transition function: arg 0 = state (may be NULL), arg 1 = value, arg 2 = delimiter */
    5146             : {
    5147             :     StringInfo  state;
    5148             : 
    5149      614166 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5150             : 
    5151             :     /* Append the value unless null. */
    5152      614166 :     if (!PG_ARGISNULL(1))
    5153             :     {
    5154             :         /* On the first time through, we ignore the delimiter; later a NULL delimiter is simply not appended. */
    5155      614134 :         if (state == NULL)
    5156         562 :             state = makeStringAggState(fcinfo);
    5157      613572 :         else if (!PG_ARGISNULL(2))
    5158      613572 :             appendStringInfoText(state, PG_GETARG_TEXT_PP(2));  /* delimiter */
    5159             : 
    5160      614134 :         appendStringInfoText(state, PG_GETARG_TEXT_PP(1));  /* value */
    5161             :     }
    5162             : 
    5163             :     /*
    5164             :      * The transition type for string_agg() is declared to be "internal",
    5165             :      * which is a pass-by-value type the same size as a pointer.
    5166             :      */
    5167      614166 :     PG_RETURN_POINTER(state);  /* still NULL if no non-null value has created the state yet */
    5168             : }
    5169             : 
    5170             : Datum
    5171         602 : string_agg_finalfn(PG_FUNCTION_ARGS)  /* final function: arg 0 = accumulated state (may be NULL) */
    5172             : {
    5173             :     StringInfo  state;
    5174             : 
    5175             :     /* cannot be called directly because of internal-type argument */
    5176             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5177             : 
    5178         602 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5179             : 
    5180         602 :     if (state != NULL)
    5181         562 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));  /* copy the accumulated bytes into a text value */
    5182             :     else
    5183          40 :         PG_RETURN_NULL();  /* no state was ever created, so the result is SQL NULL */
    5184             : }
    5185             : 
    5186             : /*
    5187             :  * Prepare cache with fmgr info for the output functions of the datatypes of
    5188             :  * the arguments of a concat-like function, beginning with argument "argidx".
    5189             :  * (Arguments before that will have corresponding slots in the resulting
    5190             :  * FmgrInfo array, but we don't fill those slots.)  The array is also stored in fn_extra and returned.
    5191             :  */
    5192             : static FmgrInfo *
    5193          24 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
    5194             : {
    5195             :     FmgrInfo   *foutcache;
    5196             :     int         i;
    5197             : 
    5198             :     /* We keep the info in fn_mcxt so it survives across calls; one slot per argument, indexed by argument number */
    5199          24 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5200          24 :                                                 PG_NARGS() * sizeof(FmgrInfo));
    5201             : 
    5202         120 :     for (i = argidx; i < PG_NARGS(); i++)
    5203             :     {
    5204             :         Oid         valtype;
    5205             :         Oid         typOutput;
    5206             :         bool        typIsVarlena;  /* required by getTypeOutputInfo but not used here */
    5207             : 
    5208          96 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    5209          96 :         if (!OidIsValid(valtype))
    5210           0 :             elog(ERROR, "could not determine data type of concat() input");
    5211             : 
    5212          96 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
    5213          96 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
    5214             :     }
    5215             : 
    5216          24 :     fcinfo->flinfo->fn_extra = foutcache;  /* publish the cache so later calls find it */
    5217             : 
    5218          24 :     return foutcache;
    5219             : }
    5220             : 
    5221             : /*
    5222             :  * Implementation of both concat() and concat_ws().
    5223             :  *
    5224             :  * sepstr is the separator string to place between values.
    5225             :  * argidx identifies the first argument to concatenate (counting from zero);
    5226             :  * note that this must be constant across any one series of calls.
    5227             :  * NULL arguments are skipped and do not produce a separator.
    5228             :  * Returns NULL if result should be NULL, else text value.
    5229             :  */
    5230             : static text *
    5231          44 : concat_internal(const char *sepstr, int argidx,
    5232             :                 FunctionCallInfo fcinfo)
    5233             : {
    5234             :     text       *result;
    5235             :     StringInfoData str;
    5236             :     FmgrInfo   *foutcache;
    5237          44 :     bool        first_arg = true;  /* true until the first non-null value is appended */
    5238             :     int         i;
    5239             : 
    5240             :     /*
    5241             :      * concat(VARIADIC some-array) is essentially equivalent to
    5242             :      * array_to_text(), ie concat the array elements with the given separator.
    5243             :      * So we just pass the case off to that code.
    5244             :      */
    5245          44 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5246             :     {
    5247             :         ArrayType  *arr;
    5248             : 
    5249             :         /* Should have just the one argument */
    5250             :         Assert(argidx == PG_NARGS() - 1);
    5251             : 
    5252             :         /* concat(VARIADIC NULL) is defined as NULL */
    5253          20 :         if (PG_ARGISNULL(argidx))
    5254           8 :             return NULL;
    5255             : 
    5256             :         /*
    5257             :          * Non-null argument had better be an array.  We assume that any call
    5258             :          * context that could let get_fn_expr_variadic return true will have
    5259             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    5260             :          * it should be okay to just Assert that it's an array rather than
    5261             :          * doing a full-fledged error check.
    5262             :          */
    5263             :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    5264             : 
    5265             :         /* OK, safe to fetch the array value */
    5266          12 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    5267             : 
    5268             :         /*
    5269             :          * And serialize the array.  We tell array_to_text to ignore null
    5270             :          * elements, which matches the behavior of the loop below.  NOTE(review): array_to_text_internal keeps an ArrayMetaState in fn_extra while the path below keeps an FmgrInfo array there; presumably one flinfo never takes both paths -- confirm.
    5271             :          */
    5272          12 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    5273             :     }
    5274             : 
    5275             :     /* Normal case without explicit VARIADIC marker */
    5276          24 :     initStringInfo(&str);
    5277             : 
    5278             :     /* Get output function info, building it if first time through */
    5279          24 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
    5280          24 :     if (foutcache == NULL)
    5281          24 :         foutcache = build_concat_foutcache(fcinfo, argidx);
    5282             : 
    5283         120 :     for (i = argidx; i < PG_NARGS(); i++)
    5284             :     {
    5285          96 :         if (!PG_ARGISNULL(i))
    5286             :         {
    5287          88 :             Datum       value = PG_GETARG_DATUM(i);
    5288             : 
    5289             :             /* add separator if appropriate (never before the first value appended) */
    5290          88 :             if (first_arg)
    5291          24 :                 first_arg = false;
    5292             :             else
    5293          64 :                 appendStringInfoString(&str, sepstr);
    5294             : 
    5295             :             /* call the appropriate type output function, append the result */
    5296          88 :             appendStringInfoString(&str,
    5297          88 :                                    OutputFunctionCall(&foutcache[i], value));
    5298             :         }
    5299             :     }
    5300             : 
    5301          24 :     result = cstring_to_text_with_len(str.data, str.len);
    5302          24 :     pfree(str.data);  /* the working buffer is no longer needed once copied into result */
    5303             : 
    5304          24 :     return result;
    5305             : }
    5306             : 
    5307             : /*
    5308             :  * Concatenate all arguments. NULL arguments are ignored.  Returns SQL NULL only when concat_internal() returns NULL.
    5309             :  */
    5310             : Datum
    5311          20 : text_concat(PG_FUNCTION_ARGS)
    5312             : {
    5313             :     text       *result;
    5314             : 
    5315          20 :     result = concat_internal("", 0, fcinfo);  /* empty separator, start at argument 0 */
    5316          20 :     if (result == NULL)
    5317           4 :         PG_RETURN_NULL();
    5318          16 :     PG_RETURN_TEXT_P(result);
    5319             : }
    5320             : 
    5321             : /*
    5322             :  * Concatenate all but first argument value with separators. The first
    5323             :  * parameter is used as the separator. NULL arguments are ignored, except that a NULL separator yields SQL NULL.
    5324             :  */
    5325             : Datum
    5326          28 : text_concat_ws(PG_FUNCTION_ARGS)
    5327             : {
    5328             :     char       *sep;
    5329             :     text       *result;
    5330             : 
    5331             :     /* return NULL when separator is NULL */
    5332          28 :     if (PG_ARGISNULL(0))
    5333           4 :         PG_RETURN_NULL();
    5334          24 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
    5335             : 
    5336          24 :     result = concat_internal(sep, 1, fcinfo);  /* values to join start at argument 1 */
    5337          24 :     if (result == NULL)
    5338           4 :         PG_RETURN_NULL();
    5339          20 :     PG_RETURN_TEXT_P(result);
    5340             : }
    5341             : 
    5342             : /*
    5343             :  * Return first n characters in the string (argument 0; n is argument 1). When n is negative,
    5344             :  * return all but last |n| characters.
    5345             :  */
    5346             : Datum
    5347          44 : text_left(PG_FUNCTION_ARGS)
    5348             : {
    5349          44 :     int         n = PG_GETARG_INT32(1);
    5350             : 
    5351          44 :     if (n < 0)
    5352             :     {
    5353          20 :         text       *str = PG_GETARG_TEXT_PP(0);
    5354          20 :         const char *p = VARDATA_ANY(str);
    5355          20 :         int         len = VARSIZE_ANY_EXHDR(str);
    5356             :         int         rlen;
    5357             : 
    5358          20 :         n = pg_mbstrlen_with_len(p, len) + n;  /* presumably: character count of the string minus |n| = characters to keep */
    5359          20 :         rlen = pg_mbcharcliplen(p, len, n);  /* presumably: byte length of the first n characters -- confirm against pg_mbcharcliplen */
    5360          20 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    5361             :     }
    5362             :     else
    5363          24 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));  /* non-negative n is delegated to text_substring() */
    5364             : }
    5365             : 
    5366             : /*
    5367             :  * Return last n characters in the string (argument 0; n is argument 1). When n is negative,
    5368             :  * return all but first |n| characters.
    5369             :  */
    5370             : Datum
    5371          44 : text_right(PG_FUNCTION_ARGS)
    5372             : {
    5373          44 :     text       *str = PG_GETARG_TEXT_PP(0);
    5374          44 :     const char *p = VARDATA_ANY(str);
    5375          44 :     int         len = VARSIZE_ANY_EXHDR(str);
    5376          44 :     int         n = PG_GETARG_INT32(1);
    5377             :     int         off;  /* byte offset in p at which the returned text starts */
    5378             : 
    5379          44 :     if (n < 0)
    5380          20 :         n = -n;  /* characters to skip; NOTE(review): -n overflows when n == INT_MIN -- confirm this is acceptable */
    5381             :     else
    5382          24 :         n = pg_mbstrlen_with_len(p, len) - n;  /* presumably: characters to skip = character count minus n */
    5383          44 :     off = pg_mbcharcliplen(p, len, n);  /* presumably: byte length of the n leading characters to skip -- confirm */
    5384             : 
    5385          44 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    5386             : }
    5387             : 
    5388             : /*
    5389             :  * Return reversed string; multibyte characters are moved as whole units so their bytes keep their order
    5390             :  */
    5391             : Datum
    5392           4 : text_reverse(PG_FUNCTION_ARGS)
    5393             : {
    5394           4 :     text       *str = PG_GETARG_TEXT_PP(0);
    5395           4 :     const char *p = VARDATA_ANY(str);
    5396           4 :     int         len = VARSIZE_ANY_EXHDR(str);
    5397           4 :     const char *endp = p + len;
    5398             :     text       *result;
    5399             :     char       *dst;
    5400             : 
    5401           4 :     result = palloc(len + VARHDRSZ);  /* the result has the same data length as the input */
    5402           4 :     dst = (char *) VARDATA(result) + len;  /* start one past the end of the result data and fill backwards */
    5403           4 :     SET_VARSIZE(result, len + VARHDRSZ);
    5404             : 
    5405           4 :     if (pg_database_encoding_max_length() > 1)
    5406             :     {
    5407             :         /* multibyte version: copy one whole character at a time */
    5408          28 :         while (p < endp)
    5409             :         {
    5410             :             int         sz;
    5411             : 
    5412          20 :             sz = pg_mblen(p);  /* presumably the byte length of the character at p */
    5413          20 :             dst -= sz;
    5414          20 :             memcpy(dst, p, sz);
    5415          20 :             p += sz;
    5416             :         }
    5417             :     }
    5418             :     else
    5419             :     {
    5420             :         /* single byte version: plain byte-by-byte reversal */
    5421           0 :         while (p < endp)
    5422           0 :             *(--dst) = *p++;
    5423             :     }
    5424             : 
    5425           4 :     PG_RETURN_TEXT_P(result);
    5426             : }
    5427             : 
    5428             : 
    5429             : /*
    5430             :  * Support macros for text_format()
    5431             :  */
    5432             : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
    5433             : 
    5434             : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    5435             :     do { \
    5436             :         if (++(ptr) >= (end_ptr)) \
    5437             :             ereport(ERROR, \
    5438             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
    5439             :                      errmsg("unterminated format() type specifier"), \
    5440             :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
    5441             :     } while (0)
    5442             : 
    5443             : /*
    5444             :  * Returns a formatted string
    5445             :  */
    5446             : Datum
    5447        7500 : text_format(PG_FUNCTION_ARGS)
    5448             : {
    5449             :     text       *fmt;
    5450             :     StringInfoData str;
    5451             :     const char *cp;
    5452             :     const char *start_ptr;
    5453             :     const char *end_ptr;
    5454             :     text       *result;
    5455             :     int         arg;
    5456             :     bool        funcvariadic;
    5457             :     int         nargs;
    5458        7500 :     Datum      *elements = NULL;
    5459        7500 :     bool       *nulls = NULL;
    5460        7500 :     Oid         element_type = InvalidOid;
    5461        7500 :     Oid         prev_type = InvalidOid;
    5462        7500 :     Oid         prev_width_type = InvalidOid;
    5463             :     FmgrInfo    typoutputfinfo;
    5464             :     FmgrInfo    typoutputinfo_width;
    5465             : 
    5466             :     /* When format string is null, immediately return null */
    5467        7500 :     if (PG_ARGISNULL(0))
    5468           4 :         PG_RETURN_NULL();
    5469             : 
    5470             :     /* If argument is marked VARIADIC, expand array into elements */
    5471        7496 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5472             :     {
    5473             :         ArrayType  *arr;
    5474             :         int16       elmlen;
    5475             :         bool        elmbyval;
    5476             :         char        elmalign;
    5477             :         int         nitems;
    5478             : 
    5479             :         /* Should have just the one argument */
    5480             :         Assert(PG_NARGS() == 2);
    5481             : 
    5482             :         /* If argument is NULL, we treat it as zero-length array */
    5483          32 :         if (PG_ARGISNULL(1))
    5484           4 :             nitems = 0;
    5485             :         else
    5486             :         {
    5487             :             /*
    5488             :              * Non-null argument had better be an array.  We assume that any
    5489             :              * call context that could let get_fn_expr_variadic return true
    5490             :              * will have checked that a VARIADIC-labeled parameter actually is
    5491             :              * an array.  So it should be okay to just Assert that it's an
    5492             :              * array rather than doing a full-fledged error check.
    5493             :              */
    5494             :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5495             : 
    5496             :             /* OK, safe to fetch the array value */
    5497          28 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5498             : 
    5499             :             /* Get info about array element type */
    5500          28 :             element_type = ARR_ELEMTYPE(arr);
    5501          28 :             get_typlenbyvalalign(element_type,
    5502             :                                  &elmlen, &elmbyval, &elmalign);
    5503             : 
    5504             :             /* Extract all array elements */
    5505          28 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5506             :                               &elements, &nulls, &nitems);
    5507             :         }
    5508             : 
    5509          32 :         nargs = nitems + 1;
    5510          32 :         funcvariadic = true;
    5511             :     }
    5512             :     else
    5513             :     {
    5514             :         /* Non-variadic case, we'll process the arguments individually */
    5515        7464 :         nargs = PG_NARGS();
    5516        7464 :         funcvariadic = false;
    5517             :     }
    5518             : 
    5519             :     /* Setup for main loop. */
    5520        7496 :     fmt = PG_GETARG_TEXT_PP(0);
    5521        7496 :     start_ptr = VARDATA_ANY(fmt);
    5522        7496 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5523        7496 :     initStringInfo(&str);
    5524        7496 :     arg = 1;                    /* next argument position to print */
    5525             : 
    5526             :     /* Scan format string, looking for conversion specifiers. */
    5527      238866 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5528             :     {
    5529             :         int         argpos;
    5530             :         int         widthpos;
    5531             :         int         flags;
    5532             :         int         width;
    5533             :         Datum       value;
    5534             :         bool        isNull;
    5535             :         Oid         typid;
    5536             : 
    5537             :         /*
    5538             :          * If it's not the start of a conversion specifier, just copy it to
    5539             :          * the output buffer.
    5540             :          */
    5541      231410 :         if (*cp != '%')
    5542             :         {
    5543      214666 :             appendStringInfoCharMacro(&str, *cp);
    5544      429344 :             continue;
    5545             :         }
    5546             : 
    5547       16744 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5548             : 
    5549             :         /* Easy case: %% outputs a single % */
    5550       16744 :         if (*cp == '%')
    5551             :         {
    5552          12 :             appendStringInfoCharMacro(&str, *cp);
    5553          12 :             continue;
    5554             :         }
    5555             : 
    5556             :         /* Parse the optional portions of the format specifier */
    5557       16732 :         cp = text_format_parse_format(cp, end_ptr,
    5558             :                                       &argpos, &widthpos,
    5559             :                                       &flags, &width);
    5560             : 
    5561             :         /*
    5562             :          * Next we should see the main conversion specifier.  Whether or not
    5563             :          * an argument position was present, it's known that at least one
    5564             :          * character remains in the string at this point.  Experience suggests
    5565             :          * that it's worth checking that that character is one of the expected
    5566             :          * ones before we try to fetch arguments, so as to produce the least
    5567             :          * confusing response to a mis-formatted specifier.
    5568             :          */
    5569       16716 :         if (strchr("sIL", *cp) == NULL)
    5570           4 :             ereport(ERROR,
    5571             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5572             :                      errmsg("unrecognized format() type specifier \"%c\"",
    5573             :                             *cp),
    5574             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5575             : 
    5576             :         /* If indirect width was specified, get its value */
    5577       16712 :         if (widthpos >= 0)
    5578             :         {
    5579             :             /* Collect the specified or next argument position */
    5580          28 :             if (widthpos > 0)
    5581          24 :                 arg = widthpos;
    5582          28 :             if (arg >= nargs)
    5583           0 :                 ereport(ERROR,
    5584             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5585             :                          errmsg("too few arguments for format()")));
    5586             : 
    5587             :             /* Get the value and type of the selected argument */
    5588          28 :             if (!funcvariadic)
    5589             :             {
    5590          28 :                 value = PG_GETARG_DATUM(arg);
    5591          28 :                 isNull = PG_ARGISNULL(arg);
    5592          28 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5593             :             }
    5594             :             else
    5595             :             {
    5596           0 :                 value = elements[arg - 1];
    5597           0 :                 isNull = nulls[arg - 1];
    5598           0 :                 typid = element_type;
    5599             :             }
    5600          28 :             if (!OidIsValid(typid))
    5601           0 :                 elog(ERROR, "could not determine data type of format() input");
    5602             : 
    5603          28 :             arg++;
    5604             : 
    5605             :             /* We can treat NULL width the same as zero */
    5606          28 :             if (isNull)
    5607           4 :                 width = 0;
    5608          24 :             else if (typid == INT4OID)
    5609          24 :                 width = DatumGetInt32(value);
    5610           0 :             else if (typid == INT2OID)
    5611           0 :                 width = DatumGetInt16(value);
    5612             :             else
    5613             :             {
    5614             :                 /* For less-usual datatypes, convert to text then to int */
    5615             :                 char       *str;
    5616             : 
    5617           0 :                 if (typid != prev_width_type)
    5618             :                 {
    5619             :                     Oid         typoutputfunc;
    5620             :                     bool        typIsVarlena;
    5621             : 
    5622           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5623           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    5624           0 :                     prev_width_type = typid;
    5625             :                 }
    5626             : 
    5627           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    5628             : 
    5629             :                 /* pg_strtoint32 will complain about bad data or overflow */
    5630           0 :                 width = pg_strtoint32(str);
    5631             : 
    5632           0 :                 pfree(str);
    5633             :             }
    5634             :         }
    5635             : 
    5636             :         /* Collect the specified or next argument position */
    5637       16712 :         if (argpos > 0)
    5638          88 :             arg = argpos;
    5639       16712 :         if (arg >= nargs)
    5640          16 :             ereport(ERROR,
    5641             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5642             :                      errmsg("too few arguments for format()")));
    5643             : 
    5644             :         /* Get the value and type of the selected argument */
    5645       16696 :         if (!funcvariadic)
    5646             :         {
    5647       15848 :             value = PG_GETARG_DATUM(arg);
    5648       15848 :             isNull = PG_ARGISNULL(arg);
    5649       15848 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5650             :         }
    5651             :         else
    5652             :         {
    5653         848 :             value = elements[arg - 1];
    5654         848 :             isNull = nulls[arg - 1];
    5655         848 :             typid = element_type;
    5656             :         }
    5657       16696 :         if (!OidIsValid(typid))
    5658           0 :             elog(ERROR, "could not determine data type of format() input");
    5659             : 
    5660       16696 :         arg++;
    5661             : 
    5662             :         /*
    5663             :          * Get the appropriate typOutput function, reusing previous one if
    5664             :          * same type as previous argument.  That's particularly useful in the
    5665             :          * variadic-array case, but often saves work even for ordinary calls.
    5666             :          */
    5667       16696 :         if (typid != prev_type)
    5668             :         {
    5669             :             Oid         typoutputfunc;
    5670             :             bool        typIsVarlena;
    5671             : 
    5672        8484 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5673        8484 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    5674        8484 :             prev_type = typid;
    5675             :         }
    5676             : 
    5677             :         /*
    5678             :          * And now we can format the value.
    5679             :          */
    5680       16696 :         switch (*cp)
    5681             :         {
    5682             :             case 's':
    5683             :             case 'I':
    5684             :             case 'L':
    5685       16696 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    5686             :                                               value, isNull,
    5687             :                                               flags, width);
    5688       16692 :                 break;
    5689             :             default:
    5690             :                 /* should not get here, because of previous check */
    5691           0 :                 ereport(ERROR,
    5692             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5693             :                          errmsg("unrecognized format() type specifier \"%c\"",
    5694             :                                 *cp),
    5695             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    5696             :                 break;
    5697             :         }
    5698             :     }
    5699             : 
    5700             :     /* Don't need deconstruct_array results anymore. */
    5701        7456 :     if (elements != NULL)
    5702          28 :         pfree(elements);
    5703        7456 :     if (nulls != NULL)
    5704          28 :         pfree(nulls);
    5705             : 
    5706             :     /* Generate results. */
    5707        7456 :     result = cstring_to_text_with_len(str.data, str.len);
    5708        7456 :     pfree(str.data);
    5709             : 
    5710        7456 :     PG_RETURN_TEXT_P(result);
    5711             : }
    5712             : 
    5713             : /*
    5714             :  * Parse contiguous digits as a decimal number.
    5715             :  *
    5716             :  * Returns true if some digits could be parsed.
    5717             :  * The value is returned into *value, and *ptr is advanced to the next
    5718             :  * character to be parsed.
    5719             :  *
    5720             :  * Note parsing invariant: at least one character is known available before
    5721             :  * string end (end_ptr) at entry, and this is still true at exit.
    5722             :  */
    5723             : static bool
    5724       33440 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    5725             : {
    5726       33440 :     bool        found = false;
    5727       33440 :     const char *cp = *ptr;
    5728       33440 :     int         val = 0;
    5729             : 
    5730       67088 :     while (*cp >= '0' && *cp <= '9')
    5731             :     {
    5732         212 :         int8        digit = (*cp - '0');
    5733             : 
    5734         424 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
    5735         212 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
    5736           0 :             ereport(ERROR,
    5737             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5738             :                      errmsg("number is out of range")));
    5739         212 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5740         208 :         found = true;
    5741             :     }
    5742             : 
    5743       33436 :     *ptr = cp;
    5744       33436 :     *value = val;
    5745             : 
    5746       33436 :     return found;
    5747             : }
    5748             : 
    5749             : /*
    5750             :  * Parse a format specifier (generally following the SUS printf spec).
    5751             :  *
    5752             :  * We have already advanced over the initial '%', and we are looking for
    5753             :  * [argpos][flags][width]type (but the type character is not consumed here).
    5754             :  *
    5755             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    5756             :  * Output parameters:
    5757             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    5758             :  *  widthpos: argument position for width.  Zero means the argument position
    5759             :  *          was unspecified (ie, take the next arg) and -1 means no width
    5760             :  *          argument (width was omitted or specified as a constant).
    5761             :  *  flags: bitmask of flags.
    5762             :  *  width: directly-specified width value.  Zero means the width was omitted
    5763             :  *          (note it's not necessary to distinguish this case from an explicit
    5764             :  *          zero width value).
    5765             :  *
    5766             :  * The function result is the next character position to be parsed, ie, the
    5767             :  * location where the type character is/should be.
    5768             :  *
    5769             :  * Note parsing invariant: at least one character is known available before
    5770             :  * string end (end_ptr) at entry, and this is still true at exit.
    5771             :  */
    5772             : static const char *
    5773       16732 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    5774             :                          int *argpos, int *widthpos,
    5775             :                          int *flags, int *width)
    5776             : {
    5777       16732 :     const char *cp = start_ptr;
    5778             :     int         n;
    5779             : 
    5780             :     /* set defaults for output parameters */
    5781       16732 :     *argpos = -1;
    5782       16732 :     *widthpos = -1;
    5783       16732 :     *flags = 0;
    5784       16732 :     *width = 0;
    5785             : 
    5786             :     /* try to identify first number */
    5787       16732 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    5788             :     {
    5789         116 :         if (*cp != '$')
    5790             :         {
    5791             :             /* Must be just a width and a type, so we're done */
    5792          16 :             *width = n;
    5793          16 :             return cp;
    5794             :         }
    5795             :         /* The number was argument position */
    5796         100 :         *argpos = n;
    5797             :         /* Explicit 0 for argument index is immediately refused */
    5798         100 :         if (n == 0)
    5799           4 :             ereport(ERROR,
    5800             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5801             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5802          96 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5803             :     }
    5804             : 
    5805             :     /* Handle flags (only minus is supported now) */
    5806       33436 :     while (*cp == '-')
    5807             :     {
    5808          20 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    5809          20 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5810             :     }
    5811             : 
    5812       16708 :     if (*cp == '*')
    5813             :     {
    5814             :         /* Handle indirect width */
    5815          32 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5816          32 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5817             :         {
    5818             :             /* number in this position must be closed by $ */
    5819          28 :             if (*cp != '$')
    5820           0 :                 ereport(ERROR,
    5821             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5822             :                          errmsg("width argument position must be ended by \"$\"")));
    5823             :             /* The number was width argument position */
    5824          28 :             *widthpos = n;
    5825             :             /* Explicit 0 for argument index is immediately refused */
    5826          28 :             if (n == 0)
    5827           4 :                 ereport(ERROR,
    5828             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5829             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5830          24 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
    5831             :         }
    5832             :         else
    5833           4 :             *widthpos = 0;      /* width's argument position is unspecified */
    5834             :     }
    5835             :     else
    5836             :     {
    5837             :         /* Check for direct width specification */
    5838       16676 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5839          20 :             *width = n;
    5840             :     }
    5841             : 
    5842             :     /* cp should now be pointing at type character */
    5843       16700 :     return cp;
    5844             : }
    5845             : 
    5846             : /*
    5847             :  * Format a %s, %I, or %L conversion
    5848             :  */
    5849             : static void
    5850       16696 : text_format_string_conversion(StringInfo buf, char conversion,
    5851             :                               FmgrInfo *typOutputInfo,
    5852             :                               Datum value, bool isNull,
    5853             :                               int flags, int width)
    5854             : {
    5855             :     char       *str;
    5856             : 
    5857             :     /* Handle NULL arguments before trying to stringify the value. */
    5858       16696 :     if (isNull)
    5859             :     {
    5860          44 :         if (conversion == 's')
    5861          12 :             text_format_append_string(buf, "", flags, width);
    5862          32 :         else if (conversion == 'L')
    5863          28 :             text_format_append_string(buf, "NULL", flags, width);
    5864           4 :         else if (conversion == 'I')
    5865           4 :             ereport(ERROR,
    5866             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    5867             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    5868          40 :         return;
    5869             :     }
    5870             : 
    5871             :     /* Stringify. */
    5872       16652 :     str = OutputFunctionCall(typOutputInfo, value);
    5873             : 
    5874             :     /* Escape. */
    5875       16652 :     if (conversion == 'I')
    5876             :     {
    5877             :         /* quote_identifier may or may not allocate a new string. */
    5878        1104 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    5879             :     }
    5880       15548 :     else if (conversion == 'L')
    5881             :     {
    5882        1018 :         char       *qstr = quote_literal_cstr(str);
    5883             : 
    5884        1018 :         text_format_append_string(buf, qstr, flags, width);
    5885             :         /* quote_literal_cstr() always allocates a new string */
    5886        1018 :         pfree(qstr);
    5887             :     }
    5888             :     else
    5889       14530 :         text_format_append_string(buf, str, flags, width);
    5890             : 
    5891             :     /* Cleanup. */
    5892       16652 :     pfree(str);
    5893             : }
    5894             : 
    5895             : /*
    5896             :  * Append str to buf, padding as directed by flags/width
    5897             :  */
    5898             : static void
    5899       16692 : text_format_append_string(StringInfo buf, const char *str,
    5900             :                           int flags, int width)
    5901             : {
    5902       16692 :     bool        align_to_left = false;
    5903             :     int         len;
    5904             : 
    5905             :     /* fast path for typical easy case */
    5906       16692 :     if (width == 0)
    5907             :     {
    5908       16636 :         appendStringInfoString(buf, str);
    5909       16636 :         return;
    5910             :     }
    5911             : 
    5912          56 :     if (width < 0)
    5913             :     {
    5914             :         /* Negative width: implicit '-' flag, then take absolute value */
    5915           4 :         align_to_left = true;
    5916             :         /* -INT_MIN is undefined */
    5917           4 :         if (width <= INT_MIN)
    5918           0 :             ereport(ERROR,
    5919             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5920             :                      errmsg("number is out of range")));
    5921           4 :         width = -width;
    5922             :     }
    5923          52 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    5924          16 :         align_to_left = true;
    5925             : 
    5926          56 :     len = pg_mbstrlen(str);
    5927          56 :     if (align_to_left)
    5928             :     {
    5929             :         /* left justify */
    5930          20 :         appendStringInfoString(buf, str);
    5931          20 :         if (len < width)
    5932          20 :             appendStringInfoSpaces(buf, width - len);
    5933             :     }
    5934             :     else
    5935             :     {
    5936             :         /* right justify */
    5937          36 :         if (len < width)
    5938          36 :             appendStringInfoSpaces(buf, width - len);
    5939          36 :         appendStringInfoString(buf, str);
    5940             :     }
    5941             : }
    5942             : 
    5943             : /*
    5944             :  * text_format_nv - nonvariadic wrapper for text_format function.
    5945             :  *
    5946             :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
    5947             :  * which checks that all built-in functions that share the implementing C
    5948             :  * function take the same number of arguments.
    5949             :  */
    5950             : Datum
    5951          20 : text_format_nv(PG_FUNCTION_ARGS)
    5952             : {
    5953          20 :     return text_format(fcinfo);
    5954             : }
    5955             : 
    5956             : /*
    5957             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    5958             :  * for this use case.
    5959             :  */
    5960             : static inline bool
    5961           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    5962             : {
    5963           0 :     while (len > 0)
    5964             :     {
    5965           0 :         len--;
    5966           0 :         if (s1[len] != s2[len])
    5967           0 :             return false;
    5968             :     }
    5969           0 :     return true;
    5970             : }
    5971             : 
    5972             : /* Expand each Levenshtein distance variant */
    5973             : #include "levenshtein.c"
    5974             : #define LEVENSHTEIN_LESS_EQUAL
    5975             : #include "levenshtein.c"

Generated by: LCOV version 1.13