LCOV - code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 1637 1945 84.2 %
Date: 2020-06-03 11:07:14 Functions: 132 148 89.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/detoast.h"
      21             : #include "catalog/pg_collation.h"
      22             : #include "catalog/pg_type.h"
      23             : #include "common/hashfn.h"
      24             : #include "common/int.h"
      25             : #include "common/unicode_norm.h"
      26             : #include "lib/hyperloglog.h"
      27             : #include "libpq/pqformat.h"
      28             : #include "miscadmin.h"
      29             : #include "parser/scansup.h"
      30             : #include "port/pg_bswap.h"
      31             : #include "regex/regex.h"
      32             : #include "utils/builtins.h"
      33             : #include "utils/bytea.h"
      34             : #include "utils/lsyscache.h"
      35             : #include "utils/memutils.h"
      36             : #include "utils/pg_locale.h"
      37             : #include "utils/sortsupport.h"
      38             : #include "utils/varlena.h"
      39             : 
      40             : 
      41             : /* GUC variable */
      42             : int         bytea_output = BYTEA_OUTPUT_HEX;
      43             : 
      44             : typedef struct varlena unknown;
      45             : typedef struct varlena VarString;
      46             : 
      47             : /*
      48             :  * State for text_position_* functions.
      49             :  */
      50             : typedef struct
      51             : {
      52             :     bool        is_multibyte;   /* T if multibyte encoding */
      53             :     bool        is_multibyte_char_in_char;
      54             : 
      55             :     char       *str1;           /* haystack string */
      56             :     char       *str2;           /* needle string */
      57             :     int         len1;           /* string lengths in bytes */
      58             :     int         len2;
      59             : 
      60             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      61             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      62             :     int         skiptable[256]; /* skip distance for given mismatched char */
      63             : 
      64             :     char       *last_match;     /* pointer to last match in 'str1' */
      65             : 
      66             :     /*
      67             :      * Sometimes we need to convert the byte position of a match to a
      68             :      * character position.  These store the last position that was converted,
      69             :      * so that on the next call, we can continue from that point, rather than
      70             :      * count characters from the very beginning.
      71             :      */
      72             :     char       *refpoint;       /* pointer within original haystack string */
      73             :     int         refpos;         /* 0-based character offset of the same point */
      74             : } TextPositionState;
      75             : 
      76             : typedef struct
      77             : {
      78             :     char       *buf1;           /* 1st string, or abbreviation original string
      79             :                                  * buf */
      80             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      81             :     int         buflen1;
      82             :     int         buflen2;
      83             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      84             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      85             :     int         last_returned;  /* Last comparison result (cache) */
      86             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      87             :     bool        collate_c;
      88             :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
      89             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      90             :     hyperLogLogState full_card; /* Full key cardinality state */
      91             :     double      prop_card;      /* Required cardinality proportion */
      92             :     pg_locale_t locale;
      93             : } VarStringSortSupport;
      94             : 
      95             : /*
      96             :  * This should be large enough that most strings will fit, but small enough
      97             :  * that we feel comfortable putting it on the stack
      98             :  */
      99             : #define TEXTBUFLEN      1024
     100             : 
     101             : #define DatumGetUnknownP(X)         ((unknown *) PG_DETOAST_DATUM(X))
     102             : #define DatumGetUnknownPCopy(X)     ((unknown *) PG_DETOAST_DATUM_COPY(X))
     103             : #define PG_GETARG_UNKNOWN_P(n)      DatumGetUnknownP(PG_GETARG_DATUM(n))
     104             : #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
     105             : #define PG_RETURN_UNKNOWN_P(x)      PG_RETURN_POINTER(x)
     106             : 
     107             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
     108             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
     109             : 
     110             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
     111             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
     112             : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
     113             : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
     114             : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
     115             : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
     116             : static int  varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
     117             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
     118             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     119             : static int32 text_length(Datum str);
     120             : static text *text_catenate(text *t1, text *t2);
     121             : static text *text_substring(Datum str,
     122             :                             int32 start,
     123             :                             int32 length,
     124             :                             bool length_not_specified);
     125             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     126             : static int  text_position(text *t1, text *t2, Oid collid);
     127             : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
     128             : static bool text_position_next(TextPositionState *state);
     129             : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
     130             : static char *text_position_get_match_ptr(TextPositionState *state);
     131             : static int  text_position_get_match_pos(TextPositionState *state);
     132             : static void text_position_cleanup(TextPositionState *state);
     133             : static void check_collation_set(Oid collid);
     134             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     135             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     136             : static bytea *bytea_substring(Datum str,
     137             :                               int S,
     138             :                               int L,
     139             :                               bool length_not_specified);
     140             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     141             : static void appendStringInfoText(StringInfo str, const text *t);
     142             : static Datum text_to_array_internal(PG_FUNCTION_ARGS);
     143             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     144             :                                     const char *fldsep, const char *null_string);
     145             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
     146             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     147             :                                      int *value);
     148             : static const char *text_format_parse_format(const char *start_ptr,
     149             :                                             const char *end_ptr,
     150             :                                             int *argpos, int *widthpos,
     151             :                                             int *flags, int *width);
     152             : static void text_format_string_conversion(StringInfo buf, char conversion,
     153             :                                           FmgrInfo *typOutputInfo,
     154             :                                           Datum value, bool isNull,
     155             :                                           int flags, int width);
     156             : static void text_format_append_string(StringInfo buf, const char *str,
     157             :                                       int flags, int width);
     158             : 
     159             : 
     160             : /*****************************************************************************
     161             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     162             :  *****************************************************************************/
     163             : 
     164             : /*
     165             :  * cstring_to_text
     166             :  *
     167             :  * Create a text value from a null-terminated C string.
     168             :  *
     169             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     170             :  */
     171             : text *
     172     6232824 : cstring_to_text(const char *s)
     173             : {
     174     6232824 :     return cstring_to_text_with_len(s, strlen(s));
     175             : }
     176             : 
     177             : /*
     178             :  * cstring_to_text_with_len
     179             :  *
     180             :  * Same as cstring_to_text except the caller specifies the string length;
     181             :  * the string need not be null_terminated.
     182             :  */
     183             : text *
     184     9584582 : cstring_to_text_with_len(const char *s, int len)
     185             : {
     186     9584582 :     text       *result = (text *) palloc(len + VARHDRSZ);
     187             : 
     188     9584582 :     SET_VARSIZE(result, len + VARHDRSZ);
     189     9584582 :     memcpy(VARDATA(result), s, len);
     190             : 
     191     9584582 :     return result;
     192             : }
     193             : 
     194             : /*
     195             :  * text_to_cstring
     196             :  *
     197             :  * Create a palloc'd, null-terminated C string from a text value.
     198             :  *
     199             :  * We support being passed a compressed or toasted text value.
     200             :  * This is a bit bogus since such values shouldn't really be referred to as
     201             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     202             :  * case here, we'd need another routine that did, anyway.
     203             :  */
     204             : char *
     205     4634534 : text_to_cstring(const text *t)
     206             : {
     207             :     /* must cast away the const, unfortunately */
     208     4634534 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
     209     4634534 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
     210             :     char       *result;
     211             : 
     212     4634534 :     result = (char *) palloc(len + 1);
     213     4634534 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     214     4634534 :     result[len] = '\0';
     215             : 
     216     4634534 :     if (tunpacked != t)
     217       65174 :         pfree(tunpacked);
     218             : 
     219     4634534 :     return result;
     220             : }
     221             : 
     222             : /*
     223             :  * text_to_cstring_buffer
     224             :  *
     225             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     226             :  *
     227             :  * The text string is truncated if necessary to fit.  The result is
     228             :  * guaranteed null-terminated (unless dst_len == 0).
     229             :  *
     230             :  * We support being passed a compressed or toasted text value.
     231             :  * This is a bit bogus since such values shouldn't really be referred to as
     232             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     233             :  * case here, we'd need another routine that did, anyway.
     234             :  */
     235             : void
     236         390 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     237             : {
     238             :     /* must cast away the const, unfortunately */
     239         390 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
     240         390 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     241             : 
     242         390 :     if (dst_len > 0)
     243             :     {
     244         390 :         dst_len--;
     245         390 :         if (dst_len >= src_len)
     246         390 :             dst_len = src_len;
     247             :         else                    /* ensure truncation is encoding-safe */
     248           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     249         390 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
     250         390 :         dst[dst_len] = '\0';
     251             :     }
     252             : 
     253         390 :     if (srcunpacked != src)
     254           0 :         pfree(srcunpacked);
     255         390 : }
     256             : 
     257             : 
     258             : /*****************************************************************************
     259             :  *   USER I/O ROUTINES                                                       *
     260             :  *****************************************************************************/
     261             : 
     262             : 
     263             : #define VAL(CH)         ((CH) - '0')
     264             : #define DIG(VAL)        ((VAL) + '0')
     265             : 
     266             : /*
     267             :  *      byteain         - converts from printable representation of byte array
     268             :  *
     269             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     270             :  *      converted to internal form.  '\' must be passed as '\\'.
     271             :  *      ereport(ERROR, ...) if bad form.
     272             :  *
     273             :  *      BUGS:
     274             :  *              The input is scanned twice.
     275             :  *              The error checking of input is minimal.
     276             :  */
     277             : Datum
     278       10670 : byteain(PG_FUNCTION_ARGS)
     279             : {
     280       10670 :     char       *inputText = PG_GETARG_CSTRING(0);
     281             :     char       *tp;
     282             :     char       *rp;
     283             :     int         bc;
     284             :     bytea      *result;
     285             : 
     286             :     /* Recognize hex input */
     287       10670 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     288             :     {
     289          96 :         size_t      len = strlen(inputText);
     290             : 
     291          96 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     292          96 :         result = palloc(bc);
     293          96 :         bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
     294          88 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     295             : 
     296          88 :         PG_RETURN_BYTEA_P(result);
     297             :     }
     298             : 
     299             :     /* Else, it's the traditional escaped style */
     300      199758 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
     301             :     {
     302      189188 :         if (tp[0] != '\\')
     303      188506 :             tp++;
     304         682 :         else if ((tp[0] == '\\') &&
     305         682 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     306         678 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     307         678 :                  (tp[3] >= '0' && tp[3] <= '7'))
     308         678 :             tp += 4;
     309           4 :         else if ((tp[0] == '\\') &&
     310           4 :                  (tp[1] == '\\'))
     311           0 :             tp += 2;
     312             :         else
     313             :         {
     314             :             /*
     315             :              * one backslash, not followed by another or ### valid octal
     316             :              */
     317           4 :             ereport(ERROR,
     318             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     319             :                      errmsg("invalid input syntax for type %s", "bytea")));
     320             :         }
     321             :     }
     322             : 
     323       10570 :     bc += VARHDRSZ;
     324             : 
     325       10570 :     result = (bytea *) palloc(bc);
     326       10570 :     SET_VARSIZE(result, bc);
     327             : 
     328       10570 :     tp = inputText;
     329       10570 :     rp = VARDATA(result);
     330      199746 :     while (*tp != '\0')
     331             :     {
     332      189176 :         if (tp[0] != '\\')
     333      188498 :             *rp++ = *tp++;
     334         678 :         else if ((tp[0] == '\\') &&
     335         678 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     336         678 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     337         678 :                  (tp[3] >= '0' && tp[3] <= '7'))
     338             :         {
     339         678 :             bc = VAL(tp[1]);
     340         678 :             bc <<= 3;
     341         678 :             bc += VAL(tp[2]);
     342         678 :             bc <<= 3;
     343         678 :             *rp++ = bc + VAL(tp[3]);
     344             : 
     345         678 :             tp += 4;
     346             :         }
     347           0 :         else if ((tp[0] == '\\') &&
     348           0 :                  (tp[1] == '\\'))
     349             :         {
     350           0 :             *rp++ = '\\';
     351           0 :             tp += 2;
     352             :         }
     353             :         else
     354             :         {
     355             :             /*
     356             :              * We should never get here. The first pass should not allow it.
     357             :              */
     358           0 :             ereport(ERROR,
     359             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     360             :                      errmsg("invalid input syntax for type %s", "bytea")));
     361             :         }
     362             :     }
     363             : 
     364       10570 :     PG_RETURN_BYTEA_P(result);
     365             : }
     366             : 
     367             : /*
     368             :  *      byteaout        - converts to printable representation of byte array
     369             :  *
     370             :  *      In the traditional escaped format, non-printable characters are
     371             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     372             :  */
     373             : Datum
     374        4886 : byteaout(PG_FUNCTION_ARGS)
     375             : {
     376        4886 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     377             :     char       *result;
     378             :     char       *rp;
     379             : 
     380        4886 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     381             :     {
     382             :         /* Print hex format */
     383        4680 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     384        4680 :         *rp++ = '\\';
     385        4680 :         *rp++ = 'x';
     386        4680 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     387             :     }
     388         206 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     389             :     {
     390             :         /* Print traditional escaped format */
     391             :         char       *vp;
     392             :         uint64      len;
     393             :         int         i;
     394             : 
     395         206 :         len = 1;                /* empty string has 1 char */
     396         206 :         vp = VARDATA_ANY(vlena);
     397        2060 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     398             :         {
     399        1854 :             if (*vp == '\\')
     400           0 :                 len += 2;
     401        1854 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     402         328 :                 len += 4;
     403             :             else
     404        1526 :                 len++;
     405             :         }
     406             : 
     407             :         /*
     408             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     409             :          * for safety let's check rather than relying on palloc's internal
     410             :          * check.
     411             :          */
     412         206 :         if (len > MaxAllocSize)
     413           0 :             ereport(ERROR,
     414             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     415             :                      errmsg_internal("result of bytea output conversion is too large")));
     416         206 :         rp = result = (char *) palloc(len);
     417             : 
     418         206 :         vp = VARDATA_ANY(vlena);
     419        2060 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     420             :         {
     421        1854 :             if (*vp == '\\')
     422             :             {
     423           0 :                 *rp++ = '\\';
     424           0 :                 *rp++ = '\\';
     425             :             }
     426        1854 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     427         328 :             {
     428             :                 int         val;    /* holds unprintable chars */
     429             : 
     430         328 :                 val = *vp;
     431         328 :                 rp[0] = '\\';
     432         328 :                 rp[3] = DIG(val & 07);
     433         328 :                 val >>= 3;
     434         328 :                 rp[2] = DIG(val & 07);
     435         328 :                 val >>= 3;
     436         328 :                 rp[1] = DIG(val & 03);
     437         328 :                 rp += 4;
     438             :             }
     439             :             else
     440        1526 :                 *rp++ = *vp;
     441             :         }
     442             :     }
     443             :     else
     444             :     {
     445           0 :         elog(ERROR, "unrecognized bytea_output setting: %d",
     446             :              bytea_output);
     447             :         rp = result = NULL;     /* keep compiler quiet */
     448             :     }
     449        4886 :     *rp = '\0';
     450        4886 :     PG_RETURN_CSTRING(result);
     451             : }
     452             : 
     453             : /*
     454             :  *      bytearecv           - converts external binary format to bytea
     455             :  */
     456             : Datum
     457         698 : bytearecv(PG_FUNCTION_ARGS)
     458             : {
     459         698 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     460             :     bytea      *result;
     461             :     int         nbytes;
     462             : 
     463         698 :     nbytes = buf->len - buf->cursor;
     464         698 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     465         698 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     466         698 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     467         698 :     PG_RETURN_BYTEA_P(result);
     468             : }
     469             : 
     470             : /*
     471             :  *      byteasend           - converts bytea to binary format
     472             :  *
     473             :  * This is a special case: just copy the input...
     474             :  */
     475             : Datum
     476        3608 : byteasend(PG_FUNCTION_ARGS)
     477             : {
     478        3608 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     479             : 
     480        3608 :     PG_RETURN_BYTEA_P(vlena);
     481             : }
     482             : 
     483             : Datum
     484          28 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     485             : {
     486             :     StringInfo  state;
     487             : 
     488          28 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     489             : 
     490             :     /* Append the value unless null. */
     491          28 :     if (!PG_ARGISNULL(1))
     492             :     {
     493          28 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     494             : 
     495             :         /* On the first time through, we ignore the delimiter. */
     496          28 :         if (state == NULL)
     497          16 :             state = makeStringAggState(fcinfo);
     498          12 :         else if (!PG_ARGISNULL(2))
     499             :         {
     500           8 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     501             : 
     502           8 :             appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
     503             :         }
     504             : 
     505          28 :         appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
     506             :     }
     507             : 
     508             :     /*
     509             :      * The transition type for string_agg() is declared to be "internal",
     510             :      * which is a pass-by-value type the same size as a pointer.
     511             :      */
     512          28 :     PG_RETURN_POINTER(state);
     513             : }
     514             : 
     515             : Datum
     516          20 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     517             : {
     518             :     StringInfo  state;
     519             : 
     520             :     /* cannot be called directly because of internal-type argument */
     521             :     Assert(AggCheckCallContext(fcinfo, NULL));
     522             : 
     523          20 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     524             : 
     525          20 :     if (state != NULL)
     526             :     {
     527             :         bytea      *result;
     528             : 
     529          16 :         result = (bytea *) palloc(state->len + VARHDRSZ);
     530          16 :         SET_VARSIZE(result, state->len + VARHDRSZ);
     531          16 :         memcpy(VARDATA(result), state->data, state->len);
     532          16 :         PG_RETURN_BYTEA_P(result);
     533             :     }
     534             :     else
     535           4 :         PG_RETURN_NULL();
     536             : }
     537             : 
     538             : /*
     539             :  *      textin          - converts "..." to internal representation
     540             :  */
     541             : Datum
     542     5000698 : textin(PG_FUNCTION_ARGS)
     543             : {
     544     5000698 :     char       *inputText = PG_GETARG_CSTRING(0);
     545             : 
     546     5000698 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     547             : }
     548             : 
     549             : /*
     550             :  *      textout         - converts internal representation to "..."
     551             :  */
     552             : Datum
     553     2155172 : textout(PG_FUNCTION_ARGS)
     554             : {
     555     2155172 :     Datum       txt = PG_GETARG_DATUM(0);
     556             : 
     557     2155172 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     558             : }
     559             : 
     560             : /*
     561             :  *      textrecv            - converts external binary format to text
     562             :  */
     563             : Datum
     564          12 : textrecv(PG_FUNCTION_ARGS)
     565             : {
     566          12 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     567             :     text       *result;
     568             :     char       *str;
     569             :     int         nbytes;
     570             : 
     571          12 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     572             : 
     573          12 :     result = cstring_to_text_with_len(str, nbytes);
     574          12 :     pfree(str);
     575          12 :     PG_RETURN_TEXT_P(result);
     576             : }
     577             : 
     578             : /*
     579             :  *      textsend            - converts text to binary format
     580             :  */
     581             : Datum
     582        3172 : textsend(PG_FUNCTION_ARGS)
     583             : {
     584        3172 :     text       *t = PG_GETARG_TEXT_PP(0);
     585             :     StringInfoData buf;
     586             : 
     587        3172 :     pq_begintypsend(&buf);
     588        3172 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
     589        3172 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     590             : }
     591             : 
     592             : 
     593             : /*
     594             :  *      unknownin           - converts "..." to internal representation
     595             :  */
     596             : Datum
     597           0 : unknownin(PG_FUNCTION_ARGS)
     598             : {
     599           0 :     char       *str = PG_GETARG_CSTRING(0);
     600             : 
     601             :     /* representation is same as cstring */
     602           0 :     PG_RETURN_CSTRING(pstrdup(str));
     603             : }
     604             : 
     605             : /*
     606             :  *      unknownout          - converts internal representation to "..."
     607             :  */
     608             : Datum
     609         318 : unknownout(PG_FUNCTION_ARGS)
     610             : {
     611             :     /* representation is same as cstring */
     612         318 :     char       *str = PG_GETARG_CSTRING(0);
     613             : 
     614         318 :     PG_RETURN_CSTRING(pstrdup(str));
     615             : }
     616             : 
     617             : /*
     618             :  *      unknownrecv         - converts external binary format to unknown
     619             :  */
     620             : Datum
     621           0 : unknownrecv(PG_FUNCTION_ARGS)
     622             : {
     623           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     624             :     char       *str;
     625             :     int         nbytes;
     626             : 
     627           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     628             :     /* representation is same as cstring */
     629           0 :     PG_RETURN_CSTRING(str);
     630             : }
     631             : 
     632             : /*
     633             :  *      unknownsend         - converts unknown to binary format
     634             :  */
     635             : Datum
     636           0 : unknownsend(PG_FUNCTION_ARGS)
     637             : {
     638             :     /* representation is same as cstring */
     639           0 :     char       *str = PG_GETARG_CSTRING(0);
     640             :     StringInfoData buf;
     641             : 
     642           0 :     pq_begintypsend(&buf);
     643           0 :     pq_sendtext(&buf, str, strlen(str));
     644           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     645             : }
     646             : 
     647             : 
     648             : /* ========== PUBLIC ROUTINES ========== */
     649             : 
     650             : /*
     651             :  * textlen -
     652             :  *    returns the logical length of a text*
     653             :  *     (which is less than the VARSIZE of the text*)
     654             :  */
     655             : Datum
     656      202646 : textlen(PG_FUNCTION_ARGS)
     657             : {
     658      202646 :     Datum       str = PG_GETARG_DATUM(0);
     659             : 
     660             :     /* try to avoid decompressing argument */
     661      202646 :     PG_RETURN_INT32(text_length(str));
     662             : }
     663             : 
     664             : /*
     665             :  * text_length -
     666             :  *  Does the real work for textlen()
     667             :  *
     668             :  *  This is broken out so it can be called directly by other string processing
     669             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     670             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     671             :  *  in some cases.
     672             :  */
     673             : static int32
     674      202654 : text_length(Datum str)
     675             : {
     676             :     /* fastpath when max encoding length is one */
     677      202654 :     if (pg_database_encoding_max_length() == 1)
     678          24 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     679             :     else
     680             :     {
     681      202630 :         text       *t = DatumGetTextPP(str);
     682             : 
     683      202630 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
     684             :                                              VARSIZE_ANY_EXHDR(t)));
     685             :     }
     686             : }
     687             : 
     688             : /*
     689             :  * textoctetlen -
     690             :  *    returns the physical length of a text*
     691             :  *     (which is less than the VARSIZE of the text*)
     692             :  */
     693             : Datum
     694          58 : textoctetlen(PG_FUNCTION_ARGS)
     695             : {
     696          58 :     Datum       str = PG_GETARG_DATUM(0);
     697             : 
     698             :     /* We need not detoast the input at all */
     699          58 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     700             : }
     701             : 
     702             : /*
     703             :  * textcat -
     704             :  *    takes two text* and returns a text* that is the concatenation of
     705             :  *    the two.
     706             :  *
     707             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     708             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     709             :  * Allocate space for output in all cases.
     710             :  * XXX - thomas 1997-07-10
     711             :  */
     712             : Datum
     713     1658886 : textcat(PG_FUNCTION_ARGS)
     714             : {
     715     1658886 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     716     1658886 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     717             : 
     718     1658886 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     719             : }
     720             : 
     721             : /*
     722             :  * text_catenate
     723             :  *  Guts of textcat(), broken out so it can be used by other functions
     724             :  *
     725             :  * Arguments can be in short-header form, but not compressed or out-of-line
     726             :  */
     727             : static text *
     728     1658950 : text_catenate(text *t1, text *t2)
     729             : {
     730             :     text       *result;
     731             :     int         len1,
     732             :                 len2,
     733             :                 len;
     734             :     char       *ptr;
     735             : 
     736     1658950 :     len1 = VARSIZE_ANY_EXHDR(t1);
     737     1658950 :     len2 = VARSIZE_ANY_EXHDR(t2);
     738             : 
     739             :     /* paranoia ... probably should throw error instead? */
     740     1658950 :     if (len1 < 0)
     741           0 :         len1 = 0;
     742     1658950 :     if (len2 < 0)
     743           0 :         len2 = 0;
     744             : 
     745     1658950 :     len = len1 + len2 + VARHDRSZ;
     746     1658950 :     result = (text *) palloc(len);
     747             : 
     748             :     /* Set size of result string... */
     749     1658950 :     SET_VARSIZE(result, len);
     750             : 
     751             :     /* Fill data field of result string... */
     752     1658950 :     ptr = VARDATA(result);
     753     1658950 :     if (len1 > 0)
     754     1656846 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     755     1658950 :     if (len2 > 0)
     756     1658846 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     757             : 
     758     1658950 :     return result;
     759             : }
     760             : 
     /*
      * charlen_to_bytelen()
      *  Return how many bytes the first n characters starting at *p occupy.
      *
      * The caller must guarantee that n characters are really present; the
      * string does not have to be null-terminated.
      */
     static int
     charlen_to_bytelen(const char *p, int n)
     {
         const char *cursor = p;

         /* Optimization for single-byte encodings: one byte per character */
         if (pg_database_encoding_max_length() == 1)
             return n;

         /* otherwise step over the characters one at a time */
         while (n > 0)
         {
             cursor += pg_mblen(cursor);
             n--;
         }

         return cursor - p;
     }
     786             : 
     787             : /*
     788             :  * text_substr()
     789             :  * Return a substring starting at the specified position.
     790             :  * - thomas 1997-12-31
     791             :  *
     792             :  * Input:
     793             :  *  - string
     794             :  *  - starting position (is one-based)
     795             :  *  - string length
     796             :  *
     797             :  * If the starting position is zero or less, then return from the start of the string
     798             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     799             :  * If the length is less than zero, return the remaining string.
     800             :  *
     801             :  * Added multibyte support.
     802             :  * - Tatsuo Ishii 1998-4-21
     803             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     804             :  * Formerly returned the entire string; now returns a portion.
     805             :  * - Thomas Lockhart 1998-12-10
     806             :  * Now uses faster TOAST-slicing interface
     807             :  * - John Gray 2002-02-22
     808             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     809             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     810             :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
     811             :  * S > LC and < LC + 4 sometimes garbage characters are returned.
     812             :  * - Joe Conway 2002-08-10
     813             :  */
     814             : Datum
     815       70190 : text_substr(PG_FUNCTION_ARGS)
     816             : {
     817       70190 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     818             :                                     PG_GETARG_INT32(1),
     819             :                                     PG_GETARG_INT32(2),
     820             :                                     false));
     821             : }
     822             : 
     823             : /*
     824             :  * text_substr_no_len -
     825             :  *    Wrapper to avoid opr_sanity failure due to
     826             :  *    one function accepting a different number of args.
     827             :  */
     828             : Datum
     829          26 : text_substr_no_len(PG_FUNCTION_ARGS)
     830             : {
     831          26 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     832             :                                     PG_GETARG_INT32(1),
     833             :                                     -1, true));
     834             : }
     835             : 
     836             : /*
     837             :  * text_substring -
     838             :  *  Does the real work for text_substr() and text_substr_no_len()
     839             :  *
     840             :  *  This is broken out so it can be called directly by other string processing
     841             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     842             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     843             :  *  of it in some cases.
     844             :  *
     845             :  *  The result is always a freshly palloc'd datum.
     846             :  */
     847             : static text *
     848       96648 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     849             : {
     850       96648 :     int32       eml = pg_database_encoding_max_length();
     851       96648 :     int32       S = start;      /* start position */
     852             :     int32       S1;             /* adjusted start position */
     853             :     int32       L1;             /* adjusted substring length */
     854             : 
     855             :     /* life is easy if the encoding max length is 1 */
     856       96648 :     if (eml == 1)
     857             :     {
     858          12 :         S1 = Max(S, 1);
     859             : 
     860          12 :         if (length_not_specified)   /* special case - get length to end of
     861             :                                      * string */
     862           0 :             L1 = -1;
     863             :         else
     864             :         {
     865             :             /* end position */
     866          12 :             int         E = S + length;
     867             : 
     868             :             /*
     869             :              * A negative value for L is the only way for the end position to
     870             :              * be before the start. SQL99 says to throw an error.
     871             :              */
     872          12 :             if (E < S)
     873           0 :                 ereport(ERROR,
     874             :                         (errcode(ERRCODE_SUBSTRING_ERROR),
     875             :                          errmsg("negative substring length not allowed")));
     876             : 
     877             :             /*
     878             :              * A zero or negative value for the end position can happen if the
     879             :              * start was negative or one. SQL99 says to return a zero-length
     880             :              * string.
     881             :              */
     882          12 :             if (E < 1)
     883           0 :                 return cstring_to_text("");
     884             : 
     885          12 :             L1 = E - S1;
     886             :         }
     887             : 
     888             :         /*
     889             :          * If the start position is past the end of the string, SQL99 says to
     890             :          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
     891             :          * that for us. Convert to zero-based starting position
     892             :          */
     893          12 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     894             :     }
     895       96636 :     else if (eml > 1)
     896             :     {
     897             :         /*
     898             :          * When encoding max length is > 1, we can't get LC without
     899             :          * detoasting, so we'll grab a conservatively large slice now and go
     900             :          * back later to do the right thing
     901             :          */
     902             :         int32       slice_start;
     903             :         int32       slice_size;
     904             :         int32       slice_strlen;
     905             :         text       *slice;
     906             :         int32       E1;
     907             :         int32       i;
     908             :         char       *p;
     909             :         char       *s;
     910             :         text       *ret;
     911             : 
     912             :         /*
     913             :          * if S is past the end of the string, the tuple toaster will return a
     914             :          * zero-length string to us
     915             :          */
     916       96636 :         S1 = Max(S, 1);
     917             : 
     918             :         /*
     919             :          * We need to start at position zero because there is no way to know
     920             :          * in advance which byte offset corresponds to the supplied start
     921             :          * position.
     922             :          */
     923       96636 :         slice_start = 0;
     924             : 
     925       96636 :         if (length_not_specified)   /* special case - get length to end of
     926             :                                      * string */
     927          58 :             slice_size = L1 = -1;
     928             :         else
     929             :         {
     930       96578 :             int         E = S + length;
     931             : 
     932             :             /*
     933             :              * A negative value for L is the only way for the end position to
     934             :              * be before the start. SQL99 says to throw an error.
     935             :              */
     936       96578 :             if (E < S)
     937           4 :                 ereport(ERROR,
     938             :                         (errcode(ERRCODE_SUBSTRING_ERROR),
     939             :                          errmsg("negative substring length not allowed")));
     940             : 
     941             :             /*
     942             :              * A zero or negative value for the end position can happen if the
     943             :              * start was negative or one. SQL99 says to return a zero-length
     944             :              * string.
     945             :              */
     946       96574 :             if (E < 1)
     947           0 :                 return cstring_to_text("");
     948             : 
     949             :             /*
     950             :              * if E is past the end of the string, the tuple toaster will
     951             :              * truncate the length for us
     952             :              */
     953       96574 :             L1 = E - S1;
     954             : 
     955             :             /*
     956             :              * Total slice size in bytes can't be any longer than the start
     957             :              * position plus substring length times the encoding max length.
     958             :              */
     959       96574 :             slice_size = (S1 + L1) * eml;
     960             :         }
     961             : 
     962             :         /*
     963             :          * If we're working with an untoasted source, no need to do an extra
     964             :          * copying step.
     965             :          */
     966       96632 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
     967       96608 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
     968          68 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
     969             :         else
     970       96564 :             slice = (text *) DatumGetPointer(str);
     971             : 
     972             :         /* see if we got back an empty string */
     973       96632 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
     974             :         {
     975           0 :             if (slice != (text *) DatumGetPointer(str))
     976           0 :                 pfree(slice);
     977           0 :             return cstring_to_text("");
     978             :         }
     979             : 
     980             :         /* Now we can get the actual length of the slice in MB characters */
     981       96632 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
     982       96632 :                                             VARSIZE_ANY_EXHDR(slice));
     983             : 
     984             :         /*
     985             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
     986             :          * says to return a zero-length string.
     987             :          */
     988       96632 :         if (S1 > slice_strlen)
     989             :         {
     990          20 :             if (slice != (text *) DatumGetPointer(str))
     991           0 :                 pfree(slice);
     992          20 :             return cstring_to_text("");
     993             :         }
     994             : 
     995             :         /*
     996             :          * Adjust L1 and E1 now that we know the slice string length. Again
     997             :          * remember that S1 is one based, and slice_start is zero based.
     998             :          */
     999       96612 :         if (L1 > -1)
    1000       96574 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
    1001             :         else
    1002          38 :             E1 = slice_start + 1 + slice_strlen;
    1003             : 
    1004             :         /*
    1005             :          * Find the start position in the slice; remember S1 is not zero based
    1006             :          */
    1007       96612 :         p = VARDATA_ANY(slice);
    1008     3296618 :         for (i = 0; i < S1 - 1; i++)
    1009     3200006 :             p += pg_mblen(p);
    1010             : 
    1011             :         /* hang onto a pointer to our start position */
    1012       96612 :         s = p;
    1013             : 
    1014             :         /*
    1015             :          * Count the actual bytes used by the substring of the requested
    1016             :          * length.
    1017             :          */
    1018     1652368 :         for (i = S1; i < E1; i++)
    1019     1555756 :             p += pg_mblen(p);
    1020             : 
    1021       96612 :         ret = (text *) palloc(VARHDRSZ + (p - s));
    1022       96612 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
    1023       96612 :         memcpy(VARDATA(ret), s, (p - s));
    1024             : 
    1025       96612 :         if (slice != (text *) DatumGetPointer(str))
    1026          68 :             pfree(slice);
    1027             : 
    1028       96612 :         return ret;
    1029             :     }
    1030             :     else
    1031           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
    1032             : 
    1033             :     /* not reached: suppress compiler warning */
    1034             :     return NULL;
    1035             : }
    1036             : 
    1037             : /*
    1038             :  * textoverlay
    1039             :  *  Replace specified substring of first string with second
    1040             :  *
    1041             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1042             :  * This code is a direct implementation of what the standard says.
    1043             :  */
    1044             : Datum
    1045          24 : textoverlay(PG_FUNCTION_ARGS)
    1046             : {
    1047          24 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1048          24 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1049          24 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1050          24 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1051             : 
    1052          24 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1053             : }
    1054             : 
    1055             : Datum
    1056           8 : textoverlay_no_len(PG_FUNCTION_ARGS)
    1057             : {
    1058           8 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1059           8 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1060           8 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1061             :     int         sl;
    1062             : 
    1063           8 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
    1064           8 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1065             : }
    1066             : 
    1067             : static text *
    1068          32 : text_overlay(text *t1, text *t2, int sp, int sl)
    1069             : {
    1070             :     text       *result;
    1071             :     text       *s1;
    1072             :     text       *s2;
    1073             :     int         sp_pl_sl;
    1074             : 
    1075             :     /*
    1076             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    1077             :      * "substring length" error because that's what should be expected
    1078             :      * according to the spec's definition of OVERLAY().
    1079             :      */
    1080          32 :     if (sp <= 0)
    1081           0 :         ereport(ERROR,
    1082             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1083             :                  errmsg("negative substring length not allowed")));
    1084          32 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    1085           0 :         ereport(ERROR,
    1086             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1087             :                  errmsg("integer out of range")));
    1088             : 
    1089          32 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
    1090          32 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    1091          32 :     result = text_catenate(s1, t2);
    1092          32 :     result = text_catenate(result, s2);
    1093             : 
    1094          32 :     return result;
    1095             : }
    1096             : 
    1097             : /*
    1098             :  * textpos -
    1099             :  *    Return the position of the specified substring.
    1100             :  *    Implements the SQL POSITION() function.
    1101             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1102             :  * - thomas 1997-07-27
    1103             :  */
    1104             : Datum
    1105          92 : textpos(PG_FUNCTION_ARGS)
    1106             : {
    1107          92 :     text       *str = PG_GETARG_TEXT_PP(0);
    1108          92 :     text       *search_str = PG_GETARG_TEXT_PP(1);
    1109             : 
    1110          92 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
    1111             : }
    1112             : 
    1113             : /*
    1114             :  * text_position -
    1115             :  *  Does the real work for textpos()
    1116             :  *
    1117             :  * Inputs:
    1118             :  *      t1 - string to be searched
    1119             :  *      t2 - pattern to match within t1
    1120             :  * Result:
    1121             :  *      Character index of the first matched char, starting from 1,
    1122             :  *      or 0 if no match.
    1123             :  *
    1124             :  *  This is broken out so it can be called directly by other string processing
    1125             :  *  functions.
    1126             :  */
    1127             : static int
    1128          92 : text_position(text *t1, text *t2, Oid collid)
    1129             : {
    1130             :     TextPositionState state;
    1131             :     int         result;
    1132             : 
    1133             :     /* Empty needle always matches at position 1 */
    1134          92 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
    1135           8 :         return 1;
    1136             : 
    1137             :     /* Otherwise, can't match if haystack is shorter than needle */
    1138          84 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
    1139          20 :         return 0;
    1140             : 
    1141          64 :     text_position_setup(t1, t2, collid, &state);
    1142          64 :     if (!text_position_next(&state))
    1143          22 :         result = 0;
    1144             :     else
    1145          42 :         result = text_position_get_match_pos(&state);
    1146          64 :     text_position_cleanup(&state);
    1147          64 :     return result;
    1148             : }
    1149             : 
    1150             : 
    1151             : /*
    1152             :  * text_position_setup, text_position_next, text_position_cleanup -
    1153             :  *  Component steps of text_position()
    1154             :  *
    1155             :  * These are broken out so that a string can be efficiently searched for
    1156             :  * multiple occurrences of the same pattern.  text_position_next may be
    1157             :  * called multiple times, and it advances to the next match on each call.
    1158             :  * text_position_get_match_ptr() and text_position_get_match_pos() return
    1159             :  * a pointer or 1-based character position of the last match, respectively.
    1160             :  *
    1161             :  * The "state" variable is normally just a local variable in the caller.
    1162             :  *
    1163             :  * NOTE: text_position_next skips over the matched portion.  For example,
    1164             :  * searching for "xx" in "xxx" returns only one match, not two.
    1165             :  */
    1166             : 
    1167             : static void
    1168        1594 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
    1169             : {
    1170        1594 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
    1171        1594 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
    1172        1594 :     pg_locale_t mylocale = 0;
    1173             : 
    1174        1594 :     check_collation_set(collid);
    1175             : 
    1176        1594 :     if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
    1177           0 :         mylocale = pg_newlocale_from_collation(collid);
    1178             : 
    1179        1594 :     if (mylocale && !mylocale->deterministic)
    1180           0 :         ereport(ERROR,
    1181             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1182             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1183             : 
    1184             :     Assert(len1 > 0);
    1185             :     Assert(len2 > 0);
    1186             : 
    1187             :     /*
    1188             :      * Even with a multi-byte encoding, we perform the search using the raw
    1189             :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
    1190             :      * because in UTF-8 the byte sequence of one character cannot contain
    1191             :      * another character.  For other multi-byte encodings, we do the search
    1192             :      * initially as a simple byte search, ignoring multibyte issues, but
    1193             :      * verify afterwards that the match we found is at a character boundary,
    1194             :      * and continue the search if it was a false match.
    1195             :      */
    1196        1594 :     if (pg_database_encoding_max_length() == 1)
    1197             :     {
    1198          36 :         state->is_multibyte = false;
    1199          36 :         state->is_multibyte_char_in_char = false;
    1200             :     }
    1201        1558 :     else if (GetDatabaseEncoding() == PG_UTF8)
    1202             :     {
    1203        1558 :         state->is_multibyte = true;
    1204        1558 :         state->is_multibyte_char_in_char = false;
    1205             :     }
    1206             :     else
    1207             :     {
    1208           0 :         state->is_multibyte = true;
    1209           0 :         state->is_multibyte_char_in_char = true;
    1210             :     }
    1211             : 
    1212        1594 :     state->str1 = VARDATA_ANY(t1);
    1213        1594 :     state->str2 = VARDATA_ANY(t2);
    1214        1594 :     state->len1 = len1;
    1215        1594 :     state->len2 = len2;
    1216        1594 :     state->last_match = NULL;
    1217        1594 :     state->refpoint = state->str1;
    1218        1594 :     state->refpos = 0;
    1219             : 
    1220             :     /*
    1221             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1222             :      * notes we use the terminology that the "haystack" is the string to be
    1223             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1224             :      *
    1225             :      * If the needle is empty or bigger than the haystack then there is no
    1226             :      * point in wasting cycles initializing the table.  We also choose not to
    1227             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1228             :      * save anything in that case.
    1229             :      */
    1230        1594 :     if (len1 >= len2 && len2 > 1)
    1231             :     {
    1232        1492 :         int         searchlength = len1 - len2;
    1233             :         int         skiptablemask;
    1234             :         int         last;
    1235             :         int         i;
    1236        1492 :         const char *str2 = state->str2;
    1237             : 
    1238             :         /*
    1239             :          * First we must determine how much of the skip table to use.  The
    1240             :          * declaration of TextPositionState allows up to 256 elements, but for
    1241             :          * short search problems we don't really want to have to initialize so
    1242             :          * many elements --- it would take too long in comparison to the
    1243             :          * actual search time.  So we choose a useful skip table size based on
    1244             :          * the haystack length minus the needle length.  The closer the needle
    1245             :          * length is to the haystack length the less useful skipping becomes.
    1246             :          *
    1247             :          * Note: since we use bit-masking to select table elements, the skip
    1248             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1249             :          */
    1250        1492 :         if (searchlength < 16)
    1251          36 :             skiptablemask = 3;
    1252        1456 :         else if (searchlength < 64)
    1253           4 :             skiptablemask = 7;
    1254        1452 :         else if (searchlength < 128)
    1255           2 :             skiptablemask = 15;
    1256        1450 :         else if (searchlength < 512)
    1257          96 :             skiptablemask = 31;
    1258        1354 :         else if (searchlength < 2048)
    1259        1280 :             skiptablemask = 63;
    1260          74 :         else if (searchlength < 4096)
    1261          16 :             skiptablemask = 127;
    1262             :         else
    1263          58 :             skiptablemask = 255;
    1264        1492 :         state->skiptablemask = skiptablemask;
    1265             : 
    1266             :         /*
    1267             :          * Initialize the skip table.  We set all elements to the needle
    1268             :          * length, since this is the correct skip distance for any character
    1269             :          * not found in the needle.
    1270             :          */
    1271      103588 :         for (i = 0; i <= skiptablemask; i++)
    1272      102096 :             state->skiptable[i] = len2;
    1273             : 
    1274             :         /*
    1275             :          * Now examine the needle.  For each character except the last one,
    1276             :          * set the corresponding table element to the appropriate skip
    1277             :          * distance.  Note that when two characters share the same skip table
    1278             :          * entry, the one later in the needle must determine the skip
    1279             :          * distance.
    1280             :          */
    1281        1492 :         last = len2 - 1;
    1282             : 
    1283       18376 :         for (i = 0; i < last; i++)
    1284       16884 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1285             :     }
    1286        1594 : }
    1287             : 
    1288             : /*
    1289             :  * Advance to the next match, starting from the end of the previous match
    1290             :  * (or the beginning of the string, on first call).  Returns true if a match
    1291             :  * is found.
    1292             :  *
    1293             :  * Note that this refuses to match an empty-string needle.  Most callers
    1294             :  * will have handled that case specially and we'll never see it here.
    1295             :  */
    1296             : static bool
    1297        6264 : text_position_next(TextPositionState *state)
    1298             : {
    1299        6264 :     int         needle_len = state->len2;
    1300             :     char       *start_ptr;
    1301             :     char       *matchptr;
    1302             : 
    1303        6264 :     if (needle_len <= 0)
    1304           0 :         return false;           /* result for empty pattern */
    1305             : 
    1306             :     /* Start from the point right after the previous match. */
    1307        6264 :     if (state->last_match)
    1308        4670 :         start_ptr = state->last_match + needle_len;
    1309             :     else
    1310        1594 :         start_ptr = state->str1;
    1311             : 
    1312        6264 : retry:
    1313        6264 :     matchptr = text_position_next_internal(start_ptr, state);
    1314             : 
    1315        6264 :     if (!matchptr)
    1316        1544 :         return false;
    1317             : 
    1318             :     /*
    1319             :      * Found a match for the byte sequence.  If this is a multibyte encoding,
    1320             :      * where one character's byte sequence can appear inside a longer
    1321             :      * multi-byte character, we need to verify that the match was at a
    1322             :      * character boundary, not in the middle of a multi-byte character.
    1323             :      */
    1324        4720 :     if (state->is_multibyte_char_in_char)
    1325             :     {
    1326             :         /* Walk one character at a time, until we reach the match. */
    1327             : 
    1328             :         /* the search should never move backwards. */
    1329             :         Assert(state->refpoint <= matchptr);
    1330             : 
    1331           0 :         while (state->refpoint < matchptr)
    1332             :         {
    1333             :             /* step to next character. */
    1334           0 :             state->refpoint += pg_mblen(state->refpoint);
    1335           0 :             state->refpos++;
    1336             : 
    1337             :             /*
    1338             :              * If we stepped over the match's start position, then it was a
    1339             :              * false positive, where the byte sequence appeared in the middle
    1340             :              * of a multi-byte character.  Skip it, and continue the search at
    1341             :              * the next character boundary.
    1342             :              */
    1343           0 :             if (state->refpoint > matchptr)
    1344             :             {
    1345           0 :                 start_ptr = state->refpoint;
    1346           0 :                 goto retry;
    1347             :             }
    1348             :         }
    1349             :     }
    1350             : 
    1351        4720 :     state->last_match = matchptr;
    1352        4720 :     return true;
    1353             : }
    1354             : 
    1355             : /*
    1356             :  * Subroutine of text_position_next().  This searches for the raw byte
    1357             :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
    1358             :  * match starting at 'start_ptr', or NULL if no match is found.
    1359             :  */
    1360             : static char *
    1361        6264 : text_position_next_internal(char *start_ptr, TextPositionState *state)
    1362             : {
    1363        6264 :     int         haystack_len = state->len1;
    1364        6264 :     int         needle_len = state->len2;
    1365        6264 :     int         skiptablemask = state->skiptablemask;
    1366        6264 :     const char *haystack = state->str1;
    1367        6264 :     const char *needle = state->str2;
    1368        6264 :     const char *haystack_end = &haystack[haystack_len];
    1369             :     const char *hptr;
    1370             : 
    1371             :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
    1372             : 
    1373        6264 :     if (needle_len == 1)
    1374             :     {
    1375             :         /* No point in using B-M-H for a one-character needle */
    1376         338 :         char        nchar = *needle;
    1377             : 
    1378         338 :         hptr = start_ptr;
    1379        3126 :         while (hptr < haystack_end)
    1380             :         {
    1381        3058 :             if (*hptr == nchar)
    1382         270 :                 return (char *) hptr;
    1383        2788 :             hptr++;
    1384             :         }
    1385             :     }
    1386             :     else
    1387             :     {
    1388        5926 :         const char *needle_last = &needle[needle_len - 1];
    1389             : 
    1390             :         /* Start at start_ptr plus the length of the needle, less one */
    1391        5926 :         hptr = start_ptr + needle_len - 1;
    1392      152684 :         while (hptr < haystack_end)
    1393             :         {
    1394             :             /* Match the needle scanning *backward* */
    1395             :             const char *nptr;
    1396             :             const char *p;
    1397             : 
    1398      151208 :             nptr = needle_last;
    1399      151208 :             p = hptr;
    1400      215732 :             while (*nptr == *p)
    1401             :             {
    1402             :                 /* Matched it all?  If so, return pointer to match start */
    1403       68974 :                 if (nptr == needle)
    1404        4450 :                     return (char *) p;
    1405       64524 :                 nptr--, p--;
    1406             :             }
    1407             : 
    1408             :             /*
    1409             :              * No match, so use the haystack char at hptr to decide how far to
    1410             :              * advance.  If the needle had any occurrence of that character
    1411             :              * (or more precisely, one sharing the same skiptable entry)
    1412             :              * before its last character, then we advance far enough to align
    1413             :              * the last such needle character with that haystack position.
    1414             :              * Otherwise we can advance by the whole needle length.
    1415             :              */
    1416      146758 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1417             :         }
    1418             :     }
    1419             : 
    1420        1544 :     return 0;                   /* not found */
    1421             : }
    1422             : 
    1423             : /*
    1424             :  * Return a pointer to the current match.
    1425             :  *
    1426             :  * The returned pointer points into the original haystack string, at
    1427             :  * the start of the current match.
    1428             :  */
    1429             : static char *
    1430        4678 : text_position_get_match_ptr(TextPositionState *state)
    1431             : {
    1432        4678 :     return state->last_match;
    1433             : }
    1434             : 
    1435             : /*
    1436             :  * Return the offset of the current match.
    1437             :  *
    1438             :  * The offset is in characters, 1-based.
    1439             :  */
    1440             : static int
    1441          42 : text_position_get_match_pos(TextPositionState *state)
    1442             : {
    1443          42 :     if (!state->is_multibyte)
    1444           0 :         return state->last_match - state->str1 + 1;
    1445             :     else
    1446             :     {
    1447             :         /* Convert the byte position to char position. */
    1448         102 :         while (state->refpoint < state->last_match)
    1449             :         {
    1450          60 :             state->refpoint += pg_mblen(state->refpoint);
    1451          60 :             state->refpos++;
    1452             :         }
    1453             :         Assert(state->refpoint == state->last_match);
    1454          42 :         return state->refpos + 1;
    1455             :     }
    1456             : }
    1457             : 
    1458             : static void
    1459        1594 : text_position_cleanup(TextPositionState *state)
    1460             : {
    1461             :     /* no cleanup needed */
    1462        1594 : }
    1463             : 
    1464             : static void
    1465     9160808 : check_collation_set(Oid collid)
    1466             : {
    1467     9160808 :     if (!OidIsValid(collid))
    1468             :     {
    1469             :         /*
    1470             :          * This typically means that the parser could not resolve a conflict
    1471             :          * of implicit collations, so report it that way.
    1472             :          */
    1473           8 :         ereport(ERROR,
    1474             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1475             :                  errmsg("could not determine which collation to use for string comparison"),
    1476             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1477             :     }
    1478     9160800 : }
    1479             : 
    1480             : /* varstr_cmp()
    1481             :  * Comparison function for text strings with given lengths.
    1482             :  * Includes locale support, but must copy strings to temporary memory
    1483             :  *  to allow null-termination for inputs to strcoll().
    1484             :  * Returns an integer less than, equal to, or greater than zero, indicating
    1485             :  * whether arg1 is less than, equal to, or greater than arg2.
    1486             :  *
    1487             :  * Note: many functions that depend on this are marked leakproof; therefore,
    1488             :  * avoid reporting the actual contents of the input when throwing errors.
    1489             :  * All errors herein should be things that can't happen except on corrupt
    1490             :  * data, anyway; otherwise we will have trouble with indexing strings that
    1491             :  * would cause them.
    1492             :  */
    1493             : int
    1494     6650648 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
    1495             : {
    1496             :     int         result;
    1497             : 
    1498     6650648 :     check_collation_set(collid);
    1499             : 
    1500             :     /*
    1501             :      * Unfortunately, there is no strncoll(), so in the non-C locale case we
    1502             :      * have to do some memory copying.  This turns out to be significantly
    1503             :      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
    1504             :      * optimize relatively-short strings by avoiding palloc/pfree overhead.
    1505             :      */
    1506     6650644 :     if (lc_collate_is_c(collid))
    1507             :     {
    1508     2677168 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1509     2677168 :         if ((result == 0) && (len1 != len2))
    1510       62098 :             result = (len1 < len2) ? -1 : 1;
    1511             :     }
    1512             :     else
    1513             :     {
    1514             :         char        a1buf[TEXTBUFLEN];
    1515             :         char        a2buf[TEXTBUFLEN];
    1516             :         char       *a1p,
    1517             :                    *a2p;
    1518     3973476 :         pg_locale_t mylocale = 0;
    1519             : 
    1520     3973476 :         if (collid != DEFAULT_COLLATION_OID)
    1521           0 :             mylocale = pg_newlocale_from_collation(collid);
    1522             : 
    1523             :         /*
    1524             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1525             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1526             :          * memcmp() followed by strcoll() is only trivially slower than
    1527             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1528             :          * very often, and if it does - for example, because there are many
    1529             :          * equal strings in the input - then we win big by avoiding expensive
    1530             :          * collation-aware comparisons.
    1531             :          */
    1532     3973476 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1533     1815506 :             return 0;
    1534             : 
    1535             : #ifdef WIN32
    1536             :         /* Win32 does not have UTF-8, so we need to map to UTF-16 */
    1537             :         if (GetDatabaseEncoding() == PG_UTF8
    1538             :             && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
    1539             :         {
    1540             :             int         a1len;
    1541             :             int         a2len;
    1542             :             int         r;
    1543             : 
    1544             :             if (len1 >= TEXTBUFLEN / 2)
    1545             :             {
    1546             :                 a1len = len1 * 2 + 2;
    1547             :                 a1p = palloc(a1len);
    1548             :             }
    1549             :             else
    1550             :             {
    1551             :                 a1len = TEXTBUFLEN;
    1552             :                 a1p = a1buf;
    1553             :             }
    1554             :             if (len2 >= TEXTBUFLEN / 2)
    1555             :             {
    1556             :                 a2len = len2 * 2 + 2;
    1557             :                 a2p = palloc(a2len);
    1558             :             }
    1559             :             else
    1560             :             {
    1561             :                 a2len = TEXTBUFLEN;
    1562             :                 a2p = a2buf;
    1563             :             }
    1564             : 
    1565             :             /* stupid Microsloth API does not work for zero-length input */
    1566             :             if (len1 == 0)
    1567             :                 r = 0;
    1568             :             else
    1569             :             {
    1570             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1571             :                                         (LPWSTR) a1p, a1len / 2);
    1572             :                 if (!r)
    1573             :                     ereport(ERROR,
    1574             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1575             :                                     GetLastError())));
    1576             :             }
    1577             :             ((LPWSTR) a1p)[r] = 0;
    1578             : 
    1579             :             if (len2 == 0)
    1580             :                 r = 0;
    1581             :             else
    1582             :             {
    1583             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1584             :                                         (LPWSTR) a2p, a2len / 2);
    1585             :                 if (!r)
    1586             :                     ereport(ERROR,
    1587             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1588             :                                     GetLastError())));
    1589             :             }
    1590             :             ((LPWSTR) a2p)[r] = 0;
    1591             : 
    1592             :             errno = 0;
    1593             : #ifdef HAVE_LOCALE_T
    1594             :             if (mylocale)
    1595             :                 result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
    1596             :             else
    1597             : #endif
    1598             :                 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
    1599             :             if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw
    1600             :                                          * headers */
    1601             :                 ereport(ERROR,
    1602             :                         (errmsg("could not compare Unicode strings: %m")));
    1603             : 
    1604             :             /* Break tie if necessary. */
    1605             :             if (result == 0 &&
    1606             :                 (!mylocale || mylocale->deterministic))
    1607             :             {
    1608             :                 result = memcmp(arg1, arg2, Min(len1, len2));
    1609             :                 if ((result == 0) && (len1 != len2))
    1610             :                     result = (len1 < len2) ? -1 : 1;
    1611             :             }
    1612             : 
    1613             :             if (a1p != a1buf)
    1614             :                 pfree(a1p);
    1615             :             if (a2p != a2buf)
    1616             :                 pfree(a2p);
    1617             : 
    1618             :             return result;
    1619             :         }
    1620             : #endif                          /* WIN32 */
    1621             : 
    1622     2157970 :         if (len1 >= TEXTBUFLEN)
    1623         200 :             a1p = (char *) palloc(len1 + 1);
    1624             :         else
    1625     2157770 :             a1p = a1buf;
    1626     2157970 :         if (len2 >= TEXTBUFLEN)
    1627          72 :             a2p = (char *) palloc(len2 + 1);
    1628             :         else
    1629     2157898 :             a2p = a2buf;
    1630             : 
    1631     2157970 :         memcpy(a1p, arg1, len1);
    1632     2157970 :         a1p[len1] = '\0';
    1633     2157970 :         memcpy(a2p, arg2, len2);
    1634     2157970 :         a2p[len2] = '\0';
    1635             : 
    1636     2157970 :         if (mylocale)
    1637             :         {
    1638           0 :             if (mylocale->provider == COLLPROVIDER_ICU)
    1639             :             {
    1640             : #ifdef USE_ICU
    1641             : #ifdef HAVE_UCOL_STRCOLLUTF8
    1642             :                 if (GetDatabaseEncoding() == PG_UTF8)
    1643             :                 {
    1644             :                     UErrorCode  status;
    1645             : 
    1646             :                     status = U_ZERO_ERROR;
    1647             :                     result = ucol_strcollUTF8(mylocale->info.icu.ucol,
    1648             :                                               arg1, len1,
    1649             :                                               arg2, len2,
    1650             :                                               &status);
    1651             :                     if (U_FAILURE(status))
    1652             :                         ereport(ERROR,
    1653             :                                 (errmsg("collation failed: %s", u_errorName(status))));
    1654             :                 }
    1655             :                 else
    1656             : #endif
    1657             :                 {
    1658             :                     int32_t     ulen1,
    1659             :                                 ulen2;
    1660             :                     UChar      *uchar1,
    1661             :                                *uchar2;
    1662             : 
    1663             :                     ulen1 = icu_to_uchar(&uchar1, arg1, len1);
    1664             :                     ulen2 = icu_to_uchar(&uchar2, arg2, len2);
    1665             : 
    1666             :                     result = ucol_strcoll(mylocale->info.icu.ucol,
    1667             :                                           uchar1, ulen1,
    1668             :                                           uchar2, ulen2);
    1669             : 
    1670             :                     pfree(uchar1);
    1671             :                     pfree(uchar2);
    1672             :                 }
    1673             : #else                           /* not USE_ICU */
    1674             :                 /* shouldn't happen */
    1675           0 :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1676             : #endif                          /* not USE_ICU */
    1677             :             }
    1678             :             else
    1679             :             {
    1680             : #ifdef HAVE_LOCALE_T
    1681           0 :                 result = strcoll_l(a1p, a2p, mylocale->info.lt);
    1682             : #else
    1683             :                 /* shouldn't happen */
    1684             :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1685             : #endif
    1686             :             }
    1687             :         }
    1688             :         else
    1689     2157970 :             result = strcoll(a1p, a2p);
    1690             : 
    1691             :         /* Break tie if necessary. */
    1692     2157970 :         if (result == 0 &&
    1693           0 :             (!mylocale || mylocale->deterministic))
    1694           0 :             result = strcmp(a1p, a2p);
    1695             : 
    1696     2157970 :         if (a1p != a1buf)
    1697         200 :             pfree(a1p);
    1698     2157970 :         if (a2p != a2buf)
    1699          72 :             pfree(a2p);
    1700             :     }
    1701             : 
    1702     4835138 :     return result;
    1703             : }
    1704             : 
    1705             : /* text_cmp()
    1706             :  * Internal comparison function for text strings.
    1707             :  * Returns a negative, zero, or positive value, as varstr_cmp() does.
    1708             :  */
    1709             : static int
    1710     5540786 : text_cmp(text *arg1, text *arg2, Oid collid)
    1711             : {
    1712             :     char       *a1p,
    1713             :                *a2p;
    1714             :     int         len1,
    1715             :                 len2;
    1716             : 
    1717     5540786 :     a1p = VARDATA_ANY(arg1);
    1718     5540786 :     a2p = VARDATA_ANY(arg2);
    1719             : 
    1720     5540786 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1721     5540786 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1722             : 
    1723     5540786 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1724             : }
    1725             : 
    1726             : /*
    1727             :  * Comparison functions for text strings.
    1728             :  *
    1729             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1730             :  * be careful to free working copies of toasted datums.  Most places don't
    1731             :  * need to be so careful.
    1732             :  */
    1733             : 
    1734             : Datum
    1735     2256508 : texteq(PG_FUNCTION_ARGS)
    1736             : {
    1737     2256508 :     Oid         collid = PG_GET_COLLATION();
    1738             :     bool        result;
    1739             : 
    1740     2256508 :     check_collation_set(collid);
    1741             : 
    1742     2256508 :     if (lc_collate_is_c(collid) ||
    1743           0 :         collid == DEFAULT_COLLATION_OID ||
    1744           0 :         pg_newlocale_from_collation(collid)->deterministic)
    1745     2256508 :     {
    1746     2256508 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1747     2256508 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1748             :         Size        len1,
    1749             :                     len2;
    1750             : 
    1751             :         /*
    1752             :          * Since we only care about equality or not-equality, we can avoid all
    1753             :          * the expense of strcoll() here, and just do bitwise comparison.  In
    1754             :          * fact, we don't even have to do a bitwise comparison if we can show
    1755             :          * the lengths of the strings are unequal; which might save us from
    1756             :          * having to detoast one or both values.
    1757             :          */
    1758     2256508 :         len1 = toast_raw_datum_size(arg1);
    1759     2256508 :         len2 = toast_raw_datum_size(arg2);
    1760     2256508 :         if (len1 != len2)
    1761      610458 :             result = false;
    1762             :         else
    1763             :         {
    1764     1646050 :             text       *targ1 = DatumGetTextPP(arg1);
    1765     1646050 :             text       *targ2 = DatumGetTextPP(arg2);
    1766             : 
    1767     1646050 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1768             :                              len1 - VARHDRSZ) == 0);
    1769             : 
    1770     1646050 :             PG_FREE_IF_COPY(targ1, 0);
    1771     1646050 :             PG_FREE_IF_COPY(targ2, 1);
    1772             :         }
    1773             :     }
    1774             :     else
    1775             :     {
    1776           0 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1777           0 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1778             : 
    1779           0 :         result = (text_cmp(arg1, arg2, collid) == 0);
    1780             : 
    1781           0 :         PG_FREE_IF_COPY(arg1, 0);
    1782           0 :         PG_FREE_IF_COPY(arg2, 1);
    1783             :     }
    1784             : 
    1785     2256508 :     PG_RETURN_BOOL(result);
    1786             : }
    1787             : 
    1788             : Datum
    1789       11916 : textne(PG_FUNCTION_ARGS)
    1790             : {
                           /*
                            * Inequality test on two text arguments.  The collation attached to
                            * the call selects one of two strategies: a bitwise comparison that
                            * never calls text_cmp(), or a full text_cmp() comparison.
                            */
    1791       11916 :     Oid         collid = PG_GET_COLLATION();
    1792             :     bool        result;
    1793             : 
    1794       11916 :     check_collation_set(collid);
    1795             : 
                           /*
                            * Bitwise path: used when the collation is C, is
                            * DEFAULT_COLLATION_OID, or is marked deterministic.  The locale
                            * lookup is the last operand of the ||, so it only runs when the two
                            * cheaper tests have both failed.
                            */
    1796       11916 :     if (lc_collate_is_c(collid) ||
    1797           0 :         collid == DEFAULT_COLLATION_OID ||
    1798           0 :         pg_newlocale_from_collation(collid)->deterministic)
    1799       11916 :     {
    1800       11916 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1801       11916 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1802             :         Size        len1,
    1803             :                     len2;
    1804             : 
    1805             :         /* See comment in texteq() */
                               /*
                                * The sizes are taken from the raw datums, so the
                                * DatumGetTextPP() conversions below are reached only when the
                                * two sizes agree.  Differing sizes already prove inequality.
                                */
    1806       11916 :         len1 = toast_raw_datum_size(arg1);
    1807       11916 :         len2 = toast_raw_datum_size(arg2);
    1808       11916 :         if (len1 != len2)
    1809         560 :             result = true;
    1810             :         else
    1811             :         {
    1812       11356 :             text       *targ1 = DatumGetTextPP(arg1);
    1813       11356 :             text       *targ2 = DatumGetTextPP(arg2);
    1814             : 
                                   /*
                                    * len1 equals len2 here.  VARHDRSZ is subtracted to get the
                                    * number of data bytes to compare (presumably because
                                    * toast_raw_datum_size() counts the header -- confirm).
                                    */
    1815       11356 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1816             :                              len1 - VARHDRSZ) != 0);
    1817             : 
    1818       11356 :             PG_FREE_IF_COPY(targ1, 0);
    1819       11356 :             PG_FREE_IF_COPY(targ2, 1);
    1820             :         }
    1821             :     }
    1822             :     else
    1823             :     {
                               /*
                                * Remaining collations: bytes alone do not decide the answer
                                * here, so compare through text_cmp() and report inequality
                                * when it returns a nonzero value.
                                */
    1824           0 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1825           0 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1826             : 
    1827           0 :         result = (text_cmp(arg1, arg2, collid) != 0);
    1828             : 
    1829           0 :         PG_FREE_IF_COPY(arg1, 0);
    1830           0 :         PG_FREE_IF_COPY(arg2, 1);
    1831             :     }
    1832             : 
    1833       11916 :     PG_RETURN_BOOL(result);
    1834             : }
    1835             : 
    1836             : Datum
    1837       88888 : text_lt(PG_FUNCTION_ARGS)
    1838             : {
                           /*
                            * Less-than test for text: true when text_cmp() on the two
                            * arguments, under the collation of the call, returns a negative
                            * value.
                            */
    1839       88888 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1840       88888 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1841             :     bool        result;
    1842             : 
    1843       88888 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1844             : 
                           /* Comparison is done; apply PG_FREE_IF_COPY() to both arguments */
    1845       88884 :     PG_FREE_IF_COPY(arg1, 0);
    1846       88884 :     PG_FREE_IF_COPY(arg2, 1);
    1847             : 
    1848       88884 :     PG_RETURN_BOOL(result);
    1849             : }
    1850             : 
    1851             : Datum
    1852      141304 : text_le(PG_FUNCTION_ARGS)
    1853             : {
                           /*
                            * Less-than-or-equal test for text: true when text_cmp() on the two
                            * arguments, under the collation of the call, returns zero or a
                            * negative value.
                            */
    1854      141304 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1855      141304 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1856             :     bool        result;
    1857             : 
    1858      141304 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1859             : 
                           /* Comparison is done; apply PG_FREE_IF_COPY() to both arguments */
    1860      141304 :     PG_FREE_IF_COPY(arg1, 0);
    1861      141304 :     PG_FREE_IF_COPY(arg2, 1);
    1862             : 
    1863      141304 :     PG_RETURN_BOOL(result);
    1864             : }
    1865             : 
    1866             : Datum
    1867       57902 : text_gt(PG_FUNCTION_ARGS)
    1868             : {
                           /*
                            * Greater-than test for text: true when text_cmp() on the two
                            * arguments, under the collation of the call, returns a positive
                            * value.
                            */
    1869       57902 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1870       57902 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1871             :     bool        result;
    1872             : 
    1873       57902 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1874             : 
                           /* Comparison is done; apply PG_FREE_IF_COPY() to both arguments */
    1875       57902 :     PG_FREE_IF_COPY(arg1, 0);
    1876       57902 :     PG_FREE_IF_COPY(arg2, 1);
    1877             : 
    1878       57902 :     PG_RETURN_BOOL(result);
    1879             : }
    1880             : 
    1881             : Datum
    1882       84276 : text_ge(PG_FUNCTION_ARGS)
    1883             : {
                           /*
                            * Greater-than-or-equal test for text: true when text_cmp() on the
                            * two arguments, under the collation of the call, returns zero or a
                            * positive value.
                            */
    1884       84276 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1885       84276 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1886             :     bool        result;
    1887             : 
    1888       84276 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1889             : 
                           /* Comparison is done; apply PG_FREE_IF_COPY() to both arguments */
    1890       84276 :     PG_FREE_IF_COPY(arg1, 0);
    1891       84276 :     PG_FREE_IF_COPY(arg2, 1);
    1892             : 
    1893       84276 :     PG_RETURN_BOOL(result);
    1894             : }
    1895             : 
    1896             : Datum
    1897       25132 : text_starts_with(PG_FUNCTION_ARGS)
    1898             : {
                           /*
                            * Prefix test: reports whether the bytes of the second argument
                            * match the leading bytes of the first.  The comparison is a plain
                            * memcmp(), so a nondeterministic collation is rejected with an
                            * error instead of being honored.
                            */
    1899       25132 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1900       25132 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1901       25132 :     Oid         collid = PG_GET_COLLATION();
    1902       25132 :     pg_locale_t mylocale = 0;
    1903             :     bool        result;
    1904             :     Size        len1,
    1905             :                 len2;
    1906             : 
    1907       25132 :     check_collation_set(collid);
    1908             : 
                           /*
                            * mylocale stays 0 for the C collation and for
                            * DEFAULT_COLLATION_OID; the locale is looked up only for other
                            * collations.
                            */
    1909       25132 :     if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
    1910           0 :         mylocale = pg_newlocale_from_collation(collid);
    1911             : 
    1912       25132 :     if (mylocale && !mylocale->deterministic)
    1913           0 :         ereport(ERROR,
    1914             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1915             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1916             : 
                           /*
                            * Compare raw datum sizes first: a candidate prefix that is larger
                            * than the string cannot match, and that answer is produced without
                            * converting either datum with DatumGetTextPP().
                            */
    1917       25132 :     len1 = toast_raw_datum_size(arg1);
    1918       25132 :     len2 = toast_raw_datum_size(arg2);
    1919       25132 :     if (len2 > len1)
    1920           0 :         result = false;
    1921             :     else
    1922             :     {
                               /*
                                * NOTE(review): len2 is the raw size of arg2 as reported by
                                * toast_raw_datum_size(), not a character count.  It is
                                * presumably used as a generous upper bound for the substring
                                * length -- confirm against text_substring().
                                */
    1923       25132 :         text       *targ1 = text_substring(arg1, 1, len2, false);
    1924       25132 :         text       *targ2 = DatumGetTextPP(arg2);
    1925             : 
                               /* Compare exactly as many bytes as the prefix contains */
    1926       25132 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1927       25132 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
    1928             : 
    1929       25132 :         PG_FREE_IF_COPY(targ1, 0);
    1930       25132 :         PG_FREE_IF_COPY(targ2, 1);
    1931             :     }
    1932             : 
    1933       25132 :     PG_RETURN_BOOL(result);
    1934             : }
    1935             : 
    1936             : Datum
    1937     4989950 : bttextcmp(PG_FUNCTION_ARGS)
    1938             : {
                           /*
                            * Three-way comparison of two text arguments.  The int32 produced by
                            * text_cmp() under the collation of the call is returned unchanged.
                            */
    1939     4989950 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1940     4989950 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1941             :     int32       result;
    1942             : 
    1943     4989950 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1944             : 
                           /* Comparison is done; apply PG_FREE_IF_COPY() to both arguments */
    1945     4989950 :     PG_FREE_IF_COPY(arg1, 0);
    1946     4989950 :     PG_FREE_IF_COPY(arg2, 1);
    1947             : 
    1948     4989950 :     PG_RETURN_INT32(result);
    1949             : }
    1950             : 
    1951             : Datum
    1952       49124 : bttextsortsupport(PG_FUNCTION_ARGS)
    1953             : {
                           /*
                            * Sort support setup for text.  The SortSupport object arrives as
                            * argument 0, and the real work is delegated to varstr_sortsupport()
                            * with TEXTOID and the collation recorded in the object.
                            */
    1954       49124 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1955       49124 :     Oid         collid = ssup->ssup_collation;
    1956             :     MemoryContext oldcontext;
    1957             : 
                           /*
                            * Run the setup with ssup->ssup_cxt as the current memory context,
                            * presumably so that what varstr_sortsupport() allocates with
                            * palloc() belongs to that context.  The previous context is
                            * restored afterwards.
                            */
    1958       49124 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1959             : 
    1960             :     /* Use generic string SortSupport */
    1961       49124 :     varstr_sortsupport(ssup, TEXTOID, collid);
    1962             : 
    1963       49120 :     MemoryContextSwitchTo(oldcontext);
    1964             : 
    1965       49120 :     PG_RETURN_VOID();
    1966             : }
    1967             : 
    1968             : /*
    1969             :  * Generic sortsupport interface for character type's operator classes.
    1970             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    1971             :  * trailing spaces before comparison).
    1972             :  *
    1973             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    1974             :  * same representation.  Callers that always use the C collation (e.g.
    1975             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    1976             :  * this will not work with any other collation, though.
                        *
                        * ssup:   sort support object to fill in.  This function may set its
                        *         comparator, ssup_extra and, when abbreviation is used, the
                        *         abbrev_full_comparator, abbrev_converter and abbrev_abort
                        *         fields.  ssup->abbreviate is read as the abbreviation request.
                        * typid:  BPCHAROID and NAMEOID select specialized comparators; any
                        *         other value gets the generic varlena comparators.
                        * collid: collation to sort under; passed to check_collation_set()
                        *         before anything else is done.
    1977             :  */
    1978             : void
    1979       98366 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
    1980             : {
    1981       98366 :     bool        abbreviate = ssup->abbreviate;
    1982       98366 :     bool        collate_c = false;
    1983             :     VarStringSortSupport *sss;
    1984       98366 :     pg_locale_t locale = 0;
    1985             : 
    1986       98366 :     check_collation_set(collid);
    1987             : 
    1988             :     /*
    1989             :      * If possible, set ssup->comparator to a function which can be used to
    1990             :      * directly compare two datums.  If we can do this, we'll avoid the
    1991             :      * overhead of a trip through the fmgr layer for every comparison, which
    1992             :      * can be substantial.
    1993             :      *
    1994             :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
    1995             :      * which uses strcoll() to perform comparisons.  We use that for the
    1996             :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
    1997             :      * LC_COLLATE = C, we can make things quite a bit faster with
    1998             :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
    1999             :      * memcmp() rather than strcoll().
    2000             :      */
    2001       98362 :     if (lc_collate_is_c(collid))
    2002             :     {
    2003       67930 :         if (typid == BPCHAROID)
    2004          16 :             ssup->comparator = bpcharfastcmp_c;
    2005       67914 :         else if (typid == NAMEOID)
    2006             :         {
    2007       48284 :             ssup->comparator = namefastcmp_c;
    2008             :             /* Not supporting abbreviation with type NAME, for now */
    2009       48284 :             abbreviate = false;
    2010             :         }
    2011             :         else
    2012       19630 :             ssup->comparator = varstrfastcmp_c;
    2013             : 
    2014       67930 :         collate_c = true;
    2015             :     }
    2016             :     else
    2017             :     {
    2018             :         /*
    2019             :          * We need a collation-sensitive comparison.  To make things faster,
    2020             :          * we'll figure out the collation based on the locale id and cache the
    2021             :          * result.
                                *
                                * For DEFAULT_COLLATION_OID no lookup is made and locale keeps
                                * its initial value of 0.
    2022             :          */
    2023       30432 :         if (collid != DEFAULT_COLLATION_OID)
    2024           0 :             locale = pg_newlocale_from_collation(collid);
    2025             : 
    2026             :         /*
    2027             :          * There is a further exception on Windows.  When the database
    2028             :          * encoding is UTF-8 and we are not using the C collation, complex
    2029             :          * hacks are required.  We don't currently have a comparator that
    2030             :          * handles that case, so we fall back on the slow method of having the
    2031             :          * sort code invoke bttextcmp() (in the case of text) via the fmgr
    2032             :          * trampoline.  ICU locales work just the same on Windows, however.
                                *
                                * The early return below happens before this branch assigns any
                                * comparator and before ssup_extra is set up, so this function
                                * leaves both untouched in that case.
    2033             :          */
    2034             : #ifdef WIN32
    2035             :         if (GetDatabaseEncoding() == PG_UTF8 &&
    2036             :             !(locale && locale->provider == COLLPROVIDER_ICU))
    2037             :             return;
    2038             : #endif
    2039             : 
    2040             :         /*
    2041             :          * We use varlenafastcmp_locale except for type NAME.
    2042             :          */
    2043       30432 :         if (typid == NAMEOID)
    2044             :         {
    2045           0 :             ssup->comparator = namefastcmp_locale;
    2046             :             /* Not supporting abbreviation with type NAME, for now */
    2047           0 :             abbreviate = false;
    2048             :         }
    2049             :         else
    2050       30432 :             ssup->comparator = varlenafastcmp_locale;
    2051             :     }
    2052             : 
    2053             :     /*
    2054             :      * Unfortunately, it seems that abbreviation for non-C collations is
    2055             :      * broken on many common platforms; testing of multiple versions of glibc
    2056             :      * reveals that, for many locales, strcoll() and strxfrm() do not return
    2057             :      * consistent results, which is fatal to this optimization.  While no
    2058             :      * other libc other than Cygwin has so far been shown to have a problem,
    2059             :      * we take the conservative course of action for right now and disable
    2060             :      * this categorically.  (Users who are certain this isn't a problem on
    2061             :      * their system can define TRUST_STRXFRM.)
    2062             :      *
    2063             :      * Even apart from the risk of broken locales, it's possible that there
    2064             :      * are platforms where the use of abbreviated keys should be disabled at
    2065             :      * compile time.  Having only 4 byte datums could make worst-case
    2066             :      * performance drastically more likely, for example.  Moreover, macOS's
    2067             :      * strxfrm() implementation is known to not effectively concentrate a
    2068             :      * significant amount of entropy from the original string in earlier
    2069             :      * transformed blobs.  It's possible that other supported platforms are
    2070             :      * similarly encumbered.  So, if we ever get past disabling this
    2071             :      * categorically, we may still want or need to disable it for particular
    2072             :      * platforms.
                            *
                            * As coded, without TRUST_STRXFRM a requested abbreviation survives
                            * only for the C collation or for a locale whose provider is ICU.
                            * The default collation has locale == 0 and therefore loses it.
    2073             :      */
    2074             : #ifndef TRUST_STRXFRM
    2075       98362 :     if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
    2076       30432 :         abbreviate = false;
    2077             : #endif
    2078             : 
    2079             :     /*
    2080             :      * If we're using abbreviated keys, or if we're using a locale-aware
    2081             :      * comparison, we need to initialize a VarStringSortSupport object. Both
    2082             :      * cases will make use of the temporary buffers we initialize here for
    2083             :      * scratch space (and to detect requirement for BpChar semantics from
    2084             :      * caller), and the abbreviation case requires additional state.
                            *
                            * When neither condition holds (C collation without abbreviation),
                            * nothing is allocated and ssup_extra is not assigned here.
    2085             :      */
    2086       98362 :     if (abbreviate || !collate_c)
    2087             :     {
    2088       31790 :         sss = palloc(sizeof(VarStringSortSupport));
    2089       31790 :         sss->buf1 = palloc(TEXTBUFLEN);
    2090       31790 :         sss->buflen1 = TEXTBUFLEN;
    2091       31790 :         sss->buf2 = palloc(TEXTBUFLEN);
    2092       31790 :         sss->buflen2 = TEXTBUFLEN;
    2093             :         /* Start with invalid values */
    2094       31790 :         sss->last_len1 = -1;
    2095       31790 :         sss->last_len2 = -1;
    2096             :         /* Initialize */
    2097       31790 :         sss->last_returned = 0;
    2098       31790 :         sss->locale = locale;
    2099             : 
    2100             :         /*
    2101             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    2102             :          * constantly keep track of the variety of data that buf1 and buf2
    2103             :          * currently contain.
    2104             :          *
    2105             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    2106             :          * conversions and comparisons are batched into two distinct phases,
    2107             :          * but the correctness of caching cannot hinge upon this.  For
    2108             :          * comparison caching, buffer state is only trusted if cache_blob is
    2109             :          * found set to false, whereas strxfrm() caching only trusts the state
    2110             :          * when cache_blob is found set to true.
    2111             :          *
    2112             :          * Arbitrarily initialize cache_blob to true.
    2113             :          */
    2114       31790 :         sss->cache_blob = true;
    2115       31790 :         sss->collate_c = collate_c;
    2116       31790 :         sss->typid = typid;
    2117       31790 :         ssup->ssup_extra = sss;
    2118             : 
    2119             :         /*
    2120             :          * If possible, plan to use the abbreviated keys optimization.  The
    2121             :          * core code may switch back to authoritative comparator should
    2122             :          * abbreviation be aborted.
                                *
                                * The comparator chosen above is kept as abbrev_full_comparator
                                * and varstrcmp_abbrev takes its place as the comparator.
                                *
                                * NOTE(review): the rationale for the starting prop_card of 0.20
                                * and for the argument 10 given to initHyperLogLog() is not
                                * visible in this function -- confirm before changing either.
    2123             :          */
    2124       31790 :         if (abbreviate)
    2125             :         {
    2126        1358 :             sss->prop_card = 0.20;
    2127        1358 :             initHyperLogLog(&sss->abbr_card, 10);
    2128        1358 :             initHyperLogLog(&sss->full_card, 10);
    2129        1358 :             ssup->abbrev_full_comparator = ssup->comparator;
    2130        1358 :             ssup->comparator = varstrcmp_abbrev;
    2131        1358 :             ssup->abbrev_converter = varstr_abbrev_convert;
    2132        1358 :             ssup->abbrev_abort = varstr_abbrev_abort;
    2133             :         }
    2134             :     }
    2135       98362 : }
    2136             : 
    2137             : /*
    2138             :  * sortsupport comparison func (for C locale case)
                        *
                        * Compares the two strings bytewise over the length of the shorter one.
                        * If those bytes are equal and the lengths differ, the shorter string
                        * sorts first (-1 or 1 is returned).  Otherwise the memcmp() result is
                        * returned as is.  The ssup argument is not used.
    2139             :  */
    2140             : static int
    2141    58491054 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2142             : {
    2143    58491054 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2144    58491054 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2145             :     char       *a1p,
    2146             :                *a2p;
    2147             :     int         len1,
    2148             :                 len2,
    2149             :                 result;
    2150             : 
    2151    58491054 :     a1p = VARDATA_ANY(arg1);
    2152    58491054 :     a2p = VARDATA_ANY(arg2);
    2153             : 
    2154    58491054 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2155    58491054 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2156             : 
                           /* Equal common prefix: fall back to comparing the lengths */
    2157    58491054 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2158    58491054 :     if ((result == 0) && (len1 != len2))
    2159     1062544 :         result = (len1 < len2) ? -1 : 1;
    2160             : 
    2161             :     /* We can't afford to leak memory here. */
                           /*
                            * A pointer that differs from the incoming datum means
                            * DatumGetVarStringPP() handed back a separate object, which is
                            * released here.
                            */
    2162    58491054 :     if (PointerGetDatum(arg1) != x)
    2163           0 :         pfree(arg1);
    2164    58491054 :     if (PointerGetDatum(arg2) != y)
    2165           0 :         pfree(arg2);
    2166             : 
    2167    58491054 :     return result;
    2168             : }
    2169             : 
    2170             : /*
    2171             :  * sortsupport comparison func (for BpChar C locale case)
    2172             :  *
    2173             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    2174             :  * varstr_sortsupport BpChar case, modeled on
    2175             :  * internal_bpchar_pattern_compare().
                        *
                        * Same shape as a plain bytewise comparison, except that both lengths
                        * come from bpchartruelen() instead of the stored data length.  The
                        * ssup argument is not used.
    2176             :  */
    2177             : static int
    2178          16 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2179             : {
    2180          16 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2181          16 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2182             :     char       *a1p,
    2183             :                *a2p;
    2184             :     int         len1,
    2185             :                 len2,
    2186             :                 result;
    2187             : 
    2188          16 :     a1p = VARDATA_ANY(arg1);
    2189          16 :     a2p = VARDATA_ANY(arg2);
    2190             : 
                           /* Lengths used for comparison are the bpchartruelen() values */
    2191          16 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2192          16 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2193             : 
                           /* Equal common prefix: the shorter string sorts first */
    2194          16 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2195          16 :     if ((result == 0) && (len1 != len2))
    2196           0 :         result = (len1 < len2) ? -1 : 1;
    2197             : 
    2198             :     /* We can't afford to leak memory here. */
                           /*
                            * A pointer that differs from the incoming datum means
                            * DatumGetBpCharPP() handed back a separate object, which is
                            * released here.
                            */
    2199          16 :     if (PointerGetDatum(arg1) != x)
    2200           0 :         pfree(arg1);
    2201          16 :     if (PointerGetDatum(arg2) != y)
    2202           0 :         pfree(arg2);
    2203             : 
    2204          16 :     return result;
    2205             : }
    2206             : 
    2207             : /*
    2208             :  * sortsupport comparison func (for NAME C locale case)
                        *
                        * Returns the strncmp() result for the two name strings, looking at no
                        * more than NAMEDATALEN bytes of each.  Nothing is allocated or freed,
                        * and the ssup argument is not used.
    2209             :  */
    2210             : static int
    2211    71723734 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
    2212             : {
    2213    71723734 :     Name        arg1 = DatumGetName(x);
    2214    71723734 :     Name        arg2 = DatumGetName(y);
    2215             : 
    2216    71723734 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
    2217             : }
    2218             : 
    2219             : /*
    2220             :  * sortsupport comparison func (for locale case with all varlena types)
                        *
                        * Thin wrapper: extracts the data pointer and data length of each
                        * datum, lets varstrfastcmp_locale() do the comparison, and then
                        * releases any separate objects produced while unpacking the datums.
    2221             :  */
    2222             : static int
    2223    29702256 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2224             : {
    2225    29702256 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2226    29702256 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2227             :     char       *a1p,
    2228             :                *a2p;
    2229             :     int         len1,
    2230             :                 len2,
    2231             :                 result;
    2232             : 
    2233    29702256 :     a1p = VARDATA_ANY(arg1);
    2234    29702256 :     a2p = VARDATA_ANY(arg2);
    2235             : 
    2236    29702256 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2237    29702256 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2238             : 
                           /* ssup is passed through unchanged to the shared comparison code */
    2239    29702256 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
    2240             : 
    2241             :     /* We can't afford to leak memory here. */
                           /*
                            * A pointer that differs from the incoming datum means
                            * DatumGetVarStringPP() handed back a separate object, which is
                            * released here.
                            */
    2242    29702256 :     if (PointerGetDatum(arg1) != x)
    2243           0 :         pfree(arg1);
    2244    29702256 :     if (PointerGetDatum(arg2) != y)
    2245           0 :         pfree(arg2);
    2246             : 
    2247    29702256 :     return result;
    2248             : }
    2249             : 
    2250             : /*
    2251             :  * sortsupport comparison func (for locale case with NAME type)
                        *
                        * Passes both name strings to varstrfastcmp_locale().  The lengths are
                        * measured with strlen(), so each name is treated as a NUL-terminated
                        * string.  Nothing is allocated or freed in this wrapper.
    2252             :  */
    2253             : static int
    2254           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2255             : {
    2256           0 :     Name        arg1 = DatumGetName(x);
    2257           0 :     Name        arg2 = DatumGetName(y);
    2258             : 
    2259           0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
    2260           0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
    2261             :                                 ssup);
    2262             : }
    2263             : 
    2264             : /*
    2265             :  * sortsupport comparison func for locale cases
    2266             :  */
    2267             : static int
    2268    29702256 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
    2269             : {
    2270    29702256 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2271             :     int         result;
    2272             :     bool        arg1_match;
    2273             : 
    2274             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2275    29702256 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2276             :     {
    2277             :         /*
    2278             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2279             :          * last_len2.  Existing contents of buffers might still be used by
    2280             :          * next call.
    2281             :          *
    2282             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2283             :          * even though that implies that the memcmp() will usually be
    2284             :          * performed for BpChar callers (though multibyte characters could
    2285             :          * still prevent that from occurring).  The memcmp() is still very
    2286             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2287             :          * (not limited to padding), so we need make no distinction between
    2288             :          * padding space characters and "real" space characters.
    2289             :          */
    2290    11164404 :         return 0;
    2291             :     }
    2292             : 
    2293    18537852 :     if (sss->typid == BPCHAROID)
    2294             :     {
    2295             :         /* Get true number of bytes, ignoring trailing spaces */
    2296       56158 :         len1 = bpchartruelen(a1p, len1);
    2297       56158 :         len2 = bpchartruelen(a2p, len2);
    2298             :     }
    2299             : 
    2300    18537852 :     if (len1 >= sss->buflen1)
    2301             :     {
    2302           0 :         pfree(sss->buf1);
    2303           0 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2304           0 :         sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
    2305             :     }
    2306    18537852 :     if (len2 >= sss->buflen2)
    2307             :     {
    2308           0 :         pfree(sss->buf2);
    2309           0 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2310           0 :         sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
    2311             :     }
    2312             : 
    2313             :     /*
    2314             :      * We're likely to be asked to compare the same strings repeatedly, and
    2315             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2316             :      * comparisons, even though in general there is no reason to think that
    2317             :      * that will work out (every string datum may be unique).  Caching does
    2318             :      * not slow things down measurably when it doesn't work out, and can speed
    2319             :      * things up by rather a lot when it does.  In part, this is because the
    2320             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2321             :      * when the last comparison's result cannot be reused.
    2322             :      */
    2323    18537852 :     arg1_match = true;
    2324    18537852 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2325             :     {
    2326    16333116 :         arg1_match = false;
    2327    16333116 :         memcpy(sss->buf1, a1p, len1);
    2328    16333116 :         sss->buf1[len1] = '\0';
    2329    16333116 :         sss->last_len1 = len1;
    2330             :     }
    2331             : 
    2332             :     /*
    2333             :      * If we're comparing the same two strings as last time, we can return the
    2334             :      * same answer without calling strcoll() again.  This is more likely than
    2335             :      * it seems (at least with moderate to low cardinality sets), because
    2336             :      * quicksort compares the same pivot against many values.
    2337             :      */
    2338    18537852 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2339             :     {
    2340     2985602 :         memcpy(sss->buf2, a2p, len2);
    2341     2985602 :         sss->buf2[len2] = '\0';
    2342     2985602 :         sss->last_len2 = len2;
    2343             :     }
    2344    15552250 :     else if (arg1_match && !sss->cache_blob)
    2345             :     {
    2346             :         /* Use result cached following last actual strcoll() call */
    2347     1912586 :         return sss->last_returned;
    2348             :     }
    2349             : 
    2350    16625266 :     if (sss->locale)
    2351             :     {
    2352           0 :         if (sss->locale->provider == COLLPROVIDER_ICU)
    2353             :         {
    2354             : #ifdef USE_ICU
    2355             : #ifdef HAVE_UCOL_STRCOLLUTF8
    2356             :             if (GetDatabaseEncoding() == PG_UTF8)
    2357             :             {
    2358             :                 UErrorCode  status;
    2359             : 
    2360             :                 status = U_ZERO_ERROR;
    2361             :                 result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
    2362             :                                           a1p, len1,
    2363             :                                           a2p, len2,
    2364             :                                           &status);
    2365             :                 if (U_FAILURE(status))
    2366             :                     ereport(ERROR,
    2367             :                             (errmsg("collation failed: %s", u_errorName(status))));
    2368             :             }
    2369             :             else
    2370             : #endif
    2371             :             {
    2372             :                 int32_t     ulen1,
    2373             :                             ulen2;
    2374             :                 UChar      *uchar1,
    2375             :                            *uchar2;
    2376             : 
    2377             :                 ulen1 = icu_to_uchar(&uchar1, a1p, len1);
    2378             :                 ulen2 = icu_to_uchar(&uchar2, a2p, len2);
    2379             : 
    2380             :                 result = ucol_strcoll(sss->locale->info.icu.ucol,
    2381             :                                       uchar1, ulen1,
    2382             :                                       uchar2, ulen2);
    2383             : 
    2384             :                 pfree(uchar1);
    2385             :                 pfree(uchar2);
    2386             :             }
    2387             : #else                           /* not USE_ICU */
    2388             :             /* shouldn't happen */
    2389           0 :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2390             : #endif                          /* not USE_ICU */
    2391             :         }
    2392             :         else
    2393             :         {
    2394             : #ifdef HAVE_LOCALE_T
    2395           0 :             result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
    2396             : #else
    2397             :             /* shouldn't happen */
    2398             :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2399             : #endif
    2400             :         }
    2401             :     }
    2402             :     else
    2403    16625266 :         result = strcoll(sss->buf1, sss->buf2);
    2404             : 
    2405             :     /* Break tie if necessary. */
    2406    16625266 :     if (result == 0 &&
    2407           0 :         (!sss->locale || sss->locale->deterministic))
    2408           0 :         result = strcmp(sss->buf1, sss->buf2);
    2409             : 
    2410             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2411    16625266 :     sss->cache_blob = false;
    2412    16625266 :     sss->last_returned = result;
    2413    16625266 :     return result;
    2414             : }
    2415             : 
    2416             : /*
    2417             :  * Abbreviated key comparison func
    2418             :  */
    2419             : static int
    2420     3413778 : varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
    2421             : {
    2422             :     /*
    2423             :      * When 0 is returned, the core system will call varstrfastcmp_c()
    2424             :      * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale().  Even a
    2425             :      * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
    2426             :      * authoritatively, for the same reason that there is a strcoll()
    2427             :      * tie-breaker call to strcmp() in varstr_cmp().
    2428             :      */
    2429     3413778 :     if (x > y)
    2430     1480162 :         return 1;
    2431     1933616 :     else if (x == y)
    2432      490804 :         return 0;
    2433             :     else
    2434     1442812 :         return -1;
    2435             : }
    2436             : 
    2437             : /*
    2438             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    2439             :  * representation.  Our encoding strategy is simple -- pack the first
    2440             :  * sizeof(Datum) bytes of a strxfrm() blob into a Datum (byte-swapped on
    2441             :  * little-endian machines), and treat it as an unsigned integer.  When the "C"
    2442             :  * locale is used, or in case of bytea, just memcpy() from original instead.
    2443             :  */
    2444             : static Datum
    2445      346100 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2446             : {
    2447      346100 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2448      346100 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2449      346100 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2450             : 
    2451             :     /* working state */
    2452             :     Datum       res;
    2453             :     char       *pres;
    2454             :     int         len;
    2455             :     uint32      hash;
    2456             : 
    2457      346100 :     pres = (char *) &res;
    2458             :     /* memset(), so any non-overwritten bytes are NUL */
    2459      346100 :     memset(pres, 0, sizeof(Datum));
    2460      346100 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2461             : 
    2462             :     /* Get number of bytes, ignoring trailing spaces */
    2463      346100 :     if (sss->typid == BPCHAROID)
    2464           0 :         len = bpchartruelen(authoritative_data, len);
    2465             : 
    2466             :     /*
    2467             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2468             :      * abbreviate keys.  The full comparator for the C locale is always
    2469             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2470             :      * always force the C collation -- bytea isn't a collatable type, but this
    2471             :      * approach is convenient) to use strxfrm().  This is because bytea
    2472             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2473             :      *
    2474             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2475             :      * strings because varstrcmp_abbrev() need not make a distinction between
    2476             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2477             :      * authoritative representation.  Hopefully a comparison at or past one
    2478             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2479             :      * without consulting the authoritative representation; specifically, some
    2480             :      * later non-NUL byte in the longer string can resolve the comparison
    2481             :      * against a subsequent terminating NUL in the shorter string.  There will
    2482             :      * usually be what is effectively a "length-wise" resolution there and
    2483             :      * then.
    2484             :      *
    2485             :      * If that doesn't work out -- if all bytes in the longer string
    2486             :      * positioned at or past the offset of the smaller string's (first)
    2487             :      * terminating NUL are actually representative of NUL bytes in the
    2488             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2489             :      * towards the end of the longer string iff it happens to still be small)
    2490             :      * -- then an authoritative tie-breaker will happen, and do the right
    2491             :      * thing: explicitly consider string length.
    2492             :      */
    2493      346100 :     if (sss->collate_c)
    2494      346100 :         memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
    2495             :     else
    2496             :     {
    2497             :         Size        bsize;
    2498             : #ifdef USE_ICU
    2499             :         int32_t     ulen = -1;
    2500             :         UChar      *uchar = NULL;
    2501             : #endif
    2502             : 
    2503             :         /*
    2504             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2505             :          * analogs.
    2506             :          */
    2507             : 
    2508             :         /* By convention, buffer 1 holds the NUL-terminated original string */
    2509           0 :         if (len >= sss->buflen1)
    2510             :         {
    2511           0 :             pfree(sss->buf1);
    2512           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2513           0 :             sss->buf1 = palloc(sss->buflen1);
    2514             :         }
    2515             : 
    2516             :         /* Might be able to reuse strxfrm() blob from last call */
    2517           0 :         if (sss->last_len1 == len && sss->cache_blob &&
    2518           0 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2519             :         {
    2520           0 :             memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
    2521             :             /* No change affecting cardinality, so no hashing required */
    2522           0 :             goto done;
    2523             :         }
    2524             : 
    2525           0 :         memcpy(sss->buf1, authoritative_data, len);
    2526             : 
    2527             :         /*
    2528             :          * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
    2529             :          * necessary for ICU, but doesn't hurt.
    2530             :          */
    2531           0 :         sss->buf1[len] = '\0';
    2532           0 :         sss->last_len1 = len;
    2533             : 
    2534             : #ifdef USE_ICU
    2535             :         /* When using ICU and not UTF8, convert string to UChar. */
    2536             :         if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
    2537             :             GetDatabaseEncoding() != PG_UTF8)
    2538             :             ulen = icu_to_uchar(&uchar, sss->buf1, len);
    2539             : #endif
    2540             : 
    2541             :         /*
    2542             :          * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
    2543             :          * and try again.  Both of these functions have the result buffer
    2544             :          * content undefined if the result did not fit, so we need to retry
    2545             :          * until everything fits, even though we only need the first few bytes
    2546             :          * in the end.  When using ucol_nextSortKeyPart(), however, we only
    2547             :          * ask for as many bytes as we actually need.
    2548             :          */
    2549             :         for (;;)
    2550             :         {
    2551             : #ifdef USE_ICU
    2552             :             if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
    2553             :             {
    2554             :                 /*
    2555             :                  * When using UTF8, use the iteration interface so we only
    2556             :                  * need to produce as many bytes as we actually need.
    2557             :                  */
    2558             :                 if (GetDatabaseEncoding() == PG_UTF8)
    2559             :                 {
    2560             :                     UCharIterator iter;
    2561             :                     uint32_t    state[2];
    2562             :                     UErrorCode  status;
    2563             : 
    2564             :                     uiter_setUTF8(&iter, sss->buf1, len);
    2565             :                     state[0] = state[1] = 0;    /* won't need that again */
    2566             :                     status = U_ZERO_ERROR;
    2567             :                     bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
    2568             :                                                  &iter,
    2569             :                                                  state,
    2570             :                                                  (uint8_t *) sss->buf2,
    2571             :                                                  Min(sizeof(Datum), sss->buflen2),
    2572             :                                                  &status);
    2573             :                     if (U_FAILURE(status))
    2574             :                         ereport(ERROR,
    2575             :                                 (errmsg("sort key generation failed: %s",
    2576             :                                         u_errorName(status))));
    2577             :                 }
    2578             :                 else
    2579             :                     bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
    2580             :                                             uchar, ulen,
    2581             :                                             (uint8_t *) sss->buf2, sss->buflen2);
    2582             :             }
    2583             :             else
    2584             : #endif
    2585             : #ifdef HAVE_LOCALE_T
    2586           0 :             if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
    2587           0 :                 bsize = strxfrm_l(sss->buf2, sss->buf1,
    2588           0 :                                   sss->buflen2, sss->locale->info.lt);
    2589             :             else
    2590             : #endif
    2591           0 :                 bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
    2592             : 
    2593           0 :             sss->last_len2 = bsize;
    2594           0 :             if (bsize < sss->buflen2)
    2595           0 :                 break;
    2596             : 
    2597             :             /*
    2598             :              * Grow buffer and retry.
    2599             :              */
    2600           0 :             pfree(sss->buf2);
    2601           0 :             sss->buflen2 = Max(bsize + 1,
    2602             :                                Min(sss->buflen2 * 2, MaxAllocSize));
    2603           0 :             sss->buf2 = palloc(sss->buflen2);
    2604             :         }
    2605             : 
    2606             :         /*
    2607             :          * Every Datum byte is always compared.  This is safe because the
    2608             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2609             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2610             :          * logically representing termination.
    2611             :          *
    2612             :          * (Actually, even if there were NUL bytes in the blob it would be
    2613             :          * okay.  See remarks on bytea case above.)
    2614             :          */
    2615           0 :         memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
    2616             : 
    2617             : #ifdef USE_ICU
    2618             :         if (uchar)
    2619             :             pfree(uchar);
    2620             : #endif
    2621             :     }
    2622             : 
    2623             :     /*
    2624             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2625             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2626             :      * the worst case, where we do many string transformations for no saving
    2627             :      * in full strcoll()-based comparisons.  These statistics are used by
    2628             :      * varstr_abbrev_abort().
    2629             :      *
    2630             :      * First, hash the key proper, or a significant fraction of it: only the
    2631             :      * first PG_CACHE_LINE_SIZE bytes are hashed, to limit overhead.  Mix in
    2632             :      * the length to compensate for cases where differences lie past that.
    2633             :      */
    2634      346100 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2635             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2636             : 
    2637      346100 :     if (len > PG_CACHE_LINE_SIZE)
    2638           8 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2639             : 
    2640      346100 :     addHyperLogLog(&sss->full_card, hash);
    2641             : 
    2642             :     /* Hash abbreviated key */
    2643             : #if SIZEOF_DATUM == 8
    2644             :     {
    2645             :         uint32      lohalf,
    2646             :                     hihalf;
    2647             : 
    2648      346100 :         lohalf = (uint32) res;
    2649      346100 :         hihalf = (uint32) (res >> 32);
    2650      346100 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2651             :     }
    2652             : #else                           /* SIZEOF_DATUM != 8 */
    2653             :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2654             : #endif
    2655             : 
    2656      346100 :     addHyperLogLog(&sss->abbr_card, hash);
    2657             : 
    2658             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2659      346100 :     sss->cache_blob = true;
    2660      346100 : done:
    2661             : 
    2662             :     /*
    2663             :      * Byteswap on little-endian machines.
    2664             :      *
    2665             :      * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
    2666             :      * comparator) works correctly on all platforms.  If we didn't do this,
    2667             :      * the comparator would have to call memcmp() with a pair of pointers to
    2668             :      * the first byte of each abbreviated key, which is slower.
    2669             :      */
    2670      346100 :     res = DatumBigEndianToNative(res);
    2671             : 
    2672             :     /* Don't leak memory here */
    2673      346100 :     if (PointerGetDatum(authoritative) != original)
    2674           0 :         pfree(authoritative);
    2675             : 
    2676      346100 :     return res;
    2677             : }
    2678             : 
    2679             : /*
    2680             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2681             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    2682             :  * should be aborted, based on its projected effectiveness.
    2683             :  */
    2684             : static bool
    2685         962 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2686             : {
    2687         962 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2688             :     double      abbrev_distinct,
    2689             :                 key_distinct;
    2690             : 
    2691             :     Assert(ssup->abbreviate);
    2692             : 
    2693             :     /* Have a little patience */
    2694         962 :     if (memtupcount < 100)
    2695         462 :         return false;
    2696             : 
    2697         500 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2698         500 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2699             : 
    2700             :     /*
    2701             :      * Clamp cardinality estimates to at least one distinct value.  While
    2702             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2703             :      * failing to clamp might misrepresent costs.
    2704             :      */
    2705         500 :     if (abbrev_distinct <= 1.0)
    2706           0 :         abbrev_distinct = 1.0;
    2707             : 
    2708         500 :     if (key_distinct <= 1.0)
    2709           0 :         key_distinct = 1.0;
    2710             : 
    2711             :     /*
    2712             :      * In the worst case all abbreviated keys are identical, while at the same
    2713             :      * time there are differences within full key strings not captured in
    2714             :      * abbreviations.
    2715             :      */
    2716             : #ifdef TRACE_SORT
    2717         500 :     if (trace_sort)
    2718             :     {
    2719           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2720             : 
    2721           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2722             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2723             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2724             :              sss->prop_card);
    2725             :     }
    2726             : #endif
    2727             : 
    2728             :     /*
    2729             :      * If the number of distinct abbreviated keys approximately matches the
    2730             :      * number of distinct authoritative original keys, that's reason enough to
    2731             :      * proceed.  We can win even with a very low cardinality set if most
    2732             :      * tie-breakers only memcmp().  This is by far the most important
    2733             :      * consideration.
    2734             :      *
    2735             :      * While comparisons that are resolved at the abbreviated key level are
    2736             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2737             :      * those two outcomes are so much cheaper than a full strcoll() once
    2738             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2739             :      * cardinality against the overall size of the set in order to more
    2740             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2741             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2742             :      * resolution are equivalent.
    2743             :      */
    2744         500 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2745             :     {
    2746             :         /*
    2747             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2748             :          * aggressively for next call.
    2749             :          *
    2750             :          * This is useful because the number of comparisons required on
    2751             :          * average increases at a linearithmic rate, and at roughly 10,000
    2752             :          * tuples that factor will start to dominate over the linear costs of
    2753             :          * string transformation (this is a conservative estimate).  The decay
    2754             :          * rate is chosen to be a little less aggressive than halving -- which
    2755             :          * (since we're called at points at which memtupcount has doubled)
    2756             :          * would never see the cost model actually abort past the first call
    2757             :          * following a decay.  This decay rate is mostly a precaution against
    2758             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2759             :          * full key cardinality.  The decay also serves to prevent a marginal
    2760             :          * case from being aborted too late, when too much has already been
    2761             :          * invested in string transformation.
    2762             :          *
    2763             :          * It's possible for sets of several million distinct strings with
    2764             :          * mere tens of thousands of distinct abbreviated keys to still
    2765             :          * benefit very significantly.  This will generally occur provided
    2766             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2767             :          * set's full keys. If it isn't so, we hope to catch that early and
    2768             :          * abort.  If it isn't caught early, by the time the problem is
    2769             :          * apparent it's probably not worth aborting.
    2770             :          */
    2771         500 :         if (memtupcount > 10000)
    2772           0 :             sss->prop_card *= 0.65;
    2773             : 
    2774         500 :         return false;
    2775             :     }
    2776             : 
    2777             :     /*
    2778             :      * Abort abbreviation strategy.
    2779             :      *
    2780             :      * The worst case, where all abbreviated keys are identical while all
    2781             :      * original strings differ, will typically only see a regression of about
    2782             :      * 10% in execution time for small to medium sized lists of strings.  By
    2783             :      * contrast, on modern CPUs where cache stalls are the dominant cost, we can
    2784             :      * often expect very large improvements, particularly with sets of strings
    2785             :      * of moderately high to high abbreviated cardinality.  There is little to
    2786             :      * lose but much to gain, which our strategy reflects.
    2787             :      */
    2788             : #ifdef TRACE_SORT
    2789           0 :     if (trace_sort)
    2790           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2791             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2792             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2793             : #endif
    2794             : 
    2795           0 :     return true;
    2796             : }
    2797             : 
    2798             : /*
    2799             :  * Generic equalimage support function for character types' operator classes.
    2800             :  * Disables the use of deduplication with nondeterministic collations.
    2801             :  */
    2802             : Datum
    2803        1860 : btvarstrequalimage(PG_FUNCTION_ARGS)
    2804             : {
    2805             :     /* Oid      opcintype = PG_GETARG_OID(0); */
    2806        1860 :     Oid         collid = PG_GET_COLLATION();
    2807             : 
    2808        1860 :     check_collation_set(collid);
    2809             : 
    2810        1860 :     if (lc_collate_is_c(collid) ||
    2811           0 :         collid == DEFAULT_COLLATION_OID ||
    2812           0 :         get_collation_isdeterministic(collid))
    2813        1860 :         PG_RETURN_BOOL(true);
    2814             :     else
    2815           0 :         PG_RETURN_BOOL(false);
    2816             : }
    2817             : 
    2818             : Datum
    2819      137394 : text_larger(PG_FUNCTION_ARGS)
    2820             : {
    2821      137394 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2822      137394 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2823             :     text       *result;
    2824             : 
    2825      137394 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
    2826             : 
    2827      137394 :     PG_RETURN_TEXT_P(result);
    2828             : }
    2829             : 
    2830             : Datum
    2831       41072 : text_smaller(PG_FUNCTION_ARGS)
    2832             : {
    2833       41072 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2834       41072 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2835             :     text       *result;
    2836             : 
    2837       41072 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
    2838             : 
    2839       41072 :     PG_RETURN_TEXT_P(result);
    2840             : }
    2841             : 
    2842             : 
    2843             : /*
    2844             :  * Cross-type comparison functions for types text and name.
    2845             :  */
    2846             : 
    2847             : Datum
    2848      114528 : nameeqtext(PG_FUNCTION_ARGS)
    2849             : {
    2850      114528 :     Name        arg1 = PG_GETARG_NAME(0);
    2851      114528 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2852      114528 :     size_t      len1 = strlen(NameStr(*arg1));
    2853      114528 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2854      114528 :     Oid         collid = PG_GET_COLLATION();
    2855             :     bool        result;
    2856             : 
    2857      114528 :     check_collation_set(collid);
    2858             : 
    2859      114528 :     if (collid == C_COLLATION_OID)
    2860      201636 :         result = (len1 == len2 &&
    2861       87108 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2862             :     else
    2863           0 :         result = (varstr_cmp(NameStr(*arg1), len1,
    2864           0 :                              VARDATA_ANY(arg2), len2,
    2865             :                              collid) == 0);
    2866             : 
    2867      114528 :     PG_FREE_IF_COPY(arg2, 1);
    2868             : 
    2869      114528 :     PG_RETURN_BOOL(result);
    2870             : }
    2871             : 
    2872             : Datum
    2873         256 : texteqname(PG_FUNCTION_ARGS)
    2874             : {
    2875         256 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2876         256 :     Name        arg2 = PG_GETARG_NAME(1);
    2877         256 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2878         256 :     size_t      len2 = strlen(NameStr(*arg2));
    2879         256 :     Oid         collid = PG_GET_COLLATION();
    2880             :     bool        result;
    2881             : 
    2882         256 :     check_collation_set(collid);
    2883             : 
    2884         256 :     if (collid == C_COLLATION_OID)
    2885         376 :         result = (len1 == len2 &&
    2886         120 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2887             :     else
    2888           0 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
    2889           0 :                              NameStr(*arg2), len2,
    2890             :                              collid) == 0);
    2891             : 
    2892         256 :     PG_FREE_IF_COPY(arg1, 0);
    2893             : 
    2894         256 :     PG_RETURN_BOOL(result);
    2895             : }
    2896             : 
    2897             : Datum
    2898           0 : namenetext(PG_FUNCTION_ARGS)
    2899             : {
    2900           0 :     Name        arg1 = PG_GETARG_NAME(0);
    2901           0 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2902           0 :     size_t      len1 = strlen(NameStr(*arg1));
    2903           0 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2904           0 :     Oid         collid = PG_GET_COLLATION();
    2905             :     bool        result;
    2906             : 
    2907           0 :     check_collation_set(collid);
    2908             : 
    2909           0 :     if (collid == C_COLLATION_OID)
    2910           0 :         result = !(len1 == len2 &&
    2911           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2912             :     else
    2913           0 :         result = !(varstr_cmp(NameStr(*arg1), len1,
    2914           0 :                               VARDATA_ANY(arg2), len2,
    2915             :                               collid) == 0);
    2916             : 
    2917           0 :     PG_FREE_IF_COPY(arg2, 1);
    2918             : 
    2919           0 :     PG_RETURN_BOOL(result);
    2920             : }
    2921             : 
    2922             : Datum
    2923           0 : textnename(PG_FUNCTION_ARGS)
    2924             : {
    2925           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2926           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2927           0 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2928           0 :     size_t      len2 = strlen(NameStr(*arg2));
    2929           0 :     Oid         collid = PG_GET_COLLATION();
    2930             :     bool        result;
    2931             : 
    2932           0 :     check_collation_set(collid);
    2933             : 
    2934           0 :     if (collid == C_COLLATION_OID)
    2935           0 :         result = !(len1 == len2 &&
    2936           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2937             :     else
    2938           0 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
    2939           0 :                               NameStr(*arg2), len2,
    2940             :                               collid) == 0);
    2941             : 
    2942           0 :     PG_FREE_IF_COPY(arg1, 0);
    2943             : 
    2944           0 :     PG_RETURN_BOOL(result);
    2945             : }
    2946             : 
    2947             : Datum
    2948       76156 : btnametextcmp(PG_FUNCTION_ARGS)
    2949             : {
    2950       76156 :     Name        arg1 = PG_GETARG_NAME(0);
    2951       76156 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2952             :     int32       result;
    2953             : 
    2954      152312 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
    2955      152312 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
    2956             :                         PG_GET_COLLATION());
    2957             : 
    2958       76156 :     PG_FREE_IF_COPY(arg2, 1);
    2959             : 
    2960       76156 :     PG_RETURN_INT32(result);
    2961             : }
    2962             : 
    2963             : Datum
    2964           0 : bttextnamecmp(PG_FUNCTION_ARGS)
    2965             : {
    2966           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2967           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2968             :     int32       result;
    2969             : 
    2970           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
    2971           0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
    2972             :                         PG_GET_COLLATION());
    2973             : 
    2974           0 :     PG_FREE_IF_COPY(arg1, 0);
    2975             : 
    2976           0 :     PG_RETURN_INT32(result);
    2977             : }
    2978             : 
    2979             : #define CmpCall(cmpfunc) \
    2980             :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
    2981             :                                           PG_GET_COLLATION(), \
    2982             :                                           PG_GETARG_DATUM(0), \
    2983             :                                           PG_GETARG_DATUM(1)))
    2984             : 
    2985             : Datum
    2986       24486 : namelttext(PG_FUNCTION_ARGS)
    2987             : {
    2988       24486 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
    2989             : }
    2990             : 
    2991             : Datum
    2992           0 : nameletext(PG_FUNCTION_ARGS)
    2993             : {
    2994           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
    2995             : }
    2996             : 
    2997             : Datum
    2998           0 : namegttext(PG_FUNCTION_ARGS)
    2999             : {
    3000           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
    3001             : }
    3002             : 
    3003             : Datum
    3004       23210 : namegetext(PG_FUNCTION_ARGS)
    3005             : {
    3006       23210 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
    3007             : }
    3008             : 
    3009             : Datum
    3010           0 : textltname(PG_FUNCTION_ARGS)
    3011             : {
    3012           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
    3013             : }
    3014             : 
    3015             : Datum
    3016           0 : textlename(PG_FUNCTION_ARGS)
    3017             : {
    3018           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
    3019             : }
    3020             : 
    3021             : Datum
    3022           0 : textgtname(PG_FUNCTION_ARGS)
    3023             : {
    3024           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
    3025             : }
    3026             : 
    3027             : Datum
    3028           0 : textgename(PG_FUNCTION_ARGS)
    3029             : {
    3030           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
    3031             : }
    3032             : 
    3033             : #undef CmpCall
    3034             : 
    3035             : 
    3036             : /*
    3037             :  * The following operators support character-by-character comparison
    3038             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    3039             :  * Note that the regular texteq/textne comparison operators, and regular
    3040             :  * support functions 1 and 2 with "C" collation are assumed to be
    3041             :  * compatible with these!
    3042             :  */
    3043             : 
    3044             : static int
    3045      100640 : internal_text_pattern_compare(text *arg1, text *arg2)
    3046             : {
    3047             :     int         result;
    3048             :     int         len1,
    3049             :                 len2;
    3050             : 
    3051      100640 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3052      100640 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3053             : 
    3054      100640 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3055      100640 :     if (result != 0)
    3056      100604 :         return result;
    3057          36 :     else if (len1 < len2)
    3058           0 :         return -1;
    3059          36 :     else if (len1 > len2)
    3060          12 :         return 1;
    3061             :     else
    3062          24 :         return 0;
    3063             : }
    3064             : 
    3065             : 
    3066             : Datum
    3067       25608 : text_pattern_lt(PG_FUNCTION_ARGS)
    3068             : {
    3069       25608 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3070       25608 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3071             :     int         result;
    3072             : 
    3073       25608 :     result = internal_text_pattern_compare(arg1, arg2);
    3074             : 
    3075       25608 :     PG_FREE_IF_COPY(arg1, 0);
    3076       25608 :     PG_FREE_IF_COPY(arg2, 1);
    3077             : 
    3078       25608 :     PG_RETURN_BOOL(result < 0);
    3079             : }
    3080             : 
    3081             : 
    3082             : Datum
    3083       25008 : text_pattern_le(PG_FUNCTION_ARGS)
    3084             : {
    3085       25008 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3086       25008 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3087             :     int         result;
    3088             : 
    3089       25008 :     result = internal_text_pattern_compare(arg1, arg2);
    3090             : 
    3091       25008 :     PG_FREE_IF_COPY(arg1, 0);
    3092       25008 :     PG_FREE_IF_COPY(arg2, 1);
    3093             : 
    3094       25008 :     PG_RETURN_BOOL(result <= 0);
    3095             : }
    3096             : 
    3097             : 
    3098             : Datum
    3099       25008 : text_pattern_ge(PG_FUNCTION_ARGS)
    3100             : {
    3101       25008 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3102       25008 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3103             :     int         result;
    3104             : 
    3105       25008 :     result = internal_text_pattern_compare(arg1, arg2);
    3106             : 
    3107       25008 :     PG_FREE_IF_COPY(arg1, 0);
    3108       25008 :     PG_FREE_IF_COPY(arg2, 1);
    3109             : 
    3110       25008 :     PG_RETURN_BOOL(result >= 0);
    3111             : }
    3112             : 
    3113             : 
    3114             : Datum
    3115       25008 : text_pattern_gt(PG_FUNCTION_ARGS)
    3116             : {
    3117       25008 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3118       25008 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3119             :     int         result;
    3120             : 
    3121       25008 :     result = internal_text_pattern_compare(arg1, arg2);
    3122             : 
    3123       25008 :     PG_FREE_IF_COPY(arg1, 0);
    3124       25008 :     PG_FREE_IF_COPY(arg2, 1);
    3125             : 
    3126       25008 :     PG_RETURN_BOOL(result > 0);
    3127             : }
    3128             : 
    3129             : 
    3130             : Datum
    3131           8 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    3132             : {
    3133           8 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3134           8 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3135             :     int         result;
    3136             : 
    3137           8 :     result = internal_text_pattern_compare(arg1, arg2);
    3138             : 
    3139           8 :     PG_FREE_IF_COPY(arg1, 0);
    3140           8 :     PG_FREE_IF_COPY(arg2, 1);
    3141             : 
    3142           8 :     PG_RETURN_INT32(result);
    3143             : }
    3144             : 
    3145             : 
    3146             : Datum
    3147          78 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    3148             : {
    3149          78 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    3150             :     MemoryContext oldcontext;
    3151             : 
    3152          78 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    3153             : 
    3154             :     /* Use generic string SortSupport, forcing "C" collation */
    3155          78 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
    3156             : 
    3157          78 :     MemoryContextSwitchTo(oldcontext);
    3158             : 
    3159          78 :     PG_RETURN_VOID();
    3160             : }
    3161             : 
    3162             : 
    3163             : /*-------------------------------------------------------------
    3164             :  * byteaoctetlen
    3165             :  *
    3166             :  * get the number of bytes contained in an instance of type 'bytea'
    3167             :  *-------------------------------------------------------------
    3168             :  */
    3169             : Datum
    3170          22 : byteaoctetlen(PG_FUNCTION_ARGS)
    3171             : {
    3172          22 :     Datum       str = PG_GETARG_DATUM(0);
    3173             : 
    3174             :     /* We need not detoast the input at all */
    3175          22 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    3176             : }
    3177             : 
    3178             : /*
    3179             :  * byteacat -
    3180             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    3181             :  *    the two.
    3182             :  *
    3183             :  * Cloned from textcat and modified as required.
    3184             :  */
    3185             : Datum
    3186           0 : byteacat(PG_FUNCTION_ARGS)
    3187             : {
    3188           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3189           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3190             : 
    3191           0 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    3192             : }
    3193             : 
    3194             : /*
    3195             :  * bytea_catenate
    3196             :  *  Guts of byteacat(), broken out so it can be used by other functions
    3197             :  *
    3198             :  * Arguments can be in short-header form, but not compressed or out-of-line
    3199             :  */
    3200             : static bytea *
    3201          24 : bytea_catenate(bytea *t1, bytea *t2)
    3202             : {
    3203             :     bytea      *result;
    3204             :     int         len1,
    3205             :                 len2,
    3206             :                 len;
    3207             :     char       *ptr;
    3208             : 
    3209          24 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3210          24 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3211             : 
    3212             :     /* paranoia ... probably should throw error instead? */
    3213          24 :     if (len1 < 0)
    3214           0 :         len1 = 0;
    3215          24 :     if (len2 < 0)
    3216           0 :         len2 = 0;
    3217             : 
    3218          24 :     len = len1 + len2 + VARHDRSZ;
    3219          24 :     result = (bytea *) palloc(len);
    3220             : 
    3221             :     /* Set size of result string... */
    3222          24 :     SET_VARSIZE(result, len);
    3223             : 
    3224             :     /* Fill data field of result string... */
    3225          24 :     ptr = VARDATA(result);
    3226          24 :     if (len1 > 0)
    3227          24 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    3228          24 :     if (len2 > 0)
    3229          12 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    3230             : 
    3231          24 :     return result;
    3232             : }
    3233             : 
    3234             : #define PG_STR_GET_BYTEA(str_) \
    3235             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    3236             : 
    3237             : /*
    3238             :  * bytea_substr()
    3239             :  * Return a substring starting at the specified position.
    3240             :  * Cloned from text_substr and modified as required.
    3241             :  *
    3242             :  * Input:
    3243             :  *  - string
    3244             :  *  - starting position (is one-based)
    3245             :  *  - string length (optional)
    3246             :  *
    3247             :  * If the starting position is zero or less, then return from the start of the string
    3248             :  * adjusting the length to be consistent with the "negative start" per SQL.
    3249             :  * If the length is less than zero, an ERROR is thrown. If no third argument
    3250             :  * (length) is provided, the length to the end of the string is assumed.
    3251             :  */
    3252             : Datum
    3253          36 : bytea_substr(PG_FUNCTION_ARGS)
    3254             : {
    3255          36 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3256             :                                       PG_GETARG_INT32(1),
    3257             :                                       PG_GETARG_INT32(2),
    3258             :                                       false));
    3259             : }
    3260             : 
    3261             : /*
    3262             :  * bytea_substr_no_len -
    3263             :  *    Wrapper to avoid opr_sanity failure due to
    3264             :  *    one function accepting a different number of args.
    3265             :  */
    3266             : Datum
    3267          16 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    3268             : {
    3269          16 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3270             :                                       PG_GETARG_INT32(1),
    3271             :                                       -1,
    3272             :                                       true));
    3273             : }
    3274             : 
    3275             : static bytea *
    3276          76 : bytea_substring(Datum str,
    3277             :                 int S,
    3278             :                 int L,
    3279             :                 bool length_not_specified)
    3280             : {
    3281             :     int         S1;             /* adjusted start position */
    3282             :     int         L1;             /* adjusted substring length */
    3283             : 
    3284          76 :     S1 = Max(S, 1);
    3285             : 
    3286          76 :     if (length_not_specified)
    3287             :     {
    3288             :         /*
    3289             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    3290             :          * end of the string if we pass it a negative value for length.
    3291             :          */
    3292          28 :         L1 = -1;
    3293             :     }
    3294             :     else
    3295             :     {
    3296             :         /* end position */
    3297          48 :         int         E = S + L;
    3298             : 
    3299             :         /*
    3300             :          * A negative value for L is the only way for the end position to be
    3301             :          * before the start. SQL99 says to throw an error.
    3302             :          */
    3303          48 :         if (E < S)
    3304           4 :             ereport(ERROR,
    3305             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
    3306             :                      errmsg("negative substring length not allowed")));
    3307             : 
    3308             :         /*
    3309             :          * A zero or negative value for the end position can happen if the
    3310             :          * start was negative or one. SQL99 says to return a zero-length
    3311             :          * string.
    3312             :          */
    3313          44 :         if (E < 1)
    3314           0 :             return PG_STR_GET_BYTEA("");
    3315             : 
    3316          44 :         L1 = E - S1;
    3317             :     }
    3318             : 
    3319             :     /*
    3320             :      * If the start position is past the end of the string, SQL99 says to
    3321             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    3322             :      * us. Convert to zero-based starting position
    3323             :      */
    3324          72 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    3325             : }
    3326             : 
    3327             : /*
    3328             :  * byteaoverlay
    3329             :  *  Replace specified substring of first string with second
    3330             :  *
    3331             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    3332             :  * This code is a direct implementation of what the standard says.
    3333             :  */
    3334             : Datum
    3335           4 : byteaoverlay(PG_FUNCTION_ARGS)
    3336             : {
    3337           4 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3338           4 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3339           4 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3340           4 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    3341             : 
    3342           4 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3343             : }
    3344             : 
    3345             : Datum
    3346           8 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    3347             : {
    3348           8 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3349           8 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3350           8 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3351             :     int         sl;
    3352             : 
    3353           8 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    3354           8 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3355             : }
    3356             : 
    3357             : static bytea *
    3358          12 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    3359             : {
    3360             :     bytea      *result;
    3361             :     bytea      *s1;
    3362             :     bytea      *s2;
    3363             :     int         sp_pl_sl;
    3364             : 
    3365             :     /*
    3366             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    3367             :      * "substring length" error because that's what should be expected
    3368             :      * according to the spec's definition of OVERLAY().
    3369             :      */
    3370          12 :     if (sp <= 0)
    3371           0 :         ereport(ERROR,
    3372             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3373             :                  errmsg("negative substring length not allowed")));
    3374          12 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    3375           0 :         ereport(ERROR,
    3376             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    3377             :                  errmsg("integer out of range")));
    3378             : 
    3379          12 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    3380          12 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    3381          12 :     result = bytea_catenate(s1, t2);
    3382          12 :     result = bytea_catenate(result, s2);
    3383             : 
    3384          12 :     return result;
    3385             : }
    3386             : 
    3387             : /*
    3388             :  * byteapos -
    3389             :  *    Return the position of the specified substring.
    3390             :  *    Implements the SQL POSITION() function.
    3391             :  * Cloned from textpos and modified as required.
    3392             :  */
    3393             : Datum
    3394           0 : byteapos(PG_FUNCTION_ARGS)
    3395             : {
    3396           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3397           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3398             :     int         pos;
    3399             :     int         px,
    3400             :                 p;
    3401             :     int         len1,
    3402             :                 len2;
    3403             :     char       *p1,
    3404             :                *p2;
    3405             : 
    3406           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3407           0 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3408             : 
    3409           0 :     if (len2 <= 0)
    3410           0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    3411             : 
    3412           0 :     p1 = VARDATA_ANY(t1);
    3413           0 :     p2 = VARDATA_ANY(t2);
    3414             : 
    3415           0 :     pos = 0;
    3416           0 :     px = (len1 - len2);
    3417           0 :     for (p = 0; p <= px; p++)
    3418             :     {
    3419           0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    3420             :         {
    3421           0 :             pos = p + 1;
    3422           0 :             break;
    3423             :         };
    3424           0 :         p1++;
    3425             :     };
    3426             : 
    3427           0 :     PG_RETURN_INT32(pos);
    3428             : }
    3429             : 
    3430             : /*-------------------------------------------------------------
    3431             :  * byteaGetByte
    3432             :  *
    3433             :  * this routine treats "bytea" as an array of bytes.
    3434             :  * It returns the Nth byte (a number between 0 and 255).
    3435             :  *-------------------------------------------------------------
    3436             :  */
    3437             : Datum
    3438           8 : byteaGetByte(PG_FUNCTION_ARGS)
    3439             : {
    3440           8 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3441           8 :     int32       n = PG_GETARG_INT32(1);
    3442             :     int         len;
    3443             :     int         byte;
    3444             : 
    3445           8 :     len = VARSIZE_ANY_EXHDR(v);
    3446             : 
    3447           8 :     if (n < 0 || n >= len)
    3448           4 :         ereport(ERROR,
    3449             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3450             :                  errmsg("index %d out of valid range, 0..%d",
    3451             :                         n, len - 1)));
    3452             : 
    3453           4 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3454             : 
    3455           4 :     PG_RETURN_INT32(byte);
    3456             : }
    3457             : 
    3458             : /*-------------------------------------------------------------
    3459             :  * byteaGetBit
    3460             :  *
    3461             :  * This routine treats a "bytea" type like an array of bits.
    3462             :  * It returns the value of the Nth bit (0 or 1).
    3463             :  *
    3464             :  *-------------------------------------------------------------
    3465             :  */
    3466             : Datum
    3467           8 : byteaGetBit(PG_FUNCTION_ARGS)
    3468             : {
    3469           8 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3470           8 :     int64       n = PG_GETARG_INT64(1);
    3471             :     int         byteNo,
    3472             :                 bitNo;
    3473             :     int         len;
    3474             :     int         byte;
    3475             : 
    3476           8 :     len = VARSIZE_ANY_EXHDR(v);
    3477             : 
    3478           8 :     if (n < 0 || n >= (int64) len * 8)
    3479           4 :         ereport(ERROR,
    3480             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3481             :                  errmsg("index %lld out of valid range, 0..%lld",
    3482             :                         (long long) n, (long long) len * 8 - 1)));
    3483             : 
    3484             :     /* n/8 is now known < len, so safe to cast to int */
    3485           4 :     byteNo = (int) (n / 8);
    3486           4 :     bitNo = (int) (n % 8);
    3487             : 
    3488           4 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3489             : 
    3490           4 :     if (byte & (1 << bitNo))
    3491           4 :         PG_RETURN_INT32(1);
    3492             :     else
    3493           0 :         PG_RETURN_INT32(0);
    3494             : }
    3495             : 
    3496             : /*-------------------------------------------------------------
    3497             :  * byteaSetByte
    3498             :  *
    3499             :  * Given an instance of type 'bytea' creates a new one with
    3500             :  * the Nth byte set to the given value.
    3501             :  *
    3502             :  *-------------------------------------------------------------
    3503             :  */
    3504             : Datum
    3505           8 : byteaSetByte(PG_FUNCTION_ARGS)
    3506             : {
    3507           8 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3508           8 :     int32       n = PG_GETARG_INT32(1);
    3509           8 :     int32       newByte = PG_GETARG_INT32(2);
    3510             :     int         len;
    3511             : 
    3512           8 :     len = VARSIZE(res) - VARHDRSZ;
    3513             : 
    3514           8 :     if (n < 0 || n >= len)
    3515           4 :         ereport(ERROR,
    3516             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3517             :                  errmsg("index %d out of valid range, 0..%d",
    3518             :                         n, len - 1)));
    3519             : 
    3520             :     /*
    3521             :      * Now set the byte.
    3522             :      */
    3523           4 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3524             : 
    3525           4 :     PG_RETURN_BYTEA_P(res);
    3526             : }
    3527             : 
    3528             : /*-------------------------------------------------------------
    3529             :  * byteaSetBit
    3530             :  *
    3531             :  * Given an instance of type 'bytea' creates a new one with
    3532             :  * the Nth bit set to the given value.
    3533             :  *
    3534             :  *-------------------------------------------------------------
    3535             :  */
    3536             : Datum
    3537           8 : byteaSetBit(PG_FUNCTION_ARGS)
    3538             : {
    3539           8 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3540           8 :     int64       n = PG_GETARG_INT64(1);
    3541           8 :     int32       newBit = PG_GETARG_INT32(2);
    3542             :     int         len;
    3543             :     int         oldByte,
    3544             :                 newByte;
    3545             :     int         byteNo,
    3546             :                 bitNo;
    3547             : 
    3548           8 :     len = VARSIZE(res) - VARHDRSZ;
    3549             : 
    3550           8 :     if (n < 0 || n >= (int64) len * 8)
    3551           4 :         ereport(ERROR,
    3552             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3553             :                  errmsg("index %lld out of valid range, 0..%lld",
    3554             :                         (long long) n, (long long) len * 8 - 1)));
    3555             : 
    3556             :     /* n/8 is now known < len, so safe to cast to int */
    3557           4 :     byteNo = (int) (n / 8);
    3558           4 :     bitNo = (int) (n % 8);
    3559             : 
    3560             :     /*
    3561             :      * sanity check!
    3562             :      */
    3563           4 :     if (newBit != 0 && newBit != 1)
    3564           0 :         ereport(ERROR,
    3565             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3566             :                  errmsg("new bit must be 0 or 1")));
    3567             : 
    3568             :     /*
    3569             :      * Update the byte.
    3570             :      */
    3571           4 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3572             : 
    3573           4 :     if (newBit == 0)
    3574           4 :         newByte = oldByte & (~(1 << bitNo));
    3575             :     else
    3576           0 :         newByte = oldByte | (1 << bitNo);
    3577             : 
    3578           4 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3579             : 
    3580           4 :     PG_RETURN_BYTEA_P(res);
    3581             : }
    3582             : 
    3583             : 
    3584             : /* text_name()
    3585             :  * Converts a text type to a Name type: the text's bytes are copied into a freshly palloc0'd NAMEDATALEN-byte buffer.
    3586             :  */
    3587             : Datum
    3588         850 : text_name(PG_FUNCTION_ARGS)
    3589             : {
    3590         850 :     text       *s = PG_GETARG_TEXT_PP(0);
    3591             :     Name        result;
    3592             :     int         len;
    3593             : 
    3594         850 :     len = VARSIZE_ANY_EXHDR(s);
    3595             : 
    3596             :     /* Truncate oversize input; NAMEDATALEN - 1 is the limit handed to pg_mbcliplen (presumably it clips on a character boundary -- confirm) */
    3597         850 :     if (len >= NAMEDATALEN)
    3598           4 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3599             : 
    3600             :     /* We use palloc0 here to ensure result is zero-padded */
    3601         850 :     result = (Name) palloc0(NAMEDATALEN);
    3602         850 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);  /* only len bytes are written; the rest of the buffer stays zero */
    3603             : 
    3604         850 :     PG_RETURN_NAME(result);
    3605             : }
    3606             : 
    3607             : /* name_text()
    3608             :  * Converts a Name type to a text type by handing the Name's C string (NameStr) to cstring_to_text().
    3609             :  */
    3610             : Datum
    3611      523972 : name_text(PG_FUNCTION_ARGS)
    3612             : {
    3613      523972 :     Name        s = PG_GETARG_NAME(0);
    3614             : 
    3615      523972 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3616             : }
    3617             : 
    3618             : 
    3619             : /*
    3620             :  * textToQualifiedNameList - convert a text object to list of names
    3621             :  *
    3622             :  * This implements the input parsing needed by nextval() and other
    3623             :  * functions that take a text parameter representing a qualified name.
    3624             :  * We split the name at dots, downcase if not double-quoted, and
    3625             :  * truncate names if they're too long.  A syntax error or an empty name list raises ERROR (ERRCODE_INVALID_NAME).
    3626             :  */
    3627             : List *
    3628         846 : textToQualifiedNameList(text *textval)
    3629             : {
    3630             :     char       *rawname;
    3631         846 :     List       *result = NIL;
    3632             :     List       *namelist;
    3633             :     ListCell   *l;
    3634             : 
    3635             :     /* Convert to C string (handles possible detoasting). */
    3636             :     /* Note we rely on being able to modify rawname below. */
    3637         846 :     rawname = text_to_cstring(textval);
    3638             : 
    3639         846 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3640           0 :         ereport(ERROR,
    3641             :                 (errcode(ERRCODE_INVALID_NAME),
    3642             :                  errmsg("invalid name syntax")));
    3643             : 
    3644         846 :     if (namelist == NIL)    /* the splitter accepts an empty string; this function does not */
    3645           0 :         ereport(ERROR,
    3646             :                 (errcode(ERRCODE_INVALID_NAME),
    3647             :                  errmsg("invalid name syntax")));
    3648             : 
    3649        1768 :     foreach(l, namelist)
    3650             :     {
    3651         922 :         char       *curname = (char *) lfirst(l);
    3652             : 
    3653         922 :         result = lappend(result, makeString(pstrdup(curname)));   /* copy each name: rawname is pfree'd below */
    3654             :     }
    3655             : 
    3656         846 :     pfree(rawname);
    3657         846 :     list_free(namelist);
    3658             : 
    3659         846 :     return result;
    3660             : }
    3661             : 
    3662             : /*
    3663             :  * SplitIdentifierString --- parse a string containing identifiers
    3664             :  *
    3665             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3666             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3667             :  * important to avoid memory leaks, so the API is designed to minimize the
    3668             :  * amount of stuff that needs to be allocated and freed.
    3669             :  *
    3670             :  * Inputs:
    3671             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3672             :  *             been modified to contain the separated identifiers.
    3673             :  *  separator: the separator punctuation expected between identifiers
    3674             :  *             (typically '.' or ',').  Whitespace may also appear around
    3675             :  *             identifiers.
    3676             :  * Outputs:
    3677             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3678             :  *            rawstring.  Caller should list_free() this even on error return.
    3679             :  *
    3680             :  * Returns true if okay, false if there is a syntax error in the string (unterminated quote, empty unquoted name, or other text where a separator or end of string is expected).
    3681             :  *
    3682             :  * Note that an empty string is considered okay here, though not in
    3683             :  * textToQualifiedNameList.
    3684             :  */
    3685             : bool
    3686       76522 : SplitIdentifierString(char *rawstring, char separator,
    3687             :                       List **namelist)
    3688             : {
    3689       76522 :     char       *nextp = rawstring;
    3690       76522 :     bool        done = false;
    3691             : 
    3692       76522 :     *namelist = NIL;
    3693             : 
    3694       76522 :     while (scanner_isspace(*nextp))
    3695           0 :         nextp++;                /* skip leading whitespace */
    3696             : 
    3697       76522 :     if (*nextp == '\0')
    3698        8400 :         return true;            /* allow empty string */
    3699             : 
    3700             :     /* At the top of the loop, we are at start of a new identifier. */
    3701             :     do
    3702             :     {
    3703             :         char       *curname;
    3704             :         char       *endp;
    3705             : 
    3706      108038 :         if (*nextp == '"')
    3707             :         {
    3708             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3709       15546 :             curname = nextp + 1;
    3710             :             for (;;)
    3711             :             {
    3712       15550 :                 endp = strchr(nextp + 1, '"');
    3713       15548 :                 if (endp == NULL)
    3714           0 :                     return false;   /* mismatched quotes */
    3715       15548 :                 if (endp[1] != '"')
    3716       15546 :                     break;      /* found end of quoted name */
    3717             :                 /* Collapse adjacent quotes into one quote, and look again */
    3718           2 :                 memmove(endp, endp + 1, strlen(endp));  /* strlen(endp) bytes starting at endp + 1 include the terminating NUL */
    3719           2 :                 nextp = endp;   /* the next strchr starts at nextp + 1, i.e. just past the quote we kept */
    3720             :             }
    3721             :             /* endp now points at the terminating quote */
    3722       15546 :             nextp = endp + 1;
    3723             :         }
    3724             :         else
    3725             :         {
    3726             :             /* Unquoted name --- extends to separator or whitespace */
    3727             :             char       *downname;
    3728             :             int         len;
    3729             : 
    3730       92492 :             curname = nextp;
    3731      797922 :             while (*nextp && *nextp != separator &&
    3732      705432 :                    !scanner_isspace(*nextp))
    3733      705430 :                 nextp++;
    3734       92492 :             endp = nextp;
    3735       92492 :             if (curname == nextp)
    3736           0 :                 return false;   /* empty unquoted name not allowed */
    3737             : 
    3738             :             /*
    3739             :              * Downcase the identifier, using same code as main lexer does.
    3740             :              *
    3741             :              * XXX because we want to overwrite the input in-place, we cannot
    3742             :              * support a downcasing transformation that increases the string
    3743             :              * length.  This is not a problem given the current implementation
    3744             :              * of downcase_truncate_identifier, but we'll probably have to do
    3745             :              * something about this someday.
    3746             :              */
    3747       92492 :             len = endp - curname;
    3748       92492 :             downname = downcase_truncate_identifier(curname, len, false);
    3749             :             Assert(strlen(downname) <= len);
    3750       92492 :             strncpy(curname, downname, len);    /* strncpy is required here: it writes exactly len bytes, so the char at *endp (examined below) is left intact */
    3751       92492 :             pfree(downname);
    3752             :         }
    3753             : 
    3754      108040 :         while (scanner_isspace(*nextp))
    3755           2 :             nextp++;            /* skip trailing whitespace */
    3756             : 
    3757      108038 :         if (*nextp == separator)
    3758             :         {
    3759       39916 :             nextp++;
    3760       62176 :             while (scanner_isspace(*nextp))
    3761       22260 :                 nextp++;        /* skip leading whitespace for next */
    3762             :             /* we expect another name, so done remains false */
    3763             :         }
    3764       68122 :         else if (*nextp == '\0')
    3765       68120 :             done = true;
    3766             :         else
    3767           2 :             return false;       /* invalid syntax */
    3768             : 
    3769             :         /* Now safe to overwrite separator with a null (endp may be the very char that was just tested above) */
    3770      108036 :         *endp = '\0';
    3771             : 
    3772             :         /* Truncate name if it's overlength */
    3773      108036 :         truncate_identifier(curname, strlen(curname), false);
    3774             : 
    3775             :         /*
    3776             :          * Finished isolating current name --- add it to list
    3777             :          */
    3778      108036 :         *namelist = lappend(*namelist, curname);
    3779             : 
    3780             :         /* Loop back if we didn't reach end of string */
    3781      108036 :     } while (!done);
    3782             : 
    3783       68120 :     return true;
    3784             : }
    3785             : 
    3786             : 
    3787             : /*
    3788             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3789             :  *
    3790             :  * This works fine on file names too; the function name is historical.
    3791             :  *
    3792             :  * This is similar to SplitIdentifierString, except that the parsing
    3793             :  * rules are meant to handle pathnames instead of identifiers: there is
    3794             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3795             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3796             :  * last, the returned strings are separately palloc'd rather than being
    3797             :  * pointers into rawstring --- but we still scribble on rawstring.
    3798             :  *
    3799             :  * Inputs:
    3800             :  *  rawstring: the input string; must be modifiable!
    3801             :  *  separator: the separator punctuation expected between directories
    3802             :  *             (typically ',' or ';').  Whitespace may also appear around
    3803             :  *             directories.
    3804             :  * Outputs:
    3805             :  *  namelist: filled with a palloc'd list of directory names.
    3806             :  *            Caller should list_free_deep() this even on error return.
    3807             :  *
    3808             :  * Returns true if okay, false if there is a syntax error in the string (unterminated quote, empty unquoted name, or other text where a separator or end of string is expected).
    3809             :  *
    3810             :  * Note that an empty string is considered okay here.
    3811             :  */
    3812             : bool
    3813         728 : SplitDirectoriesString(char *rawstring, char separator,
    3814             :                        List **namelist)
    3815             : {
    3816         728 :     char       *nextp = rawstring;
    3817         728 :     bool        done = false;
    3818             : 
    3819         728 :     *namelist = NIL;
    3820             : 
    3821         728 :     while (scanner_isspace(*nextp))
    3822           0 :         nextp++;                /* skip leading whitespace */
    3823             : 
    3824         728 :     if (*nextp == '\0')
    3825           2 :         return true;            /* allow empty string */
    3826             : 
    3827             :     /* At the top of the loop, we are at start of a new directory. */
    3828             :     do
    3829             :     {
    3830             :         char       *curname;
    3831             :         char       *endp;
    3832             : 
    3833         726 :         if (*nextp == '"')
    3834             :         {
    3835             :             /* Quoted name --- collapse quote-quote pairs */
    3836           0 :             curname = nextp + 1;
    3837             :             for (;;)
    3838             :             {
    3839           0 :                 endp = strchr(nextp + 1, '"');
    3840           0 :                 if (endp == NULL)
    3841           0 :                     return false;   /* mismatched quotes */
    3842           0 :                 if (endp[1] != '"')
    3843           0 :                     break;      /* found end of quoted name */
    3844             :                 /* Collapse adjacent quotes into one quote, and look again */
    3845           0 :                 memmove(endp, endp + 1, strlen(endp));  /* strlen(endp) bytes starting at endp + 1 include the terminating NUL */
    3846           0 :                 nextp = endp;   /* the next strchr starts at nextp + 1, i.e. just past the quote we kept */
    3847             :             }
    3848             :             /* endp now points at the terminating quote */
    3849           0 :             nextp = endp + 1;
    3850             :         }
    3851             :         else
    3852             :         {
    3853             :             /* Unquoted name --- extends to separator or end of string */
    3854         726 :             curname = endp = nextp;
    3855       12836 :             while (*nextp && *nextp != separator)
    3856             :             {
    3857             :                 /* trailing whitespace should not be included in name */
    3858       12110 :                 if (!scanner_isspace(*nextp))
    3859       12110 :                     endp = nextp + 1;   /* one past the last non-whitespace char seen so far */
    3860       12110 :                 nextp++;
    3861             :             }
    3862         726 :             if (curname == endp)
    3863           0 :                 return false;   /* empty unquoted name not allowed */
    3864             :         }
    3865             : 
    3866         726 :         while (scanner_isspace(*nextp))
    3867           0 :             nextp++;            /* skip trailing whitespace */
    3868             : 
    3869         726 :         if (*nextp == separator)
    3870             :         {
    3871           0 :             nextp++;
    3872           0 :             while (scanner_isspace(*nextp))
    3873           0 :                 nextp++;        /* skip leading whitespace for next */
    3874             :             /* we expect another name, so done remains false */
    3875             :         }
    3876         726 :         else if (*nextp == '\0')
    3877         726 :             done = true;
    3878             :         else
    3879           0 :             return false;       /* invalid syntax */
    3880             : 
    3881             :         /* Now safe to overwrite separator with a null */
    3882         726 :         *endp = '\0';
    3883             : 
    3884             :         /* Truncate path if it's overlength */
    3885         726 :         if (strlen(curname) >= MAXPGPATH)
    3886           0 :             curname[MAXPGPATH - 1] = '\0';  /* keeps at most MAXPGPATH - 1 bytes */
    3887             : 
    3888             :         /*
    3889             :          * Finished isolating current name --- add it to list
    3890             :          */
    3891         726 :         curname = pstrdup(curname);     /* the list gets this private copy, not a pointer into rawstring */
    3892         726 :         canonicalize_path(curname);
    3893         726 :         *namelist = lappend(*namelist, curname);
    3894             : 
    3895             :         /* Loop back if we didn't reach end of string */
    3896         726 :     } while (!done);
    3897             : 
    3898         726 :     return true;
    3899             : }
    3900             : 
    3901             : 
    3902             : /*
    3903             :  * SplitGUCList --- parse a string containing identifiers or file names
    3904             :  *
    3905             :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
    3906             :  * presuming whether the elements will be taken as identifiers or file names.
    3907             :  * We assume the input has already been through flatten_set_variable_args(),
    3908             :  * so that we need never downcase (if appropriate, that was done already).
    3909             :  * Nor do we ever truncate, since we don't know the correct max length.
    3910             :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
    3911             :  * because any embedded whitespace should have led to double-quoting).
    3912             :  * Otherwise the API is identical to SplitIdentifierString.
    3913             :  *
    3914             :  * XXX it's annoying to have so many copies of this string-splitting logic.
    3915             :  * However, it's not clear that having one function with a bunch of option
    3916             :  * flags would be much better.
    3917             :  *
    3918             :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
    3919             :  * Be sure to update that if you have to change this.
    3920             :  *
    3921             :  * Inputs:
    3922             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3923             :  *             been modified to contain the separated identifiers.
    3924             :  *  separator: the separator punctuation expected between identifiers
    3925             :  *             (typically '.' or ',').  Whitespace may also appear around
    3926             :  *             identifiers.
    3927             :  * Outputs:
    3928             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3929             :  *            rawstring.  Caller should list_free() this even on error return.
    3930             :  *
    3931             :  * Returns true if okay, false if there is a syntax error in the string (unterminated quote, empty unquoted name, or other text where a separator or end of string is expected).
    3932             :  */
    3933             : bool
    3934         724 : SplitGUCList(char *rawstring, char separator,
    3935             :              List **namelist)
    3936             : {
    3937         724 :     char       *nextp = rawstring;
    3938         724 :     bool        done = false;
    3939             : 
    3940         724 :     *namelist = NIL;
    3941             : 
    3942         724 :     while (scanner_isspace(*nextp))
    3943           0 :         nextp++;                /* skip leading whitespace */
    3944             : 
    3945         724 :     if (*nextp == '\0')
    3946         690 :         return true;            /* allow empty string */
    3947             : 
    3948             :     /* At the top of the loop, we are at start of a new identifier. */
    3949             :     do
    3950             :     {
    3951             :         char       *curname;
    3952             :         char       *endp;
    3953             : 
    3954          46 :         if (*nextp == '"')
    3955             :         {
    3956             :             /* Quoted name --- collapse quote-quote pairs */
    3957          16 :             curname = nextp + 1;
    3958             :             for (;;)
    3959             :             {
    3960          24 :                 endp = strchr(nextp + 1, '"');
    3961          20 :                 if (endp == NULL)
    3962           0 :                     return false;   /* mismatched quotes */
    3963          20 :                 if (endp[1] != '"')
    3964          16 :                     break;      /* found end of quoted name */
    3965             :                 /* Collapse adjacent quotes into one quote, and look again */
    3966           4 :                 memmove(endp, endp + 1, strlen(endp));  /* strlen(endp) bytes starting at endp + 1 include the terminating NUL */
    3967           4 :                 nextp = endp;   /* the next strchr starts at nextp + 1, i.e. just past the quote we kept */
    3968             :             }
    3969             :             /* endp now points at the terminating quote */
    3970          16 :             nextp = endp + 1;
    3971             :         }
    3972             :         else
    3973             :         {
    3974             :             /* Unquoted name --- extends to separator or whitespace */
    3975          30 :             curname = nextp;
    3976         304 :             while (*nextp && *nextp != separator &&
    3977         274 :                    !scanner_isspace(*nextp))
    3978         274 :                 nextp++;
    3979          30 :             endp = nextp;
    3980          30 :             if (curname == nextp)
    3981           0 :                 return false;   /* empty unquoted name not allowed */
    3982             :         }
    3983             : 
    3984          46 :         while (scanner_isspace(*nextp))
    3985           0 :             nextp++;            /* skip trailing whitespace */
    3986             : 
    3987          46 :         if (*nextp == separator)
    3988             :         {
    3989          12 :             nextp++;
    3990          24 :             while (scanner_isspace(*nextp))
    3991          12 :                 nextp++;        /* skip leading whitespace for next */
    3992             :             /* we expect another name, so done remains false */
    3993             :         }
    3994          34 :         else if (*nextp == '\0')
    3995          34 :             done = true;
    3996             :         else
    3997           0 :             return false;       /* invalid syntax */
    3998             : 
    3999             :         /* Now safe to overwrite separator with a null (endp may be the very char that was just tested above) */
    4000          46 :         *endp = '\0';
    4001             : 
    4002             :         /*
    4003             :          * Finished isolating current name --- add it to list
    4004             :          */
    4005          46 :         *namelist = lappend(*namelist, curname);    /* pointer into rawstring, not a copy */
    4006             : 
    4007             :         /* Loop back if we didn't reach end of string */
    4008          46 :     } while (!done);
    4009             : 
    4010          34 :     return true;
    4011             : }
    4012             : 
    4013             : 
    4014             : /*****************************************************************************
    4015             :  *  Comparison Functions used for bytea
    4016             :  *
    4017             :  * Note: btree indexes need these routines not to leak memory; therefore,
    4018             :  * be careful to free working copies of toasted datums.  Most places don't
    4019             :  * need to be so careful.
    4020             :  *****************************************************************************/
    4021             : 
    4022             : Datum   /* byteaeq: true iff both bytea arguments have the same size and identical bytes */
    4023        7954 : byteaeq(PG_FUNCTION_ARGS)
    4024             : {
    4025        7954 :     Datum       arg1 = PG_GETARG_DATUM(0);
    4026        7954 :     Datum       arg2 = PG_GETARG_DATUM(1);
    4027             :     bool        result;
    4028             :     Size        len1,
    4029             :                 len2;
    4030             : 
    4031             :     /*
    4032             :      * We can use a fast path for unequal lengths, which might save us from
    4033             :      * having to detoast one or both values.
    4034             :      */
    4035        7954 :     len1 = toast_raw_datum_size(arg1);
    4036        7954 :     len2 = toast_raw_datum_size(arg2);
    4037        7954 :     if (len1 != len2)
    4038        4304 :         result = false;
    4039             :     else
    4040             :     {
    4041        3650 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    4042        3650 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    4043             : 
    4044        3650 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    4045             :                          len1 - VARHDRSZ) == 0);   /* NOTE(review): presumably the raw size counts a VARHDRSZ header, hence the subtraction -- confirm */
    4046             : 
    4047        3650 :         PG_FREE_IF_COPY(barg1, 0);
    4048        3650 :         PG_FREE_IF_COPY(barg2, 1);
    4049             :     }
    4050             : 
    4051        7954 :     PG_RETURN_BOOL(result);
    4052             : }
    4053             : 
    4054             : Datum   /* byteane: true iff the two bytea arguments differ in size or in any byte */
    4055         512 : byteane(PG_FUNCTION_ARGS)
    4056             : {
    4057         512 :     Datum       arg1 = PG_GETARG_DATUM(0);
    4058         512 :     Datum       arg2 = PG_GETARG_DATUM(1);
    4059             :     bool        result;
    4060             :     Size        len1,
    4061             :                 len2;
    4062             : 
    4063             :     /*
    4064             :      * We can use a fast path for unequal lengths, which might save us from
    4065             :      * having to detoast one or both values.
    4066             :      */
    4067         512 :     len1 = toast_raw_datum_size(arg1);
    4068         512 :     len2 = toast_raw_datum_size(arg2);
    4069         512 :     if (len1 != len2)
    4070           0 :         result = true;
    4071             :     else
    4072             :     {
    4073         512 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    4074         512 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    4075             : 
    4076         512 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    4077             :                          len1 - VARHDRSZ) != 0);   /* NOTE(review): presumably the raw size counts a VARHDRSZ header, hence the subtraction -- confirm */
    4078             : 
    4079         512 :         PG_FREE_IF_COPY(barg1, 0);
    4080         512 :         PG_FREE_IF_COPY(barg2, 1);
    4081             :     }
    4082             : 
    4083         512 :     PG_RETURN_BOOL(result);
    4084             : }
    4085             : 
    4086             : Datum   /* bytealt: arg1 < arg2 -- memcmp over the common prefix; on a tie the shorter value is the smaller one */
    4087        7302 : bytealt(PG_FUNCTION_ARGS)
    4088             : {
    4089        7302 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4090        7302 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4091             :     int         len1,
    4092             :                 len2;
    4093             :     int         cmp;
    4094             : 
    4095        7302 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4096        7302 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4097             : 
    4098        7302 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4099             : 
    4100        7302 :     PG_FREE_IF_COPY(arg1, 0);   /* lengths and cmp are already captured, so the data is no longer needed */
    4101        7302 :     PG_FREE_IF_COPY(arg2, 1);
    4102             : 
    4103        7302 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    4104             : }
    4105             : 
    4106             : Datum   /* byteale: arg1 <= arg2 -- memcmp over the common prefix; on a tie arg1 must not be longer than arg2 */
    4107        5556 : byteale(PG_FUNCTION_ARGS)
    4108             : {
    4109        5556 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4110        5556 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4111             :     int         len1,
    4112             :                 len2;
    4113             :     int         cmp;
    4114             : 
    4115        5556 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4116        5556 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4117             : 
    4118        5556 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4119             : 
    4120        5556 :     PG_FREE_IF_COPY(arg1, 0);   /* lengths and cmp are already captured, so the data is no longer needed */
    4121        5556 :     PG_FREE_IF_COPY(arg2, 1);
    4122             : 
    4123        5556 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    4124             : }
    4125             : 
    4126             : Datum   /* byteagt: arg1 > arg2 -- memcmp over the common prefix; on a tie the longer value is the greater one */
    4127        5214 : byteagt(PG_FUNCTION_ARGS)
    4128             : {
    4129        5214 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4130        5214 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4131             :     int         len1,
    4132             :                 len2;
    4133             :     int         cmp;
    4134             : 
    4135        5214 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4136        5214 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4137             : 
    4138        5214 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4139             : 
    4140        5214 :     PG_FREE_IF_COPY(arg1, 0);   /* lengths and cmp are already captured, so the data is no longer needed */
    4141        5214 :     PG_FREE_IF_COPY(arg2, 1);
    4142             : 
    4143        5214 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    4144             : }
    4145             : 
    4146             : Datum   /* byteage: arg1 >= arg2 -- memcmp over the common prefix; on a tie arg1 must not be shorter than arg2 */
    4147        4394 : byteage(PG_FUNCTION_ARGS)
    4148             : {
    4149        4394 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4150        4394 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4151             :     int         len1,
    4152             :                 len2;
    4153             :     int         cmp;
    4154             : 
    4155        4394 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4156        4394 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4157             : 
    4158        4394 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4159             : 
    4160        4394 :     PG_FREE_IF_COPY(arg1, 0);   /* lengths and cmp are already captured, so the data is no longer needed */
    4161        4394 :     PG_FREE_IF_COPY(arg2, 1);
    4162             : 
    4163        4394 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    4164             : }
    4165             : 
    4166             : Datum   /* byteacmp: three-way comparison of two bytea values, returned as an int32 */
    4167       87548 : byteacmp(PG_FUNCTION_ARGS)
    4168             : {
    4169       87548 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4170       87548 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4171             :     int         len1,
    4172             :                 len2;
    4173             :     int         cmp;
    4174             : 
    4175       87548 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4176       87548 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4177             : 
    4178       87548 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4179       87548 :     if ((cmp == 0) && (len1 != len2))   /* common prefix is equal: the lengths decide */
    4180       14556 :         cmp = (len1 < len2) ? -1 : 1;
    4181             : 
    4182       87548 :     PG_FREE_IF_COPY(arg1, 0);
    4183       87548 :     PG_FREE_IF_COPY(arg2, 1);
    4184             : 
    4185       87548 :     PG_RETURN_INT32(cmp);   /* otherwise memcmp's own result is returned as-is, so only its sign is meaningful */
    4186             : }
    4187             : 
    4188             : Datum
    4189          82 : bytea_sortsupport(PG_FUNCTION_ARGS)
    4190             : {
    4191          82 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    4192             :     MemoryContext oldcontext;
    4193             : 
    4194          82 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    4195             : 
    4196             :     /* Use generic string SortSupport, forcing "C" collation */
    4197          82 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
    4198             : 
    4199          82 :     MemoryContextSwitchTo(oldcontext);
    4200             : 
    4201          82 :     PG_RETURN_VOID();
    4202             : }
    4203             : 
    4204             : /*
    4205             :  * appendStringInfoText
    4206             :  *
    4207             :  * Append a text to str.
    4208             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
    4209             :  */
    4210             : static void
    4211     1235612 : appendStringInfoText(StringInfo str, const text *t)
    4212             : {
    4213     1235612 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
    4214     1235612 : }
    4215             : 
    4216             : /*
    4217             :  * replace_text
    4218             :  * replace all occurrences of 'old_sub_str' in 'orig_str'
    4219             :  * with 'new_sub_str' to form 'new_str'
    4220             :  *
    4221             :  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
    4222             :  * otherwise returns 'new_str'
    4223             :  */
    4224             : Datum
    4225        1474 : replace_text(PG_FUNCTION_ARGS)
    4226             : {
    4227        1474 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    4228        1474 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    4229        1474 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    4230             :     int         src_text_len;
    4231             :     int         from_sub_text_len;
    4232             :     TextPositionState state;
    4233             :     text       *ret_text;
    4234             :     int         chunk_len;
    4235             :     char       *curr_ptr;
    4236             :     char       *start_ptr;
    4237             :     StringInfoData str;
    4238             :     bool        found;
    4239             : 
    4240        1474 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4241        1474 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
    4242             : 
    4243             :     /* Return unmodified source string if empty source or pattern */
    4244        1474 :     if (src_text_len < 1 || from_sub_text_len < 1)
    4245             :     {
    4246           0 :         PG_RETURN_TEXT_P(src_text);
    4247             :     }
    4248             : 
    4249        1474 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
    4250             : 
    4251        1474 :     found = text_position_next(&state);
    4252             : 
    4253             :     /* When the from_sub_text is not found, there is nothing to do. */
    4254        1474 :     if (!found)
    4255             :     {
    4256         460 :         text_position_cleanup(&state);
    4257         460 :         PG_RETURN_TEXT_P(src_text);
    4258             :     }
    4259        1014 :     curr_ptr = text_position_get_match_ptr(&state);
    4260        1014 :     start_ptr = VARDATA_ANY(src_text);
    4261             : 
    4262        1014 :     initStringInfo(&str);
    4263             : 
    4264             :     do
    4265             :     {
    4266        4450 :         CHECK_FOR_INTERRUPTS();
    4267             : 
    4268             :         /* copy the data skipped over by last text_position_next() */
    4269        4450 :         chunk_len = curr_ptr - start_ptr;
    4270        4450 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4271             : 
    4272        4450 :         appendStringInfoText(&str, to_sub_text);
    4273             : 
    4274        4450 :         start_ptr = curr_ptr + from_sub_text_len;
    4275             : 
    4276        4450 :         found = text_position_next(&state);
    4277        4450 :         if (found)
    4278        3436 :             curr_ptr = text_position_get_match_ptr(&state);
    4279             :     }
    4280        4450 :     while (found);
    4281             : 
    4282             :     /* copy trailing data */
    4283        1014 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4284        1014 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4285             : 
    4286        1014 :     text_position_cleanup(&state);
    4287             : 
    4288        1014 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    4289        1014 :     pfree(str.data);
    4290             : 
    4291        1014 :     PG_RETURN_TEXT_P(ret_text);
    4292             : }
    4293             : 
    4294             : /*
    4295             :  * check_replace_text_has_escape_char
    4296             :  *
    4297             :  * check whether replace_text contains escape char.
    4298             :  */
    4299             : static bool
    4300        3630 : check_replace_text_has_escape_char(const text *replace_text)
    4301             : {
    4302        3630 :     const char *p = VARDATA_ANY(replace_text);
    4303        3630 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4304             : 
    4305        3630 :     if (pg_database_encoding_max_length() == 1)
    4306             :     {
    4307          12 :         for (; p < p_end; p++)
    4308             :         {
    4309           0 :             if (*p == '\\')
    4310           0 :                 return true;
    4311             :         }
    4312             :     }
    4313             :     else
    4314             :     {
    4315       58584 :         for (; p < p_end; p += pg_mblen(p))
    4316             :         {
    4317       55074 :             if (*p == '\\')
    4318         108 :                 return true;
    4319             :         }
    4320             :     }
    4321             : 
    4322        3522 :     return false;
    4323             : }
    4324             : 
    4325             : /*
    4326             :  * appendStringInfoRegexpSubstr
    4327             :  *
    4328             :  * Append replace_text to str, substituting regexp back references for
    4329             :  * \n escapes.  start_ptr is the start of the match in the source string,
    4330             :  * at logical character position data_pos.
    4331             :  */
    4332             : static void
    4333          60 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    4334             :                              regmatch_t *pmatch,
    4335             :                              char *start_ptr, int data_pos)
    4336             : {
    4337          60 :     const char *p = VARDATA_ANY(replace_text);
    4338          60 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4339          60 :     int         eml = pg_database_encoding_max_length();
    4340             : 
    4341             :     for (;;)
    4342         122 :     {
    4343         182 :         const char *chunk_start = p;
    4344             :         int         so;
    4345             :         int         eo;
    4346             : 
    4347             :         /* Find next escape char. */
    4348         182 :         if (eml == 1)
    4349             :         {
    4350           0 :             for (; p < p_end && *p != '\\'; p++)
    4351             :                  /* nothing */ ;
    4352             :         }
    4353             :         else
    4354             :         {
    4355         990 :             for (; p < p_end && *p != '\\'; p += pg_mblen(p))
    4356             :                  /* nothing */ ;
    4357             :         }
    4358             : 
    4359             :         /* Copy the text we just scanned over, if any. */
    4360         182 :         if (p > chunk_start)
    4361          72 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    4362             : 
    4363             :         /* Done if at end of string, else advance over escape char. */
    4364         182 :         if (p >= p_end)
    4365          60 :             break;
    4366         122 :         p++;
    4367             : 
    4368         122 :         if (p >= p_end)
    4369             :         {
    4370             :             /* Escape at very end of input.  Treat same as unexpected char */
    4371           0 :             appendStringInfoChar(str, '\\');
    4372           0 :             break;
    4373             :         }
    4374             : 
    4375         122 :         if (*p >= '1' && *p <= '9')
    4376         104 :         {
    4377             :             /* Use the back reference of regexp. */
    4378         104 :             int         idx = *p - '0';
    4379             : 
    4380         104 :             so = pmatch[idx].rm_so;
    4381         104 :             eo = pmatch[idx].rm_eo;
    4382         104 :             p++;
    4383             :         }
    4384          18 :         else if (*p == '&')
    4385             :         {
    4386             :             /* Use the entire matched string. */
    4387           0 :             so = pmatch[0].rm_so;
    4388           0 :             eo = pmatch[0].rm_eo;
    4389           0 :             p++;
    4390             :         }
    4391          18 :         else if (*p == '\\')
    4392             :         {
    4393             :             /* \\ means transfer one \ to output. */
    4394          18 :             appendStringInfoChar(str, '\\');
    4395          18 :             p++;
    4396          18 :             continue;
    4397             :         }
    4398             :         else
    4399             :         {
    4400             :             /*
    4401             :              * If escape char is not followed by any expected char, just treat
    4402             :              * it as ordinary data to copy.  (XXX would it be better to throw
    4403             :              * an error?)
    4404             :              */
    4405           0 :             appendStringInfoChar(str, '\\');
    4406           0 :             continue;
    4407             :         }
    4408             : 
    4409         104 :         if (so != -1 && eo != -1)
    4410             :         {
    4411             :             /*
    4412             :              * Copy the text that is back reference of regexp.  Note so and eo
    4413             :              * are counted in characters not bytes.
    4414             :              */
    4415             :             char       *chunk_start;
    4416             :             int         chunk_len;
    4417             : 
    4418             :             Assert(so >= data_pos);
    4419         104 :             chunk_start = start_ptr;
    4420         104 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
    4421         104 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
    4422         104 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    4423             :         }
    4424             :     }
    4425          60 : }
    4426             : 
    4427             : #define REGEXP_REPLACE_BACKREF_CNT      10
    4428             : 
    4429             : /*
    4430             :  * replace_text_regexp
    4431             :  *
    4432             :  * replace text that matches to regexp in src_text to replace_text.
    4433             :  *
    4434             :  * Note: to avoid having to include regex.h in builtins.h, we declare
    4435             :  * the regexp argument as void *, but really it's regex_t *.
    4436             :  */
    4437             : text *
    4438        3630 : replace_text_regexp(text *src_text, void *regexp,
    4439             :                     text *replace_text, bool glob)
    4440             : {
    4441             :     text       *ret_text;
    4442        3630 :     regex_t    *re = (regex_t *) regexp;
    4443        3630 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4444             :     StringInfoData buf;
    4445             :     regmatch_t  pmatch[REGEXP_REPLACE_BACKREF_CNT];
    4446             :     pg_wchar   *data;
    4447             :     size_t      data_len;
    4448             :     int         search_start;
    4449             :     int         data_pos;
    4450             :     char       *start_ptr;
    4451             :     bool        have_escape;
    4452             : 
    4453        3630 :     initStringInfo(&buf);
    4454             : 
    4455             :     /* Convert data string to wide characters. */
    4456        3630 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
    4457        3630 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    4458             : 
    4459             :     /* Check whether replace_text has escape char. */
    4460        3630 :     have_escape = check_replace_text_has_escape_char(replace_text);
    4461             : 
    4462             :     /* start_ptr points to the data_pos'th character of src_text */
    4463        3630 :     start_ptr = (char *) VARDATA_ANY(src_text);
    4464        3630 :     data_pos = 0;
    4465             : 
    4466        3630 :     search_start = 0;
    4467        5640 :     while (search_start <= data_len)
    4468             :     {
    4469             :         int         regexec_result;
    4470             : 
    4471        5636 :         CHECK_FOR_INTERRUPTS();
    4472             : 
    4473        5636 :         regexec_result = pg_regexec(re,
    4474             :                                     data,
    4475             :                                     data_len,
    4476             :                                     search_start,
    4477             :                                     NULL,   /* no details */
    4478             :                                     REGEXP_REPLACE_BACKREF_CNT,
    4479             :                                     pmatch,
    4480             :                                     0);
    4481             : 
    4482        5636 :         if (regexec_result == REG_NOMATCH)
    4483        3184 :             break;
    4484             : 
    4485        2452 :         if (regexec_result != REG_OKAY)
    4486             :         {
    4487             :             char        errMsg[100];
    4488             : 
    4489           0 :             CHECK_FOR_INTERRUPTS();
    4490           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    4491           0 :             ereport(ERROR,
    4492             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    4493             :                      errmsg("regular expression failed: %s", errMsg)));
    4494             :         }
    4495             : 
    4496             :         /*
    4497             :          * Copy the text to the left of the match position.  Note we are given
    4498             :          * character not byte indexes.
    4499             :          */
    4500        2452 :         if (pmatch[0].rm_so - data_pos > 0)
    4501             :         {
    4502             :             int         chunk_len;
    4503             : 
    4504        2390 :             chunk_len = charlen_to_bytelen(start_ptr,
    4505        2390 :                                            pmatch[0].rm_so - data_pos);
    4506        2390 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4507             : 
    4508             :             /*
    4509             :              * Advance start_ptr over that text, to avoid multiple rescans of
    4510             :              * it if the replace_text contains multiple back-references.
    4511             :              */
    4512        2390 :             start_ptr += chunk_len;
    4513        2390 :             data_pos = pmatch[0].rm_so;
    4514             :         }
    4515             : 
    4516             :         /*
    4517             :          * Copy the replace_text. Process back references when the
    4518             :          * replace_text has escape characters.
    4519             :          */
    4520        2452 :         if (have_escape)
    4521          60 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    4522             :                                          start_ptr, data_pos);
    4523             :         else
    4524        2392 :             appendStringInfoText(&buf, replace_text);
    4525             : 
    4526             :         /* Advance start_ptr and data_pos over the matched text. */
    4527        4904 :         start_ptr += charlen_to_bytelen(start_ptr,
    4528        2452 :                                         pmatch[0].rm_eo - data_pos);
    4529        2452 :         data_pos = pmatch[0].rm_eo;
    4530             : 
    4531             :         /*
    4532             :          * When global option is off, replace the first instance only.
    4533             :          */
    4534        2452 :         if (!glob)
    4535         442 :             break;
    4536             : 
    4537             :         /*
    4538             :          * Advance search position.  Normally we start the next search at the
    4539             :          * end of the previous match; but if the match was of zero length, we
    4540             :          * have to advance by one character, or we'd just find the same match
    4541             :          * again.
    4542             :          */
    4543        2010 :         search_start = data_pos;
    4544        2010 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4545           8 :             search_start++;
    4546             :     }
    4547             : 
    4548             :     /*
    4549             :      * Copy the text to the right of the last match.
    4550             :      */
    4551        3630 :     if (data_pos < data_len)
    4552             :     {
    4553             :         int         chunk_len;
    4554             : 
    4555        3460 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4556        3460 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4557             :     }
    4558             : 
    4559        3630 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4560        3630 :     pfree(buf.data);
    4561        3630 :     pfree(data);
    4562             : 
    4563        3630 :     return ret_text;
    4564             : }
    4565             : 
    4566             : /*
    4567             :  * split_text
    4568             :  * parse input string
    4569             :  * return ord item (1 based)
    4570             :  * based on provided field separator
    4571             :  */
    4572             : Datum
    4573          20 : split_text(PG_FUNCTION_ARGS)
    4574             : {
    4575          20 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4576          20 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4577          20 :     int         fldnum = PG_GETARG_INT32(2);
    4578             :     int         inputstring_len;
    4579             :     int         fldsep_len;
    4580             :     TextPositionState state;
    4581             :     char       *start_ptr;
    4582             :     char       *end_ptr;
    4583             :     text       *result_text;
    4584             :     bool        found;
    4585             : 
    4586             :     /* field number is 1 based */
    4587          20 :     if (fldnum < 1)
    4588           4 :         ereport(ERROR,
    4589             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4590             :                  errmsg("field position must be greater than zero")));
    4591             : 
    4592          16 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4593          16 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4594             : 
    4595             :     /* return empty string for empty input string */
    4596          16 :     if (inputstring_len < 1)
    4597           0 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4598             : 
    4599             :     /* empty field separator */
    4600          16 :     if (fldsep_len < 1)
    4601             :     {
    4602           0 :         text_position_cleanup(&state);
    4603             :         /* if first field, return input string, else empty string */
    4604           0 :         if (fldnum == 1)
    4605           0 :             PG_RETURN_TEXT_P(inputstring);
    4606             :         else
    4607           0 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4608             :     }
    4609             : 
    4610          16 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4611             : 
    4612             :     /* identify bounds of first field */
    4613          16 :     start_ptr = VARDATA_ANY(inputstring);
    4614          16 :     found = text_position_next(&state);
    4615             : 
    4616             :     /* special case if fldsep not found at all */
    4617          16 :     if (!found)
    4618             :     {
    4619           0 :         text_position_cleanup(&state);
    4620             :         /* if field 1 requested, return input string, else empty string */
    4621           0 :         if (fldnum == 1)
    4622           0 :             PG_RETURN_TEXT_P(inputstring);
    4623             :         else
    4624           0 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4625             :     }
    4626          16 :     end_ptr = text_position_get_match_ptr(&state);
    4627             : 
    4628          28 :     while (found && --fldnum > 0)
    4629             :     {
    4630             :         /* identify bounds of next field */
    4631          12 :         start_ptr = end_ptr + fldsep_len;
    4632          12 :         found = text_position_next(&state);
    4633          12 :         if (found)
    4634           4 :             end_ptr = text_position_get_match_ptr(&state);
    4635             :     }
    4636             : 
    4637          16 :     text_position_cleanup(&state);
    4638             : 
    4639          16 :     if (fldnum > 0)
    4640             :     {
    4641             :         /* N'th field separator not found */
    4642             :         /* if last field requested, return it, else empty string */
    4643           8 :         if (fldnum == 1)
    4644             :         {
    4645           4 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);
    4646             : 
    4647           4 :             result_text = cstring_to_text_with_len(start_ptr,
    4648             :                                                    inputstring_len - last_len);
    4649             :         }
    4650             :         else
    4651           4 :             result_text = cstring_to_text("");
    4652             :     }
    4653             :     else
    4654             :     {
    4655             :         /* non-last field requested */
    4656           8 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
    4657             :     }
    4658             : 
    4659          16 :     PG_RETURN_TEXT_P(result_text);
    4660             : }
    4661             : 
    4662             : /*
    4663             :  * Convenience function to return true when two text params are equal.
    4664             :  */
    4665             : static bool
    4666          56 : text_isequal(text *txt1, text *txt2, Oid collid)
    4667             : {
    4668          56 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
    4669             :                                                 collid,
    4670             :                                                 PointerGetDatum(txt1),
    4671             :                                                 PointerGetDatum(txt2)));
    4672             : }
    4673             : 
    4674             : /*
    4675             :  * text_to_array
    4676             :  * parse input string and return text array of elements,
    4677             :  * based on provided field separator
    4678             :  */
    4679             : Datum
    4680          56 : text_to_array(PG_FUNCTION_ARGS)
    4681             : {
    4682          56 :     return text_to_array_internal(fcinfo);
    4683             : }
    4684             : 
    4685             : /*
    4686             :  * text_to_array_null
    4687             :  * parse input string and return text array of elements,
    4688             :  * based on provided field separator and null string
    4689             :  *
    4690             :  * This is a separate entry point only to prevent the regression tests from
    4691             :  * complaining about different argument sets for the same internal function.
    4692             :  */
    4693             : Datum
    4694          16 : text_to_array_null(PG_FUNCTION_ARGS)
    4695             : {
    4696          16 :     return text_to_array_internal(fcinfo);
    4697             : }
    4698             : 
    4699             : /*
    4700             :  * common code for text_to_array and text_to_array_null functions
    4701             :  *
    4702             :  * These are not strict so we have to test for null inputs explicitly.
    4703             :  */
    4704             : static Datum
    4705          72 : text_to_array_internal(PG_FUNCTION_ARGS)
    4706             : {
    4707             :     text       *inputstring;
    4708             :     text       *fldsep;
    4709             :     text       *null_string;
    4710             :     int         inputstring_len;
    4711             :     int         fldsep_len;
    4712             :     char       *start_ptr;
    4713             :     text       *result_text;
    4714             :     bool        is_null;
    4715          72 :     ArrayBuildState *astate = NULL;
    4716             : 
    4717             :     /* when input string is NULL, then result is NULL too */
    4718          72 :     if (PG_ARGISNULL(0))
    4719           4 :         PG_RETURN_NULL();
    4720             : 
    4721          68 :     inputstring = PG_GETARG_TEXT_PP(0);
    4722             : 
    4723             :     /* fldsep can be NULL */
    4724          68 :     if (!PG_ARGISNULL(1))
    4725          64 :         fldsep = PG_GETARG_TEXT_PP(1);
    4726             :     else
    4727           4 :         fldsep = NULL;
    4728             : 
    4729             :     /* null_string can be NULL or omitted */
    4730          68 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4731          16 :         null_string = PG_GETARG_TEXT_PP(2);
    4732             :     else
    4733          52 :         null_string = NULL;
    4734             : 
    4735          68 :     if (fldsep != NULL)
    4736             :     {
    4737             :         /*
    4738             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4739             :          * to search for occurrences of fldsep.
    4740             :          */
    4741             :         TextPositionState state;
    4742             : 
    4743          64 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4744          64 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4745             : 
    4746             :         /* return empty array for empty input string */
    4747          64 :         if (inputstring_len < 1)
    4748          24 :             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4749             : 
    4750             :         /*
    4751             :          * empty field separator: return the input string as a one-element
    4752             :          * array
    4753             :          */
    4754          60 :         if (fldsep_len < 1)
    4755             :         {
    4756             :             Datum       elems[1];
    4757             :             bool        nulls[1];
    4758             :             int         dims[1];
    4759             :             int         lbs[1];
    4760             : 
    4761             :             /* single element can be a NULL too */
    4762          20 :             is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
    4763             : 
    4764          20 :             elems[0] = PointerGetDatum(inputstring);
    4765          20 :             nulls[0] = is_null;
    4766          20 :             dims[0] = 1;
    4767          20 :             lbs[0] = 1;
    4768             :             /* XXX: this hardcodes assumptions about the text type */
    4769          20 :             PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
    4770             :                                                      1, dims, lbs,
    4771             :                                                      TEXTOID, -1, false, TYPALIGN_INT));
    4772             :         }
    4773             : 
    4774          40 :         text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4775             : 
    4776          40 :         start_ptr = VARDATA_ANY(inputstring);
    4777             : 
    4778             :         for (;;)
    4779         208 :         {
    4780             :             bool        found;
    4781             :             char       *end_ptr;
    4782             :             int         chunk_len;
    4783             : 
    4784         248 :             CHECK_FOR_INTERRUPTS();
    4785             : 
    4786         248 :             found = text_position_next(&state);
    4787         248 :             if (!found)
    4788             :             {
    4789             :                 /* fetch last field */
    4790          40 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4791          40 :                 end_ptr = NULL; /* not used, but some compilers complain */
    4792             :             }
    4793             :             else
    4794             :             {
    4795             :                 /* fetch non-last field */
    4796         208 :                 end_ptr = text_position_get_match_ptr(&state);
    4797         208 :                 chunk_len = end_ptr - start_ptr;
    4798             :             }
    4799             : 
    4800             :             /* must build a temp text datum to pass to accumArrayResult */
    4801         248 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4802         248 :             is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
    4803             : 
    4804             :             /* stash away this field */
    4805         248 :             astate = accumArrayResult(astate,
    4806             :                                       PointerGetDatum(result_text),
    4807             :                                       is_null,
    4808             :                                       TEXTOID,
    4809             :                                       CurrentMemoryContext);
    4810             : 
    4811         248 :             pfree(result_text);
    4812             : 
    4813         248 :             if (!found)
    4814          40 :                 break;
    4815             : 
    4816         208 :             start_ptr = end_ptr + fldsep_len;
    4817             :         }
    4818             : 
    4819          40 :         text_position_cleanup(&state);
    4820             :     }
    4821             :     else
    4822             :     {
    4823             :         /*
    4824             :          * When fldsep is NULL, each character in the inputstring becomes an
    4825             :          * element in the result array.  The separator is effectively the
    4826             :          * space between characters.
    4827             :          */
    4828           4 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4829             : 
    4830             :         /* return empty array for empty input string */
    4831           4 :         if (inputstring_len < 1)
    4832           0 :             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4833             : 
    4834           4 :         start_ptr = VARDATA_ANY(inputstring);
    4835             : 
    4836          24 :         while (inputstring_len > 0)
    4837             :         {
    4838          20 :             int         chunk_len = pg_mblen(start_ptr);
    4839             : 
    4840          20 :             CHECK_FOR_INTERRUPTS();
    4841             : 
    4842             :             /* must build a temp text datum to pass to accumArrayResult */
    4843          20 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4844          20 :             is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
    4845             : 
    4846             :             /* stash away this field */
    4847          20 :             astate = accumArrayResult(astate,
    4848             :                                       PointerGetDatum(result_text),
    4849             :                                       is_null,
    4850             :                                       TEXTOID,
    4851             :                                       CurrentMemoryContext);
    4852             : 
    4853          20 :             pfree(result_text);
    4854             : 
    4855          20 :             start_ptr += chunk_len;
    4856          20 :             inputstring_len -= chunk_len;
    4857             :         }
    4858             :     }
    4859             : 
    4860          44 :     PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
    4861             :                                           CurrentMemoryContext));
    4862             : }
    4863             : 
    4864             : /*
    4865             :  * array_to_text
    4866             :  * Concatenate the cstring (output-function) form of the elements of the
    4867             :  * array in argument 0, separated by the text given in argument 1.
    4868             :  */
    4869             : Datum
    4870       35626 : array_to_text(PG_FUNCTION_ARGS)
    4871             : {
    4872       35626 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    4873       35626 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1)); /* separator as a C string */
    4874             : 
    4875       35626 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL)); /* NULL null_string: null elements are ignored */
    4876             : }
    4877             : 
    4878             : /*
    4879             :  * array_to_text_null
    4880             :  * Concatenate the cstring form of the elements of the array in argument 0,
    4881             :  * using argument 1 as field separator and argument 2 as the null string.
    4882             :  *
    4883             :  * This version is not strict, so null inputs are tested for explicitly below.
    4884             :  */
    4885             : Datum
    4886           8 : array_to_text_null(PG_FUNCTION_ARGS)
    4887             : {
    4888             :     ArrayType  *v;
    4889             :     char       *fldsep;
    4890             :     char       *null_string;
    4891             : 
    4892             :     /* result is SQL NULL when the array or the separator is NULL */
    4893           8 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    4894           0 :         PG_RETURN_NULL();
    4895             : 
    4896           8 :     v = PG_GETARG_ARRAYTYPE_P(0);
    4897           8 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4898             : 
    4899             :     /* a NULL third argument is passed on as a null pointer (null elements are then ignored) */
    4900           8 :     if (!PG_ARGISNULL(2))
    4901           4 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    4902             :     else
    4903           4 :         null_string = NULL;
    4904             : 
    4905           8 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    4906             : }
    4907             : 
    4908             : /*
    4909             :  * Common code for array_to_text and array_to_text_null: join the output-function strings of v's elements with fldsep; null elements become null_string, or are skipped when null_string is NULL.
    4910             :  */
    4911             : static text *
    4912       35646 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    4913             :                        const char *fldsep, const char *null_string)
    4914             : {
    4915             :     text       *result;
    4916             :     int         nitems,
    4917             :                *dims,
    4918             :                 ndims;
    4919             :     Oid         element_type;
    4920             :     int         typlen;
    4921             :     bool        typbyval;
    4922             :     char        typalign;
    4923             :     StringInfoData buf;
    4924       35646 :     bool        printed = false; /* set once any item has been appended */
    4925             :     char       *p;
    4926             :     bits8      *bitmap;
    4927             :     int         bitmask;
    4928             :     int         i;
    4929             :     ArrayMetaState *my_extra;
    4930             : 
    4931       35646 :     ndims = ARR_NDIM(v);
    4932       35646 :     dims = ARR_DIMS(v);
    4933       35646 :     nitems = ArrayGetNItems(ndims, dims);
    4934             : 
    4935             :     /* if there are no elements, return an empty string (before any buffer or cache setup) */
    4936       35646 :     if (nitems == 0)
    4937       21006 :         return cstring_to_text_with_len("", 0);
    4938             : 
    4939       14640 :     element_type = ARR_ELEMTYPE(v);
    4940       14640 :     initStringInfo(&buf);
    4941             : 
    4942             :     /*
    4943             :      * We arrange to look up info about element type, including its output
    4944             :      * conversion proc, only once per series of calls, assuming the element
    4945             :      * type doesn't change underneath us.  The info is cached in fn_extra.
    4946             :      */
    4947       14640 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4948       14640 :     if (my_extra == NULL)
    4949             :     {
    4950         936 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4951             :                                                       sizeof(ArrayMetaState));
    4952         936 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4953         936 :         my_extra->element_type = ~element_type; /* differs from element_type, so the lookup below runs */
    4954             :     }
    4955             : 
    4956       14640 :     if (my_extra->element_type != element_type)
    4957             :     {
    4958             :         /*
    4959             :          * Get info about element type, including its output conversion proc, and remember which type it describes
    4960             :          */
    4961         936 :         get_type_io_data(element_type, IOFunc_output,
    4962             :                          &my_extra->typlen, &my_extra->typbyval,
    4963             :                          &my_extra->typalign, &my_extra->typdelim,
    4964             :                          &my_extra->typioparam, &my_extra->typiofunc);
    4965         936 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    4966         936 :                       fcinfo->flinfo->fn_mcxt);
    4967         936 :         my_extra->element_type = element_type;
    4968             :     }
    4969       14640 :     typlen = my_extra->typlen;
    4970       14640 :     typbyval = my_extra->typbyval;
    4971       14640 :     typalign = my_extra->typalign;
    4972             : 
    4973       14640 :     p = ARR_DATA_PTR(v);
    4974       14640 :     bitmap = ARR_NULLBITMAP(v);
    4975       14640 :     bitmask = 1; /* start at the lowest bit of the first bitmap byte */
    4976             : 
    4977       49930 :     for (i = 0; i < nitems; i++)
    4978             :     {
    4979             :         Datum       itemvalue;
    4980             :         char       *value;
    4981             : 
    4982             :         /* Get source element, checking for NULL: a zero bit in the bitmap marks a null element */
    4983       35290 :         if (bitmap && (*bitmap & bitmask) == 0)
    4984             :         {
    4985             :             /* if null_string is NULL, we just ignore null elements (no separator is emitted either) */
    4986          16 :             if (null_string != NULL)
    4987             :             {
    4988           4 :                 if (printed)
    4989           4 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string); /* separator precedes every item but the first */
    4990             :                 else
    4991           0 :                     appendStringInfoString(&buf, null_string);
    4992           4 :                 printed = true;
    4993             :             }
    4994             :         }
    4995             :         else
    4996             :         {
    4997       35278 :             itemvalue = fetch_att(p, typbyval, typlen);
    4998             : 
    4999       35278 :             value = OutputFunctionCall(&my_extra->proc, itemvalue); /* element rendered by its type's output function */
    5000             : 
    5001       35278 :             if (printed)
    5002       20638 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    5003             :             else
    5004       14640 :                 appendStringInfoString(&buf, value);
    5005       35278 :             printed = true;
    5006             : 
    5007       35278 :             p = att_addlength_pointer(p, typlen, p); /* p moves only for non-null elements */
    5008       35278 :             p = (char *) att_align_nominal(p, typalign);
    5009             :         }
    5010             : 
    5011             :         /* advance bitmap pointer if any: next bit, or first bit of the next byte */
    5012       35290 :         if (bitmap)
    5013             :         {
    5014          72 :             bitmask <<= 1;
    5015          72 :             if (bitmask == 0x100) /* all eight bits of this byte consumed */
    5016             :             {
    5017           0 :                 bitmap++;
    5018           0 :                 bitmask = 1;
    5019             :             }
    5020             :         }
    5021             :     }
    5022             : 
    5023       14640 :     result = cstring_to_text_with_len(buf.data, buf.len);
    5024       14640 :     pfree(buf.data); /* result holds its own copy of the bytes */
    5025             : 
    5026       14640 :     return result;
    5027             : }
    5028             : 
    5029             : #define HEXBASE 16
    5030             : /*
    5031             :  * Convert an int32 to text containing its base 16 (hex) representation, using lowercase digits and no leading zeros.
    5032             :  * The argument is cast to uint32 first, so a negative input is rendered as its unsigned 32-bit value.
    5033             :  */
    5034             : Datum
    5035        1324 : to_hex32(PG_FUNCTION_ARGS)
    5036             : {
    5037        1324 :     uint32      value = (uint32) PG_GETARG_INT32(0);
    5038             :     char       *ptr;
    5039        1324 :     const char *digits = "0123456789abcdef";
    5040             :     char        buf[32];        /* bigger than needed (at most 8 hex digits plus NUL), but reasonable */
    5041             : 
    5042        1324 :     ptr = buf + sizeof(buf) - 1; /* build the string backwards from the last byte */
    5043        1324 :     *ptr = '\0';
    5044             : 
    5045             :     do
    5046             :     {
    5047        2472 :         *--ptr = digits[value % HEXBASE]; /* least significant digit first */
    5048        2472 :         value /= HEXBASE;
    5049        2472 :     } while (ptr > buf && value); /* do-while, so zero still produces "0" */
    5050             : 
    5051        1324 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    5052             : }
    5053             : 
    5054             : /*
    5055             :  * Convert an int64 to text containing its base 16 (hex) representation, using lowercase digits and no leading zeros.
    5056             :  * The argument is cast to uint64 first, so a negative input is rendered as its unsigned 64-bit value.
    5057             :  */
    5058             : Datum
    5059           4 : to_hex64(PG_FUNCTION_ARGS)
    5060             : {
    5061           4 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    5062             :     char       *ptr;
    5063           4 :     const char *digits = "0123456789abcdef";
    5064             :     char        buf[32];        /* bigger than needed (at most 16 hex digits plus NUL), but reasonable */
    5065             : 
    5066           4 :     ptr = buf + sizeof(buf) - 1; /* build the string backwards from the last byte */
    5067           4 :     *ptr = '\0';
    5068             : 
    5069             :     do
    5070             :     {
    5071          32 :         *--ptr = digits[value % HEXBASE]; /* least significant digit first */
    5072          32 :         value /= HEXBASE;
    5073          32 :     } while (ptr > buf && value); /* do-while, so zero still produces "0" */
    5074             : 
    5075           4 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    5076             : }
    5077             : 
    5078             : /*
    5079             :  * Return the size of a datum: toast_datum_size() for varlena types (possibly compressed),
    5080             :  * strlen + 1 for cstring, and the type's typlen for fixed-width types.
    5081             :  * Works on any data type; the argument type's typlen is looked up once and cached in fn_extra.
    5082             :  */
    5083             : Datum
    5084         102 : pg_column_size(PG_FUNCTION_ARGS)
    5085             : {
    5086         102 :     Datum       value = PG_GETARG_DATUM(0);
    5087             :     int32       result;
    5088             :     int         typlen;
    5089             : 
    5090             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5091         102 :     if (fcinfo->flinfo->fn_extra == NULL)
    5092             :     {
    5093             :         /* Lookup the datatype of the supplied argument */
    5094         102 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5095             : 
    5096         102 :         typlen = get_typlen(argtypeid);
    5097         102 :         if (typlen == 0)        /* should not happen; treated as a failed type lookup */
    5098           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5099             : 
    5100         102 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5101             :                                                       sizeof(int));
    5102         102 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5103             :     }
    5104             :     else
    5105           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra); /* later calls reuse the cached typlen */
    5106             : 
    5107         102 :     if (typlen == -1)
    5108             :     {
    5109             :         /* varlena type, possibly toasted */
    5110         102 :         result = toast_datum_size(value);
    5111             :     }
    5112           0 :     else if (typlen == -2)
    5113             :     {
    5114             :         /* cstring: count the terminating NUL as well */
    5115           0 :         result = strlen(DatumGetCString(value)) + 1;
    5116             :     }
    5117             :     else
    5118             :     {
    5119             :         /* ordinary fixed-width type: size is simply typlen */
    5120           0 :         result = typlen;
    5121             :     }
    5122             : 
    5123         102 :     PG_RETURN_INT32(result);
    5124             : }
    5125             : 
    5126             : /*
    5127             :  * string_agg - Concatenates values and returns string.
    5128             :  *
    5129             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    5130             :  *
    5131             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    5132             :  * actually used at all, and on subsequent calls the delimiter precedes
    5133             :  * the associated value.
    5134             :  */
    5135             : 
    5136             : /* subroutine to create the empty StringInfo state in the aggregate's memory context */
    5137             : static StringInfo
    5138         690 : makeStringAggState(FunctionCallInfo fcinfo)
    5139             : {
    5140             :     StringInfo  state;
    5141             :     MemoryContext aggcontext;
    5142             :     MemoryContext oldcontext;
    5143             : 
    5144         690 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    5145             :     {
    5146             :         /* cannot be called directly because of internal-type argument */
    5147           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    5148             :     }
    5149             : 
    5150             :     /*
    5151             :      * Create state in aggregate context.  It'll stay there across subsequent
    5152             :      * calls.  The caller's memory context is restored before returning.
    5153             :      */
    5154         690 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    5155         690 :     state = makeStringInfo();
    5156         690 :     MemoryContextSwitchTo(oldcontext);
    5157             : 
    5158         690 :     return state;
    5159             : }
    5160             : 
    5161             : Datum
    5162      614754 : string_agg_transfn(PG_FUNCTION_ARGS) /* args: 0 = state (internal), 1 = value, 2 = delimiter */
    5163             : {
    5164             :     StringInfo  state;
    5165             : 
    5166      614754 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); /* NULL until the first non-null value arrives */
    5167             : 
    5168             :     /* Append the value unless null; a null value leaves the state untouched. */
    5169      614754 :     if (!PG_ARGISNULL(1))
    5170             :     {
    5171             :         /* On the first time through, we ignore the delimiter; later, a null delimiter is simply skipped. */
    5172      614722 :         if (state == NULL)
    5173         674 :             state = makeStringAggState(fcinfo);
    5174      614048 :         else if (!PG_ARGISNULL(2))
    5175      614048 :             appendStringInfoText(state, PG_GETARG_TEXT_PP(2));  /* delimiter */
    5176             : 
    5177      614722 :         appendStringInfoText(state, PG_GETARG_TEXT_PP(1));  /* value */
    5178             :     }
    5179             : 
    5180             :     /*
    5181             :      * The transition type for string_agg() is declared to be "internal",
    5182             :      * which is a pass-by-value type the same size as a pointer.
    5183             :      */
    5184      614754 :     PG_RETURN_POINTER(state);
    5185             : }
    5186             : 
    5187             : Datum
    5188         714 : string_agg_finalfn(PG_FUNCTION_ARGS) /* arg 0 = accumulated state, possibly NULL */
    5189             : {
    5190             :     StringInfo  state;
    5191             : 
    5192             :     /* cannot be called directly because of internal-type argument (checked only in assert-enabled builds) */
    5193             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5194             : 
    5195         714 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5196             : 
    5197         714 :     if (state != NULL)
    5198         674 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len)); /* copy the buffer into a text result */
    5199             :     else
    5200          40 :         PG_RETURN_NULL(); /* no state was ever created, so the aggregate result is NULL */
    5201             : }
    5202             : 
    5203             : /*
    5204             :  * Prepare cache with fmgr info for the output functions of the datatypes of
    5205             :  * the arguments of a concat-like function, beginning with argument "argidx".
    5206             :  * (Arguments before that will have corresponding slots in the resulting
    5207             :  * FmgrInfo array, but we don't fill those slots.)  The array is stored in fn_extra and returned.
    5208             :  */
    5209             : static FmgrInfo *
    5210          24 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
    5211             : {
    5212             :     FmgrInfo   *foutcache;
    5213             :     int         i;
    5214             : 
    5215             :     /* We keep the info in fn_mcxt so it survives across calls */
    5216          24 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5217          24 :                                                 PG_NARGS() * sizeof(FmgrInfo)); /* one slot per argument, indexed by argument number */
    5218             : 
    5219         120 :     for (i = argidx; i < PG_NARGS(); i++)
    5220             :     {
    5221             :         Oid         valtype;
    5222             :         Oid         typOutput;
    5223             :         bool        typIsVarlena;
    5224             : 
    5225          96 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    5226          96 :         if (!OidIsValid(valtype))
    5227           0 :             elog(ERROR, "could not determine data type of concat() input");
    5228             : 
    5229          96 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena); /* typIsVarlena is fetched but not used here */
    5230          96 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
    5231             :     }
    5232             : 
    5233          24 :     fcinfo->flinfo->fn_extra = foutcache; /* stored only after every slot from argidx on is filled */
    5234             : 
    5235          24 :     return foutcache;
    5236             : }
    5237             : 
    5238             : /*
    5239             :  * Implementation of both concat() and concat_ws().
    5240             :  *
    5241             :  * sepstr is the separator string to place between values.
    5242             :  * argidx identifies the first argument to concatenate (counting from zero);
    5243             :  * note that this must be constant across any one series of calls.
    5244             :  *
    5245             :  * Returns NULL if result should be NULL (the VARIADIC NULL case below), else text value.
    5246             :  */
    5247             : static text *
    5248          44 : concat_internal(const char *sepstr, int argidx,
    5249             :                 FunctionCallInfo fcinfo)
    5250             : {
    5251             :     text       *result;
    5252             :     StringInfoData str;
    5253             :     FmgrInfo   *foutcache;
    5254          44 :     bool        first_arg = true; /* true until the first non-null value has been appended */
    5255             :     int         i;
    5256             : 
    5257             :     /*
    5258             :      * concat(VARIADIC some-array) is essentially equivalent to
    5259             :      * array_to_text(), ie concat the array elements with the given separator.
    5260             :      * So we just pass the case off to that code.
    5261             :      */
    5262          44 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5263             :     {
    5264             :         ArrayType  *arr;
    5265             : 
    5266             :         /* Should have just the one argument */
    5267             :         Assert(argidx == PG_NARGS() - 1);
    5268             : 
    5269             :         /* concat(VARIADIC NULL) is defined as NULL */
    5270          20 :         if (PG_ARGISNULL(argidx))
    5271           8 :             return NULL;
    5272             : 
    5273             :         /*
    5274             :          * Non-null argument had better be an array.  We assume that any call
    5275             :          * context that could let get_fn_expr_variadic return true will have
    5276             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    5277             :          * it should be okay to just Assert that it's an array rather than
    5278             :          * doing a full-fledged error check.
    5279             :          */
    5280             :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    5281             : 
    5282             :         /* OK, safe to fetch the array value */
    5283          12 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    5284             : 
    5285             :         /*
    5286             :          * And serialize the array.  We tell array_to_text to ignore null
    5287             :          * elements (by passing a NULL null_string), which matches the behavior of the loop below.
    5288             :          */
    5289          12 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    5290             :     }
    5291             : 
    5292             :     /* Normal case without explicit VARIADIC marker */
    5293          24 :     initStringInfo(&str);
    5294             : 
    5295             :     /* Get output function info from fn_extra, building it if first time through */
    5296          24 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
    5297          24 :     if (foutcache == NULL)
    5298          24 :         foutcache = build_concat_foutcache(fcinfo, argidx);
    5299             : 
    5300         120 :     for (i = argidx; i < PG_NARGS(); i++)
    5301             :     {
    5302          96 :         if (!PG_ARGISNULL(i)) /* null arguments contribute nothing, not even a separator */
    5303             :         {
    5304          88 :             Datum       value = PG_GETARG_DATUM(i);
    5305             : 
    5306             :             /* add separator if appropriate, i.e. before every value except the first one appended */
    5307          88 :             if (first_arg)
    5308          24 :                 first_arg = false;
    5309             :             else
    5310          64 :                 appendStringInfoString(&str, sepstr);
    5311             : 
    5312             :             /* call the appropriate type output function, append the result */
    5313          88 :             appendStringInfoString(&str,
    5314          88 :                                    OutputFunctionCall(&foutcache[i], value));
    5315             :         }
    5316             :     }
    5317             : 
    5318          24 :     result = cstring_to_text_with_len(str.data, str.len);
    5319          24 :     pfree(str.data); /* the buffer is no longer needed once copied into result */
    5320             : 
    5321          24 :     return result;
    5322             : }
    5323             : 
    5324             : /*
    5325             :  * Concatenate all arguments. NULL arguments are ignored. The result is SQL NULL only when concat_internal returns NULL.
    5326             :  */
    5327             : Datum
    5328          20 : text_concat(PG_FUNCTION_ARGS)
    5329             : {
    5330             :     text       *result;
    5331             : 
    5332          20 :     result = concat_internal("", 0, fcinfo); /* empty separator, starting at argument 0 */
    5333          20 :     if (result == NULL)
    5334           4 :         PG_RETURN_NULL();
    5335          16 :     PG_RETURN_TEXT_P(result);
    5336             : }
    5337             : 
    5338             : /*
    5339             :  * Concatenate all but first argument value with separators. The first
    5340             :  * parameter is used as the separator. NULL arguments are ignored, but a NULL separator makes the result NULL.
    5341             :  */
    5342             : Datum
    5343          28 : text_concat_ws(PG_FUNCTION_ARGS)
    5344             : {
    5345             :     char       *sep;
    5346             :     text       *result;
    5347             : 
    5348             :     /* return NULL when separator is NULL */
    5349          28 :     if (PG_ARGISNULL(0))
    5350           4 :         PG_RETURN_NULL();
    5351          24 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
    5352             : 
    5353          24 :     result = concat_internal(sep, 1, fcinfo); /* values start at argument 1 */
    5354          24 :     if (result == NULL)
    5355           4 :         PG_RETURN_NULL();
    5356          20 :     PG_RETURN_TEXT_P(result);
    5357             : }
    5358             : 
    5359             : /*
    5360             :  * Return first n characters in the string. When n is negative,
    5361             :  * return all but last |n| characters. Counts are in characters, not bytes.
    5362             :  */
    5363             : Datum
    5364        1256 : text_left(PG_FUNCTION_ARGS)
    5365             : {
    5366        1256 :     int         n = PG_GETARG_INT32(1);
    5367             : 
    5368        1256 :     if (n < 0)
    5369             :     {
    5370          20 :         text       *str = PG_GETARG_TEXT_PP(0);
    5371          20 :         const char *p = VARDATA_ANY(str);
    5372          20 :         int         len = VARSIZE_ANY_EXHDR(str);
    5373             :         int         rlen;
    5374             : 
    5375          20 :         n = pg_mbstrlen_with_len(p, len) + n; /* characters to keep: total count minus |n| (may be negative) */
    5376          20 :         rlen = pg_mbcharcliplen(p, len, n); /* rlen is used below as the number of bytes to copy */
    5377          20 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    5378             :     }
    5379             :     else
    5380        1236 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false)); /* n >= 0: delegate to text_substring */
    5381             : }
    5382             : 
    5383             : /*
    5384             :  * Return last n characters in the string. When n is negative,
    5385             :  * return all but first |n| characters. Counts are in characters, not bytes.
    5386             :  */
    5387             : Datum
    5388          44 : text_right(PG_FUNCTION_ARGS)
    5389             : {
    5390          44 :     text       *str = PG_GETARG_TEXT_PP(0);
    5391          44 :     const char *p = VARDATA_ANY(str);
    5392          44 :     int         len = VARSIZE_ANY_EXHDR(str);
    5393          44 :     int         n = PG_GETARG_INT32(1);
    5394             :     int         off;
    5395             : 
    5396          44 :     if (n < 0)
    5397          20 :         n = -n; /* drop the first |n| characters; NOTE(review): -n overflows when n is INT_MIN -- confirm handling */
    5398             :     else
    5399          24 :         n = pg_mbstrlen_with_len(p, len) - n; /* n becomes the number of leading characters to drop */
    5400          44 :     off = pg_mbcharcliplen(p, len, n); /* off is used below as the byte offset where the result starts */
    5401             : 
    5402          44 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    5403             : }
    5404             : 
    5405             : /*
    5406             :  * Return reversed string: characters appear in reverse order, with each multibyte character copied as a unit.
    5407             :  */
    5408             : Datum
    5409           4 : text_reverse(PG_FUNCTION_ARGS)
    5410             : {
    5411           4 :     text       *str = PG_GETARG_TEXT_PP(0);
    5412           4 :     const char *p = VARDATA_ANY(str);
    5413           4 :     int         len = VARSIZE_ANY_EXHDR(str);
    5414           4 :     const char *endp = p + len;
    5415             :     text       *result;
    5416             :     char       *dst;
    5417             : 
    5418           4 :     result = palloc(len + VARHDRSZ); /* result has the same byte length as the input */
    5419           4 :     dst = (char *) VARDATA(result) + len; /* dst starts just past the result data and moves backwards */
    5420           4 :     SET_VARSIZE(result, len + VARHDRSZ);
    5421             : 
    5422           4 :     if (pg_database_encoding_max_length() > 1)
    5423             :     {
    5424             :         /* multibyte version: copy each character's bytes, in order, into the slot just before dst */
    5425          24 :         while (p < endp)
    5426             :         {
    5427             :             int         sz;
    5428             : 
    5429          20 :             sz = pg_mblen(p);
    5430          20 :             dst -= sz;
    5431          20 :             memcpy(dst, p, sz);
    5432          20 :             p += sz;
    5433             :         }
    5434             :     }
    5435             :     else
    5436             :     {
    5437             :         /* single byte version: plain byte-by-byte reversal */
    5438           0 :         while (p < endp)
    5439           0 :             *(--dst) = *p++;
    5440             :     }
    5441             : 
    5442           4 :     PG_RETURN_TEXT_P(result);
    5443             : }
    5444             : 
    5445             : 
    5446             : /*
    5447             :  * Support macros for text_format()
    5448             :  */
    5449             : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
    5450             : 
    5451             : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    5452             :     do { \
    5453             :         if (++(ptr) >= (end_ptr)) \
    5454             :             ereport(ERROR, \
    5455             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
    5456             :                      errmsg("unterminated format() type specifier"), \
    5457             :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
    5458             :     } while (0)
    5459             : 
    5460             : /*
    5461             :  * Returns a formatted string
    5462             :  */
    5463             : Datum
    5464        8796 : text_format(PG_FUNCTION_ARGS)
    5465             : {
    5466             :     text       *fmt;
    5467             :     StringInfoData str;
    5468             :     const char *cp;
    5469             :     const char *start_ptr;
    5470             :     const char *end_ptr;
    5471             :     text       *result;
    5472             :     int         arg;
    5473             :     bool        funcvariadic;
    5474             :     int         nargs;
    5475        8796 :     Datum      *elements = NULL;
    5476        8796 :     bool       *nulls = NULL;
    5477        8796 :     Oid         element_type = InvalidOid;
    5478        8796 :     Oid         prev_type = InvalidOid;
    5479        8796 :     Oid         prev_width_type = InvalidOid;
    5480             :     FmgrInfo    typoutputfinfo;
    5481             :     FmgrInfo    typoutputinfo_width;
    5482             : 
    5483             :     /* When format string is null, immediately return null */
    5484        8796 :     if (PG_ARGISNULL(0))
    5485           4 :         PG_RETURN_NULL();
    5486             : 
    5487             :     /* If argument is marked VARIADIC, expand array into elements */
    5488        8792 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5489             :     {
    5490             :         ArrayType  *arr;
    5491             :         int16       elmlen;
    5492             :         bool        elmbyval;
    5493             :         char        elmalign;
    5494             :         int         nitems;
    5495             : 
    5496             :         /* Should have just the one argument */
    5497             :         Assert(PG_NARGS() == 2);
    5498             : 
    5499             :         /* If argument is NULL, we treat it as zero-length array */
    5500          32 :         if (PG_ARGISNULL(1))
    5501           4 :             nitems = 0;
    5502             :         else
    5503             :         {
    5504             :             /*
    5505             :              * Non-null argument had better be an array.  We assume that any
    5506             :              * call context that could let get_fn_expr_variadic return true
    5507             :              * will have checked that a VARIADIC-labeled parameter actually is
    5508             :              * an array.  So it should be okay to just Assert that it's an
    5509             :              * array rather than doing a full-fledged error check.
    5510             :              */
    5511             :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5512             : 
    5513             :             /* OK, safe to fetch the array value */
    5514          28 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5515             : 
    5516             :             /* Get info about array element type */
    5517          28 :             element_type = ARR_ELEMTYPE(arr);
    5518          28 :             get_typlenbyvalalign(element_type,
    5519             :                                  &elmlen, &elmbyval, &elmalign);
    5520             : 
    5521             :             /* Extract all array elements */
    5522          28 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5523             :                               &elements, &nulls, &nitems);
    5524             :         }
    5525             : 
    5526          32 :         nargs = nitems + 1;
    5527          32 :         funcvariadic = true;
    5528             :     }
    5529             :     else
    5530             :     {
    5531             :         /* Non-variadic case, we'll process the arguments individually */
    5532        8760 :         nargs = PG_NARGS();
    5533        8760 :         funcvariadic = false;
    5534             :     }
    5535             : 
    5536             :     /* Setup for main loop. */
    5537        8792 :     fmt = PG_GETARG_TEXT_PP(0);
    5538        8792 :     start_ptr = VARDATA_ANY(fmt);
    5539        8792 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5540        8792 :     initStringInfo(&str);
    5541        8792 :     arg = 1;                    /* next argument position to print */
    5542             : 
    5543             :     /* Scan format string, looking for conversion specifiers. */
    5544      256652 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5545             :     {
    5546             :         int         argpos;
    5547             :         int         widthpos;
    5548             :         int         flags;
    5549             :         int         width;
    5550             :         Datum       value;
    5551             :         bool        isNull;
    5552             :         Oid         typid;
    5553             : 
    5554             :         /*
    5555             :          * If it's not the start of a conversion specifier, just copy it to
    5556             :          * the output buffer.
    5557             :          */
    5558      247900 :         if (*cp != '%')
    5559             :         {
    5560      229676 :             appendStringInfoCharMacro(&str, *cp);
    5561      229688 :             continue;
    5562             :         }
    5563             : 
    5564       18224 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5565             : 
    5566             :         /* Easy case: %% outputs a single % */
    5567       18224 :         if (*cp == '%')
    5568             :         {
    5569          12 :             appendStringInfoCharMacro(&str, *cp);
    5570          12 :             continue;
    5571             :         }
    5572             : 
    5573             :         /* Parse the optional portions of the format specifier */
    5574       18212 :         cp = text_format_parse_format(cp, end_ptr,
    5575             :                                       &argpos, &widthpos,
    5576             :                                       &flags, &width);
    5577             : 
    5578             :         /*
    5579             :          * Next we should see the main conversion specifier.  Whether or not
    5580             :          * an argument position was present, it's known that at least one
    5581             :          * character remains in the string at this point.  Experience suggests
    5582             :          * that it's worth checking that that character is one of the expected
    5583             :          * ones before we try to fetch arguments, so as to produce the least
    5584             :          * confusing response to a mis-formatted specifier.
    5585             :          */
    5586       18196 :         if (strchr("sIL", *cp) == NULL)
    5587           4 :             ereport(ERROR,
    5588             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5589             :                      errmsg("unrecognized format() type specifier \"%c\"",
    5590             :                             *cp),
    5591             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5592             : 
    5593             :         /* If indirect width was specified, get its value */
    5594       18192 :         if (widthpos >= 0)
    5595             :         {
    5596             :             /* Collect the specified or next argument position */
    5597          28 :             if (widthpos > 0)
    5598          24 :                 arg = widthpos;
    5599          28 :             if (arg >= nargs)
    5600           0 :                 ereport(ERROR,
    5601             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5602             :                          errmsg("too few arguments for format()")));
    5603             : 
    5604             :             /* Get the value and type of the selected argument */
    5605          28 :             if (!funcvariadic)
    5606             :             {
    5607          28 :                 value = PG_GETARG_DATUM(arg);
    5608          28 :                 isNull = PG_ARGISNULL(arg);
    5609          28 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5610             :             }
    5611             :             else
    5612             :             {
    5613           0 :                 value = elements[arg - 1];
    5614           0 :                 isNull = nulls[arg - 1];
    5615           0 :                 typid = element_type;
    5616             :             }
    5617          28 :             if (!OidIsValid(typid))
    5618           0 :                 elog(ERROR, "could not determine data type of format() input");
    5619             : 
    5620          28 :             arg++;
    5621             : 
    5622             :             /* We can treat NULL width the same as zero */
    5623          28 :             if (isNull)
    5624           4 :                 width = 0;
    5625          24 :             else if (typid == INT4OID)
    5626          24 :                 width = DatumGetInt32(value);
    5627           0 :             else if (typid == INT2OID)
    5628           0 :                 width = DatumGetInt16(value);
    5629             :             else
    5630             :             {
    5631             :                 /* For less-usual datatypes, convert to text then to int */
    5632             :                 char       *str;
    5633             : 
    5634           0 :                 if (typid != prev_width_type)
    5635             :                 {
    5636             :                     Oid         typoutputfunc;
    5637             :                     bool        typIsVarlena;
    5638             : 
    5639           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5640           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    5641           0 :                     prev_width_type = typid;
    5642             :                 }
    5643             : 
    5644           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    5645             : 
    5646             :                 /* pg_strtoint32 will complain about bad data or overflow */
    5647           0 :                 width = pg_strtoint32(str);
    5648             : 
    5649           0 :                 pfree(str);
    5650             :             }
    5651             :         }
    5652             : 
    5653             :         /* Collect the specified or next argument position */
    5654       18192 :         if (argpos > 0)
    5655          88 :             arg = argpos;
    5656       18192 :         if (arg >= nargs)
    5657          16 :             ereport(ERROR,
    5658             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5659             :                      errmsg("too few arguments for format()")));
    5660             : 
    5661             :         /* Get the value and type of the selected argument */
    5662       18176 :         if (!funcvariadic)
    5663             :         {
    5664       17328 :             value = PG_GETARG_DATUM(arg);
    5665       17328 :             isNull = PG_ARGISNULL(arg);
    5666       17328 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5667             :         }
    5668             :         else
    5669             :         {
    5670         848 :             value = elements[arg - 1];
    5671         848 :             isNull = nulls[arg - 1];
    5672         848 :             typid = element_type;
    5673             :         }
    5674       18176 :         if (!OidIsValid(typid))
    5675           0 :             elog(ERROR, "could not determine data type of format() input");
    5676             : 
    5677       18176 :         arg++;
    5678             : 
    5679             :         /*
    5680             :          * Get the appropriate typOutput function, reusing previous one if
    5681             :          * same type as previous argument.  That's particularly useful in the
    5682             :          * variadic-array case, but often saves work even for ordinary calls.
    5683             :          */
    5684       18176 :         if (typid != prev_type)
    5685             :         {
    5686             :             Oid         typoutputfunc;
    5687             :             bool        typIsVarlena;
    5688             : 
    5689        9780 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5690        9780 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    5691        9780 :             prev_type = typid;
    5692             :         }
    5693             : 
    5694             :         /*
    5695             :          * And now we can format the value.
    5696             :          */
    5697       18176 :         switch (*cp)
    5698             :         {
    5699       18176 :             case 's':
    5700             :             case 'I':
    5701             :             case 'L':
    5702       18176 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    5703             :                                               value, isNull,
    5704             :                                               flags, width);
    5705       18172 :                 break;
    5706           0 :             default:
    5707             :                 /* should not get here, because of previous check */
    5708           0 :                 ereport(ERROR,
    5709             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5710             :                          errmsg("unrecognized format() type specifier \"%c\"",
    5711             :                                 *cp),
    5712             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    5713             :                 break;
    5714             :         }
    5715             :     }
    5716             : 
    5717             :     /* Don't need deconstruct_array results anymore. */
    5718        8752 :     if (elements != NULL)
    5719          28 :         pfree(elements);
    5720        8752 :     if (nulls != NULL)
    5721          28 :         pfree(nulls);
    5722             : 
    5723             :     /* Generate results. */
    5724        8752 :     result = cstring_to_text_with_len(str.data, str.len);
    5725        8752 :     pfree(str.data);
    5726             : 
    5727        8752 :     PG_RETURN_TEXT_P(result);
    5728             : }
    5729             : 
    5730             : /*
    5731             :  * Parse contiguous digits as a decimal number.
    5732             :  *
    5733             :  * Returns true if some digits could be parsed.
    5734             :  * The value is returned into *value, and *ptr is advanced to the next
    5735             :  * character to be parsed.
    5736             :  *
    5737             :  * Note parsing invariant: at least one character is known available before
    5738             :  * string end (end_ptr) at entry, and this is still true at exit.
    5739             :  */
    5740             : static bool
    5741       36400 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    5742             : {
    5743       36400 :     bool        found = false;
    5744       36400 :     const char *cp = *ptr;
    5745       36400 :     int         val = 0;
    5746             : 
    5747       36608 :     while (*cp >= '0' && *cp <= '9')
    5748             :     {
    5749         212 :         int8        digit = (*cp - '0');
    5750             : 
    5751         212 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
    5752         212 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
    5753           0 :             ereport(ERROR,
    5754             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5755             :                      errmsg("number is out of range")));
    5756         212 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5757         208 :         found = true;
    5758             :     }
    5759             : 
    5760       36396 :     *ptr = cp;
    5761       36396 :     *value = val;
    5762             : 
    5763       36396 :     return found;
    5764             : }
    5765             : 
    5766             : /*
    5767             :  * Parse a format specifier (generally following the SUS printf spec).
    5768             :  *
    5769             :  * We have already advanced over the initial '%', and we are looking for
    5770             :  * [argpos][flags][width]type (but the type character is not consumed here).
    5771             :  *
    5772             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    5773             :  * Output parameters:
    5774             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    5775             :  *  widthpos: argument position for width.  Zero means the argument position
    5776             :  *          was unspecified (ie, take the next arg) and -1 means no width
    5777             :  *          argument (width was omitted or specified as a constant).
    5778             :  *  flags: bitmask of flags.
    5779             :  *  width: directly-specified width value.  Zero means the width was omitted
    5780             :  *          (note it's not necessary to distinguish this case from an explicit
    5781             :  *          zero width value).
    5782             :  *
    5783             :  * The function result is the next character position to be parsed, ie, the
    5784             :  * location where the type character is/should be.
    5785             :  *
    5786             :  * Note parsing invariant: at least one character is known available before
    5787             :  * string end (end_ptr) at entry, and this is still true at exit.
    5788             :  */
    5789             : static const char *
    5790       18212 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    5791             :                          int *argpos, int *widthpos,
    5792             :                          int *flags, int *width)
    5793             : {
    5794       18212 :     const char *cp = start_ptr;
    5795             :     int         n;
    5796             : 
    5797             :     /* set defaults for output parameters */
    5798       18212 :     *argpos = -1;
    5799       18212 :     *widthpos = -1;
    5800       18212 :     *flags = 0;
    5801       18212 :     *width = 0;
    5802             : 
    5803             :     /* try to identify first number */
    5804       18212 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    5805             :     {
    5806         116 :         if (*cp != '$')
    5807             :         {
    5808             :             /* Must be just a width and a type, so we're done */
    5809          16 :             *width = n;
    5810          16 :             return cp;
    5811             :         }
    5812             :         /* The number was argument position */
    5813         100 :         *argpos = n;
    5814             :         /* Explicit 0 for argument index is immediately refused */
    5815         100 :         if (n == 0)
    5816           4 :             ereport(ERROR,
    5817             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5818             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5819          96 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5820             :     }
    5821             : 
    5822             :     /* Handle flags (only minus is supported now) */
    5823       18208 :     while (*cp == '-')
    5824             :     {
    5825          20 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    5826          20 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5827             :     }
    5828             : 
    5829       18188 :     if (*cp == '*')
    5830             :     {
    5831             :         /* Handle indirect width */
    5832          32 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5833          32 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5834             :         {
    5835             :             /* number in this position must be closed by $ */
    5836          28 :             if (*cp != '$')
    5837           0 :                 ereport(ERROR,
    5838             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5839             :                          errmsg("width argument position must be ended by \"$\"")));
    5840             :             /* The number was width argument position */
    5841          28 :             *widthpos = n;
    5842             :             /* Explicit 0 for argument index is immediately refused */
    5843          28 :             if (n == 0)
    5844           4 :                 ereport(ERROR,
    5845             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5846             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5847          24 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
    5848             :         }
    5849             :         else
    5850           4 :             *widthpos = 0;      /* width's argument position is unspecified */
    5851             :     }
    5852             :     else
    5853             :     {
    5854             :         /* Check for direct width specification */
    5855       18156 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5856          20 :             *width = n;
    5857             :     }
    5858             : 
    5859             :     /* cp should now be pointing at type character */
    5860       18180 :     return cp;
    5861             : }
    5862             : 
    5863             : /*
    5864             :  * Format a %s, %I, or %L conversion
    5865             :  */
    5866             : static void
    5867       18176 : text_format_string_conversion(StringInfo buf, char conversion,
    5868             :                               FmgrInfo *typOutputInfo,
    5869             :                               Datum value, bool isNull,
    5870             :                               int flags, int width)
    5871             : {
    5872             :     char       *str;
    5873             : 
    5874             :     /* Handle NULL arguments before trying to stringify the value. */
    5875       18176 :     if (isNull)
    5876             :     {
    5877          44 :         if (conversion == 's')
    5878          12 :             text_format_append_string(buf, "", flags, width);
    5879          32 :         else if (conversion == 'L')
    5880          28 :             text_format_append_string(buf, "NULL", flags, width);
    5881           4 :         else if (conversion == 'I')
    5882           4 :             ereport(ERROR,
    5883             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    5884             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    5885          40 :         return;
    5886             :     }
    5887             : 
    5888             :     /* Stringify. */
    5889       18132 :     str = OutputFunctionCall(typOutputInfo, value);
    5890             : 
    5891             :     /* Escape. */
    5892       18132 :     if (conversion == 'I')
    5893             :     {
    5894             :         /* quote_identifier may or may not allocate a new string. */
    5895        1286 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    5896             :     }
    5897       16846 :     else if (conversion == 'L')
    5898             :     {
    5899        1018 :         char       *qstr = quote_literal_cstr(str);
    5900             : 
    5901        1018 :         text_format_append_string(buf, qstr, flags, width);
    5902             :         /* quote_literal_cstr() always allocates a new string */
    5903        1018 :         pfree(qstr);
    5904             :     }
    5905             :     else
    5906       15828 :         text_format_append_string(buf, str, flags, width);
    5907             : 
    5908             :     /* Cleanup. */
    5909       18132 :     pfree(str);
    5910             : }
    5911             : 
    5912             : /*
    5913             :  * Append str to buf, padding as directed by flags/width
    5914             :  */
    5915             : static void
    5916       18172 : text_format_append_string(StringInfo buf, const char *str,
    5917             :                           int flags, int width)
    5918             : {
    5919       18172 :     bool        align_to_left = false;
    5920             :     int         len;
    5921             : 
    5922             :     /* fast path for typical easy case */
    5923       18172 :     if (width == 0)
    5924             :     {
    5925       18116 :         appendStringInfoString(buf, str);
    5926       18116 :         return;
    5927             :     }
    5928             : 
    5929          56 :     if (width < 0)
    5930             :     {
    5931             :         /* Negative width: implicit '-' flag, then take absolute value */
    5932           4 :         align_to_left = true;
    5933             :         /* -INT_MIN is undefined */
    5934           4 :         if (width <= INT_MIN)
    5935           0 :             ereport(ERROR,
    5936             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5937             :                      errmsg("number is out of range")));
    5938           4 :         width = -width;
    5939             :     }
    5940          52 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    5941          16 :         align_to_left = true;
    5942             : 
    5943          56 :     len = pg_mbstrlen(str);
    5944          56 :     if (align_to_left)
    5945             :     {
    5946             :         /* left justify */
    5947          20 :         appendStringInfoString(buf, str);
    5948          20 :         if (len < width)
    5949          20 :             appendStringInfoSpaces(buf, width - len);
    5950             :     }
    5951             :     else
    5952             :     {
    5953             :         /* right justify */
    5954          36 :         if (len < width)
    5955          36 :             appendStringInfoSpaces(buf, width - len);
    5956          36 :         appendStringInfoString(buf, str);
    5957             :     }
    5958             : }
    5959             : 
    5960             : /*
    5961             :  * text_format_nv - nonvariadic wrapper for text_format function.
    5962             :  *
    5963             :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
    5964             :  * which checks that all built-in functions that share the implementing C
    5965             :  * function take the same number of arguments.
    5966             :  */
    5967             : Datum
    5968          20 : text_format_nv(PG_FUNCTION_ARGS)
    5969             : {
    5970          20 :     return text_format(fcinfo);
    5971             : }
    5972             : 
    5973             : /*
    5974             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    5975             :  * for this use case.
    5976             :  */
    5977             : static inline bool
    5978           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    5979             : {
    5980           0 :     while (len > 0)
    5981             :     {
    5982           0 :         len--;
    5983           0 :         if (s1[len] != s2[len])
    5984           0 :             return false;
    5985             :     }
    5986           0 :     return true;
    5987             : }
    5988             : 
    5989             : /* Expand each Levenshtein distance variant */
    5990             : #include "levenshtein.c"
    5991             : #define LEVENSHTEIN_LESS_EQUAL
    5992             : #include "levenshtein.c"
    5993             : 
    5994             : 
    5995             : /*
    5996             :  * Unicode support
    5997             :  */
    5998             : 
    5999             : static UnicodeNormalizationForm /* map a form name ("NFC", "NFD", "NFKC", "NFKD") to its UnicodeNormalizationForm value */
    6000         104 : unicode_norm_form_from_string(const char *formstr)
    6001             : {
    6002         104 :     UnicodeNormalizationForm form = -1; /* placeholder until a branch below assigns a real form */
    6003             : 
    6004             :     /*
    6005             :      * Might as well check this while we're here: any database encoding other than UTF8 is reported as an error.
    6006             :      */
    6007         104 :     if (GetDatabaseEncoding() != PG_UTF8)
    6008           0 :         ereport(ERROR,
    6009             :                 (errcode(ERRCODE_SYNTAX_ERROR),
    6010             :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
    6011             : 
    6012         104 :     if (pg_strcasecmp(formstr, "NFC") == 0) /* names are compared with pg_strcasecmp (presumably case-insensitive) */
    6013          36 :         form = UNICODE_NFC;
    6014          68 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
    6015          20 :         form = UNICODE_NFD;
    6016          48 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
    6017          20 :         form = UNICODE_NFKC;
    6018          28 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
    6019          20 :         form = UNICODE_NFKD;
    6020             :     else /* none of the four known names matched */
    6021           8 :         ereport(ERROR,
    6022             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6023             :                  errmsg("invalid normalization form: %s", formstr)));
    6024             : 
    6025          96 :     return form;
    6026             : }
    6027             : 
    6028             : Datum /* args: (text input, text form name); returns the input converted to that normalization form as text */
    6029          28 : unicode_normalize_func(PG_FUNCTION_ARGS)
    6030             : {
    6031          28 :     text       *input = PG_GETARG_TEXT_PP(0);
    6032          28 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6033             :     UnicodeNormalizationForm form;
    6034             :     int         size; /* first the input length in characters, later the output length in bytes */
    6035             :     pg_wchar   *input_chars;
    6036             :     pg_wchar   *output_chars;
    6037             :     unsigned char *p;
    6038             :     text       *result;
    6039             :     int         i;
    6040             : 
    6041          28 :     form = unicode_norm_form_from_string(formstr); /* also rejects non-UTF8 database encodings and unknown form names */
    6042             : 
    6043             :     /* convert to pg_wchar: one array element per input character, plus one for the zero terminator */
    6044          24 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6045          24 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6046          24 :     p = (unsigned char *) VARDATA_ANY(input);
    6047         108 :     for (i = 0; i < size; i++)
    6048             :     {
    6049          84 :         input_chars[i] = utf8_to_unicode(p);
    6050          84 :         p += pg_utf_mblen(p); /* step over the bytes of the character just decoded */
    6051             :     }
    6052          24 :     input_chars[i] = (pg_wchar) '\0';
    6053             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input)); /* the loop must have consumed exactly the whole payload */
    6054             : 
    6055             :     /* action: the loops below treat the returned array as zero-terminated */
    6056          24 :     output_chars = unicode_normalize(form, input_chars);
    6057             : 
    6058             :     /* convert back to UTF-8 string: this first pass only adds up the encoded byte length */
    6059          24 :     size = 0;
    6060         104 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6061             :     {
    6062             :         unsigned char buf[4]; /* scratch buffer for one encoded character */
    6063             : 
    6064          80 :         unicode_to_utf8(*wp, buf);
    6065          80 :         size += pg_utf_mblen(buf);
    6066             :     }
    6067             : 
    6068          24 :     result = palloc(size + VARHDRSZ);
    6069          24 :     SET_VARSIZE(result, size + VARHDRSZ);
    6070             : 
    6071          24 :     p = (unsigned char *) VARDATA_ANY(result); /* second pass: encode each character directly into the result */
    6072         104 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6073             :     {
    6074          80 :         unicode_to_utf8(*wp, p);
    6075          80 :         p += pg_utf_mblen(p);
    6076             :     }
    6077             :     Assert((char *) p == (char *) result + size + VARHDRSZ); /* bytes written must match the length measured above */
    6078             : 
    6079          24 :     PG_RETURN_TEXT_P(result);
    6080             : }
    6081             : 
    6082             : /*
    6083             :  * Check whether the string is in the specified Unicode normalization form.
    6084             :  *
    6085             :  * This is done by converting the string to the specified normal form and then
    6086             :  * comparing that to the original string.  To speed that up, we also apply the
    6087             :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
    6088             :  * answer for many strings by just scanning the string once.
    6089             :  *
    6090             :  * This function should generally be optimized for the case where the string
    6091             :  * is in fact normalized.  In that case, we'll end up looking at the entire
    6092             :  * string, so it's probably not worth doing any incremental conversion etc.
    6093             :  */
    6094             : Datum /* args: (text input, text form name); returns bool */
    6095          76 : unicode_is_normalized(PG_FUNCTION_ARGS)
    6096             : {
    6097          76 :     text       *input = PG_GETARG_TEXT_PP(0);
    6098          76 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6099             :     UnicodeNormalizationForm form;
    6100             :     int         size; /* input length in characters */
    6101             :     pg_wchar   *input_chars;
    6102             :     pg_wchar   *output_chars;
    6103             :     unsigned char *p;
    6104             :     int         i;
    6105             :     UnicodeNormalizationQC quickcheck;
    6106             :     int         output_size; /* number of elements in the normalized array, terminator excluded */
    6107             :     bool        result;
    6108             : 
    6109          76 :     form = unicode_norm_form_from_string(formstr); /* also rejects non-UTF8 database encodings and unknown form names */
    6110             : 
    6111             :     /* convert to pg_wchar: one array element per input character, plus one for the zero terminator */
    6112          72 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6113          72 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6114          72 :     p = (unsigned char *) VARDATA_ANY(input);
    6115         320 :     for (i = 0; i < size; i++)
    6116             :     {
    6117         248 :         input_chars[i] = utf8_to_unicode(p);
    6118         248 :         p += pg_utf_mblen(p); /* step over the bytes of the character just decoded */
    6119             :     }
    6120          72 :     input_chars[i] = (pg_wchar) '\0';
    6121             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input)); /* the loop must have consumed exactly the whole payload */
    6122             : 
    6123             :     /* quick check (see UAX #15): YES and NO answer immediately; any other outcome falls through to the full comparison */
    6124          72 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
    6125          72 :     if (quickcheck == UNICODE_NORM_QC_YES)
    6126          20 :         PG_RETURN_BOOL(true);
    6127          52 :     else if (quickcheck == UNICODE_NORM_QC_NO)
    6128           8 :         PG_RETURN_BOOL(false);
    6129             : 
    6130             :     /* normalize and compare with original */
    6131          44 :     output_chars = unicode_normalize(form, input_chars);
    6132             : 
    6133          44 :     output_size = 0;
    6134         208 :     for (pg_wchar *wp = output_chars; *wp; wp++) /* count elements up to the zero terminator */
    6135         164 :         output_size++;
    6136             : 
    6137          60 :     result = (size == output_size) && /* lengths are compared first, so memcmp runs only over equal-length arrays */
    6138          16 :         (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
    6139             : 
    6140          44 :     PG_RETURN_BOOL(result);
    6141             : }

Generated by: LCOV version 1.13