LCOV - code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 2072 2282 90.8 %
Date: 2025-04-01 14:15:22 Functions: 169 181 93.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/detoast.h"
      21             : #include "access/toast_compression.h"
      22             : #include "catalog/pg_collation.h"
      23             : #include "catalog/pg_type.h"
      24             : #include "common/hashfn.h"
      25             : #include "common/int.h"
      26             : #include "common/unicode_category.h"
      27             : #include "common/unicode_norm.h"
      28             : #include "common/unicode_version.h"
      29             : #include "funcapi.h"
      30             : #include "lib/hyperloglog.h"
      31             : #include "libpq/pqformat.h"
      32             : #include "miscadmin.h"
      33             : #include "nodes/execnodes.h"
      34             : #include "parser/scansup.h"
      35             : #include "port/pg_bswap.h"
      36             : #include "regex/regex.h"
      37             : #include "utils/builtins.h"
      38             : #include "utils/bytea.h"
      39             : #include "utils/guc.h"
      40             : #include "utils/lsyscache.h"
      41             : #include "utils/memutils.h"
      42             : #include "utils/pg_locale.h"
      43             : #include "utils/sortsupport.h"
      44             : #include "utils/varlena.h"
      45             : 
      46             : 
      47             : /* GUC variable */
      48             : int         bytea_output = BYTEA_OUTPUT_HEX;
      49             : 
      50             : typedef struct varlena VarString;
      51             : 
      52             : /*
      53             :  * State for text_position_* functions.
      54             :  */
      55             : typedef struct
      56             : {
      57             :     pg_locale_t locale;         /* collation used for substring matching */
      58             :     bool        is_multibyte_char_in_char;  /* need to check char boundaries? */
      59             :     bool        greedy;         /* find longest possible substring? */
      60             : 
      61             :     char       *str1;           /* haystack string */
      62             :     char       *str2;           /* needle string */
      63             :     int         len1;           /* string lengths in bytes */
      64             :     int         len2;
      65             : 
      66             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      67             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      68             :     int         skiptable[256]; /* skip distance for given mismatched char */
      69             : 
      70             :     /*
      71             :      * Note that with nondeterministic collations, the length of the last
      72             :      * match is not necessarily equal to the length of the "needle" passed in.
      73             :      */
      74             :     char       *last_match;     /* pointer to last match in 'str1' */
      75             :     int         last_match_len; /* length of last match */
      76             :     int         last_match_len_tmp; /* same but for internal use */
      77             : 
      78             :     /*
      79             :      * Sometimes we need to convert the byte position of a match to a
      80             :      * character position.  These store the last position that was converted,
      81             :      * so that on the next call, we can continue from that point, rather than
      82             :      * count characters from the very beginning.
      83             :      */
      84             :     char       *refpoint;       /* pointer within original haystack string */
      85             :     int         refpos;         /* 0-based character offset of the same point */
      86             : } TextPositionState;
      87             : 
      88             : typedef struct
      89             : {
      90             :     char       *buf1;           /* 1st string, or abbreviation original string
      91             :                                  * buf */
      92             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      93             :     int         buflen1;        /* Allocated length of buf1 */
      94             :     int         buflen2;        /* Allocated length of buf2 */
      95             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      96             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      97             :     int         last_returned;  /* Last comparison result (cache) */
      98             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      99             :     bool        collate_c;
     100             :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
     101             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
     102             :     hyperLogLogState full_card; /* Full key cardinality state */
     103             :     double      prop_card;      /* Required cardinality proportion */
     104             :     pg_locale_t locale;
     105             : } VarStringSortSupport;
     106             : 
     107             : /*
     108             :  * Output data for split_text(): we output either to an array or a table.
     109             :  * tupstore and tupdesc must be set up in advance to output to a table.
     110             :  */
     111             : typedef struct
     112             : {
     113             :     ArrayBuildState *astate;
     114             :     Tuplestorestate *tupstore;
     115             :     TupleDesc   tupdesc;
     116             : } SplitTextOutputData;
     117             : 
     118             : /*
     119             :  * This should be large enough that most strings will fit, but small enough
     120             :  * that we feel comfortable putting it on the stack
     121             :  */
     122             : #define TEXTBUFLEN      1024
     123             : 
     124             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
     125             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
     126             : 
     127             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
     128             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
     129             : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
     130             : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
     131             : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
     132             : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
     133             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
     134             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     135             : static int32 text_length(Datum str);
     136             : static text *text_catenate(text *t1, text *t2);
     137             : static text *text_substring(Datum str,
     138             :                             int32 start,
     139             :                             int32 length,
     140             :                             bool length_not_specified);
     141             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     142             : static int  text_position(text *t1, text *t2, Oid collid);
     143             : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
     144             : static bool text_position_next(TextPositionState *state);
     145             : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
     146             : static char *text_position_get_match_ptr(TextPositionState *state);
     147             : static int  text_position_get_match_pos(TextPositionState *state);
     148             : static void text_position_cleanup(TextPositionState *state);
     149             : static void check_collation_set(Oid collid);
     150             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     151             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     152             : static bytea *bytea_substring(Datum str,
     153             :                               int S,
     154             :                               int L,
     155             :                               bool length_not_specified);
     156             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     157             : static void appendStringInfoText(StringInfo str, const text *t);
     158             : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
     159             : static void split_text_accum_result(SplitTextOutputData *tstate,
     160             :                                     text *field_value,
     161             :                                     text *null_string,
     162             :                                     Oid collation);
     163             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     164             :                                     const char *fldsep, const char *null_string);
     165             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
     166             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     167             :                                      int *value);
     168             : static const char *text_format_parse_format(const char *start_ptr,
     169             :                                             const char *end_ptr,
     170             :                                             int *argpos, int *widthpos,
     171             :                                             int *flags, int *width);
     172             : static void text_format_string_conversion(StringInfo buf, char conversion,
     173             :                                           FmgrInfo *typOutputInfo,
     174             :                                           Datum value, bool isNull,
     175             :                                           int flags, int width);
     176             : static void text_format_append_string(StringInfo buf, const char *str,
     177             :                                       int flags, int width);
     178             : 
     179             : 
     180             : /*****************************************************************************
     181             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     182             :  *****************************************************************************/
     183             : 
     184             : /*
     185             :  * cstring_to_text
     186             :  *
     187             :  * Create a text value from a null-terminated C string.
     188             :  *
     189             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     190             :  */
     191             : text *
     192    24706598 : cstring_to_text(const char *s)
     193             : {
     194    24706598 :     return cstring_to_text_with_len(s, strlen(s));
     195             : }
     196             : 
     197             : /*
     198             :  * cstring_to_text_with_len
     199             :  *
     200             :  * Same as cstring_to_text except the caller specifies the string length;
     201             :  * the string need not be null_terminated.
     202             :  */
     203             : text *
     204    27357610 : cstring_to_text_with_len(const char *s, int len)
     205             : {
     206    27357610 :     text       *result = (text *) palloc(len + VARHDRSZ);
     207             : 
     208    27357610 :     SET_VARSIZE(result, len + VARHDRSZ);
     209    27357610 :     memcpy(VARDATA(result), s, len);
     210             : 
     211    27357610 :     return result;
     212             : }
     213             : 
     214             : /*
     215             :  * text_to_cstring
     216             :  *
     217             :  * Create a palloc'd, null-terminated C string from a text value.
     218             :  *
     219             :  * We support being passed a compressed or toasted text value.
     220             :  * This is a bit bogus since such values shouldn't really be referred to as
     221             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     222             :  * case here, we'd need another routine that did, anyway.
     223             :  */
     224             : char *
     225    16110304 : text_to_cstring(const text *t)
     226             : {
     227             :     /* must cast away the const, unfortunately */
     228    16110304 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
     229    16110304 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
     230             :     char       *result;
     231             : 
     232    16110304 :     result = (char *) palloc(len + 1);
     233    16110304 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     234    16110304 :     result[len] = '\0';
     235             : 
     236    16110304 :     if (tunpacked != t)
     237       42986 :         pfree(tunpacked);
     238             : 
     239    16110304 :     return result;
     240             : }
     241             : 
     242             : /*
     243             :  * text_to_cstring_buffer
     244             :  *
     245             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     246             :  *
     247             :  * The text string is truncated if necessary to fit.  The result is
     248             :  * guaranteed null-terminated (unless dst_len == 0).
     249             :  *
     250             :  * We support being passed a compressed or toasted text value.
     251             :  * This is a bit bogus since such values shouldn't really be referred to as
     252             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     253             :  * case here, we'd need another routine that did, anyway.
     254             :  */
     255             : void
     256         978 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     257             : {
     258             :     /* must cast away the const, unfortunately */
     259         978 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
     260         978 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     261             : 
     262         978 :     if (dst_len > 0)
     263             :     {
     264         978 :         dst_len--;
     265         978 :         if (dst_len >= src_len)
     266         978 :             dst_len = src_len;
     267             :         else                    /* ensure truncation is encoding-safe */
     268           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     269         978 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
     270         978 :         dst[dst_len] = '\0';
     271             :     }
     272             : 
     273         978 :     if (srcunpacked != src)
     274           0 :         pfree(srcunpacked);
     275         978 : }
     276             : 
     277             : 
     278             : /*****************************************************************************
     279             :  *   USER I/O ROUTINES                                                       *
     280             :  *****************************************************************************/
     281             : 
     282             : 
     283             : #define VAL(CH)         ((CH) - '0')
     284             : #define DIG(VAL)        ((VAL) + '0')
     285             : 
     286             : /*
     287             :  *      byteain         - converts from printable representation of byte array
     288             :  *
     289             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     290             :  *      converted to internal form.  '\' must be passed as '\\'.
     291             :  *      ereport(ERROR, ...) if bad form.
     292             :  *
     293             :  *      BUGS:
     294             :  *              The input is scanned twice.
     295             :  *              The error checking of input is minimal.
     296             :  */
     297             : Datum
     298     1385964 : byteain(PG_FUNCTION_ARGS)
     299             : {
     300     1385964 :     char       *inputText = PG_GETARG_CSTRING(0);
     301     1385964 :     Node       *escontext = fcinfo->context;
     302             :     char       *tp;
     303             :     char       *rp;
     304             :     int         bc;
     305             :     bytea      *result;
     306             : 
     307             :     /* Recognize hex input */
     308     1385964 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     309             :     {
     310      111308 :         size_t      len = strlen(inputText);
     311             : 
     312      111308 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     313      111308 :         result = palloc(bc);
     314      111308 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
     315             :                              escontext);
     316      111296 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     317             : 
     318      111296 :         PG_RETURN_BYTEA_P(result);
     319             :     }
     320             : 
     321             :     /* Else, it's the traditional escaped style */
     322     9563988 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
     323             :     {
     324     8289344 :         if (tp[0] != '\\')
     325     8288326 :             tp++;
     326        1018 :         else if ((tp[0] == '\\') &&
     327        1018 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     328        1006 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     329        1006 :                  (tp[3] >= '0' && tp[3] <= '7'))
     330        1006 :             tp += 4;
     331          12 :         else if ((tp[0] == '\\') &&
     332          12 :                  (tp[1] == '\\'))
     333           0 :             tp += 2;
     334             :         else
     335             :         {
     336             :             /*
     337             :              * one backslash, not followed by another or ### valid octal
     338             :              */
     339          12 :             ereturn(escontext, (Datum) 0,
     340             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     341             :                      errmsg("invalid input syntax for type %s", "bytea")));
     342             :         }
     343             :     }
     344             : 
     345     1274644 :     bc += VARHDRSZ;
     346             : 
     347     1274644 :     result = (bytea *) palloc(bc);
     348     1274644 :     SET_VARSIZE(result, bc);
     349             : 
     350     1274644 :     tp = inputText;
     351     1274644 :     rp = VARDATA(result);
     352     9563946 :     while (*tp != '\0')
     353             :     {
     354     8289302 :         if (tp[0] != '\\')
     355     8288296 :             *rp++ = *tp++;
     356        1006 :         else if ((tp[0] == '\\') &&
     357        1006 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     358        1006 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     359        1006 :                  (tp[3] >= '0' && tp[3] <= '7'))
     360             :         {
     361        1006 :             bc = VAL(tp[1]);
     362        1006 :             bc <<= 3;
     363        1006 :             bc += VAL(tp[2]);
     364        1006 :             bc <<= 3;
     365        1006 :             *rp++ = bc + VAL(tp[3]);
     366             : 
     367        1006 :             tp += 4;
     368             :         }
     369           0 :         else if ((tp[0] == '\\') &&
     370           0 :                  (tp[1] == '\\'))
     371             :         {
     372           0 :             *rp++ = '\\';
     373           0 :             tp += 2;
     374             :         }
     375             :         else
     376             :         {
     377             :             /*
     378             :              * We should never get here. The first pass should not allow it.
     379             :              */
     380           0 :             ereturn(escontext, (Datum) 0,
     381             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     382             :                      errmsg("invalid input syntax for type %s", "bytea")));
     383             :         }
     384             :     }
     385             : 
     386     1274644 :     PG_RETURN_BYTEA_P(result);
     387             : }
     388             : 
     389             : /*
     390             :  *      byteaout        - converts to printable representation of byte array
     391             :  *
     392             :  *      In the traditional escaped format, non-printable characters are
     393             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     394             :  */
     395             : Datum
     396      559590 : byteaout(PG_FUNCTION_ARGS)
     397             : {
     398      559590 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     399             :     char       *result;
     400             :     char       *rp;
     401             : 
     402      559590 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     403             :     {
     404             :         /* Print hex format */
     405      559206 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     406      559206 :         *rp++ = '\\';
     407      559206 :         *rp++ = 'x';
     408      559206 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     409             :     }
     410         384 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     411             :     {
     412             :         /* Print traditional escaped format */
     413             :         char       *vp;
     414             :         uint64      len;
     415             :         int         i;
     416             : 
     417         384 :         len = 1;                /* empty string has 1 char */
     418         384 :         vp = VARDATA_ANY(vlena);
     419      217660 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     420             :         {
     421      217276 :             if (*vp == '\\')
     422           0 :                 len += 2;
     423      217276 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     424         498 :                 len += 4;
     425             :             else
     426      216778 :                 len++;
     427             :         }
     428             : 
     429             :         /*
     430             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     431             :          * for safety let's check rather than relying on palloc's internal
     432             :          * check.
     433             :          */
     434         384 :         if (len > MaxAllocSize)
     435           0 :             ereport(ERROR,
     436             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     437             :                      errmsg_internal("result of bytea output conversion is too large")));
     438         384 :         rp = result = (char *) palloc(len);
     439             : 
     440         384 :         vp = VARDATA_ANY(vlena);
     441      217660 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     442             :         {
     443      217276 :             if (*vp == '\\')
     444             :             {
     445           0 :                 *rp++ = '\\';
     446           0 :                 *rp++ = '\\';
     447             :             }
     448      217276 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     449         498 :             {
     450             :                 int         val;    /* holds unprintable chars */
     451             : 
     452         498 :                 val = *vp;
     453         498 :                 rp[0] = '\\';
     454         498 :                 rp[3] = DIG(val & 07);
     455         498 :                 val >>= 3;
     456         498 :                 rp[2] = DIG(val & 07);
     457         498 :                 val >>= 3;
     458         498 :                 rp[1] = DIG(val & 03);
     459         498 :                 rp += 4;
     460             :             }
     461             :             else
     462      216778 :                 *rp++ = *vp;
     463             :         }
     464             :     }
     465             :     else
     466             :     {
     467           0 :         elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
     468             :              bytea_output);
     469             :         rp = result = NULL;     /* keep compiler quiet */
     470             :     }
     471      559590 :     *rp = '\0';
     472      559590 :     PG_RETURN_CSTRING(result);
     473             : }
     474             : 
     475             : /*
     476             :  *      bytearecv           - converts external binary format to bytea
     477             :  */
     478             : Datum
     479      107710 : bytearecv(PG_FUNCTION_ARGS)
     480             : {
     481      107710 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     482             :     bytea      *result;
     483             :     int         nbytes;
     484             : 
     485      107710 :     nbytes = buf->len - buf->cursor;
     486      107710 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     487      107710 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     488      107710 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     489      107710 :     PG_RETURN_BYTEA_P(result);
     490             : }
     491             : 
     492             : /*
     493             :  *      byteasend           - converts bytea to binary format
     494             :  *
     495             :  * This is a special case: just copy the input...
     496             :  */
     497             : Datum
     498       68968 : byteasend(PG_FUNCTION_ARGS)
     499             : {
     500       68968 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     501             : 
     502       68968 :     PG_RETURN_BYTEA_P(vlena);
     503             : }
     504             : 
     505             : Datum
     506      258774 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     507             : {
     508             :     StringInfo  state;
     509             : 
     510      258774 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     511             : 
     512             :     /* Append the value unless null, preceding it with the delimiter. */
     513      258774 :     if (!PG_ARGISNULL(1))
     514             :     {
     515      243774 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     516      243774 :         bool        isfirst = false;
     517             : 
     518             :         /*
     519             :          * You might think we can just throw away the first delimiter, however
     520             :          * we must keep it as we may be a parallel worker doing partial
     521             :          * aggregation building a state to send to the main process.  We need
     522             :          * to keep the delimiter of every aggregation so that the combine
     523             :          * function can properly join up the strings of two separately
     524             :          * partially aggregated results.  The first delimiter is only stripped
     525             :          * off in the final function.  To know how much to strip off the front
     526             :          * of the string, we store the length of the first delimiter in the
     527             :          * StringInfo's cursor field, which we don't otherwise need here.
     528             :          */
     529      243774 :         if (state == NULL)
     530             :         {
     531         148 :             state = makeStringAggState(fcinfo);
     532         148 :             isfirst = true;
     533             :         }
     534             : 
     535      243774 :         if (!PG_ARGISNULL(2))
     536             :         {
     537      243762 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     538             : 
     539      243762 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
     540      243762 :                                    VARSIZE_ANY_EXHDR(delim));
     541      243762 :             if (isfirst)
     542         142 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
     543             :         }
     544             : 
     545      243774 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
     546      243774 :                                VARSIZE_ANY_EXHDR(value));
     547             :     }
     548             : 
     549             :     /*
     550             :      * The transition type for string_agg() is declared to be "internal",
     551             :      * which is a pass-by-value type the same size as a pointer.
     552             :      */
     553      258774 :     if (state)
     554      258738 :         PG_RETURN_POINTER(state);
     555          36 :     PG_RETURN_NULL();
     556             : }
     557             : 
     558             : Datum
     559         154 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     560             : {
     561             :     StringInfo  state;
     562             : 
     563             :     /* cannot be called directly because of internal-type argument */
     564             :     Assert(AggCheckCallContext(fcinfo, NULL));
     565             : 
     566         154 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     567             : 
     568         154 :     if (state != NULL)
     569             :     {
     570             :         /* As per comment in transfn, strip data before the cursor position */
     571             :         bytea      *result;
     572         148 :         int         strippedlen = state->len - state->cursor;
     573             : 
     574         148 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
     575         148 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
     576         148 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
     577         148 :         PG_RETURN_BYTEA_P(result);
     578             :     }
     579             :     else
     580           6 :         PG_RETURN_NULL();
     581             : }
     582             : 
     583             : /*
     584             :  *      textin          - converts cstring to internal representation
     585             :  */
     586             : Datum
     587    21519474 : textin(PG_FUNCTION_ARGS)
     588             : {
     589    21519474 :     char       *inputText = PG_GETARG_CSTRING(0);
     590             : 
     591    21519474 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     592             : }
     593             : 
     594             : /*
     595             :  *      textout         - converts internal representation to cstring
     596             :  */
     597             : Datum
     598     8073976 : textout(PG_FUNCTION_ARGS)
     599             : {
     600     8073976 :     Datum       txt = PG_GETARG_DATUM(0);
     601             : 
     602     8073976 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     603             : }
     604             : 
     605             : /*
     606             :  *      textrecv            - converts external binary format to text
     607             :  */
     608             : Datum
     609          48 : textrecv(PG_FUNCTION_ARGS)
     610             : {
     611          48 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     612             :     text       *result;
     613             :     char       *str;
     614             :     int         nbytes;
     615             : 
     616          48 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     617             : 
     618          48 :     result = cstring_to_text_with_len(str, nbytes);
     619          48 :     pfree(str);
     620          48 :     PG_RETURN_TEXT_P(result);
     621             : }
     622             : 
     623             : /*
     624             :  *      textsend            - converts text to binary format
     625             :  */
     626             : Datum
     627        4902 : textsend(PG_FUNCTION_ARGS)
     628             : {
     629        4902 :     text       *t = PG_GETARG_TEXT_PP(0);
     630             :     StringInfoData buf;
     631             : 
     632        4902 :     pq_begintypsend(&buf);
     633        4902 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
     634        4902 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     635             : }
     636             : 
     637             : 
     638             : /*
     639             :  *      unknownin           - converts cstring to internal representation
     640             :  */
     641             : Datum
     642           0 : unknownin(PG_FUNCTION_ARGS)
     643             : {
     644           0 :     char       *str = PG_GETARG_CSTRING(0);
     645             : 
     646             :     /* representation is same as cstring */
     647           0 :     PG_RETURN_CSTRING(pstrdup(str));
     648             : }
     649             : 
     650             : /*
     651             :  *      unknownout          - converts internal representation to cstring
     652             :  */
     653             : Datum
     654         940 : unknownout(PG_FUNCTION_ARGS)
     655             : {
     656             :     /* representation is same as cstring */
     657         940 :     char       *str = PG_GETARG_CSTRING(0);
     658             : 
     659         940 :     PG_RETURN_CSTRING(pstrdup(str));
     660             : }
     661             : 
     662             : /*
     663             :  *      unknownrecv         - converts external binary format to unknown
     664             :  */
     665             : Datum
     666           0 : unknownrecv(PG_FUNCTION_ARGS)
     667             : {
     668           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     669             :     char       *str;
     670             :     int         nbytes;
     671             : 
     672           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     673             :     /* representation is same as cstring */
     674           0 :     PG_RETURN_CSTRING(str);
     675             : }
     676             : 
     677             : /*
     678             :  *      unknownsend         - converts unknown to binary format
     679             :  */
     680             : Datum
     681           0 : unknownsend(PG_FUNCTION_ARGS)
     682             : {
     683             :     /* representation is same as cstring */
     684           0 :     char       *str = PG_GETARG_CSTRING(0);
     685             :     StringInfoData buf;
     686             : 
     687           0 :     pq_begintypsend(&buf);
     688           0 :     pq_sendtext(&buf, str, strlen(str));
     689           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     690             : }
     691             : 
     692             : 
     693             : /* ========== PUBLIC ROUTINES ========== */
     694             : 
     695             : /*
     696             :  * textlen -
     697             :  *    returns the logical length of a text*
     698             :  *     (which is less than the VARSIZE of the text*)
     699             :  */
     700             : Datum
     701      430748 : textlen(PG_FUNCTION_ARGS)
     702             : {
     703      430748 :     Datum       str = PG_GETARG_DATUM(0);
     704             : 
     705             :     /* try to avoid decompressing argument */
     706      430748 :     PG_RETURN_INT32(text_length(str));
     707             : }
     708             : 
     709             : /*
     710             :  * text_length -
     711             :  *  Does the real work for textlen()
     712             :  *
     713             :  *  This is broken out so it can be called directly by other string processing
     714             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     715             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     716             :  *  in some cases.
     717             :  */
     718             : static int32
     719      430760 : text_length(Datum str)
     720             : {
     721             :     /* fastpath when max encoding length is one */
     722      430760 :     if (pg_database_encoding_max_length() == 1)
     723          20 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     724             :     else
     725             :     {
     726      430740 :         text       *t = DatumGetTextPP(str);
     727             : 
     728      430740 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
     729             :                                              VARSIZE_ANY_EXHDR(t)));
     730             :     }
     731             : }
     732             : 
     733             : /*
     734             :  * textoctetlen -
     735             :  *    returns the physical length of a text*
     736             :  *     (which is less than the VARSIZE of the text*)
     737             :  */
     738             : Datum
     739          70 : textoctetlen(PG_FUNCTION_ARGS)
     740             : {
     741          70 :     Datum       str = PG_GETARG_DATUM(0);
     742             : 
     743             :     /* We need not detoast the input at all */
     744          70 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     745             : }
     746             : 
     747             : /*
     748             :  * textcat -
     749             :  *    takes two text* and returns a text* that is the concatenation of
     750             :  *    the two.
     751             :  *
     752             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     753             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     754             :  * Allocate space for output in all cases.
     755             :  * XXX - thomas 1997-07-10
     756             :  */
     757             : Datum
     758     1934346 : textcat(PG_FUNCTION_ARGS)
     759             : {
     760     1934346 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     761     1934346 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     762             : 
     763     1934346 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     764             : }
     765             : 
     766             : /*
     767             :  * text_catenate
     768             :  *  Guts of textcat(), broken out so it can be used by other functions
     769             :  *
     770             :  * Arguments can be in short-header form, but not compressed or out-of-line
     771             :  */
     772             : static text *
     773     1934426 : text_catenate(text *t1, text *t2)
     774             : {
     775             :     text       *result;
     776             :     int         len1,
     777             :                 len2,
     778             :                 len;
     779             :     char       *ptr;
     780             : 
     781     1934426 :     len1 = VARSIZE_ANY_EXHDR(t1);
     782     1934426 :     len2 = VARSIZE_ANY_EXHDR(t2);
     783             : 
     784             :     /* paranoia ... probably should throw error instead? */
     785     1934426 :     if (len1 < 0)
     786           0 :         len1 = 0;
     787     1934426 :     if (len2 < 0)
     788           0 :         len2 = 0;
     789             : 
     790     1934426 :     len = len1 + len2 + VARHDRSZ;
     791     1934426 :     result = (text *) palloc(len);
     792             : 
     793             :     /* Set size of result string... */
     794     1934426 :     SET_VARSIZE(result, len);
     795             : 
     796             :     /* Fill data field of result string... */
     797     1934426 :     ptr = VARDATA(result);
     798     1934426 :     if (len1 > 0)
     799     1933602 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     800     1934426 :     if (len2 > 0)
     801     1934216 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     802             : 
     803     1934426 :     return result;
     804             : }
     805             : 
     806             : /*
     807             :  * charlen_to_bytelen()
     808             :  *  Compute the number of bytes occupied by n characters starting at *p
     809             :  *
     810             :  * It is the caller's responsibility to ensure there actually are n characters;
     811             :  * the string need not be null-terminated.
     812             :  */
     813             : static int
     814       16586 : charlen_to_bytelen(const char *p, int n)
     815             : {
     816       16586 :     if (pg_database_encoding_max_length() == 1)
     817             :     {
     818             :         /* Optimization for single-byte encodings */
     819         180 :         return n;
     820             :     }
     821             :     else
     822             :     {
     823             :         const char *s;
     824             : 
     825     6033856 :         for (s = p; n > 0; n--)
     826     6017450 :             s += pg_mblen(s);
     827             : 
     828       16406 :         return s - p;
     829             :     }
     830             : }
     831             : 
     832             : /*
     833             :  * text_substr()
     834             :  * Return a substring starting at the specified position.
     835             :  * - thomas 1997-12-31
     836             :  *
     837             :  * Input:
     838             :  *  - string
     839             :  *  - starting position (is one-based)
     840             :  *  - string length
     841             :  *
     842             :  * If the starting position is zero or less, then return from the start of the string
     843             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     844             :  * If the length is less than zero, throw a "negative substring length" error.
     845             :  *
     846             :  * Added multibyte support.
     847             :  * - Tatsuo Ishii 1998-4-21
     848             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     849             :  * Formerly returned the entire string; now returns a portion.
     850             :  * - Thomas Lockhart 1998-12-10
     851             :  * Now uses faster TOAST-slicing interface
     852             :  * - John Gray 2002-02-22
     853             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     854             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     855             :  * error; if E < 1, return '', not entire string). Fixed MB-related bug where
     856             :  * garbage characters were sometimes returned when S > LC and S < LC + 4.
     857             :  * - Joe Conway 2002-08-10
     858             :  */
     859             : Datum
     860      588748 : text_substr(PG_FUNCTION_ARGS)
     861             : {
     862      588748 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     863             :                                     PG_GETARG_INT32(1),
     864             :                                     PG_GETARG_INT32(2),
     865             :                                     false));
     866             : }
     867             : 
     868             : /*
     869             :  * text_substr_no_len -
     870             :  *    Wrapper to avoid opr_sanity failure due to
     871             :  *    one function accepting a different number of args.
     872             :  */
     873             : Datum
     874          36 : text_substr_no_len(PG_FUNCTION_ARGS)
     875             : {
     876          36 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     877             :                                     PG_GETARG_INT32(1),
     878             :                                     -1, true));
     879             : }
     880             : 
     881             : /*
     882             :  * text_substring -
     883             :  *  Does the real work for text_substr() and text_substr_no_len()
     884             :  *
     885             :  *  This is broken out so it can be called directly by other string processing
     886             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     887             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     888             :  *  of it in some cases.
     889             :  *
     890             :  *  The result is always a freshly palloc'd datum.
     891             :  */
     892             : static text *
     893      628896 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     894             : {
     895      628896 :     int32       eml = pg_database_encoding_max_length();
     896      628896 :     int32       S = start;      /* start position */
     897             :     int32       S1;             /* adjusted start position */
     898             :     int32       L1;             /* adjusted substring length */
     899             :     int32       E;              /* end position */
     900             : 
     901             :     /*
     902             :      * SQL99 says S can be zero or negative (which we don't document), but we
     903             :      * still must fetch from the start of the string.
     904             :      * https://www.postgresql.org/message-id/170905442373.643.11536838320909376197%40wrigleys.postgresql.org
     905             :      */
     906      628896 :     S1 = Max(S, 1);
     907             : 
     908             :     /* life is easy if the encoding max length is 1 */
     909      628896 :     if (eml == 1)
     910             :     {
     911          22 :         if (length_not_specified)   /* special case - get length to end of
     912             :                                      * string */
     913           0 :             L1 = -1;
     914          22 :         else if (length < 0)
     915             :         {
     916             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     917           0 :             ereport(ERROR,
     918             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     919             :                      errmsg("negative substring length not allowed")));
     920             :             L1 = -1;            /* silence stupider compilers */
     921             :         }
     922          22 :         else if (pg_add_s32_overflow(S, length, &E))
     923             :         {
     924             :             /*
     925             :              * L could be large enough for S + L to overflow, in which case
     926             :              * the substring must run to end of string.
     927             :              */
     928           0 :             L1 = -1;
     929             :         }
     930             :         else
     931             :         {
     932             :             /*
     933             :              * A zero or negative value for the end position can happen if the
     934             :              * start was negative or one. SQL99 says to return a zero-length
     935             :              * string.
     936             :              */
     937          22 :             if (E < 1)
     938           0 :                 return cstring_to_text("");
     939             : 
     940          22 :             L1 = E - S1;
     941             :         }
     942             : 
     943             :         /*
     944             :          * If the start position is past the end of the string, SQL99 says to
     945             :          * return a zero-length string -- DatumGetTextPSlice() will do that
     946             :          * for us.  We need only convert S1 to zero-based starting position.
     947             :          */
     948          22 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     949             :     }
     950      628874 :     else if (eml > 1)
     951             :     {
     952             :         /*
     953             :          * When encoding max length is > 1, we can't get LC without
     954             :          * detoasting, so we'll grab a conservatively large slice now and go
     955             :          * back later to do the right thing
     956             :          */
     957             :         int32       slice_start;
     958             :         int32       slice_size;
     959             :         int32       slice_strlen;
     960             :         text       *slice;
     961             :         int32       E1;
     962             :         int32       i;
     963             :         char       *p;
     964             :         char       *s;
     965             :         text       *ret;
     966             : 
     967             :         /*
     968             :          * We need to start at position zero because there is no way to know
     969             :          * in advance which byte offset corresponds to the supplied start
     970             :          * position.
     971             :          */
     972      628874 :         slice_start = 0;
     973             : 
     974      628874 :         if (length_not_specified)   /* special case - get length to end of
     975             :                                      * string */
     976          76 :             slice_size = L1 = -1;
     977      628798 :         else if (length < 0)
     978             :         {
     979             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     980          12 :             ereport(ERROR,
     981             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     982             :                      errmsg("negative substring length not allowed")));
     983             :             slice_size = L1 = -1;   /* silence stupider compilers */
     984             :         }
     985      628786 :         else if (pg_add_s32_overflow(S, length, &E))
     986             :         {
     987             :             /*
     988             :              * L could be large enough for S + L to overflow, in which case
     989             :              * the substring must run to end of string.
     990             :              */
     991           6 :             slice_size = L1 = -1;
     992             :         }
     993             :         else
     994             :         {
     995             :             /*
     996             :              * A zero or negative value for the end position can happen if the
     997             :              * start was negative or one. SQL99 says to return a zero-length
     998             :              * string.
     999             :              */
    1000      628780 :             if (E < 1)
    1001           0 :                 return cstring_to_text("");
    1002             : 
    1003             :             /*
    1004             :              * if E is past the end of the string, the tuple toaster will
    1005             :              * truncate the length for us
    1006             :              */
    1007      628780 :             L1 = E - S1;
    1008             : 
    1009             :             /*
    1010             :              * Total slice size in bytes can't be any longer than the start
    1011             :              * position plus substring length times the encoding max length.
    1012             :              * If that overflows, we can just use -1.
    1013             :              */
    1014      628780 :             if (pg_mul_s32_overflow(E, eml, &slice_size))
    1015           6 :                 slice_size = -1;
    1016             :         }
    1017             : 
    1018             :         /*
    1019             :          * If we're working with an untoasted source, no need to do an extra
    1020             :          * copying step.
    1021             :          */
    1022      628862 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
    1023      628808 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
    1024         324 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
    1025             :         else
    1026      628538 :             slice = (text *) DatumGetPointer(str);
    1027             : 
    1028             :         /* see if we got back an empty string */
    1029      628862 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
    1030             :         {
    1031           0 :             if (slice != (text *) DatumGetPointer(str))
    1032           0 :                 pfree(slice);
    1033           0 :             return cstring_to_text("");
    1034             :         }
    1035             : 
    1036             :         /* Now we can get the actual length of the slice in MB characters */
    1037      628862 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
    1038      628862 :                                             VARSIZE_ANY_EXHDR(slice));
    1039             : 
    1040             :         /*
    1041             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
    1042             :          * says to return a zero-length string.
    1043             :          */
    1044      628862 :         if (S1 > slice_strlen)
    1045             :         {
    1046          22 :             if (slice != (text *) DatumGetPointer(str))
    1047           0 :                 pfree(slice);
    1048          22 :             return cstring_to_text("");
    1049             :         }
    1050             : 
    1051             :         /*
    1052             :          * Adjust L1 and E1 now that we know the slice string length. Again
    1053             :          * remember that S1 is one based, and slice_start is zero based.
    1054             :          */
    1055      628840 :         if (L1 > -1)
    1056      628780 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
    1057             :         else
    1058          60 :             E1 = slice_start + 1 + slice_strlen;
    1059             : 
    1060             :         /*
    1061             :          * Find the start position in the slice; remember S1 is not zero based
    1062             :          */
    1063      628840 :         p = VARDATA_ANY(slice);
    1064     5450186 :         for (i = 0; i < S1 - 1; i++)
    1065     4821346 :             p += pg_mblen(p);
    1066             : 
    1067             :         /* hang onto a pointer to our start position */
    1068      628840 :         s = p;
    1069             : 
    1070             :         /*
    1071             :          * Count the actual bytes used by the substring of the requested
    1072             :          * length.
    1073             :          */
    1074     9793252 :         for (i = S1; i < E1; i++)
    1075     9164412 :             p += pg_mblen(p);
    1076             : 
    1077      628840 :         ret = (text *) palloc(VARHDRSZ + (p - s));
    1078      628840 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
    1079      628840 :         memcpy(VARDATA(ret), s, (p - s));
    1080             : 
    1081      628840 :         if (slice != (text *) DatumGetPointer(str))
    1082         324 :             pfree(slice);
    1083             : 
    1084      628840 :         return ret;
    1085             :     }
    1086             :     else
    1087           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
    1088             : 
    1089             :     /* not reached: suppress compiler warning */
    1090             :     return NULL;
    1091             : }
    1092             : 
    1093             : /*
    1094             :  * textoverlay
    1095             :  *  Replace specified substring of first string with second
    1096             :  *
    1097             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1098             :  * This code is a direct implementation of what the standard says.
    1099             :  */
    1100             : Datum
    1101          28 : textoverlay(PG_FUNCTION_ARGS)
    1102             : {
    1103          28 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1104          28 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1105          28 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1106          28 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1107             : 
    1108          28 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1109             : }
    1110             : 
    1111             : Datum
    1112          12 : textoverlay_no_len(PG_FUNCTION_ARGS)
    1113             : {
    1114          12 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1115          12 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1116          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1117             :     int         sl;
    1118             : 
    1119          12 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
    1120          12 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1121             : }
    1122             : 
    1123             : static text *
    1124          40 : text_overlay(text *t1, text *t2, int sp, int sl)
    1125             : {
    1126             :     text       *result;
    1127             :     text       *s1;
    1128             :     text       *s2;
    1129             :     int         sp_pl_sl;
    1130             : 
    1131             :     /*
    1132             :      * Check for possible integer-overflow cases.  For sp <= 0, throw a
    1133             :      * "substring length" error because that's what should be expected
    1134             :      * according to the spec's definition of OVERLAY().
    1135             :      */
    1136          40 :     if (sp <= 0)
    1137           0 :         ereport(ERROR,
    1138             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1139             :                  errmsg("negative substring length not allowed")));
    1140          40 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    1141           0 :         ereport(ERROR,
    1142             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1143             :                  errmsg("integer out of range")));
    1144             : 
    1145          40 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
    1146          40 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    1147          40 :     result = text_catenate(s1, t2);
    1148          40 :     result = text_catenate(result, s2);
    1149             : 
    1150          40 :     return result;
    1151             : }
    1152             : 
    1153             : /*
    1154             :  * textpos -
    1155             :  *    Return the position of the specified substring.
    1156             :  *    Implements the SQL POSITION() function.
    1157             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1158             :  * - thomas 1997-07-27
    1159             :  */
    1160             : Datum
    1161         130 : textpos(PG_FUNCTION_ARGS)
    1162             : {
    1163         130 :     text       *str = PG_GETARG_TEXT_PP(0);
    1164         130 :     text       *search_str = PG_GETARG_TEXT_PP(1);
    1165             : 
    1166         130 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
    1167             : }
    1168             : 
    1169             : /*
    1170             :  * text_position -
    1171             :  *  Does the real work for textpos()
    1172             :  *
    1173             :  * Inputs:
    1174             :  *      t1 - string to be searched
    1175             :  *      t2 - pattern to match within t1
                     :  *      collid - collation to search under; check_collation_set() raises
                     :  *               an error if it is not a valid OID
    1176             :  * Result:
    1177             :  *      Character index of the first matched char, starting from 1,
    1178             :  *      or 0 if no match.  An empty t2 always yields 1.
    1179             :  *
    1180             :  *  This is broken out so it can be called directly by other string processing
    1181             :  *  functions.
    1182             :  */
    1183             : static int
    1184         130 : text_position(text *t1, text *t2, Oid collid)
    1185             : {
    1186             :     TextPositionState state;
    1187             :     int         result;
    1188             : 
    1189         130 :     check_collation_set(collid);
    1190             : 
    1191             :     /* Empty needle always matches at position 1 */
    1192         130 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
    1193          12 :         return 1;
    1194             : 
    1195             :     /* Otherwise, can't match if haystack is shorter (in bytes) than needle;
                     :      * this shortcut is taken only for deterministic collations. */
    1196         118 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2) &&
    1197          22 :         pg_newlocale_from_collation(collid)->deterministic)
    1198          22 :         return 0;
    1199             : 
    1200          96 :     text_position_setup(t1, t2, collid, &state);
    1201             :     /* only the start of the match is reported, so greedy mode is not needed */
    1202          96 :     state.greedy = false;
    1203             : 
    1204          96 :     if (!text_position_next(&state))
    1205          24 :         result = 0;
    1206             :     else
    1207          72 :         result = text_position_get_match_pos(&state);
    1208          96 :     text_position_cleanup(&state);
    1209          96 :     return result;
    1210             : }
    1211             : 
    1212             : 
    1213             : /*
    1214             :  * text_position_setup, text_position_next, text_position_cleanup -
    1215             :  *  Component steps of text_position()
    1216             :  *
    1217             :  * These are broken out so that a string can be efficiently searched for
    1218             :  * multiple occurrences of the same pattern.  text_position_next may be
    1219             :  * called multiple times, and it advances to the next match on each call.
    1220             :  * text_position_get_match_ptr() and text_position_get_match_pos() return
    1221             :  * a pointer or 1-based character position of the last match, respectively.
    1222             :  *
    1223             :  * The "state" variable is normally just a local variable in the caller.
    1224             :  *
    1225             :  * NOTE: text_position_next skips over the matched portion.  For example,
    1226             :  * searching for "xx" in "xxx" returns only one match, not two.
                     :  *
                     :  * text_position_setup fills *state for searching t1 (haystack) for t2
                     :  * (needle) under collation collid.  The state keeps pointers into the
                     :  * data of t1 and t2 rather than copies, so both must stay valid while
                     :  * the state is in use.  The needle must not be empty (asserted below).
    1227             :  */
    1228             : 
    1229             : static void
    1230        1688 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
    1231             : {
    1232        1688 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
    1233        1688 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
    1234             : 
    1235        1688 :     check_collation_set(collid);
    1236             : 
    1237        1688 :     state->locale = pg_newlocale_from_collation(collid);
    1238             : 
    1239             :     /*
    1240             :      * Most callers need greedy mode, but some might want to unset this to
    1241             :      * optimize.
    1242             :      */
    1243        1688 :     state->greedy = true;
    1244             : 
    1245             :     Assert(len2 > 0);
    1246             : 
    1247             :     /*
    1248             :      * Even with a multi-byte encoding, we perform the search using the raw
    1249             :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
    1250             :      * because in UTF-8 the byte sequence of one character cannot contain
    1251             :      * another character.  For other multi-byte encodings, we do the search
    1252             :      * initially as a simple byte search, ignoring multibyte issues, but
    1253             :      * verify afterwards that the match we found is at a character boundary,
    1254             :      * and continue the search if it was a false match.
                     :      *
                     :      * is_multibyte_char_in_char records whether that after-the-fact
                     :      * boundary check is required: false for single-byte encodings and
                     :      * for UTF-8, true for every other multi-byte encoding.
    1255             :      */
    1256        1688 :     if (pg_database_encoding_max_length() == 1)
    1257         108 :         state->is_multibyte_char_in_char = false;
    1258        1580 :     else if (GetDatabaseEncoding() == PG_UTF8)
    1259        1580 :         state->is_multibyte_char_in_char = false;
    1260             :     else
    1261           0 :         state->is_multibyte_char_in_char = true;
    1262             : 
    1263        1688 :     state->str1 = VARDATA_ANY(t1);
    1264        1688 :     state->str2 = VARDATA_ANY(t2);
    1265        1688 :     state->len1 = len1;
    1266        1688 :     state->len2 = len2;
    1267        1688 :     state->last_match = NULL;
    1268        1688 :     state->refpoint = state->str1;
    1269        1688 :     state->refpos = 0;
    1270             : 
    1271             :     /*
    1272             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1273             :      * notes we use the terminology that the "haystack" is the string to be
    1274             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1275             :      *
    1276             :      * If the needle is empty or bigger than the haystack then there is no
    1277             :      * point in wasting cycles initializing the table.  We also choose not to
    1278             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1279             :      * save anything in that case.
    1280             :      *
    1281             :      * (With nondeterministic collations, the search is already
    1282             :      * multibyte-aware, so we don't need this.)
                     :      *
                     :      * When this branch is skipped, state->skiptable and
                     :      * state->skiptablemask are left unset by this function.
    1283             :      */
    1284        1688 :     if (len1 >= len2 && len2 > 1 && state->locale->deterministic)
    1285             :     {
    1286        1360 :         int         searchlength = len1 - len2;
    1287             :         int         skiptablemask;
    1288             :         int         last;
    1289             :         int         i;
    1290        1360 :         const char *str2 = state->str2;
    1291             : 
    1292             :         /*
    1293             :          * First we must determine how much of the skip table to use.  The
    1294             :          * declaration of TextPositionState allows up to 256 elements, but for
    1295             :          * short search problems we don't really want to have to initialize so
    1296             :          * many elements --- it would take too long in comparison to the
    1297             :          * actual search time.  So we choose a useful skip table size based on
    1298             :          * the haystack length minus the needle length.  The closer the needle
    1299             :          * length is to the haystack length the less useful skipping becomes.
    1300             :          *
    1301             :          * Note: since we use bit-masking to select table elements, the skip
    1302             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1303             :          */
    1304        1360 :         if (searchlength < 16)
    1305         114 :             skiptablemask = 3;
    1306        1246 :         else if (searchlength < 64)
    1307          16 :             skiptablemask = 7;
    1308        1230 :         else if (searchlength < 128)
    1309          14 :             skiptablemask = 15;
    1310        1216 :         else if (searchlength < 512)
    1311         244 :             skiptablemask = 31;
    1312         972 :         else if (searchlength < 2048)
    1313         746 :             skiptablemask = 63;
    1314         226 :         else if (searchlength < 4096)
    1315         154 :             skiptablemask = 127;
    1316             :         else
    1317          72 :             skiptablemask = 255;
    1318        1360 :         state->skiptablemask = skiptablemask;
    1319             : 
    1320             :         /*
    1321             :          * Initialize the skip table.  We set all elements to the needle
    1322             :          * length, since this is the correct skip distance for any character
    1323             :          * not found in the needle.
    1324             :          */
    1325       95864 :         for (i = 0; i <= skiptablemask; i++)
    1326       94504 :             state->skiptable[i] = len2;
    1327             : 
    1328             :         /*
    1329             :          * Now examine the needle.  For each character except the last one,
    1330             :          * set the corresponding table element to the appropriate skip
    1331             :          * distance.  Note that when two characters share the same skip table
    1332             :          * entry, the one later in the needle must determine the skip
    1333             :          * distance.
                     :          *
                     :          * Walking the needle left to right gives exactly that: a later
                     :          * byte overwrites the entry with its smaller distance (last - i).
    1334             :          */
    1335        1360 :         last = len2 - 1;
    1336             : 
    1337       18044 :         for (i = 0; i < last; i++)
    1338       16684 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1339             :     }
    1340        1688 : }
    1341             : 
    1342             : /*
    1343             :  * Advance to the next match, starting from the end of the previous match
    1344             :  * (or the beginning of the string, on first call).  Returns true if a match
    1345             :  * is found.
    1346             :  *
    1347             :  * Note that this refuses to match an empty-string needle.  Most callers
    1348             :  * will have handled that case specially and we'll never see it here.
                     :  *
                     :  * On success, state->last_match points at the start of the match and
                     :  * state->last_match_len holds its length in bytes.  On failure neither
                     :  * field is modified.
    1349             :  */
    1350             : static bool
    1351        7628 : text_position_next(TextPositionState *state)
    1352             : {
    1353        7628 :     int         needle_len = state->len2;
    1354             :     char       *start_ptr;
    1355             :     char       *matchptr;
    1356             : 
    1357        7628 :     if (needle_len <= 0)
    1358           0 :         return false;           /* result for empty pattern */
    1359             : 
    1360             :     /* Start from the point right after the previous match. */
    1361        7628 :     if (state->last_match)
    1362        5928 :         start_ptr = state->last_match + state->last_match_len;
    1363             :     else
    1364        1700 :         start_ptr = state->str1;
    1365             : 
    1366        7628 : retry:
    1367        7628 :     matchptr = text_position_next_internal(start_ptr, state);
    1368             : 
    1369        7628 :     if (!matchptr)
    1370        1604 :         return false;
    1371             : 
    1372             :     /*
    1373             :      * Found a match for the byte sequence.  If this is a multibyte encoding,
    1374             :      * where one character's byte sequence can appear inside a longer
    1375             :      * multi-byte character, we need to verify that the match was at a
    1376             :      * character boundary, not in the middle of a multi-byte character.
                     :      *
                     :      * The check is skipped for nondeterministic collations; that search
                     :      * path advances with pg_mblen(), i.e. one whole character at a time.
    1377             :      */
    1378        6024 :     if (state->is_multibyte_char_in_char && state->locale->deterministic)
    1379             :     {
    1380             :         /* Walk one character at a time, until we reach the match. */
    1381             : 
    1382             :         /* the search should never move backwards. */
    1383             :         Assert(state->refpoint <= matchptr);
    1384             : 
    1385           0 :         while (state->refpoint < matchptr)
    1386             :         {
    1387             :             /* step to next character. */
    1388           0 :             state->refpoint += pg_mblen(state->refpoint);
    1389           0 :             state->refpos++;
    1390             : 
    1391             :             /*
    1392             :              * If we stepped over the match's start position, then it was a
    1393             :              * false positive, where the byte sequence appeared in the middle
    1394             :              * of a multi-byte character.  Skip it, and continue the search at
    1395             :              * the next character boundary.
    1396             :              */
    1397           0 :             if (state->refpoint > matchptr)
    1398             :             {
    1399           0 :                 start_ptr = state->refpoint;
    1400           0 :                 goto retry;
    1401             :             }
    1402             :         }
    1403             :     }
    1404             : 
    1405        6024 :     state->last_match = matchptr;
    1406        6024 :     state->last_match_len = state->last_match_len_tmp;  /* set by text_position_next_internal() */
    1407        6024 :     return true;
    1408             : }
    1409             : 
    1410             : /*
    1411             :  * Subroutine of text_position_next().  This searches for the raw byte
    1412             :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
    1413             :  * match starting at 'start_ptr', or NULL if no match is found.
                     :  *
                     :  * The byte length of the returned match is left in
                     :  * state->last_match_len_tmp: the needle length, except on the
                     :  * nondeterministic-collation path, where it is the length of the
                     :  * substring that actually compared equal.
                     :  *
                     :  * Three strategies are used: a collation-aware scan for nondeterministic
                     :  * collations, a plain byte scan for one-byte needles, and
                     :  * Boyer-Moore-Horspool (using the table built by text_position_setup)
                     :  * otherwise.
    1414             :  */
    1415             : static char *
    1416        7628 : text_position_next_internal(char *start_ptr, TextPositionState *state)
    1417             : {
    1418        7628 :     int         haystack_len = state->len1;
    1419        7628 :     int         needle_len = state->len2;
    1420        7628 :     int         skiptablemask = state->skiptablemask;   /* used only on the B-M-H path below */
    1421        7628 :     const char *haystack = state->str1;
    1422        7628 :     const char *needle = state->str2;
    1423        7628 :     const char *haystack_end = &haystack[haystack_len];
    1424             :     const char *hptr;
    1425             : 
    1426             :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
    1427             : 
    1428        7628 :     state->last_match_len_tmp = needle_len;
    1429             : 
    1430        7628 :     if (!state->locale->deterministic)
    1431             :     {
    1432             :         /*
    1433             :          * With a nondeterministic collation, we have to use an unoptimized
    1434             :          * route.  We walk through the haystack and see if at each position
    1435             :          * there is a substring of the remaining string that is equal to the
    1436             :          * needle under the given collation.
    1437             :          *
    1438             :          * Note, the found substring could have a different length than the
    1439             :          * needle, including being empty.  Callers that want to skip over the
    1440             :          * found string need to read the length of the found substring from
    1441             :          * last_match_len rather than just using the length of their needle.
    1442             :          *
    1443             :          * Most callers will require "greedy" semantics, meaning that we need
    1444             :          * to find the longest such substring, not the shortest.  For callers
    1445             :          * that don't need greedy semantics, we can finish on the first match.
    1446             :          */
    1447         240 :         const char *result_hptr = NULL;
    1448             : 
    1449         240 :         hptr = start_ptr;
    1450         642 :         while (hptr < haystack_end)
    1451             :         {
    1452             :             /*
    1453             :              * First check the common case that there is a match in the
    1454             :              * haystack of exactly the length of the needle.
    1455             :              */
    1456         534 :             if (!state->greedy &&
    1457         108 :                 haystack_end - hptr >= needle_len &&
    1458          54 :                 pg_strncoll(hptr, needle_len, needle, needle_len, state->locale) == 0)
    1459          12 :                 return (char *) hptr;
    1460             : 
    1461             :             /*
    1462             :              * Else check if any of the possible substrings starting at hptr
    1463             :              * are equal to the needle.
                     :              *
                     :              * In greedy mode the loop keeps going after a hit, so the last
                     :              * (longest) equal substring determines last_match_len_tmp.
                     :              *
                     :              * NOTE(review): the loop ends as soon as test_end reaches
                     :              * haystack_end, so the candidate running from hptr to the very
                     :              * end of the haystack is never passed to pg_strncoll() here.
                     :              * Only the non-greedy check above can see such a candidate, and
                     :              * only when it is exactly needle_len bytes long.  In greedy
                     :              * mode this looks like it misses matches that end at the last
                     :              * character of the haystack -- confirm and fix in the real
                     :              * source file.
    1464             :              */
    1465        2586 :             for (const char *test_end = hptr; test_end < haystack_end; test_end += pg_mblen(test_end))
    1466             :             {
    1467        2064 :                 if (pg_strncoll(hptr, (test_end - hptr), needle, needle_len, state->locale) == 0)
    1468             :                 {
    1469         132 :                     state->last_match_len_tmp = (test_end - hptr);
    1470         132 :                     result_hptr = hptr;
    1471         132 :                     if (!state->greedy)
    1472           0 :                         break;
    1473             :                 }
    1474             :             }
    1475         522 :             if (result_hptr)
    1476         120 :                 break;
    1477             : 
    1478         402 :             hptr += pg_mblen(hptr);
    1479             :         }
    1480             : 
    1481         228 :         return (char *) result_hptr;
    1482             :     }
    1483        7388 :     else if (needle_len == 1)
    1484             :     {
    1485             :         /* No point in using B-M-H for a one-character needle */
    1486         760 :         char        nchar = *needle;
    1487             : 
    1488         760 :         hptr = start_ptr;
    1489        5878 :         while (hptr < haystack_end)
    1490             :         {
    1491        5712 :             if (*hptr == nchar)
    1492         594 :                 return (char *) hptr;
    1493        5118 :             hptr++;
    1494             :         }
    1495             :     }
    1496             :     else
    1497             :     {
    1498        6628 :         const char *needle_last = &needle[needle_len - 1];
    1499             : 
    1500             :         /* Start with hptr on the haystack byte that lines up with the needle's last byte */
    1501        6628 :         hptr = start_ptr + needle_len - 1;
    1502      169760 :         while (hptr < haystack_end)
    1503             :         {
    1504             :             /* Match the needle scanning *backward* */
    1505             :             const char *nptr;
    1506             :             const char *p;
    1507             : 
    1508      168430 :             nptr = needle_last;
    1509      168430 :             p = hptr;
    1510      247434 :             while (*nptr == *p)
    1511             :             {
    1512             :                 /* Matched it all?  If so, return a pointer to the start of the match */
    1513       84302 :                 if (nptr == needle)
    1514        5298 :                     return (char *) p;
    1515       79004 :                 nptr--, p--;
    1516             :             }
    1517             : 
    1518             :             /*
    1519             :              * No match, so use the haystack char at hptr to decide how far to
    1520             :              * advance.  If the needle had any occurrence of that character
    1521             :              * (or more precisely, one sharing the same skiptable entry)
    1522             :              * before its last character, then we advance far enough to align
    1523             :              * the last such needle character with that haystack position.
    1524             :              * Otherwise we can advance by the whole needle length.
    1525             :              */
    1526      163132 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1527             :         }
    1528             :     }
    1529             : 
    1530        1496 :     return 0;                   /* not found; the 0 here is a null pointer */
    1531             : }
    1532             : 
    1533             : /*
    1534             :  * Return a pointer to the current match.
    1535             :  *
    1536             :  * The returned pointer points into the original haystack string.
                     :  *
                     :  * This is simply state->last_match: text_position_setup and
                     :  * text_position_reset set it to NULL, and text_position_next stores the
                     :  * match start there each time it succeeds.
    1537             :  */
    1538             : static char *
    1539        5922 : text_position_get_match_ptr(TextPositionState *state)
    1540             : {
    1541        5922 :     return state->last_match;
    1542             : }
    1543             : 
    1544             : /*
    1545             :  * Return the offset of the current match.
    1546             :  *
    1547             :  * The offset is in characters, 1-based.
                     :  *
                     :  * Characters are counted only from state->refpoint up to the match, and
                     :  * refpoint/refpos are then moved to the match, so a later call resumes
                     :  * counting from here instead of from the start of the haystack.
                     :  *
                     :  * NOTE(review): this presumably must be called only after
                     :  * text_position_next() has returned true, since it uses
                     :  * state->last_match without checking for NULL -- confirm against callers.
    1548             :  */
    1549             : static int
    1550          72 : text_position_get_match_pos(TextPositionState *state)
    1551             : {
    1552             :     /* Convert the byte position to char position. */
    1553         144 :     state->refpos += pg_mbstrlen_with_len(state->refpoint,
    1554          72 :                                           state->last_match - state->refpoint);
    1555          72 :     state->refpoint = state->last_match;
    1556          72 :     return state->refpos + 1;
    1557             : }
    1558             : 
    1559             : /*
    1560             :  * Reset search state to the initial state installed by text_position_setup.
    1561             :  *
    1562             :  * The next call to text_position_next will search from the beginning
    1563             :  * of the string.
                     :  *
                     :  * Only the match cursor (last_match) and the character-counting
                     :  * reference point (refpoint, refpos) are rewound; every other field of
                     :  * the state, including the greedy flag, is left as it is.
    1564             :  */
    1565             : static void
    1566          12 : text_position_reset(TextPositionState *state)
    1567             : {
    1568          12 :     state->last_match = NULL;
    1569          12 :     state->refpoint = state->str1;
    1570          12 :     state->refpos = 0;
    1571          12 : }
    1572             : /* Release a search state filled by text_position_setup; currently does nothing. */
    1573             : static void
    1574        1688 : text_position_cleanup(TextPositionState *state)
    1575             : {
    1576             :     /* no cleanup needed: text_position_setup allocates nothing itself */
    1577        1688 : }
    1578             : 
    1579             : /* Raise an ERROR unless collid is a valid OID; otherwise return normally. */
    1580             : static void
    1581    16792886 : check_collation_set(Oid collid)
    1582             : {
    1583    16792886 :     if (!OidIsValid(collid))
    1584             :     {
    1585             :         /*
    1586             :          * This typically means that the parser could not resolve a conflict
    1587             :          * of implicit collations, so report it that way.
                     :          *
                     :          * The message names only the situation and the COLLATE remedy; it
                     :          * contains nothing taken from the strings being processed.
    1588             :          */
    1589          30 :         ereport(ERROR,
    1590             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1591             :                  errmsg("could not determine which collation to use for string comparison"),
    1592             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1593             :     }
    1594    16792856 : }
    1595             : 
    1596             : /*
    1597             :  * varstr_cmp()
    1598             :  *
    1599             :  * Comparison function for text strings with given lengths, using the
    1600             :  * appropriate locale. Returns an integer less than, equal to, or greater than
    1601             :  * zero, indicating whether arg1 is less than, equal to, or greater than arg2.
    1602             :  *
                     :  * arg1/len1 and arg2/len2 are each a pointer plus a byte count; every
                     :  * comparison below is bounded by those lengths.  collid selects the
                     :  * collation; check_collation_set() raises an error if it is not a valid
                     :  * OID.  Only the sign of the result is meaningful: on some paths it is
                     :  * the raw memcmp() value.
                     :  *
    1603             :  * Note: many functions that depend on this are marked leakproof; therefore,
    1604             :  * avoid reporting the actual contents of the input when throwing errors.
    1605             :  * All errors herein should be things that can't happen except on corrupt
    1606             :  * data, anyway; otherwise we will have trouble with indexing strings that
    1607             :  * would cause them.
    1608             :  */
    1609             : int
    1610     9790138 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
    1611             : {
    1612             :     int         result;
    1613             :     pg_locale_t mylocale;
    1614             : 
    1615     9790138 :     check_collation_set(collid);
    1616             : 
    1617     9790120 :     mylocale = pg_newlocale_from_collation(collid);
    1618             : 
    1619     9790120 :     if (mylocale->collate_is_c)
    1620             :     {
                     :         /* bytewise order; if one string is a prefix of the other, the shorter sorts first */
    1621     3809724 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1622     3809724 :         if ((result == 0) && (len1 != len2))
    1623      121616 :             result = (len1 < len2) ? -1 : 1;
    1624             :     }
    1625             :     else
    1626             :     {
    1627             :         /*
    1628             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1629             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1630             :          * memcmp() followed by strcoll() is only trivially slower than
    1631             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1632             :          * very often, and if it does - for example, because there are many
    1633             :          * equal strings in the input - then we win big by avoiding expensive
    1634             :          * collation-aware comparisons.
    1635             :          */
    1636     5980396 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1637     1505640 :             return 0;
    1638             : 
    1639     4474756 :         result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
    1640             : 
    1641             :         /* Break tie if necessary: under a deterministic collation, strings that
                     :          * collate equal are ordered bytewise, then by length. */
    1642     4474756 :         if (result == 0 && mylocale->deterministic)
    1643             :         {
    1644           0 :             result = memcmp(arg1, arg2, Min(len1, len2));
    1645           0 :             if ((result == 0) && (len1 != len2))
    1646           0 :                 result = (len1 < len2) ? -1 : 1;
    1647             :         }
    1648             :     }
    1649             : 
    1650     8284480 :     return result;
    1651             : }
    1652             : 
    1653             : /* text_cmp()
    1654             :  * Internal comparison function for text strings.
    1655             :  * Returns a value less than, equal to, or greater than zero: it is exactly
                     :  * what varstr_cmp() returns for the data bytes and lengths (headers
                     :  * excluded) of the two values under collation collid, so callers should
                     :  * test only the sign.
    1656             :  */
    1657             : static int
    1658     7630972 : text_cmp(text *arg1, text *arg2, Oid collid)
    1659             : {
    1660             :     char       *a1p,
    1661             :                *a2p;
    1662             :     int         len1,
    1663             :                 len2;
    1664             : 
    1665     7630972 :     a1p = VARDATA_ANY(arg1);
    1666     7630972 :     a2p = VARDATA_ANY(arg2);
    1667             : 
    1668     7630972 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1669     7630972 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1670             : 
    1671     7630972 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1672             : }
    1673             : 
    1674             : /*
    1675             :  * Comparison functions for text strings.
    1676             :  *
    1677             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1678             :  * be careful to free working copies of toasted datums.  Most places don't
    1679             :  * need to be so careful.
                     :  *
                     :  * texteq: equality of two text values under the call's collation.  For a
                     :  * deterministic collation the answer comes from lengths and a bytewise
                     :  * comparison; otherwise it is text_cmp() == 0.
    1680             :  */
    1681             : 
    1682             : Datum
    1683     6594788 : texteq(PG_FUNCTION_ARGS)
    1684             : {
    1685     6594788 :     Oid         collid = PG_GET_COLLATION();
    1686     6594788 :     pg_locale_t mylocale = 0;   /* initializer is overwritten below before any use */
    1687             :     bool        result;
    1688             : 
    1689     6594788 :     check_collation_set(collid);
    1690             : 
    1691     6594788 :     mylocale = pg_newlocale_from_collation(collid);
    1692             : 
    1693     6594788 :     if (mylocale->deterministic)
    1694             :     {
    1695     6586348 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1696     6586348 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1697             :         Size        len1,
    1698             :                     len2;
    1699             : 
    1700             :         /*
    1701             :          * Since we only care about equality or not-equality, we can avoid all
    1702             :          * the expense of strcoll() here, and just do bitwise comparison.  In
    1703             :          * fact, we don't even have to do a bitwise comparison if we can show
    1704             :          * the lengths of the strings are unequal; which might save us from
    1705             :          * having to detoast one or both values.
                     :          *
                     :          * NOTE(review): the memcmp below subtracts VARHDRSZ from len1, so
                     :          * toast_raw_datum_size() presumably reports a size that includes a
                     :          * VARHDRSZ-byte header -- confirm against its definition.
    1706             :          */
    1707     6586348 :         len1 = toast_raw_datum_size(arg1);
    1708     6586348 :         len2 = toast_raw_datum_size(arg2);
    1709     6586348 :         if (len1 != len2)
    1710     3138376 :             result = false;
    1711             :         else
    1712             :         {
    1713     3447972 :             text       *targ1 = DatumGetTextPP(arg1);
    1714     3447972 :             text       *targ2 = DatumGetTextPP(arg2);
    1715             : 
    1716     3447972 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1717             :                              len1 - VARHDRSZ) == 0);
    1718             : 
    1719     3447972 :             PG_FREE_IF_COPY(targ1, 0);
    1720     3447972 :             PG_FREE_IF_COPY(targ2, 1);
    1721             :         }
    1722             :     }
    1723             :     else
    1724             :     {
    1725        8440 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1726        8440 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1727             : 
    1728        8440 :         result = (text_cmp(arg1, arg2, collid) == 0);
    1729             : 
    1730        8440 :         PG_FREE_IF_COPY(arg1, 0);
    1731        8440 :         PG_FREE_IF_COPY(arg2, 1);
    1732             :     }
    1733             : 
    1734     6594788 :     PG_RETURN_BOOL(result);
    1735             : }
    1736             : /* textne - inequality of two text values; same two code paths as texteq(), with the result inverted. */
    1737             : Datum
    1738       22758 : textne(PG_FUNCTION_ARGS)
    1739             : {
    1740       22758 :     Oid         collid = PG_GET_COLLATION();
    1741             :     pg_locale_t mylocale;
    1742             :     bool        result;
    1743             : 
    1744       22758 :     check_collation_set(collid);
    1745             : 
    1746       22758 :     mylocale = pg_newlocale_from_collation(collid);
    1747             : 
    1748       22758 :     if (mylocale->deterministic)
    1749             :     {
    1750       22734 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1751       22734 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1752             :         Size        len1,
    1753             :                     len2;
    1754             : 
    1755             :         /* See comment in texteq(): unequal sizes settle the answer without detoasting */
    1756       22734 :         len1 = toast_raw_datum_size(arg1);
    1757       22734 :         len2 = toast_raw_datum_size(arg2);
    1758       22734 :         if (len1 != len2)
    1759        4424 :             result = true;
    1760             :         else
    1761             :         {
    1762       18310 :             text       *targ1 = DatumGetTextPP(arg1);
    1763       18310 :             text       *targ2 = DatumGetTextPP(arg2);
    1764             : 
    1765       18310 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1766             :                              len1 - VARHDRSZ) != 0);
    1767             : 
    1768       18310 :             PG_FREE_IF_COPY(targ1, 0);
    1769       18310 :             PG_FREE_IF_COPY(targ2, 1);
    1770             :         }
    1771             :     }
    1772             :     else
    1773             :     {
    1774          24 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1775          24 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1776             : 
    1777          24 :         result = (text_cmp(arg1, arg2, collid) != 0);
    1778             : 
    1779          24 :         PG_FREE_IF_COPY(arg1, 0);
    1780          24 :         PG_FREE_IF_COPY(arg2, 1);
    1781             :     }
    1782             : 
    1783       22758 :     PG_RETURN_BOOL(result);
    1784             : }
    1785             : /* text_lt: fmgr-callable "less than" for text; true when text_cmp() orders arg1 strictly before arg2. */
    1786             : Datum
    1787      211848 : text_lt(PG_FUNCTION_ARGS)
    1788             : {
    1789      211848 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1790      211848 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1791             :     bool        result;
    1792             : 
    1793      211848 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1794             :     /* Release any copies made while fetching the arguments. */
    1795      211830 :     PG_FREE_IF_COPY(arg1, 0);
    1796      211830 :     PG_FREE_IF_COPY(arg2, 1);
    1797             : 
    1798      211830 :     PG_RETURN_BOOL(result);
    1799             : }
    1800             : /* text_le: fmgr-callable "less than or equal" for text; true when text_cmp() returns a value <= 0. */
    1801             : Datum
    1802      318114 : text_le(PG_FUNCTION_ARGS)
    1803             : {
    1804      318114 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1805      318114 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1806             :     bool        result;
    1807             : 
    1808      318114 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1809             :     /* Release any copies made while fetching the arguments. */
    1810      318114 :     PG_FREE_IF_COPY(arg1, 0);
    1811      318114 :     PG_FREE_IF_COPY(arg2, 1);
    1812             : 
    1813      318114 :     PG_RETURN_BOOL(result);
    1814             : }
    1815             : /* text_gt: fmgr-callable "greater than" for text; true when text_cmp() orders arg1 strictly after arg2. */
    1816             : Datum
    1817      195830 : text_gt(PG_FUNCTION_ARGS)
    1818             : {
    1819      195830 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1820      195830 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1821             :     bool        result;
    1822             : 
    1823      195830 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1824             :     /* Release any copies made while fetching the arguments. */
    1825      195830 :     PG_FREE_IF_COPY(arg1, 0);
    1826      195830 :     PG_FREE_IF_COPY(arg2, 1);
    1827             : 
    1828      195830 :     PG_RETURN_BOOL(result);
    1829             : }
    1830             : /* text_ge: fmgr-callable "greater than or equal" for text; true when text_cmp() returns a value >= 0. */
    1831             : Datum
    1832      177896 : text_ge(PG_FUNCTION_ARGS)
    1833             : {
    1834      177896 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1835      177896 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1836             :     bool        result;
    1837             : 
    1838      177896 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1839             :     /* Release any copies made while fetching the arguments. */
    1840      177896 :     PG_FREE_IF_COPY(arg1, 0);
    1841      177896 :     PG_FREE_IF_COPY(arg2, 1);
    1842             : 
    1843      177896 :     PG_RETURN_BOOL(result);
    1844             : }
    1845             : /* text_starts_with: fmgr-callable test that arg1 begins with the bytes of arg2; errors out for nondeterministic collations. */
    1846             : Datum
    1847       37914 : text_starts_with(PG_FUNCTION_ARGS)
    1848             : {
    1849       37914 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1850       37914 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1851       37914 :     Oid         collid = PG_GET_COLLATION();
    1852             :     pg_locale_t mylocale;
    1853             :     bool        result;
    1854             :     Size        len1,
    1855             :                 len2;
    1856             : 
    1857       37914 :     check_collation_set(collid);
    1858             : 
    1859       37914 :     mylocale = pg_newlocale_from_collation(collid);
    1860             :     /* The prefix test below is a plain memcmp(), so a nondeterministic collation is refused. */
    1861       37914 :     if (!mylocale->deterministic)
    1862           0 :         ereport(ERROR,
    1863             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1864             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1865             :     /* A prefix whose raw size exceeds that of the string is reported as no match without fetching data. */
    1866       37914 :     len1 = toast_raw_datum_size(arg1);
    1867       37914 :     len2 = toast_raw_datum_size(arg2);
    1868       37914 :     if (len2 > len1)
    1869           0 :         result = false;
    1870             :     else
    1871             :     {
    1872       37914 :         text       *targ1 = text_substring(arg1, 1, len2, false);
    1873       37914 :         text       *targ2 = DatumGetTextPP(arg2);
    1874             :         /* NOTE(review): len2 is a raw size (header included) used as the substring length; presumably a safe over-ask - confirm. */
    1875       37914 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1876       37914 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
    1877             : 
    1878       37914 :         PG_FREE_IF_COPY(targ1, 0);
    1879       37914 :         PG_FREE_IF_COPY(targ2, 1);
    1880             :     }
    1881             : 
    1882       37914 :     PG_RETURN_BOOL(result);
    1883             : }
    1884             : /* bttextcmp: fmgr-callable three-way comparison for text; hands back text_cmp()'s result unchanged as an int32. */
    1885             : Datum
    1886     6403184 : bttextcmp(PG_FUNCTION_ARGS)
    1887             : {
    1888     6403184 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1889     6403184 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1890             :     int32       result;
    1891             : 
    1892     6403184 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1893             :     /* Release any copies made while fetching the arguments. */
    1894     6403184 :     PG_FREE_IF_COPY(arg1, 0);
    1895     6403184 :     PG_FREE_IF_COPY(arg2, 1);
    1896             : 
    1897     6403184 :     PG_RETURN_INT32(result);
    1898             : }
    1899             : /* bttextsortsupport: fmgr-callable setup that fills in the caller's SortSupport (argument 0) for type text. */
    1900             : Datum
    1901       84468 : bttextsortsupport(PG_FUNCTION_ARGS)
    1902             : {
    1903       84468 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1904       84468 :     Oid         collid = ssup->ssup_collation;
    1905             :     MemoryContext oldcontext;
    1906             :     /* Do the setup inside ssup->ssup_cxt, so whatever it allocates is made in that context. */
    1907       84468 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1908             : 
    1909             :     /* Use generic string SortSupport */
    1910       84468 :     varstr_sortsupport(ssup, TEXTOID, collid);
    1911             :     /* Restore the caller's memory context before returning. */
    1912       84456 :     MemoryContextSwitchTo(oldcontext);
    1913             : 
    1914       84456 :     PG_RETURN_VOID();
    1915             : }
    1916             : 
    1917             : /*
    1918             :  * Generic sortsupport interface for character type's operator classes.
    1919             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    1920             :  * trailing spaces before comparison).
    1921             :  * On return ssup->comparator is set; ssup_extra and the abbreviation hooks are set only when needed.
    1922             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    1923             :  * same representation.  Callers that always use the C collation (e.g.
    1924             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    1925             :  * this will not work with any other collation, though.
    1926             :  */
    1927             : void
    1928      140284 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
    1929             : {
    1930      140284 :     bool        abbreviate = ssup->abbreviate;
    1931      140284 :     bool        collate_c = false;
    1932             :     VarStringSortSupport *sss;
    1933             :     pg_locale_t locale;
    1934             : 
    1935      140284 :     check_collation_set(collid);
    1936             : 
    1937      140272 :     locale = pg_newlocale_from_collation(collid);
    1938             : 
    1939             :     /*
    1940             :      * If possible, set ssup->comparator to a function which can be used to
    1941             :      * directly compare two datums.  If we can do this, we'll avoid the
    1942             :      * overhead of a trip through the fmgr layer for every comparison, which
    1943             :      * can be substantial.
    1944             :      *
    1945             :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
    1946             :      * which uses strcoll() to perform comparisons.  We use that for the
    1947             :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
    1948             :      * LC_COLLATE = C, we can make things quite a bit faster with
    1949             :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
    1950             :      * memcmp() rather than strcoll().
    1951             :      */
    1952      140272 :     if (locale->collate_is_c)
    1953             :     {
    1954       95588 :         if (typid == BPCHAROID)
    1955         308 :             ssup->comparator = bpcharfastcmp_c;
    1956       95280 :         else if (typid == NAMEOID)
    1957             :         {
    1958       54752 :             ssup->comparator = namefastcmp_c;
    1959             :             /* Not supporting abbreviation with type NAME, for now */
    1960       54752 :             abbreviate = false;
    1961             :         }
    1962             :         else
    1963       40528 :             ssup->comparator = varstrfastcmp_c;
    1964             :         /* Remember that one of the C-collation comparators was chosen. */
    1965       95588 :         collate_c = true;
    1966             :     }
    1967             :     else
    1968             :     {
    1969             :         /*
    1970             :          * We use varlenafastcmp_locale except for type NAME.
    1971             :          */
    1972       44684 :         if (typid == NAMEOID)
    1973             :         {
    1974           0 :             ssup->comparator = namefastcmp_locale;
    1975             :             /* Not supporting abbreviation with type NAME, for now */
    1976           0 :             abbreviate = false;
    1977             :         }
    1978             :         else
    1979       44684 :             ssup->comparator = varlenafastcmp_locale;
    1980             : 
    1981             :         /*
    1982             :          * Unfortunately, it seems that abbreviation for non-C collations is
    1983             :          * broken on many common platforms; see pg_strxfrm_enabled().
    1984             :          *
    1985             :          * Even apart from the risk of broken locales, it's possible that
    1986             :          * there are platforms where the use of abbreviated keys should be
    1987             :          * disabled at compile time.  Having only 4 byte datums could make
    1988             :          * worst-case performance drastically more likely, for example.
    1989             :          * Moreover, macOS's strxfrm() implementation is known to not
    1990             :          * effectively concentrate a significant amount of entropy from the
    1991             :          * original string in earlier transformed blobs.  It's possible that
    1992             :          * other supported platforms are similarly encumbered.  So, if we ever
    1993             :          * get past disabling this categorically, we may still want or need to
    1994             :          * disable it for particular platforms.
    1995             :          */
    1996       44684 :         if (!pg_strxfrm_enabled(locale))
    1997       43894 :             abbreviate = false;
    1998             :     }
    1999             : 
    2000             :     /*
    2001             :      * If we're using abbreviated keys, or if we're using a locale-aware
    2002             :      * comparison, we need to initialize a VarStringSortSupport object. Both
    2003             :      * cases will make use of the temporary buffers we initialize here for
    2004             :      * scratch space (and to detect requirement for BpChar semantics from
    2005             :      * caller), and the abbreviation case requires additional state.
    2006             :      */
    2007      140272 :     if (abbreviate || !collate_c)
    2008             :     {
    2009       67816 :         sss = palloc(sizeof(VarStringSortSupport));
    2010       67816 :         sss->buf1 = palloc(TEXTBUFLEN);
    2011       67816 :         sss->buflen1 = TEXTBUFLEN;
    2012       67816 :         sss->buf2 = palloc(TEXTBUFLEN);
    2013       67816 :         sss->buflen2 = TEXTBUFLEN;
    2014             :         /* Start with invalid values: -1 matches no real length, so nothing counts as cached yet */
    2015       67816 :         sss->last_len1 = -1;
    2016       67816 :         sss->last_len2 = -1;
    2017             :         /* Initialize; varstrfastcmp_locale only returns this value when cache_blob is false */
    2018       67816 :         sss->last_returned = 0;
    2019       67816 :         if (collate_c)
    2020       23132 :             sss->locale = NULL;
    2021             :         else
    2022       44684 :             sss->locale = locale;
    2023             : 
    2024             :         /*
    2025             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    2026             :          * constantly keep track of the variety of data that buf1 and buf2
    2027             :          * currently contain.
    2028             :          *
    2029             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    2030             :          * conversions and comparisons are batched into two distinct phases,
    2031             :          * but the correctness of caching cannot hinge upon this.  For
    2032             :          * comparison caching, buffer state is only trusted if cache_blob is
    2033             :          * found set to false, whereas strxfrm() caching only trusts the state
    2034             :          * when cache_blob is found set to true.
    2035             :          *
    2036             :          * Arbitrarily initialize cache_blob to true.
    2037             :          */
    2038       67816 :         sss->cache_blob = true;
    2039       67816 :         sss->collate_c = collate_c;
    2040       67816 :         sss->typid = typid;
    2041       67816 :         ssup->ssup_extra = sss;
    2042             : 
    2043             :         /*
    2044             :          * If possible, plan to use the abbreviated keys optimization.  The
    2045             :          * core code may switch back to authoritative comparator should
    2046             :          * abbreviation be aborted.
    2047             :          */
    2048       67816 :         if (abbreviate)
    2049             :         {
    2050       23724 :             sss->prop_card = 0.20;
    2051       23724 :             initHyperLogLog(&sss->abbr_card, 10);
    2052       23724 :             initHyperLogLog(&sss->full_card, 10);
    2053       23724 :             ssup->abbrev_full_comparator = ssup->comparator;
    2054       23724 :             ssup->comparator = ssup_datum_unsigned_cmp;
    2055       23724 :             ssup->abbrev_converter = varstr_abbrev_convert;
    2056       23724 :             ssup->abbrev_abort = varstr_abbrev_abort;
    2057             :         }
    2058             :     }
    2059      140272 : }
    2060             : 
    2061             : /*
    2062             :  * sortsupport comparison func (for C locale case): plain bytewise order; ssup is not used
    2063             :  */
    2064             : static int
    2065    41463928 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2066             : {
    2067    41463928 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2068    41463928 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2069             :     char       *a1p,
    2070             :                *a2p;
    2071             :     int         len1,
    2072             :                 len2,
    2073             :                 result;
    2074             : 
    2075    41463928 :     a1p = VARDATA_ANY(arg1);
    2076    41463928 :     a2p = VARDATA_ANY(arg2);
    2077             : 
    2078    41463928 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2079    41463928 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2080             :     /* Compare the common prefix; if it ties, the shorter string sorts first. */
    2081    41463928 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2082    41463928 :     if ((result == 0) && (len1 != len2))
    2083     1150456 :         result = (len1 < len2) ? -1 : 1;
    2084             : 
    2085             :     /* We can't afford to leak memory here: free an argument only when the fetch above produced a new pointer. */
    2086    41463928 :     if (PointerGetDatum(arg1) != x)
    2087           2 :         pfree(arg1);
    2088    41463928 :     if (PointerGetDatum(arg2) != y)
    2089           2 :         pfree(arg2);
    2090             : 
    2091    41463928 :     return result;
    2092             : }
    2093             : 
    2094             : /*
    2095             :  * sortsupport comparison func (for BpChar C locale case)
    2096             :  *
    2097             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    2098             :  * varstr_sortsupport BpChar case, modeled on
    2099             :  * internal_bpchar_pattern_compare().
    2100             :  */
    2101             : static int
    2102       62424 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2103             : {
    2104       62424 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2105       62424 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2106             :     char       *a1p,
    2107             :                *a2p;
    2108             :     int         len1,
    2109             :                 len2,
    2110             :                 result;
    2111             : 
    2112       62424 :     a1p = VARDATA_ANY(arg1);
    2113       62424 :     a2p = VARDATA_ANY(arg2);
    2114             :     /* Use the lengths reported by bpchartruelen() rather than the stored payload lengths. */
    2115       62424 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2116       62424 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2117             :     /* Compare the common prefix; if it ties, the shorter string sorts first. */
    2118       62424 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2119       62424 :     if ((result == 0) && (len1 != len2))
    2120           4 :         result = (len1 < len2) ? -1 : 1;
    2121             : 
    2122             :     /* We can't afford to leak memory here: free an argument only when the fetch above produced a new pointer. */
    2123       62424 :     if (PointerGetDatum(arg1) != x)
    2124           0 :         pfree(arg1);
    2125       62424 :     if (PointerGetDatum(arg2) != y)
    2126           0 :         pfree(arg2);
    2127             : 
    2128       62424 :     return result;
    2129             : }
    2130             : 
    2131             : /*
    2132             :  * sortsupport comparison func (for NAME C locale case): strncmp() bounded by NAMEDATALEN; ssup is not used
    2133             :  */
    2134             : static int
    2135    38023972 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
    2136             : {
    2137    38023972 :     Name        arg1 = DatumGetName(x);
    2138    38023972 :     Name        arg2 = DatumGetName(y);
    2139             :     /* No freeing here, unlike the varlena comparators: the Name pointers are used as fetched. */
    2140    38023972 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
    2141             : }
    2142             : 
    2143             : /*
    2144             :  * sortsupport comparison func (for locale case with all varlena types)
    2145             :  */
    2146             : static int
    2147    35981984 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2148             : {
    2149    35981984 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2150    35981984 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2151             :     char       *a1p,
    2152             :                *a2p;
    2153             :     int         len1,
    2154             :                 len2,
    2155             :                 result;
    2156             : 
    2157    35981984 :     a1p = VARDATA_ANY(arg1);
    2158    35981984 :     a2p = VARDATA_ANY(arg2);
    2159             : 
    2160    35981984 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2161    35981984 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2162             :     /* The comparison itself is shared with the NAME variant, via varstrfastcmp_locale(). */
    2163    35981984 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
    2164             : 
    2165             :     /* We can't afford to leak memory here: free an argument only when the fetch above produced a new pointer. */
    2166    35981984 :     if (PointerGetDatum(arg1) != x)
    2167           4 :         pfree(arg1);
    2168    35981984 :     if (PointerGetDatum(arg2) != y)
    2169           4 :         pfree(arg2);
    2170             : 
    2171    35981984 :     return result;
    2172             : }
    2173             : 
    2174             : /*
    2175             :  * sortsupport comparison func (for locale case with NAME type)
    2176             :  */
    2177             : static int
    2178           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2179             : {
    2180           0 :     Name        arg1 = DatumGetName(x);
    2181           0 :     Name        arg2 = DatumGetName(y);
    2182             :     /* Lengths are taken with strlen() on each name, then the shared locale comparator does the work. */
    2183           0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
    2184           0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
    2185             :                                 ssup);
    2186             : }
    2187             : 
    2188             : /*
    2189             :  * sortsupport comparison func for locale cases; takes (pointer, length) pairs and keeps NUL-terminated copies plus the last result in ssup->ssup_extra
    2190             :  */
    2191             : static int
    2192    35981984 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
    2193             : {
    2194    35981984 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2195             :     int         result;
    2196             :     bool        arg1_match;
    2197             : 
    2198             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2199    35981984 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2200             :     {
    2201             :         /*
    2202             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2203             :          * last_len2.  Existing contents of buffers might still be used by
    2204             :          * next call.
    2205             :          *
    2206             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2207             :          * even though that implies that the memcmp() will usually be
    2208             :          * performed for BpChar callers (though multibyte characters could
    2209             :          * still prevent that from occurring).  The memcmp() is still very
    2210             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2211             :          * (not limited to padding), so we need make no distinction between
    2212             :          * padding space characters and "real" space characters.
    2213             :          */
    2214     9274380 :         return 0;
    2215             :     }
    2216             : 
    2217    26707604 :     if (sss->typid == BPCHAROID)
    2218             :     {
    2219             :         /* Get true number of bytes, ignoring trailing spaces */
    2220       34496 :         len1 = bpchartruelen(a1p, len1);
    2221       34496 :         len2 = bpchartruelen(a2p, len2);
    2222             :     }
    2223             :     /* Grow each scratch buffer as needed so it can hold the string plus a terminating NUL. */
    2224    26707604 :     if (len1 >= sss->buflen1)
    2225             :     {
    2226          14 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2227          14 :         sss->buf1 = repalloc(sss->buf1, sss->buflen1);
    2228             :     }
    2229    26707604 :     if (len2 >= sss->buflen2)
    2230             :     {
    2231          10 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2232          10 :         sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2233             :     }
    2234             : 
    2235             :     /*
    2236             :      * We're likely to be asked to compare the same strings repeatedly, and
    2237             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2238             :      * comparisons, even though in general there is no reason to think that
    2239             :      * that will work out (every string datum may be unique).  Caching does
    2240             :      * not slow things down measurably when it doesn't work out, and can speed
    2241             :      * things up by rather a lot when it does.  In part, this is because the
    2242             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2243             :      * when the last comparison's result cannot be reused.
    2244             :      */
    2245    26707604 :     arg1_match = true;
    2246    26707604 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2247             :     {
    2248    24726646 :         arg1_match = false;
    2249    24726646 :         memcpy(sss->buf1, a1p, len1);
    2250    24726646 :         sss->buf1[len1] = '\0';
    2251    24726646 :         sss->last_len1 = len1;
    2252             :     }
    2253             : 
    2254             :     /*
    2255             :      * If we're comparing the same two strings as last time, we can return the
    2256             :      * same answer without calling strcoll() again.  This is more likely than
    2257             :      * it seems (at least with moderate to low cardinality sets), because
    2258             :      * quicksort compares the same pivot against many values.
    2259             :      */
    2260    26707604 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2261             :     {
    2262     4065954 :         memcpy(sss->buf2, a2p, len2);
    2263     4065954 :         sss->buf2[len2] = '\0';
    2264     4065954 :         sss->last_len2 = len2;
    2265             :     }
    2266    22641650 :     else if (arg1_match && !sss->cache_blob)
    2267             :     {
    2268             :         /* Use result cached following last actual strcoll() call */
    2269     1554488 :         return sss->last_returned;
    2270             :     }
    2271             :     /* No usable cached result: run the collation-aware comparison on the NUL-terminated copies. */
    2272    25153116 :     result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
    2273             : 
    2274             :     /* Break tie if necessary: for a deterministic locale, strcmp() on the copies decides. */
    2275    25153116 :     if (result == 0 && sss->locale->deterministic)
    2276           0 :         result = strcmp(sss->buf1, sss->buf2);
    2277             : 
    2278             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2279    25153116 :     sss->cache_blob = false;
    2280    25153116 :     sss->last_returned = result;
    2281    25153116 :     return result;
    2282             : }
    2283             : 
    2284             : /*
    2285             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    2286             :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    2287             :  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
    2288             :  * stored in reverse order), and treat it as an unsigned integer.  When the "C"
    2289             :  * locale is used, or in case of bytea, just memcpy() from original instead.
    2290             :  */
    2291             : static Datum
    2292      829642 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2293             : {
    2294      829642 :     const size_t max_prefix_bytes = sizeof(Datum);
    2295      829642 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2296      829642 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2297      829642 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2298             : 
    2299             :     /* working state */
    2300             :     Datum       res;
    2301             :     char       *pres;
    2302             :     int         len;
    2303             :     uint32      hash;
    2304             : 
    2305      829642 :     pres = (char *) &res;
    2306             :     /* memset(), so any non-overwritten bytes are NUL */
    2307      829642 :     memset(pres, 0, max_prefix_bytes);
    2308      829642 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2309             : 
    2310             :     /* Get number of bytes, ignoring trailing spaces */
    2311      829642 :     if (sss->typid == BPCHAROID)
    2312        1010 :         len = bpchartruelen(authoritative_data, len);
    2313             : 
    2314             :     /*
    2315             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2316             :      * abbreviate keys.  The full comparator for the C locale is always
    2317             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2318             :      * always force the C collation -- bytea isn't a collatable type, but this
    2319             :      * approach is convenient) to use strxfrm().  This is because bytea
    2320             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2321             :      *
    2322             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2323             :      * strings because abbreviated cmp need not make a distinction between
    2324             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2325             :      * authoritative representation.  Hopefully a comparison at or past one
    2326             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2327             :      * without consulting the authoritative representation; specifically, some
    2328             :      * later non-NUL byte in the longer string can resolve the comparison
    2329             :      * against a subsequent terminating NUL in the shorter string.  There will
    2330             :      * usually be what is effectively a "length-wise" resolution there and
    2331             :      * then.
    2332             :      *
    2333             :      * If that doesn't work out -- if all bytes in the longer string
    2334             :      * positioned at or past the offset of the smaller string's (first)
    2335             :      * terminating NUL are actually representative of NUL bytes in the
    2336             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2337             :      * towards the end of the longer string iff it happens to still be small)
    2338             :      * -- then an authoritative tie-breaker will happen, and do the right
    2339             :      * thing: explicitly consider string length.
    2340             :      */
    2341      829642 :     if (sss->collate_c)
    2342      827818 :         memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
    2343             :     else
    2344             :     {
    2345             :         Size        bsize;
    2346             : 
    2347             :         /*
    2348             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2349             :          * analogs.
    2350             :          */
    2351             : 
    2352             :         /* By convention, we use buffer 1 to store and NUL-terminate */
    2353        1824 :         if (len >= sss->buflen1)
    2354             :         {
    2355           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2356           0 :             sss->buf1 = repalloc(sss->buf1, sss->buflen1);
    2357             :         }
    2358             : 
    2359             :         /* Might be able to reuse strxfrm() blob from last call */
    2360        1824 :         if (sss->last_len1 == len && sss->cache_blob &&
    2361         912 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2362             :         {
    2363         168 :             memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
    2364             :             /* No change affecting cardinality, so no hashing required */
    2365         168 :             goto done;
    2366             :         }
    2367             : 
    2368        1656 :         memcpy(sss->buf1, authoritative_data, len);
    2369             : 
    2370             :         /*
    2371             :          * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings.
    2372             :          */
    2373        1656 :         sss->buf1[len] = '\0';
    2374        1656 :         sss->last_len1 = len;
    2375             : 
    2376        1656 :         if (pg_strxfrm_prefix_enabled(sss->locale))
    2377             :         {
    2378        1656 :             if (sss->buflen2 < max_prefix_bytes)
    2379             :             {
    2380           0 :                 sss->buflen2 = Max(max_prefix_bytes,
    2381             :                                    Min(sss->buflen2 * 2, MaxAllocSize));
    2382           0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2383             :             }
    2384             : 
    2385        1656 :             bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
    2386             :                                       max_prefix_bytes, sss->locale);
    2387        1656 :             sss->last_len2 = bsize;
    2388             :         }
    2389             :         else
    2390             :         {
    2391             :             /*
    2392             :              * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
    2393             :              * again.  The pg_strxfrm() function leaves the result buffer
    2394             :              * content undefined if the result did not fit, so we need to
    2395             :              * retry until everything fits, even though we only need the first
    2396             :              * few bytes in the end.
    2397             :              */
    2398             :             for (;;)
    2399             :             {
    2400           0 :                 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
    2401             :                                    sss->locale);
    2402             : 
    2403           0 :                 sss->last_len2 = bsize;
    2404           0 :                 if (bsize < sss->buflen2)
    2405           0 :                     break;
    2406             : 
    2407             :                 /*
    2408             :                  * Grow buffer and retry.
    2409             :                  */
    2410           0 :                 sss->buflen2 = Max(bsize + 1,
    2411             :                                    Min(sss->buflen2 * 2, MaxAllocSize));
    2412           0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2413             :             }
    2414             :         }
    2415             : 
    2416             :         /*
    2417             :          * Every Datum byte is always compared.  This is safe because the
    2418             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2419             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2420             :          * logically representing termination.
    2421             :          *
    2422             :          * (Actually, even if there were NUL bytes in the blob it would be
    2423             :          * okay.  See remarks on bytea case above.)
    2424             :          */
    2425        1656 :         memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
    2426             :     }
    2427             : 
    2428             :     /*
    2429             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2430             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2431             :      * the worst case, where we do many string transformations for no saving
    2432             :      * in full strcoll()-based comparisons.  These statistics are used by
    2433             :      * varstr_abbrev_abort().
    2434             :      *
    2435             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    2436             :      * in order to compensate for cases where differences are past
    2437             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    2438             :      */
    2439      829474 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2440             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2441             : 
    2442      829474 :     if (len > PG_CACHE_LINE_SIZE)
    2443         190 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2444             : 
    2445      829474 :     addHyperLogLog(&sss->full_card, hash);
    2446             : 
    2447             :     /* Hash abbreviated key */
    2448             : #if SIZEOF_DATUM == 8
    2449             :     {
    2450             :         uint32      lohalf,
    2451             :                     hihalf;
    2452             : 
    2453      829474 :         lohalf = (uint32) res;
    2454      829474 :         hihalf = (uint32) (res >> 32);
    2455      829474 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2456             :     }
    2457             : #else                           /* SIZEOF_DATUM != 8 */
    2458             :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2459             : #endif
    2460             : 
    2461      829474 :     addHyperLogLog(&sss->abbr_card, hash);
    2462             : 
    2463             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2464      829474 :     sss->cache_blob = true;
    2465      829642 : done:
    2466             : 
    2467             :     /*
    2468             :      * Byteswap on little-endian machines.
    2469             :      *
    2470             :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
    2471             :      * 3-way comparator) works correctly on all platforms.  If we didn't do
    2472             :      * this, the comparator would have to call memcmp() with a pair of
    2473             :      * pointers to the first byte of each abbreviated key, which is slower.
    2474             :      */
    2475      829642 :     res = DatumBigEndianToNative(res);
    2476             : 
    2477             :     /* Don't leak memory here */
    2478      829642 :     if (PointerGetDatum(authoritative) != original)
    2479           4 :         pfree(authoritative);
    2480             : 
    2481      829642 :     return res;
    2482             : }
    2483             : 
    2484             : /*
    2485             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2486             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    2487             :  * should be aborted, based on its projected effectiveness.
    2488             :  */
    2489             : static bool
    2490        2264 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2491             : {
    2492        2264 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2493             :     double      abbrev_distinct,
    2494             :                 key_distinct;
    2495             : 
    2496             :     Assert(ssup->abbreviate);
    2497             : 
    2498             :     /* Have a little patience */
    2499        2264 :     if (memtupcount < 100)
    2500        1284 :         return false;
    2501             : 
    2502         980 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2503         980 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2504             : 
    2505             :     /*
    2506             :      * Clamp cardinality estimates to at least one distinct value.  While
    2507             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2508             :      * that might misrepresent costs if we failed to clamp.
    2509             :      */
    2510         980 :     if (abbrev_distinct <= 1.0)
    2511           0 :         abbrev_distinct = 1.0;
    2512             : 
    2513         980 :     if (key_distinct <= 1.0)
    2514           0 :         key_distinct = 1.0;
    2515             : 
    2516             :     /*
    2517             :      * In the worst case all abbreviated keys are identical, while at the same
    2518             :      * time there are differences within full key strings not captured in
    2519             :      * abbreviations.
    2520             :      */
    2521         980 :     if (trace_sort)
    2522             :     {
    2523           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2524             : 
    2525           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2526             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2527             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2528             :              sss->prop_card);
    2529             :     }
    2530             : 
    2531             :     /*
    2532             :      * If the number of distinct abbreviated keys approximately matches the
    2533             :      * number of distinct authoritative original keys, that's reason enough to
    2534             :      * proceed.  We can win even with a very low cardinality set if most
    2535             :      * tie-breakers only memcmp().  This is by far the most important
    2536             :      * consideration.
    2537             :      *
    2538             :      * While comparisons that are resolved at the abbreviated key level are
    2539             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2540             :      * those two outcomes are so much cheaper than a full strcoll() once
    2541             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2542             :      * cardinality against the overall size of the set in order to more
    2543             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2544             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2545             :      * resolution are equivalent.
    2546             :      */
    2547         980 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2548             :     {
    2549             :         /*
    2550             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2551             :          * aggressively for next call.
    2552             :          *
    2553             :          * This is useful because the number of comparisons required on
    2554             :          * average increases at a linearithmic rate, and at roughly 10,000
    2555             :          * tuples that factor will start to dominate over the linear costs of
    2556             :          * string transformation (this is a conservative estimate).  The decay
    2557             :          * rate is chosen to be a little less aggressive than halving -- which
    2558             :          * (since we're called at points at which memtupcount has doubled)
    2559             :          * would never see the cost model actually abort past the first call
    2560             :          * following a decay.  This decay rate is mostly a precaution against
    2561             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2562             :          * full key cardinality.  The decay also serves to prevent a marginal
    2563             :          * case from being aborted too late, when too much has already been
    2564             :          * invested in string transformation.
    2565             :          *
    2566             :          * It's possible for sets of several million distinct strings with
    2567             :          * mere tens of thousands of distinct abbreviated keys to still
    2568             :          * benefit very significantly.  This will generally occur provided
    2569             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2570             :          * set's full keys. If it isn't so, we hope to catch that early and
    2571             :          * abort.  If it isn't caught early, by the time the problem is
    2572             :          * apparent it's probably not worth aborting.
    2573             :          */
    2574         980 :         if (memtupcount > 10000)
    2575           4 :             sss->prop_card *= 0.65;
    2576             : 
    2577         980 :         return false;
    2578             :     }
    2579             : 
    2580             :     /*
    2581             :      * Abort abbreviation strategy.
    2582             :      *
    2583             :      * The worst case, where all abbreviated keys are identical while all
    2584             :      * original strings differ will typically only see a regression of about
    2585             :      * 10% in execution time for small to medium sized lists of strings.
    2586             :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
    2587             :      * often expect very large improvements, particularly with sets of strings
    2588             :      * of moderately high to high abbreviated cardinality.  There is little to
    2589             :      * lose but much to gain, which our strategy reflects.
    2590             :      */
    2591           0 :     if (trace_sort)
    2592           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2593             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2594             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2595             : 
    2596           0 :     return true;
    2597             : }
    2598             : 
    2599             : /*
    2600             :  * Generic equalimage support function for character type's operator classes.
    2601             :  * Disables the use of deduplication with nondeterministic collations.
    2602             :  */
    2603             : Datum
    2604        8464 : btvarstrequalimage(PG_FUNCTION_ARGS)
    2605             : {
    2606             :     /* Oid      opcintype = PG_GETARG_OID(0); */
    2607        8464 :     Oid         collid = PG_GET_COLLATION();
    2608             :     pg_locale_t locale;
    2609             : 
    2610        8464 :     check_collation_set(collid);
    2611             : 
    2612        8464 :     locale = pg_newlocale_from_collation(collid);
    2613             : 
    2614        8464 :     PG_RETURN_BOOL(locale->deterministic);
    2615             : }
    2616             : 
    2617             : Datum
    2618      229560 : text_larger(PG_FUNCTION_ARGS)
    2619             : {
    2620      229560 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2621      229560 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2622             :     text       *result;
    2623             : 
    2624      229560 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
    2625             : 
    2626      229560 :     PG_RETURN_TEXT_P(result);
    2627             : }
    2628             : 
    2629             : Datum
    2630       86076 : text_smaller(PG_FUNCTION_ARGS)
    2631             : {
    2632       86076 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2633       86076 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2634             :     text       *result;
    2635             : 
    2636       86076 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
    2637             : 
    2638       86076 :     PG_RETURN_TEXT_P(result);
    2639             : }
    2640             : 
    2641             : 
    2642             : /*
    2643             :  * Cross-type comparison functions for types text and name.
    2644             :  */
    2645             : 
    2646             : Datum
    2647      188878 : nameeqtext(PG_FUNCTION_ARGS)
    2648             : {
    2649      188878 :     Name        arg1 = PG_GETARG_NAME(0);
    2650      188878 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2651      188878 :     size_t      len1 = strlen(NameStr(*arg1));
    2652      188878 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2653      188878 :     Oid         collid = PG_GET_COLLATION();
    2654             :     bool        result;
    2655             : 
    2656      188878 :     check_collation_set(collid);
    2657             : 
    2658      188878 :     if (collid == C_COLLATION_OID)
    2659      254748 :         result = (len1 == len2 &&
    2660      123254 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2661             :     else
    2662       57384 :         result = (varstr_cmp(NameStr(*arg1), len1,
    2663       57384 :                              VARDATA_ANY(arg2), len2,
    2664             :                              collid) == 0);
    2665             : 
    2666      188878 :     PG_FREE_IF_COPY(arg2, 1);
    2667             : 
    2668      188878 :     PG_RETURN_BOOL(result);
    2669             : }
    2670             : 
    2671             : Datum
    2672        7808 : texteqname(PG_FUNCTION_ARGS)
    2673             : {
    2674        7808 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2675        7808 :     Name        arg2 = PG_GETARG_NAME(1);
    2676        7808 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2677        7808 :     size_t      len2 = strlen(NameStr(*arg2));
    2678        7808 :     Oid         collid = PG_GET_COLLATION();
    2679             :     bool        result;
    2680             : 
    2681        7808 :     check_collation_set(collid);
    2682             : 
    2683        7808 :     if (collid == C_COLLATION_OID)
    2684         568 :         result = (len1 == len2 &&
    2685         182 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2686             :     else
    2687        7422 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
    2688        7422 :                              NameStr(*arg2), len2,
    2689             :                              collid) == 0);
    2690             : 
    2691        7808 :     PG_FREE_IF_COPY(arg1, 0);
    2692             : 
    2693        7808 :     PG_RETURN_BOOL(result);
    2694             : }
    2695             : 
    2696             : Datum
    2697          18 : namenetext(PG_FUNCTION_ARGS)
    2698             : {
    2699          18 :     Name        arg1 = PG_GETARG_NAME(0);
    2700          18 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2701          18 :     size_t      len1 = strlen(NameStr(*arg1));
    2702          18 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2703          18 :     Oid         collid = PG_GET_COLLATION();
    2704             :     bool        result;
    2705             : 
    2706          18 :     check_collation_set(collid);
    2707             : 
    2708          18 :     if (collid == C_COLLATION_OID)
    2709           0 :         result = !(len1 == len2 &&
    2710           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2711             :     else
    2712          18 :         result = !(varstr_cmp(NameStr(*arg1), len1,
    2713          18 :                               VARDATA_ANY(arg2), len2,
    2714             :                               collid) == 0);
    2715             : 
    2716          18 :     PG_FREE_IF_COPY(arg2, 1);
    2717             : 
    2718          18 :     PG_RETURN_BOOL(result);
    2719             : }
    2720             : 
    2721             : Datum
    2722          18 : textnename(PG_FUNCTION_ARGS)
    2723             : {
    2724          18 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2725          18 :     Name        arg2 = PG_GETARG_NAME(1);
    2726          18 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2727          18 :     size_t      len2 = strlen(NameStr(*arg2));
    2728          18 :     Oid         collid = PG_GET_COLLATION();
    2729             :     bool        result;
    2730             : 
    2731          18 :     check_collation_set(collid);
    2732             : 
    2733          18 :     if (collid == C_COLLATION_OID)
    2734           0 :         result = !(len1 == len2 &&
    2735           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2736             :     else
    2737          18 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
    2738          18 :                               NameStr(*arg2), len2,
    2739             :                               collid) == 0);
    2740             : 
    2741          18 :     PG_FREE_IF_COPY(arg1, 0);
    2742             : 
    2743          18 :     PG_RETURN_BOOL(result);
    2744             : }
    2745             : 
    2746             : Datum
    2747      108296 : btnametextcmp(PG_FUNCTION_ARGS)
    2748             : {
    2749      108296 :     Name        arg1 = PG_GETARG_NAME(0);
    2750      108296 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2751             :     int32       result;
    2752             : 
    2753      216592 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
    2754      216592 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
    2755             :                         PG_GET_COLLATION());
    2756             : 
    2757      108296 :     PG_FREE_IF_COPY(arg2, 1);
    2758             : 
    2759      108296 :     PG_RETURN_INT32(result);
    2760             : }
    2761             : 
    2762             : Datum
    2763           0 : bttextnamecmp(PG_FUNCTION_ARGS)
    2764             : {
    2765           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2766           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2767             :     int32       result;
    2768             : 
    2769           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
    2770           0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
    2771             :                         PG_GET_COLLATION());
    2772             : 
    2773           0 :     PG_FREE_IF_COPY(arg1, 0);
    2774             : 
    2775           0 :     PG_RETURN_INT32(result);
    2776             : }
    2777             : 
    2778             : #define CmpCall(cmpfunc) \
    2779             :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
    2780             :                                           PG_GET_COLLATION(), \
    2781             :                                           PG_GETARG_DATUM(0), \
    2782             :                                           PG_GETARG_DATUM(1)))
    2783             : 
    2784             : Datum
    2785       56482 : namelttext(PG_FUNCTION_ARGS)
    2786             : {
    2787       56482 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
    2788             : }
    2789             : 
    2790             : Datum
    2791           0 : nameletext(PG_FUNCTION_ARGS)
    2792             : {
    2793           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
    2794             : }
    2795             : 
    2796             : Datum
    2797           0 : namegttext(PG_FUNCTION_ARGS)
    2798             : {
    2799           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
    2800             : }
    2801             : 
    2802             : Datum
    2803       39330 : namegetext(PG_FUNCTION_ARGS)
    2804             : {
    2805       39330 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
    2806             : }
    2807             : 
    2808             : Datum
    2809           0 : textltname(PG_FUNCTION_ARGS)
    2810             : {
    2811           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
    2812             : }
    2813             : 
    2814             : Datum
    2815           0 : textlename(PG_FUNCTION_ARGS)
    2816             : {
    2817           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
    2818             : }
    2819             : 
    2820             : Datum
    2821           0 : textgtname(PG_FUNCTION_ARGS)
    2822             : {
    2823           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
    2824             : }
    2825             : 
    2826             : Datum
    2827           0 : textgename(PG_FUNCTION_ARGS)
    2828             : {
    2829           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
    2830             : }
    2831             : 
    2832             : #undef CmpCall
    2833             : 
    2834             : 
    2835             : /*
    2836             :  * The following operators support character-by-character comparison
    2837             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    2838             :  * Note that the regular texteq/textne comparison operators, and regular
    2839             :  * support functions 1 and 2 with "C" collation are assumed to be
    2840             :  * compatible with these!
    2841             :  */
    2842             : 
    2843             : static int
    2844      152158 : internal_text_pattern_compare(text *arg1, text *arg2)
    2845             : {
    2846             :     int         result;
    2847             :     int         len1,
    2848             :                 len2;
    2849             : 
    2850      152158 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2851      152158 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2852             : 
    2853      152158 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    2854      152158 :     if (result != 0)
    2855      152092 :         return result;
    2856          66 :     else if (len1 < len2)
    2857           0 :         return -1;
    2858          66 :     else if (len1 > len2)
    2859          18 :         return 1;
    2860             :     else
    2861          48 :         return 0;
    2862             : }
    2863             : 
    2864             : 
    2865             : Datum
    2866       39580 : text_pattern_lt(PG_FUNCTION_ARGS)
    2867             : {
    2868       39580 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2869       39580 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2870             :     int         result;
    2871             : 
    2872       39580 :     result = internal_text_pattern_compare(arg1, arg2);
    2873             : 
    2874       39580 :     PG_FREE_IF_COPY(arg1, 0);
    2875       39580 :     PG_FREE_IF_COPY(arg2, 1);
    2876             : 
    2877       39580 :     PG_RETURN_BOOL(result < 0);
    2878             : }
    2879             : 
    2880             : 
    2881             : Datum
    2882       37510 : text_pattern_le(PG_FUNCTION_ARGS)
    2883             : {
    2884       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2885       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2886             :     int         result;
    2887             : 
    2888       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2889             : 
    2890       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2891       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2892             : 
    2893       37510 :     PG_RETURN_BOOL(result <= 0);
    2894             : }
    2895             : 
    2896             : 
    2897             : Datum
    2898       37534 : text_pattern_ge(PG_FUNCTION_ARGS)
    2899             : {
    2900       37534 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2901       37534 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2902             :     int         result;
    2903             : 
    2904       37534 :     result = internal_text_pattern_compare(arg1, arg2);
    2905             : 
    2906       37534 :     PG_FREE_IF_COPY(arg1, 0);
    2907       37534 :     PG_FREE_IF_COPY(arg2, 1);
    2908             : 
    2909       37534 :     PG_RETURN_BOOL(result >= 0);
    2910             : }
    2911             : 
    2912             : 
    2913             : Datum
    2914       37510 : text_pattern_gt(PG_FUNCTION_ARGS)
    2915             : {
    2916       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2917       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2918             :     int         result;
    2919             : 
    2920       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2921             : 
    2922       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2923       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2924             : 
    2925       37510 :     PG_RETURN_BOOL(result > 0);
    2926             : }
    2927             : 
    2928             : 
    2929             : Datum
    2930          24 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    2931             : {
    2932          24 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2933          24 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2934             :     int         result;
    2935             : 
    2936          24 :     result = internal_text_pattern_compare(arg1, arg2);
    2937             : 
    2938          24 :     PG_FREE_IF_COPY(arg1, 0);
    2939          24 :     PG_FREE_IF_COPY(arg2, 1);
    2940             : 
    2941          24 :     PG_RETURN_INT32(result);
    2942             : }
    2943             : 
    2944             : 
    2945             : Datum
    2946         116 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    2947             : {
    2948         116 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    2949             :     MemoryContext oldcontext;
    2950             : 
    2951         116 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    2952             : 
    2953             :     /* Use generic string SortSupport, forcing "C" collation */
    2954         116 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
    2955             : 
    2956         116 :     MemoryContextSwitchTo(oldcontext);
    2957             : 
    2958         116 :     PG_RETURN_VOID();
    2959             : }
    2960             : 
    2961             : 
    2962             : /*-------------------------------------------------------------
    2963             :  * byteaoctetlen
    2964             :  *
    2965             :  * get the number of bytes contained in an instance of type 'bytea'
    2966             :  *-------------------------------------------------------------
    2967             :  */
    2968             : Datum
    2969         602 : byteaoctetlen(PG_FUNCTION_ARGS)
    2970             : {
    2971         602 :     Datum       str = PG_GETARG_DATUM(0);
    2972             : 
    2973             :     /* We need not detoast the input at all */
    2974         602 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    2975             : }
    2976             : 
    2977             : /*
    2978             :  * byteacat -
    2979             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    2980             :  *    the two.
    2981             :  *
    2982             :  * Cloned from textcat and modified as required.
    2983             :  */
    2984             : Datum
    2985        1522 : byteacat(PG_FUNCTION_ARGS)
    2986             : {
    2987        1522 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2988        1522 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2989             : 
    2990        1522 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    2991             : }
    2992             : 
    2993             : /*
    2994             :  * bytea_catenate
    2995             :  *  Guts of byteacat(), broken out so it can be used by other functions
    2996             :  *
    2997             :  * Arguments can be in short-header form, but not compressed or out-of-line
    2998             :  */
    2999             : static bytea *
    3000        1558 : bytea_catenate(bytea *t1, bytea *t2)
    3001             : {
    3002             :     bytea      *result;
    3003             :     int         len1,
    3004             :                 len2,
    3005             :                 len;
    3006             :     char       *ptr;
    3007             : 
    3008        1558 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3009        1558 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3010             : 
    3011             :     /* paranoia ... probably should throw error instead? */
    3012        1558 :     if (len1 < 0)
    3013           0 :         len1 = 0;
    3014        1558 :     if (len2 < 0)
    3015           0 :         len2 = 0;
    3016             : 
    3017        1558 :     len = len1 + len2 + VARHDRSZ;
    3018        1558 :     result = (bytea *) palloc(len);
    3019             : 
    3020             :     /* Set size of result string... */
    3021        1558 :     SET_VARSIZE(result, len);
    3022             : 
    3023             :     /* Fill data field of result string... */
    3024        1558 :     ptr = VARDATA(result);
    3025        1558 :     if (len1 > 0)
    3026        1558 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    3027        1558 :     if (len2 > 0)
    3028        1540 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    3029             : 
    3030        1558 :     return result;
    3031             : }
    3032             : 
/*
 * PG_STR_GET_BYTEA
 *    Turn the C string str_ into a bytea pointer by passing it through
 *    byteain() via DirectFunctionCall1.
 */
#define PG_STR_GET_BYTEA(str_) \
    DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    3035             : 
    3036             : /*
    3037             :  * bytea_substr()
    3038             :  * Return a substring starting at the specified position.
    3039             :  * Cloned from text_substr and modified as required.
    3040             :  *
    3041             :  * Input:
    3042             :  *  - string
    3043             :  *  - starting position (is one-based)
    3044             :  *  - string length (optional)
    3045             :  *
    3046             :  * If the starting position is zero or less, then return from the start of the string
    3047             :  * adjusting the length to be consistent with the "negative start" per SQL.
    3048             :  * If the length is less than zero, an ERROR is thrown. If no third argument
    3049             :  * (length) is provided, the length to the end of the string is assumed.
    3050             :  */
    3051             : Datum
    3052          86 : bytea_substr(PG_FUNCTION_ARGS)
    3053             : {
    3054          86 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3055             :                                       PG_GETARG_INT32(1),
    3056             :                                       PG_GETARG_INT32(2),
    3057             :                                       false));
    3058             : }
    3059             : 
    3060             : /*
    3061             :  * bytea_substr_no_len -
    3062             :  *    Wrapper to avoid opr_sanity failure due to
    3063             :  *    one function accepting a different number of args.
    3064             :  */
    3065             : Datum
    3066        3900 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    3067             : {
    3068        3900 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3069             :                                       PG_GETARG_INT32(1),
    3070             :                                       -1,
    3071             :                                       true));
    3072             : }
    3073             : 
    3074             : static bytea *
    3075        4022 : bytea_substring(Datum str,
    3076             :                 int S,
    3077             :                 int L,
    3078             :                 bool length_not_specified)
    3079             : {
    3080             :     int32       S1;             /* adjusted start position */
    3081             :     int32       L1;             /* adjusted substring length */
    3082             :     int32       E;              /* end position */
    3083             : 
    3084             :     /*
    3085             :      * The logic here should generally match text_substring().
    3086             :      */
    3087        4022 :     S1 = Max(S, 1);
    3088             : 
    3089        4022 :     if (length_not_specified)
    3090             :     {
    3091             :         /*
    3092             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    3093             :          * end of the string if we pass it a negative value for length.
    3094             :          */
    3095        3918 :         L1 = -1;
    3096             :     }
    3097         104 :     else if (L < 0)
    3098             :     {
    3099             :         /* SQL99 says to throw an error for E < S, i.e., negative length */
    3100          12 :         ereport(ERROR,
    3101             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3102             :                  errmsg("negative substring length not allowed")));
    3103             :         L1 = -1;                /* silence stupider compilers */
    3104             :     }
    3105          92 :     else if (pg_add_s32_overflow(S, L, &E))
    3106             :     {
    3107             :         /*
    3108             :          * L could be large enough for S + L to overflow, in which case the
    3109             :          * substring must run to end of string.
    3110             :          */
    3111           6 :         L1 = -1;
    3112             :     }
    3113             :     else
    3114             :     {
    3115             :         /*
    3116             :          * A zero or negative value for the end position can happen if the
    3117             :          * start was negative or one. SQL99 says to return a zero-length
    3118             :          * string.
    3119             :          */
    3120          86 :         if (E < 1)
    3121           0 :             return PG_STR_GET_BYTEA("");
    3122             : 
    3123          86 :         L1 = E - S1;
    3124             :     }
    3125             : 
    3126             :     /*
    3127             :      * If the start position is past the end of the string, SQL99 says to
    3128             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    3129             :      * us.  We need only convert S1 to zero-based starting position.
    3130             :      */
    3131        4010 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    3132             : }
    3133             : 
    3134             : /*
    3135             :  * byteaoverlay
    3136             :  *  Replace specified substring of first string with second
    3137             :  *
    3138             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    3139             :  * This code is a direct implementation of what the standard says.
    3140             :  */
    3141             : Datum
    3142           6 : byteaoverlay(PG_FUNCTION_ARGS)
    3143             : {
    3144           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3145           6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3146           6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3147           6 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    3148             : 
    3149           6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3150             : }
    3151             : 
    3152             : Datum
    3153          12 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    3154             : {
    3155          12 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3156          12 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3157          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3158             :     int         sl;
    3159             : 
    3160          12 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    3161          12 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3162             : }
    3163             : 
    3164             : static bytea *
    3165          18 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    3166             : {
    3167             :     bytea      *result;
    3168             :     bytea      *s1;
    3169             :     bytea      *s2;
    3170             :     int         sp_pl_sl;
    3171             : 
    3172             :     /*
    3173             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    3174             :      * "substring length" error because that's what should be expected
    3175             :      * according to the spec's definition of OVERLAY().
    3176             :      */
    3177          18 :     if (sp <= 0)
    3178           0 :         ereport(ERROR,
    3179             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3180             :                  errmsg("negative substring length not allowed")));
    3181          18 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    3182           0 :         ereport(ERROR,
    3183             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    3184             :                  errmsg("integer out of range")));
    3185             : 
    3186          18 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    3187          18 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    3188          18 :     result = bytea_catenate(s1, t2);
    3189          18 :     result = bytea_catenate(result, s2);
    3190             : 
    3191          18 :     return result;
    3192             : }
    3193             : 
    3194             : /*
    3195             :  * bit_count
    3196             :  */
    3197             : Datum
    3198           6 : bytea_bit_count(PG_FUNCTION_ARGS)
    3199             : {
    3200           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3201             : 
    3202           6 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
    3203             : }
    3204             : 
    3205             : /*
    3206             :  * byteapos -
    3207             :  *    Return the position of the specified substring.
    3208             :  *    Implements the SQL POSITION() function.
    3209             :  * Cloned from textpos and modified as required.
    3210             :  */
    3211             : Datum
    3212          30 : byteapos(PG_FUNCTION_ARGS)
    3213             : {
    3214          30 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3215          30 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3216             :     int         pos;
    3217             :     int         px,
    3218             :                 p;
    3219             :     int         len1,
    3220             :                 len2;
    3221             :     char       *p1,
    3222             :                *p2;
    3223             : 
    3224          30 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3225          30 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3226             : 
    3227          30 :     if (len2 <= 0)
    3228           6 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    3229             : 
    3230          24 :     p1 = VARDATA_ANY(t1);
    3231          24 :     p2 = VARDATA_ANY(t2);
    3232             : 
    3233          24 :     pos = 0;
    3234          24 :     px = (len1 - len2);
    3235          54 :     for (p = 0; p <= px; p++)
    3236             :     {
    3237          42 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    3238             :         {
    3239          12 :             pos = p + 1;
    3240          12 :             break;
    3241             :         };
    3242          30 :         p1++;
    3243             :     };
    3244             : 
    3245          24 :     PG_RETURN_INT32(pos);
    3246             : }
    3247             : 
    3248             : /*-------------------------------------------------------------
    3249             :  * byteaGetByte
    3250             :  *
    3251             :  * this routine treats "bytea" as an array of bytes.
    3252             :  * It returns the Nth byte (a number between 0 and 255).
    3253             :  *-------------------------------------------------------------
    3254             :  */
    3255             : Datum
    3256          60 : byteaGetByte(PG_FUNCTION_ARGS)
    3257             : {
    3258          60 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3259          60 :     int32       n = PG_GETARG_INT32(1);
    3260             :     int         len;
    3261             :     int         byte;
    3262             : 
    3263          60 :     len = VARSIZE_ANY_EXHDR(v);
    3264             : 
    3265          60 :     if (n < 0 || n >= len)
    3266           6 :         ereport(ERROR,
    3267             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3268             :                  errmsg("index %d out of valid range, 0..%d",
    3269             :                         n, len - 1)));
    3270             : 
    3271          54 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3272             : 
    3273          54 :     PG_RETURN_INT32(byte);
    3274             : }
    3275             : 
    3276             : /*-------------------------------------------------------------
    3277             :  * byteaGetBit
    3278             :  *
    3279             :  * This routine treats a "bytea" type like an array of bits.
    3280             :  * It returns the value of the Nth bit (0 or 1).
    3281             :  *
    3282             :  *-------------------------------------------------------------
    3283             :  */
    3284             : Datum
    3285          12 : byteaGetBit(PG_FUNCTION_ARGS)
    3286             : {
    3287          12 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3288          12 :     int64       n = PG_GETARG_INT64(1);
    3289             :     int         byteNo,
    3290             :                 bitNo;
    3291             :     int         len;
    3292             :     int         byte;
    3293             : 
    3294          12 :     len = VARSIZE_ANY_EXHDR(v);
    3295             : 
    3296          12 :     if (n < 0 || n >= (int64) len * 8)
    3297           6 :         ereport(ERROR,
    3298             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3299             :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
    3300             :                         n, (int64) len * 8 - 1)));
    3301             : 
    3302             :     /* n/8 is now known < len, so safe to cast to int */
    3303           6 :     byteNo = (int) (n / 8);
    3304           6 :     bitNo = (int) (n % 8);
    3305             : 
    3306           6 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3307             : 
    3308           6 :     if (byte & (1 << bitNo))
    3309           6 :         PG_RETURN_INT32(1);
    3310             :     else
    3311           0 :         PG_RETURN_INT32(0);
    3312             : }
    3313             : 
    3314             : /*-------------------------------------------------------------
    3315             :  * byteaSetByte
    3316             :  *
    3317             :  * Given an instance of type 'bytea' creates a new one with
    3318             :  * the Nth byte set to the given value.
    3319             :  *
    3320             :  *-------------------------------------------------------------
    3321             :  */
    3322             : Datum
    3323          12 : byteaSetByte(PG_FUNCTION_ARGS)
    3324             : {
    3325          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3326          12 :     int32       n = PG_GETARG_INT32(1);
    3327          12 :     int32       newByte = PG_GETARG_INT32(2);
    3328             :     int         len;
    3329             : 
    3330          12 :     len = VARSIZE(res) - VARHDRSZ;
    3331             : 
    3332          12 :     if (n < 0 || n >= len)
    3333           6 :         ereport(ERROR,
    3334             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3335             :                  errmsg("index %d out of valid range, 0..%d",
    3336             :                         n, len - 1)));
    3337             : 
    3338             :     /*
    3339             :      * Now set the byte.
    3340             :      */
    3341           6 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3342             : 
    3343           6 :     PG_RETURN_BYTEA_P(res);
    3344             : }
    3345             : 
    3346             : /*-------------------------------------------------------------
    3347             :  * byteaSetBit
    3348             :  *
    3349             :  * Given an instance of type 'bytea' creates a new one with
    3350             :  * the Nth bit set to the given value.
    3351             :  *
    3352             :  *-------------------------------------------------------------
    3353             :  */
    3354             : Datum
    3355          12 : byteaSetBit(PG_FUNCTION_ARGS)
    3356             : {
    3357          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3358          12 :     int64       n = PG_GETARG_INT64(1);
    3359          12 :     int32       newBit = PG_GETARG_INT32(2);
    3360             :     int         len;
    3361             :     int         oldByte,
    3362             :                 newByte;
    3363             :     int         byteNo,
    3364             :                 bitNo;
    3365             : 
    3366          12 :     len = VARSIZE(res) - VARHDRSZ;
    3367             : 
    3368          12 :     if (n < 0 || n >= (int64) len * 8)
    3369           6 :         ereport(ERROR,
    3370             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3371             :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
    3372             :                         n, (int64) len * 8 - 1)));
    3373             : 
    3374             :     /* n/8 is now known < len, so safe to cast to int */
    3375           6 :     byteNo = (int) (n / 8);
    3376           6 :     bitNo = (int) (n % 8);
    3377             : 
    3378             :     /*
    3379             :      * sanity check!
    3380             :      */
    3381           6 :     if (newBit != 0 && newBit != 1)
    3382           0 :         ereport(ERROR,
    3383             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3384             :                  errmsg("new bit must be 0 or 1")));
    3385             : 
    3386             :     /*
    3387             :      * Update the byte.
    3388             :      */
    3389           6 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3390             : 
    3391           6 :     if (newBit == 0)
    3392           6 :         newByte = oldByte & (~(1 << bitNo));
    3393             :     else
    3394           0 :         newByte = oldByte | (1 << bitNo);
    3395             : 
    3396           6 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3397             : 
    3398           6 :     PG_RETURN_BYTEA_P(res);
    3399             : }
    3400             : 
    3401             : /*
    3402             :  * Return reversed bytea
    3403             :  */
    3404             : Datum
    3405          18 : bytea_reverse(PG_FUNCTION_ARGS)
    3406             : {
    3407          18 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3408          18 :     const char *p = VARDATA_ANY(v);
    3409          18 :     int         len = VARSIZE_ANY_EXHDR(v);
    3410          18 :     const char *endp = p + len;
    3411          18 :     bytea      *result = palloc(len + VARHDRSZ);
    3412          18 :     char       *dst = (char *) VARDATA(result) + len;
    3413             : 
    3414          18 :     SET_VARSIZE(result, len + VARHDRSZ);
    3415             : 
    3416          36 :     while (p < endp)
    3417          18 :         *(--dst) = *p++;
    3418             : 
    3419          18 :     PG_RETURN_BYTEA_P(result);
    3420             : }
    3421             : 
    3422             : 
    3423             : /* text_name()
    3424             :  * Converts a text type to a Name type.
    3425             :  */
    3426             : Datum
    3427       30590 : text_name(PG_FUNCTION_ARGS)
    3428             : {
    3429       30590 :     text       *s = PG_GETARG_TEXT_PP(0);
    3430             :     Name        result;
    3431             :     int         len;
    3432             : 
    3433       30590 :     len = VARSIZE_ANY_EXHDR(s);
    3434             : 
    3435             :     /* Truncate oversize input */
    3436       30590 :     if (len >= NAMEDATALEN)
    3437           6 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3438             : 
    3439             :     /* We use palloc0 here to ensure result is zero-padded */
    3440       30590 :     result = (Name) palloc0(NAMEDATALEN);
    3441       30590 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
    3442             : 
    3443       30590 :     PG_RETURN_NAME(result);
    3444             : }
    3445             : 
    3446             : /* name_text()
    3447             :  * Converts a Name type to a text type.
    3448             :  */
    3449             : Datum
    3450      647668 : name_text(PG_FUNCTION_ARGS)
    3451             : {
    3452      647668 :     Name        s = PG_GETARG_NAME(0);
    3453             : 
    3454      647668 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3455             : }
    3456             : 
    3457             : 
    3458             : /*
    3459             :  * textToQualifiedNameList - convert a text object to list of names
    3460             :  *
    3461             :  * This implements the input parsing needed by nextval() and other
    3462             :  * functions that take a text parameter representing a qualified name.
    3463             :  * We split the name at dots, downcase if not double-quoted, and
    3464             :  * truncate names if they're too long.
    3465             :  */
    3466             : List *
    3467        5408 : textToQualifiedNameList(text *textval)
    3468             : {
    3469             :     char       *rawname;
    3470        5408 :     List       *result = NIL;
    3471             :     List       *namelist;
    3472             :     ListCell   *l;
    3473             : 
    3474             :     /* Convert to C string (handles possible detoasting). */
    3475             :     /* Note we rely on being able to modify rawname below. */
    3476        5408 :     rawname = text_to_cstring(textval);
    3477             : 
    3478        5408 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3479           0 :         ereport(ERROR,
    3480             :                 (errcode(ERRCODE_INVALID_NAME),
    3481             :                  errmsg("invalid name syntax")));
    3482             : 
    3483        5408 :     if (namelist == NIL)
    3484           0 :         ereport(ERROR,
    3485             :                 (errcode(ERRCODE_INVALID_NAME),
    3486             :                  errmsg("invalid name syntax")));
    3487             : 
    3488       10926 :     foreach(l, namelist)
    3489             :     {
    3490        5518 :         char       *curname = (char *) lfirst(l);
    3491             : 
    3492        5518 :         result = lappend(result, makeString(pstrdup(curname)));
    3493             :     }
    3494             : 
    3495        5408 :     pfree(rawname);
    3496        5408 :     list_free(namelist);
    3497             : 
    3498        5408 :     return result;
    3499             : }
    3500             : 
    3501             : /*
    3502             :  * SplitIdentifierString --- parse a string containing identifiers
    3503             :  *
    3504             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3505             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3506             :  * important to avoid memory leaks, so the API is designed to minimize the
    3507             :  * amount of stuff that needs to be allocated and freed.
    3508             :  *
    3509             :  * Inputs:
    3510             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3511             :  *             been modified to contain the separated identifiers.
    3512             :  *  separator: the separator punctuation expected between identifiers
    3513             :  *             (typically '.' or ',').  Whitespace may also appear around
    3514             :  *             identifiers.
    3515             :  * Outputs:
    3516             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3517             :  *            rawstring.  Caller should list_free() this even on error return.
    3518             :  *
    3519             :  * Returns true if okay, false if there is a syntax error in the string.
    3520             :  *
    3521             :  * Note that an empty string is considered okay here, though not in
    3522             :  * textToQualifiedNameList.
    3523             :  */
    3524             : bool
    3525      253140 : SplitIdentifierString(char *rawstring, char separator,
    3526             :                       List **namelist)
    3527             : {
    3528      253140 :     char       *nextp = rawstring;
    3529      253140 :     bool        done = false;
    3530             : 
    3531      253140 :     *namelist = NIL;
    3532             : 
    3533      253146 :     while (scanner_isspace(*nextp))
    3534           6 :         nextp++;                /* skip leading whitespace */
    3535             : 
    3536      253140 :     if (*nextp == '\0')
    3537       26898 :         return true;            /* allow empty string */
    3538             : 
    3539             :     /* At the top of the loop, we are at start of a new identifier. */
    3540             :     do
    3541             :     {
    3542             :         char       *curname;
    3543             :         char       *endp;
    3544             : 
    3545      410712 :         if (*nextp == '"')
    3546             :         {
    3547             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3548       37896 :             curname = nextp + 1;
    3549             :             for (;;)
    3550             :             {
    3551       37900 :                 endp = strchr(nextp + 1, '"');
    3552       37898 :                 if (endp == NULL)
    3553           0 :                     return false;   /* mismatched quotes */
    3554       37898 :                 if (endp[1] != '"')
    3555       37896 :                     break;      /* found end of quoted name */
    3556             :                 /* Collapse adjacent quotes into one quote, and look again */
    3557           2 :                 memmove(endp, endp + 1, strlen(endp));
    3558           2 :                 nextp = endp;
    3559             :             }
    3560             :             /* endp now points at the terminating quote */
    3561       37896 :             nextp = endp + 1;
    3562             :         }
    3563             :         else
    3564             :         {
    3565             :             /* Unquoted name --- extends to separator or whitespace */
    3566             :             char       *downname;
    3567             :             int         len;
    3568             : 
    3569      372816 :             curname = nextp;
    3570     3352576 :             while (*nextp && *nextp != separator &&
    3571     2979762 :                    !scanner_isspace(*nextp))
    3572     2979760 :                 nextp++;
    3573      372816 :             endp = nextp;
    3574      372816 :             if (curname == nextp)
    3575           0 :                 return false;   /* empty unquoted name not allowed */
    3576             : 
    3577             :             /*
    3578             :              * Downcase the identifier, using same code as main lexer does.
    3579             :              *
    3580             :              * XXX because we want to overwrite the input in-place, we cannot
    3581             :              * support a downcasing transformation that increases the string
    3582             :              * length.  This is not a problem given the current implementation
    3583             :              * of downcase_truncate_identifier, but we'll probably have to do
    3584             :              * something about this someday.
    3585             :              */
    3586      372816 :             len = endp - curname;
    3587      372816 :             downname = downcase_truncate_identifier(curname, len, false);
    3588             :             Assert(strlen(downname) <= len);
    3589      372816 :             strncpy(curname, downname, len);    /* strncpy is required here */
    3590      372816 :             pfree(downname);
    3591             :         }
    3592             : 
    3593      410714 :         while (scanner_isspace(*nextp))
    3594           2 :             nextp++;            /* skip trailing whitespace */
    3595             : 
    3596      410712 :         if (*nextp == separator)
    3597             :         {
    3598      184470 :             nextp++;
    3599      344876 :             while (scanner_isspace(*nextp))
    3600      160406 :                 nextp++;        /* skip leading whitespace for next */
    3601             :             /* we expect another name, so done remains false */
    3602             :         }
    3603      226242 :         else if (*nextp == '\0')
    3604      226240 :             done = true;
    3605             :         else
    3606           2 :             return false;       /* invalid syntax */
    3607             : 
    3608             :         /* Now safe to overwrite separator with a null */
    3609      410710 :         *endp = '\0';
    3610             : 
    3611             :         /* Truncate name if it's overlength */
    3612      410710 :         truncate_identifier(curname, strlen(curname), false);
    3613             : 
    3614             :         /*
    3615             :          * Finished isolating current name --- add it to list
    3616             :          */
    3617      410710 :         *namelist = lappend(*namelist, curname);
    3618             : 
    3619             :         /* Loop back if we didn't reach end of string */
    3620      410710 :     } while (!done);
    3621             : 
    3622      226240 :     return true;
    3623             : }
    3624             : 
    3625             : 
    3626             : /*
    3627             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3628             :  *
    3629             :  * This works fine on file names too; the function name is historical.
    3630             :  *
    3631             :  * This is similar to SplitIdentifierString, except that the parsing
    3632             :  * rules are meant to handle pathnames instead of identifiers: there is
    3633             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3634             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3635             :  * last, the returned strings are separately palloc'd rather than being
    3636             :  * pointers into rawstring --- but we still scribble on rawstring.
    3637             :  *
                     :  * An entry may be double-quoted, in which case a doubled quote ("")
                     :  * inside it stands for one literal quote character.  An unquoted entry
                     :  * runs up to the next separator or the end of the string, minus any
                     :  * trailing whitespace; an empty unquoted entry is a syntax error.
                     :  * After an entry only whitespace, the separator, or end of string may
                     :  * follow; anything else is a syntax error too.
                     :  *
    3638             :  * Inputs:
    3639             :  *  rawstring: the input string; must be modifiable!
    3640             :  *  separator: the separator punctuation expected between directories
    3641             :  *             (typically ',' or ';').  Whitespace may also appear around
    3642             :  *             directories.
    3643             :  * Outputs:
    3644             :  *  namelist: filled with a palloc'd list of directory names.
    3645             :  *            Caller should list_free_deep() this even on error return.
    3646             :  *
    3647             :  * Returns true if okay, false if there is a syntax error in the string.
    3648             :  *
    3649             :  * Note that an empty string is considered okay here.
    3650             :  */
    3651             : bool
    3652        1718 : SplitDirectoriesString(char *rawstring, char separator,
    3653             :                        List **namelist)
    3654             : {
    3655        1718 :     char       *nextp = rawstring;
    3656        1718 :     bool        done = false;
    3657             : 
    3658        1718 :     *namelist = NIL;
    3659             : 
    3660        1718 :     while (scanner_isspace(*nextp))
    3661           0 :         nextp++;                /* skip leading whitespace */
    3662             : 
    3663        1718 :     if (*nextp == '\0')
    3664           2 :         return true;            /* allow empty string */
    3665             : 
    3666             :     /* At the top of the loop, we are at start of a new directory. */
    3667             :     do
    3668             :     {
    3669             :         char       *curname;
    3670             :         char       *endp;
    3671             : 
    3672        1718 :         if (*nextp == '"')
    3673             :         {
    3674             :             /* Quoted name --- collapse quote-quote pairs */
    3675           0 :             curname = nextp + 1;
    3676             :             for (;;)
    3677             :             {
    3678           0 :                 endp = strchr(nextp + 1, '"');
    3679           0 :                 if (endp == NULL)
    3680           0 :                     return false;   /* mismatched quotes */
    3681           0 :                 if (endp[1] != '"')
    3682           0 :                     break;      /* found end of quoted name */
    3683             :                 /*
                     :                  * Collapse adjacent quotes into one quote, and look again.
                     :                  * strlen(endp) counts the first quote but not the trailing
                     :                  * NUL, so moving that many bytes from endp + 1 shifts the
                     :                  * rest of the string, NUL included, down by one.  Setting
                     :                  * nextp = endp then makes the next strchr() start just past
                     :                  * the quote that was kept.
                     :                  */
    3684           0 :                 memmove(endp, endp + 1, strlen(endp));
    3685           0 :                 nextp = endp;
    3686             :             }
    3687             :             /* endp now points at the terminating quote */
    3688           0 :             nextp = endp + 1;
    3689             :         }
    3690             :         else
    3691             :         {
    3692             :             /* Unquoted name --- extends to separator or end of string */
    3693        1718 :             curname = endp = nextp;
    3694       28736 :             while (*nextp && *nextp != separator)
    3695             :             {
    3696             :                 /* trailing whitespace should not be included in name */
    3697       27018 :                 if (!scanner_isspace(*nextp))
    3698       27018 :                     endp = nextp + 1;
    3699       27018 :                 nextp++;
    3700             :             }
    3701        1718 :             if (curname == endp)
    3702           0 :                 return false;   /* empty unquoted name not allowed */
    3703             :         }
    3704             : 
    3705        1718 :         while (scanner_isspace(*nextp))
    3706           0 :             nextp++;            /* skip trailing whitespace */
    3707             : 
    3708        1718 :         if (*nextp == separator)
    3709             :         {
    3710           2 :             nextp++;
    3711           2 :             while (scanner_isspace(*nextp))
    3712           0 :                 nextp++;        /* skip leading whitespace for next */
    3713             :             /* we expect another name, so done remains false */
    3714             :         }
    3715        1716 :         else if (*nextp == '\0')
    3716        1716 :             done = true;
    3717             :         else
    3718           0 :             return false;       /* invalid syntax */
    3719             : 
    3720             :         /*
                     :          * Now safe to overwrite separator with a null.  endp is the
                     :          * closing quote of a quoted entry, or one past the last
                     :          * non-whitespace character of an unquoted one; in the latter
                     :          * case that position can be the separator itself, which is why
                     :          * this write waits until the separator has been examined above.
                     :          */
    3721        1718 :         *endp = '\0';
    3722             : 
    3723             :         /* Truncate path if it's overlength (keeps the first MAXPGPATH - 1 bytes) */
    3724        1718 :         if (strlen(curname) >= MAXPGPATH)
    3725           0 :             curname[MAXPGPATH - 1] = '\0';
    3726             : 
    3727             :         /*
    3728             :          * Finished isolating current name --- add it to list
                     :          *
                     :          * The list receives a pstrdup'd copy, and canonicalize_path() is
                     :          * applied to that copy rather than to the text inside rawstring.
    3729             :          */
    3730        1718 :         curname = pstrdup(curname);
    3731        1718 :         canonicalize_path(curname);
    3732        1718 :         *namelist = lappend(*namelist, curname);
    3733             : 
    3734             :         /* Loop back if we didn't reach end of string */
    3735        1718 :     } while (!done);
    3736             : 
    3737        1716 :     return true;
    3738             : }
    3739             : 
    3740             : 
    3741             : /*
    3742             :  * SplitGUCList --- parse a string containing identifiers or file names
    3743             :  *
    3744             :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
    3745             :  * presuming whether the elements will be taken as identifiers or file names.
    3746             :  * We assume the input has already been through flatten_set_variable_args(),
    3747             :  * so that we need never downcase (if appropriate, that was done already).
    3748             :  * Nor do we ever truncate, since we don't know the correct max length.
    3749             :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
    3750             :  * because any embedded whitespace should have led to double-quoting).
    3751             :  * Otherwise the API is identical to SplitIdentifierString.
    3752             :  *
                     :  * An element may be double-quoted, in which case a doubled quote ("")
                     :  * inside it stands for one literal quote character.  An unquoted element
                     :  * ends at the first separator, whitespace character, or end of string,
                     :  * and must not be empty.  An entirely empty (or all-whitespace) input
                     :  * yields an empty list and a true result.
                     :  *
    3753             :  * XXX it's annoying to have so many copies of this string-splitting logic.
    3754             :  * However, it's not clear that having one function with a bunch of option
    3755             :  * flags would be much better.
    3756             :  *
    3757             :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
    3758             :  * Be sure to update that if you have to change this.
    3759             :  *
    3760             :  * Inputs:
    3761             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3762             :  *             been modified to contain the separated identifiers.
    3763             :  *  separator: the separator punctuation expected between identifiers
    3764             :  *             (typically '.' or ',').  Whitespace may also appear around
    3765             :  *             identifiers.
    3766             :  * Outputs:
    3767             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3768             :  *            rawstring.  Caller should list_free() this even on error return.
    3769             :  *
    3770             :  * Returns true if okay, false if there is a syntax error in the string.
    3771             :  */
    3772             : bool
    3773        3758 : SplitGUCList(char *rawstring, char separator,
    3774             :              List **namelist)
    3775             : {
    3776        3758 :     char       *nextp = rawstring;
    3777        3758 :     bool        done = false;
    3778             : 
    3779        3758 :     *namelist = NIL;
    3780             : 
    3781        3758 :     while (scanner_isspace(*nextp))
    3782           0 :         nextp++;                /* skip leading whitespace */
    3783             : 
    3784        3758 :     if (*nextp == '\0')
    3785        3684 :         return true;            /* allow empty string */
    3786             : 
    3787             :     /* At the top of the loop, we are at start of a new identifier. */
    3788             :     do
    3789             :     {
    3790             :         char       *curname;
    3791             :         char       *endp;
    3792             : 
    3793         100 :         if (*nextp == '"')
    3794             :         {
    3795             :             /* Quoted name --- collapse quote-quote pairs */
    3796          24 :             curname = nextp + 1;
    3797             :             for (;;)
    3798             :             {
    3799          36 :                 endp = strchr(nextp + 1, '"');
    3800          30 :                 if (endp == NULL)
    3801           0 :                     return false;   /* mismatched quotes */
    3802          30 :                 if (endp[1] != '"')
    3803          24 :                     break;      /* found end of quoted name */
    3804             :                 /*
                     :                  * Collapse adjacent quotes into one quote, and look again.
                     :                  * strlen(endp) counts the first quote but not the trailing
                     :                  * NUL, so moving that many bytes from endp + 1 shifts the
                     :                  * rest of the string, NUL included, down by one.  Setting
                     :                  * nextp = endp then makes the next strchr() start just past
                     :                  * the quote that was kept.
                     :                  */
    3805           6 :                 memmove(endp, endp + 1, strlen(endp));
    3806           6 :                 nextp = endp;
    3807             :             }
    3808             :             /* endp now points at the terminating quote */
    3809          24 :             nextp = endp + 1;
    3810             :         }
    3811             :         else
    3812             :         {
    3813             :             /* Unquoted name --- extends to separator or whitespace */
    3814          76 :             curname = nextp;
    3815         718 :             while (*nextp && *nextp != separator &&
    3816         642 :                    !scanner_isspace(*nextp))
    3817         642 :                 nextp++;
    3818          76 :             endp = nextp;
    3819          76 :             if (curname == nextp)
    3820           0 :                 return false;   /* empty unquoted name not allowed */
    3821             :         }
    3822             : 
    3823         100 :         while (scanner_isspace(*nextp))
    3824           0 :             nextp++;            /* skip trailing whitespace */
    3825             : 
    3826         100 :         if (*nextp == separator)
    3827             :         {
    3828          26 :             nextp++;
    3829          44 :             while (scanner_isspace(*nextp))
    3830          18 :                 nextp++;        /* skip leading whitespace for next */
    3831             :             /* we expect another name, so done remains false */
    3832             :         }
    3833          74 :         else if (*nextp == '\0')
    3834          74 :             done = true;
    3835             :         else
    3836           0 :             return false;       /* invalid syntax */
    3837             : 
    3838             :         /*
                     :          * Now safe to overwrite separator with a null.  For an unquoted
                     :          * name endp is where the scan stopped, which can be the separator
                     :          * itself; that is why this write waits until the separator has
                     :          * been examined above.  For a quoted name endp is the closing
                     :          * quote.
                     :          */
    3839         100 :         *endp = '\0';
    3840             : 
    3841             :         /*
    3842             :          * Finished isolating current name --- add it to list
                     :          *
                     :          * curname still points into rawstring; no copy is made here.
    3843             :          */
    3844         100 :         *namelist = lappend(*namelist, curname);
    3845             : 
    3846             :         /* Loop back if we didn't reach end of string */
    3847         100 :     } while (!done);
    3848             : 
    3849          74 :     return true;
    3850             : }
    3851             : 
    3852             : 
    3853             : /*****************************************************************************
    3854             :  *  Comparison Functions used for bytea
    3855             :  *
    3856             :  * Note: btree indexes need these routines not to leak memory; therefore,
    3857             :  * be careful to free working copies of toasted datums.  Most places don't
    3858             :  * need to be so careful.
    3859             :  *****************************************************************************/
    3860             : /*
                     :  * byteaeq --- report whether two bytea arguments are byte-for-byte equal
                     :  *
                     :  * Both arguments are picked up as raw Datums so that their sizes can be
                     :  * compared via toast_raw_datum_size() first.  Only when the sizes match
                     :  * are the values fetched with DatumGetByteaPP() and their data compared
                     :  * with memcmp().  Returns a bool Datum.
                     :  */
    3861             : Datum
    3862       10390 : byteaeq(PG_FUNCTION_ARGS)
    3863             : {
    3864       10390 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3865       10390 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3866             :     bool        result;
    3867             :     Size        len1,
    3868             :                 len2;
    3869             : 
    3870             :     /*
    3871             :      * We can use a fast path for unequal lengths, which might save us from
    3872             :      * having to detoast one or both values.
    3873             :      */
    3874       10390 :     len1 = toast_raw_datum_size(arg1);
    3875       10390 :     len2 = toast_raw_datum_size(arg2);
    3876       10390 :     if (len1 != len2)
    3877        4316 :         result = false;
    3878             :     else
    3879             :     {
    3880        6074 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3881        6074 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3882             : 
                     :         /*
                     :          * len1 == len2 here.  VARHDRSZ is subtracted to get the number
                     :          * of data bytes (presumably toast_raw_datum_size() counts a
                     :          * header of that size -- confirm against its definition).
                     :          */
    3883        6074 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3884             :                          len1 - VARHDRSZ) == 0);
    3885             : 
                     :         /* Release any copies made above; see the no-leak note heading this section */
    3886        6074 :         PG_FREE_IF_COPY(barg1, 0);
    3887        6074 :         PG_FREE_IF_COPY(barg2, 1);
    3888             :     }
    3889             : 
    3890       10390 :     PG_RETURN_BOOL(result);
    3891             : }
    3892             : /*
                     :  * byteane --- report whether two bytea arguments differ
                     :  *
                     :  * Both arguments are picked up as raw Datums so that their sizes can be
                     :  * compared via toast_raw_datum_size() first; differing sizes give true
                     :  * straight away.  Otherwise the values are fetched with
                     :  * DatumGetByteaPP() and their data compared with memcmp().  Returns a
                     :  * bool Datum.
                     :  */
    3893             : Datum
    3894         768 : byteane(PG_FUNCTION_ARGS)
    3895             : {
    3896         768 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3897         768 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3898             :     bool        result;
    3899             :     Size        len1,
    3900             :                 len2;
    3901             : 
    3902             :     /*
    3903             :      * We can use a fast path for unequal lengths, which might save us from
    3904             :      * having to detoast one or both values.
    3905             :      */
    3906         768 :     len1 = toast_raw_datum_size(arg1);
    3907         768 :     len2 = toast_raw_datum_size(arg2);
    3908         768 :     if (len1 != len2)
    3909           0 :         result = true;
    3910             :     else
    3911             :     {
    3912         768 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3913         768 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3914             : 
                     :         /*
                     :          * len1 == len2 here.  VARHDRSZ is subtracted to get the number
                     :          * of data bytes (presumably toast_raw_datum_size() counts a
                     :          * header of that size -- confirm against its definition).
                     :          */
    3915         768 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3916             :                          len1 - VARHDRSZ) != 0);
    3917             : 
                     :         /* Release any copies made above; see the no-leak note heading this section */
    3918         768 :         PG_FREE_IF_COPY(barg1, 0);
    3919         768 :         PG_FREE_IF_COPY(barg2, 1);
    3920             :     }
    3921             : 
    3922         768 :     PG_RETURN_BOOL(result);
    3923             : }
    3924             : /*
                     :  * bytealt --- true when the first bytea argument sorts before the second
                     :  *
                     :  * The first Min(len1, len2) data bytes of the two values are compared
                     :  * with memcmp(); if they are all equal, the shorter value counts as
                     :  * the smaller one.  Returns a bool Datum.
                     :  */
    3925             : Datum
    3926        8318 : bytealt(PG_FUNCTION_ARGS)
    3927             : {
    3928        8318 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3929        8318 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3930             :     int         len1,
    3931             :                 len2;
    3932             :     int         cmp;
    3933             : 
    3934        8318 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3935        8318 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3936             : 
                     :     /* Compare only the bytes that both values have */
    3937        8318 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3938             : 
                     :     /* cmp and the lengths are already in locals, so the inputs can go */
    3939        8318 :     PG_FREE_IF_COPY(arg1, 0);
    3940        8318 :     PG_FREE_IF_COPY(arg2, 1);
    3941             : 
                     :     /* Equal common prefix: strictly shorter first argument means "less" */
    3942        8318 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    3943             : }
    3944             : /*
                     :  * byteale --- true when the first bytea argument sorts before, or equal
                     :  * to, the second
                     :  *
                     :  * The first Min(len1, len2) data bytes of the two values are compared
                     :  * with memcmp(); if they are all equal, the first argument qualifies
                     :  * when it is no longer than the second.  Returns a bool Datum.
                     :  */
    3945             : Datum
    3946        6356 : byteale(PG_FUNCTION_ARGS)
    3947             : {
    3948        6356 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3949        6356 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3950             :     int         len1,
    3951             :                 len2;
    3952             :     int         cmp;
    3953             : 
    3954        6356 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3955        6356 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3956             : 
                     :     /* Compare only the bytes that both values have */
    3957        6356 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3958             : 
                     :     /* cmp and the lengths are already in locals, so the inputs can go */
    3959        6356 :     PG_FREE_IF_COPY(arg1, 0);
    3960        6356 :     PG_FREE_IF_COPY(arg2, 1);
    3961             : 
                     :     /* Equal common prefix: first argument must not be the longer one */
    3962        6356 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    3963             : }
    3964             : /*
                     :  * byteagt --- true when the first bytea argument sorts after the second
                     :  *
                     :  * The first Min(len1, len2) data bytes of the two values are compared
                     :  * with memcmp(); if they are all equal, the longer value counts as the
                     :  * greater one.  Returns a bool Datum.
                     :  */
    3965             : Datum
    3966        6228 : byteagt(PG_FUNCTION_ARGS)
    3967             : {
    3968        6228 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3969        6228 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3970             :     int         len1,
    3971             :                 len2;
    3972             :     int         cmp;
    3973             : 
    3974        6228 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3975        6228 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3976             : 
                     :     /* Compare only the bytes that both values have */
    3977        6228 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3978             : 
                     :     /* cmp and the lengths are already in locals, so the inputs can go */
    3979        6228 :     PG_FREE_IF_COPY(arg1, 0);
    3980        6228 :     PG_FREE_IF_COPY(arg2, 1);
    3981             : 
                     :     /* Equal common prefix: strictly longer first argument means "greater" */
    3982        6228 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    3983             : }
    3984             : /*
                     :  * byteage --- true when the first bytea argument sorts after, or equal
                     :  * to, the second
                     :  *
                     :  * The first Min(len1, len2) data bytes of the two values are compared
                     :  * with memcmp(); if they are all equal, the first argument qualifies
                     :  * when it is no shorter than the second.  Returns a bool Datum.
                     :  */
    3985             : Datum
    3986        5010 : byteage(PG_FUNCTION_ARGS)
    3987             : {
    3988        5010 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3989        5010 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3990             :     int         len1,
    3991             :                 len2;
    3992             :     int         cmp;
    3993             : 
    3994        5010 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3995        5010 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3996             : 
                     :     /* Compare only the bytes that both values have */
    3997        5010 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3998             : 
                     :     /* cmp and the lengths are already in locals, so the inputs can go */
    3999        5010 :     PG_FREE_IF_COPY(arg1, 0);
    4000        5010 :     PG_FREE_IF_COPY(arg2, 1);
    4001             : 
                     :     /* Equal common prefix: first argument must not be the shorter one */
    4002        5010 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    4003             : }
    4004             : /*
                     :  * byteacmp --- three-way comparison of two bytea arguments
                     :  *
                     :  * Returns an int32 Datum that is negative, zero, or positive as the
                     :  * first argument sorts before, equal to, or after the second.  The
                     :  * value is memcmp()'s result over the first Min(len1, len2) data bytes,
                     :  * except that when those bytes are all equal and the lengths differ it
                     :  * is exactly -1 (first is shorter) or 1 (first is longer).
                     :  */
    4005             : Datum
    4006       87420 : byteacmp(PG_FUNCTION_ARGS)
    4007             : {
    4008       87420 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4009       87420 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4010             :     int         len1,
    4011             :                 len2;
    4012             :     int         cmp;
    4013             : 
    4014       87420 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4015       87420 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4016             : 
                     :     /* Compare the common prefix, then let the length break a tie */
    4017       87420 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4018       87420 :     if ((cmp == 0) && (len1 != len2))
    4019       14576 :         cmp = (len1 < len2) ? -1 : 1;
    4020             : 
                     :     /* cmp is final at this point, so the inputs can go */
    4021       87420 :     PG_FREE_IF_COPY(arg1, 0);
    4022       87420 :     PG_FREE_IF_COPY(arg2, 1);
    4023             : 
    4024       87420 :     PG_RETURN_INT32(cmp);
    4025             : }
    4026             : /*
                     :  * bytea_larger --- return whichever of two bytea arguments sorts later
                     :  *
                     :  * The ordering is memcmp() over the first Min(len1, len2) data bytes,
                     :  * with the longer value winning when those bytes are all equal.  When
                     :  * the two arguments compare equal, the second one is returned.
                     :  *
                     :  * The result is one of the two argument pointers, not a copy; note that
                     :  * neither argument is freed here.
                     :  */
    4027             : Datum
    4028          24 : bytea_larger(PG_FUNCTION_ARGS)
    4029             : {
    4030          24 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4031          24 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4032             :     bytea      *result;
    4033             :     int         len1,
    4034             :                 len2;
    4035             :     int         cmp;
    4036             : 
    4037          24 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4038          24 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4039             : 
                     :     /* arg1 is chosen only when it is strictly greater */
    4040          24 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4041          24 :     result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
    4042             : 
    4043          24 :     PG_RETURN_BYTEA_P(result);
    4044             : }
    4045             : /*
                     :  * bytea_smaller --- return whichever of two bytea arguments sorts earlier
                     :  *
                     :  * The ordering is memcmp() over the first Min(len1, len2) data bytes,
                     :  * with the shorter value winning when those bytes are all equal.  When
                     :  * the two arguments compare equal, the second one is returned.
                     :  *
                     :  * The result is one of the two argument pointers, not a copy; note that
                     :  * neither argument is freed here.
                     :  */
    4046             : Datum
    4047          24 : bytea_smaller(PG_FUNCTION_ARGS)
    4048             : {
    4049          24 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4050          24 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4051             :     bytea      *result;
    4052             :     int         len1,
    4053             :                 len2;
    4054             :     int         cmp;
    4055             : 
    4056          24 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4057          24 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4058             : 
                     :     /* arg1 is chosen only when it is strictly smaller */
    4059          24 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4060          24 :     result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
    4061             : 
    4062          24 :     PG_RETURN_BYTEA_P(result);
    4063             : }
    4064             : /*
                     :  * bytea_sortsupport --- set up sort support for bytea
                     :  *
                     :  * Argument 0 is taken as a SortSupport pointer.  The real work is
                     :  * delegated to varstr_sortsupport(), called with BYTEAOID and
                     :  * C_COLLATION_OID while ssup->ssup_cxt is the current memory context
                     :  * (presumably so that whatever it allocates lives there -- confirm
                     :  * against varstr_sortsupport).  The previous context is restored
                     :  * before returning.  Returns void.
                     :  */
    4065             : Datum
    4066          30 : bytea_sortsupport(PG_FUNCTION_ARGS)
    4067             : {
    4068          30 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    4069             :     MemoryContext oldcontext;
    4070             : 
    4071          30 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    4072             : 
    4073             :     /* Use generic string SortSupport, forcing "C" collation */
    4074          30 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
    4075             : 
    4076          30 :     MemoryContextSwitchTo(oldcontext);
    4077             : 
    4078          30 :     PG_RETURN_VOID();
    4079             : }
    4080             : 
    4081             : /*
                     :  * Cast bytea -> int2
                     :  *
                     :  * Input longer than sizeof(uint16) bytes is rejected with
                     :  * ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE.  Otherwise the bytes are read
                     :  * most significant first into a uint16 accumulator that starts at zero,
                     :  * so shorter input behaves as if zero-padded on the left and empty
                     :  * input yields 0.  The accumulator is handed to PG_RETURN_INT16().
                     :  */
    4082             : Datum
    4083          36 : bytea_int2(PG_FUNCTION_ARGS)
    4084             : {
    4085          36 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    4086          36 :     int         len = VARSIZE_ANY_EXHDR(v);
    4087             :     uint16      result;
    4088             : 
    4089             :     /* Check that the byte array is not too long */
    4090          36 :     if (len > sizeof(result))
    4091           6 :         ereport(ERROR,
    4092             :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    4093             :                 errmsg("smallint out of range"));
    4094             : 
    4095             :     /* Convert it to an integer; most significant bytes come first */
    4096          30 :     result = 0;
    4097          72 :     for (int i = 0; i < len; i++)
    4098             :     {
                     :         /* make room for one more byte, then drop it into the low bits */
    4099          42 :         result <<= BITS_PER_BYTE;
    4100          42 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    4101             :     }
    4102             : 
    4103          30 :     PG_RETURN_INT16(result);
    4104             : }
    4105             : 
    4106             : /*
                     :  * Cast bytea -> int4
                     :  *
                     :  * Input longer than sizeof(uint32) bytes is rejected with
                     :  * ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE.  Otherwise the bytes are read
                     :  * most significant first into a uint32 accumulator that starts at zero,
                     :  * so shorter input behaves as if zero-padded on the left and empty
                     :  * input yields 0.  The accumulator is handed to PG_RETURN_INT32().
                     :  */
    4107             : Datum
    4108          36 : bytea_int4(PG_FUNCTION_ARGS)
    4109             : {
    4110          36 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    4111          36 :     int         len = VARSIZE_ANY_EXHDR(v);
    4112             :     uint32      result;
    4113             : 
    4114             :     /* Check that the byte array is not too long */
    4115          36 :     if (len > sizeof(result))
    4116           6 :         ereport(ERROR,
    4117             :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    4118             :                 errmsg("integer out of range"));
    4119             : 
    4120             :     /* Convert it to an integer; most significant bytes come first */
    4121          30 :     result = 0;
    4122         108 :     for (int i = 0; i < len; i++)
    4123             :     {
                     :         /* make room for one more byte, then drop it into the low bits */
    4124          78 :         result <<= BITS_PER_BYTE;
    4125          78 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    4126             :     }
    4127             : 
    4128          30 :     PG_RETURN_INT32(result);
    4129             : }
    4130             : 
    4131             : /*
                     :  * Cast bytea -> int8
                     :  *
                     :  * Input longer than sizeof(uint64) bytes is rejected with
                     :  * ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE.  Otherwise the bytes are read
                     :  * most significant first into a uint64 accumulator that starts at zero,
                     :  * so shorter input behaves as if zero-padded on the left and empty
                     :  * input yields 0.  The accumulator is handed to PG_RETURN_INT64().
                     :  */
    4132             : Datum
    4133          36 : bytea_int8(PG_FUNCTION_ARGS)
    4134             : {
    4135          36 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    4136          36 :     int         len = VARSIZE_ANY_EXHDR(v);
    4137             :     uint64      result;
    4138             : 
    4139             :     /* Check that the byte array is not too long */
    4140          36 :     if (len > sizeof(result))
    4141           6 :         ereport(ERROR,
    4142             :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    4143             :                 errmsg("bigint out of range"));
    4144             : 
    4145             :     /* Convert it to an integer; most significant bytes come first */
    4146          30 :     result = 0;
    4147         180 :     for (int i = 0; i < len; i++)
    4148             :     {
                     :         /* make room for one more byte, then drop it into the low bits */
    4149         150 :         result <<= BITS_PER_BYTE;
    4150         150 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    4151             :     }
    4152             : 
    4153          30 :     PG_RETURN_INT64(result);
    4154             : }
    4155             : 
    4156             : /*
                     :  * Cast int2 -> bytea; can just use int2send()
                     :  *
                     :  * The call info is forwarded untouched and int2send()'s Datum is
                     :  * returned as-is, so the output is whatever int2send() produces.
                     :  */
    4157             : Datum
    4158          12 : int2_bytea(PG_FUNCTION_ARGS)
    4159             : {
    4160          12 :     return int2send(fcinfo);
    4161             : }
    4162             : 
    4163             : /*
                     :  * Cast int4 -> bytea; can just use int4send()
                     :  *
                     :  * The call info is forwarded untouched and int4send()'s Datum is
                     :  * returned as-is, so the output is whatever int4send() produces.
                     :  */
    4164             : Datum
    4165          12 : int4_bytea(PG_FUNCTION_ARGS)
    4166             : {
    4167          12 :     return int4send(fcinfo);
    4168             : }
    4169             : 
    4170             : /*
                     :  * Cast int8 -> bytea; can just use int8send()
                     :  *
                     :  * The call info is forwarded untouched and int8send()'s Datum is
                     :  * returned as-is, so the output is whatever int8send() produces.
                     :  */
    4171             : Datum
    4172          12 : int8_bytea(PG_FUNCTION_ARGS)
    4173             : {
    4174          12 :     return int8send(fcinfo);
    4175             : }
    4176             : 
    4177             : /*
    4178             :  * appendStringInfoText
    4179             :  *
    4180             :  * Append a text to str.
    4181             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
                     :  *
                     :  * The data bytes of t are passed to appendBinaryStringInfo() together
                     :  * with their length, so no intermediate C string is built.  The
                     :  * VARDATA_ANY()/VARSIZE_ANY_EXHDR() macros are applied to t directly;
                     :  * presumably t must already be detoasted -- confirm against callers.
    4182             :  */
    4183             : static void
    4184     1706226 : appendStringInfoText(StringInfo str, const text *t)
    4185             : {
    4186     1706226 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
    4187     1706226 : }
    4188             : 
    4189             : /*
    4190             :  * replace_text
    4191             :  * replace all occurrences of 'old_sub_str' (arg 1) in 'orig_str' (arg 0)
    4192             :  * with 'new_sub_str' (arg 2) to form 'new_str'
    4193             :  *
    4194             :  * returns 'orig_str' itself if it or 'old_sub_str' is empty, or if
    4195             :  * 'old_sub_str' never occurs in it; otherwise returns 'new_str'
    4196             :  */
    4197             : Datum
    4198        1332 : replace_text(PG_FUNCTION_ARGS)
    4199             : {
    4200        1332 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    4201        1332 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    4202        1332 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    4203             :     int         src_text_len;
    4204             :     int         from_sub_text_len;
    4205             :     TextPositionState state;
    4206             :     text       *ret_text;
    4207             :     int         chunk_len;
    4208             :     char       *curr_ptr;
    4209             :     char       *start_ptr;
    4210             :     StringInfoData str;
    4211             :     bool        found;
    4212             : 
    4213        1332 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4214        1332 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
    4215             : 
    4216             :     /* Return unmodified source string if empty source or pattern */
    4217        1332 :     if (src_text_len < 1 || from_sub_text_len < 1)
    4218             :     {
    4219           0 :         PG_RETURN_TEXT_P(src_text);
    4220             :     }
    4221             : 
    4222        1332 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
    4223             : 
    4224        1332 :     found = text_position_next(&state);
    4225             : 
    4226             :     /* When the from_sub_text is not found, return the source unchanged. */
    4227        1332 :     if (!found)
    4228             :     {
    4229         298 :         text_position_cleanup(&state);
    4230         298 :         PG_RETURN_TEXT_P(src_text);
    4231             :     }
    4232        1034 :     curr_ptr = text_position_get_match_ptr(&state);     /* start of the first match */
    4233        1034 :     start_ptr = VARDATA_ANY(src_text);  /* first source byte not yet copied */
    4234             : 
    4235        1034 :     initStringInfo(&str);
    4236             : 
    4237             :     do
    4238             :     {
    4239        5314 :         CHECK_FOR_INTERRUPTS();
    4240             : 
    4241             :         /* copy the source bytes lying between the previous match and this one */
    4242        5314 :         chunk_len = curr_ptr - start_ptr;
    4243        5314 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4244             : 
    4245        5314 :         appendStringInfoText(&str, to_sub_text);    /* emit the replacement in place of the match */
    4246             : 
    4247        5314 :         start_ptr = curr_ptr + state.last_match_len;    /* resume just past the matched bytes */
    4248             : 
    4249        5314 :         found = text_position_next(&state);
    4250        5314 :         if (found)
    4251        4280 :             curr_ptr = text_position_get_match_ptr(&state);
    4252             :     }
    4253        5314 :     while (found);
    4254             : 
    4255             :     /* copy trailing data: from start_ptr up to the end of the src_text datum */
    4256        1034 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4257        1034 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4258             : 
    4259        1034 :     text_position_cleanup(&state);
    4260             : 
    4261        1034 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    4262        1034 :     pfree(str.data);
    4263             : 
    4264        1034 :     PG_RETURN_TEXT_P(ret_text);
    4265             : }
    4266             : 
    4267             : /*
    4268             :  * check_replace_text_has_escape
    4269             :  *
    4270             :  * Returns 0 if text contains no backslashes that need processing.
    4271             :  * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
    4272             :  * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
    4273             :  */
    4274             : static int
    4275       16790 : check_replace_text_has_escape(const text *replace_text)
    4276             : {
    4277       16790 :     int         result = 0;
    4278       16790 :     const char *p = VARDATA_ANY(replace_text);
    4279       16790 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4280             : 
    4281       16834 :     while (p < p_end)
    4282             :     {
    4283             :         /* Find next escape char, if any; none left means result is final. */
    4284       15768 :         p = memchr(p, '\\', p_end - p);
    4285       15768 :         if (p == NULL)
    4286       14946 :             break;
    4287         822 :         p++;                    /* step past the backslash */
    4288             :         /* Note: a backslash at the end doesn't require extra processing. */
    4289         822 :         if (p < p_end)
    4290             :         {
    4291         822 :             if (*p >= '1' && *p <= '9')
    4292         778 :                 return 2;       /* Found a submatch specifier, so done */
    4293          44 :             result = 1;         /* Found some other sequence, keep looking */
    4294          44 :             p++;                /* step past the escaped character too */
    4295             :         }
    4296             :     }
    4297       16012 :     return result;
    4298             : }
    4299             : 
    4300             : /*
    4301             :  * appendStringInfoRegexpSubstr
    4302             :  *
    4303             :  * Append replace_text to str, expanding \1 .. \9 (pmatch submatches), \&
    4304             :  * (whole match) and \\ (literal backslash).  start_ptr is the start of the
    4305             :  * match in the source string, at logical character position data_pos.
    4306             :  */
    4307             : static void
    4308         236 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    4309             :                              regmatch_t *pmatch,
    4310             :                              char *start_ptr, int data_pos)
    4311             : {
    4312         236 :     const char *p = VARDATA_ANY(replace_text);
    4313         236 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4314             : 
    4315         574 :     while (p < p_end)
    4316             :     {
    4317         518 :         const char *chunk_start = p;
    4318             :         int         so;
    4319             :         int         eo;
    4320             : 
    4321             :         /* Find next escape char, if any; otherwise scan to the end. */
    4322         518 :         p = memchr(p, '\\', p_end - p);
    4323         518 :         if (p == NULL)
    4324         174 :             p = p_end;
    4325             : 
    4326             :         /* Copy the text we just scanned over, if any. */
    4327         518 :         if (p > chunk_start)
    4328         318 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    4329             : 
    4330             :         /* Done if at end of string, else advance over escape char. */
    4331         518 :         if (p >= p_end)
    4332         174 :             break;
    4333         344 :         p++;
    4334             : 
    4335         344 :         if (p >= p_end)
    4336             :         {
    4337             :             /* Escape at very end of input.  Treat same as unexpected char */
    4338           6 :             appendStringInfoChar(str, '\\');
    4339           6 :             break;
    4340             :         }
    4341             : 
    4342         338 :         if (*p >= '1' && *p <= '9')
    4343         278 :         {
    4344             :             /* Use the back reference of regexp: digit d selects pmatch[d]. */
    4345         278 :             int         idx = *p - '0';
    4346             : 
    4347         278 :             so = pmatch[idx].rm_so;
    4348         278 :             eo = pmatch[idx].rm_eo;
    4349         278 :             p++;
    4350             :         }
    4351          60 :         else if (*p == '&')
    4352             :         {
    4353             :             /* Use the entire matched string, i.e. pmatch[0]. */
    4354          18 :             so = pmatch[0].rm_so;
    4355          18 :             eo = pmatch[0].rm_eo;
    4356          18 :             p++;
    4357             :         }
    4358          42 :         else if (*p == '\\')
    4359             :         {
    4360             :             /* \\ means transfer one \ to output. */
    4361          36 :             appendStringInfoChar(str, '\\');
    4362          36 :             p++;
    4363          36 :             continue;
    4364             :         }
    4365             :         else
    4366             :         {
    4367             :             /*
    4368             :              * If escape char is not followed by any expected char, just treat
    4369             :              * it as ordinary data to copy; p stays on the following char so
    4370             :              * the next iteration copies it too.  (XXX throw an error instead?)
    4371             :              */
    4372           6 :             appendStringInfoChar(str, '\\');
    4373           6 :             continue;
    4374             :         }
    4375             : 
    4376         296 :         if (so >= 0 && eo >= 0)
    4377             :         {
    4378             :             /*
    4379             :              * Copy the referenced text; so and eo count characters, not bytes,
    4380             :              * so convert.  (If either is negative, nothing is appended.)
    4381             :              */
    4382             :             char       *chunk_start;
    4383             :             int         chunk_len;
    4384             : 
    4385             :             Assert(so >= data_pos);
    4386         296 :             chunk_start = start_ptr;
    4387         296 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
    4388         296 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
    4389         296 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    4390             :         }
    4391             :     }
    4392         236 : }
    4393             : 
    4394             : /*
    4395             :  * replace_text_regexp
    4396             :  *
    4397             :  * replace substring(s) in src_text that match pattern with replace_text.
    4398             :  * The replace_text can contain backslash markers to substitute
    4399             :  * (parts of) the matched text.  The result is a newly built text value.
    4400             :  *
    4401             :  * cflags: regexp compile flags.
    4402             :  * collation: collation to use.
    4403             :  * search_start: the character (not byte) offset in src_text at which to
    4404             :  * begin searching.
    4405             :  * n: if 0, replace all matches; if > 0, replace only the N'th match.
    4406             :  */
    4407             : text *
    4408       16790 : replace_text_regexp(text *src_text, text *pattern_text,
    4409             :                     text *replace_text,
    4410             :                     int cflags, Oid collation,
    4411             :                     int search_start, int n)
    4412             : {
    4413             :     text       *ret_text;
    4414             :     regex_t    *re;
    4415       16790 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4416       16790 :     int         nmatches = 0;
    4417             :     StringInfoData buf;
    4418             :     regmatch_t  pmatch[10];     /* main match, plus \1 to \9 */
    4419       16790 :     int         nmatch = lengthof(pmatch);
    4420             :     pg_wchar   *data;
    4421             :     size_t      data_len;
    4422             :     int         data_pos;
    4423             :     char       *start_ptr;
    4424             :     int         escape_status;
    4425             : 
    4426       16790 :     initStringInfo(&buf);
    4427             : 
    4428             :     /* Convert data string to wide characters; data_len is the count returned. */
    4429       16790 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
    4430       16790 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    4431             : 
    4432             :     /* Check whether replace_text has escapes, especially regexp submatches. */
    4433       16790 :     escape_status = check_replace_text_has_escape(replace_text);
    4434             : 
    4435             :     /* If no regexp submatches (status 0 or 1), we can use REG_NOSUB. */
    4436       16790 :     if (escape_status < 2)
    4437             :     {
    4438       16012 :         cflags |= REG_NOSUB;
    4439             :         /* Also tell pg_regexec we only want the whole-match location. */
    4440       16012 :         nmatch = 1;
    4441             :     }
    4442             : 
    4443             :     /* Prepare the regexp. */
    4444       16790 :     re = RE_compile_and_cache(pattern_text, cflags, collation);
    4445             : 
    4446             :     /* start_ptr points to the data_pos'th character of src_text */
    4447       16790 :     start_ptr = (char *) VARDATA_ANY(src_text);
    4448       16790 :     data_pos = 0;
    4449             : 
    4450       23108 :     while (search_start <= data_len)    /* NOTE(review): int vs size_t compare; confirm search_start is never negative */
    4451             :     {
    4452             :         int         regexec_result;
    4453             : 
    4454       23102 :         CHECK_FOR_INTERRUPTS();
    4455             : 
    4456       23102 :         regexec_result = pg_regexec(re,
    4457             :                                     data,
    4458             :                                     data_len,
    4459             :                                     search_start,
    4460             :                                     NULL,   /* no details */
    4461             :                                     nmatch,
    4462             :                                     pmatch,
    4463             :                                     0);
    4464             : 
    4465       23102 :         if (regexec_result == REG_NOMATCH)
    4466       14958 :             break;
    4467             : 
    4468        8144 :         if (regexec_result != REG_OKAY)
    4469             :         {
    4470             :             char        errMsg[100];
    4471             : 
    4472           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    4473           0 :             ereport(ERROR,
    4474             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    4475             :                      errmsg("regular expression failed: %s", errMsg)));
    4476             :         }
    4477             : 
    4478             :         /*
    4479             :          * Count matches, and decide whether to replace this match.
    4480             :          */
    4481        8144 :         nmatches++;
    4482        8144 :         if (n > 0 && nmatches != n)
    4483             :         {
    4484             :             /*
    4485             :              * No, so advance search_start, but not start_ptr/data_pos. (Thus,
    4486             :              * we treat the matched text as if it weren't matched, and copy it
    4487             :              * to the output later.)
    4488             :              */
    4489          60 :             search_start = pmatch[0].rm_eo;
    4490          60 :             if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4491           0 :                 search_start++; /* zero-length match: step one character to avoid refinding it */
    4492          60 :             continue;
    4493             :         }
    4494             : 
    4495             :         /*
    4496             :          * Copy the text to the left of the match position.  Note we are given
    4497             :          * character not byte indexes.
    4498             :          */
    4499        8084 :         if (pmatch[0].rm_so - data_pos > 0)
    4500             :         {
    4501             :             int         chunk_len;
    4502             : 
    4503        7910 :             chunk_len = charlen_to_bytelen(start_ptr,
    4504        7910 :                                            pmatch[0].rm_so - data_pos);
    4505        7910 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4506             : 
    4507             :             /*
    4508             :              * Advance start_ptr over that text, to avoid multiple rescans of
    4509             :              * it if the replace_text contains multiple back-references.
    4510             :              */
    4511        7910 :             start_ptr += chunk_len;
    4512        7910 :             data_pos = pmatch[0].rm_so;
    4513             :         }
    4514             : 
    4515             :         /*
    4516             :          * Copy the replace_text, processing escapes if any are present.
    4517             :          */
    4518        8084 :         if (escape_status > 0)
    4519         236 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    4520             :                                          start_ptr, data_pos);
    4521             :         else
    4522        7848 :             appendStringInfoText(&buf, replace_text);
    4523             : 
    4524             :         /* Advance start_ptr and data_pos over the matched text. */
    4525       16168 :         start_ptr += charlen_to_bytelen(start_ptr,
    4526        8084 :                                         pmatch[0].rm_eo - data_pos);
    4527        8084 :         data_pos = pmatch[0].rm_eo;
    4528             : 
    4529             :         /*
    4530             :          * If we only want to replace one occurrence, we're done.
    4531             :          */
    4532        8084 :         if (n > 0)
    4533        1826 :             break;
    4534             : 
    4535             :         /*
    4536             :          * Advance search position.  Normally we start the next search at the
    4537             :          * end of the previous match; but if the match was of zero length, we
    4538             :          * have to advance by one character, or we'd just find the same match
    4539             :          * again.
    4540             :          */
    4541        6258 :         search_start = data_pos;
    4542        6258 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4543          12 :             search_start++;
    4544             :     }
    4545             : 
    4546             :     /*
    4547             :      * Copy the text to the right of the last match.
    4548             :      */
    4549       16790 :     if (data_pos < data_len)    /* NOTE(review): int vs size_t compare again; data_pos starts at 0 and only takes rm_so/rm_eo values */
    4550             :     {
    4551             :         int         chunk_len;
    4552             : 
    4553       16088 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4554       16088 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4555             :     }
    4556             : 
    4557       16790 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4558       16790 :     pfree(buf.data);
    4559       16790 :     pfree(data);
    4560             : 
    4561       16790 :     return ret_text;
    4562             : }
    4563             : 
    4564             : /*
    4565             :  * split_part
    4566             :  * parse input string based on provided field separator
    4567             :  * return N'th item (1 based, negative counts from end; 0 raises an error)
    4568             :  */
    4569             : Datum
    4570         150 : split_part(PG_FUNCTION_ARGS)
    4571             : {
    4572         150 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4573         150 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4574         150 :     int         fldnum = PG_GETARG_INT32(2);
    4575             :     int         inputstring_len;
    4576             :     int         fldsep_len;
    4577             :     TextPositionState state;
    4578             :     char       *start_ptr;
    4579             :     char       *end_ptr;
    4580             :     text       *result_text;
    4581             :     bool        found;
    4582             : 
    4583             :     /* field number is 1 based, so zero is rejected outright */
    4584         150 :     if (fldnum == 0)
    4585           6 :         ereport(ERROR,
    4586             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4587             :                  errmsg("field position must not be zero")));
    4588             : 
    4589         144 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4590         144 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4591             : 
    4592             :     /* return empty string for empty input string */
    4593         144 :     if (inputstring_len < 1)
    4594          12 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4595             : 
    4596             :     /* handle empty field separator */
    4597         132 :     if (fldsep_len < 1)
    4598             :     {
    4599             :         /* if first or last field, return input string, else empty string */
    4600          24 :         if (fldnum == 1 || fldnum == -1)
    4601          12 :             PG_RETURN_TEXT_P(inputstring);
    4602             :         else
    4603          12 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4604             :     }
    4605             : 
    4606             :     /* find the first field separator */
    4607         108 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4608             : 
    4609         108 :     found = text_position_next(&state);
    4610             : 
    4611             :     /* special case if fldsep not found at all */
    4612         108 :     if (!found)
    4613             :     {
    4614          24 :         text_position_cleanup(&state);
    4615             :         /* if first or last field, return input string, else empty string */
    4616          24 :         if (fldnum == 1 || fldnum == -1)
    4617          12 :             PG_RETURN_TEXT_P(inputstring);
    4618             :         else
    4619          12 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4620             :     }
    4621             : 
    4622             :     /*
    4623             :      * take care of a negative field number (i.e. count from the right) by
    4624             :      * converting to a positive field number; we need total number of fields
    4625             :      */
    4626          84 :     if (fldnum < 0)
    4627             :     {
    4628             :         /* we found a fldsep, so there are at least two fields */
    4629          42 :         int         numfields = 2;
    4630             : 
    4631          54 :         while (text_position_next(&state))
    4632          12 :             numfields++;
    4633             : 
    4634             :         /* special case of last field does not require an extra pass */
    4635          42 :         if (fldnum == -1)
    4636             :         {
    4637          24 :             start_ptr = text_position_get_match_ptr(&state) + state.last_match_len;
    4638          24 :             end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
    4639          24 :             text_position_cleanup(&state);
    4640          24 :             PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
    4641             :                                                       end_ptr - start_ptr));
    4642             :         }
    4643             : 
    4644             :         /* else, convert fldnum to positive notation (e.g. -2 of 3 fields -> 2) */
    4645          18 :         fldnum += numfields + 1;
    4646             : 
    4647             :         /* if nonexistent field, return empty string */
    4648          18 :         if (fldnum <= 0)
    4649             :         {
    4650           6 :             text_position_cleanup(&state);
    4651           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4652             :         }
    4653             : 
    4654             :         /* reset to pointing at first match, but now with positive fldnum */
    4655          12 :         text_position_reset(&state);
    4656          12 :         found = text_position_next(&state);
    4657             :         Assert(found);
    4658             :     }
    4659             : 
    4660             :     /* identify bounds of first field */
    4661          54 :     start_ptr = VARDATA_ANY(inputstring);
    4662          54 :     end_ptr = text_position_get_match_ptr(&state);
    4663             : 
    4664         102 :     while (found && --fldnum > 0)   /* fldnum counts down the fields still to skip */
    4665             :     {
    4666             :         /* identify bounds of next field */
    4667          48 :         start_ptr = end_ptr + state.last_match_len;
    4668          48 :         found = text_position_next(&state);
    4669          48 :         if (found)
    4670          18 :             end_ptr = text_position_get_match_ptr(&state);
    4671             :     }
    4672             : 
    4673          54 :     text_position_cleanup(&state);
    4674             : 
    4675          54 :     if (fldnum > 0)
    4676             :     {
    4677             :         /* N'th field separator not found */
    4678             :         /* if last field requested, return it, else empty string */
    4679          30 :         if (fldnum == 1)
    4680             :         {
    4681          24 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);    /* bytes preceding the last field */
    4682             : 
    4683          24 :             result_text = cstring_to_text_with_len(start_ptr,
    4684             :                                                    inputstring_len - last_len);
    4685             :         }
    4686             :         else
    4687           6 :             result_text = cstring_to_text("");
    4688             :     }
    4689             :     else
    4690             :     {
    4691             :         /* non-last field requested: it ends where the next separator starts */
    4692          24 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
    4693             :     }
    4694             : 
    4695          54 :     PG_RETURN_TEXT_P(result_text);
    4696             : }
    4697             : 
    4698             : /*
    4699             :  * Convenience function: true when texteq, called under collid, says txt1 and txt2 are equal.
    4700             :  */
    4701             : static bool
    4702         384 : text_isequal(text *txt1, text *txt2, Oid collid)
    4703             : {
    4704         384 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
    4705             :                                                 collid,
    4706             :                                                 PointerGetDatum(txt1),
    4707             :                                                 PointerGetDatum(txt2)));
    4708             : }
    4709             : 
    4710             : /*
    4711             :  * text_to_array
    4712             :  * parse input string and return text array of elements, based on provided
    4713             :  * field separator; all the splitting is delegated to split_text()
    4714             :  */
    4715             : Datum
    4716         170 : text_to_array(PG_FUNCTION_ARGS)
    4717             : {
    4718             :     SplitTextOutputData tstate;
    4719             : 
    4720             :     /* For array output, tstate should start as all zeroes */
    4721         170 :     memset(&tstate, 0, sizeof(tstate));
    4722             : 
    4723         170 :     if (!split_text(fcinfo, &tstate))
    4724           6 :         PG_RETURN_NULL();       /* split_text() reported a NULL result */
    4725             : 
    4726         164 :     if (tstate.astate == NULL)
    4727           6 :         PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));  /* nothing accumulated: empty text[] */
    4728             : 
    4729         158 :     PG_RETURN_DATUM(makeArrayResult(tstate.astate,
    4730             :                                     CurrentMemoryContext));
    4731             : }
    4732             : 
    4733             : /*
    4734             :  * text_to_array_null
    4735             :  * parse input string and return text array of elements,
    4736             :  * based on provided field separator and null string
    4737             :  *
    4738             :  * This is a separate entry point only to prevent the regression tests from
    4739             :  * complaining about different argument sets for the same internal function.
    4740             :  */
    4741             : Datum
    4742          60 : text_to_array_null(PG_FUNCTION_ARGS)
    4743             : {
    4744          60 :     return text_to_array(fcinfo);   /* split_text() checks PG_NARGS() itself for the null-string argument */
    4745             : }
    4746             : 
    4747             : /*
    4748             :  * text_to_table
    4749             :  * parse input string and return table of elements,
    4750             :  * based on provided field separator
    4751             :  */
    4752             : Datum
    4753          84 : text_to_table(PG_FUNCTION_ARGS)
    4754             : {
    4755          84 :     ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
    4756             :     SplitTextOutputData tstate;
    4757             : 
    4758          84 :     tstate.astate = NULL;       /* no array accumulator is set up on this path */
    4759          84 :     InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
    4760          84 :     tstate.tupstore = rsi->setResult;   /* read only after InitMaterializedSRF() has run */
    4761          84 :     tstate.tupdesc = rsi->setDesc;
    4762             : 
    4763          84 :     (void) split_text(fcinfo, &tstate); /* true/false result deliberately ignored here */
    4764             : 
    4765          84 :     return (Datum) 0;           /* presumably rows reach the caller via rsi->setResult -- confirm */
    4766             : }
    4767             : 
    4768             : /*
    4769             :  * text_to_table_null
    4770             :  * parse input string and return table of elements,
    4771             :  * based on provided field separator and null string
    4772             :  *
    4773             :  * This is a separate entry point only to prevent the regression tests from
    4774             :  * complaining about different argument sets for the same internal function.
    4775             :  */
    4776             : Datum
    4777          24 : text_to_table_null(PG_FUNCTION_ARGS)
    4778             : {
    4779          24 :     return text_to_table(fcinfo);   /* split_text() checks PG_NARGS() itself for the null-string argument */
    4780             : }
    4781             : 
    4782             : /*
    4783             :  * Common code for text_to_array, text_to_array_null, text_to_table
    4784             :  * and text_to_table_null functions.
    4785             :  *
    4786             :  * Splits argument 0 on the separator in argument 1 and passes each field, together with the optional null-string argument 2, to split_text_accum_result().
    4787             :  * The callers are not strict, so null inputs are tested for explicitly: returns false if the result is to be null (NULL input string), else returns true.
    4788             :  *
    4789             :  * Note that if the result is valid but empty (zero elements), we return
    4790             :  * without changing *tstate --- caller must handle that case, too.
    4791             :  */
    4792             : static bool
    4793         254 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
    4794             : {
    4795             :     text       *inputstring;
    4796             :     text       *fldsep;
    4797             :     text       *null_string;
    4798         254 :     Oid         collation = PG_GET_COLLATION();
    4799             :     int         inputstring_len;
    4800             :     int         fldsep_len;
    4801             :     char       *start_ptr;
    4802             :     text       *result_text;
    4803             : 
    4804             :     /* a NULL input string yields a NULL result */
    4805         254 :     if (PG_ARGISNULL(0))
    4806          12 :         return false;
    4807             : 
    4808         242 :     inputstring = PG_GETARG_TEXT_PP(0);
    4809             : 
    4810             :     /* fldsep can be NULL; that selects the split-into-characters path below */
    4811         242 :     if (!PG_ARGISNULL(1))
    4812         212 :         fldsep = PG_GETARG_TEXT_PP(1);
    4813             :     else
    4814          30 :         fldsep = NULL;
    4815             : 
    4816             :     /* null_string can be NULL, or omitted entirely by two-argument callers */
    4817         242 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4818          84 :         null_string = PG_GETARG_TEXT_PP(2);
    4819             :     else
    4820         158 :         null_string = NULL;
    4821             : 
    4822         242 :     if (fldsep != NULL)
    4823             :     {
    4824             :         /*
    4825             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4826             :          * to search for occurrences of fldsep.
    4827             :          */
    4828             :         TextPositionState state;
    4829             : 
    4830         212 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4831         212 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4832             : 
    4833             :         /* return empty set for empty input string (*tstate is left untouched) */
    4834         212 :         if (inputstring_len < 1)
    4835          60 :             return true;
    4836             : 
    4837             :         /* empty field separator: return input string as a one-element set */
    4838         200 :         if (fldsep_len < 1)
    4839             :         {
    4840          48 :             split_text_accum_result(tstate, inputstring,
    4841             :                                     null_string, collation);
    4842          48 :             return true;
    4843             :         }
    4844             : 
    4845         152 :         text_position_setup(inputstring, fldsep, collation, &state);
    4846             : 
    4847         152 :         start_ptr = VARDATA_ANY(inputstring);
    4848             : 
    4849             :         for (;;)
    4850         512 :         {
    4851             :             bool        found;
    4852             :             char       *end_ptr;
    4853             :             int         chunk_len;
    4854             : 
    4855         664 :             CHECK_FOR_INTERRUPTS();
    4856             : 
    4857         664 :             found = text_position_next(&state);
    4858         664 :             if (!found)
    4859             :             {
    4860             :                 /* fetch last field: everything from start_ptr to the end of the datum */
    4861         152 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4862         152 :                 end_ptr = NULL; /* not used, but some compilers complain */
    4863             :             }
    4864             :             else
    4865             :             {
    4866             :                 /* fetch non-last field: it ends where the separator match begins */
    4867         512 :                 end_ptr = text_position_get_match_ptr(&state);
    4868         512 :                 chunk_len = end_ptr - start_ptr;
    4869             :             }
    4870             : 
    4871             :             /* build a temp text datum to pass to split_text_accum_result */
    4872         664 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4873             : 
    4874             :             /* stash away this field */
    4875         664 :             split_text_accum_result(tstate, result_text,
    4876             :                                     null_string, collation);
    4877             : 
    4878         664 :             pfree(result_text);     /* release the temp datum built above */
    4879             : 
    4880         664 :             if (!found)
    4881         152 :                 break;              /* the last field has been emitted */
    4882             : 
    4883         512 :             start_ptr = end_ptr + state.last_match_len;     /* next field starts just past the matched separator */
    4884             :         }
    4885             : 
    4886         152 :         text_position_cleanup(&state);
    4887             :     }
    4888             :     else
    4889             :     {
    4890             :         /*
    4891             :          * When fldsep is NULL, each character in the input string becomes a
    4892             :          * separate element in the result set.  The separator is effectively
    4893             :          * the space between characters.
    4894             :          */
    4895          30 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4896             : 
    4897          30 :         start_ptr = VARDATA_ANY(inputstring);
    4898             : 
    4899         252 :         while (inputstring_len > 0)
    4900             :         {
    4901         222 :             int         chunk_len = pg_mblen(start_ptr);    /* byte length of the next (possibly multibyte) character */
    4902             : 
    4903         222 :             CHECK_FOR_INTERRUPTS();
    4904             : 
    4905             :             /* build a temp text datum to pass to split_text_accum_result */
    4906         222 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4907             : 
    4908             :             /* stash away this field */
    4909         222 :             split_text_accum_result(tstate, result_text,
    4910             :                                     null_string, collation);
    4911             : 
    4912         222 :             pfree(result_text);     /* release the temp datum built above */
    4913             : 
    4914         222 :             start_ptr += chunk_len;
    4915         222 :             inputstring_len -= chunk_len;
    4916             :         }
    4917             :     }
    4918             : 
    4919         182 :     return true;
    4920             : }
    4921             : 
    4922             : /*
    4923             :  * Add text item to result set (table or array).
    4924             :  *
    4925             :  * This is also responsible for checking to see if the item matches
    4926             :  * the null_string, in which case we should emit NULL instead.  A NULL null_string pointer disables that check.
    4927             :  */
    4928             : static void
    4929         934 : split_text_accum_result(SplitTextOutputData *tstate,
    4930             :                         text *field_value,
    4931             :                         text *null_string,
    4932             :                         Oid collation)
    4933             : {
    4934         934 :     bool        is_null = false;
    4935             : 
    4936         934 :     if (null_string && text_isequal(field_value, null_string, collation))
    4937          72 :         is_null = true;
    4938             : 
    4939         934 :     if (tstate->tupstore)   /* a tuplestore is present: append a one-column row to it */
    4940             :     {
    4941             :         Datum       values[1];
    4942             :         bool        nulls[1];
    4943             : 
    4944         228 :         values[0] = PointerGetDatum(field_value);
    4945         228 :         nulls[0] = is_null;
    4946             : 
    4947         228 :         tuplestore_putvalues(tstate->tupstore,
    4948             :                              tstate->tupdesc,
    4949             :                              values,
    4950             :                              nulls);
    4951             :     }
    4952             :     else                    /* no tuplestore: accumulate into the array build state instead */
    4953             :     {
    4954         706 :         tstate->astate = accumArrayResult(tstate->astate,
    4955             :                                           PointerGetDatum(field_value),
    4956             :                                           is_null,
    4957             :                                           TEXTOID,
    4958             :                                           CurrentMemoryContext);
    4959             :     }
    4960         934 : }
    4961             : 
    4962             : /*
    4963             :  * array_to_text
    4964             :  * concatenate Cstring representation of input array elements
    4965             :  * using provided field separator (no null string is supplied to the common code)
    4966             :  */
    4967             : Datum
    4968       75310 : array_to_text(PG_FUNCTION_ARGS)
    4969             : {
    4970       75310 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    4971       75310 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4972             : 
    4973       75310 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));  /* NULL = no null_string */
    4974             : }
    4975             : 
    4976             : /*
    4977             :  * array_to_text_null
    4978             :  * concatenate Cstring representation of input array elements
    4979             :  * using provided field separator and null string
    4980             :  *
    4981             :  * This version is not strict so we have to test for null inputs explicitly: a NULL array or separator gives a NULL result, while a NULL null string is allowed.
    4982             :  */
    4983             : Datum
    4984          12 : array_to_text_null(PG_FUNCTION_ARGS)
    4985             : {
    4986             :     ArrayType  *v;
    4987             :     char       *fldsep;
    4988             :     char       *null_string;
    4989             : 
    4990             :     /* returns NULL when first or second parameter is NULL */
    4991          12 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    4992           0 :         PG_RETURN_NULL();
    4993             : 
    4994          12 :     v = PG_GETARG_ARRAYTYPE_P(0);
    4995          12 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4996             : 
    4997             :     /* a NULL null-string argument is passed through as a null pointer */
    4998          12 :     if (!PG_ARGISNULL(2))
    4999           6 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    5000             :     else
    5001           6 :         null_string = NULL;
    5002             : 
    5003          12 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    5004             : }
    5005             : 
    5006             : /*
    5007             :  * common code for array_to_text and array_to_text_null functions: joins the output-function text of each element of v with fldsep; null elements print as null_string, or are skipped when null_string is NULL
    5008             :  */
    5009             : static text *
    5010       75340 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    5011             :                        const char *fldsep, const char *null_string)
    5012             : {
    5013             :     text       *result;
    5014             :     int         nitems,
    5015             :                *dims,
    5016             :                 ndims;
    5017             :     Oid         element_type;
    5018             :     int         typlen;
    5019             :     bool        typbyval;
    5020             :     char        typalign;
    5021             :     StringInfoData buf;
    5022       75340 :     bool        printed = false;    /* becomes true once anything has been appended to buf */
    5023             :     char       *p;
    5024             :     bits8      *bitmap;
    5025             :     int         bitmask;
    5026             :     int         i;
    5027             :     ArrayMetaState *my_extra;
    5028             : 
    5029       75340 :     ndims = ARR_NDIM(v);
    5030       75340 :     dims = ARR_DIMS(v);
    5031       75340 :     nitems = ArrayGetNItems(ndims, dims);
    5032             : 
    5033             :     /* if there are no elements, return an empty string */
    5034       75340 :     if (nitems == 0)
    5035       50376 :         return cstring_to_text_with_len("", 0);
    5036             : 
    5037       24964 :     element_type = ARR_ELEMTYPE(v);
    5038       24964 :     initStringInfo(&buf);
    5039             : 
    5040             :     /*
    5041             :      * We arrange to look up info about element type, including its output
    5042             :      * conversion proc, only once per series of calls, assuming the element
    5043             :      * type doesn't change underneath us.
    5044             :      */
    5045       24964 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    5046       24964 :     if (my_extra == NULL)
    5047             :     {
    5048        1452 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5049             :                                                       sizeof(ArrayMetaState));
    5050        1452 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    5051        1452 :         my_extra->element_type = ~element_type;     /* deliberately different, so the lookup below runs */
    5052             :     }
    5053             : 
    5054       24964 :     if (my_extra->element_type != element_type)
    5055             :     {
    5056             :         /*
    5057             :          * Get info about element type, including its output conversion proc
    5058             :          */
    5059        1452 :         get_type_io_data(element_type, IOFunc_output,
    5060             :                          &my_extra->typlen, &my_extra->typbyval,
    5061             :                          &my_extra->typalign, &my_extra->typdelim,
    5062             :                          &my_extra->typioparam, &my_extra->typiofunc);
    5063        1452 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    5064        1452 :                       fcinfo->flinfo->fn_mcxt);
    5065        1452 :         my_extra->element_type = element_type;      /* remember which type the cached info describes */
    5066             :     }
    5067       24964 :     typlen = my_extra->typlen;
    5068       24964 :     typbyval = my_extra->typbyval;
    5069       24964 :     typalign = my_extra->typalign;
    5070             : 
    5071       24964 :     p = ARR_DATA_PTR(v);
    5072       24964 :     bitmap = ARR_NULLBITMAP(v);
    5073       24964 :     bitmask = 1;
    5074             : 
    5075       84842 :     for (i = 0; i < nitems; i++)
    5076             :     {
    5077             :         Datum       itemvalue;
    5078             :         char       *value;
    5079             : 
    5080             :         /* Get source element, checking for NULL (a zero bit in the null bitmap takes the null branch) */
    5081       59878 :         if (bitmap && (*bitmap & bitmask) == 0)
    5082             :         {
    5083             :             /* if null_string is NULL, we just ignore null elements; note that p is not advanced in this branch */
    5084          18 :             if (null_string != NULL)
    5085             :             {
    5086           6 :                 if (printed)
    5087           6 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
    5088             :                 else
    5089           0 :                     appendStringInfoString(&buf, null_string);
    5090           6 :                 printed = true;
    5091             :             }
    5092             :         }
    5093             :         else
    5094             :         {
    5095       59860 :             itemvalue = fetch_att(p, typbyval, typlen);
    5096             : 
    5097       59860 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
    5098             : 
    5099       59860 :             if (printed)    /* the separator goes before every item except the first one printed */
    5100       34896 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    5101             :             else
    5102       24964 :                 appendStringInfoString(&buf, value);
    5103       59860 :             printed = true;
    5104             : 
    5105       59860 :             p = att_addlength_pointer(p, typlen, p);        /* step past this element's data ... */
    5106       59860 :             p = (char *) att_align_nominal(p, typalign);    /* ... and align for the next element */
    5107             :         }
    5108             : 
    5109             :         /* advance bitmap pointer if any */
    5110       59878 :         if (bitmap)
    5111             :         {
    5112         108 :             bitmask <<= 1;
    5113         108 :             if (bitmask == 0x100)   /* all eight bits of this bitmap byte consumed */
    5114             :             {
    5115           0 :                 bitmap++;
    5116           0 :                 bitmask = 1;
    5117             :             }
    5118             :         }
    5119             :     }
    5120             : 
    5121       24964 :     result = cstring_to_text_with_len(buf.data, buf.len);
    5122       24964 :     pfree(buf.data);
    5123             : 
    5124       24964 :     return result;
    5125             : }
    5126             : 
    5127             : /*
    5128             :  * Workhorse for to_bin, to_oct, and to_hex.  Returns value rendered in the given base as text, using lower-case digits and no leading zeros.  Note that base must be > 1 and <=
    5129             :  * 16.
    5130             :  */
    5131             : static inline text *
    5132       38750 : convert_to_base(uint64 value, int base)
    5133             : {
    5134       38750 :     const char *digits = "0123456789abcdef";
    5135             : 
    5136             :     /* We size the buffer for to_bin's longest possible return value (one character per bit). */
    5137             :     char        buf[sizeof(uint64) * BITS_PER_BYTE];
    5138       38750 :     char       *const end = buf + sizeof(buf);
    5139       38750 :     char       *ptr = end;
    5140             : 
    5141             :     Assert(base > 1);
    5142             :     Assert(base <= 16);
    5143             : 
    5144             :     do
    5145             :     {
    5146       75974 :         *--ptr = digits[value % base];  /* least-significant digit first, filling buf backwards from its end */
    5147       75974 :         value /= base;
    5148       75974 :     } while (ptr > buf && value);       /* do-while, so a zero value still yields "0" */
    5149             : 
    5150       38750 :     return cstring_to_text_with_len(ptr, end - ptr);    /* buf has no NUL terminator, so the length is passed explicitly */
    5151             : }
    5152             : 
    5153             : /*
    5154             :  * Convert an integer to a string containing a base-2 (binary) representation
    5155             :  * of the number.  This is the int32 variant.
    5156             :  */
    5157             : Datum
    5158          12 : to_bin32(PG_FUNCTION_ARGS)
    5159             : {
    5160          12 :     uint64      value = (uint32) PG_GETARG_INT32(0);    /* via uint32, so a negative input is not sign-extended to 64 bits */
    5161             : 
    5162          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 2));
    5163             : }
    5164             : Datum
    5165          12 : to_bin64(PG_FUNCTION_ARGS)
    5166             : {
    5167          12 :     uint64      value = (uint64) PG_GETARG_INT64(0);    /* int64 variant: the input is converted to unsigned 64-bit */
    5168             : 
    5169          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 2));
    5170             : }
    5171             : 
    5172             : /*
    5173             :  * Convert an integer to a string containing a base-8 (oct) representation of
    5174             :  * the number.  This is the int32 variant.
    5175             :  */
    5176             : Datum
    5177          12 : to_oct32(PG_FUNCTION_ARGS)
    5178             : {
    5179          12 :     uint64      value = (uint32) PG_GETARG_INT32(0);    /* via uint32, so a negative input is not sign-extended to 64 bits */
    5180             : 
    5181          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 8));
    5182             : }
    5183             : Datum
    5184          12 : to_oct64(PG_FUNCTION_ARGS)
    5185             : {
    5186          12 :     uint64      value = (uint64) PG_GETARG_INT64(0);    /* int64 variant: the input is converted to unsigned 64-bit */
    5187             : 
    5188          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 8));
    5189             : }
    5190             : 
    5191             : /*
    5192             :  * Convert an integer to a string containing a base-16 (hex) representation of
    5193             :  * the number.  This is the int32 variant.
    5194             :  */
    5195             : Datum
    5196       38690 : to_hex32(PG_FUNCTION_ARGS)
    5197             : {
    5198       38690 :     uint64      value = (uint32) PG_GETARG_INT32(0);    /* via uint32, so a negative input is not sign-extended to 64 bits */
    5199             : 
    5200       38690 :     PG_RETURN_TEXT_P(convert_to_base(value, 16));
    5201             : }
    5202             : Datum
    5203          12 : to_hex64(PG_FUNCTION_ARGS)
    5204             : {
    5205          12 :     uint64      value = (uint64) PG_GETARG_INT64(0);    /* int64 variant: the input is converted to unsigned 64-bit */
    5206             : 
    5207          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 16));
    5208             : }
    5209             : 
    5210             : /*
    5211             :  * Return the size of a datum, possibly compressed
    5212             :  *
    5213             :  * Works on any data type: varlena values are measured with toast_datum_size(), cstrings with strlen() + 1, and every other type reports its typlen
    5214             :  */
    5215             : Datum
    5216         122 : pg_column_size(PG_FUNCTION_ARGS)
    5217             : {
    5218         122 :     Datum       value = PG_GETARG_DATUM(0);
    5219             :     int32       result;
    5220             :     int         typlen;
    5221             : 
    5222             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5223         122 :     if (fcinfo->flinfo->fn_extra == NULL)
    5224             :     {
    5225             :         /* Lookup the datatype of the supplied argument */
    5226         122 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5227             : 
    5228         122 :         typlen = get_typlen(argtypeid);
    5229         122 :         if (typlen == 0)        /* should not happen */
    5230           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5231             : 
    5232         122 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5233             :                                                       sizeof(int));
    5234         122 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5235             :     }
    5236             :     else
    5237           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);   /* later calls: reuse the cached typlen */
    5238             : 
    5239         122 :     if (typlen == -1)
    5240             :     {
    5241             :         /* varlena type, possibly toasted */
    5242         122 :         result = toast_datum_size(value);
    5243             :     }
    5244           0 :     else if (typlen == -2)
    5245             :     {
    5246             :         /* cstring */
    5247           0 :         result = strlen(DatumGetCString(value)) + 1;    /* +1 counts the terminating NUL */
    5248             :     }
    5249             :     else
    5250             :     {
    5251             :         /* ordinary fixed-width type */
    5252           0 :         result = typlen;
    5253             :     }
    5254             : 
    5255         122 :     PG_RETURN_INT32(result);
    5256             : }
    5257             : 
    5258             : /*
    5259             :  * Return the compression method stored in the compressed attribute, as text ("pglz" or "lz4").  Return
    5260             :  * NULL for non varlena type or uncompressed data.
    5261             :  */
    5262             : Datum
    5263         162 : pg_column_compression(PG_FUNCTION_ARGS)
    5264             : {
    5265             :     int         typlen;
    5266             :     char       *result;
    5267             :     ToastCompressionId cmid;
    5268             : 
    5269             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5270         162 :     if (fcinfo->flinfo->fn_extra == NULL)
    5271             :     {
    5272             :         /* Lookup the datatype of the supplied argument */
    5273         108 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5274             : 
    5275         108 :         typlen = get_typlen(argtypeid);
    5276         108 :         if (typlen == 0)        /* should not happen */
    5277           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5278             : 
    5279         108 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5280             :                                                       sizeof(int));
    5281         108 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5282             :     }
    5283             :     else
    5284          54 :         typlen = *((int *) fcinfo->flinfo->fn_extra);   /* later calls: reuse the cached typlen */
    5285             : 
    5286         162 :     if (typlen != -1)           /* not a varlena type */
    5287           0 :         PG_RETURN_NULL();
    5288             : 
    5289             :     /* get the compression method id stored in the compressed varlena */
    5290         162 :     cmid = toast_get_compression_id((struct varlena *)
    5291         162 :                                     DatumGetPointer(PG_GETARG_DATUM(0)));
    5292         162 :     if (cmid == TOAST_INVALID_COMPRESSION_ID)
    5293           6 :         PG_RETURN_NULL();
    5294             : 
    5295             :     /* convert compression method id to compression method name */
    5296         156 :     switch (cmid)
    5297             :     {
    5298          66 :         case TOAST_PGLZ_COMPRESSION_ID:
    5299          66 :             result = "pglz";
    5300          66 :             break;
    5301          90 :         case TOAST_LZ4_COMPRESSION_ID:
    5302          90 :             result = "lz4";
    5303          90 :             break;
    5304           0 :         default:
    5305           0 :             elog(ERROR, "invalid compression method id %d", cmid);
    5306             :     }
    5307             : 
    5308         156 :     PG_RETURN_TEXT_P(cstring_to_text(result));
    5309             : }
    5310             : 
    5311             : /*
    5312             :  * Return the chunk_id of the on-disk TOASTed value (the va_valueid of its TOAST pointer).  Return NULL if the value
    5313             :  * is un-TOASTed or not on-disk, or if the argument's type is not varlena.
    5314             :  */
    5315             : Datum
    5316          12 : pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
    5317             : {
    5318             :     int         typlen;
    5319             :     struct varlena *attr;
    5320             :     struct varatt_external toast_pointer;
    5321             : 
    5322             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5323          12 :     if (fcinfo->flinfo->fn_extra == NULL)
    5324             :     {
    5325             :         /* Lookup the datatype of the supplied argument */
    5326          12 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5327             : 
    5328          12 :         typlen = get_typlen(argtypeid);
    5329          12 :         if (typlen == 0)        /* should not happen */
    5330           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5331             : 
    5332          12 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5333             :                                                       sizeof(int));
    5334          12 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5335             :     }
    5336             :     else
    5337           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);   /* later calls: reuse the cached typlen */
    5338             : 
    5339          12 :     if (typlen != -1)           /* not a varlena type */
    5340           0 :         PG_RETURN_NULL();
    5341             : 
    5342          12 :     attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
    5343             : 
    5344          12 :     if (!VARATT_IS_EXTERNAL_ONDISK(attr))   /* value is not an on-disk external TOAST pointer */
    5345           6 :         PG_RETURN_NULL();
    5346             : 
    5347           6 :     VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);   /* fills the local toast_pointer struct from attr */
    5348             : 
    5349           6 :     PG_RETURN_OID(toast_pointer.va_valueid);
    5350             : }
    5351             : 
    5352             : /*
    5353             :  * string_agg - Concatenates values and returns string.
    5354             :  *
    5355             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    5356             :  *
    5357             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    5358             :  * actually used at all, and on subsequent calls the delimiter precedes
    5359             :  * the associated value.
    5360             :  */
    5361             : 
    5362             : /* subroutine to initialize state: returns a new StringInfo created while the aggregate memory context is current */
    5363             : static StringInfo
    5364        2334 : makeStringAggState(FunctionCallInfo fcinfo)
    5365             : {
    5366             :     StringInfo  state;
    5367             :     MemoryContext aggcontext;
    5368             :     MemoryContext oldcontext;
    5369             : 
    5370        2334 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    5371             :     {
    5372             :         /* cannot be called directly because of internal-type argument */
    5373           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    5374             :     }
    5375             : 
    5376             :     /*
    5377             :      * Create state in aggregate context.  It'll stay there across subsequent
    5378             :      * calls.
    5379             :      */
    5380        2334 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    5381        2334 :     state = makeStringInfo();
    5382        2334 :     MemoryContextSwitchTo(oldcontext);      /* restore the caller's memory context */
    5383             : 
    5384        2334 :     return state;
    5385             : }
    5386             : 
    5387             : Datum
    5388      861580 : string_agg_transfn(PG_FUNCTION_ARGS)
    5389             : {
    5390             :     StringInfo  state;
    5391             : 
    5392      861580 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);     /* arg 0: transition state; created below on the first non-null value */
    5393             : 
    5394             :     /* Append the value (arg 1) unless null, preceding it with the delimiter (arg 2). */
    5395      861580 :     if (!PG_ARGISNULL(1))
    5396             :     {
    5397      846532 :         text       *value = PG_GETARG_TEXT_PP(1);
    5398      846532 :         bool        isfirst = false;
    5399             : 
    5400             :         /*
    5401             :          * You might think we can just throw away the first delimiter, however
    5402             :          * we must keep it as we may be a parallel worker doing partial
    5403             :          * aggregation building a state to send to the main process.  We need
    5404             :          * to keep the delimiter of every aggregation so that the combine
    5405             :          * function can properly join up the strings of two separately
    5406             :          * partially aggregated results.  The first delimiter is only stripped
    5407             :          * off in the final function.  To know how much to strip off the front
    5408             :          * of the string, we store the length of the first delimiter in the
    5409             :          * StringInfo's cursor field, which we don't otherwise need here.
    5410             :          */
    5411      846532 :         if (state == NULL)
    5412             :         {
    5413        1946 :             state = makeStringAggState(fcinfo);
    5414        1946 :             isfirst = true;
    5415             :         }
    5416             : 
    5417      846532 :         if (!PG_ARGISNULL(2))   /* a NULL delimiter appends nothing before the value */
    5418             :         {
    5419      846532 :             text       *delim = PG_GETARG_TEXT_PP(2);
    5420             : 
    5421      846532 :             appendStringInfoText(state, delim);
    5422      846532 :             if (isfirst)
    5423        1946 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);   /* length of the leading delimiter, in bytes */
    5424             :         }
    5425             : 
    5426      846532 :         appendStringInfoText(state, value);
    5427             :     }
    5428             : 
    5429             :     /*
    5430             :      * The transition type for string_agg() is declared to be "internal",
    5431             :      * which is a pass-by-value type the same size as a pointer.
    5432             :      */
    5433      861580 :     if (state)
    5434      861502 :         PG_RETURN_POINTER(state);
    5435          78 :     PG_RETURN_NULL();           /* no state exists yet */
    5436             : }
    5437             : 
    5438             : /*
    5439             :  * string_agg_combine
    5440             :  *      Aggregate combine function for string_agg(text) and string_agg(bytea)
    5441             :  */
    5442             : Datum
    5443         120 : string_agg_combine(PG_FUNCTION_ARGS)
    5444             : {
    5445             :     StringInfo  state1;
    5446             :     StringInfo  state2;
    5447             :     MemoryContext agg_context;
    5448             : 
    5449         120 :     if (!AggCheckCallContext(fcinfo, &agg_context))
    5450           0 :         elog(ERROR, "aggregate function called in non-aggregate context");
    5451             : 
    5452         120 :     state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5453         120 :     state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
    5454             : 
    5455         120 :     if (state2 == NULL)
    5456             :     {
    5457             :         /*
    5458             :          * NULL state2 is easy, just return state1, which we know is already
    5459             :          * in the agg_context
    5460             :          */
    5461           0 :         if (state1 == NULL)
    5462           0 :             PG_RETURN_NULL();
    5463           0 :         PG_RETURN_POINTER(state1);
    5464             :     }
    5465             : 
    5466         120 :     if (state1 == NULL)
    5467             :     {
    5468             :         /* We must copy state2's data into the agg_context */
    5469             :         MemoryContext old_context;
    5470             : 
    5471         120 :         old_context = MemoryContextSwitchTo(agg_context);
    5472         120 :         state1 = makeStringAggState(fcinfo);
    5473         120 :         appendBinaryStringInfo(state1, state2->data, state2->len);
    5474         120 :         state1->cursor = state2->cursor;
    5475         120 :         MemoryContextSwitchTo(old_context);
    5476             :     }
    5477           0 :     else if (state2->len > 0)
    5478             :     {
    5479             :         /* Combine ... state1->cursor does not change in this case */
    5480           0 :         appendBinaryStringInfo(state1, state2->data, state2->len);
    5481             :     }
    5482             : 
    5483         120 :     PG_RETURN_POINTER(state1);
    5484             : }
    5485             : 
    5486             : /*
    5487             :  * string_agg_serialize
    5488             :  *      Aggregate serialize function for string_agg(text) and string_agg(bytea)
    5489             :  *
    5490             :  * This is strict, so we need not handle NULL input
    5491             :  */
    5492             : Datum
    5493         120 : string_agg_serialize(PG_FUNCTION_ARGS)
    5494             : {
    5495             :     StringInfo  state;
    5496             :     StringInfoData buf;
    5497             :     bytea      *result;
    5498             : 
    5499             :     /* cannot be called directly because of internal-type argument */
    5500             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5501             : 
    5502         120 :     state = (StringInfo) PG_GETARG_POINTER(0);
    5503             : 
    5504         120 :     pq_begintypsend(&buf);
    5505             : 
    5506             :     /* cursor */
    5507         120 :     pq_sendint(&buf, state->cursor, 4);
    5508             : 
    5509             :     /* data */
    5510         120 :     pq_sendbytes(&buf, state->data, state->len);
    5511             : 
    5512         120 :     result = pq_endtypsend(&buf);
    5513             : 
    5514         120 :     PG_RETURN_BYTEA_P(result);
    5515             : }
    5516             : 
    5517             : /*
    5518             :  * string_agg_deserialize
    5519             :  *      Aggregate deserial function for string_agg(text) and string_agg(bytea)
    5520             :  *
    5521             :  * This is strict, so we need not handle NULL input
    5522             :  */
    5523             : Datum
    5524         120 : string_agg_deserialize(PG_FUNCTION_ARGS)
    5525             : {
    5526             :     bytea      *sstate;
    5527             :     StringInfo  result;
    5528             :     StringInfoData buf;
    5529             :     char       *data;
    5530             :     int         datalen;
    5531             : 
    5532             :     /* cannot be called directly because of internal-type argument */
    5533             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5534             : 
    5535         120 :     sstate = PG_GETARG_BYTEA_PP(0);
    5536             : 
    5537             :     /*
    5538             :      * Initialize a StringInfo so that we can "receive" it using the standard
    5539             :      * recv-function infrastructure.
    5540             :      */
    5541         120 :     initReadOnlyStringInfo(&buf, VARDATA_ANY(sstate),
    5542         120 :                            VARSIZE_ANY_EXHDR(sstate));
    5543             : 
    5544         120 :     result = makeStringAggState(fcinfo);
    5545             : 
    5546             :     /* cursor */
    5547         120 :     result->cursor = pq_getmsgint(&buf, 4);
    5548             : 
    5549             :     /* data */
    5550         120 :     datalen = VARSIZE_ANY_EXHDR(sstate) - 4;
    5551         120 :     data = (char *) pq_getmsgbytes(&buf, datalen);
    5552         120 :     appendBinaryStringInfo(result, data, datalen);
    5553             : 
    5554         120 :     pq_getmsgend(&buf);
    5555             : 
    5556         120 :     PG_RETURN_POINTER(result);
    5557             : }
    5558             : 
    5559             : Datum
    5560        2018 : string_agg_finalfn(PG_FUNCTION_ARGS)
    5561             : {
    5562             :     StringInfo  state;
    5563             : 
    5564             :     /* cannot be called directly because of internal-type argument */
    5565             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5566             : 
    5567        2018 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5568             : 
    5569        2018 :     if (state != NULL)
    5570             :     {
    5571             :         /* As per comment in transfn, strip data before the cursor position */
    5572        1946 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(&state->data[state->cursor],
    5573             :                                                   state->len - state->cursor));
    5574             :     }
    5575             :     else
    5576          72 :         PG_RETURN_NULL();
    5577             : }
    5578             : 
    5579             : /*
    5580             :  * Prepare cache with fmgr info for the output functions of the datatypes of
    5581             :  * the arguments of a concat-like function, beginning with argument "argidx".
    5582             :  * (Arguments before that will have corresponding slots in the resulting
    5583             :  * FmgrInfo array, but we don't fill those slots.)
    5584             :  */
    5585             : static FmgrInfo *
    5586         106 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
    5587             : {
    5588             :     FmgrInfo   *foutcache;
    5589             :     int         i;
    5590             : 
    5591             :     /* We keep the info in fn_mcxt so it survives across calls */
    5592         106 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5593         106 :                                                 PG_NARGS() * sizeof(FmgrInfo));
    5594             : 
    5595         400 :     for (i = argidx; i < PG_NARGS(); i++)
    5596             :     {
    5597             :         Oid         valtype;
    5598             :         Oid         typOutput;
    5599             :         bool        typIsVarlena;
    5600             : 
    5601         294 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    5602         294 :         if (!OidIsValid(valtype))
    5603           0 :             elog(ERROR, "could not determine data type of concat() input");
    5604             : 
    5605         294 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
    5606         294 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
    5607             :     }
    5608             : 
    5609         106 :     fcinfo->flinfo->fn_extra = foutcache;
    5610             : 
    5611         106 :     return foutcache;
    5612             : }
    5613             : 
    5614             : /*
    5615             :  * Implementation of both concat() and concat_ws().
    5616             :  *
    5617             :  * sepstr is the separator string to place between values.
    5618             :  * argidx identifies the first argument to concatenate (counting from zero);
    5619             :  * note that this must be constant across any one series of calls.
    5620             :  *
    5621             :  * Returns NULL if result should be NULL, else text value.
    5622             :  */
    5623             : static text *
    5624         264 : concat_internal(const char *sepstr, int argidx,
    5625             :                 FunctionCallInfo fcinfo)
    5626             : {
    5627             :     text       *result;
    5628             :     StringInfoData str;
    5629             :     FmgrInfo   *foutcache;
    5630         264 :     bool        first_arg = true;
    5631             :     int         i;
    5632             : 
    5633             :     /*
    5634             :      * concat(VARIADIC some-array) is essentially equivalent to
    5635             :      * array_to_text(), ie concat the array elements with the given separator.
    5636             :      * So we just pass the case off to that code.
    5637             :      */
    5638         264 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5639             :     {
    5640             :         ArrayType  *arr;
    5641             : 
    5642             :         /* Should have just the one argument */
    5643             :         Assert(argidx == PG_NARGS() - 1);
    5644             : 
    5645             :         /* concat(VARIADIC NULL) is defined as NULL */
    5646          30 :         if (PG_ARGISNULL(argidx))
    5647          12 :             return NULL;
    5648             : 
    5649             :         /*
    5650             :          * Non-null argument had better be an array.  We assume that any call
    5651             :          * context that could let get_fn_expr_variadic return true will have
    5652             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    5653             :          * it should be okay to just Assert that it's an array rather than
    5654             :          * doing a full-fledged error check.
    5655             :          */
    5656             :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    5657             : 
    5658             :         /* OK, safe to fetch the array value */
    5659          18 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    5660             : 
    5661             :         /*
    5662             :          * And serialize the array.  We tell array_to_text to ignore null
    5663             :          * elements, which matches the behavior of the loop below.
    5664             :          */
    5665          18 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    5666             :     }
    5667             : 
    5668             :     /* Normal case without explicit VARIADIC marker */
    5669         234 :     initStringInfo(&str);
    5670             : 
    5671             :     /* Get output function info, building it if first time through */
    5672         234 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
    5673         234 :     if (foutcache == NULL)
    5674         106 :         foutcache = build_concat_foutcache(fcinfo, argidx);
    5675             : 
    5676         822 :     for (i = argidx; i < PG_NARGS(); i++)
    5677             :     {
    5678         588 :         if (!PG_ARGISNULL(i))
    5679             :         {
    5680         510 :             Datum       value = PG_GETARG_DATUM(i);
    5681             : 
    5682             :             /* add separator if appropriate */
    5683         510 :             if (first_arg)
    5684         228 :                 first_arg = false;
    5685             :             else
    5686         282 :                 appendStringInfoString(&str, sepstr);
    5687             : 
    5688             :             /* call the appropriate type output function, append the result */
    5689         510 :             appendStringInfoString(&str,
    5690         510 :                                    OutputFunctionCall(&foutcache[i], value));
    5691             :         }
    5692             :     }
    5693             : 
    5694         234 :     result = cstring_to_text_with_len(str.data, str.len);
    5695         234 :     pfree(str.data);
    5696             : 
    5697         234 :     return result;
    5698             : }
    5699             : 
    5700             : /*
    5701             :  * Concatenate all arguments. NULL arguments are ignored.
    5702             :  */
    5703             : Datum
    5704         186 : text_concat(PG_FUNCTION_ARGS)
    5705             : {
    5706             :     text       *result;
    5707             : 
    5708         186 :     result = concat_internal("", 0, fcinfo);
    5709         186 :     if (result == NULL)
    5710           6 :         PG_RETURN_NULL();
    5711         180 :     PG_RETURN_TEXT_P(result);
    5712             : }
    5713             : 
    5714             : /*
    5715             :  * Concatenate all but first argument value with separators. The first
    5716             :  * parameter is used as the separator. NULL arguments are ignored.
    5717             :  */
    5718             : Datum
    5719          84 : text_concat_ws(PG_FUNCTION_ARGS)
    5720             : {
    5721             :     char       *sep;
    5722             :     text       *result;
    5723             : 
    5724             :     /* return NULL when separator is NULL */
    5725          84 :     if (PG_ARGISNULL(0))
    5726           6 :         PG_RETURN_NULL();
    5727          78 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
    5728             : 
    5729          78 :     result = concat_internal(sep, 1, fcinfo);
    5730          78 :     if (result == NULL)
    5731           6 :         PG_RETURN_NULL();
    5732          72 :     PG_RETURN_TEXT_P(result);
    5733             : }
    5734             : 
    5735             : /*
    5736             :  * Return first n characters in the string. When n is negative,
    5737             :  * return all but last |n| characters.
    5738             :  */
    5739             : Datum
    5740        2148 : text_left(PG_FUNCTION_ARGS)
    5741             : {
    5742        2148 :     int         n = PG_GETARG_INT32(1);
    5743             : 
    5744        2148 :     if (n < 0)
    5745             :     {
    5746          30 :         text       *str = PG_GETARG_TEXT_PP(0);
    5747          30 :         const char *p = VARDATA_ANY(str);
    5748          30 :         int         len = VARSIZE_ANY_EXHDR(str);
    5749             :         int         rlen;
    5750             : 
    5751          30 :         n = pg_mbstrlen_with_len(p, len) + n;
    5752          30 :         rlen = pg_mbcharcliplen(p, len, n);
    5753          30 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    5754             :     }
    5755             :     else
    5756        2118 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
    5757             : }
    5758             : 
    5759             : /*
    5760             :  * Return last n characters in the string. When n is negative,
    5761             :  * return all but first |n| characters.
    5762             :  */
    5763             : Datum
    5764          66 : text_right(PG_FUNCTION_ARGS)
    5765             : {
    5766          66 :     text       *str = PG_GETARG_TEXT_PP(0);
    5767          66 :     const char *p = VARDATA_ANY(str);
    5768          66 :     int         len = VARSIZE_ANY_EXHDR(str);
    5769          66 :     int         n = PG_GETARG_INT32(1);
    5770             :     int         off;
    5771             : 
    5772          66 :     if (n < 0)
    5773          30 :         n = -n;
    5774             :     else
    5775          36 :         n = pg_mbstrlen_with_len(p, len) - n;
    5776          66 :     off = pg_mbcharcliplen(p, len, n);
    5777             : 
    5778          66 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    5779             : }
    5780             : 
    5781             : /*
    5782             :  * Return reversed string
    5783             :  */
    5784             : Datum
    5785           6 : text_reverse(PG_FUNCTION_ARGS)
    5786             : {
    5787           6 :     text       *str = PG_GETARG_TEXT_PP(0);
    5788           6 :     const char *p = VARDATA_ANY(str);
    5789           6 :     int         len = VARSIZE_ANY_EXHDR(str);
    5790           6 :     const char *endp = p + len;
    5791             :     text       *result;
    5792             :     char       *dst;
    5793             : 
    5794           6 :     result = palloc(len + VARHDRSZ);
    5795           6 :     dst = (char *) VARDATA(result) + len;
    5796           6 :     SET_VARSIZE(result, len + VARHDRSZ);
    5797             : 
    5798           6 :     if (pg_database_encoding_max_length() > 1)
    5799             :     {
    5800             :         /* multibyte version */
    5801          36 :         while (p < endp)
    5802             :         {
    5803             :             int         sz;
    5804             : 
    5805          30 :             sz = pg_mblen(p);
    5806          30 :             dst -= sz;
    5807          30 :             memcpy(dst, p, sz);
    5808          30 :             p += sz;
    5809             :         }
    5810             :     }
    5811             :     else
    5812             :     {
    5813             :         /* single byte version */
    5814           0 :         while (p < endp)
    5815           0 :             *(--dst) = *p++;
    5816             :     }
    5817             : 
    5818           6 :     PG_RETURN_TEXT_P(result);
    5819             : }
    5820             : 
    5821             : 
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */

/*
 * Advance ptr by one byte, and raise an "unterminated format() type
 * specifier" error if that leaves it at or beyond end_ptr.  ptr is
 * incremented in place, so it must be a modifiable lvalue.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    do { \
        if (++(ptr) >= (end_ptr)) \
            ereport(ERROR, \
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
                     errmsg("unterminated format() type specifier"), \
                     errhint("For a single \"%%\" use \"%%%%\"."))); \
    } while (0)
    5835             : 
    5836             : /*
    5837             :  * Returns a formatted string
    5838             :  */
    5839             : Datum
    5840       33090 : text_format(PG_FUNCTION_ARGS)
    5841             : {
    5842             :     text       *fmt;
    5843             :     StringInfoData str;
    5844             :     const char *cp;
    5845             :     const char *start_ptr;
    5846             :     const char *end_ptr;
    5847             :     text       *result;
    5848             :     int         arg;
    5849             :     bool        funcvariadic;
    5850             :     int         nargs;
    5851       33090 :     Datum      *elements = NULL;
    5852       33090 :     bool       *nulls = NULL;
    5853       33090 :     Oid         element_type = InvalidOid;
    5854       33090 :     Oid         prev_type = InvalidOid;
    5855       33090 :     Oid         prev_width_type = InvalidOid;
    5856             :     FmgrInfo    typoutputfinfo;
    5857             :     FmgrInfo    typoutputinfo_width;
    5858             : 
    5859             :     /* When format string is null, immediately return null */
    5860       33090 :     if (PG_ARGISNULL(0))
    5861           6 :         PG_RETURN_NULL();
    5862             : 
    5863             :     /* If argument is marked VARIADIC, expand array into elements */
    5864       33084 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5865             :     {
    5866             :         ArrayType  *arr;
    5867             :         int16       elmlen;
    5868             :         bool        elmbyval;
    5869             :         char        elmalign;
    5870             :         int         nitems;
    5871             : 
    5872             :         /* Should have just the one argument */
    5873             :         Assert(PG_NARGS() == 2);
    5874             : 
    5875             :         /* If argument is NULL, we treat it as zero-length array */
    5876          48 :         if (PG_ARGISNULL(1))
    5877           6 :             nitems = 0;
    5878             :         else
    5879             :         {
    5880             :             /*
    5881             :              * Non-null argument had better be an array.  We assume that any
    5882             :              * call context that could let get_fn_expr_variadic return true
    5883             :              * will have checked that a VARIADIC-labeled parameter actually is
    5884             :              * an array.  So it should be okay to just Assert that it's an
    5885             :              * array rather than doing a full-fledged error check.
    5886             :              */
    5887             :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5888             : 
    5889             :             /* OK, safe to fetch the array value */
    5890          42 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5891             : 
    5892             :             /* Get info about array element type */
    5893          42 :             element_type = ARR_ELEMTYPE(arr);
    5894          42 :             get_typlenbyvalalign(element_type,
    5895             :                                  &elmlen, &elmbyval, &elmalign);
    5896             : 
    5897             :             /* Extract all array elements */
    5898          42 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5899             :                               &elements, &nulls, &nitems);
    5900             :         }
    5901             : 
    5902          48 :         nargs = nitems + 1;
    5903          48 :         funcvariadic = true;
    5904             :     }
    5905             :     else
    5906             :     {
    5907             :         /* Non-variadic case, we'll process the arguments individually */
    5908       33036 :         nargs = PG_NARGS();
    5909       33036 :         funcvariadic = false;
    5910             :     }
    5911             : 
    5912             :     /* Setup for main loop. */
    5913       33084 :     fmt = PG_GETARG_TEXT_PP(0);
    5914       33084 :     start_ptr = VARDATA_ANY(fmt);
    5915       33084 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5916       33084 :     initStringInfo(&str);
    5917       33084 :     arg = 1;                    /* next argument position to print */
    5918             : 
    5919             :     /* Scan format string, looking for conversion specifiers. */
    5920     1011050 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5921             :     {
    5922             :         int         argpos;
    5923             :         int         widthpos;
    5924             :         int         flags;
    5925             :         int         width;
    5926             :         Datum       value;
    5927             :         bool        isNull;
    5928             :         Oid         typid;
    5929             : 
    5930             :         /*
    5931             :          * If it's not the start of a conversion specifier, just copy it to
    5932             :          * the output buffer.
    5933             :          */
    5934      978026 :         if (*cp != '%')
    5935             :         {
    5936      912284 :             appendStringInfoCharMacro(&str, *cp);
    5937      912302 :             continue;
    5938             :         }
    5939             : 
    5940       65742 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5941             : 
    5942             :         /* Easy case: %% outputs a single % */
    5943       65742 :         if (*cp == '%')
    5944             :         {
    5945          18 :             appendStringInfoCharMacro(&str, *cp);
    5946          18 :             continue;
    5947             :         }
    5948             : 
    5949             :         /* Parse the optional portions of the format specifier */
    5950       65724 :         cp = text_format_parse_format(cp, end_ptr,
    5951             :                                       &argpos, &widthpos,
    5952             :                                       &flags, &width);
    5953             : 
    5954             :         /*
    5955             :          * Next we should see the main conversion specifier.  Whether or not
    5956             :          * an argument position was present, it's known that at least one
    5957             :          * character remains in the string at this point.  Experience suggests
    5958             :          * that it's worth checking that that character is one of the expected
    5959             :          * ones before we try to fetch arguments, so as to produce the least
    5960             :          * confusing response to a mis-formatted specifier.
    5961             :          */
    5962       65700 :         if (strchr("sIL", *cp) == NULL)
    5963           6 :             ereport(ERROR,
    5964             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5965             :                      errmsg("unrecognized format() type specifier \"%.*s\"",
    5966             :                             pg_mblen(cp), cp),
    5967             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5968             : 
    5969             :         /* If indirect width was specified, get its value */
    5970       65694 :         if (widthpos >= 0)
    5971             :         {
    5972             :             /* Collect the specified or next argument position */
    5973          42 :             if (widthpos > 0)
    5974          36 :                 arg = widthpos;
    5975          42 :             if (arg >= nargs)
    5976           0 :                 ereport(ERROR,
    5977             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5978             :                          errmsg("too few arguments for format()")));
    5979             : 
    5980             :             /* Get the value and type of the selected argument */
    5981          42 :             if (!funcvariadic)
    5982             :             {
    5983          42 :                 value = PG_GETARG_DATUM(arg);
    5984          42 :                 isNull = PG_ARGISNULL(arg);
    5985          42 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5986             :             }
    5987             :             else
    5988             :             {
    5989           0 :                 value = elements[arg - 1];
    5990           0 :                 isNull = nulls[arg - 1];
    5991           0 :                 typid = element_type;
    5992             :             }
    5993          42 :             if (!OidIsValid(typid))
    5994           0 :                 elog(ERROR, "could not determine data type of format() input");
    5995             : 
    5996          42 :             arg++;
    5997             : 
    5998             :             /* We can treat NULL width the same as zero */
    5999          42 :             if (isNull)
    6000           6 :                 width = 0;
    6001          36 :             else if (typid == INT4OID)
    6002          36 :                 width = DatumGetInt32(value);
    6003           0 :             else if (typid == INT2OID)
    6004           0 :                 width = DatumGetInt16(value);
    6005             :             else
    6006             :             {
    6007             :                 /* For less-usual datatypes, convert to text then to int */
    6008             :                 char       *str;
    6009             : 
    6010           0 :                 if (typid != prev_width_type)
    6011             :                 {
    6012             :                     Oid         typoutputfunc;
    6013             :                     bool        typIsVarlena;
    6014             : 
    6015           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    6016           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    6017           0 :                     prev_width_type = typid;
    6018             :                 }
    6019             : 
    6020           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    6021             : 
    6022             :                 /* pg_strtoint32 will complain about bad data or overflow */
    6023           0 :                 width = pg_strtoint32(str);
    6024             : 
    6025           0 :                 pfree(str);
    6026             :             }
    6027             :         }
    6028             : 
    6029             :         /* Collect the specified or next argument position */
    6030       65694 :         if (argpos > 0)
    6031         132 :             arg = argpos;
    6032       65694 :         if (arg >= nargs)
    6033          24 :             ereport(ERROR,
    6034             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6035             :                      errmsg("too few arguments for format()")));
    6036             : 
    6037             :         /* Get the value and type of the selected argument */
    6038       65670 :         if (!funcvariadic)
    6039             :         {
    6040       64398 :             value = PG_GETARG_DATUM(arg);
    6041       64398 :             isNull = PG_ARGISNULL(arg);
    6042       64398 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    6043             :         }
    6044             :         else
    6045             :         {
    6046        1272 :             value = elements[arg - 1];
    6047        1272 :             isNull = nulls[arg - 1];
    6048        1272 :             typid = element_type;
    6049             :         }
    6050       65670 :         if (!OidIsValid(typid))
    6051           0 :             elog(ERROR, "could not determine data type of format() input");
    6052             : 
    6053       65670 :         arg++;
    6054             : 
    6055             :         /*
    6056             :          * Get the appropriate typOutput function, reusing previous one if
    6057             :          * same type as previous argument.  That's particularly useful in the
    6058             :          * variadic-array case, but often saves work even for ordinary calls.
    6059             :          */
    6060       65670 :         if (typid != prev_type)
    6061             :         {
    6062             :             Oid         typoutputfunc;
    6063             :             bool        typIsVarlena;
    6064             : 
    6065       34152 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    6066       34152 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    6067       34152 :             prev_type = typid;
    6068             :         }
    6069             : 
    6070             :         /*
    6071             :          * And now we can format the value.
    6072             :          */
    6073       65670 :         switch (*cp)
    6074             :         {
    6075       65670 :             case 's':
    6076             :             case 'I':
    6077             :             case 'L':
    6078       65670 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    6079             :                                               value, isNull,
    6080             :                                               flags, width);
    6081       65664 :                 break;
    6082           0 :             default:
    6083             :                 /* should not get here, because of previous check */
    6084           0 :                 ereport(ERROR,
    6085             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6086             :                          errmsg("unrecognized format() type specifier \"%.*s\"",
    6087             :                                 pg_mblen(cp), cp),
    6088             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    6089             :                 break;
    6090             :         }
    6091             :     }
    6092             : 
    6093             :     /* Don't need deconstruct_array results anymore. */
    6094       33024 :     if (elements != NULL)
    6095          42 :         pfree(elements);
    6096       33024 :     if (nulls != NULL)
    6097          42 :         pfree(nulls);
    6098             : 
    6099             :     /* Generate results. */
    6100       33024 :     result = cstring_to_text_with_len(str.data, str.len);
    6101       33024 :     pfree(str.data);
    6102             : 
    6103       33024 :     PG_RETURN_TEXT_P(result);
    6104             : }
    6105             : 
    6106             : /*
    6107             :  * Parse contiguous digits as a decimal number.
    6108             :  *
    6109             :  * Returns true if some digits could be parsed.
    6110             :  * The value is returned into *value, and *ptr is advanced to the next
    6111             :  * character to be parsed.
                     :  *
                     :  * If the first character is not a digit, the function returns false,
                     :  * stores 0 into *value, and leaves *ptr pointing where it did at entry.
                     :  * If accumulating a digit overflows the int accumulator (as reported by
                     :  * pg_mul_s32_overflow/pg_add_s32_overflow), an ERROR with
                     :  * ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE ("number is out of range") is raised.
    6112             :  *
    6113             :  * Note parsing invariant: at least one character is known available before
    6114             :  * string end (end_ptr) at entry, and this is still true at exit.
                     :  * NOTE(review): the invariant relies on ADVANCE_PARSE_POINTER, which is
                     :  * defined outside this section; it presumably errors out instead of
                     :  * letting cp reach end_ptr -- confirm against the macro definition.
    6115             :  */
    6116             : static bool
    6117      131412 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    6118             : {
    6119      131412 :     bool        found = false;
    6120      131412 :     const char *cp = *ptr;
    6121      131412 :     int         val = 0;
    6122             : 
    6123      131724 :     while (*cp >= '0' && *cp <= '9')
    6124             :     {
    6125         318 :         int8        digit = (*cp - '0');
    6126             : 
    6127         318 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
    6128         318 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
    6129           0 :             ereport(ERROR,
    6130             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    6131             :                      errmsg("number is out of range")));
    6132         318 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6133         312 :         found = true;
    6134             :     }
    6135             : 
    6136      131406 :     *ptr = cp;
    6137      131406 :     *value = val;
    6138             : 
    6139      131406 :     return found;
    6140             : }
    6141             : 
    6142             : /*
    6143             :  * Parse a format specifier (generally following the SUS printf spec).
    6144             :  *
    6145             :  * We have already advanced over the initial '%', and we are looking for
    6146             :  * [argpos][flags][width]type (but the type character is not consumed here).
    6147             :  *
    6148             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    6149             :  * Output parameters:
    6150             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    6151             :  *  widthpos: argument position for width.  Zero means the argument position
    6152             :  *          was unspecified (ie, take the next arg) and -1 means no width
    6153             :  *          argument (width was omitted or specified as a constant).
    6154             :  *  flags: bitmask of flags.
    6155             :  *  width: directly-specified width value.  Zero means the width was omitted
    6156             :  *          (note it's not necessary to distinguish this case from an explicit
    6157             :  *          zero width value).
                     :  *
                     :  * All four outputs are always assigned: they are reset to the defaults
                     :  * above before any parsing happens.  At most one of the two width outputs
                     :  * is changed from its default: when '*' is seen *widthpos becomes >= 0 and
                     :  * *width stays 0; otherwise *widthpos stays -1 and *width receives any
                     :  * literal width.
                     :  *
                     :  * An ERROR with ERRCODE_INVALID_PARAMETER_VALUE is raised when an explicit
                     :  * argument position of 0 is given (for the value or for the width), or
                     :  * when digits following '*' are not terminated by '$'.  Errors raised by
                     :  * text_format_parse_digits() can also propagate from here.
    6158             :  *
    6159             :  * The function result is the next character position to be parsed, ie, the
    6160             :  * location where the type character is/should be.
    6161             :  *
    6162             :  * Note parsing invariant: at least one character is known available before
    6163             :  * string end (end_ptr) at entry, and this is still true at exit.
    6164             :  */
    6165             : static const char *
    6166       65724 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    6167             :                          int *argpos, int *widthpos,
    6168             :                          int *flags, int *width)
    6169             : {
    6170       65724 :     const char *cp = start_ptr;
    6171             :     int         n;
    6172             : 
    6173             :     /* set defaults for output parameters */
    6174       65724 :     *argpos = -1;
    6175       65724 :     *widthpos = -1;
    6176       65724 :     *flags = 0;
    6177       65724 :     *width = 0;
    6178             : 
    6179             :     /* try to identify first number */
    6180       65724 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    6181             :     {
    6182         174 :         if (*cp != '$')
    6183             :         {
    6184             :             /* Must be just a width and a type, so we're done */
    6185          24 :             *width = n;
    6186          24 :             return cp;
    6187             :         }
    6188             :         /* The number was argument position */
    6189         150 :         *argpos = n;
    6190             :         /* Explicit 0 for argument index is immediately refused */
    6191         150 :         if (n == 0)
    6192           6 :             ereport(ERROR,
    6193             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6194             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    6195         144 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6196             :     }
    6197             : 
    6198             :     /* Handle flags (only minus is supported now); repeated '-' is accepted */
    6199       65718 :     while (*cp == '-')
    6200             :     {
    6201          30 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    6202          30 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6203             :     }
    6204             : 
    6205       65688 :     if (*cp == '*')
    6206             :     {
    6207             :         /* Handle indirect width */
    6208          48 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6209          48 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    6210             :         {
    6211             :             /* number in this position must be closed by $ */
    6212          42 :             if (*cp != '$')
    6213           0 :                 ereport(ERROR,
    6214             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6215             :                          errmsg("width argument position must be ended by \"$\"")));
    6216             :             /* The number was width argument position */
    6217          42 :             *widthpos = n;
    6218             :             /* Explicit 0 for argument index is immediately refused */
    6219          42 :             if (n == 0)
    6220           6 :                 ereport(ERROR,
    6221             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6222             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    6223          36 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
    6224             :         }
    6225             :         else
    6226           6 :             *widthpos = 0;      /* width's argument position is unspecified */
    6227             :     }
    6228             :     else
    6229             :     {
    6230             :         /* Check for direct width specification */
    6231       65640 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    6232          30 :             *width = n;
    6233             :     }
    6234             : 
    6235             :     /* cp should now be pointing at type character */
    6236       65676 :     return cp;
    6237             : }
    6238             : 
    6239             : /*
    6240             :  * Format a %s, %I, or %L conversion
                     :  *
                     :  * buf: output buffer that the formatted text is appended to.
                     :  * conversion: the type character ('s', 'I' or 'L').
                     :  * typOutputInfo: output function used to turn value into a C string.
                     :  * value, isNull: the argument being formatted.
                     :  * flags, width: padding controls, handed unchanged to
                     :  *          text_format_append_string().
                     :  *
                     :  * A NULL argument is rendered as an empty string for %s and as the
                     :  * unquoted word NULL for %L; for %I it raises an ERROR with
                     :  * ERRCODE_NULL_VALUE_NOT_ALLOWED.  A non-NULL argument is converted with
                     :  * OutputFunctionCall() and then run through quote_identifier() for %I or
                     :  * quote_literal_cstr() for %L before being appended; for any other
                     :  * conversion character the converted string is appended as-is.
    6241             :  */
    6242             : static void
    6243       65670 : text_format_string_conversion(StringInfo buf, char conversion,
    6244             :                               FmgrInfo *typOutputInfo,
    6245             :                               Datum value, bool isNull,
    6246             :                               int flags, int width)
    6247             : {
    6248             :     char       *str;
    6249             : 
    6250             :     /* Handle NULL arguments before trying to stringify the value. */
    6251       65670 :     if (isNull)
    6252             :     {
    6253         342 :         if (conversion == 's')
    6254         270 :             text_format_append_string(buf, "", flags, width);
    6255          72 :         else if (conversion == 'L')
    6256          66 :             text_format_append_string(buf, "NULL", flags, width);
    6257           6 :         else if (conversion == 'I')
    6258           6 :             ereport(ERROR,
    6259             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    6260             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    6261         336 :         return;
    6262             :     }
    6263             : 
    6264             :     /* Stringify. */
    6265       65328 :     str = OutputFunctionCall(typOutputInfo, value);
    6266             : 
    6267             :     /* Escape. */
    6268       65328 :     if (conversion == 'I')
    6269             :     {
    6270             :         /* quote_identifier may or may not allocate a new string; its result is not freed here. */
    6271        4896 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    6272             :     }
    6273       60432 :     else if (conversion == 'L')
    6274             :     {
    6275        3232 :         char       *qstr = quote_literal_cstr(str);
    6276             : 
    6277        3232 :         text_format_append_string(buf, qstr, flags, width);
    6278             :         /* quote_literal_cstr() always allocates a new string */
    6279        3232 :         pfree(qstr);
    6280             :     }
    6281             :     else
    6282       57200 :         text_format_append_string(buf, str, flags, width);
    6283             : 
    6284             :     /* Cleanup. */
    6285       65328 :     pfree(str);
    6286             : }
    6287             : 
    6288             : /*
    6289             :  * Append str to buf, padding as directed by flags/width
                     :  *
                     :  * width == 0 appends str unchanged.  A negative width selects left
                     :  * justification and its absolute value is used as the field width; INT_MIN
                     :  * is rejected with ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE because it cannot be
                     :  * negated.  For a positive width, TEXT_FORMAT_FLAG_MINUS in flags selects
                     :  * left justification; otherwise the string is right-justified.
                     :  *
                     :  * The string length is taken from pg_mbstrlen() and padding is emitted with
                     :  * appendStringInfoSpaces() only when that length is less than width, so
                     :  * str is never truncated.
    6290             :  */
    6291             : static void
    6292       65664 : text_format_append_string(StringInfo buf, const char *str,
    6293             :                           int flags, int width)
    6294             : {
    6295       65664 :     bool        align_to_left = false;
    6296             :     int         len;
    6297             : 
    6298             :     /* fast path for typical easy case */
    6299       65664 :     if (width == 0)
    6300             :     {
    6301       65580 :         appendStringInfoString(buf, str);
    6302       65580 :         return;
    6303             :     }
    6304             : 
    6305          84 :     if (width < 0)
    6306             :     {
    6307             :         /* Negative width: implicit '-' flag, then take absolute value */
    6308           6 :         align_to_left = true;
    6309             :         /* -INT_MIN is undefined */
    6310           6 :         if (width <= INT_MIN)
    6311           0 :             ereport(ERROR,
    6312             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    6313             :                      errmsg("number is out of range")));
    6314           6 :         width = -width;
    6315             :     }
    6316          78 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    6317          24 :         align_to_left = true;
    6318             : 
    6319          84 :     len = pg_mbstrlen(str);
    6320          84 :     if (align_to_left)
    6321             :     {
    6322             :         /* left justify */
    6323          30 :         appendStringInfoString(buf, str);
    6324          30 :         if (len < width)
    6325          30 :             appendStringInfoSpaces(buf, width - len);
    6326             :     }
    6327             :     else
    6328             :     {
    6329             :         /* right justify */
    6330          54 :         if (len < width)
    6331          54 :             appendStringInfoSpaces(buf, width - len);
    6332          54 :         appendStringInfoString(buf, str);
    6333             :     }
    6334             : }
    6335             : 
    6336             : /*
    6337             :  * text_format_nv - nonvariadic wrapper for text_format function.
    6338             :  *
    6339             :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
    6340             :  * which checks that all built-in functions that share the implementing C
    6341             :  * function take the same number of arguments.
                     :  *
                     :  * It adds no logic of its own: fcinfo is handed to text_format() and that
                     :  * function's result is returned unchanged.
    6342             :  */
    6343             : Datum
    6344        3810 : text_format_nv(PG_FUNCTION_ARGS)
    6345             : {
    6346        3810 :     return text_format(fcinfo);
    6347             : }
    6348             : 
    6349             : /*
    6350             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    6351             :  * for this use case.
                     :  *
                     :  * Compares the first len bytes of s1 and s2, working from the last byte
                     :  * back to the first, and returns false at the first mismatch.  Returns
                     :  * true when all len bytes match, which includes the case len <= 0 where
                     :  * nothing is compared.
    6352             :  */
    6353             : static inline bool
    6354           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    6355             : {
    6356           0 :     while (len > 0)
    6357             :     {
    6358           0 :         len--;
    6359           0 :         if (s1[len] != s2[len])
    6360           0 :             return false;
    6361             :     }
    6362           0 :     return true;
    6363             : }
    6364             : 
    6365             : /* Expand each Levenshtein distance variant */
    6366             : #include "levenshtein.c"
    6367             : #define LEVENSHTEIN_LESS_EQUAL
    6368             : #include "levenshtein.c"
    6369             : 
    6370             : 
    6371             : /*
    6372             :  * The following *ClosestMatch() functions can be used to determine whether a
    6373             :  * user-provided string resembles any known valid values, which is useful for
    6374             :  * providing hints in log messages, among other things.  Use these functions
    6375             :  * like so:
    6376             :  *
    6377             :  *      initClosestMatch(&state, source_string, max_distance);
    6378             :  *
    6379             :  *      for (int i = 0; i < num_valid_strings; i++)
    6380             :  *          updateClosestMatch(&state, valid_strings[i]);
    6381             :  *
    6382             :  *      closestMatch = getClosestMatch(&state);
    6383             :  */
    6384             : 
    6385             : /*
    6386             :  * Initialize the given state with the source string and maximum Levenshtein
    6387             :  * distance to consider.
                     :  *
                     :  * Only the source pointer is stored (the string is not copied), so it must
                     :  * stay valid for as long as the state is used.  min_d is set to -1, which
                     :  * updateClosestMatch() treats as "no match saved yet", and match starts
                     :  * out as NULL.  state must be non-NULL and max_d must be >= 0 (both are
                     :  * checked only by Assert).
    6388             :  */
    6389             : void
    6390          60 : initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
    6391             : {
    6392             :     Assert(state);
    6393             :     Assert(max_d >= 0);
    6394             : 
    6395          60 :     state->source = source;
    6396          60 :     state->min_d = -1;
    6397          60 :     state->max_d = max_d;
    6398          60 :     state->match = NULL;
    6399          60 : }
    6400             : 
    6401             : /*
    6402             :  * If the candidate string is a closer match than the current one saved (or
    6403             :  * there is no match saved), save it as the closest match.
    6404             :  *
    6405             :  * If the source or candidate string is NULL, empty, or too long, this function
    6406             :  * takes no action.  Likewise, if the Levenshtein distance exceeds the maximum
    6407             :  * allowed or more than half the characters are different, no action is taken.
                     :  *
                     :  * "Too long" means strlen() greater than MAX_LEVENSHTEIN_STRLEN, and the
                     :  * "half the characters" rule is evaluated as dist <= strlen(source) / 2,
                     :  * ie, against the byte length of the source string using integer division.
                     :  *
                     :  * A candidate replaces the saved match only when it is strictly closer, so
                     :  * among candidates at equal distance the first one offered is kept.  Only
                     :  * the candidate pointer is saved, not a copy of the string.
    6408             :  */
    6409             : void
    6410         372 : updateClosestMatch(ClosestMatchState *state, const char *candidate)
    6411             : {
    6412             :     int         dist;
    6413             : 
    6414             :     Assert(state);
    6415             : 
    6416         372 :     if (state->source == NULL || state->source[0] == '\0' ||
    6417         372 :         candidate == NULL || candidate[0] == '\0')
    6418           0 :         return;
    6419             : 
    6420             :     /*
    6421             :      * To avoid ERROR-ing, we check the lengths here instead of setting
    6422             :      * 'trusted' to false in the call to varstr_levenshtein_less_equal().
                     :      *
                     :      * NOTE(review): in the call below the three 1s are presumably the
                     :      * insert/delete/substitute costs and the final "true" the 'trusted'
                     :      * flag -- confirm against varstr_levenshtein_less_equal()'s signature.
    6423             :      */
    6424         372 :     if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
    6425         372 :         strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
    6426           0 :         return;
    6427             : 
    6428         372 :     dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
    6429         372 :                                          candidate, strlen(candidate), 1, 1, 1,
    6430             :                                          state->max_d, true);
    6431         372 :     if (dist <= state->max_d &&
    6432          56 :         dist <= strlen(state->source) / 2 &&
    6433          14 :         (state->min_d == -1 || dist < state->min_d))
    6434             :     {
    6435          14 :         state->min_d = dist;
    6436          14 :         state->match = candidate;
    6437             :     }
    6438             : }
    6439             : 
    6440             : /*
    6441             :  * Return the closest match.  If no suitable candidates were provided via
    6442             :  * updateClosestMatch(), return NULL.
                     :  *
                     :  * The result is the very pointer that was passed to updateClosestMatch()
                     :  * for the winning candidate; the state is not modified.
    6443             :  */
    6444             : const char *
    6445          60 : getClosestMatch(ClosestMatchState *state)
    6446             : {
    6447             :     Assert(state);
    6448             : 
    6449          60 :     return state->match;
    6450             : }
    6451             : 
    6452             : 
    6453             : /*
    6454             :  * Unicode support
    6455             :  */
    6456             : /*
                     :  * Translate a normalization form name into a UnicodeNormalizationForm.
                     :  *
                     :  * formstr is compared with pg_strcasecmp() against "NFC", "NFD", "NFKC"
                     :  * and "NFKD"; the matching UNICODE_* value is returned.
                     :  *
                     :  * Raises an ERROR if the database encoding is not PG_UTF8 (reported with
                     :  * ERRCODE_SYNTAX_ERROR) or if formstr matches none of the four names
                     :  * (ERRCODE_INVALID_PARAMETER_VALUE).  The initial value -1 of "form" is
                     :  * therefore never returned unless ereport(ERROR) were to return.
                     :  */
    6457             : static UnicodeNormalizationForm
    6458         210 : unicode_norm_form_from_string(const char *formstr)
    6459             : {
    6460         210 :     UnicodeNormalizationForm form = -1;
    6461             : 
    6462             :     /*
    6463             :      * Might as well check this while we're here.
    6464             :      */
    6465         210 :     if (GetDatabaseEncoding() != PG_UTF8)
    6466           0 :         ereport(ERROR,
    6467             :                 (errcode(ERRCODE_SYNTAX_ERROR),
    6468             :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
    6469             : 
    6470         210 :     if (pg_strcasecmp(formstr, "NFC") == 0)
    6471          66 :         form = UNICODE_NFC;
    6472         144 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
    6473          60 :         form = UNICODE_NFD;
    6474          84 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
    6475          36 :         form = UNICODE_NFKC;
    6476          48 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
    6477          36 :         form = UNICODE_NFKD;
    6478             :     else
    6479          12 :         ereport(ERROR,
    6480             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6481             :                  errmsg("invalid normalization form: %s", formstr)));
    6482             : 
    6483         198 :     return form;
    6484             : }
    6485             : 
    6486             : /*
    6487             :  * Returns version of Unicode used by Postgres in "major.minor" format (the
    6488             :  * same format as the Unicode version reported by ICU). The third component
    6489             :  * ("update version") never involves additions to the character repertoire and
    6490             :  * is unimportant for most purposes.
    6491             :  *
    6492             :  * See: https://unicode.org/versions/
                     :  *
                     :  * The result is the compile-time constant PG_UNICODE_VERSION converted to
                     :  * text; no function arguments are read.
    6493             :  */
    6494             : Datum
    6495           6 : unicode_version(PG_FUNCTION_ARGS)
    6496             : {
    6497           6 :     PG_RETURN_TEXT_P(cstring_to_text(PG_UNICODE_VERSION));
    6498             : }
    6499             : 
    6500             : /*
    6501             :  * Returns version of Unicode used by ICU, if enabled; otherwise NULL.
                     :  *
                     :  * "Enabled" is a build-time property: when USE_ICU is defined the result
                     :  * is U_UNICODE_VERSION converted to text, otherwise the function returns
                     :  * SQL NULL.  No function arguments are read.
    6502             :  */
    6503             : Datum
    6504           2 : icu_unicode_version(PG_FUNCTION_ARGS)
    6505             : {
    6506             : #ifdef USE_ICU
    6507           2 :     PG_RETURN_TEXT_P(cstring_to_text(U_UNICODE_VERSION));
    6508             : #else
    6509             :     PG_RETURN_NULL();
    6510             : #endif
    6511             : }
    6512             : 
    6513             : /*
    6514             :  * Check whether the string contains only assigned Unicode code
    6515             :  * points. Requires that the database encoding is UTF-8.
                     :  *
                     :  * Argument 0 is the text to examine.  Returns false as soon as a code
                     :  * point whose unicode_category() is PG_U_UNASSIGNED is found, and true
                     :  * otherwise (including for an empty string, where the loop body never
                     :  * runs).  Raises an ERROR if the database encoding is not PG_UTF8; that
                     :  * ereport() call does not specify an errcode.
    6516             :  */
    6517             : Datum
    6518          12 : unicode_assigned(PG_FUNCTION_ARGS)
    6519             : {
    6520          12 :     text       *input = PG_GETARG_TEXT_PP(0);
    6521             :     unsigned char *p;
    6522             :     int         size;
    6523             : 
    6524          12 :     if (GetDatabaseEncoding() != PG_UTF8)
    6525           0 :         ereport(ERROR,
    6526             :                 (errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
    6527             : 
    6528             :     /* walk the input one code point at a time; size bounds the number of iterations */
    6529          12 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6530          12 :     p = (unsigned char *) VARDATA_ANY(input);
    6531          48 :     for (int i = 0; i < size; i++)
    6532             :     {
    6533          42 :         pg_wchar    uchar = utf8_to_unicode(p);
    6534          42 :         int         category = unicode_category(uchar);
    6535             : 
    6536          42 :         if (category == PG_U_UNASSIGNED)
    6537           6 :             PG_RETURN_BOOL(false);
    6538             : 
    6539          36 :         p += pg_utf_mblen(p);
    6540             :     }
    6541             : 
    6542           6 :     PG_RETURN_BOOL(true);
    6543             : }
    6544             : /*
                     :  * Normalize a string to the requested Unicode normalization form.
                     :  *
                     :  * Argument 0 is the input text and argument 1 is the form name, which is
                     :  * resolved by unicode_norm_form_from_string() (so an unknown name or a
                     :  * non-UTF8 database encoding raises an ERROR there).
                     :  *
                     :  * The input is decoded into a zero-terminated pg_wchar array, passed to
                     :  * unicode_normalize(), and the zero-terminated result is encoded back to
                     :  * UTF-8 in two passes: the first pass encodes each code point into a
                     :  * scratch buffer just to add up the byte length, the second writes into
                     :  * the newly allocated result.  The intermediate allocations (formstr,
                     :  * input_chars, output_chars) are not freed explicitly in this function.
                     :  */
    6545             : Datum
    6546          72 : unicode_normalize_func(PG_FUNCTION_ARGS)
    6547             : {
    6548          72 :     text       *input = PG_GETARG_TEXT_PP(0);
    6549          72 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6550             :     UnicodeNormalizationForm form;
    6551             :     int         size;
    6552             :     pg_wchar   *input_chars;
    6553             :     pg_wchar   *output_chars;
    6554             :     unsigned char *p;
    6555             :     text       *result;
    6556             :     int         i;
    6557             : 
    6558          72 :     form = unicode_norm_form_from_string(formstr);
    6559             : 
    6560             :     /* convert to pg_wchar */
    6561          66 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6562          66 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6563          66 :     p = (unsigned char *) VARDATA_ANY(input);
    6564         288 :     for (i = 0; i < size; i++)
    6565             :     {
    6566         222 :         input_chars[i] = utf8_to_unicode(p);
    6567         222 :         p += pg_utf_mblen(p);
    6568             :     }
    6569          66 :     input_chars[i] = (pg_wchar) '\0';
    6570             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6571             : 
    6572             :     /* action */
    6573          66 :     output_chars = unicode_normalize(form, input_chars);
    6574             : 
    6575             :     /* convert back to UTF-8 string; first pass only measures the byte length */
    6576          66 :     size = 0;
    6577         306 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6578             :     {
    6579             :         unsigned char buf[4];
    6580             : 
    6581         240 :         unicode_to_utf8(*wp, buf);
    6582         240 :         size += pg_utf_mblen(buf);
    6583             :     }
    6584             : 
    6585          66 :     result = palloc(size + VARHDRSZ);
    6586          66 :     SET_VARSIZE(result, size + VARHDRSZ);
    6587             : 
    6588          66 :     p = (unsigned char *) VARDATA_ANY(result);
    6589         306 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6590             :     {
    6591         240 :         unicode_to_utf8(*wp, p);
    6592         240 :         p += pg_utf_mblen(p);
    6593             :     }
    6594             :     Assert((char *) p == (char *) result + size + VARHDRSZ);
    6595             : 
    6596          66 :     PG_RETURN_TEXT_P(result);
    6597             : }
    6598             : 
    6599             : /*
    6600             :  * Check whether the string is in the specified Unicode normalization form.
    6601             :  *
    6602             :  * This is done by converting the string to the specified normal form and then
    6603             :  * comparing that to the original string.  To speed that up, we also apply the
    6604             :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
    6605             :  * answer for many strings by just scanning the string once.
    6606             :  *
    6607             :  * This function should generally be optimized for the case where the string
    6608             :  * is in fact normalized.  In that case, we'll end up looking at the entire
    6609             :  * string, so it's probably not worth doing any incremental conversion etc.
                     :  *
                     :  * Argument 0 is the input text and argument 1 is the form name, resolved
                     :  * by unicode_norm_form_from_string() (which raises an ERROR for an unknown
                     :  * name or a non-UTF8 database encoding).  The quick check is run first and
                     :  * its UNICODE_NORM_QC_YES / UNICODE_NORM_QC_NO answers are returned
                     :  * directly; only for any other answer is the input normalized and compared.
                     :  * That comparison is on decoded code points: the lengths must be equal and
                     :  * the two pg_wchar arrays must be bytewise identical.
    6610             :  */
    6611             : Datum
    6612         138 : unicode_is_normalized(PG_FUNCTION_ARGS)
    6613             : {
    6614         138 :     text       *input = PG_GETARG_TEXT_PP(0);
    6615         138 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6616             :     UnicodeNormalizationForm form;
    6617             :     int         size;
    6618             :     pg_wchar   *input_chars;
    6619             :     pg_wchar   *output_chars;
    6620             :     unsigned char *p;
    6621             :     int         i;
    6622             :     UnicodeNormalizationQC quickcheck;
    6623             :     int         output_size;
    6624             :     bool        result;
    6625             : 
    6626         138 :     form = unicode_norm_form_from_string(formstr);
    6627             : 
    6628             :     /* convert to pg_wchar */
    6629         132 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6630         132 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6631         132 :     p = (unsigned char *) VARDATA_ANY(input);
    6632         504 :     for (i = 0; i < size; i++)
    6633             :     {
    6634         372 :         input_chars[i] = utf8_to_unicode(p);
    6635         372 :         p += pg_utf_mblen(p);
    6636             :     }
    6637         132 :     input_chars[i] = (pg_wchar) '\0';
    6638             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6639             : 
    6640             :     /* quick check (see UAX #15) */
    6641         132 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
    6642         132 :     if (quickcheck == UNICODE_NORM_QC_YES)
    6643          42 :         PG_RETURN_BOOL(true);
    6644          90 :     else if (quickcheck == UNICODE_NORM_QC_NO)
    6645          12 :         PG_RETURN_BOOL(false);
    6646             : 
    6647             :     /* normalize and compare with original */
    6648          78 :     output_chars = unicode_normalize(form, input_chars);
    6649             : 
    6650          78 :     output_size = 0;
    6651         324 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6652         246 :         output_size++;
    6653             : 
    6654         114 :     result = (size == output_size) &&
    6655          36 :         (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
    6656             : 
    6657          78 :     PG_RETURN_BOOL(result);
    6658             : }
    6659             : 
    6660             : /*
    6661             :  * Check if first n chars are hexadecimal digits
    6662             :  */
    6663             : static bool
    6664         156 : isxdigits_n(const char *instr, size_t n)
    6665             : {
    6666         660 :     for (size_t i = 0; i < n; i++)
    6667         570 :         if (!isxdigit((unsigned char) instr[i]))
    6668          66 :             return false;
    6669             : 
    6670          90 :     return true;
    6671             : }
    6672             : 
    6673             : static unsigned int
    6674         504 : hexval(unsigned char c)
    6675             : {
    6676         504 :     if (c >= '0' && c <= '9')
    6677         384 :         return c - '0';
    6678         120 :     if (c >= 'a' && c <= 'f')
    6679          60 :         return c - 'a' + 0xA;
    6680          60 :     if (c >= 'A' && c <= 'F')
    6681          60 :         return c - 'A' + 0xA;
    6682           0 :     elog(ERROR, "invalid hexadecimal digit");
    6683             :     return 0;                   /* not reached */
    6684             : }
    6685             : 
    6686             : /*
    6687             :  * Translate string with hexadecimal digits to number
    6688             :  */
    6689             : static unsigned int
    6690          90 : hexval_n(const char *instr, size_t n)
    6691             : {
    6692          90 :     unsigned int result = 0;
    6693             : 
    6694         594 :     for (size_t i = 0; i < n; i++)
    6695         504 :         result += hexval(instr[i]) << (4 * (n - i - 1));
    6696             : 
    6697          90 :     return result;
    6698             : }
    6699             : 
    6700             : /*
    6701             :  * Replaces Unicode escape sequences by Unicode characters
    6702             :  */
    6703             : Datum
    6704          66 : unistr(PG_FUNCTION_ARGS)
    6705             : {
    6706          66 :     text       *input_text = PG_GETARG_TEXT_PP(0);
    6707             :     char       *instr;
    6708             :     int         len;
    6709             :     StringInfoData str;
    6710             :     text       *result;
    6711          66 :     pg_wchar    pair_first = 0;
    6712             :     char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    6713             : 
    6714          66 :     instr = VARDATA_ANY(input_text);
    6715          66 :     len = VARSIZE_ANY_EXHDR(input_text);
    6716             : 
    6717          66 :     initStringInfo(&str);
    6718             : 
    6719         510 :     while (len > 0)
    6720             :     {
    6721         486 :         if (instr[0] == '\\')
    6722             :         {
    6723         102 :             if (len >= 2 &&
    6724         102 :                 instr[1] == '\\')
    6725             :             {
    6726           6 :                 if (pair_first)
    6727           0 :                     goto invalid_pair;
    6728           6 :                 appendStringInfoChar(&str, '\\');
    6729           6 :                 instr += 2;
    6730           6 :                 len -= 2;
    6731             :             }
    6732          96 :             else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
    6733          66 :                      (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
    6734          30 :             {
    6735             :                 pg_wchar    unicode;
    6736          42 :                 int         offset = instr[1] == 'u' ? 2 : 1;
    6737             : 
    6738          42 :                 unicode = hexval_n(instr + offset, 4);
    6739             : 
    6740          42 :                 if (!is_valid_unicode_codepoint(unicode))
    6741           0 :                     ereport(ERROR,
    6742             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6743             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6744             : 
    6745          42 :                 if (pair_first)
    6746             :                 {
    6747          12 :                     if (is_utf16_surrogate_second(unicode))
    6748             :                     {
    6749           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6750           0 :                         pair_first = 0;
    6751             :                     }
    6752             :                     else
    6753          12 :                         goto invalid_pair;
    6754             :                 }
    6755          30 :                 else if (is_utf16_surrogate_second(unicode))
    6756           0 :                     goto invalid_pair;
    6757             : 
    6758          30 :                 if (is_utf16_surrogate_first(unicode))
    6759          18 :                     pair_first = unicode;
    6760             :                 else
    6761             :                 {
    6762          12 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6763          12 :                     appendStringInfoString(&str, cbuf);
    6764             :                 }
    6765             : 
    6766          30 :                 instr += 4 + offset;
    6767          30 :                 len -= 4 + offset;
    6768             :             }
    6769          54 :             else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
    6770          12 :             {
    6771             :                 pg_wchar    unicode;
    6772             : 
    6773          24 :                 unicode = hexval_n(instr + 2, 6);
    6774             : 
    6775          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6776           6 :                     ereport(ERROR,
    6777             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6778             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6779             : 
    6780          18 :                 if (pair_first)
    6781             :                 {
    6782           6 :                     if (is_utf16_surrogate_second(unicode))
    6783             :                     {
    6784           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6785           0 :                         pair_first = 0;
    6786             :                     }
    6787             :                     else
    6788           6 :                         goto invalid_pair;
    6789             :                 }
    6790          12 :                 else if (is_utf16_surrogate_second(unicode))
    6791           0 :                     goto invalid_pair;
    6792             : 
    6793          12 :                 if (is_utf16_surrogate_first(unicode))
    6794           6 :                     pair_first = unicode;
    6795             :                 else
    6796             :                 {
    6797           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6798           6 :                     appendStringInfoString(&str, cbuf);
    6799             :                 }
    6800             : 
    6801          12 :                 instr += 8;
    6802          12 :                 len -= 8;
    6803             :             }
    6804          30 :             else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
    6805          12 :             {
    6806             :                 pg_wchar    unicode;
    6807             : 
    6808          24 :                 unicode = hexval_n(instr + 2, 8);
    6809             : 
    6810          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6811           6 :                     ereport(ERROR,
    6812             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6813             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6814             : 
    6815          18 :                 if (pair_first)
    6816             :                 {
    6817           6 :                     if (is_utf16_surrogate_second(unicode))
    6818             :                     {
    6819           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6820           0 :                         pair_first = 0;
    6821             :                     }
    6822             :                     else
    6823           6 :                         goto invalid_pair;
    6824             :                 }
    6825          12 :                 else if (is_utf16_surrogate_second(unicode))
    6826           0 :                     goto invalid_pair;
    6827             : 
    6828          12 :                 if (is_utf16_surrogate_first(unicode))
    6829           6 :                     pair_first = unicode;
    6830             :                 else
    6831             :                 {
    6832           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6833           6 :                     appendStringInfoString(&str, cbuf);
    6834             :                 }
    6835             : 
    6836          12 :                 instr += 10;
    6837          12 :                 len -= 10;
    6838             :             }
    6839             :             else
    6840           6 :                 ereport(ERROR,
    6841             :                         (errcode(ERRCODE_SYNTAX_ERROR),
    6842             :                          errmsg("invalid Unicode escape"),
    6843             :                          errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
    6844             :         }
    6845             :         else
    6846             :         {
    6847         384 :             if (pair_first)
    6848           0 :                 goto invalid_pair;
    6849             : 
    6850         384 :             appendStringInfoChar(&str, *instr++);
    6851         384 :             len--;
    6852             :         }
    6853             :     }
    6854             : 
    6855             :     /* unfinished surrogate pair? */
    6856          24 :     if (pair_first)
    6857           6 :         goto invalid_pair;
    6858             : 
    6859          18 :     result = cstring_to_text_with_len(str.data, str.len);
    6860          18 :     pfree(str.data);
    6861             : 
    6862          18 :     PG_RETURN_TEXT_P(result);
    6863             : 
    6864          30 : invalid_pair:
    6865          30 :     ereport(ERROR,
    6866             :             (errcode(ERRCODE_SYNTAX_ERROR),
    6867             :              errmsg("invalid Unicode surrogate pair")));
    6868             :     PG_RETURN_NULL();           /* keep compiler quiet */
    6869             : }

Generated by: LCOV version 1.14