LCOV - code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15beta1 Lines: 1846 2118 87.2 %
Date: 2022-05-18 03:10:05 Functions: 143 157 91.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/detoast.h"
      21             : #include "access/toast_compression.h"
      22             : #include "catalog/pg_collation.h"
      23             : #include "catalog/pg_type.h"
      24             : #include "common/hashfn.h"
      25             : #include "common/int.h"
      26             : #include "common/unicode_norm.h"
      27             : #include "funcapi.h"
      28             : #include "lib/hyperloglog.h"
      29             : #include "libpq/pqformat.h"
      30             : #include "miscadmin.h"
      31             : #include "nodes/execnodes.h"
      32             : #include "parser/scansup.h"
      33             : #include "port/pg_bswap.h"
      34             : #include "regex/regex.h"
      35             : #include "utils/builtins.h"
      36             : #include "utils/bytea.h"
      37             : #include "utils/lsyscache.h"
      38             : #include "utils/memutils.h"
      39             : #include "utils/pg_locale.h"
      40             : #include "utils/sortsupport.h"
      41             : #include "utils/varlena.h"
      42             : 
      43             : 
      44             : /* GUC variable: selects the output format used by byteaout(); initialized to hex */
      45             : int         bytea_output = BYTEA_OUTPUT_HEX;
      46             : 
      47             : typedef struct varlena unknown;     /* target type of the DatumGetUnknownP* casts below */
      48             : typedef struct varlena VarString;   /* target type of the DatumGetVarStringP* casts below */
      49             : 
      50             : /*
      51             :  * State passed to the text_position_* functions declared further down.
      52             :  */
      53             : typedef struct
      54             : {
      55             :     bool        is_multibyte_char_in_char;  /* need to check char boundaries? */
      56             : 
      57             :     char       *str1;           /* haystack string */
      58             :     char       *str2;           /* needle string */
      59             :     int         len1;           /* string lengths in bytes */
      60             :     int         len2;           /* (len1/len2 presumably pair with str1/str2 -- confirm) */
      61             : 
      62             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      63             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      64             :     int         skiptable[256]; /* skip distance for given mismatched char */
      65             : 
      66             :     char       *last_match;     /* pointer to last match in 'str1' */
      67             : 
      68             :     /*
      69             :      * Sometimes we need to convert the byte position of a match to a
      70             :      * character position.  These store the last position that was converted,
      71             :      * so that on the next call, we can continue from that point, rather than
      72             :      * count characters from the very beginning.
      73             :      */
      74             :     char       *refpoint;       /* pointer within original haystack string */
      75             :     int         refpos;         /* 0-based character offset of the same point */
      76             : } TextPositionState;
      77             : 
      78             : typedef struct                  /* sort working state: buffers, cached result, cardinality estimators */
      79             : {
      80             :     char       *buf1;           /* 1st string, or abbreviation original string
      81             :                                  * buf */
      82             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      83             :     int         buflen1;        /* NOTE(review): presumably allocated size of buf1 -- confirm */
      84             :     int         buflen2;        /* NOTE(review): presumably allocated size of buf2 -- confirm */
      85             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      86             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      87             :     int         last_returned;  /* Last comparison result (cache) */
      88             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      89             :     bool        collate_c;      /* NOTE(review): presumably set when the C collation applies -- confirm */
      90             :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
      91             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      92             :     hyperLogLogState full_card; /* Full key cardinality state */
      93             :     double      prop_card;      /* Required cardinality proportion */
      94             :     pg_locale_t locale;         /* NOTE(review): presumably the locale used when !collate_c -- confirm */
      95             : } VarStringSortSupport;
      96             : 
      97             : /*
      98             :  * Output data for split_text(): we output either to an array or a table.
      99             :  * tupstore and tupdesc must be set up in advance to output to a table.
     100             :  */
     101             : typedef struct
     102             : {
     103             :     ArrayBuildState *astate;    /* array-output accumulator */
     104             :     Tuplestorestate *tupstore;  /* table output; caller sets this up beforehand */
     105             :     TupleDesc   tupdesc;        /* table output; caller sets this up beforehand */
     106             : } SplitTextOutputData;
     107             : 
     108             : /*
     109             :  * Size for on-stack string buffers.  It should be large enough that most
     110             :  * strings will fit, but small enough that we are comfortable putting it on the stack.
     111             :  */
     112             : #define TEXTBUFLEN      1024    /* NOTE(review): presumably a byte count -- confirm at use sites */
     113             : 
     114             : #define DatumGetUnknownP(X)         ((unknown *) PG_DETOAST_DATUM(X))   /* detoast, then cast to unknown * */
     115             : #define DatumGetUnknownPCopy(X)     ((unknown *) PG_DETOAST_DATUM_COPY(X))  /* same, via the _COPY detoast macro */
     116             : #define PG_GETARG_UNKNOWN_P(n)      DatumGetUnknownP(PG_GETARG_DATUM(n))    /* fetch argument n as unknown * */
     117             : #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))    /* fetch argument n through the _COPY variant */
     118             : #define PG_RETURN_UNKNOWN_P(x)      PG_RETURN_POINTER(x)    /* return an unknown * as a pointer result */
     119             : 
     120             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))     /* detoast, then cast to VarString * */
     121             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))  /* same, via the _PACKED detoast macro */
     122             : 
     123             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);    /* forward declarations of file-local (static) helpers start here */
     124             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
     125             : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
     126             : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
     127             : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
     128             : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
     129             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
     130             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     131             : static int32 text_length(Datum str);
     132             : static text *text_catenate(text *t1, text *t2);
     133             : static text *text_substring(Datum str,
     134             :                             int32 start,
     135             :                             int32 length,
     136             :                             bool length_not_specified);
     137             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     138             : static int  text_position(text *t1, text *t2, Oid collid);
     139             : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);  /* the text_position_* helpers share a TextPositionState */
     140             : static bool text_position_next(TextPositionState *state);
     141             : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
     142             : static char *text_position_get_match_ptr(TextPositionState *state);
     143             : static int  text_position_get_match_pos(TextPositionState *state);
     144             : static void text_position_cleanup(TextPositionState *state);
     145             : static void check_collation_set(Oid collid);
     146             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     147             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     148             : static bytea *bytea_substring(Datum str,
     149             :                               int S,
     150             :                               int L,
     151             :                               bool length_not_specified);
     152             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     153             : static void appendStringInfoText(StringInfo str, const text *t);
     154             : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);   /* output goes through SplitTextOutputData (array or table) */
     155             : static void split_text_accum_result(SplitTextOutputData *tstate,
     156             :                                     text *field_value,
     157             :                                     text *null_string,
     158             :                                     Oid collation);
     159             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     160             :                                     const char *fldsep, const char *null_string);
     161             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);  /* called by bytea_string_agg_transfn() when it has no state yet */
     162             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     163             :                                      int *value);
     164             : static const char *text_format_parse_format(const char *start_ptr,
     165             :                                             const char *end_ptr,
     166             :                                             int *argpos, int *widthpos,
     167             :                                             int *flags, int *width);
     168             : static void text_format_string_conversion(StringInfo buf, char conversion,
     169             :                                           FmgrInfo *typOutputInfo,
     170             :                                           Datum value, bool isNull,
     171             :                                           int flags, int width);
     172             : static void text_format_append_string(StringInfo buf, const char *str,
     173             :                                       int flags, int width);
     174             : 
     175             : 
     176             : /*****************************************************************************
     177             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     178             :  *****************************************************************************/
     179             : 
     180             : /*
     181             :  * cstring_to_text
     182             :  *
     183             :  * Create a text value from the null-terminated C string s.
     184             :  * The length is measured with strlen() and the work is done by cstring_to_text_with_len().
     185             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     186             :  */
     187             : text *
     188    21610340 : cstring_to_text(const char *s)
     189             : {
     190    21610340 :     return cstring_to_text_with_len(s, strlen(s));  /* s must be non-NULL: strlen() runs unconditionally */
     191             : }
     192             : 
     193             : /*
     194             :  * cstring_to_text_with_len
     195             :  *
     196             :  * Same as cstring_to_text except the caller specifies the string length;
     197             :  * the string need not be null-terminated.  Exactly len bytes are copied from s.
     198             :  */
     199             : text *
     200    26124082 : cstring_to_text_with_len(const char *s, int len)
     201             : {
     202    26124082 :     text       *result = (text *) palloc(len + VARHDRSZ);  /* header plus len data bytes */
     203             : 
     204    26124082 :     SET_VARSIZE(result, len + VARHDRSZ);    /* recorded size includes the header */
     205    26124082 :     memcpy(VARDATA(result), s, len);        /* no terminator is stored in the text value */
     206             : 
     207    26124082 :     return result;
     208             : }
     209             : 
     210             : /*
     211             :  * text_to_cstring
     212             :  *
     213             :  * Create a palloc'd, null-terminated C string from a text value.
     214             :  *
     215             :  * We support being passed a compressed or toasted text value.
     216             :  * This is a bit bogus since such values shouldn't really be referred to as
     217             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     218             :  * case here, we'd need another routine that did, anyway.
     219             :  */
     220             : char *
     221    13649410 : text_to_cstring(const text *t)
     222             : {
     223             :     /* must cast away the const, unfortunately */
     224    13649410 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
     225    13649410 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);    /* number of data bytes copied below */
     226             :     char       *result;
     227             : 
     228    13649410 :     result = (char *) palloc(len + 1);      /* +1 for the terminating NUL */
     229    13649410 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     230    13649410 :     result[len] = '\0';
     231             : 
     232    13649410 :     if (tunpacked != t)                     /* detoasting handed back a different pointer */
     233      119696 :         pfree(tunpacked);                   /* so release that temporary */
     234             : 
     235    13649410 :     return result;
     236             : }
     237             : 
     238             : /*
     239             :  * text_to_cstring_buffer
     240             :  *
     241             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     242             :  *
     243             :  * The text string is truncated if necessary to fit.  The result is
     244             :  * guaranteed null-terminated (unless dst_len == 0, when dst is left untouched).
     245             :  *
     246             :  * We support being passed a compressed or toasted text value.
     247             :  * This is a bit bogus since such values shouldn't really be referred to as
     248             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     249             :  * case here, we'd need another routine that did, anyway.
     250             :  */
     251             : void
     252         622 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     253             : {
     254             :     /* must cast away the const, unfortunately */
     255         622 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
     256         622 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     257             : 
     258         622 :     if (dst_len > 0)
     259             :     {
     260         622 :         dst_len--;              /* keep one byte for the terminating NUL */
     261         622 :         if (dst_len >= src_len)
     262         622 :             dst_len = src_len;  /* everything fits: copy the whole source */
     263             :         else                    /* ensure truncation is encoding-safe */
     264           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     265         622 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);    /* dst_len is now the byte count to copy */
     266         622 :         dst[dst_len] = '\0';
     267             :     }
     268             : 
     269         622 :     if (srcunpacked != src)     /* detoasting handed back a different pointer */
     270           0 :         pfree(srcunpacked);
     271         622 : }
     272             : 
     273             : 
     274             : /*****************************************************************************
     275             :  *   USER I/O ROUTINES                                                       *
     276             :  *****************************************************************************/
     277             : 
     278             : 
     279             : #define VAL(CH)         ((CH) - '0')    /* digit character -> its numeric value */
     280             : #define DIG(VAL)        ((VAL) + '0')   /* numeric value -> digit character */
     281             : 
     282             : /*
     283             :  *      byteain         - converts from printable representation of byte array
     284             :  *
     285             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     286             :  *      converted to internal form.  '\' must be passed as '\\'.
     287             :  *      ereport(ERROR, ...) if bad form.
     288             :  *      Input that begins with "\x" is passed to hex_decode() instead.
     289             :  *      BUGS:
     290             :  *              The input is scanned twice.
     291             :  *              The error checking of input is minimal.
     292             :  */
     293             : Datum
     294       19118 : byteain(PG_FUNCTION_ARGS)
     295             : {
     296       19118 :     char       *inputText = PG_GETARG_CSTRING(0);
     297             :     char       *tp;
     298             :     char       *rp;
     299             :     int         bc;
     300             :     bytea      *result;
     301             : 
     302             :     /* Recognize hex input */
     303       19118 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     304             :     {
     305         792 :         size_t      len = strlen(inputText);
     306             : 
     307         792 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     308         792 :         result = palloc(bc);
     309         792 :         bc = hex_decode(inputText + 2, len - 2, VARDATA(result));  /* skip the 2-char prefix; bc becomes the decoded length */
     310         780 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     311             : 
     312         780 :         PG_RETURN_BYTEA_P(result);
     313             :     }
     314             : 
     315             :     /* Else, it's the traditional escaped style */
     316      298968 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)    /* pass 1: validate, counting one output byte per iteration */
     317             :     {
     318      280648 :         if (tp[0] != '\\')
     319      279640 :             tp++;               /* ordinary character */
     320        1008 :         else if ((tp[0] == '\\') &&
     321        1008 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     322        1002 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     323        1002 :                  (tp[3] >= '0' && tp[3] <= '7'))
     324        1002 :             tp += 4;            /* backslash plus three octal digits */
     325           6 :         else if ((tp[0] == '\\') &&
     326           6 :                  (tp[1] == '\\'))
     327           0 :             tp += 2;            /* doubled backslash */
     328             :         else
     329             :         {
     330             :             /*
     331             :              * one backslash, not followed by another or ### valid octal
     332             :              */
     333           6 :             ereport(ERROR,
     334             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     335             :                      errmsg("invalid input syntax for type %s", "bytea")));
     336             :         }
     337             :     }
     338             : 
     339       18320 :     bc += VARHDRSZ;             /* allocation size = counted data bytes + header */
     340             : 
     341       18320 :     result = (bytea *) palloc(bc);
     342       18320 :     SET_VARSIZE(result, bc);
     343             : 
     344       18320 :     tp = inputText;
     345       18320 :     rp = VARDATA(result);
     346      298950 :     while (*tp != '\0')         /* pass 2: write the decoded bytes */
     347             :     {
     348      280630 :         if (tp[0] != '\\')
     349      279628 :             *rp++ = *tp++;
     350        1002 :         else if ((tp[0] == '\\') &&
     351        1002 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     352        1002 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     353        1002 :                  (tp[3] >= '0' && tp[3] <= '7'))
     354             :         {
     355        1002 :             bc = VAL(tp[1]);    /* bc is reused as scratch for the octal value */
     356        1002 :             bc <<= 3;
     357        1002 :             bc += VAL(tp[2]);
     358        1002 :             bc <<= 3;
     359        1002 :             *rp++ = bc + VAL(tp[3]);
     360             : 
     361        1002 :             tp += 4;
     362             :         }
     363           0 :         else if ((tp[0] == '\\') &&
     364           0 :                  (tp[1] == '\\'))
     365             :         {
     366           0 :             *rp++ = '\\';
     367           0 :             tp += 2;
     368             :         }
     369             :         else
     370             :         {
     371             :             /*
     372             :              * We should never get here. The first pass should not allow it.
     373             :              */
     374           0 :             ereport(ERROR,
     375             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     376             :                      errmsg("invalid input syntax for type %s", "bytea")));
     377             :         }
     378             :     }
     379             : 
     380       18320 :     PG_RETURN_BYTEA_P(result);
     381             : }
     382             : 
     383             : /*
     384             :  *      byteaout        - converts to printable representation of byte array
     385             :  *
     386             :  *      The format is chosen by bytea_output (hex or escape).  In the traditional
     387             :  *      escaped format, non-printable characters are printed as '\nnn' (octal) and '\' as '\\'.
     388             :  */
     389             : Datum
     390       13360 : byteaout(PG_FUNCTION_ARGS)
     391             : {
     392       13360 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     393             :     char       *result;
     394             :     char       *rp;
     395             : 
     396       13360 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     397             :     {
     398             :         /* Print hex format */
     399       13096 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); /* 2 chars per byte + 2-char prefix + NUL */
     400       13096 :         *rp++ = '\\';
     401       13096 :         *rp++ = 'x';
     402       13096 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     403             :     }
     404         264 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     405             :     {
     406             :         /* Print traditional escaped format */
     407             :         char       *vp;
     408             :         uint64      len;
     409             :         int         i;
     410             : 
     411         264 :         len = 1;                /* empty string has 1 char */
     412         264 :         vp = VARDATA_ANY(vlena);
     413        2608 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)  /* pass 1: compute the escaped length */
     414             :         {
     415        2344 :             if (*vp == '\\')
     416           0 :                 len += 2;       /* backslash is doubled */
     417        2344 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     418         498 :                 len += 4;       /* backslash plus three octal digits */
     419             :             else
     420        1846 :                 len++;
     421             :         }
     422             : 
     423             :         /*
     424             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     425             :          * for safety let's check rather than relying on palloc's internal
     426             :          * check.
     427             :          */
     428         264 :         if (len > MaxAllocSize)
     429           0 :             ereport(ERROR,
     430             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     431             :                      errmsg_internal("result of bytea output conversion is too large")));
     432         264 :         rp = result = (char *) palloc(len);
     433             : 
     434         264 :         vp = VARDATA_ANY(vlena);
     435        2608 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)  /* pass 2: write the escaped text */
     436             :         {
     437        2344 :             if (*vp == '\\')
     438             :             {
     439           0 :                 *rp++ = '\\';
     440           0 :                 *rp++ = '\\';
     441             :             }
     442        2344 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     443         498 :             {
     444             :                 int         val;    /* holds unprintable chars */
     445             : 
     446         498 :                 val = *vp;
     447         498 :                 rp[0] = '\\';
     448         498 :                 rp[3] = DIG(val & 07);  /* octal digits are filled in last-to-first */
     449         498 :                 val >>= 3;
     450         498 :                 rp[2] = DIG(val & 07);
     451         498 :                 val >>= 3;
     452         498 :                 rp[1] = DIG(val & 03);
     453         498 :                 rp += 4;
     454             :             }
     455             :             else
     456        1846 :                 *rp++ = *vp;
     457             :         }
     458             :     }
     459             :     else
     460             :     {
     461           0 :         elog(ERROR, "unrecognized bytea_output setting: %d",
     462             :              bytea_output);
     463             :         rp = result = NULL;     /* keep compiler quiet */
     464             :     }
     465       13360 :     *rp = '\0';                 /* terminate the output string */
     466       13360 :     PG_RETURN_CSTRING(result);
     467             : }
     468             : 
     469             : /*
     470             :  *      bytearecv           - converts external binary format to bytea (copies the rest of the message)
     471             :  */
     472             : Datum
     473        1038 : bytearecv(PG_FUNCTION_ARGS)
     474             : {
     475        1038 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     476             :     bytea      *result;
     477             :     int         nbytes;
     478             : 
     479        1038 :     nbytes = buf->len - buf->cursor;    /* bytes from the cursor to the end of the buffer */
     480        1038 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     481        1038 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     482        1038 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     483        1038 :     PG_RETURN_BYTEA_P(result);
     484             : }
     485             : 
     486             : /*
     487             :  *      byteasend           - converts bytea to binary format
     488             :  *
     489             :  * This is a special case: just copy the input and return the copy as the result.
     490             :  */
     491             : Datum
     492        5516 : byteasend(PG_FUNCTION_ARGS)
     493             : {
     494        5516 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);     /* fetched through the _COPY argument macro */
     495             : 
     496        5516 :     PG_RETURN_BYTEA_P(vlena);
     497             : }
     498             : 
     499             : Datum
     500       32774 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)  /* args: 0 = state (may be null), 1 = value, 2 = delimiter */
     501             : {
     502             :     StringInfo  state;
     503             : 
     504       32774 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     505             : 
     506             :     /* Append the value unless null. */
     507       32774 :     if (!PG_ARGISNULL(1))
     508             :     {
     509       32774 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     510             : 
     511             :         /* On the first time through, we ignore the delimiter. */
     512       32774 :         if (state == NULL)
     513          26 :             state = makeStringAggState(fcinfo);
     514       32748 :         else if (!PG_ARGISNULL(2))  /* a null delimiter appends nothing */
     515             :         {
     516       32742 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     517             : 
     518       32742 :             appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
     519             :         }
     520             : 
     521       32774 :         appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
     522             :     }
     523             : 
     524             :     /*
     525             :      * The transition type for string_agg() is declared to be "internal",
     526             :      * which is a pass-by-value type the same size as a pointer.
     527             :      */
     528       32774 :     PG_RETURN_POINTER(state);   /* still NULL if no non-null value has been seen */
     529             : }
     530             : 
     531             : Datum
     532          32 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)  /* arg 0 = accumulated state, or null */
     533             : {
     534             :     StringInfo  state;
     535             : 
     536             :     /* cannot be called directly because of internal-type argument */
     537             :     Assert(AggCheckCallContext(fcinfo, NULL));
     538             : 
     539          32 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     540             : 
     541          32 :     if (state != NULL)
     542             :     {
     543             :         bytea      *result;
     544             : 
     545          26 :         result = (bytea *) palloc(state->len + VARHDRSZ);  /* copy the accumulated bytes into a new bytea */
     546          26 :         SET_VARSIZE(result, state->len + VARHDRSZ);
     547          26 :         memcpy(VARDATA(result), state->data, state->len);
     548          26 :         PG_RETURN_BYTEA_P(result);
     549             :     }
     550             :     else
     551           6 :         PG_RETURN_NULL();       /* null state: the result is SQL NULL */
     552             : }
     553             : 
     554             : /*
     555             :  *      textin          - converts "..." to internal representation, via cstring_to_text()
     556             :  */
     557             : Datum
     558    17761384 : textin(PG_FUNCTION_ARGS)
     559             : {
     560    17761384 :     char       *inputText = PG_GETARG_CSTRING(0);
     561             : 
     562    17761384 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     563             : }
     564             : 
     565             : /*
     566             :  *      textout         - converts internal representation to "...", via TextDatumGetCString()
     567             :  */
     568             : Datum
     569     8036942 : textout(PG_FUNCTION_ARGS)
     570             : {
     571     8036942 :     Datum       txt = PG_GETARG_DATUM(0);   /* kept as a Datum and handed straight to the macro */
     572             : 
     573     8036942 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     574             : }
     575             : 
     576             : /*
     577             :  *      textrecv            - converts external binary format to text (consumes the rest of the message)
     578             :  */
     579             : Datum
     580       53372 : textrecv(PG_FUNCTION_ARGS)
     581             : {
     582       53372 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     583             :     text       *result;
     584             :     char       *str;
     585             :     int         nbytes;
     586             : 
     587       53372 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); /* nbytes is set by the call and used as the length below */
     588             : 
     589       53372 :     result = cstring_to_text_with_len(str, nbytes);
     590       53372 :     pfree(str);                 /* str is released once its bytes are copied into result */
     591       53372 :     PG_RETURN_TEXT_P(result);
     592             : }
     593             : 
     594             : /*
     595             :  *      textsend            - converts text to binary format, returned as a bytea
     596             :  */
     597             : Datum
     598       36540 : textsend(PG_FUNCTION_ARGS)
     599             : {
     600       36540 :     text       *t = PG_GETARG_TEXT_PP(0);
     601             :     StringInfoData buf;
     602             : 
     603       36540 :     pq_begintypsend(&buf);
     604       36540 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));    /* pass the data area and its header-exclusive length */
     605       36540 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     606             : }
     607             : 
     608             : 
     609             : /*
     610             :  *      unknownin           - converts "..." to internal representation (a pstrdup'd copy of the input)
     611             :  */
     612             : Datum
     613           0 : unknownin(PG_FUNCTION_ARGS)
     614             : {
     615           0 :     char       *str = PG_GETARG_CSTRING(0);
     616             : 
     617             :     /* representation is same as cstring, so a plain string copy is the whole conversion */
     618           0 :     PG_RETURN_CSTRING(pstrdup(str));
     619             : }
     620             : 
     621             : /*
     622             :  *      unknownout          - converts internal representation to "..." (a pstrdup'd copy of the value)
     623             :  */
     624             : Datum
     625         678 : unknownout(PG_FUNCTION_ARGS)
     626             : {
     627             :     /* representation is same as cstring, so the argument can be fetched as one */
     628         678 :     char       *str = PG_GETARG_CSTRING(0);
     629             : 
     630         678 :     PG_RETURN_CSTRING(pstrdup(str));
     631             : }
     632             : 
     633             : /*
     634             :  *      unknownrecv         - converts external binary format (the StringInfo in argument 0) to unknown
     635             :  */
     636             : Datum
     637           0 : unknownrecv(PG_FUNCTION_ARGS)
     638             : {
     639           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     640             :     char       *str;
     641             :     int         nbytes;     /* output slot required by pq_getmsgtext(); not used afterwards */
     642             : 
     643           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);  /* ask for everything from the cursor to the end of buf */
     644             :     /* representation is same as cstring, so str is returned as-is without copying */
     645           0 :     PG_RETURN_CSTRING(str);
     646             : }
     647             : 
     648             : /*
     649             :  *      unknownsend         - converts unknown to binary format, returned as a bytea
     650             :  */
     651             : Datum
     652           0 : unknownsend(PG_FUNCTION_ARGS)
     653             : {
     654             :     /* representation is same as cstring, so the argument can be fetched as one */
     655           0 :     char       *str = PG_GETARG_CSTRING(0);
     656             :     StringInfoData buf;
     657             : 
     658           0 :     pq_begintypsend(&buf);
     659           0 :     pq_sendtext(&buf, str, strlen(str));    /* strlen() excludes the terminating NUL, so it is not passed on */
     660           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     661             : }
     662             : 
     663             : 
     664             : /* ========== PUBLIC ROUTINES ========== */
     665             : 
     666             : /*
     667             :  * textlen -
     668             :  *    returns the logical length of a text*, as computed by text_length()
     669             :  *     (which is less than the VARSIZE of the text*)
     670             :  */
     671             : Datum
     672      430414 : textlen(PG_FUNCTION_ARGS)
     673             : {
     674      430414 :     Datum       str = PG_GETARG_DATUM(0);
     675             : 
     676             :     /* pass the Datum through untouched to try to avoid decompressing argument */
     677      430414 :     PG_RETURN_INT32(text_length(str));
     678             : }
     679             : 
     680             : /*
     681             :  * text_length -
     682             :  *  Does the real work for textlen(); returns the length as a plain int32
     683             :  *
     684             :  *  This is broken out so it can be called directly by other string processing
     685             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     686             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     687             :  *  in some cases.  Not an fmgr function, so it uses return, not PG_RETURN_*.
     688             :  */
     689             : static int32
     690      430426 : text_length(Datum str)
     691             : {
     692             :     /* fastpath when max encoding length is one: raw size minus header, no detoasting */
     693      430426 :     if (pg_database_encoding_max_length() == 1)
     694          32 :         return toast_raw_datum_size(str) - VARHDRSZ;
     695             :     else
     696             :     {
     697      430394 :         text       *t = DatumGetTextPP(str);
     698             : 
     699      430394 :         return pg_mbstrlen_with_len(VARDATA_ANY(t),
     700             :                                     VARSIZE_ANY_EXHDR(t));
     701             :     }
     702             : }
     703             : 
     704             : /*
     705             :  * textoctetlen -
     706             :  *    returns the physical length of a text*: its raw datum size minus VARHDRSZ
     707             :  *     (which is less than the VARSIZE of the text*)
     708             :  */
     709             : Datum
     710          70 : textoctetlen(PG_FUNCTION_ARGS)
     711             : {
     712          70 :     Datum       str = PG_GETARG_DATUM(0);
     713             : 
     714             :     /* We need not detoast the input at all; only its raw size is consulted */
     715          70 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     716             : }
     717             : 
     718             : /*
     719             :  * textcat -
     720             :  *    takes two text* and returns a text* that is the concatenation of
     721             :  *    the two (argument 0 followed by argument 1), built by text_catenate().
     722             :  *
     723             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     724             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     725             :  * Allocate space for output in all cases.
     726             :  * XXX - thomas 1997-07-10
     727             :  */
     728             : Datum
     729     2571020 : textcat(PG_FUNCTION_ARGS)
     730             : {
     731     2571020 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     732     2571020 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     733             : 
     734     2571020 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     735             : }
     736             : 
     737             : /*
     738             :  * text_catenate
     739             :  *  Guts of textcat(), broken out so it can be used by other functions
     740             :  *  Returns a newly palloc'd text holding t1's data bytes followed by t2's.
     741             :  * Arguments can be in short-header form, but not compressed or out-of-line
     742             :  */
     743             : static text *
     744     2571100 : text_catenate(text *t1, text *t2)
     745             : {
     746             :     text       *result;
     747             :     int         len1,       /* data bytes in t1, header excluded */
     748             :                 len2,       /* data bytes in t2, header excluded */
     749             :                 len;        /* total size of the result, header included */
     750             :     char       *ptr;
     751             : 
     752     2571100 :     len1 = VARSIZE_ANY_EXHDR(t1);
     753     2571100 :     len2 = VARSIZE_ANY_EXHDR(t2);
     754             : 
     755             :     /* paranoia: clamp negative lengths to zero ... probably should throw error instead? */
     756     2571100 :     if (len1 < 0)
     757           0 :         len1 = 0;
     758     2571100 :     if (len2 < 0)
     759           0 :         len2 = 0;
     760             : 
     761     2571100 :     len = len1 + len2 + VARHDRSZ;
     762     2571100 :     result = (text *) palloc(len);
     763             : 
     764             :     /* Set size of result string (len already includes VARHDRSZ)... */
     765     2571100 :     SET_VARSIZE(result, len);
     766             : 
     767             :     /* Fill data field of result string: t1's bytes first, then t2's right after them */
     768     2571100 :     ptr = VARDATA(result);
     769     2571100 :     if (len1 > 0)
     770     2567892 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     771     2571100 :     if (len2 > 0)
     772     2570878 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     773             : 
     774     2571100 :     return result;
     775             : }
     776             : 
     777             : /*
     778             :  * charlen_to_bytelen()
     779             :  *  Compute the number of bytes occupied by n characters starting at *p
     780             :  *
     781             :  * It is caller's responsibility that there actually are n characters;
     782             :  * the string need not be null-terminated (no end-of-string check is made here).
     783             :  */
     784             : static int
     785        9620 : charlen_to_bytelen(const char *p, int n)
     786             : {
     787        9620 :     if (pg_database_encoding_max_length() == 1)
     788             :     {
     789             :         /* Optimization for single-byte encodings: n characters are n bytes */
     790           0 :         return n;
     791             :     }
     792             :     else
     793             :     {
     794             :         const char *s;
     795             : 
     796     5874612 :         for (s = p; n > 0; n--)     /* one character per iteration */
     797     5864992 :             s += pg_mblen(s);
     798             : 
     799        9620 :         return s - p;               /* distance advanced from the start, in bytes */
     800             :     }
     801             : }
     802             : 
     803             : /*
     804             :  * text_substr()
     805             :  * Return a substring starting at the specified position.
     806             :  * - thomas 1997-12-31
     807             :  *
     808             :  * Input:
     809             :  *  - string
     810             :  *  - starting position (is one-based)
     811             :  *  - string length
     812             :  *
     813             :  * If the starting position is zero or less, then return from the start of the string
     814             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     815             :  * If the length is less than zero, text_substring() raises an error.
     816             :  *
     817             :  * Added multibyte support.
     818             :  * - Tatsuo Ishii 1998-4-21
     819             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     820             :  * Formerly returned the entire string; now returns a portion.
     821             :  * - Thomas Lockhart 1998-12-10
     822             :  * Now uses faster TOAST-slicing interface
     823             :  * - John Gray 2002-02-22
     824             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     825             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     826             :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
     827             :  * S > LC and < LC + 4 sometimes garbage characters are returned.
     828             :  * - Joe Conway 2002-08-10
     829             :  */
     830             : Datum
     831      474808 : text_substr(PG_FUNCTION_ARGS)
     832             : {
     833      474808 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     834             :                                     PG_GETARG_INT32(1),
     835             :                                     PG_GETARG_INT32(2),
     836             :                                     false));    /* false: a length was specified */
     837             : }
     838             : 
     839             : /*
     840             :  * text_substr_no_len - substring from the start position to the end of the string
     841             :  *    Wrapper to avoid opr_sanity failure due to
     842             :  *    one function accepting a different number of args.
     843             :  */
     844             : Datum
     845          72 : text_substr_no_len(PG_FUNCTION_ARGS)
     846             : {
     847          72 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     848             :                                     PG_GETARG_INT32(1),
     849             :                                     -1, true));     /* true: length not specified, so the -1 is not consulted */
     850             : }
     851             : 
     852             : /*
     853             :  * text_substring -
     854             :  *  Does the real work for text_substr() and text_substr_no_len()
     855             :  *  start is one-based; length is ignored when length_not_specified is true.
     856             :  *  This is broken out so it can be called directly by other string processing
     857             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     858             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     859             :  *  of it in some cases.
     860             :  *  A negative length (when one is specified) raises ERRCODE_SUBSTRING_ERROR.
     861             :  *  The result is always a freshly palloc'd datum.
     862             :  */
     863             : static text *
     864      514728 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     865             : {
     866      514728 :     int32       eml = pg_database_encoding_max_length();
     867      514728 :     int32       S = start;      /* start position */
     868             :     int32       S1;             /* adjusted start position */
     869             :     int32       L1;             /* adjusted substring length */
     870             :     int32       E;              /* end position */
     871             : 
     872             :     /*
     873             :      * SQL99 says S can be zero or negative, but we still must fetch from the
     874             :      * start of the string.
     875             :      */
     876      514728 :     S1 = Max(S, 1);
     877             : 
     878             :     /* life is easy if the encoding max length is 1 */
     879      514728 :     if (eml == 1)
     880             :     {
     881          16 :         if (length_not_specified)   /* special case - get length to end of
     882             :                                      * string */
     883           0 :             L1 = -1;
     884          16 :         else if (length < 0)
     885             :         {
     886             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     887           0 :             ereport(ERROR,
     888             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     889             :                      errmsg("negative substring length not allowed")));
     890             :             L1 = -1;            /* silence stupider compilers */
     891             :         }
     892          16 :         else if (pg_add_s32_overflow(S, length, &E))
     893             :         {
     894             :             /*
     895             :              * L could be large enough for S + L to overflow, in which case
     896             :              * the substring must run to end of string.
     897             :              */
     898           0 :             L1 = -1;
     899             :         }
     900             :         else
     901             :         {
     902             :             /*
     903             :              * A zero or negative value for the end position can happen if the
     904             :              * start was zero or negative. SQL99 says to return a zero-length
     905             :              * string.
     906             :              */
     907          16 :             if (E < 1)
     908           0 :                 return cstring_to_text("");
     909             : 
     910          16 :             L1 = E - S1;
     911             :         }
     912             : 
     913             :         /*
     914             :          * If the start position is past the end of the string, SQL99 says to
     915             :          * return a zero-length string -- DatumGetTextPSlice() will do that
     916             :          * for us.  We need only convert S1 to zero-based starting position.
     917             :          */
     918          16 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     919             :     }
     920      514712 :     else if (eml > 1)
     921             :     {
     922             :         /*
     923             :          * When encoding max length is > 1, we can't get LC without
     924             :          * detoasting, so we'll grab a conservatively large slice now and go
     925             :          * back later to do the right thing
     926             :          */
     927             :         int32       slice_start;
     928             :         int32       slice_size;
     929             :         int32       slice_strlen;
     930             :         text       *slice;
     931             :         int32       E1;
     932             :         int32       i;
     933             :         char       *p;
     934             :         char       *s;
     935             :         text       *ret;
     936             : 
     937             :         /*
     938             :          * We need to start at position zero because there is no way to know
     939             :          * in advance which byte offset corresponds to the supplied start
     940             :          * position.
     941             :          */
     942      514712 :         slice_start = 0;
     943             : 
     944      514712 :         if (length_not_specified)   /* special case - get length to end of
     945             :                                      * string */
     946         112 :             slice_size = L1 = -1;
     947      514600 :         else if (length < 0)
     948             :         {
     949             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     950          12 :             ereport(ERROR,
     951             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     952             :                      errmsg("negative substring length not allowed")));
     953             :             slice_size = L1 = -1;   /* silence stupider compilers */
     954             :         }
     955      514588 :         else if (pg_add_s32_overflow(S, length, &E))
     956             :         {
     957             :             /*
     958             :              * L could be large enough for S + L to overflow, in which case
     959             :              * the substring must run to end of string.
     960             :              */
     961           6 :             slice_size = L1 = -1;
     962             :         }
     963             :         else
     964             :         {
     965             :             /*
     966             :              * A zero or negative value for the end position can happen if the
     967             :              * start was zero or negative. SQL99 says to return a zero-length
     968             :              * string.
     969             :              */
     970      514582 :             if (E < 1)
     971           0 :                 return cstring_to_text("");
     972             : 
     973             :             /*
     974             :              * if E is past the end of the string, the tuple toaster will
     975             :              * truncate the length for us
     976             :              */
     977      514582 :             L1 = E - S1;
     978             : 
     979             :             /*
     980             :              * Total slice size in bytes can't be any longer than the start
     981             :              * position plus substring length times the encoding max length.
     982             :              * If that overflows, we can just use -1.
     983             :              */
     984      514582 :             if (pg_mul_s32_overflow(E, eml, &slice_size))
     985           6 :                 slice_size = -1;
     986             :         }
     987             : 
     988             :         /*
     989             :          * If we're working with an untoasted source, no need to do an extra
     990             :          * copying step.
     991             :          */
     992      514700 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
     993      514646 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
     994         324 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
     995             :         else
     996      514376 :             slice = (text *) DatumGetPointer(str);  /* use the input in place; slice_size is not applied here */
     997             : 
     998             :         /* see if we got back an empty string */
     999      514700 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
    1000             :         {
    1001           0 :             if (slice != (text *) DatumGetPointer(str))
    1002           0 :                 pfree(slice);
    1003           0 :             return cstring_to_text("");
    1004             :         }
    1005             : 
    1006             :         /* Now we can get the actual length of the slice in MB characters */
    1007      514700 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
    1008      514700 :                                             VARSIZE_ANY_EXHDR(slice));
    1009             : 
    1010             :         /*
    1011             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
    1012             :          * says to return a zero-length string.
    1013             :          */
    1014      514700 :         if (S1 > slice_strlen)
    1015             :         {
    1016          22 :             if (slice != (text *) DatumGetPointer(str))
    1017           0 :                 pfree(slice);
    1018          22 :             return cstring_to_text("");
    1019             :         }
    1020             : 
    1021             :         /*
    1022             :          * Adjust L1 and E1 now that we know the slice string length. Again
    1023             :          * remember that S1 is one based, and slice_start is zero based.
    1024             :          */
    1025      514678 :         if (L1 > -1)
    1026      514582 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
    1027             :         else
    1028          96 :             E1 = slice_start + 1 + slice_strlen;
    1029             : 
    1030             :         /*
    1031             :          * Find the start position in the slice; remember S1 is not zero based
    1032             :          */
    1033      514678 :         p = VARDATA_ANY(slice);
    1034     5335676 :         for (i = 0; i < S1 - 1; i++)
    1035     4820998 :             p += pg_mblen(p);
    1036             : 
    1037             :         /* hang onto a pointer to our start position */
    1038      514678 :         s = p;
    1039             : 
    1040             :         /*
    1041             :          * Count the actual bytes used by the substring of the requested
    1042             :          * length.
    1043             :          */
    1044     2631104 :         for (i = S1; i < E1; i++)
    1045     2116426 :             p += pg_mblen(p);
    1046             : 
    1047      514678 :         ret = (text *) palloc(VARHDRSZ + (p - s));  /* header plus the bytes between s and p */
    1048      514678 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
    1049      514678 :         memcpy(VARDATA(ret), s, (p - s));
    1050             : 
    1051      514678 :         if (slice != (text *) DatumGetPointer(str))     /* free the slice only if it is a separate copy */
    1052         324 :             pfree(slice);
    1053             : 
    1054      514678 :         return ret;
    1055             :     }
    1056             :     else
    1057           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
    1058             : 
    1059             :     /* not reached: suppress compiler warning */
    1060             :     return NULL;
    1061             : }
    1062             : 
    1063             : /*
    1064             :  * textoverlay
    1065             :  *  Replace specified substring of first string with second; the work is done by text_overlay()
    1066             :  *
    1067             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1068             :  * This code is a direct implementation of what the standard says.
    1069             :  */
    1070             : Datum
    1071          28 : textoverlay(PG_FUNCTION_ARGS)
    1072             : {
    1073          28 :     text       *t1 = PG_GETARG_TEXT_PP(0);  /* string to be modified */
    1074          28 :     text       *t2 = PG_GETARG_TEXT_PP(1);  /* replacement string */
    1075          28 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1076          28 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1077             : 
    1078          28 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1079             : }
    1080             : 
    1081             : Datum
    1082          12 : textoverlay_no_len(PG_FUNCTION_ARGS)    /* textoverlay() variant with no length argument */
    1083             : {
    1084          12 :     text       *t1 = PG_GETARG_TEXT_PP(0);  /* string to be modified */
    1085          12 :     text       *t2 = PG_GETARG_TEXT_PP(1);  /* replacement string */
    1086          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1087             :     int         sl;                         /* substring length, computed below */
    1088             : 
    1089          12 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2), as returned by text_length() */
    1090          12 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1091             : }
    1092             : 
    1093             : static text *
    1094          40 : text_overlay(text *t1, text *t2, int sp, int sl)    /* guts of textoverlay() and textoverlay_no_len() */
    1095             : {
    1096             :     text       *result;
    1097             :     text       *s1;         /* part of t1 before position sp */
    1098             :     text       *s2;         /* part of t1 from position sp + sl onward */
    1099             :     int         sp_pl_sl;   /* sp + sl, computed with an overflow check */
    1100             : 
    1101             :     /*
    1102             :      * Check for possible integer-overflow cases.  For zero or negative sp,
    1103             :      * throw a "substring length" error because that's what should be expected
    1104             :      * according to the spec's definition of OVERLAY().
    1105             :      */
    1106          40 :     if (sp <= 0)
    1107           0 :         ereport(ERROR,
    1108             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1109             :                  errmsg("negative substring length not allowed")));
    1110          40 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    1111           0 :         ereport(ERROR,
    1112             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1113             :                  errmsg("integer out of range")));
    1114             : 
    1115          40 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);     /* first sp - 1 characters of t1 */
    1116          40 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);   /* t1 from sp + sl to the end */
    1117          40 :     result = text_catenate(s1, t2);         /* s1 followed by t2 */
    1118          40 :     result = text_catenate(result, s2);     /* ... followed by s2 */
    1119             : 
    1120          40 :     return result;
    1121             : }
    1122             : 
    1123             : /*
    1124             :  * textpos -
    1125             :  *    Return the position of the specified substring, as computed by text_position().
    1126             :  *    Implements the SQL POSITION() function.
    1127             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1128             :  * - thomas 1997-07-27
    1129             :  */
    1130             : Datum
    1131         106 : textpos(PG_FUNCTION_ARGS)
    1132             : {
    1133         106 :     text       *str = PG_GETARG_TEXT_PP(0);         /* string to be searched */
    1134         106 :     text       *search_str = PG_GETARG_TEXT_PP(1);  /* pattern to look for */
    1135             : 
    1136         106 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
    1137             : }
    1138             : 
    1139             : /*
    1140             :  * text_position -
    1141             :  *  Does the real work for textpos()
    1142             :  *
    1143             :  * Inputs:
    1144             :  *      t1 - string to be searched
    1145             :  *      t2 - pattern to match within t1 (collid is passed on to text_position_setup)
    1146             :  * Result:
    1147             :  *      Character index of the first matched char, starting from 1,
    1148             :  *      or 0 if no match.
    1149             :  *
    1150             :  *  This is broken out so it can be called directly by other string processing
    1151             :  *  functions.
    1152             :  */
    1153             : static int
    1154         106 : text_position(text *t1, text *t2, Oid collid)
    1155             : {
    1156             :     TextPositionState state;
    1157             :     int         result;
    1158             : 
    1159             :     /* Empty needle always matches at position 1 */
    1160         106 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
    1161          12 :         return 1;
    1162             : 
    1163             :     /* Otherwise, can't match if haystack is shorter (in bytes) than needle */
    1164          94 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
    1165          22 :         return 0;
    1166             : 
    1167          72 :     text_position_setup(t1, t2, collid, &state);
    1168          72 :     if (!text_position_next(&state))    /* false: no match was found */
    1169          24 :         result = 0;
    1170             :     else
    1171          48 :         result = text_position_get_match_pos(&state);
    1172          72 :     text_position_cleanup(&state);      /* runs whether or not a match was found */
    1173          72 :     return result;
    1174             : }
    1175             : 
    1176             : 
    1177             : /*
    1178             :  * text_position_setup, text_position_next, text_position_cleanup -
    1179             :  *  Component steps of text_position()
    1180             :  *
    1181             :  * These are broken out so that a string can be efficiently searched for
    1182             :  * multiple occurrences of the same pattern.  text_position_next may be
    1183             :  * called multiple times, and it advances to the next match on each call.
    1184             :  * text_position_get_match_ptr() and text_position_get_match_pos() return
    1185             :  * a pointer or 1-based character position of the last match, respectively.
    1186             :  *
    1187             :  * The "state" variable is normally just a local variable in the caller.
    1188             :  *
    1189             :  * NOTE: text_position_next skips over the matched portion.  For example,
    1190             :  * searching for "xx" in "xxx" returns only one match, not two.
    1191             :  */
    1192             : 
    1193             : static void
    1194        2422 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
    1195             : {
    1196        2422 :     int         len1 = VARSIZE_ANY_EXHDR(t1);   /* haystack length */
    1197        2422 :     int         len2 = VARSIZE_ANY_EXHDR(t2);   /* needle length */
    1198        2422 :     pg_locale_t mylocale = 0;   /* stays 0 for a C collation */
    1199             : 
    1200        2422 :     check_collation_set(collid);    /* ERROR if collid is invalid */
    1201             : 
    1202        2422 :     if (!lc_collate_is_c(collid))
    1203         260 :         mylocale = pg_newlocale_from_collation(collid);
    1204             : 
    1205        2422 :     if (mylocale && !mylocale->deterministic)
    1206           0 :         ereport(ERROR,
    1207             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1208             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1209             : 
    1210             :     Assert(len1 > 0);           /* empty inputs are not expected here */
    1211             :     Assert(len2 > 0);
    1212             : 
    1213             :     /*
    1214             :      * Even with a multi-byte encoding, we perform the search using the raw
    1215             :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
    1216             :      * because in UTF-8 the byte sequence of one character cannot contain
    1217             :      * another character.  For other multi-byte encodings, we do the search
    1218             :      * initially as a simple byte search, ignoring multibyte issues, but
    1219             :      * verify afterwards that the match we found is at a character boundary,
    1220             :      * and continue the search if it was a false match.
    1221             :      */
    1222        2422 :     if (pg_database_encoding_max_length() == 1)
    1223          48 :         state->is_multibyte_char_in_char = false;   /* single-byte encoding */
    1224        2374 :     else if (GetDatabaseEncoding() == PG_UTF8)
    1225        2374 :         state->is_multibyte_char_in_char = false;   /* UTF-8: see above */
    1226             :     else
    1227           0 :         state->is_multibyte_char_in_char = true;    /* must verify matches */
    1228             : 
    1229        2422 :     state->str1 = VARDATA_ANY(t1);  /* points into t1; not copied */
    1230        2422 :     state->str2 = VARDATA_ANY(t2);  /* points into t2; not copied */
    1231        2422 :     state->len1 = len1;
    1232        2422 :     state->len2 = len2;
    1233        2422 :     state->last_match = NULL;   /* no match found yet */
    1234        2422 :     state->refpoint = state->str1;  /* reference point for position counting */
    1235        2422 :     state->refpos = 0;          /* 0-based character index of refpoint */
    1236             : 
    1237             :     /*
    1238             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1239             :      * notes we use the terminology that the "haystack" is the string to be
    1240             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1241             :      *
    1242             :      * If the needle is empty or bigger than the haystack then there is no
    1243             :      * point in wasting cycles initializing the table.  We also choose not to
    1244             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1245             :      * save anything in that case.
    1246             :      */
    1247        2422 :     if (len1 >= len2 && len2 > 1)
    1248             :     {
    1249        2222 :         int         searchlength = len1 - len2;
    1250             :         int         skiptablemask;
    1251             :         int         last;
    1252             :         int         i;
    1253        2222 :         const char *str2 = state->str2;
    1254             : 
    1255             :         /*
    1256             :          * First we must determine how much of the skip table to use.  The
    1257             :          * declaration of TextPositionState allows up to 256 elements, but for
    1258             :          * short search problems we don't really want to have to initialize so
    1259             :          * many elements --- it would take too long in comparison to the
    1260             :          * actual search time.  So we choose a useful skip table size based on
    1261             :          * the haystack length minus the needle length.  The closer the needle
    1262             :          * length is to the haystack length the less useful skipping becomes.
    1263             :          *
    1264             :          * Note: since we use bit-masking to select table elements, the skip
    1265             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1266             :          */
    1267        2222 :         if (searchlength < 16)
    1268          54 :             skiptablemask = 3;
    1269        2168 :         else if (searchlength < 64)
    1270          16 :             skiptablemask = 7;
    1271        2152 :         else if (searchlength < 128)
    1272           2 :             skiptablemask = 15;
    1273        2150 :         else if (searchlength < 512)
    1274         146 :             skiptablemask = 31;
    1275        2004 :         else if (searchlength < 2048)
    1276        1898 :             skiptablemask = 63;
    1277         106 :         else if (searchlength < 4096)
    1278          42 :             skiptablemask = 127;
    1279             :         else
    1280          64 :             skiptablemask = 255;
    1281        2222 :         state->skiptablemask = skiptablemask;
    1282             : 
    1283             :         /*
    1284             :          * Initialize the skip table.  We set all elements to the needle
    1285             :          * length, since this is the correct skip distance for any character
    1286             :          * not found in the needle.
    1287             :          */
    1288      150502 :         for (i = 0; i <= skiptablemask; i++)
    1289      148280 :             state->skiptable[i] = len2;
    1290             : 
    1291             :         /*
    1292             :          * Now examine the needle.  For each character except the last one,
    1293             :          * set the corresponding table element to the appropriate skip
    1294             :          * distance.  Note that when two characters share the same skip table
    1295             :          * entry, the one later in the needle must determine the skip
    1296             :          * distance.
    1297             :          */
    1298        2222 :         last = len2 - 1;        /* index of the needle's last byte */
    1299             : 
    1300       27386 :         for (i = 0; i < last; i++)
    1301       25164 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1302             :     }
    1303        2422 : }
    1304             : 
    1305             : /*
    1306             :  * Advance to the next match, starting from the end of the previous match
    1307             :  * (or the beginning of the string, on first call).  Returns true if a match
    1308             :  * is found, in which case state->last_match points at the start of it.
    1309             :  *
    1310             :  * Note that this refuses to match an empty-string needle.  Most callers
    1311             :  * will have handled that case specially and we'll never see it here.
    1312             :  */
    1313             : static bool
    1314        8638 : text_position_next(TextPositionState *state)
    1315             : {
    1316        8638 :     int         needle_len = state->len2;
    1317             :     char       *start_ptr;
    1318             :     char       *matchptr;
    1319             : 
    1320        8638 :     if (needle_len <= 0)
    1321           0 :         return false;           /* result for empty pattern */
    1322             : 
    1323             :     /* Start from the point right after the previous match. */
    1324        8638 :     if (state->last_match)
    1325        6204 :         start_ptr = state->last_match + needle_len;
    1326             :     else
    1327        2434 :         start_ptr = state->str1;
    1328             : 
    1329        8638 : retry:                          /* re-entered after rejecting a false match */
    1330        8638 :     matchptr = text_position_next_internal(start_ptr, state);
    1331             : 
    1332        8638 :     if (!matchptr)
    1333        2362 :         return false;           /* no further byte-level match */
    1334             : 
    1335             :     /*
    1336             :      * Found a match for the byte sequence.  If this is a multibyte encoding,
    1337             :      * where one character's byte sequence can appear inside a longer
    1338             :      * multi-byte character, we need to verify that the match was at a
    1339             :      * character boundary, not in the middle of a multi-byte character.
    1340             :      */
    1341        6276 :     if (state->is_multibyte_char_in_char)
    1342             :     {
    1343             :         /* Walk one character at a time, until we reach the match. */
    1344             : 
    1345             :         /* The search should never move backwards. */
    1346             :         Assert(state->refpoint <= matchptr);
    1347             : 
    1348           0 :         while (state->refpoint < matchptr)
    1349             :         {
    1350             :             /* Step to the next character, counting it in refpos. */
    1351           0 :             state->refpoint += pg_mblen(state->refpoint);
    1352           0 :             state->refpos++;
    1353             : 
    1354             :             /*
    1355             :              * If we stepped over the match's start position, then it was a
    1356             :              * false positive, where the byte sequence appeared in the middle
    1357             :              * of a multi-byte character.  Skip it, and continue the search at
    1358             :              * the next character boundary.
    1359             :              */
    1360           0 :             if (state->refpoint > matchptr)
    1361             :             {
    1362           0 :                 start_ptr = state->refpoint;
    1363           0 :                 goto retry;
    1364             :             }
    1365             :         }
    1366             :     }
    1367             : 
    1368        6276 :     state->last_match = matchptr;   /* also the basis for the next call */
    1369        6276 :     return true;
    1370             : }
    1371             : 
    1372             : /*
    1373             :  * Subroutine of text_position_next().  This searches for the raw byte
    1374             :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
    1375             :  * match starting at or after 'start_ptr', or NULL if no match is found.
    1376             :  */
    1377             : static char *
    1378        8638 : text_position_next_internal(char *start_ptr, TextPositionState *state)
    1379             : {
    1380        8638 :     int         haystack_len = state->len1;
    1381        8638 :     int         needle_len = state->len2;
    1382        8638 :     int         skiptablemask = state->skiptablemask;
    1383        8638 :     const char *haystack = state->str1;
    1384        8638 :     const char *needle = state->str2;
    1385        8638 :     const char *haystack_end = &haystack[haystack_len];
    1386             :     const char *hptr;
    1387             : 
    1388             :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
    1389             : 
    1390        8638 :     if (needle_len == 1)
    1391             :     {
    1392             :         /* No point in using B-M-H for a one-character needle */
    1393         726 :         char        nchar = *needle;
    1394             : 
    1395         726 :         hptr = start_ptr;
    1396        5538 :         while (hptr < haystack_end)
    1397             :         {
    1398        5380 :             if (*hptr == nchar)
    1399         568 :                 return (char *) hptr;
    1400        4812 :             hptr++;
    1401             :         }
    1402             :     }
    1403             :     else
    1404             :     {
    1405        7912 :         const char *needle_last = &needle[needle_len - 1];
    1406             : 
    1407             :         /* Start with hptr where the needle's last byte would fall */
    1408        7912 :         hptr = start_ptr + needle_len - 1;
    1409      204456 :         while (hptr < haystack_end)
    1410             :         {
    1411             :             /* Match the needle scanning *backward* */
    1412             :             const char *nptr;
    1413             :             const char *p;
    1414             : 
    1415      202252 :             nptr = needle_last;
    1416      202252 :             p = hptr;
    1417      284180 :             while (*nptr == *p)
    1418             :             {
    1419             :                 /* Matched it all?  If so, return pointer to start of match */
    1420       87636 :                 if (nptr == needle)
    1421        5708 :                     return (char *) p;
    1422       81928 :                 nptr--, p--;
    1423             :             }
    1424             : 
    1425             :             /*
    1426             :              * No match, so use the haystack char at hptr to decide how far to
    1427             :              * advance.  If the needle had any occurrence of that character
    1428             :              * (or more precisely, one sharing the same skiptable entry)
    1429             :              * before its last character, then we advance far enough to align
    1430             :              * the last such needle character with that haystack position.
    1431             :              * Otherwise we can advance by the whole needle length.
    1432             :              */
    1433      196544 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1434             :         }
    1435             :     }
    1436             : 
    1437        2362 :     return 0;                   /* not found (null pointer) */
    1438             : }
    1439             : 
    1440             : /*
    1441             :  * Return a pointer to the start of the current (most recent) match.
    1442             :  *
    1443             :  * The returned pointer points into the original haystack string.
    1444             :  */
    1445             : static char *
    1446        6198 : text_position_get_match_ptr(TextPositionState *state)
    1447             : {
    1448        6198 :     return state->last_match;   /* NULL if no match since setup/reset */
    1449             : }
    1450             : 
    1451             : /*
    1452             :  * Return the offset of the current match.
    1453             :  *
    1454             :  * The offset is in characters, 1-based.
    1455             :  */
    1456             : static int
    1457          48 : text_position_get_match_pos(TextPositionState *state)
    1458             : {
    1459             :     /* Count only the characters between refpoint and the match start. */
    1460          96 :     state->refpos += pg_mbstrlen_with_len(state->refpoint,
    1461          48 :                                           state->last_match - state->refpoint);
    1462          48 :     state->refpoint = state->last_match;    /* next count resumes from here */
    1463          48 :     return state->refpos + 1;   /* refpos is 0-based */
    1464             : }
    1465             : 
    1466             : /*
    1467             :  * Reset search state to the initial state installed by text_position_setup.
    1468             :  *
    1469             :  * The next call to text_position_next will search from the beginning
    1470             :  * of the string.  The strings, lengths and skip table are left untouched.
    1471             :  */
    1472             : static void
    1473          12 : text_position_reset(TextPositionState *state)
    1474             : {
    1475          12 :     state->last_match = NULL;   /* forget the previous match */
    1476          12 :     state->refpoint = state->str1;  /* position counting restarts at the */
    1477          12 :     state->refpos = 0;          /* beginning of the haystack */
    1478          12 : }
    1479             : 
    1480             : static void
    1481        2422 : text_position_cleanup(TextPositionState *state)
    1482             : {
    1483             :     /* no cleanup needed: text_position_setup does not allocate anything */
    1484        2422 : }
    1485             : 
    1486             : 
    1487             : static void
    1488    15719526 : check_collation_set(Oid collid)   /* raises ERROR unless collid is valid */
    1489             : {
    1490    15719526 :     if (!OidIsValid(collid))
    1491             :     {
    1492             :         /*
    1493             :          * This typically means that the parser could not resolve a conflict
    1494             :          * of implicit collations, so report it that way.
    1495             :          */
    1496          12 :         ereport(ERROR,
    1497             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1498             :                  errmsg("could not determine which collation to use for string comparison"),
    1499             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1500             :     }
    1501    15719514 : }
    1502             : 
    1503             : /* varstr_cmp()
    1504             :  * Comparison function for text strings with given lengths.
    1505             :  * Includes locale support, but must copy strings to temporary memory
    1506             :  *  to allow null-termination for inputs to strcoll().
    1507             :  * Returns an integer less than, equal to, or greater than zero, indicating
    1508             :  * whether arg1 is less than, equal to, or greater than arg2.
    1509             :  *
    1510             :  * Note: many functions that depend on this are marked leakproof; therefore,
    1511             :  * avoid reporting the actual contents of the input when throwing errors.
    1512             :  * All errors herein should be things that can't happen except on corrupt
    1513             :  * data, anyway; otherwise we will have trouble with indexing strings that
    1514             :  * would cause them.
    1515             :  */
    1516             : int
    1517     8480582 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
    1518             : {
    1519             :     int         result;
    1520             : 
    1521     8480582 :     check_collation_set(collid);    /* ERROR if collid is invalid */
    1522             : 
    1523             :     /*
    1524             :      * Unfortunately, there is no strncoll(), so in the non-C locale case we
    1525             :      * have to do some memory copying.  This turns out to be significantly
    1526             :      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
    1527             :      * optimize relatively-short strings by avoiding palloc/pfree overhead.
    1528             :      */
    1529     8480576 :     if (lc_collate_is_c(collid))
    1530             :     {
    1531     4260780 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1532     4260780 :         if ((result == 0) && (len1 != len2))
    1533      130692 :             result = (len1 < len2) ? -1 : 1;    /* shorter string sorts first */
    1534             :     }
    1535             :     else
    1536             :     {
    1537             :         char        a1buf[TEXTBUFLEN];
    1538             :         char        a2buf[TEXTBUFLEN];
    1539             :         char       *a1p,
    1540             :                    *a2p;
    1541             :         pg_locale_t mylocale;
    1542             : 
    1543     4219796 :         mylocale = pg_newlocale_from_collation(collid);
    1544             : 
    1545             :         /*
    1546             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1547             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1548             :          * memcmp() followed by strcoll() is only trivially slower than
    1549             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1550             :          * very often, and if it does - for example, because there are many
    1551             :          * equal strings in the input - then we win big by avoiding expensive
    1552             :          * collation-aware comparisons.
    1553             :          */
    1554     4219796 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1555     1692454 :             return 0;
    1556             : 
    1557             : #ifdef WIN32
    1558             :         /* Win32 does not have UTF-8, so we need to map to UTF-16 */
    1559             :         if (GetDatabaseEncoding() == PG_UTF8
    1560             :             && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
    1561             :         {
    1562             :             int         a1len;
    1563             :             int         a2len;
    1564             :             int         r;
    1565             : 
    1566             :             if (len1 >= TEXTBUFLEN / 2)
    1567             :             {
    1568             :                 a1len = len1 * 2 + 2;   /* two bytes per input byte, plus terminator */
    1569             :                 a1p = palloc(a1len);
    1570             :             }
    1571             :             else
    1572             :             {
    1573             :                 a1len = TEXTBUFLEN;
    1574             :                 a1p = a1buf;
    1575             :             }
    1576             :             if (len2 >= TEXTBUFLEN / 2)
    1577             :             {
    1578             :                 a2len = len2 * 2 + 2;
    1579             :                 a2p = palloc(a2len);
    1580             :             }
    1581             :             else
    1582             :             {
    1583             :                 a2len = TEXTBUFLEN;
    1584             :                 a2p = a2buf;
    1585             :             }
    1586             : 
    1587             :             /* MultiByteToWideChar() does not work for zero-length input */
    1588             :             if (len1 == 0)
    1589             :                 r = 0;
    1590             :             else
    1591             :             {
    1592             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1593             :                                         (LPWSTR) a1p, a1len / 2);
    1594             :                 if (!r)
    1595             :                     ereport(ERROR,
    1596             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1597             :                                     GetLastError())));
    1598             :             }
    1599             :             ((LPWSTR) a1p)[r] = 0;  /* terminate the wide string */
    1600             : 
    1601             :             if (len2 == 0)
    1602             :                 r = 0;
    1603             :             else
    1604             :             {
    1605             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1606             :                                         (LPWSTR) a2p, a2len / 2);
    1607             :                 if (!r)
    1608             :                     ereport(ERROR,
    1609             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1610             :                                     GetLastError())));
    1611             :             }
    1612             :             ((LPWSTR) a2p)[r] = 0;  /* terminate the wide string */
    1613             : 
    1614             :             errno = 0;
    1615             : #ifdef HAVE_LOCALE_T
    1616             :             if (mylocale)
    1617             :                 result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
    1618             :             else
    1619             : #endif
    1620             :                 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
    1621             :             if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw
    1622             :                                          * headers */
    1623             :                 ereport(ERROR,
    1624             :                         (errmsg("could not compare Unicode strings: %m")));
    1625             : 
    1626             :             /* Break tie bytewise, unless the collation is nondeterministic. */
    1627             :             if (result == 0 &&
    1628             :                 (!mylocale || mylocale->deterministic))
    1629             :             {
    1630             :                 result = memcmp(arg1, arg2, Min(len1, len2));
    1631             :                 if ((result == 0) && (len1 != len2))
    1632             :                     result = (len1 < len2) ? -1 : 1;
    1633             :             }
    1634             : 
    1635             :             if (a1p != a1buf)
    1636             :                 pfree(a1p);
    1637             :             if (a2p != a2buf)
    1638             :                 pfree(a2p);
    1639             : 
    1640             :             return result;
    1641             :         }
    1642             : #endif                          /* WIN32 */
    1643             : 
    1644     2527342 :         if (len1 >= TEXTBUFLEN) /* local buffer can't hold string plus NUL */
    1645         260 :             a1p = (char *) palloc(len1 + 1);
    1646             :         else
    1647     2527082 :             a1p = a1buf;
    1648     2527342 :         if (len2 >= TEXTBUFLEN)
    1649         132 :             a2p = (char *) palloc(len2 + 1);
    1650             :         else
    1651     2527210 :             a2p = a2buf;
    1652             : 
    1653     2527342 :         memcpy(a1p, arg1, len1);
    1654     2527342 :         a1p[len1] = '\0';
    1655     2527342 :         memcpy(a2p, arg2, len2);
    1656     2527342 :         a2p[len2] = '\0';
    1657             : 
    1658     2527342 :         if (mylocale)
    1659             :         {
    1660           0 :             if (mylocale->provider == COLLPROVIDER_ICU)
    1661             :             {
    1662             : #ifdef USE_ICU
    1663             : #ifdef HAVE_UCOL_STRCOLLUTF8
    1664             :                 if (GetDatabaseEncoding() == PG_UTF8)
    1665             :                 {
    1666             :                     UErrorCode  status;
    1667             : 
    1668             :                     status = U_ZERO_ERROR;
    1669             :                     result = ucol_strcollUTF8(mylocale->info.icu.ucol,
    1670             :                                               arg1, len1,
    1671             :                                               arg2, len2,
    1672             :                                               &status);
    1673             :                     if (U_FAILURE(status))
    1674             :                         ereport(ERROR,
    1675             :                                 (errmsg("collation failed: %s", u_errorName(status))));
    1676             :                 }
    1677             :                 else
    1678             : #endif
    1679             :                 {
    1680             :                     int32_t     ulen1,
    1681             :                                 ulen2;
    1682             :                     UChar      *uchar1,
    1683             :                                *uchar2;
    1684             : 
    1685             :                     ulen1 = icu_to_uchar(&uchar1, arg1, len1);
    1686             :                     ulen2 = icu_to_uchar(&uchar2, arg2, len2);
    1687             : 
    1688             :                     result = ucol_strcoll(mylocale->info.icu.ucol,
    1689             :                                           uchar1, ulen1,
    1690             :                                           uchar2, ulen2);
    1691             : 
    1692             :                     pfree(uchar1);
    1693             :                     pfree(uchar2);
    1694             :                 }
    1695             : #else                           /* not USE_ICU */
    1696             :                 /* shouldn't happen */
    1697           0 :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1698             : #endif                          /* not USE_ICU */
    1699             :             }
    1700             :             else
    1701             :             {
    1702             : #ifdef HAVE_LOCALE_T
    1703           0 :                 result = strcoll_l(a1p, a2p, mylocale->info.lt);
    1704             : #else
    1705             :                 /* shouldn't happen */
    1706             :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1707             : #endif
    1708             :             }
    1709             :         }
    1710             :         else
    1711     2527342 :             result = strcoll(a1p, a2p);
    1712             : 
    1713             :         /* Break tie bytewise, unless the collation is nondeterministic. */
    1714     2527342 :         if (result == 0 &&
    1715           0 :             (!mylocale || mylocale->deterministic))
    1716           0 :             result = strcmp(a1p, a2p);
    1717             : 
    1718     2527342 :         if (a1p != a1buf)
    1719         260 :             pfree(a1p);
    1720     2527342 :         if (a2p != a2buf)
    1721         132 :             pfree(a2p);
    1722             :     }
    1723             : 
    1724     6788122 :     return result;
    1725             : }
    1726             : 
    1727             : /* text_cmp()
    1728             :  * Internal comparison function for text strings.
    1729             :  * Returns <0, 0 or >0 (the result of varstr_cmp(), passed through as-is)
    1730             :  */
    1731             : static int
    1732     6844108 : text_cmp(text *arg1, text *arg2, Oid collid)
    1733             : {
    1734             :     char       *a1p,
    1735             :                *a2p;
    1736             :     int         len1,
    1737             :                 len2;
    1738             : 
    1739     6844108 :     a1p = VARDATA_ANY(arg1);
    1740     6844108 :     a2p = VARDATA_ANY(arg2);
    1741             : 
    1742     6844108 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1743     6844108 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1744             : 
    1745     6844108 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1746             : }
    1747             : 
    1748             : /*
    1749             :  * Comparison functions for text strings.
    1750             :  *
    1751             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1752             :  * be careful to free working copies of toasted datums.  Most places don't
    1753             :  * need to be so careful.
    1754             :  */
    1755             : 
    1756             : Datum
    1757     6815200 : texteq(PG_FUNCTION_ARGS)
    1758             : {
                           /*
                            * Equality test for two text arguments under the call's collation.
                            * The boolean outcome is returned through PG_RETURN_BOOL().
                            */
    1759     6815200 :     Oid         collid = PG_GET_COLLATION();
    1760     6815200 :     bool        locale_is_c = false;
    1761     6815200 :     pg_locale_t mylocale = 0;
    1762             :     bool        result;
    1763             : 
    1764     6815200 :     check_collation_set(collid);
    1765             : 
                           /* A locale object is only looked up when the collation is not C. */
    1766     6815200 :     if (lc_collate_is_c(collid))
    1767      543010 :         locale_is_c = true;
    1768             :     else
    1769     6272190 :         mylocale = pg_newlocale_from_collation(collid);
    1770             : 
                           /*
                            * Bytewise path: taken for the C collation, when no locale object was
                            * returned, or when the locale object is marked deterministic.
                            */
    1771     6815200 :     if (locale_is_c || !mylocale || mylocale->deterministic)
    1772     6815200 :     {
    1773     6815200 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1774     6815200 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1775             :         Size        len1,
    1776             :                     len2;
    1777             : 
    1778             :         /*
    1779             :          * Since we only care about equality or not-equality, we can avoid all
    1780             :          * the expense of strcoll() here, and just do bitwise comparison.  In
    1781             :          * fact, we don't even have to do a bitwise comparison if we can show
    1782             :          * the lengths of the strings are unequal; which might save us from
    1783             :          * having to detoast one or both values.
    1784             :          */
    1785     6815200 :         len1 = toast_raw_datum_size(arg1);
    1786     6815200 :         len2 = toast_raw_datum_size(arg2);
    1787     6815200 :         if (len1 != len2)
    1788     2451528 :             result = false;
    1789             :         else
    1790             :         {
    1791     4363672 :             text       *targ1 = DatumGetTextPP(arg1);
    1792     4363672 :             text       *targ2 = DatumGetTextPP(arg2);
    1793             : 
                                   /*
                                    * The raw sizes are equal here, so one length serves both
                                    * sides; VARHDRSZ is subtracted from the raw size to get the
                                    * number of data bytes to compare.
                                    */
    1794     4363672 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1795             :                              len1 - VARHDRSZ) == 0);
    1796             : 
    1797     4363672 :             PG_FREE_IF_COPY(targ1, 0);
    1798     4363672 :             PG_FREE_IF_COPY(targ2, 1);
    1799             :         }
    1800             :     }
    1801             :     else
    1802             :     {
                               /*
                                * Reached only with a locale object that is not deterministic:
                                * decide equality from the full text_cmp() result instead.
                                */
    1803           0 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1804           0 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1805             : 
    1806           0 :         result = (text_cmp(arg1, arg2, collid) == 0);
    1807             : 
    1808           0 :         PG_FREE_IF_COPY(arg1, 0);
    1809           0 :         PG_FREE_IF_COPY(arg2, 1);
    1810             :     }
    1811             : 
    1812     6815200 :     PG_RETURN_BOOL(result);
    1813             : }
    1814             : 
    1815             : Datum
    1816       19258 : textne(PG_FUNCTION_ARGS)
    1817             : {
                           /*
                            * Inequality test for two text arguments; same structure as texteq()
                            * with the sense of every result inverted.
                            */
    1818       19258 :     Oid         collid = PG_GET_COLLATION();
    1819       19258 :     bool        locale_is_c = false;
    1820       19258 :     pg_locale_t mylocale = 0;
    1821             :     bool        result;
    1822             : 
    1823       19258 :     check_collation_set(collid);
    1824             : 
                           /* A locale object is only looked up when the collation is not C. */
    1825       19258 :     if (lc_collate_is_c(collid))
    1826          18 :         locale_is_c = true;
    1827             :     else
    1828       19240 :         mylocale = pg_newlocale_from_collation(collid);
    1829             : 
                           /*
                            * Bytewise path: taken for the C collation, when no locale object was
                            * returned, or when the locale object is marked deterministic.
                            */
    1830       19258 :     if (locale_is_c || !mylocale || mylocale->deterministic)
    1831       19258 :     {
    1832       19258 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1833       19258 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1834             :         Size        len1,
    1835             :                     len2;
    1836             : 
    1837             :         /* See comment in texteq() */
    1838       19258 :         len1 = toast_raw_datum_size(arg1);
    1839       19258 :         len2 = toast_raw_datum_size(arg2);
                               /* Differing raw sizes settle the answer without detoasting. */
    1840       19258 :         if (len1 != len2)
    1841        1780 :             result = true;
    1842             :         else
    1843             :         {
    1844       17478 :             text       *targ1 = DatumGetTextPP(arg1);
    1845       17478 :             text       *targ2 = DatumGetTextPP(arg2);
    1846             : 
                                   /* Equal raw sizes: compare the data bytes (header excluded). */
    1847       17478 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1848             :                              len1 - VARHDRSZ) != 0);
    1849             : 
    1850       17478 :             PG_FREE_IF_COPY(targ1, 0);
    1851       17478 :             PG_FREE_IF_COPY(targ2, 1);
    1852             :         }
    1853             :     }
    1854             :     else
    1855             :     {
                               /*
                                * Reached only with a locale object that is not deterministic:
                                * decide inequality from the full text_cmp() result instead.
                                */
    1856           0 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1857           0 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1858             : 
    1859           0 :         result = (text_cmp(arg1, arg2, collid) != 0);
    1860             : 
    1861           0 :         PG_FREE_IF_COPY(arg1, 0);
    1862           0 :         PG_FREE_IF_COPY(arg2, 1);
    1863             :     }
    1864             : 
    1865       19258 :     PG_RETURN_BOOL(result);
    1866             : }
    1867             : 
    1868             : Datum
    1869      123554 : text_lt(PG_FUNCTION_ARGS)
    1870             : {
    1871      123554 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1872      123554 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1873             :     bool        result;
    1874             : 
                           /* "Less than" holds when text_cmp() yields a negative value. */
    1875      123554 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1876             : 
                           /* Release any copies made while fetching the arguments. */
    1877      123548 :     PG_FREE_IF_COPY(arg1, 0);
    1878      123548 :     PG_FREE_IF_COPY(arg2, 1);
    1879             : 
    1880      123548 :     PG_RETURN_BOOL(result);
    1881             : }
    1882             : 
    1883             : Datum
    1884      325034 : text_le(PG_FUNCTION_ARGS)
    1885             : {
    1886      325034 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1887      325034 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1888             :     bool        result;
    1889             : 
                           /* "Less than or equal" holds when text_cmp() yields zero or less. */
    1890      325034 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1891             : 
                           /* Release any copies made while fetching the arguments. */
    1892      325034 :     PG_FREE_IF_COPY(arg1, 0);
    1893      325034 :     PG_FREE_IF_COPY(arg2, 1);
    1894             : 
    1895      325034 :     PG_RETURN_BOOL(result);
    1896             : }
    1897             : 
    1898             : Datum
    1899      112336 : text_gt(PG_FUNCTION_ARGS)
    1900             : {
    1901      112336 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1902      112336 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1903             :     bool        result;
    1904             : 
                           /* "Greater than" holds when text_cmp() yields a positive value. */
    1905      112336 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1906             : 
                           /* Release any copies made while fetching the arguments. */
    1907      112336 :     PG_FREE_IF_COPY(arg1, 0);
    1908      112336 :     PG_FREE_IF_COPY(arg2, 1);
    1909             : 
    1910      112336 :     PG_RETURN_BOOL(result);
    1911             : }
    1912             : 
    1913             : Datum
    1914      184534 : text_ge(PG_FUNCTION_ARGS)
    1915             : {
    1916      184534 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1917      184534 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1918             :     bool        result;
    1919             : 
                           /* "Greater than or equal" holds when text_cmp() yields zero or more. */
    1920      184534 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1921             : 
                           /* Release any copies made while fetching the arguments. */
    1922      184534 :     PG_FREE_IF_COPY(arg1, 0);
    1923      184534 :     PG_FREE_IF_COPY(arg2, 1);
    1924             : 
    1925      184534 :     PG_RETURN_BOOL(result);
    1926             : }
    1927             : 
    1928             : Datum
    1929       37914 : text_starts_with(PG_FUNCTION_ARGS)
    1930             : {
                           /*
                            * Reports whether the data of argument 1 matches, byte for byte, the
                            * leading part of argument 0.  Raises an error for nondeterministic
                            * collations.
                            */
    1931       37914 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1932       37914 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1933       37914 :     Oid         collid = PG_GET_COLLATION();
    1934       37914 :     pg_locale_t mylocale = 0;
    1935             :     bool        result;
    1936             :     Size        len1,
    1937             :                 len2;
    1938             : 
    1939       37914 :     check_collation_set(collid);
    1940             : 
                           /* mylocale stays 0 for the C collation. */
    1941       37914 :     if (!lc_collate_is_c(collid))
    1942       37914 :         mylocale = pg_newlocale_from_collation(collid);
    1943             : 
                           /* The byte comparison below is not offered for such collations. */
    1944       37914 :     if (mylocale && !mylocale->deterministic)
    1945           0 :         ereport(ERROR,
    1946             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1947             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1948             : 
                           /*
                            * Compare raw sizes first: a second argument larger than the first
                            * gives false without fetching either value's data.
                            */
    1949       37914 :     len1 = toast_raw_datum_size(arg1);
    1950       37914 :     len2 = toast_raw_datum_size(arg2);
    1951       37914 :     if (len2 > len1)
    1952           0 :         result = false;
    1953             :     else
    1954             :     {
                               /*
                                * NOTE(review): len2 is a raw datum size, and texteq() subtracts
                                * VARHDRSZ from the same quantity to get a data length, so the
                                * slice requested here looks longer than the prefix itself.  The
                                * memcmp() is bounded by VARSIZE_ANY_EXHDR(targ2), so only the
                                * prefix's own bytes are compared -- confirm that
                                * text_substring() always returns at least that many bytes.
                                */
    1955       37914 :         text       *targ1 = text_substring(arg1, 1, len2, false);
    1956       37914 :         text       *targ2 = DatumGetTextPP(arg2);
    1957             : 
    1958       37914 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1959       37914 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
    1960             : 
                               /*
                                * NOTE(review): targ1 is the value returned by text_substring(),
                                * not a direct fetch of argument 0 -- confirm this is the intended
                                * way to release it.
                                */
    1961       37914 :         PG_FREE_IF_COPY(targ1, 0);
    1962       37914 :         PG_FREE_IF_COPY(targ2, 1);
    1963             :     }
    1964             : 
    1965       37914 :     PG_RETURN_BOOL(result);
    1966             : }
    1967             : 
    1968             : Datum
    1969     5872882 : bttextcmp(PG_FUNCTION_ARGS)
    1970             : {
    1971     5872882 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1972     5872882 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1973             :     int32       result;
    1974             : 
                           /* Three-way result straight from text_cmp() under the call's collation. */
    1975     5872882 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1976             : 
                           /* Release any copies made while fetching the arguments. */
    1977     5872882 :     PG_FREE_IF_COPY(arg1, 0);
    1978     5872882 :     PG_FREE_IF_COPY(arg2, 1);
    1979             : 
    1980     5872882 :     PG_RETURN_INT32(result);
    1981             : }
    1982             : 
    1983             : Datum
    1984       79892 : bttextsortsupport(PG_FUNCTION_ARGS)
    1985             : {
                           /*
                            * Sets up the SortSupport object passed as argument 0 for type text,
                            * using the collation recorded in ssup->ssup_collation.
                            */
    1986       79892 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1987       79892 :     Oid         collid = ssup->ssup_collation;
    1988             :     MemoryContext oldcontext;
    1989             : 
                           /* Run the setup with ssup->ssup_cxt as the current memory context. */
    1990       79892 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1991             : 
    1992             :     /* Use generic string SortSupport */
    1993       79892 :     varstr_sortsupport(ssup, TEXTOID, collid);
    1994             : 
                           /* Restore the memory context that was current on entry. */
    1995       79886 :     MemoryContextSwitchTo(oldcontext);
    1996             : 
    1997       79886 :     PG_RETURN_VOID();
    1998             : }
    1999             : 
    2000             : /*
    2001             :  * Generic sortsupport interface for character type's operator classes.
    2002             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    2003             :  * trailing spaces before comparison).
    2004             :  *
    2005             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    2006             :  * same representation.  Callers that always use the C collation (e.g.
    2007             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    2008             :  * this will not work with any other collation, though.
    2009             :  */
    2010             : void
    2011      150670 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
    2012             : {
                           /*
                            * Start from the caller's abbreviation request; it is cleared below
                            * for type NAME and, unless TRUST_STRXFRM is defined, for non-C
                            * collations whose locale provider is not ICU.
                            */
    2013      150670 :     bool        abbreviate = ssup->abbreviate;
    2014      150670 :     bool        collate_c = false;
    2015             :     VarStringSortSupport *sss;
    2016      150670 :     pg_locale_t locale = 0;
    2017             : 
    2018      150670 :     check_collation_set(collid);
    2019             : 
    2020             :     /*
    2021             :      * If possible, set ssup->comparator to a function which can be used to
    2022             :      * directly compare two datums.  If we can do this, we'll avoid the
    2023             :      * overhead of a trip through the fmgr layer for every comparison, which
    2024             :      * can be substantial.
    2025             :      *
    2026             :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
    2027             :      * which uses strcoll() to perform comparisons.  We use that for the
    2028             :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
    2029             :      * LC_COLLATE = C, we can make things quite a bit faster with
    2030             :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
    2031             :      * memcmp() rather than strcoll().
    2032             :      */
    2033      150664 :     if (lc_collate_is_c(collid))
    2034             :     {
    2035      101914 :         if (typid == BPCHAROID)
    2036          28 :             ssup->comparator = bpcharfastcmp_c;
    2037      101886 :         else if (typid == NAMEOID)
    2038             :         {
    2039       69934 :             ssup->comparator = namefastcmp_c;
    2040             :             /* Not supporting abbreviation with type NAME, for now */
    2041       69934 :             abbreviate = false;
    2042             :         }
    2043             :         else
    2044       31952 :             ssup->comparator = varstrfastcmp_c;
    2045             : 
    2046      101914 :         collate_c = true;
    2047             :     }
    2048             :     else
    2049             :     {
    2050             :         /*
    2051             :          * We need a collation-sensitive comparison.  To make things faster,
    2052             :          * we'll figure out the collation based on the locale id and cache the
    2053             :          * result.
    2054             :          */
    2055       48750 :         locale = pg_newlocale_from_collation(collid);
    2056             : 
    2057             :         /*
    2058             :          * There is a further exception on Windows.  When the database
    2059             :          * encoding is UTF-8 and we are not using the C collation, complex
    2060             :          * hacks are required.  We don't currently have a comparator that
    2061             :          * handles that case, so we fall back on the slow method of having the
    2062             :          * sort code invoke bttextcmp() (in the case of text) via the fmgr
    2063             :          * trampoline.  ICU locales work just the same on Windows, however.
    2064             :          */
    2065             : #ifdef WIN32
                               /*
                                * Returning here means this function assigns neither
                                * ssup->comparator nor ssup->ssup_extra.
                                */
    2066             :         if (GetDatabaseEncoding() == PG_UTF8 &&
    2067             :             !(locale && locale->provider == COLLPROVIDER_ICU))
    2068             :             return;
    2069             : #endif
    2070             : 
    2071             :         /*
    2072             :          * We use varlenafastcmp_locale except for type NAME.
    2073             :          */
    2074       48750 :         if (typid == NAMEOID)
    2075             :         {
    2076           0 :             ssup->comparator = namefastcmp_locale;
    2077             :             /* Not supporting abbreviation with type NAME, for now */
    2078           0 :             abbreviate = false;
    2079             :         }
    2080             :         else
    2081       48750 :             ssup->comparator = varlenafastcmp_locale;
    2082             :     }
    2083             : 
    2084             :     /*
    2085             :      * Unfortunately, it seems that abbreviation for non-C collations is
    2086             :      * broken on many common platforms; testing of multiple versions of glibc
    2087             :      * reveals that, for many locales, strcoll() and strxfrm() do not return
    2088             :      * consistent results, which is fatal to this optimization.  While no
    2089             :      * libc other than Cygwin has so far been shown to have a problem,
    2090             :      * we take the conservative course of action for right now and disable
    2091             :      * this categorically.  (Users who are certain this isn't a problem on
    2092             :      * their system can define TRUST_STRXFRM.)
    2093             :      *
    2094             :      * Even apart from the risk of broken locales, it's possible that there
    2095             :      * are platforms where the use of abbreviated keys should be disabled at
    2096             :      * compile time.  Having only 4 byte datums could make worst-case
    2097             :      * performance drastically more likely, for example.  Moreover, macOS's
    2098             :      * strxfrm() implementation is known to not effectively concentrate a
    2099             :      * significant amount of entropy from the original string in earlier
    2100             :      * transformed blobs.  It's possible that other supported platforms are
    2101             :      * similarly encumbered.  So, if we ever get past disabling this
    2102             :      * categorically, we may still want or need to disable it for particular
    2103             :      * platforms.
    2104             :      */
    2105             : #ifndef TRUST_STRXFRM
    2106      150664 :     if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
    2107       48750 :         abbreviate = false;
    2108             : #endif
    2109             : 
    2110             :     /*
    2111             :      * If we're using abbreviated keys, or if we're using a locale-aware
    2112             :      * comparison, we need to initialize a VarStringSortSupport object. Both
    2113             :      * cases will make use of the temporary buffers we initialize here for
    2114             :      * scratch space (and to detect requirement for BpChar semantics from
    2115             :      * caller), and the abbreviation case requires additional state.
    2116             :      */
    2117      150664 :     if (abbreviate || !collate_c)
    2118             :     {
                               /* Two scratch buffers, each starting at TEXTBUFLEN bytes. */
    2119       52030 :         sss = palloc(sizeof(VarStringSortSupport));
    2120       52030 :         sss->buf1 = palloc(TEXTBUFLEN);
    2121       52030 :         sss->buflen1 = TEXTBUFLEN;
    2122       52030 :         sss->buf2 = palloc(TEXTBUFLEN);
    2123       52030 :         sss->buflen2 = TEXTBUFLEN;
    2124             :         /* Start with invalid values */
    2125       52030 :         sss->last_len1 = -1;
    2126       52030 :         sss->last_len2 = -1;
    2127             :         /* Initialize */
    2128       52030 :         sss->last_returned = 0;
    2129       52030 :         sss->locale = locale;
    2130             : 
    2131             :         /*
    2132             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    2133             :          * constantly keep track of the variety of data that buf1 and buf2
    2134             :          * currently contain.
    2135             :          *
    2136             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    2137             :          * conversions and comparisons are batched into two distinct phases,
    2138             :          * but the correctness of caching cannot hinge upon this.  For
    2139             :          * comparison caching, buffer state is only trusted if cache_blob is
    2140             :          * found set to false, whereas strxfrm() caching only trusts the state
    2141             :          * when cache_blob is found set to true.
    2142             :          *
    2143             :          * Arbitrarily initialize cache_blob to true.
    2144             :          */
    2145       52030 :         sss->cache_blob = true;
    2146       52030 :         sss->collate_c = collate_c;
    2147       52030 :         sss->typid = typid;
    2148       52030 :         ssup->ssup_extra = sss;
    2149             : 
    2150             :         /*
    2151             :          * If possible, plan to use the abbreviated keys optimization.  The
    2152             :          * core code may switch back to authoritative comparator should
    2153             :          * abbreviation be aborted.
    2154             :          */
    2155       52030 :         if (abbreviate)
    2156             :         {
                                   /*
                                    * NOTE(review): the meaning of the 0.20 starting value and of
                                    * the second initHyperLogLog() argument is defined outside
                                    * this function.
                                    */
    2157        3280 :             sss->prop_card = 0.20;
    2158        3280 :             initHyperLogLog(&sss->abbr_card, 10);
    2159        3280 :             initHyperLogLog(&sss->full_card, 10);
                                   /*
                                    * Keep the comparator chosen above as the full comparator and
                                    * install ssup_datum_unsigned_cmp as the primary comparator,
                                    * together with the conversion and abort callbacks.
                                    */
    2160        3280 :             ssup->abbrev_full_comparator = ssup->comparator;
    2161        3280 :             ssup->comparator = ssup_datum_unsigned_cmp;
    2162        3280 :             ssup->abbrev_converter = varstr_abbrev_convert;
    2163        3280 :             ssup->abbrev_abort = varstr_abbrev_abort;
    2164             :         }
    2165             :     }
    2166      150664 : }
    2167             : 
    2168             : /*
    2169             :  * sortsupport comparison func (for C locale case)
    2170             :  */
    2171             : static int
    2172    81080548 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2173             : {
                           /* ssup is not consulted by this comparator. */
    2174    81080548 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2175    81080548 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2176             :     char       *a1p,
    2177             :                *a2p;
    2178             :     int         len1,
    2179             :                 len2,
    2180             :                 result;
    2181             : 
    2182    81080548 :     a1p = VARDATA_ANY(arg1);
    2183    81080548 :     a2p = VARDATA_ANY(arg2);
    2184             : 
    2185    81080548 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2186    81080548 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2187             : 
                           /*
                            * Compare the bytes both strings have in common; if those are equal
                            * and the lengths differ, the shorter string sorts first.
                            */
    2188    81080548 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2189    81080548 :     if ((result == 0) && (len1 != len2))
    2190     1850424 :         result = (len1 < len2) ? -1 : 1;
    2191             : 
    2192             :     /* We can't afford to leak memory here. */
                           /* A pointer that differs from the input datum is treated as a copy. */
    2193    81080548 :     if (PointerGetDatum(arg1) != x)
    2194           0 :         pfree(arg1);
    2195    81080548 :     if (PointerGetDatum(arg2) != y)
    2196           0 :         pfree(arg2);
    2197             : 
    2198    81080548 :     return result;
    2199             : }
    2200             : 
    2201             : /*
    2202             :  * sortsupport comparison func (for BpChar C locale case)
    2203             :  *
    2204             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    2205             :  * varstr_sortsupport BpChar case, modeled on
    2206             :  * internal_bpchar_pattern_compare().
    2207             :  */
    2208             : static int
    2209       60014 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2210             : {
                           /* ssup is not consulted by this comparator. */
    2211       60014 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2212       60014 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2213             :     char       *a1p,
    2214             :                *a2p;
    2215             :     int         len1,
    2216             :                 len2,
    2217             :                 result;
    2218             : 
    2219       60014 :     a1p = VARDATA_ANY(arg1);
    2220       60014 :     a2p = VARDATA_ANY(arg2);
    2221             : 
                           /*
                            * Unlike varstrfastcmp_c(), the lengths compared are those returned
                            * by bpchartruelen() rather than the raw data lengths.
                            */
    2222       60014 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2223       60014 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2224             : 
                           /*
                            * Compare the bytes both strings have in common; if those are equal
                            * and the lengths differ, the shorter string sorts first.
                            */
    2225       60014 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2226       60014 :     if ((result == 0) && (len1 != len2))
    2227           0 :         result = (len1 < len2) ? -1 : 1;
    2228             : 
    2229             :     /* We can't afford to leak memory here. */
                           /* A pointer that differs from the input datum is treated as a copy. */
    2230       60014 :     if (PointerGetDatum(arg1) != x)
    2231           0 :         pfree(arg1);
    2232       60014 :     if (PointerGetDatum(arg2) != y)
    2233           0 :         pfree(arg2);
    2234             : 
    2235       60014 :     return result;
    2236             : }
    2237             : 
    2238             : /*
    2239             :  * sortsupport comparison func (for NAME C locale case)
    2240             :  */
    2241             : static int
    2242   103647056 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
    2243             : {
                           /* ssup is not consulted by this comparator. */
    2244   103647056 :     Name        arg1 = DatumGetName(x);
    2245   103647056 :     Name        arg2 = DatumGetName(y);
    2246             : 
                           /* strncmp() limited to NAMEDATALEN bytes; its result is returned as is. */
    2247   103647056 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
    2248             : }
    2249             : 
    2250             : /*
    2251             :  * sortsupport comparison func (for locale case with all varlena types)
    2252             :  */
    2253             : static int
    2254    40193606 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2255             : {
    2256    40193606 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2257    40193606 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2258             :     char       *a1p,
    2259             :                *a2p;
    2260             :     int         len1,
    2261             :                 len2,
    2262             :                 result;
    2263             : 
    2264    40193606 :     a1p = VARDATA_ANY(arg1);
    2265    40193606 :     a2p = VARDATA_ANY(arg2);
    2266             : 
    2267    40193606 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2268    40193606 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2269             : 
                           /*
                            * The comparison itself is delegated to varstrfastcmp_locale(), which
                            * works on data pointers and lengths and receives ssup.
                            */
    2270    40193606 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
    2271             : 
    2272             :     /* We can't afford to leak memory here. */
                           /* A pointer that differs from the input datum is treated as a copy. */
    2273    40193606 :     if (PointerGetDatum(arg1) != x)
    2274           6 :         pfree(arg1);
    2275    40193606 :     if (PointerGetDatum(arg2) != y)
    2276           6 :         pfree(arg2);
    2277             : 
    2278    40193606 :     return result;
    2279             : }
    2280             : 
    2281             : /*
    2282             :  * sortsupport comparison func (for locale case with NAME type)
    2283             :  */
    2284             : static int
    2285           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2286             : {
    2287           0 :     Name        arg1 = DatumGetName(x);
    2288           0 :     Name        arg2 = DatumGetName(y);
    2289             : 
                           /*
                            * Each length is taken with strlen() on the NameStr() contents; the
                            * comparison itself is delegated to varstrfastcmp_locale().
                            */
    2290           0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
    2291           0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
    2292             :                                 ssup);
    2293             : }
    2294             : 
    2295             : /*
    2296             :  * sortsupport comparison func for locale cases
    2297             :  */
    2298             : static int
    2299    40193606 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
    2300             : {
    2301    40193606 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2302             :     int         result;
    2303             :     bool        arg1_match;
    2304             : 
    2305             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2306    40193606 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2307             :     {
    2308             :         /*
    2309             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2310             :          * last_len2.  Existing contents of buffers might still be used by
    2311             :          * next call.
    2312             :          *
    2313             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2314             :          * even though that implies that the memcmp() will usually be
    2315             :          * performed for BpChar callers (though multibyte characters could
    2316             :          * still prevent that from occurring).  The memcmp() is still very
    2317             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2318             :          * (not limited to padding), so we need make no distinction between
    2319             :          * padding space characters and "real" space characters.
    2320             :          */
    2321    15092018 :         return 0;
    2322             :     }
    2323             : 
    2324    25101588 :     if (sss->typid == BPCHAROID)
    2325             :     {
    2326             :         /* Get true number of bytes, ignoring trailing spaces */
    2327       40196 :         len1 = bpchartruelen(a1p, len1);
    2328       40196 :         len2 = bpchartruelen(a2p, len2);
    2329             :     }
    2330             : 
    2331    25101588 :     if (len1 >= sss->buflen1)
    2332             :     {
    2333           6 :         pfree(sss->buf1);
    2334           6 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2335           6 :         sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
    2336             :     }
    2337    25101588 :     if (len2 >= sss->buflen2)
    2338             :     {
    2339           6 :         pfree(sss->buf2);
    2340           6 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2341           6 :         sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
    2342             :     }
    2343             : 
    2344             :     /*
    2345             :      * We're likely to be asked to compare the same strings repeatedly, and
    2346             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2347             :      * comparisons, even though in general there is no reason to think that
    2348             :      * that will work out (every string datum may be unique).  Caching does
    2349             :      * not slow things down measurably when it doesn't work out, and can speed
    2350             :      * things up by rather a lot when it does.  In part, this is because the
    2351             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2352             :      * when the last comparison's result cannot be reused.
    2353             :      */
    2354    25101588 :     arg1_match = true;
    2355    25101588 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2356             :     {
    2357    22409654 :         arg1_match = false;
    2358    22409654 :         memcpy(sss->buf1, a1p, len1);
    2359    22409654 :         sss->buf1[len1] = '\0';
    2360    22409654 :         sss->last_len1 = len1;
    2361             :     }
    2362             : 
    2363             :     /*
    2364             :      * If we're comparing the same two strings as last time, we can return the
    2365             :      * same answer without calling strcoll() again.  This is more likely than
    2366             :      * it seems (at least with moderate to low cardinality sets), because
    2367             :      * quicksort compares the same pivot against many values.
    2368             :      */
    2369    25101588 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2370             :     {
    2371     4091966 :         memcpy(sss->buf2, a2p, len2);
    2372     4091966 :         sss->buf2[len2] = '\0';
    2373     4091966 :         sss->last_len2 = len2;
    2374             :     }
    2375    21009622 :     else if (arg1_match && !sss->cache_blob)
    2376             :     {
    2377             :         /* Use result cached following last actual strcoll() call */
    2378     2304198 :         return sss->last_returned;
    2379             :     }
    2380             : 
    2381    22797390 :     if (sss->locale)
    2382             :     {
    2383           0 :         if (sss->locale->provider == COLLPROVIDER_ICU)
    2384             :         {
    2385             : #ifdef USE_ICU
    2386             : #ifdef HAVE_UCOL_STRCOLLUTF8
    2387             :             if (GetDatabaseEncoding() == PG_UTF8)
    2388             :             {
    2389             :                 UErrorCode  status;
    2390             : 
    2391             :                 status = U_ZERO_ERROR;
    2392             :                 result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
    2393             :                                           a1p, len1,
    2394             :                                           a2p, len2,
    2395             :                                           &status);
    2396             :                 if (U_FAILURE(status))
    2397             :                     ereport(ERROR,
    2398             :                             (errmsg("collation failed: %s", u_errorName(status))));
    2399             :             }
    2400             :             else
    2401             : #endif
    2402             :             {
    2403             :                 int32_t     ulen1,
    2404             :                             ulen2;
    2405             :                 UChar      *uchar1,
    2406             :                            *uchar2;
    2407             : 
    2408             :                 ulen1 = icu_to_uchar(&uchar1, a1p, len1);
    2409             :                 ulen2 = icu_to_uchar(&uchar2, a2p, len2);
    2410             : 
    2411             :                 result = ucol_strcoll(sss->locale->info.icu.ucol,
    2412             :                                       uchar1, ulen1,
    2413             :                                       uchar2, ulen2);
    2414             : 
    2415             :                 pfree(uchar1);
    2416             :                 pfree(uchar2);
    2417             :             }
    2418             : #else                           /* not USE_ICU */
    2419             :             /* shouldn't happen */
    2420           0 :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2421             : #endif                          /* not USE_ICU */
    2422             :         }
    2423             :         else
    2424             :         {
    2425             : #ifdef HAVE_LOCALE_T
    2426           0 :             result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
    2427             : #else
    2428             :             /* shouldn't happen */
    2429             :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2430             : #endif
    2431             :         }
    2432             :     }
    2433             :     else
    2434    22797390 :         result = strcoll(sss->buf1, sss->buf2);
    2435             : 
    2436             :     /* Break tie if necessary. */
    2437    22797390 :     if (result == 0 &&
    2438           0 :         (!sss->locale || sss->locale->deterministic))
    2439           0 :         result = strcmp(sss->buf1, sss->buf2);
    2440             : 
    2441             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2442    22797390 :     sss->cache_blob = false;
    2443    22797390 :     sss->last_returned = result;
    2444    22797390 :     return result;
    2445             : }
    2446             : 
    2447             : /*
    2448             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    2449             :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    2450             :  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
    2451             :  * stored in reverse order), and treat it as an unsigned integer.  When the "C"
    2452             :  * locale is used, or in case of bytea, just memcpy() from original instead.
    2453             :  */
    2454             : static Datum
    2455      149038 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2456             : {
    2457      149038 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2458      149038 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2459      149038 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2460             : 
    2461             :     /* working state */
    2462             :     Datum       res;
    2463             :     char       *pres;
    2464             :     int         len;
    2465             :     uint32      hash;
    2466             : 
    2467      149038 :     pres = (char *) &res;
    2468             :     /* memset(), so any non-overwritten bytes are NUL */
    2469      149038 :     memset(pres, 0, sizeof(Datum));
    2470      149038 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2471             : 
    2472             :     /* Get number of bytes, ignoring trailing spaces */
    2473      149038 :     if (sss->typid == BPCHAROID)
    2474           0 :         len = bpchartruelen(authoritative_data, len);
    2475             : 
    2476             :     /*
    2477             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2478             :      * abbreviate keys.  The full comparator for the C locale is always
    2479             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2480             :      * always force the C collation -- bytea isn't a collatable type, but this
    2481             :      * approach is convenient) to use strxfrm().  This is because bytea
    2482             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2483             :      *
    2484             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2485             :      * strings because abbreviated cmp need not make a distinction between
    2486             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2487             :      * authoritative representation.  Hopefully a comparison at or past one
    2488             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2489             :      * without consulting the authoritative representation; specifically, some
    2490             :      * later non-NUL byte in the longer string can resolve the comparison
    2491             :      * against a subsequent terminating NUL in the shorter string.  There will
    2492             :      * usually be what is effectively a "length-wise" resolution there and
    2493             :      * then.
    2494             :      *
    2495             :      * If that doesn't work out -- if all bytes in the longer string
    2496             :      * positioned at or past the offset of the smaller string's (first)
    2497             :      * terminating NUL are actually representative of NUL bytes in the
    2498             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2499             :      * towards the end of the longer string iff it happens to still be small)
    2500             :      * -- then an authoritative tie-breaker will happen, and do the right
    2501             :      * thing: explicitly consider string length.
    2502             :      */
    2503      149038 :     if (sss->collate_c)
    2504      149038 :         memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
    2505             :     else
    2506             :     {
    2507             :         Size        bsize;
    2508             : #ifdef USE_ICU
    2509             :         int32_t     ulen = -1;
    2510             :         UChar      *uchar = NULL;
    2511             : #endif
    2512             : 
    2513             :         /*
    2514             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2515             :          * analogs.
    2516             :          */
    2517             : 
    2518             :         /* By convention, we use buffer 1 to store and NUL-terminate */
    2519           0 :         if (len >= sss->buflen1)
    2520             :         {
    2521           0 :             pfree(sss->buf1);
    2522           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2523           0 :             sss->buf1 = palloc(sss->buflen1);
    2524             :         }
    2525             : 
    2526             :         /* Might be able to reuse strxfrm() blob from last call */
    2527           0 :         if (sss->last_len1 == len && sss->cache_blob &&
    2528           0 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2529             :         {
    2530           0 :             memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
    2531             :             /* No change affecting cardinality, so no hashing required */
    2532           0 :             goto done;
    2533             :         }
    2534             : 
    2535           0 :         memcpy(sss->buf1, authoritative_data, len);
    2536             : 
    2537             :         /*
    2538             :          * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
    2539             :          * necessary for ICU, but doesn't hurt.
    2540             :          */
    2541           0 :         sss->buf1[len] = '\0';
    2542           0 :         sss->last_len1 = len;
    2543             : 
    2544             : #ifdef USE_ICU
    2545             :         /* When using ICU and not UTF8, convert string to UChar. */
    2546             :         if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
    2547             :             GetDatabaseEncoding() != PG_UTF8)
    2548             :             ulen = icu_to_uchar(&uchar, sss->buf1, len);
    2549             : #endif
    2550             : 
    2551             :         /*
    2552             :          * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
    2553             :          * and try again.  Both of these functions have the result buffer
    2554             :          * content undefined if the result did not fit, so we need to retry
    2555             :          * until everything fits, even though we only need the first few bytes
    2556             :          * in the end.  When using ucol_nextSortKeyPart(), however, we only
    2557             :          * ask for as many bytes as we actually need.
    2558             :          */
    2559             :         for (;;)
    2560             :         {
    2561             : #ifdef USE_ICU
    2562             :             if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
    2563             :             {
    2564             :                 /*
    2565             :                  * When using UTF8, use the iteration interface so we only
    2566             :                  * need to produce as many bytes as we actually need.
    2567             :                  */
    2568             :                 if (GetDatabaseEncoding() == PG_UTF8)
    2569             :                 {
    2570             :                     UCharIterator iter;
    2571             :                     uint32_t    state[2];
    2572             :                     UErrorCode  status;
    2573             : 
    2574             :                     uiter_setUTF8(&iter, sss->buf1, len);
    2575             :                     state[0] = state[1] = 0;    /* won't need that again */
    2576             :                     status = U_ZERO_ERROR;
    2577             :                     bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
    2578             :                                                  &iter,
    2579             :                                                  state,
    2580             :                                                  (uint8_t *) sss->buf2,
    2581             :                                                  Min(sizeof(Datum), sss->buflen2),
    2582             :                                                  &status);
    2583             :                     if (U_FAILURE(status))
    2584             :                         ereport(ERROR,
    2585             :                                 (errmsg("sort key generation failed: %s",
    2586             :                                         u_errorName(status))));
    2587             :                 }
    2588             :                 else
    2589             :                     bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
    2590             :                                             uchar, ulen,
    2591             :                                             (uint8_t *) sss->buf2, sss->buflen2);
    2592             :             }
    2593             :             else
    2594             : #endif
    2595             : #ifdef HAVE_LOCALE_T
    2596           0 :             if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
    2597           0 :                 bsize = strxfrm_l(sss->buf2, sss->buf1,
    2598           0 :                                   sss->buflen2, sss->locale->info.lt);
    2599             :             else
    2600             : #endif
    2601           0 :                 bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
    2602             : 
    2603           0 :             sss->last_len2 = bsize;
    2604           0 :             if (bsize < sss->buflen2)
    2605           0 :                 break;
    2606             : 
    2607             :             /*
    2608             :              * Grow buffer and retry.
    2609             :              */
    2610           0 :             pfree(sss->buf2);
    2611           0 :             sss->buflen2 = Max(bsize + 1,
    2612             :                                Min(sss->buflen2 * 2, MaxAllocSize));
    2613           0 :             sss->buf2 = palloc(sss->buflen2);
    2614             :         }
    2615             : 
    2616             :         /*
    2617             :          * Every Datum byte is always compared.  This is safe because the
    2618             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2619             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2620             :          * logically representing termination.
    2621             :          *
    2622             :          * (Actually, even if there were NUL bytes in the blob it would be
    2623             :          * okay.  See remarks on bytea case above.)
    2624             :          */
    2625           0 :         memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
    2626             : 
    2627             : #ifdef USE_ICU
    2628             :         if (uchar)
    2629             :             pfree(uchar);
    2630             : #endif
    2631             :     }
    2632             : 
    2633             :     /*
    2634             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2635             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2636             :      * the worst case, where we do many string transformations for no saving
    2637             :      * in full strcoll()-based comparisons.  These statistics are used by
    2638             :      * varstr_abbrev_abort().
    2639             :      *
    2640             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    2641             :      * in order to compensate for cases where differences are past
    2642             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    2643             :      */
    2644      149038 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2645             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2646             : 
    2647      149038 :     if (len > PG_CACHE_LINE_SIZE)
    2648          10 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2649             : 
    2650      149038 :     addHyperLogLog(&sss->full_card, hash);
    2651             : 
    2652             :     /* Hash abbreviated key */
    2653             : #if SIZEOF_DATUM == 8
    2654             :     {
    2655             :         uint32      lohalf,
    2656             :                     hihalf;
    2657             : 
    2658      149038 :         lohalf = (uint32) res;
    2659      149038 :         hihalf = (uint32) (res >> 32);
    2660      149038 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2661             :     }
    2662             : #else                           /* SIZEOF_DATUM != 8 */
    2663             :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2664             : #endif
    2665             : 
    2666      149038 :     addHyperLogLog(&sss->abbr_card, hash);
    2667             : 
    2668             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2669      149038 :     sss->cache_blob = true;
    2670      149038 : done:
    2671             : 
    2672             :     /*
    2673             :      * Byteswap on little-endian machines.
    2674             :      *
    2675             :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
    2676             :      * 3-way comparator) works correctly on all platforms.  If we didn't do
    2677             :      * this, the comparator would have to call memcmp() with a pair of
    2678             :      * pointers to the first byte of each abbreviated key, which is slower.
    2679             :      */
    2680      149038 :     res = DatumBigEndianToNative(res);
    2681             : 
    2682             :     /* Don't leak memory here */
    2683      149038 :     if (PointerGetDatum(authoritative) != original)
    2684           0 :         pfree(authoritative);
    2685             : 
    2686      149038 :     return res;
    2687             : }
    2688             : 
    2689             : /*
    2690             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2691             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    2692             :  * should be aborted, based on its projected effectiveness.
    2693             :  */
    2694             : static bool
    2695         304 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2696             : {
    2697         304 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2698             :     double      abbrev_distinct,
    2699             :                 key_distinct;
    2700             : 
    2701             :     Assert(ssup->abbreviate);
    2702             : 
    2703             :     /* Have a little patience */
    2704         304 :     if (memtupcount < 100)
    2705         186 :         return false;
    2706             : 
    2707         118 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2708         118 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2709             : 
    2710             :     /*
    2711             :      * Clamp cardinality estimates to at least one distinct value.  While
    2712             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2713             :      * that might misrepresent costs if we failed to clamp.
    2714             :      */
    2715         118 :     if (abbrev_distinct <= 1.0)
    2716           0 :         abbrev_distinct = 1.0;
    2717             : 
    2718         118 :     if (key_distinct <= 1.0)
    2719           0 :         key_distinct = 1.0;
    2720             : 
    2721             :     /*
    2722             :      * In the worst case all abbreviated keys are identical, while at the same
    2723             :      * time there are differences within full key strings not captured in
    2724             :      * abbreviations.
    2725             :      */
    2726             : #ifdef TRACE_SORT
    2727         118 :     if (trace_sort)
    2728             :     {
    2729           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2730             : 
    2731           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2732             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2733             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2734             :              sss->prop_card);
    2735             :     }
    2736             : #endif
    2737             : 
    2738             :     /*
    2739             :      * If the number of distinct abbreviated keys approximately matches the
    2740             :      * number of distinct authoritative original keys, that's reason enough to
    2741             :      * proceed.  We can win even with a very low cardinality set if most
    2742             :      * tie-breakers only memcmp().  This is by far the most important
    2743             :      * consideration.
    2744             :      *
    2745             :      * While comparisons that are resolved at the abbreviated key level are
    2746             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2747             :      * those two outcomes are so much cheaper than a full strcoll() once
    2748             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2749             :      * cardinality against the overall size of the set in order to more
    2750             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2751             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2752             :      * resolution are equivalent.
    2753             :      */
    2754         118 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2755             :     {
    2756             :         /*
    2757             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2758             :          * aggressively for next call.
    2759             :          *
    2760             :          * This is useful because the number of comparisons required on
    2761             :          * average increases at a linearithmic rate, and at roughly 10,000
    2762             :          * tuples that factor will start to dominate over the linear costs of
    2763             :          * string transformation (this is a conservative estimate).  The decay
    2764             :          * rate is chosen to be a little less aggressive than halving -- which
    2765             :          * (since we're called at points at which memtupcount has doubled)
    2766             :          * would never see the cost model actually abort past the first call
    2767             :          * following a decay.  This decay rate is mostly a precaution against
    2768             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2769             :          * full key cardinality.  The decay also serves to prevent a marginal
    2770             :          * case from being aborted too late, when too much has already been
    2771             :          * invested in string transformation.
    2772             :          *
    2773             :          * It's possible for sets of several million distinct strings with
    2774             :          * mere tens of thousands of distinct abbreviated keys to still
    2775             :          * benefit very significantly.  This will generally occur provided
    2776             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2777             :          * set's full keys. If it isn't so, we hope to catch that early and
    2778             :          * abort.  If it isn't caught early, by the time the problem is
    2779             :          * apparent it's probably not worth aborting.
    2780             :          */
    2781         118 :         if (memtupcount > 10000)
    2782           0 :             sss->prop_card *= 0.65;
    2783             : 
    2784         118 :         return false;
    2785             :     }
    2786             : 
    2787             :     /*
    2788             :      * Abort abbreviation strategy.
    2789             :      *
    2790             :      * The worst case, where all abbreviated keys are identical while all
    2791             :      * original strings differ will typically only see a regression of about
    2792             :      * 10% in execution time for small to medium sized lists of strings.
    2793             :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
    2794             :      * often expect very large improvements, particularly with sets of strings
    2795             :      * of moderately high to high abbreviated cardinality.  There is little to
    2796             :      * lose but much to gain, which our strategy reflects.
    2797             :      */
    2798             : #ifdef TRACE_SORT
    2799           0 :     if (trace_sort)
    2800           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2801             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2802             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2803             : #endif
    2804             : 
    2805           0 :     return true;
    2806             : }
    2807             : 
    2808             : /*
    2809             :  * Generic equalimage support function for character type's operator classes.
    2810             :  * Disables the use of deduplication with nondeterministic collations.
    2811             :  */
    2812             : Datum
    2813       24128 : btvarstrequalimage(PG_FUNCTION_ARGS)
    2814             : {
    2815             :     /* Oid      opcintype = PG_GETARG_OID(0); */
    2816       24128 :     Oid         collid = PG_GET_COLLATION();
    2817             : 
    2818       24128 :     check_collation_set(collid);
    2819             : 
    2820       24128 :     if (lc_collate_is_c(collid) ||
    2821           0 :         collid == DEFAULT_COLLATION_OID ||
    2822           0 :         get_collation_isdeterministic(collid))
    2823       24128 :         PG_RETURN_BOOL(true);
    2824             :     else
    2825           0 :         PG_RETURN_BOOL(false);
    2826             : }
    2827             : 
    2828             : Datum
    2829      184626 : text_larger(PG_FUNCTION_ARGS)
    2830             : {
    2831      184626 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2832      184626 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2833             :     text       *result;
    2834             : 
    2835      184626 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
    2836             : 
    2837      184626 :     PG_RETURN_TEXT_P(result);
    2838             : }
    2839             : 
    2840             : Datum
    2841       41142 : text_smaller(PG_FUNCTION_ARGS)
    2842             : {
    2843       41142 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2844       41142 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2845             :     text       *result;
    2846             : 
    2847       41142 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
    2848             : 
    2849       41142 :     PG_RETURN_TEXT_P(result);
    2850             : }
    2851             : 
    2852             : 
    2853             : /*
    2854             :  * Cross-type comparison functions for types text and name.
    2855             :  */
    2856             : 
    2857             : Datum
    2858      188950 : nameeqtext(PG_FUNCTION_ARGS)
    2859             : {
    2860      188950 :     Name        arg1 = PG_GETARG_NAME(0);
    2861      188950 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2862      188950 :     size_t      len1 = strlen(NameStr(*arg1));
    2863      188950 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2864      188950 :     Oid         collid = PG_GET_COLLATION();
    2865             :     bool        result;
    2866             : 
    2867      188950 :     check_collation_set(collid);
    2868             : 
    2869      188950 :     if (collid == C_COLLATION_OID)
    2870      293376 :         result = (len1 == len2 &&
    2871      129564 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2872             :     else
    2873       25138 :         result = (varstr_cmp(NameStr(*arg1), len1,
    2874       25138 :                              VARDATA_ANY(arg2), len2,
    2875             :                              collid) == 0);
    2876             : 
    2877      188950 :     PG_FREE_IF_COPY(arg2, 1);
    2878             : 
    2879      188950 :     PG_RETURN_BOOL(result);
    2880             : }
    2881             : 
    2882             : Datum
    2883         384 : texteqname(PG_FUNCTION_ARGS)
    2884             : {
    2885         384 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2886         384 :     Name        arg2 = PG_GETARG_NAME(1);
    2887         384 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2888         384 :     size_t      len2 = strlen(NameStr(*arg2));
    2889         384 :     Oid         collid = PG_GET_COLLATION();
    2890             :     bool        result;
    2891             : 
    2892         384 :     check_collation_set(collid);
    2893             : 
    2894         384 :     if (collid == C_COLLATION_OID)
    2895         564 :         result = (len1 == len2 &&
    2896         180 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2897             :     else
    2898           0 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
    2899           0 :                              NameStr(*arg2), len2,
    2900             :                              collid) == 0);
    2901             : 
    2902         384 :     PG_FREE_IF_COPY(arg1, 0);
    2903             : 
    2904         384 :     PG_RETURN_BOOL(result);
    2905             : }
    2906             : 
    2907             : Datum
    2908          18 : namenetext(PG_FUNCTION_ARGS)
    2909             : {
    2910          18 :     Name        arg1 = PG_GETARG_NAME(0);
    2911          18 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2912          18 :     size_t      len1 = strlen(NameStr(*arg1));
    2913          18 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2914          18 :     Oid         collid = PG_GET_COLLATION();
    2915             :     bool        result;
    2916             : 
    2917          18 :     check_collation_set(collid);
    2918             : 
    2919          18 :     if (collid == C_COLLATION_OID)
    2920          18 :         result = !(len1 == len2 &&
    2921           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2922             :     else
    2923           0 :         result = !(varstr_cmp(NameStr(*arg1), len1,
    2924           0 :                               VARDATA_ANY(arg2), len2,
    2925             :                               collid) == 0);
    2926             : 
    2927          18 :     PG_FREE_IF_COPY(arg2, 1);
    2928             : 
    2929          18 :     PG_RETURN_BOOL(result);
    2930             : }
    2931             : 
    2932             : Datum
    2933           0 : textnename(PG_FUNCTION_ARGS)
    2934             : {
    2935           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2936           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2937           0 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2938           0 :     size_t      len2 = strlen(NameStr(*arg2));
    2939           0 :     Oid         collid = PG_GET_COLLATION();
    2940             :     bool        result;
    2941             : 
    2942           0 :     check_collation_set(collid);
    2943             : 
    2944           0 :     if (collid == C_COLLATION_OID)
    2945           0 :         result = !(len1 == len2 &&
    2946           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2947             :     else
    2948           0 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
    2949           0 :                               NameStr(*arg2), len2,
    2950             :                               collid) == 0);
    2951             : 
    2952           0 :     PG_FREE_IF_COPY(arg1, 0);
    2953             : 
    2954           0 :     PG_RETURN_BOOL(result);
    2955             : }
    2956             : 
    2957             : Datum
    2958      139650 : btnametextcmp(PG_FUNCTION_ARGS)
    2959             : {
    2960      139650 :     Name        arg1 = PG_GETARG_NAME(0);
    2961      139650 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2962             :     int32       result;
    2963             : 
    2964      279300 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
    2965      279300 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
    2966             :                         PG_GET_COLLATION());
    2967             : 
    2968      139650 :     PG_FREE_IF_COPY(arg2, 1);
    2969             : 
    2970      139650 :     PG_RETURN_INT32(result);
    2971             : }
    2972             : 
    2973             : Datum
    2974           0 : bttextnamecmp(PG_FUNCTION_ARGS)
    2975             : {
    2976           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2977           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2978             :     int32       result;
    2979             : 
    2980           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
    2981           0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
    2982             :                         PG_GET_COLLATION());
    2983             : 
    2984           0 :     PG_FREE_IF_COPY(arg1, 0);
    2985             : 
    2986           0 :     PG_RETURN_INT32(result);
    2987             : }
    2988             : 
    2989             : #define CmpCall(cmpfunc) \
    2990             :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
    2991             :                                           PG_GET_COLLATION(), \
    2992             :                                           PG_GETARG_DATUM(0), \
    2993             :                                           PG_GETARG_DATUM(1)))
    2994             : 
    2995             : Datum
    2996       44776 : namelttext(PG_FUNCTION_ARGS)
    2997             : {
    2998       44776 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
    2999             : }
    3000             : 
    3001             : Datum
    3002           0 : nameletext(PG_FUNCTION_ARGS)
    3003             : {
    3004           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
    3005             : }
    3006             : 
    3007             : Datum
    3008           0 : namegttext(PG_FUNCTION_ARGS)
    3009             : {
    3010           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
    3011             : }
    3012             : 
    3013             : Datum
    3014       43292 : namegetext(PG_FUNCTION_ARGS)
    3015             : {
    3016       43292 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
    3017             : }
    3018             : 
    3019             : Datum
    3020           0 : textltname(PG_FUNCTION_ARGS)
    3021             : {
    3022           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
    3023             : }
    3024             : 
    3025             : Datum
    3026           0 : textlename(PG_FUNCTION_ARGS)
    3027             : {
    3028           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
    3029             : }
    3030             : 
    3031             : Datum
    3032           0 : textgtname(PG_FUNCTION_ARGS)
    3033             : {
    3034           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
    3035             : }
    3036             : 
    3037             : Datum
    3038           0 : textgename(PG_FUNCTION_ARGS)
    3039             : {
    3040           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
    3041             : }
    3042             : 
    3043             : #undef CmpCall
    3044             : 
    3045             : 
    3046             : /*
    3047             :  * The following operators support character-by-character comparison
    3048             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    3049             :  * Note that the regular texteq/textne comparison operators, and regular
    3050             :  * support functions 1 and 2 with "C" collation are assumed to be
    3051             :  * compatible with these!
    3052             :  */
    3053             : 
    3054             : static int
    3055      152080 : internal_text_pattern_compare(text *arg1, text *arg2)
    3056             : {
    3057             :     int         result;
    3058             :     int         len1,
    3059             :                 len2;
    3060             : 
    3061      152080 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3062      152080 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3063             : 
    3064      152080 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3065      152080 :     if (result != 0)
    3066      152026 :         return result;
    3067          54 :     else if (len1 < len2)
    3068           0 :         return -1;
    3069          54 :     else if (len1 > len2)
    3070          18 :         return 1;
    3071             :     else
    3072          36 :         return 0;
    3073             : }
    3074             : 
    3075             : 
    3076             : Datum
    3077       39538 : text_pattern_lt(PG_FUNCTION_ARGS)
    3078             : {
    3079       39538 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3080       39538 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3081             :     int         result;
    3082             : 
    3083       39538 :     result = internal_text_pattern_compare(arg1, arg2);
    3084             : 
    3085       39538 :     PG_FREE_IF_COPY(arg1, 0);
    3086       39538 :     PG_FREE_IF_COPY(arg2, 1);
    3087             : 
    3088       39538 :     PG_RETURN_BOOL(result < 0);
    3089             : }
    3090             : 
    3091             : 
    3092             : Datum
    3093       37510 : text_pattern_le(PG_FUNCTION_ARGS)
    3094             : {
    3095       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3096       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3097             :     int         result;
    3098             : 
    3099       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    3100             : 
    3101       37510 :     PG_FREE_IF_COPY(arg1, 0);
    3102       37510 :     PG_FREE_IF_COPY(arg2, 1);
    3103             : 
    3104       37510 :     PG_RETURN_BOOL(result <= 0);
    3105             : }
    3106             : 
    3107             : 
    3108             : Datum
    3109       37510 : text_pattern_ge(PG_FUNCTION_ARGS)
    3110             : {
    3111       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3112       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3113             :     int         result;
    3114             : 
    3115       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    3116             : 
    3117       37510 :     PG_FREE_IF_COPY(arg1, 0);
    3118       37510 :     PG_FREE_IF_COPY(arg2, 1);
    3119             : 
    3120       37510 :     PG_RETURN_BOOL(result >= 0);
    3121             : }
    3122             : 
    3123             : 
    3124             : Datum
    3125       37510 : text_pattern_gt(PG_FUNCTION_ARGS)
    3126             : {
    3127       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3128       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3129             :     int         result;
    3130             : 
    3131       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    3132             : 
    3133       37510 :     PG_FREE_IF_COPY(arg1, 0);
    3134       37510 :     PG_FREE_IF_COPY(arg2, 1);
    3135             : 
    3136       37510 :     PG_RETURN_BOOL(result > 0);
    3137             : }
    3138             : 
    3139             : 
    3140             : Datum
    3141          12 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    3142             : {
    3143          12 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    3144          12 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    3145             :     int         result;
    3146             : 
    3147          12 :     result = internal_text_pattern_compare(arg1, arg2);
    3148             : 
    3149          12 :     PG_FREE_IF_COPY(arg1, 0);
    3150          12 :     PG_FREE_IF_COPY(arg2, 1);
    3151             : 
    3152          12 :     PG_RETURN_INT32(result);
    3153             : }
    3154             : 
    3155             : 
    3156             : Datum
    3157         116 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    3158             : {
    3159         116 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    3160             :     MemoryContext oldcontext;
    3161             : 
    3162         116 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    3163             : 
    3164             :     /* Use generic string SortSupport, forcing "C" collation */
    3165         116 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
    3166             : 
    3167         116 :     MemoryContextSwitchTo(oldcontext);
    3168             : 
    3169         116 :     PG_RETURN_VOID();
    3170             : }
    3171             : 
    3172             : 
    3173             : /*-------------------------------------------------------------
    3174             :  * byteaoctetlen
    3175             :  *
    3176             :  * get the number of bytes contained in an instance of type 'bytea'
    3177             :  *-------------------------------------------------------------
    3178             :  */
    3179             : Datum
    3180         302 : byteaoctetlen(PG_FUNCTION_ARGS)
    3181             : {
    3182         302 :     Datum       str = PG_GETARG_DATUM(0);
    3183             : 
    3184             :     /* We need not detoast the input at all */
    3185         302 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    3186             : }
    3187             : 
    3188             : /*
    3189             :  * byteacat -
    3190             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    3191             :  *    the two.
    3192             :  *
    3193             :  * Cloned from textcat and modified as required.
    3194             :  */
    3195             : Datum
    3196        1520 : byteacat(PG_FUNCTION_ARGS)
    3197             : {
    3198        1520 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3199        1520 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3200             : 
    3201        1520 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    3202             : }
    3203             : 
    3204             : /*
    3205             :  * bytea_catenate
    3206             :  *  Guts of byteacat(), broken out so it can be used by other functions
    3207             :  *
    3208             :  * Arguments can be in short-header form, but not compressed or out-of-line
    3209             :  */
    3210             : static bytea *
    3211        1556 : bytea_catenate(bytea *t1, bytea *t2)
    3212             : {
    3213             :     bytea      *result;
    3214             :     int         len1,
    3215             :                 len2,
    3216             :                 len;
    3217             :     char       *ptr;
    3218             : 
    3219        1556 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3220        1556 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3221             : 
    3222             :     /* paranoia ... probably should throw error instead? */
    3223        1556 :     if (len1 < 0)
    3224           0 :         len1 = 0;
    3225        1556 :     if (len2 < 0)
    3226           0 :         len2 = 0;
    3227             : 
    3228        1556 :     len = len1 + len2 + VARHDRSZ;
    3229        1556 :     result = (bytea *) palloc(len);
    3230             : 
    3231             :     /* Set size of result string... */
    3232        1556 :     SET_VARSIZE(result, len);
    3233             : 
    3234             :     /* Fill data field of result string... */
    3235        1556 :     ptr = VARDATA(result);
    3236        1556 :     if (len1 > 0)
    3237        1556 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    3238        1556 :     if (len2 > 0)
    3239        1538 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    3240             : 
    3241        1556 :     return result;
    3242             : }
    3243             : 
    3244             : #define PG_STR_GET_BYTEA(str_) \
    3245             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    3246             : 
    3247             : /*
    3248             :  * bytea_substr()
    3249             :  * Return a substring starting at the specified position.
    3250             :  * Cloned from text_substr and modified as required.
    3251             :  *
    3252             :  * Input:
    3253             :  *  - string
    3254             :  *  - starting position (is one-based)
    3255             :  *  - string length (optional)
    3256             :  *
    3257             :  * If the starting position is zero or less, then return from the start of the string
    3258             :  * adjusting the length to be consistent with the "negative start" per SQL.
    3259             :  * If the length is less than zero, an ERROR is thrown. If no third argument
    3260             :  * (length) is provided, the length to the end of the string is assumed.
    3261             :  */
    3262             : Datum
    3263          82 : bytea_substr(PG_FUNCTION_ARGS)
    3264             : {
    3265          82 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3266             :                                       PG_GETARG_INT32(1),
    3267             :                                       PG_GETARG_INT32(2),
    3268             :                                       false));
    3269             : }
    3270             : 
    3271             : /*
    3272             :  * bytea_substr_no_len -
    3273             :  *    Wrapper to avoid opr_sanity failure due to
    3274             :  *    one function accepting a different number of args.
    3275             :  */
    3276             : Datum
    3277        3900 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    3278             : {
    3279        3900 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3280             :                                       PG_GETARG_INT32(1),
    3281             :                                       -1,
    3282             :                                       true));
    3283             : }
    3284             : 
    3285             : static bytea *
    3286        4018 : bytea_substring(Datum str,
    3287             :                 int S,
    3288             :                 int L,
    3289             :                 bool length_not_specified)
    3290             : {
    3291             :     int32       S1;             /* adjusted start position */
    3292             :     int32       L1;             /* adjusted substring length */
    3293             :     int32       E;              /* end position */
    3294             : 
    3295             :     /*
    3296             :      * The logic here should generally match text_substring().
    3297             :      */
    3298        4018 :     S1 = Max(S, 1);
    3299             : 
    3300        4018 :     if (length_not_specified)
    3301             :     {
    3302             :         /*
    3303             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    3304             :          * end of the string if we pass it a negative value for length.
    3305             :          */
    3306        3918 :         L1 = -1;
    3307             :     }
    3308         100 :     else if (L < 0)
    3309             :     {
    3310             :         /* SQL99 says to throw an error for E < S, i.e., negative length */
    3311          12 :         ereport(ERROR,
    3312             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3313             :                  errmsg("negative substring length not allowed")));
    3314             :         L1 = -1;                /* silence stupider compilers */
    3315             :     }
    3316          88 :     else if (pg_add_s32_overflow(S, L, &E))
    3317             :     {
    3318             :         /*
    3319             :          * L could be large enough for S + L to overflow, in which case the
    3320             :          * substring must run to end of string.
    3321             :          */
    3322           6 :         L1 = -1;
    3323             :     }
    3324             :     else
    3325             :     {
    3326             :         /*
    3327             :          * A zero or negative value for the end position can happen if the
    3328             :          * start was negative or one. SQL99 says to return a zero-length
    3329             :          * string.
    3330             :          */
    3331          82 :         if (E < 1)
    3332           0 :             return PG_STR_GET_BYTEA("");
    3333             : 
    3334          82 :         L1 = E - S1;
    3335             :     }
    3336             : 
    3337             :     /*
    3338             :      * If the start position is past the end of the string, SQL99 says to
    3339             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    3340             :      * us.  We need only convert S1 to zero-based starting position.
    3341             :      */
    3342        4006 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    3343             : }
    3344             : 
    3345             : /*
    3346             :  * byteaoverlay
    3347             :  *  Replace specified substring of first string with second
    3348             :  *
    3349             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    3350             :  * This code is a direct implementation of what the standard says.
    3351             :  */
    3352             : Datum
    3353           6 : byteaoverlay(PG_FUNCTION_ARGS)
    3354             : {
    3355           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3356           6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3357           6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3358           6 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    3359             : 
    3360           6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3361             : }
    3362             : 
    3363             : Datum
    3364          12 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    3365             : {
    3366          12 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3367          12 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3368          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3369             :     int         sl;
    3370             : 
    3371          12 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    3372          12 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3373             : }
    3374             : 
    3375             : static bytea *
    3376          18 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    3377             : {
    3378             :     bytea      *result;
    3379             :     bytea      *s1;
    3380             :     bytea      *s2;
    3381             :     int         sp_pl_sl;
    3382             : 
    3383             :     /*
    3384             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    3385             :      * "substring length" error because that's what should be expected
    3386             :      * according to the spec's definition of OVERLAY().
    3387             :      */
    3388          18 :     if (sp <= 0)
    3389           0 :         ereport(ERROR,
    3390             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3391             :                  errmsg("negative substring length not allowed")));
    3392          18 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    3393           0 :         ereport(ERROR,
    3394             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    3395             :                  errmsg("integer out of range")));
    3396             : 
    3397          18 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    3398          18 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    3399          18 :     result = bytea_catenate(s1, t2);
    3400          18 :     result = bytea_catenate(result, s2);
    3401             : 
    3402          18 :     return result;
    3403             : }
    3404             : 
    3405             : /*
    3406             :  * bit_count
    3407             :  */
    3408             : Datum
    3409           6 : bytea_bit_count(PG_FUNCTION_ARGS)
    3410             : {
    3411           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3412             : 
    3413           6 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
    3414             : }
    3415             : 
    3416             : /*
    3417             :  * byteapos -
    3418             :  *    Return the position of the specified substring.
    3419             :  *    Implements the SQL POSITION() function.
    3420             :  * Cloned from textpos and modified as required.
    3421             :  */
    3422             : Datum
    3423           0 : byteapos(PG_FUNCTION_ARGS)
    3424             : {
    3425           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3426           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3427             :     int         pos;
    3428             :     int         px,
    3429             :                 p;
    3430             :     int         len1,
    3431             :                 len2;
    3432             :     char       *p1,
    3433             :                *p2;
    3434             : 
    3435           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3436           0 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3437             : 
    3438           0 :     if (len2 <= 0)
    3439           0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    3440             : 
    3441           0 :     p1 = VARDATA_ANY(t1);
    3442           0 :     p2 = VARDATA_ANY(t2);
    3443             : 
    3444           0 :     pos = 0;
    3445           0 :     px = (len1 - len2);
    3446           0 :     for (p = 0; p <= px; p++)
    3447             :     {
    3448           0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    3449             :         {
    3450           0 :             pos = p + 1;
    3451           0 :             break;
    3452             :         };
    3453           0 :         p1++;
    3454             :     };
    3455             : 
    3456           0 :     PG_RETURN_INT32(pos);
    3457             : }
    3458             : 
    3459             : /*-------------------------------------------------------------
    3460             :  * byteaGetByte
    3461             :  *
    3462             :  * this routine treats "bytea" as an array of bytes.
    3463             :  * It returns the Nth byte (a number between 0 and 255).
    3464             :  *-------------------------------------------------------------
    3465             :  */
    3466             : Datum
    3467          34 : byteaGetByte(PG_FUNCTION_ARGS)
    3468             : {
    3469          34 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3470          34 :     int32       n = PG_GETARG_INT32(1);
    3471             :     int         len;
    3472             :     int         byte;
    3473             : 
    3474          34 :     len = VARSIZE_ANY_EXHDR(v);
    3475             : 
    3476          34 :     if (n < 0 || n >= len)
    3477           6 :         ereport(ERROR,
    3478             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3479             :                  errmsg("index %d out of valid range, 0..%d",
    3480             :                         n, len - 1)));
    3481             : 
    3482          28 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3483             : 
    3484          28 :     PG_RETURN_INT32(byte);
    3485             : }
    3486             : 
    3487             : /*-------------------------------------------------------------
    3488             :  * byteaGetBit
    3489             :  *
    3490             :  * This routine treats a "bytea" type like an array of bits.
    3491             :  * It returns the value of the Nth bit (0 or 1).
    3492             :  *
    3493             :  *-------------------------------------------------------------
    3494             :  */
    3495             : Datum
    3496          12 : byteaGetBit(PG_FUNCTION_ARGS)
    3497             : {
    3498          12 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3499          12 :     int64       n = PG_GETARG_INT64(1);
    3500             :     int         byteNo,
    3501             :                 bitNo;
    3502             :     int         len;
    3503             :     int         byte;
    3504             : 
    3505          12 :     len = VARSIZE_ANY_EXHDR(v);
    3506             : 
    3507          12 :     if (n < 0 || n >= (int64) len * 8)
    3508           6 :         ereport(ERROR,
    3509             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3510             :                  errmsg("index %lld out of valid range, 0..%lld",
    3511             :                         (long long) n, (long long) len * 8 - 1)));
    3512             : 
    3513             :     /* n/8 is now known < len, so safe to cast to int */
    3514           6 :     byteNo = (int) (n / 8);
    3515           6 :     bitNo = (int) (n % 8);
    3516             : 
    3517           6 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3518             : 
    3519           6 :     if (byte & (1 << bitNo))
    3520           6 :         PG_RETURN_INT32(1);
    3521             :     else
    3522           0 :         PG_RETURN_INT32(0);
    3523             : }
    3524             : 
    3525             : /*-------------------------------------------------------------
    3526             :  * byteaSetByte
    3527             :  *
    3528             :  * Given an instance of type 'bytea' creates a new one with
    3529             :  * the Nth byte set to the given value.
    3530             :  *
    3531             :  *-------------------------------------------------------------
    3532             :  */
    3533             : Datum
    3534          12 : byteaSetByte(PG_FUNCTION_ARGS)
    3535             : {
    3536          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3537          12 :     int32       n = PG_GETARG_INT32(1);
    3538          12 :     int32       newByte = PG_GETARG_INT32(2);
    3539             :     int         len;
    3540             : 
    3541          12 :     len = VARSIZE(res) - VARHDRSZ;
    3542             : 
    3543          12 :     if (n < 0 || n >= len)
    3544           6 :         ereport(ERROR,
    3545             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3546             :                  errmsg("index %d out of valid range, 0..%d",
    3547             :                         n, len - 1)));
    3548             : 
    3549             :     /*
    3550             :      * Now set the byte.
    3551             :      */
    3552           6 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3553             : 
    3554           6 :     PG_RETURN_BYTEA_P(res);
    3555             : }
    3556             : 
    3557             : /*-------------------------------------------------------------
    3558             :  * byteaSetBit
    3559             :  *
    3560             :  * Given an instance of type 'bytea' creates a new one with
    3561             :  * the Nth bit set to the given value.
    3562             :  *
    3563             :  *-------------------------------------------------------------
    3564             :  */
    3565             : Datum
    3566          12 : byteaSetBit(PG_FUNCTION_ARGS)
    3567             : {
    3568          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3569          12 :     int64       n = PG_GETARG_INT64(1);
    3570          12 :     int32       newBit = PG_GETARG_INT32(2);
    3571             :     int         len;
    3572             :     int         oldByte,
    3573             :                 newByte;
    3574             :     int         byteNo,
    3575             :                 bitNo;
    3576             : 
    3577          12 :     len = VARSIZE(res) - VARHDRSZ;
    3578             : 
    3579          12 :     if (n < 0 || n >= (int64) len * 8)
    3580           6 :         ereport(ERROR,
    3581             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3582             :                  errmsg("index %lld out of valid range, 0..%lld",
    3583             :                         (long long) n, (long long) len * 8 - 1)));
    3584             : 
    3585             :     /* n/8 is now known < len, so safe to cast to int */
    3586           6 :     byteNo = (int) (n / 8);
    3587           6 :     bitNo = (int) (n % 8);
    3588             : 
    3589             :     /*
    3590             :      * sanity check!
    3591             :      */
    3592           6 :     if (newBit != 0 && newBit != 1)
    3593           0 :         ereport(ERROR,
    3594             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3595             :                  errmsg("new bit must be 0 or 1")));
    3596             : 
    3597             :     /*
    3598             :      * Update the byte.
    3599             :      */
    3600           6 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3601             : 
    3602           6 :     if (newBit == 0)
    3603           6 :         newByte = oldByte & (~(1 << bitNo));
    3604             :     else
    3605           0 :         newByte = oldByte | (1 << bitNo);
    3606             : 
    3607           6 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3608             : 
    3609           6 :     PG_RETURN_BYTEA_P(res);
    3610             : }
    3611             : 
    3612             : 
    3613             : /* text_name()
    3614             :  * Converts a text type to a Name type.
    3615             :  */
    3616             : Datum
    3617       33972 : text_name(PG_FUNCTION_ARGS)
    3618             : {
    3619       33972 :     text       *s = PG_GETARG_TEXT_PP(0);
    3620             :     Name        result;
    3621             :     int         len;
    3622             : 
    3623       33972 :     len = VARSIZE_ANY_EXHDR(s);
    3624             : 
    3625             :     /* Truncate oversize input */
    3626       33972 :     if (len >= NAMEDATALEN)
    3627           6 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3628             : 
    3629             :     /* We use palloc0 here to ensure result is zero-padded */
    3630       33972 :     result = (Name) palloc0(NAMEDATALEN);
    3631       33972 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
    3632             : 
    3633       33972 :     PG_RETURN_NAME(result);
    3634             : }
    3635             : 
    3636             : /* name_text()
    3637             :  * Converts a Name type to a text type.
    3638             :  */
    3639             : Datum
    3640      977918 : name_text(PG_FUNCTION_ARGS)
    3641             : {
    3642      977918 :     Name        s = PG_GETARG_NAME(0);
    3643             : 
    3644      977918 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3645             : }
    3646             : 
    3647             : 
    3648             : /*
    3649             :  * textToQualifiedNameList - convert a text object to list of names
    3650             :  *
    3651             :  * This implements the input parsing needed by nextval() and other
    3652             :  * functions that take a text parameter representing a qualified name.
    3653             :  * We split the name at dots, downcase if not double-quoted, and
    3654             :  * truncate names if they're too long.
    3655             :  */
    3656             : List *
    3657        1334 : textToQualifiedNameList(text *textval)
    3658             : {
    3659             :     char       *rawname;
    3660        1334 :     List       *result = NIL;
    3661             :     List       *namelist;
    3662             :     ListCell   *l;
    3663             : 
    3664             :     /* Convert to C string (handles possible detoasting). */
    3665             :     /* Note we rely on being able to modify rawname below. */
    3666        1334 :     rawname = text_to_cstring(textval);
    3667             : 
    3668        1334 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3669           0 :         ereport(ERROR,
    3670             :                 (errcode(ERRCODE_INVALID_NAME),
    3671             :                  errmsg("invalid name syntax")));
    3672             : 
    3673        1334 :     if (namelist == NIL)
    3674           0 :         ereport(ERROR,
    3675             :                 (errcode(ERRCODE_INVALID_NAME),
    3676             :                  errmsg("invalid name syntax")));
    3677             : 
    3678        2778 :     foreach(l, namelist)
    3679             :     {
    3680        1444 :         char       *curname = (char *) lfirst(l);
    3681             : 
    3682        1444 :         result = lappend(result, makeString(pstrdup(curname)));
    3683             :     }
    3684             : 
    3685        1334 :     pfree(rawname);
    3686        1334 :     list_free(namelist);
    3687             : 
    3688        1334 :     return result;
    3689             : }
    3690             : 
    3691             : /*
    3692             :  * SplitIdentifierString --- parse a string containing identifiers
    3693             :  *
    3694             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3695             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3696             :  * important to avoid memory leaks, so the API is designed to minimize the
    3697             :  * amount of stuff that needs to be allocated and freed.
    3698             :  *
    3699             :  * Inputs:
    3700             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3701             :  *             been modified to contain the separated identifiers.
    3702             :  *  separator: the separator punctuation expected between identifiers
    3703             :  *             (typically '.' or ',').  Whitespace may also appear around
    3704             :  *             identifiers.
    3705             :  * Outputs:
    3706             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3707             :  *            rawstring.  Caller should list_free() this even on error return.
    3708             :  *
    3709             :  * Returns true if okay, false if there is a syntax error in the string.
    3710             :  *
    3711             :  * Note that an empty string is considered okay here, though not in
    3712             :  * textToQualifiedNameList.
    3713             :  */
    3714             : bool
    3715      121256 : SplitIdentifierString(char *rawstring, char separator,
    3716             :                       List **namelist)
    3717             : {
    3718      121256 :     char       *nextp = rawstring;
    3719      121256 :     bool        done = false;
    3720             : 
    3721      121256 :     *namelist = NIL;
    3722             : 
    3723      121256 :     while (scanner_isspace(*nextp))
    3724           0 :         nextp++;                /* skip leading whitespace */
    3725             : 
    3726      121256 :     if (*nextp == '\0')
    3727       14042 :         return true;            /* allow empty string */
    3728             : 
    3729             :     /* At the top of the loop, we are at start of a new identifier. */
    3730             :     do
    3731             :     {
    3732             :         char       *curname;
    3733             :         char       *endp;
    3734             : 
    3735      173396 :         if (*nextp == '"')
    3736             :         {
    3737             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3738       25680 :             curname = nextp + 1;
    3739             :             for (;;)
    3740             :             {
    3741       25684 :                 endp = strchr(nextp + 1, '"');
    3742       25682 :                 if (endp == NULL)
    3743           0 :                     return false;   /* mismatched quotes */
    3744       25682 :                 if (endp[1] != '"')
    3745       25680 :                     break;      /* found end of quoted name */
    3746             :                 /* Collapse adjacent quotes into one quote, and look again */
    3747           2 :                 memmove(endp, endp + 1, strlen(endp));
    3748           2 :                 nextp = endp;
    3749             :             }
    3750             :             /* endp now points at the terminating quote */
    3751       25680 :             nextp = endp + 1;
    3752             :         }
    3753             :         else
    3754             :         {
    3755             :             /* Unquoted name --- extends to separator or whitespace */
    3756             :             char       *downname;
    3757             :             int         len;
    3758             : 
    3759      147716 :             curname = nextp;
    3760     1277074 :             while (*nextp && *nextp != separator &&
    3761     1129360 :                    !scanner_isspace(*nextp))
    3762     1129358 :                 nextp++;
    3763      147716 :             endp = nextp;
    3764      147716 :             if (curname == nextp)
    3765           0 :                 return false;   /* empty unquoted name not allowed */
    3766             : 
    3767             :             /*
    3768             :              * Downcase the identifier, using same code as main lexer does.
    3769             :              *
    3770             :              * XXX because we want to overwrite the input in-place, we cannot
    3771             :              * support a downcasing transformation that increases the string
    3772             :              * length.  This is not a problem given the current implementation
    3773             :              * of downcase_truncate_identifier, but we'll probably have to do
    3774             :              * something about this someday.
    3775             :              */
    3776      147716 :             len = endp - curname;
    3777      147716 :             downname = downcase_truncate_identifier(curname, len, false);
    3778             :             Assert(strlen(downname) <= len);
    3779      147716 :             strncpy(curname, downname, len);    /* strncpy is required here */
    3780      147716 :             pfree(downname);
    3781             :         }
    3782             : 
    3783      173398 :         while (scanner_isspace(*nextp))
    3784           2 :             nextp++;            /* skip trailing whitespace */
    3785             : 
    3786      173396 :         if (*nextp == separator)
    3787             :         {
    3788       66182 :             nextp++;
    3789      104482 :             while (scanner_isspace(*nextp))
    3790       38300 :                 nextp++;        /* skip leading whitespace for next */
    3791             :             /* we expect another name, so done remains false */
    3792             :         }
    3793      107214 :         else if (*nextp == '\0')
    3794      107212 :             done = true;
    3795             :         else
    3796           2 :             return false;       /* invalid syntax */
    3797             : 
    3798             :         /* Now safe to overwrite separator with a null */
    3799      173394 :         *endp = '\0';
    3800             : 
    3801             :         /* Truncate name if it's overlength */
    3802      173394 :         truncate_identifier(curname, strlen(curname), false);
    3803             : 
    3804             :         /*
    3805             :          * Finished isolating current name --- add it to list
    3806             :          */
    3807      173394 :         *namelist = lappend(*namelist, curname);
    3808             : 
    3809             :         /* Loop back if we didn't reach end of string */
    3810      173394 :     } while (!done);
    3811             : 
    3812      107212 :     return true;
    3813             : }
    3814             : 
    3815             : 
    3816             : /*
    3817             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3818             :  *
    3819             :  * This works fine on file names too; the function name is historical.
    3820             :  *
    3821             :  * This is similar to SplitIdentifierString, except that the parsing
    3822             :  * rules are meant to handle pathnames instead of identifiers: there is
    3823             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3824             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3825             :  * last, the returned strings are separately palloc'd rather than being
    3826             :  * pointers into rawstring --- but we still scribble on rawstring.
    3827             :  *
    3828             :  * Inputs:
    3829             :  *  rawstring: the input string; must be modifiable!
    3830             :  *  separator: the separator punctuation expected between directories
    3831             :  *             (typically ',' or ';').  Whitespace may also appear around
    3832             :  *             directories.
    3833             :  * Outputs:
    3834             :  *  namelist: filled with a palloc'd list of directory names.
    3835             :  *            Caller should list_free_deep() this even on error return.
    3836             :  *
    3837             :  * Returns true if okay, false if there is a syntax error in the string.
    3838             :  *
    3839             :  * Note that an empty string is considered okay here.
    3840             :  */
    3841             : bool
    3842        1058 : SplitDirectoriesString(char *rawstring, char separator,
    3843             :                        List **namelist)
    3844             : {
    3845        1058 :     char       *nextp = rawstring;
    3846        1058 :     bool        done = false;
    3847             : 
    3848        1058 :     *namelist = NIL;
    3849             : 
    3850        1058 :     while (scanner_isspace(*nextp))
    3851           0 :         nextp++;                /* skip leading whitespace */
    3852             : 
    3853        1058 :     if (*nextp == '\0')
    3854           2 :         return true;            /* allow empty string */
    3855             : 
    3856             :     /* At the top of the loop, we are at start of a new directory. */
    3857             :     do
    3858             :     {
    3859             :         char       *curname;
    3860             :         char       *endp;
    3861             : 
    3862        1056 :         if (*nextp == '"')
    3863             :         {
    3864             :             /* Quoted name --- collapse quote-quote pairs */
    3865           0 :             curname = nextp + 1;
    3866             :             for (;;)
    3867             :             {
    3868           0 :                 endp = strchr(nextp + 1, '"');
    3869           0 :                 if (endp == NULL)
    3870           0 :                     return false;   /* mismatched quotes */
    3871           0 :                 if (endp[1] != '"')
    3872           0 :                     break;      /* found end of quoted name */
    3873             :                 /* Collapse adjacent quotes into one quote, and look again */
    3874           0 :                 memmove(endp, endp + 1, strlen(endp));
    3875           0 :                 nextp = endp;
    3876             :             }
    3877             :             /* endp now points at the terminating quote */
    3878           0 :             nextp = endp + 1;
    3879             :         }
    3880             :         else
    3881             :         {
    3882             :             /* Unquoted name --- extends to separator or end of string */
    3883        1056 :             curname = endp = nextp;
    3884       18194 :             while (*nextp && *nextp != separator)
    3885             :             {
    3886             :                 /* trailing whitespace should not be included in name */
    3887       17138 :                 if (!scanner_isspace(*nextp))
    3888       17138 :                     endp = nextp + 1;
    3889       17138 :                 nextp++;
    3890             :             }
    3891        1056 :             if (curname == endp)
    3892           0 :                 return false;   /* empty unquoted name not allowed */
    3893             :         }
    3894             : 
    3895        1056 :         while (scanner_isspace(*nextp))
    3896           0 :             nextp++;            /* skip trailing whitespace */
    3897             : 
    3898        1056 :         if (*nextp == separator)
    3899             :         {
    3900           0 :             nextp++;
    3901           0 :             while (scanner_isspace(*nextp))
    3902           0 :                 nextp++;        /* skip leading whitespace for next */
    3903             :             /* we expect another name, so done remains false */
    3904             :         }
    3905        1056 :         else if (*nextp == '\0')
    3906        1056 :             done = true;
    3907             :         else
    3908           0 :             return false;       /* invalid syntax */
    3909             : 
    3910             :         /* Now safe to overwrite separator with a null */
    3911        1056 :         *endp = '\0';
    3912             : 
    3913             :         /* Truncate path if it's overlength */
    3914        1056 :         if (strlen(curname) >= MAXPGPATH)
    3915           0 :             curname[MAXPGPATH - 1] = '\0';
    3916             : 
    3917             :         /*
    3918             :          * Finished isolating current name --- add it to list
    3919             :          */
    3920        1056 :         curname = pstrdup(curname);
    3921        1056 :         canonicalize_path(curname);
    3922        1056 :         *namelist = lappend(*namelist, curname);
    3923             : 
    3924             :         /* Loop back if we didn't reach end of string */
    3925        1056 :     } while (!done);
    3926             : 
    3927        1056 :     return true;
    3928             : }
    3929             : 
    3930             : 
    3931             : /*
    3932             :  * SplitGUCList --- parse a string containing identifiers or file names
    3933             :  *
    3934             :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
    3935             :  * presuming whether the elements will be taken as identifiers or file names.
    3936             :  * We assume the input has already been through flatten_set_variable_args(),
    3937             :  * so that we need never downcase (if appropriate, that was done already).
    3938             :  * Nor do we ever truncate, since we don't know the correct max length.
    3939             :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
    3940             :  * because any embedded whitespace should have led to double-quoting).
    3941             :  * Otherwise the API is identical to SplitIdentifierString.
    3942             :  *
    3943             :  * XXX it's annoying to have so many copies of this string-splitting logic.
    3944             :  * However, it's not clear that having one function with a bunch of option
    3945             :  * flags would be much better.
    3946             :  *
    3947             :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
    3948             :  * Be sure to update that if you have to change this.
    3949             :  *
    3950             :  * Inputs:
    3951             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3952             :  *             been modified to contain the separated identifiers.
    3953             :  *  separator: the separator punctuation expected between identifiers
    3954             :  *             (typically '.' or ',').  Whitespace may also appear around
    3955             :  *             identifiers.
    3956             :  * Outputs:
    3957             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3958             :  *            rawstring.  Caller should list_free() this even on error return.
    3959             :  *
    3960             :  * Returns true if okay, false if there is a syntax error in the string.
    3961             :  */
    3962             : bool
    3963        1056 : SplitGUCList(char *rawstring, char separator,
    3964             :              List **namelist)
    3965             : {
    3966        1056 :     char       *nextp = rawstring;
    3967        1056 :     bool        done = false;
    3968             : 
    3969        1056 :     *namelist = NIL;
    3970             : 
    3971        1056 :     while (scanner_isspace(*nextp))
    3972           0 :         nextp++;                /* skip leading whitespace */
    3973             : 
    3974        1056 :     if (*nextp == '\0')
    3975        1000 :         return true;            /* allow empty string */
    3976             : 
    3977             :     /* At the top of the loop, we are at start of a new identifier. */
    3978             :     do
    3979             :     {
    3980             :         char       *curname;
    3981             :         char       *endp;
    3982             : 
    3983          74 :         if (*nextp == '"')
    3984             :         {
    3985             :             /* Quoted name --- collapse quote-quote pairs */
    3986          24 :             curname = nextp + 1;
    3987             :             for (;;)
    3988             :             {
    3989          36 :                 endp = strchr(nextp + 1, '"');
    3990          30 :                 if (endp == NULL)
    3991           0 :                     return false;   /* mismatched quotes */
    3992          30 :                 if (endp[1] != '"')
    3993          24 :                     break;      /* found end of quoted name */
    3994             :                 /* Collapse adjacent quotes into one quote, and look again */
    3995           6 :                 memmove(endp, endp + 1, strlen(endp));
    3996           6 :                 nextp = endp;
    3997             :             }
    3998             :             /* endp now points at the terminating quote */
    3999          24 :             nextp = endp + 1;
    4000             :         }
    4001             :         else
    4002             :         {
    4003             :             /* Unquoted name --- extends to separator or whitespace */
    4004          50 :             curname = nextp;
    4005         506 :             while (*nextp && *nextp != separator &&
    4006         456 :                    !scanner_isspace(*nextp))
    4007         456 :                 nextp++;
    4008          50 :             endp = nextp;
    4009          50 :             if (curname == nextp)
    4010           0 :                 return false;   /* empty unquoted name not allowed */
    4011             :         }
    4012             : 
    4013          74 :         while (scanner_isspace(*nextp))
    4014           0 :             nextp++;            /* skip trailing whitespace */
    4015             : 
    4016          74 :         if (*nextp == separator)
    4017             :         {
    4018          18 :             nextp++;
    4019          36 :             while (scanner_isspace(*nextp))
    4020          18 :                 nextp++;        /* skip leading whitespace for next */
    4021             :             /* we expect another name, so done remains false */
    4022             :         }
    4023          56 :         else if (*nextp == '\0')
    4024          56 :             done = true;
    4025             :         else
    4026           0 :             return false;       /* invalid syntax */
    4027             : 
    4028             :         /* Now safe to overwrite separator with a null */
    4029          74 :         *endp = '\0';
    4030             : 
    4031             :         /*
    4032             :          * Finished isolating current name --- add it to list
    4033             :          */
    4034          74 :         *namelist = lappend(*namelist, curname);
    4035             : 
    4036             :         /* Loop back if we didn't reach end of string */
    4037          74 :     } while (!done);
    4038             : 
    4039          56 :     return true;
    4040             : }
    4041             : 
    4042             : 
    4043             : /*****************************************************************************
    4044             :  *  Comparison Functions used for bytea
    4045             :  *
    4046             :  * Note: btree indexes need these routines not to leak memory; therefore,
    4047             :  * be careful to free working copies of toasted datums.  Most places don't
    4048             :  * need to be so careful.
    4049             :  *****************************************************************************/
    4050             : 
    4051             : Datum
    4052       10370 : byteaeq(PG_FUNCTION_ARGS)
    4053             : {
    4054       10370 :     Datum       arg1 = PG_GETARG_DATUM(0);
    4055       10370 :     Datum       arg2 = PG_GETARG_DATUM(1);
    4056             :     bool        result;
    4057             :     Size        len1,
    4058             :                 len2;
    4059             : 
    4060             :     /*
    4061             :      * We can use a fast path for unequal lengths, which might save us from
    4062             :      * having to detoast one or both values.
    4063             :      */
    4064       10370 :     len1 = toast_raw_datum_size(arg1);
    4065       10370 :     len2 = toast_raw_datum_size(arg2);
    4066       10370 :     if (len1 != len2)
    4067        4304 :         result = false;
    4068             :     else
    4069             :     {
    4070        6066 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    4071        6066 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    4072             : 
    4073        6066 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    4074             :                          len1 - VARHDRSZ) == 0);
    4075             : 
    4076        6066 :         PG_FREE_IF_COPY(barg1, 0);
    4077        6066 :         PG_FREE_IF_COPY(barg2, 1);
    4078             :     }
    4079             : 
    4080       10370 :     PG_RETURN_BOOL(result);
    4081             : }
    4082             : 
    4083             : Datum
    4084         768 : byteane(PG_FUNCTION_ARGS)
    4085             : {
    4086         768 :     Datum       arg1 = PG_GETARG_DATUM(0);
    4087         768 :     Datum       arg2 = PG_GETARG_DATUM(1);
    4088             :     bool        result;
    4089             :     Size        len1,
    4090             :                 len2;
    4091             : 
    4092             :     /*
    4093             :      * We can use a fast path for unequal lengths, which might save us from
    4094             :      * having to detoast one or both values.
    4095             :      */
    4096         768 :     len1 = toast_raw_datum_size(arg1);
    4097         768 :     len2 = toast_raw_datum_size(arg2);
    4098         768 :     if (len1 != len2)
    4099           0 :         result = true;
    4100             :     else
    4101             :     {
    4102         768 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    4103         768 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    4104             : 
    4105         768 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    4106             :                          len1 - VARHDRSZ) != 0);
    4107             : 
    4108         768 :         PG_FREE_IF_COPY(barg1, 0);
    4109         768 :         PG_FREE_IF_COPY(barg2, 1);
    4110             :     }
    4111             : 
    4112         768 :     PG_RETURN_BOOL(result);
    4113             : }
    4114             : 
    4115             : Datum
    4116        7052 : bytealt(PG_FUNCTION_ARGS)
    4117             : {
    4118        7052 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4119        7052 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4120             :     int         len1,
    4121             :                 len2;
    4122             :     int         cmp;
    4123             : 
    4124        7052 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4125        7052 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4126             : 
    4127        7052 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4128             : 
    4129        7052 :     PG_FREE_IF_COPY(arg1, 0);
    4130        7052 :     PG_FREE_IF_COPY(arg2, 1);
    4131             : 
    4132        7052 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    4133             : }
    4134             : 
    4135             : Datum
    4136        6356 : byteale(PG_FUNCTION_ARGS)
    4137             : {
    4138        6356 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4139        6356 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4140             :     int         len1,
    4141             :                 len2;
    4142             :     int         cmp;
    4143             : 
    4144        6356 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4145        6356 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4146             : 
    4147        6356 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4148             : 
    4149        6356 :     PG_FREE_IF_COPY(arg1, 0);
    4150        6356 :     PG_FREE_IF_COPY(arg2, 1);
    4151             : 
    4152        6356 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    4153             : }
    4154             : 
    4155             : Datum
    4156        4966 : byteagt(PG_FUNCTION_ARGS)
    4157             : {
    4158        4966 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4159        4966 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4160             :     int         len1,
    4161             :                 len2;
    4162             :     int         cmp;
    4163             : 
    4164        4966 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4165        4966 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4166             : 
    4167        4966 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4168             : 
    4169        4966 :     PG_FREE_IF_COPY(arg1, 0);
    4170        4966 :     PG_FREE_IF_COPY(arg2, 1);
    4171             : 
    4172        4966 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    4173             : }
    4174             : 
    4175             : Datum
    4176        5008 : byteage(PG_FUNCTION_ARGS)   /* bytea >= bytea: true when arg 0 sorts at or after arg 1 */
    4177             : {
    4178        5008 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4179        5008 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4180             :     int         len1,
    4181             :                 len2;
    4182             :     int         cmp;
    4183             :     /* data lengths of the two arguments */
    4184        5008 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4185        5008 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4186             :     /* bytewise comparison limited to the length both values share */
    4187        5008 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4188             :     /* cmp and the lengths are already captured; the inputs are not read again */
    4189        5008 :     PG_FREE_IF_COPY(arg1, 0);
    4190        5008 :     PG_FREE_IF_COPY(arg2, 1);
    4191             :     /* if the common prefix is equal, the longer or equal-length value is ">=" */
    4192        5008 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    4193             : }
    4194             : 
    4195             : Datum
    4196       87498 : byteacmp(PG_FUNCTION_ARGS)  /* three-way bytea comparison: int32 result is <0, 0 or >0 */
    4197             : {
    4198       87498 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    4199       87498 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    4200             :     int         len1,
    4201             :                 len2;
    4202             :     int         cmp;
    4203             :     /* data lengths of the two arguments */
    4204       87498 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    4205       87498 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    4206             :     /* compare the shared prefix first; a length difference only breaks a tie */
    4207       87498 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    4208       87498 :     if ((cmp == 0) && (len1 != len2))
    4209       14604 :         cmp = (len1 < len2) ? -1 : 1;   /* the shorter value sorts first */
    4210             :     /* cmp is final; the inputs are not read again */
    4211       87498 :     PG_FREE_IF_COPY(arg1, 0);
    4212       87498 :     PG_FREE_IF_COPY(arg2, 1);
    4213             : 
    4214       87498 :     PG_RETURN_INT32(cmp);
    4215             : }
    4216             : 
    4217             : Datum
    4218          30 : bytea_sortsupport(PG_FUNCTION_ARGS) /* set up sort support for bytea via varstr_sortsupport() */
    4219             : {
    4220          30 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);  /* arg 0 is passed on to varstr_sortsupport() */
    4221             :     MemoryContext oldcontext;
    4222             :     /* run the setup with ssup->ssup_cxt as the current memory context */
    4223          30 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    4224             : 
    4225             :     /* Use generic string SortSupport, forcing "C" collation */
    4226          30 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
    4227             :     /* restore the memory context that was current on entry */
    4228          30 :     MemoryContextSwitchTo(oldcontext);
    4229             : 
    4230          30 :     PG_RETURN_VOID();
    4231             : }
    4232             : 
    4233             : /*
    4234             :  * appendStringInfoText
    4235             :  *
    4236             :  * Append the data bytes of text value t to str.
    4237             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
    4238             :  */
    4239             : static void
    4240     1570906 : appendStringInfoText(StringInfo str, const text *t)
    4241             : {
    4242     1570906 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); /* pointer + length, no intermediate C string */
    4243     1570906 : }
    4244             : 
    4245             : /*
    4246             :  * replace_text
    4247             :  * replace all occurrences of 'from_sub_text' (arg 1) in 'src_text' (arg 0)
    4248             :  * with 'to_sub_text' (arg 2), building the result in a StringInfo
    4249             :  *
    4250             :  * returns 'src_text' itself if it or 'from_sub_text' is empty, or if no
    4251             :  * occurrence is found; otherwise returns the newly built text
    4252             :  */
    4253             : Datum
    4254        2182 : replace_text(PG_FUNCTION_ARGS)
    4255             : {
    4256        2182 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    4257        2182 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    4258        2182 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    4259             :     int         src_text_len;
    4260             :     int         from_sub_text_len;
    4261             :     TextPositionState state;
    4262             :     text       *ret_text;
    4263             :     int         chunk_len;
    4264             :     char       *curr_ptr;   /* start of the current match inside src_text */
    4265             :     char       *start_ptr;  /* first source byte not yet copied to the output */
    4266             :     StringInfoData str;     /* output accumulator */
    4267             :     bool        found;
    4268             : 
    4269        2182 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4270        2182 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
    4271             : 
    4272             :     /* Return unmodified source string if empty source or pattern */
    4273        2182 :     if (src_text_len < 1 || from_sub_text_len < 1)
    4274             :     {
    4275           0 :         PG_RETURN_TEXT_P(src_text);
    4276             :     }
    4277             :     /* prepare a search for from_sub_text in src_text under the call's collation */
    4278        2182 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
    4279             :     /* look for the first occurrence */
    4280        2182 :     found = text_position_next(&state);
    4281             : 
    4282             :     /* When the from_sub_text is not found, there is nothing to do. */
    4283        2182 :     if (!found)
    4284             :     {
    4285         644 :         text_position_cleanup(&state);
    4286         644 :         PG_RETURN_TEXT_P(src_text);
    4287             :     }
    4288        1538 :     curr_ptr = text_position_get_match_ptr(&state);
    4289        1538 :     start_ptr = VARDATA_ANY(src_text);
    4290             :     /* from here on the output is built up in str */
    4291        1538 :     initStringInfo(&str);
    4292             :     /* one iteration per match; the first match was located above */
    4293             :     do
    4294             :     {
    4295        5694 :         CHECK_FOR_INTERRUPTS();
    4296             : 
    4297             :         /* copy the data skipped over by last text_position_next() */
    4298        5694 :         chunk_len = curr_ptr - start_ptr;
    4299        5694 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4300             :         /* emit the replacement in place of the matched bytes */
    4301        5694 :         appendStringInfoText(&str, to_sub_text);
    4302             :         /* the next copy starts just past the matched substring */
    4303        5694 :         start_ptr = curr_ptr + from_sub_text_len;
    4304             :         /* advance to the next occurrence, if there is one */
    4305        5694 :         found = text_position_next(&state);
    4306        5694 :         if (found)
    4307        4156 :             curr_ptr = text_position_get_match_ptr(&state);
    4308             :     }
    4309        5694 :     while (found);
    4310             : 
    4311             :     /* copy trailing data */
    4312        1538 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;   /* end of the whole datum minus start_ptr */
    4313        1538 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4314             : 
    4315        1538 :     text_position_cleanup(&state);
    4316             :     /* copy the accumulated bytes into a text value, then free the work buffer */
    4317        1538 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    4318        1538 :     pfree(str.data);
    4319             : 
    4320        1538 :     PG_RETURN_TEXT_P(ret_text);
    4321             : }
    4322             : 
    4323             : /*
    4324             :  * check_replace_text_has_escape
    4325             :  *
    4326             :  * Returns 0 if text contains no backslashes that need processing.
    4327             :  * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
    4328             :  * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
    4329             :  */
    4330             : static int
    4331        9118 : check_replace_text_has_escape(const text *replace_text)
    4332             : {
    4333        9118 :     int         result = 0;
    4334        9118 :     const char *p = VARDATA_ANY(replace_text);                  /* scan cursor */
    4335        9118 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);    /* one past the last data byte */
    4336             :     /* hop from one backslash to the next; bytes in between are not inspected */
    4337        9162 :     while (p < p_end)
    4338             :     {
    4339             :         /* Find next escape char, if any. */
    4340        8100 :         p = memchr(p, '\\', p_end - p);
    4341        8100 :         if (p == NULL)
    4342        7884 :             break;
    4343         216 :         p++;                    /* step over the backslash */
    4344             :         /* Note: a backslash at the end doesn't require extra processing. */
    4345         216 :         if (p < p_end)
    4346             :         {
    4347         216 :             if (*p >= '1' && *p <= '9')
    4348         172 :                 return 2;       /* Found a submatch specifier, so done */
    4349          44 :             result = 1;         /* Found some other sequence, keep looking */
    4350          44 :             p++;                /* step over the escaped character as well */
    4351             :         }
    4352             :     }
    4353        8946 :     return result;
    4354             : }
    4355             : 
    4356             : /*
    4357             :  * appendStringInfoRegexpSubstr
    4358             :  *
    4359             :  * Append replace_text to str, expanding \1 .. \9 and \& from pmatch[] and
    4360             :  * \\ to one backslash.  start_ptr is the start of the match in the source
    4361             :  * string, at logical character position data_pos.
    4362             :  */
    4363             : static void
    4364         128 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    4365             :                              regmatch_t *pmatch,
    4366             :                              char *start_ptr, int data_pos)
    4367             : {
    4368         128 :     const char *p = VARDATA_ANY(replace_text);                  /* scan cursor */
    4369         128 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);    /* one past the last data byte */
    4370             :     /* each iteration copies one literal run and then handles one escape */
    4371         340 :     while (p < p_end)
    4372             :     {
    4373         296 :         const char *chunk_start = p;
    4374             :         int         so;         /* start offset of the text to substitute */
    4375             :         int         eo;         /* end offset of the text to substitute */
    4376             : 
    4377             :         /* Find next escape char, if any. */
    4378         296 :         p = memchr(p, '\\', p_end - p);
    4379         296 :         if (p == NULL)
    4380          78 :             p = p_end;
    4381             : 
    4382             :         /* Copy the text we just scanned over, if any. */
    4383         296 :         if (p > chunk_start)
    4384         204 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    4385             : 
    4386             :         /* Done if at end of string, else advance over escape char. */
    4387         296 :         if (p >= p_end)
    4388          78 :             break;
    4389         218 :         p++;
    4390             : 
    4391         218 :         if (p >= p_end)
    4392             :         {
    4393             :             /* Escape at very end of input.  Treat same as unexpected char */
    4394           6 :             appendStringInfoChar(str, '\\');
    4395           6 :             break;
    4396             :         }
    4397             :         /* dispatch on the character that follows the backslash */
    4398         212 :         if (*p >= '1' && *p <= '9')
    4399         152 :         {
    4400             :             /* Use the back reference of regexp. */
    4401         152 :             int         idx = *p - '0';     /* digit 1..9 selects pmatch[1..9] */
    4402             : 
    4403         152 :             so = pmatch[idx].rm_so;
    4404         152 :             eo = pmatch[idx].rm_eo;
    4405         152 :             p++;
    4406             :         }
    4407          60 :         else if (*p == '&')
    4408             :         {
    4409             :             /* Use the entire matched string. */
    4410          18 :             so = pmatch[0].rm_so;
    4411          18 :             eo = pmatch[0].rm_eo;
    4412          18 :             p++;
    4413             :         }
    4414          42 :         else if (*p == '\\')
    4415             :         {
    4416             :             /* \\ means transfer one \ to output. */
    4417          36 :             appendStringInfoChar(str, '\\');
    4418          36 :             p++;
    4419          36 :             continue;
    4420             :         }
    4421             :         else
    4422             :         {
    4423             :             /*
    4424             :              * If escape char is not followed by any expected char, just treat
    4425             :              * it as ordinary data to copy.  (XXX would it be better to throw
    4426             :              * an error?)
    4427             :              */
    4428           6 :             appendStringInfoChar(str, '\\');
    4429           6 :             continue;           /* p is not advanced: the next pass copies that character as literal text */
    4430             :         }
    4431             :         /* negative offsets append nothing (presumably a group that did not take part in the match) */
    4432         170 :         if (so >= 0 && eo >= 0)
    4433             :         {
    4434             :             /*
    4435             :              * Copy the text that is back reference of regexp.  Note so and eo
    4436             :              * are counted in characters not bytes.
    4437             :              */
    4438             :             char       *chunk_start;    /* shadows the loop-level chunk_start declared above */
    4439             :             int         chunk_len;
    4440             : 
    4441             :             Assert(so >= data_pos);
    4442         170 :             chunk_start = start_ptr;
    4443         170 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);  /* skip (so - data_pos) characters */
    4444         170 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);           /* byte length of (eo - so) characters */
    4445         170 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    4446             :         }
    4447             :     }
    4448         128 : }
    4449             : 
    4450             : /*
    4451             :  * replace_text_regexp
    4452             :  *
    4453             :  * Replace substring(s) in src_text that match pattern_text with replace_text,
    4454             :  * which may contain backslash markers to substitute (parts of) the matched
    4455             :  * text.  The result is returned as a newly built text value.
    4456             :  *
    4457             :  * cflags: regexp compile flags.
    4458             :  * collation: collation to use.
    4459             :  * search_start: the character (not byte) offset in src_text at which to
    4460             :  * begin searching.
    4461             :  * n: if 0, replace all matches; if > 0, replace only the N'th match.
    4462             :  */
    4463             : text *
    4464        9118 : replace_text_regexp(text *src_text, text *pattern_text,
    4465             :                     text *replace_text,
    4466             :                     int cflags, Oid collation,
    4467             :                     int search_start, int n)
    4468             : {
    4469             :     text       *ret_text;
    4470             :     regex_t    *re;
    4471        9118 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4472        9118 :     int         nmatches = 0;   /* number of matches found so far */
    4473             :     StringInfoData buf;         /* output accumulator */
    4474             :     regmatch_t  pmatch[10];     /* main match, plus \1 to \9 */
    4475        9118 :     int         nmatch = lengthof(pmatch);
    4476             :     pg_wchar   *data;           /* src_text converted to wide characters */
    4477             :     size_t      data_len;       /* value returned by the conversion below */
    4478             :     int         data_pos;       /* character position that start_ptr corresponds to */
    4479             :     char       *start_ptr;      /* first source byte not yet copied to buf */
    4480             :     int         escape_status;  /* 0, 1 or 2; see check_replace_text_has_escape() */
    4481             : 
    4482        9118 :     initStringInfo(&buf);
    4483             : 
    4484             :     /* Convert data string to wide characters. */
    4485        9118 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));  /* one slot per input byte, plus one */
    4486        9118 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    4487             : 
    4488             :     /* Check whether replace_text has escapes, especially regexp submatches. */
    4489        9118 :     escape_status = check_replace_text_has_escape(replace_text);
    4490             : 
    4491             :     /* If no regexp submatches, we can use REG_NOSUB. */
    4492        9118 :     if (escape_status < 2)
    4493             :     {
    4494        8946 :         cflags |= REG_NOSUB;
    4495             :         /* Also tell pg_regexec we only want the whole-match location. */
    4496        8946 :         nmatch = 1;
    4497             :     }
    4498             : 
    4499             :     /* Prepare the regexp. */
    4500        9118 :     re = RE_compile_and_cache(pattern_text, cflags, collation);
    4501             : 
    4502             :     /* start_ptr points to the data_pos'th character of src_text */
    4503        9118 :     start_ptr = (char *) VARDATA_ANY(src_text);
    4504        9118 :     data_pos = 0;
    4505             :     /* NOTE(review): search_start is int, data_len is size_t; confirm callers never pass a negative search_start */
    4506       12684 :     while (search_start <= data_len)
    4507             :     {
    4508             :         int         regexec_result;
    4509             : 
    4510       12678 :         CHECK_FOR_INTERRUPTS();
    4511             :         /* search the wide-character copy, starting at character offset search_start */
    4512       12678 :         regexec_result = pg_regexec(re,
    4513             :                                     data,
    4514             :                                     data_len,
    4515             :                                     search_start,
    4516             :                                     NULL,   /* no details */
    4517             :                                     nmatch,
    4518             :                                     pmatch,
    4519             :                                     0);
    4520             :         /* no further match: leave the loop and copy the remaining text below */
    4521       12678 :         if (regexec_result == REG_NOMATCH)
    4522        7920 :             break;
    4523             :         /* any result other than REG_OKAY is reported as an error */
    4524        4758 :         if (regexec_result != REG_OKAY)
    4525             :         {
    4526             :             char        errMsg[100];
    4527             : 
    4528           0 :             CHECK_FOR_INTERRUPTS();
    4529           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    4530           0 :             ereport(ERROR,
    4531             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    4532             :                      errmsg("regular expression failed: %s", errMsg)));
    4533             :         }
    4534             : 
    4535             :         /*
    4536             :          * Count matches, and decide whether to replace this match.
    4537             :          */
    4538        4758 :         nmatches++;
    4539        4758 :         if (n > 0 && nmatches != n)
    4540             :         {
    4541             :             /*
    4542             :              * No, so advance search_start, but not start_ptr/data_pos. (Thus,
    4543             :              * we treat the matched text as if it weren't matched, and copy it
    4544             :              * to the output later.)
    4545             :              */
    4546          60 :             search_start = pmatch[0].rm_eo;
    4547          60 :             if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4548           0 :                 search_start++;     /* zero-length match: step one character so the search moves on */
    4549          60 :             continue;
    4550             :         }
    4551             : 
    4552             :         /*
    4553             :          * Copy the text to the left of the match position.  Note we are given
    4554             :          * character not byte indexes.
    4555             :          */
    4556        4698 :         if (pmatch[0].rm_so - data_pos > 0)
    4557             :         {
    4558             :             int         chunk_len;
    4559             : 
    4560        4582 :             chunk_len = charlen_to_bytelen(start_ptr,
    4561        4582 :                                            pmatch[0].rm_so - data_pos);
    4562        4582 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4563             : 
    4564             :             /*
    4565             :              * Advance start_ptr over that text, to avoid multiple rescans of
    4566             :              * it if the replace_text contains multiple back-references.
    4567             :              */
    4568        4582 :             start_ptr += chunk_len;
    4569        4582 :             data_pos = pmatch[0].rm_so;
    4570             :         }
    4571             : 
    4572             :         /*
    4573             :          * Copy the replace_text, processing escapes if any are present.
    4574             :          */
    4575        4698 :         if (escape_status > 0)
    4576         128 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    4577             :                                          start_ptr, data_pos);
    4578             :         else
    4579        4570 :             appendStringInfoText(&buf, replace_text);
    4580             : 
    4581             :         /* Advance start_ptr and data_pos over the matched text. */
    4582        9396 :         start_ptr += charlen_to_bytelen(start_ptr,
    4583        4698 :                                         pmatch[0].rm_eo - data_pos);
    4584        4698 :         data_pos = pmatch[0].rm_eo;
    4585             : 
    4586             :         /*
    4587             :          * If we only want to replace one occurrence, we're done.
    4588             :          */
    4589        4698 :         if (n > 0)
    4590        1192 :             break;
    4591             : 
    4592             :         /*
    4593             :          * Advance search position.  Normally we start the next search at the
    4594             :          * end of the previous match; but if the match was of zero length, we
    4595             :          * have to advance by one character, or we'd just find the same match
    4596             :          * again.
    4597             :          */
    4598        3506 :         search_start = data_pos;
    4599        3506 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4600          12 :             search_start++;
    4601             :     }
    4602             : 
    4603             :     /*
    4604             :      * Copy the text to the right of the last match.
    4605             :      */
    4606        9118 :     if (data_pos < data_len)
    4607             :     {
    4608             :         int         chunk_len;
    4609             : 
    4610        8722 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;   /* bytes from start_ptr to the end of the datum */
    4611        8722 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4612             :     }
    4613             :     /* copy the accumulated bytes into a text value, then free both work buffers */
    4614        9118 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4615        9118 :     pfree(buf.data);
    4616        9118 :     pfree(data);
    4617             : 
    4618        9118 :     return ret_text;
    4619             : }
    4620             : 
    4621             : /*
    4622             :  * split_part
    4623             :  * parse input string (arg 0) based on provided field separator (arg 1)
    4624             :  * return N'th item (arg 2; 1 based, negative counts from end)
    4625             :  */
    4626             : Datum
    4627         102 : split_part(PG_FUNCTION_ARGS)
    4628             : {
    4629         102 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4630         102 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4631         102 :     int         fldnum = PG_GETARG_INT32(2);
    4632             :     int         inputstring_len;
    4633             :     int         fldsep_len;
    4634             :     TextPositionState state;
    4635             :     char       *start_ptr;      /* first byte of the candidate field */
    4636             :     char       *end_ptr;        /* one past the last byte of the candidate field */
    4637             :     text       *result_text;
    4638             :     bool        found;
    4639             : 
    4640             :     /* field number is 1 based */
    4641         102 :     if (fldnum == 0)
    4642           6 :         ereport(ERROR,
    4643             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4644             :                  errmsg("field position must not be zero")));
    4645             : 
    4646          96 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4647          96 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4648             : 
    4649             :     /* return empty string for empty input string */
    4650          96 :     if (inputstring_len < 1)
    4651          12 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4652             : 
    4653             :     /* handle empty field separator */
    4654          84 :     if (fldsep_len < 1)
    4655             :     {
    4656             :         /* if first or last field, return input string, else empty string */
    4657          24 :         if (fldnum == 1 || fldnum == -1)
    4658          12 :             PG_RETURN_TEXT_P(inputstring);
    4659             :         else
    4660          12 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4661             :     }
    4662             : 
    4663             :     /* find the first field separator */
    4664          60 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4665             : 
    4666          60 :     found = text_position_next(&state);
    4667             : 
    4668             :     /* special case if fldsep not found at all */
    4669          60 :     if (!found)
    4670             :     {
    4671          12 :         text_position_cleanup(&state);
    4672             :         /* if first or last field, return input string, else empty string */
    4673          12 :         if (fldnum == 1 || fldnum == -1)
    4674           6 :             PG_RETURN_TEXT_P(inputstring);
    4675             :         else
    4676           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4677             :     }
    4678             : 
    4679             :     /*
    4680             :      * take care of a negative field number (i.e. count from the right) by
    4681             :      * converting to a positive field number; we need total number of fields
    4682             :      */
    4683          48 :     if (fldnum < 0)
    4684             :     {
    4685             :         /* we found a fldsep, so there are at least two fields */
    4686          24 :         int         numfields = 2;
    4687             :         /* every further separator adds one more field */
    4688          36 :         while (text_position_next(&state))
    4689          12 :             numfields++;
    4690             : 
    4691             :         /* special case of last field does not require an extra pass */
    4692          24 :         if (fldnum == -1)
    4693             :         {
    4694           6 :             start_ptr = text_position_get_match_ptr(&state) + fldsep_len;  /* NOTE(review): relies on state still referring to the last separator found - confirm */
    4695           6 :             end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
    4696           6 :             text_position_cleanup(&state);
    4697           6 :             PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
    4698             :                                                       end_ptr - start_ptr));
    4699             :         }
    4700             : 
    4701             :         /* else, convert fldnum to positive notation */
    4702          18 :         fldnum += numfields + 1;    /* e.g. -2 with 3 fields becomes 2 */
    4703             : 
    4704             :         /* if nonexistent field, return empty string */
    4705          18 :         if (fldnum <= 0)
    4706             :         {
    4707           6 :             text_position_cleanup(&state);
    4708           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4709             :         }
    4710             : 
    4711             :         /* reset to pointing at first match, but now with positive fldnum */
    4712          12 :         text_position_reset(&state);
    4713          12 :         found = text_position_next(&state);
    4714             :         Assert(found);
    4715             :     }
    4716             : 
    4717             :     /* identify bounds of first field */
    4718          36 :     start_ptr = VARDATA_ANY(inputstring);
    4719          36 :     end_ptr = text_position_get_match_ptr(&state);
    4720             :     /* move forward one field per pass until fldnum reaches zero or no separator is left */
    4721          66 :     while (found && --fldnum > 0)
    4722             :     {
    4723             :         /* identify bounds of next field */
    4724          30 :         start_ptr = end_ptr + fldsep_len;
    4725          30 :         found = text_position_next(&state);
    4726          30 :         if (found)
    4727          18 :             end_ptr = text_position_get_match_ptr(&state);
    4728             :     }
    4729             : 
    4730          36 :     text_position_cleanup(&state);
    4731             :     /* fldnum > 0 here means the loop stopped because separators ran out */
    4732          36 :     if (fldnum > 0)
    4733             :     {
    4734             :         /* N'th field separator not found */
    4735             :         /* if last field requested, return it, else empty string */
    4736          12 :         if (fldnum == 1)
    4737             :         {
    4738           6 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);   /* bytes that precede the last field */
    4739             : 
    4740           6 :             result_text = cstring_to_text_with_len(start_ptr,
    4741             :                                                    inputstring_len - last_len);
    4742             :         }
    4743             :         else
    4744           6 :             result_text = cstring_to_text("");
    4745             :     }
    4746             :     else
    4747             :     {
    4748             :         /* non-last field requested */
    4749          24 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
    4750             :     }
    4751             : 
    4752          36 :     PG_RETURN_TEXT_P(result_text);
    4753             : }
    4754             : 
    4755             : /*
    4756             :  * Convenience function: true when texteq() reports txt1 equal to txt2 under collid.
    4757             :  */
    4758             : static bool
    4759         168 : text_isequal(text *txt1, text *txt2, Oid collid)
    4760             : {
    4761         168 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,    /* call texteq directly with both values as Datums */
    4762             :                                                 collid,
    4763             :                                                 PointerGetDatum(txt1),
    4764             :                                                 PointerGetDatum(txt2)));
    4765             : }
    4766             : 
    4767             : /*
    4768             :  * text_to_array
    4769             :  * parse input string and return text array of elements,
    4770             :  * based on provided field separator (the work is done by split_text())
    4771             :  */
    4772             : Datum
    4773         108 : text_to_array(PG_FUNCTION_ARGS)
    4774             : {
    4775             :     SplitTextOutputData tstate;
    4776             : 
    4777             :     /* For array output, tstate should start as all zeroes */
    4778         108 :     memset(&tstate, 0, sizeof(tstate));
    4779             :     /* a false result from split_text() becomes a SQL NULL */
    4780         108 :     if (!split_text(fcinfo, &tstate))
    4781           6 :         PG_RETURN_NULL();
    4782             :     /* astate still NULL after a successful split: return an empty text array */
    4783         102 :     if (tstate.astate == NULL)
    4784           6 :         PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4785             :     /* otherwise build the array from the accumulated state */
    4786          96 :     PG_RETURN_ARRAYTYPE_P(makeArrayResult(tstate.astate,
    4787             :                                           CurrentMemoryContext));
    4788             : }
    4789             : 
    4790             : /*
    4791             :  * text_to_array_null
    4792             :  * parse input string and return text array of elements,
    4793             :  * based on provided field separator and null string
    4794             :  *
    4795             :  * This is a separate entry point only to prevent the regression tests from
    4796             :  * complaining about different argument sets for the same internal function.
                     :  *
                     :  * It simply forwards fcinfo to text_to_array(); the extra null-string
                     :  * argument is picked up by split_text(), which checks PG_NARGS().
    4797             :  */
    4798             : Datum
    4799          24 : text_to_array_null(PG_FUNCTION_ARGS)
    4800             : {
    4801          24 :     return text_to_array(fcinfo);
    4802             : }
    4803             : 
    4804             : /*
    4805             :  * text_to_table
    4806             :  * parse input string and return table of elements,
    4807             :  * based on provided field separator
                     :  *
                     :  * The tuplestore and tuple descriptor are read from the ReturnSetInfo
                     :  * after SetSingleFuncCall() (presumably that call sets them up --
                     :  * confirm against its definition), so split_text_accum_result() emits
                     :  * each field as a one-column row instead of accumulating an array.
                     :  * split_text()'s result is ignored and (Datum) 0 is always returned.
    4808             :  */
    4809             : Datum
    4810          84 : text_to_table(PG_FUNCTION_ARGS)
    4811             : {
    4812          84 :     ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
    4813             :     SplitTextOutputData tstate;
    4814             : 
    4815          84 :     tstate.astate = NULL;
    4816          84 :     SetSingleFuncCall(fcinfo, SRF_SINGLE_USE_EXPECTED);
    4817          84 :     tstate.tupstore = rsi->setResult;
    4818          84 :     tstate.tupdesc = rsi->setDesc;
    4819             : 
    4820          84 :     (void) split_text(fcinfo, &tstate);
    4821             : 
    4822          84 :     return (Datum) 0;
    4823             : }
    4824             : 
    4825             : /*
    4826             :  * text_to_table_null
    4827             :  * parse input string and return table of elements,
    4828             :  * based on provided field separator and null string
    4829             :  *
    4830             :  * This is a separate entry point only to prevent the regression tests from
    4831             :  * complaining about different argument sets for the same internal function.
                     :  *
                     :  * It simply forwards fcinfo to text_to_table(); the extra null-string
                     :  * argument is picked up by split_text(), which checks PG_NARGS().
    4832             :  */
    4833             : Datum
    4834          24 : text_to_table_null(PG_FUNCTION_ARGS)
    4835             : {
    4836          24 :     return text_to_table(fcinfo);
    4837             : }
    4838             : 
    4839             : /*
    4840             :  * Common code for text_to_array, text_to_array_null, text_to_table
    4841             :  * and text_to_table_null functions.
    4842             :  *
    4843             :  * These are not strict so we have to test for null inputs explicitly.
    4844             :  * Returns false if result is to be null, else returns true.
    4845             :  *
    4846             :  * Note that if the result is valid but empty (zero elements), we return
    4847             :  * without changing *tstate --- caller must handle that case, too.
                     :  *
                     :  * Arguments read from fcinfo:
                     :  *  0: input string; a null here is the only case that returns false
                     :  *  1: field separator; null means one element per character, empty
                     :  *     means the whole (non-empty) input is emitted as a single field
                     :  *  2: optional null string; a field equal to it is emitted as NULL
                     :  *
                     :  * Every field found is handed to split_text_accum_result(), which
                     :  * stores it in *tstate.
    4848             :  */
    4849             : static bool
    4850         192 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
    4851             : {
    4852             :     text       *inputstring;
    4853             :     text       *fldsep;
    4854             :     text       *null_string;
    4855         192 :     Oid         collation = PG_GET_COLLATION();
    4856             :     int         inputstring_len;
    4857             :     int         fldsep_len;
    4858             :     char       *start_ptr;
    4859             :     text       *result_text;
    4860             : 
    4861             :     /* when input string is NULL, then result is NULL too */
    4862         192 :     if (PG_ARGISNULL(0))
    4863          12 :         return false;
    4864             : 
    4865         180 :     inputstring = PG_GETARG_TEXT_PP(0);
    4866             : 
    4867             :     /* fldsep can be NULL */
    4868         180 :     if (!PG_ARGISNULL(1))
    4869         168 :         fldsep = PG_GETARG_TEXT_PP(1);
    4870             :     else
    4871          12 :         fldsep = NULL;
    4872             : 
    4873             :     /* null_string can be NULL or omitted */
    4874         180 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4875          48 :         null_string = PG_GETARG_TEXT_PP(2);
    4876             :     else
    4877         132 :         null_string = NULL;
    4878             : 
    4879         180 :     if (fldsep != NULL)
    4880             :     {
    4881             :         /*
    4882             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4883             :          * to search for occurrences of fldsep.
    4884             :          */
    4885             :         TextPositionState state;
    4886             : 
    4887         168 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4888         168 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4889             : 
    4890             :         /* return empty set for empty input string */
    4891         168 :         if (inputstring_len < 1)
    4892          60 :             return true;
    4893             : 
    4894             :         /* empty field separator: return input string as a one-element set */
    4895         156 :         if (fldsep_len < 1)
    4896             :         {
    4897          48 :             split_text_accum_result(tstate, inputstring,
    4898             :                                     null_string, collation);
    4899          48 :             return true;
    4900             :         }
    4901             : 
    4902         108 :         text_position_setup(inputstring, fldsep, collation, &state);
    4903             : 
    4904         108 :         start_ptr = VARDATA_ANY(inputstring);
    4905             : 
    4906             :         for (;;)
    4907         444 :         {
    4908             :             bool        found;
    4909             :             char       *end_ptr;
    4910             :             int         chunk_len;
    4911             : 
    4912         552 :             CHECK_FOR_INTERRUPTS();
    4913             : 
    4914         552 :             found = text_position_next(&state);
    4915         552 :             if (!found)
    4916             :             {
    4917             :                 /* fetch last field */
    4918         108 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4919         108 :                 end_ptr = NULL; /* not used, but some compilers complain */
    4920             :             }
    4921             :             else
    4922             :             {
    4923             :                 /* fetch non-last field */
    4924         444 :                 end_ptr = text_position_get_match_ptr(&state);
    4925         444 :                 chunk_len = end_ptr - start_ptr;
    4926             :             }
    4927             : 
    4928             :             /* build a temp text datum to pass to split_text_accum_result */
    4929         552 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4930             : 
    4931             :             /* stash away this field */
    4932         552 :             split_text_accum_result(tstate, result_text,
    4933             :                                     null_string, collation);
    4934             : 
    4935         552 :             pfree(result_text);
    4936             : 
    4937         552 :             if (!found)
    4938         108 :                 break;
    4939             : 
    4940         444 :             start_ptr = end_ptr + fldsep_len;   /* next field starts just past the separator */
    4941             :         }
    4942             : 
    4943         108 :         text_position_cleanup(&state);
    4944             :     }
    4945             :     else
    4946             :     {
    4947             :         /*
    4948             :          * When fldsep is NULL, each character in the input string becomes a
    4949             :          * separate element in the result set.  The separator is effectively
    4950             :          * the space between characters.
                     :          *
                     :          * An empty input string makes the loop below run zero times, so
                     :          * no element is added in that case.
    4951             :          */
    4952          12 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4953             : 
    4954          12 :         start_ptr = VARDATA_ANY(inputstring);
    4955             : 
    4956          72 :         while (inputstring_len > 0)
    4957             :         {
    4958          60 :             int         chunk_len = pg_mblen(start_ptr);
    4959             : 
    4960          60 :             CHECK_FOR_INTERRUPTS();
    4961             : 
    4962             :             /* build a temp text datum to pass to split_text_accum_result */
    4963          60 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4964             : 
    4965             :             /* stash away this field */
    4966          60 :             split_text_accum_result(tstate, result_text,
    4967             :                                     null_string, collation);
    4968             : 
    4969          60 :             pfree(result_text);
    4970             : 
    4971          60 :             start_ptr += chunk_len;
    4972          60 :             inputstring_len -= chunk_len;
    4973             :         }
    4974             :     }
    4975             : 
    4976         120 :     return true;
    4977             : }
    4978             : 
    4979             : /*
    4980             :  * Add text item to result set (table or array).
    4981             :  *
    4982             :  * This is also responsible for checking to see if the item matches
    4983             :  * the null_string, in which case we should emit NULL instead.
                     :  *
                     :  * If tstate->tupstore is set, the item is stored as a one-column row
                     :  * via tuplestore_putvalues(); otherwise it is appended to
                     :  * tstate->astate with accumArrayResult() in CurrentMemoryContext.
                     :  * A NULL null_string disables the comparison.  The comparison itself
                     :  * uses text_isequal() with the given collation.
    4984             :  */
    4985             : static void
    4986         660 : split_text_accum_result(SplitTextOutputData *tstate,
    4987             :                         text *field_value,
    4988             :                         text *null_string,
    4989             :                         Oid collation)
    4990             : {
    4991         660 :     bool        is_null = false;
    4992             : 
    4993         660 :     if (null_string && text_isequal(field_value, null_string, collation))
    4994          48 :         is_null = true;
    4995             : 
    4996         660 :     if (tstate->tupstore)
    4997             :     {
    4998             :         Datum       values[1];
    4999             :         bool        nulls[1];
    5000             : 
    5001         228 :         values[0] = PointerGetDatum(field_value);
    5002         228 :         nulls[0] = is_null;
    5003             : 
    5004         228 :         tuplestore_putvalues(tstate->tupstore,
    5005             :                              tstate->tupdesc,
    5006             :                              values,
    5007             :                              nulls);
    5008             :     }
    5009             :     else
    5010             :     {
    5011         432 :         tstate->astate = accumArrayResult(tstate->astate,
    5012             :                                           PointerGetDatum(field_value),
    5013             :                                           is_null,
    5014             :                                           TEXTOID,
    5015             :                                           CurrentMemoryContext);
    5016             :     }
    5017         660 : }
    5018             : 
    5019             : /*
    5020             :  * array_to_text
    5021             :  * concatenate Cstring representation of input array elements
    5022             :  * using provided field separator
                     :  *
                     :  * Both arguments are fetched without any null test here.  NULL is
                     :  * passed as the null string, so array_to_text_internal() skips null
                     :  * elements.
    5023             :  */
    5024             : Datum
    5025       58306 : array_to_text(PG_FUNCTION_ARGS)
    5026             : {
    5027       58306 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    5028       58306 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    5029             : 
    5030       58306 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
    5031             : }
    5032             : 
    5033             : /*
    5034             :  * array_to_text_null
    5035             :  * concatenate Cstring representation of input array elements
    5036             :  * using provided field separator and null string
    5037             :  *
    5038             :  * This version is not strict so we have to test for null inputs explicitly.
                     :  *
                     :  * A null array or null separator gives a NULL result.  A null third
                     :  * argument is allowed and is handed to array_to_text_internal() as a
                     :  * NULL pointer, which makes it skip null elements.
    5039             :  */
    5040             : Datum
    5041          12 : array_to_text_null(PG_FUNCTION_ARGS)
    5042             : {
    5043             :     ArrayType  *v;
    5044             :     char       *fldsep;
    5045             :     char       *null_string;
    5046             : 
    5047             :     /* returns NULL when first or second parameter is NULL */
    5048          12 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    5049           0 :         PG_RETURN_NULL();
    5050             : 
    5051          12 :     v = PG_GETARG_ARRAYTYPE_P(0);
    5052          12 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    5053             : 
    5054             :     /* NULL null string is passed through as a null pointer */
    5055          12 :     if (!PG_ARGISNULL(2))
    5056           6 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    5057             :     else
    5058           6 :         null_string = NULL;
    5059             : 
    5060          12 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    5061             : }
    5062             : 
    5063             : /*
    5064             :  * common code for array_to_text and array_to_text_null functions
                     :  *
                     :  * Builds a text value from the output-function representation of each
                     :  * element of v, placing fldsep before every printed item except the
                     :  * first.  A null element is printed as null_string, or skipped
                     :  * entirely (no separator either) when null_string is NULL.  An array
                     :  * with zero items yields an empty string.
                     :  *
                     :  * Element type I/O information is cached in fcinfo->flinfo->fn_extra
                     :  * (allocated in fn_mcxt) and refreshed whenever the array's element
                     :  * type differs from the cached one.
    5065             :  */
    5066             : static text *
    5067       58336 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    5068             :                        const char *fldsep, const char *null_string)
    5069             : {
    5070             :     text       *result;
    5071             :     int         nitems,
    5072             :                *dims,
    5073             :                 ndims;
    5074             :     Oid         element_type;
    5075             :     int         typlen;
    5076             :     bool        typbyval;
    5077             :     char        typalign;
    5078             :     StringInfoData buf;
    5079       58336 :     bool        printed = false;
    5080             :     char       *p;
    5081             :     bits8      *bitmap;
    5082             :     int         bitmask;
    5083             :     int         i;
    5084             :     ArrayMetaState *my_extra;
    5085             : 
    5086       58336 :     ndims = ARR_NDIM(v);
    5087       58336 :     dims = ARR_DIMS(v);
    5088       58336 :     nitems = ArrayGetNItems(ndims, dims);
    5089             : 
    5090             :     /* if there are no elements, return an empty string */
    5091       58336 :     if (nitems == 0)
    5092       34850 :         return cstring_to_text_with_len("", 0);
    5093             : 
    5094       23486 :     element_type = ARR_ELEMTYPE(v);
    5095       23486 :     initStringInfo(&buf);
    5096             : 
    5097             :     /*
    5098             :      * We arrange to look up info about element type, including its output
    5099             :      * conversion proc, only once per series of calls, assuming the element
    5100             :      * type doesn't change underneath us.
    5101             :      */
    5102       23486 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    5103       23486 :     if (my_extra == NULL)
    5104             :     {
    5105        1314 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5106             :                                                       sizeof(ArrayMetaState));
    5107        1314 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    5108        1314 :         my_extra->element_type = ~element_type; /* never equals element_type, so the lookup below runs */
    5109             :     }
    5110             : 
    5111       23486 :     if (my_extra->element_type != element_type)
    5112             :     {
    5113             :         /*
    5114             :          * Get info about element type, including its output conversion proc
    5115             :          */
    5116        1314 :         get_type_io_data(element_type, IOFunc_output,
    5117             :                          &my_extra->typlen, &my_extra->typbyval,
    5118             :                          &my_extra->typalign, &my_extra->typdelim,
    5119             :                          &my_extra->typioparam, &my_extra->typiofunc);
    5120        1314 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    5121        1314 :                       fcinfo->flinfo->fn_mcxt);
    5122        1314 :         my_extra->element_type = element_type;
    5123             :     }
    5124       23486 :     typlen = my_extra->typlen;
    5125       23486 :     typbyval = my_extra->typbyval;
    5126       23486 :     typalign = my_extra->typalign;
    5127             : 
    5128       23486 :     p = ARR_DATA_PTR(v);
    5129       23486 :     bitmap = ARR_NULLBITMAP(v);
    5130       23486 :     bitmask = 1;
    5131             : 
    5132       80206 :     for (i = 0; i < nitems; i++)
    5133             :     {
    5134             :         Datum       itemvalue;
    5135             :         char       *value;
    5136             : 
    5137             :         /* Get source element, checking for NULL */
    5138       56720 :         if (bitmap && (*bitmap & bitmask) == 0)
    5139             :         {
    5140             :             /* if null_string is NULL, we just ignore null elements */
    5141          18 :             if (null_string != NULL)
    5142             :             {
    5143           6 :                 if (printed)
    5144           6 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
    5145             :                 else
    5146           0 :                     appendStringInfoString(&buf, null_string);
    5147           6 :                 printed = true;
    5148             :             }
    5149             :         }
    5150             :         else
    5151             :         {
    5152       56702 :             itemvalue = fetch_att(p, typbyval, typlen);
    5153             : 
    5154       56702 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
    5155             : 
    5156       56702 :             if (printed)
    5157       33216 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    5158             :             else
    5159       23486 :                 appendStringInfoString(&buf, value);
    5160       56702 :             printed = true;
    5161             : 
    5162       56702 :             p = att_addlength_pointer(p, typlen, p);    /* data pointer moves only for non-null elements */
    5163       56702 :             p = (char *) att_align_nominal(p, typalign);
    5164             :         }
    5165             : 
    5166             :         /* advance bitmap pointer if any */
    5167       56720 :         if (bitmap)
    5168             :         {
    5169         108 :             bitmask <<= 1;
    5170         108 :             if (bitmask == 0x100)   /* all eight bits of this bitmap byte consumed */
    5171             :             {
    5172           0 :                 bitmap++;
    5173           0 :                 bitmask = 1;
    5174             :             }
    5175             :         }
    5176             :     }
    5177             : 
    5178       23486 :     result = cstring_to_text_with_len(buf.data, buf.len);
    5179       23486 :     pfree(buf.data);
    5180             : 
    5181       23486 :     return result;
    5182             : }
    5183             : 
    5184             : #define HEXBASE 16
    5185             : /*
    5186             :  * Convert an int32 to a string containing a base 16 (hex) representation of
    5187             :  * the number.
                     :  *
                     :  * The argument is cast to uint32 before conversion, so a negative input
                     :  * is formatted as its unsigned 32-bit value.  Digits are lower case and
                     :  * are produced least-significant first, writing backwards from the end
                     :  * of buf.  The do/while always emits at least one digit, so zero gives
                     :  * "0".  There is no leading-zero padding.
    5188             :  */
    5189             : Datum
    5190       38708 : to_hex32(PG_FUNCTION_ARGS)
    5191             : {
    5192       38708 :     uint32      value = (uint32) PG_GETARG_INT32(0);
    5193             :     char       *ptr;
    5194       38708 :     const char *digits = "0123456789abcdef";
    5195             :     char        buf[32];        /* bigger than needed, but reasonable */
    5196             : 
    5197       38708 :     ptr = buf + sizeof(buf) - 1;
    5198       38708 :     *ptr = '\0';
    5199             : 
    5200             :     do
    5201             :     {
    5202       74726 :         *--ptr = digits[value % HEXBASE];
    5203       74726 :         value /= HEXBASE;
    5204       74726 :     } while (ptr > buf && value);
    5205             : 
    5206       38708 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    5207             : }
    5208             : 
    5209             : /*
    5210             :  * Convert an int64 to a string containing a base 16 (hex) representation of
    5211             :  * the number.
                     :  *
                     :  * The argument is cast to uint64 before conversion, so a negative input
                     :  * is formatted as its unsigned 64-bit value.  Digits are lower case and
                     :  * are produced least-significant first, writing backwards from the end
                     :  * of buf.  The do/while always emits at least one digit, so zero gives
                     :  * "0".  There is no leading-zero padding.
    5212             :  */
    5213             : Datum
    5214           6 : to_hex64(PG_FUNCTION_ARGS)
    5215             : {
    5216           6 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    5217             :     char       *ptr;
    5218           6 :     const char *digits = "0123456789abcdef";
    5219             :     char        buf[32];        /* bigger than needed, but reasonable */
    5220             : 
    5221           6 :     ptr = buf + sizeof(buf) - 1;
    5222           6 :     *ptr = '\0';
    5223             : 
    5224             :     do
    5225             :     {
    5226          48 :         *--ptr = digits[value % HEXBASE];
    5227          48 :         value /= HEXBASE;
    5228          48 :     } while (ptr > buf && value);
    5229             : 
    5230           6 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    5231             : }
    5232             : 
    5233             : /*
    5234             :  * Return the size of a datum, possibly compressed
    5235             :  *
    5236             :  * Works on any data type
                     :  *
                     :  * The argument type's typlen is looked up on the first call and cached
                     :  * in fn_extra (allocated in fn_mcxt).  The result then depends on it:
                     :  *  -1: toast_datum_size() of the value
                     :  *  -2: strlen of the C string plus one for the terminator
                     :  *  otherwise: typlen itself
                     :  * A typlen of 0 from get_typlen() is reported as a cache lookup error.
    5237             :  */
    5238             : Datum
    5239         122 : pg_column_size(PG_FUNCTION_ARGS)
    5240             : {
    5241         122 :     Datum       value = PG_GETARG_DATUM(0);
    5242             :     int32       result;
    5243             :     int         typlen;
    5244             : 
    5245             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5246         122 :     if (fcinfo->flinfo->fn_extra == NULL)
    5247             :     {
    5248             :         /* Lookup the datatype of the supplied argument */
    5249         122 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5250             : 
    5251         122 :         typlen = get_typlen(argtypeid);
    5252         122 :         if (typlen == 0)        /* should not happen */
    5253           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5254             : 
    5255         122 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5256             :                                                       sizeof(int));
    5257         122 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5258             :     }
    5259             :     else
    5260           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
    5261             : 
    5262         122 :     if (typlen == -1)
    5263             :     {
    5264             :         /* varlena type, possibly toasted */
    5265         122 :         result = toast_datum_size(value);
    5266             :     }
    5267           0 :     else if (typlen == -2)
    5268             :     {
    5269             :         /* cstring */
    5270           0 :         result = strlen(DatumGetCString(value)) + 1;
    5271             :     }
    5272             :     else
    5273             :     {
    5274             :         /* ordinary fixed-width type */
    5275           0 :         result = typlen;
    5276             :     }
    5277             : 
    5278         122 :     PG_RETURN_INT32(result);
    5279             : }
    5280             : 
    5281             : /*
    5282             :  * Return the compression method stored in the compressed attribute.  Return
    5283             :  * NULL for non varlena type or uncompressed data.
                     :  *
                     :  * The argument type's typlen is looked up on the first call and cached
                     :  * in fn_extra (allocated in fn_mcxt); only typlen -1 is examined
                     :  * further.  The result is the text "pglz" or "lz4".  NULL is returned
                     :  * when toast_get_compression_id() reports
                     :  * TOAST_INVALID_COMPRESSION_ID, and any other id raises an error.
    5284             :  */
    5285             : Datum
    5286         162 : pg_column_compression(PG_FUNCTION_ARGS)
    5287             : {
    5288             :     int         typlen;
    5289             :     char       *result;
    5290             :     ToastCompressionId cmid;
    5291             : 
    5292             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5293         162 :     if (fcinfo->flinfo->fn_extra == NULL)
    5294             :     {
    5295             :         /* Lookup the datatype of the supplied argument */
    5296         108 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5297             : 
    5298         108 :         typlen = get_typlen(argtypeid);
    5299         108 :         if (typlen == 0)        /* should not happen */
    5300           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5301             : 
    5302         108 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5303             :                                                       sizeof(int));
    5304         108 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5305             :     }
    5306             :     else
    5307          54 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
    5308             : 
    5309         162 :     if (typlen != -1)
    5310           0 :         PG_RETURN_NULL();
    5311             : 
    5312             :     /* get the compression method id stored in the compressed varlena */
    5313         162 :     cmid = toast_get_compression_id((struct varlena *)
    5314         162 :                                     DatumGetPointer(PG_GETARG_DATUM(0)));
    5315         162 :     if (cmid == TOAST_INVALID_COMPRESSION_ID)
    5316           6 :         PG_RETURN_NULL();
    5317             : 
    5318             :     /* convert compression method id to compression method name */
    5319         156 :     switch (cmid)
    5320             :     {
    5321          66 :         case TOAST_PGLZ_COMPRESSION_ID:
    5322          66 :             result = "pglz";
    5323          66 :             break;
    5324          90 :         case TOAST_LZ4_COMPRESSION_ID:
    5325          90 :             result = "lz4";
    5326          90 :             break;
    5327           0 :         default:
    5328           0 :             elog(ERROR, "invalid compression method id %d", cmid);
    5329             :     }
    5330             : 
    5331         156 :     PG_RETURN_TEXT_P(cstring_to_text(result));
    5332             : }
    5333             : 
    5334             : /*
    5335             :  * string_agg - Concatenates values and returns string.
    5336             :  *
    5337             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    5338             :  *
    5339             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    5340             :  * actually used at all, and on subsequent calls the delimiter precedes
    5341             :  * the associated value.
    5342             :  */
    5343             : 
    5344             : /* subroutine to initialize state
                     :  *
                     :  * Raises an error unless AggCheckCallContext() accepts the call.  The
                     :  * StringInfo is created while the aggregate memory context obtained
                     :  * from that check is current, and the previous context is restored
                     :  * before returning the new, empty state.
                     :  */
    5345             : static StringInfo
    5346        1412 : makeStringAggState(FunctionCallInfo fcinfo)
    5347             : {
    5348             :     StringInfo  state;
    5349             :     MemoryContext aggcontext;
    5350             :     MemoryContext oldcontext;
    5351             : 
    5352        1412 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    5353             :     {
    5354             :         /* cannot be called directly because of internal-type argument */
    5355           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    5356             :     }
    5357             : 
    5358             :     /*
    5359             :      * Create state in aggregate context.  It'll stay there across subsequent
    5360             :      * calls.
    5361             :      */
    5362        1412 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    5363        1412 :     state = makeStringInfo();
    5364        1412 :     MemoryContextSwitchTo(oldcontext);
    5365             : 
    5366        1412 :     return state;
    5367             : }
    5368             : 
    5369             : Datum
    5370      781062 : string_agg_transfn(PG_FUNCTION_ARGS)
    5371             : {
    5372             :     StringInfo  state;
    5373             : 
    5374      781062 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);  /* arg 0: running state, if any */
    5375             : 
    5376             :     /* Append the value unless null. */
    5377      781062 :     if (!PG_ARGISNULL(1))
    5378             :     {
    5379             :         /* On the first time through, we ignore the delimiter. */
    5380      781014 :         if (state == NULL)
    5381        1386 :             state = makeStringAggState(fcinfo);
    5382      779628 :         else if (!PG_ARGISNULL(2))
    5383      779628 :             appendStringInfoText(state, PG_GETARG_TEXT_PP(2));  /* delimiter */
    5384             : 
    5385      781014 :         appendStringInfoText(state, PG_GETARG_TEXT_PP(1));  /* value */
    5386             :     }
    5387             : 
    5388             :     /*
    5389             :      * The transition type for string_agg() is declared to be "internal",
    5390             :      * which is a pass-by-value type the same size as a pointer.
                     :      *
                     :      * Note that state is still NULL here if no non-null value has been
                     :      * seen so far, because it is only created when a value is appended.
                     :      * A null delimiter on a later call is simply skipped.
    5391             :      */
    5392      781062 :     PG_RETURN_POINTER(state);
    5393             : }
    5394             : 
    5395             : Datum
    5396        1458 : string_agg_finalfn(PG_FUNCTION_ARGS)
    5397             : {
    5398             :     StringInfo  state;
    5399             : 
    5400             :     /* cannot be called directly because of internal-type argument */
    5401             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5402             : 
    5403        1458 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5404             : 
    5405        1458 :     if (state != NULL)
    5406        1386 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));    /* copy accumulated bytes into a text value */
    5407             :     else
    5408          72 :         PG_RETURN_NULL();   /* state argument was null: result is NULL */
    5409             : }
    5410             : 
    5411             : /*
    5412             :  * Prepare cache with fmgr info for the output functions of the datatypes of
    5413             :  * the arguments of a concat-like function, beginning with argument "argidx".
    5414             :  * (Arguments before that will have corresponding slots in the resulting
    5415             :  * FmgrInfo array, but we don't fill those slots.)
                     :  *
                     :  * The array has PG_NARGS() entries and is allocated in fn_mcxt.  It is
                     :  * stored in fcinfo->flinfo->fn_extra as well as being returned.  An
                     :  * error is raised if the type of any argument from argidx onward
                     :  * cannot be determined.
    5416             :  */
    5417             : static FmgrInfo *
    5418          40 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
    5419             : {
    5420             :     FmgrInfo   *foutcache;
    5421             :     int         i;
    5422             : 
    5423             :     /* We keep the info in fn_mcxt so it survives across calls */
    5424          40 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5425          40 :                                                 PG_NARGS() * sizeof(FmgrInfo));
    5426             : 
    5427         196 :     for (i = argidx; i < PG_NARGS(); i++)
    5428             :     {
    5429             :         Oid         valtype;
    5430             :         Oid         typOutput;
    5431             :         bool        typIsVarlena;
    5432             : 
    5433         156 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    5434         156 :         if (!OidIsValid(valtype))
    5435           0 :             elog(ERROR, "could not determine data type of concat() input");
    5436             : 
    5437         156 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
    5438         156 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
    5439             :     }
    5440             : 
    5441          40 :     fcinfo->flinfo->fn_extra = foutcache;
    5442             : 
    5443          40 :     return foutcache;
    5444             : }
    5445             : 
    5446             : /*
    5447             :  * Implementation of both concat() and concat_ws().
    5448             :  *
    5449             :  * sepstr is the separator string to place between values.
    5450             :  * argidx identifies the first argument to concatenate (counting from zero);
    5451             :  * note that this must be constant across any one series of calls.
    5452             :  *
    5453             :  * Returns NULL if result should be NULL, else text value.
    5454             :  */
    5455             : static text *
    5456          72 : concat_internal(const char *sepstr, int argidx,
    5457             :                 FunctionCallInfo fcinfo)
    5458             : {
    5459             :     text       *result;
    5460             :     StringInfoData str;
    5461             :     FmgrInfo   *foutcache;
    5462          72 :     bool        first_arg = true;
    5463             :     int         i;
    5464             : 
    5465             :     /*
    5466             :      * concat(VARIADIC some-array) is essentially equivalent to
    5467             :      * array_to_text(), ie concat the array elements with the given separator.
    5468             :      * So we just pass the case off to that code.
    5469             :      */
    5470          72 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5471             :     {
    5472             :         ArrayType  *arr;
    5473             : 
    5474             :         /* Should have just the one argument */
    5475             :         Assert(argidx == PG_NARGS() - 1);
    5476             : 
    5477             :         /* concat(VARIADIC NULL) is defined as NULL */
    5478          30 :         if (PG_ARGISNULL(argidx))
    5479          12 :             return NULL;
    5480             : 
    5481             :         /*
    5482             :          * Non-null argument had better be an array.  We assume that any call
    5483             :          * context that could let get_fn_expr_variadic return true will have
    5484             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    5485             :          * it should be okay to just Assert that it's an array rather than
    5486             :          * doing a full-fledged error check.
    5487             :          */
    5488             :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    5489             : 
    5490             :         /* OK, safe to fetch the array value */
    5491          18 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    5492             : 
    5493             :         /*
    5494             :          * And serialize the array.  We tell array_to_text to ignore null
    5495             :          * elements, which matches the behavior of the loop below.
    5496             :          */
    5497          18 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    5498             :     }
    5499             : 
    5500             :     /* Normal case without explicit VARIADIC marker */
    5501          42 :     initStringInfo(&str);
    5502             : 
    5503             :     /* Get output function info, building it if first time through */
    5504          42 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
    5505          42 :     if (foutcache == NULL)
    5506          40 :         foutcache = build_concat_foutcache(fcinfo, argidx);
    5507             : 
    5508         204 :     for (i = argidx; i < PG_NARGS(); i++)
    5509             :     {
    5510         162 :         if (!PG_ARGISNULL(i))
    5511             :         {
    5512         150 :             Datum       value = PG_GETARG_DATUM(i);
    5513             : 
    5514             :             /* add separator if appropriate */
    5515         150 :             if (first_arg)
    5516          42 :                 first_arg = false;
    5517             :             else
    5518         108 :                 appendStringInfoString(&str, sepstr);
    5519             : 
    5520             :             /* call the appropriate type output function, append the result */
    5521         150 :             appendStringInfoString(&str,
    5522         150 :                                    OutputFunctionCall(&foutcache[i], value));
    5523             :         }
    5524             :     }
    5525             : 
    5526          42 :     result = cstring_to_text_with_len(str.data, str.len);
    5527          42 :     pfree(str.data);
    5528             : 
    5529          42 :     return result;
    5530             : }
    5531             : 
    5532             : /*
    5533             :  * Concatenate all arguments. NULL arguments are ignored.
    5534             :  */
    5535             : Datum
    5536          36 : text_concat(PG_FUNCTION_ARGS)
    5537             : {
    5538             :     text       *result;
    5539             : 
    5540          36 :     result = concat_internal("", 0, fcinfo);
    5541          36 :     if (result == NULL)
    5542           6 :         PG_RETURN_NULL();
    5543          30 :     PG_RETURN_TEXT_P(result);
    5544             : }
    5545             : 
    5546             : /*
    5547             :  * Concatenate all but first argument value with separators. The first
    5548             :  * parameter is used as the separator. NULL arguments are ignored.
    5549             :  */
    5550             : Datum
    5551          42 : text_concat_ws(PG_FUNCTION_ARGS)
    5552             : {
    5553             :     char       *sep;
    5554             :     text       *result;
    5555             : 
    5556             :     /* return NULL when separator is NULL */
    5557          42 :     if (PG_ARGISNULL(0))
    5558           6 :         PG_RETURN_NULL();
    5559          36 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
    5560             : 
    5561          36 :     result = concat_internal(sep, 1, fcinfo);
    5562          36 :     if (result == NULL)
    5563           6 :         PG_RETURN_NULL();
    5564          30 :     PG_RETURN_TEXT_P(result);
    5565             : }
    5566             : 
    5567             : /*
    5568             :  * Return first n characters in the string. When n is negative,
    5569             :  * return all but last |n| characters.
    5570             :  */
    5571             : Datum
    5572        1884 : text_left(PG_FUNCTION_ARGS)
    5573             : {
    5574        1884 :     int         n = PG_GETARG_INT32(1);
    5575             : 
    5576        1884 :     if (n < 0)
    5577             :     {
    5578          30 :         text       *str = PG_GETARG_TEXT_PP(0);
    5579          30 :         const char *p = VARDATA_ANY(str);
    5580          30 :         int         len = VARSIZE_ANY_EXHDR(str);
    5581             :         int         rlen;
    5582             : 
    5583          30 :         n = pg_mbstrlen_with_len(p, len) + n;
    5584          30 :         rlen = pg_mbcharcliplen(p, len, n);
    5585          30 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    5586             :     }
    5587             :     else
    5588        1854 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
    5589             : }
    5590             : 
    5591             : /*
    5592             :  * Return last n characters in the string. When n is negative,
    5593             :  * return all but first |n| characters.
    5594             :  */
    5595             : Datum
    5596          66 : text_right(PG_FUNCTION_ARGS)
    5597             : {
    5598          66 :     text       *str = PG_GETARG_TEXT_PP(0);
    5599          66 :     const char *p = VARDATA_ANY(str);
    5600          66 :     int         len = VARSIZE_ANY_EXHDR(str);
    5601          66 :     int         n = PG_GETARG_INT32(1);
    5602             :     int         off;
    5603             : 
    5604          66 :     if (n < 0)
    5605          30 :         n = -n;
    5606             :     else
    5607          36 :         n = pg_mbstrlen_with_len(p, len) - n;
    5608          66 :     off = pg_mbcharcliplen(p, len, n);
    5609             : 
    5610          66 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    5611             : }
    5612             : 
    5613             : /*
    5614             :  * Return reversed string
    5615             :  */
    5616             : Datum
    5617           6 : text_reverse(PG_FUNCTION_ARGS)
    5618             : {
    5619           6 :     text       *str = PG_GETARG_TEXT_PP(0);
    5620           6 :     const char *p = VARDATA_ANY(str);
    5621           6 :     int         len = VARSIZE_ANY_EXHDR(str);
    5622           6 :     const char *endp = p + len;
    5623             :     text       *result;
    5624             :     char       *dst;
    5625             : 
    5626           6 :     result = palloc(len + VARHDRSZ);
    5627           6 :     dst = (char *) VARDATA(result) + len;
    5628           6 :     SET_VARSIZE(result, len + VARHDRSZ);
    5629             : 
    5630           6 :     if (pg_database_encoding_max_length() > 1)
    5631             :     {
    5632             :         /* multibyte version */
    5633          36 :         while (p < endp)
    5634             :         {
    5635             :             int         sz;
    5636             : 
    5637          30 :             sz = pg_mblen(p);
    5638          30 :             dst -= sz;
    5639          30 :             memcpy(dst, p, sz);
    5640          30 :             p += sz;
    5641             :         }
    5642             :     }
    5643             :     else
    5644             :     {
    5645             :         /* single byte version */
    5646           0 :         while (p < endp)
    5647           0 :             *(--dst) = *p++;
    5648             :     }
    5649             : 
    5650           6 :     PG_RETURN_TEXT_P(result);
    5651             : }
    5652             : 
    5653             : 
    5654             : /*
    5655             :  * Support macros for text_format()
    5656             :  */
    5657             : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
    5658             : 
    5659             : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    5660             :     do { \
    5661             :         if (++(ptr) >= (end_ptr)) \
    5662             :             ereport(ERROR, \
    5663             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
    5664             :                      errmsg("unterminated format() type specifier"), \
    5665             :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
    5666             :     } while (0)
    5667             : 
    5668             : /*
    5669             :  * Returns a formatted string
    5670             :  */
    5671             : Datum
    5672       85012 : text_format(PG_FUNCTION_ARGS)
    5673             : {
    5674             :     text       *fmt;
    5675             :     StringInfoData str;
    5676             :     const char *cp;
    5677             :     const char *start_ptr;
    5678             :     const char *end_ptr;
    5679             :     text       *result;
    5680             :     int         arg;
    5681             :     bool        funcvariadic;
    5682             :     int         nargs;
    5683       85012 :     Datum      *elements = NULL;
    5684       85012 :     bool       *nulls = NULL;
    5685       85012 :     Oid         element_type = InvalidOid;
    5686       85012 :     Oid         prev_type = InvalidOid;
    5687       85012 :     Oid         prev_width_type = InvalidOid;
    5688             :     FmgrInfo    typoutputfinfo;
    5689             :     FmgrInfo    typoutputinfo_width;
    5690             : 
    5691             :     /* When format string is null, immediately return null */
    5692       85012 :     if (PG_ARGISNULL(0))
    5693           6 :         PG_RETURN_NULL();
    5694             : 
    5695             :     /* If argument is marked VARIADIC, expand array into elements */
    5696       85006 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5697             :     {
    5698             :         ArrayType  *arr;
    5699             :         int16       elmlen;
    5700             :         bool        elmbyval;
    5701             :         char        elmalign;
    5702             :         int         nitems;
    5703             : 
    5704             :         /* Should have just the one argument */
    5705             :         Assert(PG_NARGS() == 2);
    5706             : 
    5707             :         /* If argument is NULL, we treat it as zero-length array */
    5708          48 :         if (PG_ARGISNULL(1))
    5709           6 :             nitems = 0;
    5710             :         else
    5711             :         {
    5712             :             /*
    5713             :              * Non-null argument had better be an array.  We assume that any
    5714             :              * call context that could let get_fn_expr_variadic return true
    5715             :              * will have checked that a VARIADIC-labeled parameter actually is
    5716             :              * an array.  So it should be okay to just Assert that it's an
    5717             :              * array rather than doing a full-fledged error check.
    5718             :              */
    5719             :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5720             : 
    5721             :             /* OK, safe to fetch the array value */
    5722          42 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5723             : 
    5724             :             /* Get info about array element type */
    5725          42 :             element_type = ARR_ELEMTYPE(arr);
    5726          42 :             get_typlenbyvalalign(element_type,
    5727             :                                  &elmlen, &elmbyval, &elmalign);
    5728             : 
    5729             :             /* Extract all array elements */
    5730          42 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5731             :                               &elements, &nulls, &nitems);
    5732             :         }
    5733             : 
    5734          48 :         nargs = nitems + 1;
    5735          48 :         funcvariadic = true;
    5736             :     }
    5737             :     else
    5738             :     {
    5739             :         /* Non-variadic case, we'll process the arguments individually */
    5740       84958 :         nargs = PG_NARGS();
    5741       84958 :         funcvariadic = false;
    5742             :     }
    5743             : 
    5744             :     /* Setup for main loop. */
    5745       85006 :     fmt = PG_GETARG_TEXT_PP(0);
    5746       85006 :     start_ptr = VARDATA_ANY(fmt);
    5747       85006 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5748       85006 :     initStringInfo(&str);
    5749       85006 :     arg = 1;                    /* next argument position to print */
    5750             : 
    5751             :     /* Scan format string, looking for conversion specifiers. */
    5752      846406 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5753             :     {
    5754             :         int         argpos;
    5755             :         int         widthpos;
    5756             :         int         flags;
    5757             :         int         width;
    5758             :         Datum       value;
    5759             :         bool        isNull;
    5760             :         Oid         typid;
    5761             : 
    5762             :         /*
    5763             :          * If it's not the start of a conversion specifier, just copy it to
    5764             :          * the output buffer.
    5765             :          */
    5766      761460 :         if (*cp != '%')
    5767             :         {
    5768      642456 :             appendStringInfoCharMacro(&str, *cp);
    5769      642474 :             continue;
    5770             :         }
    5771             : 
    5772      119004 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5773             : 
    5774             :         /* Easy case: %% outputs a single % */
    5775      119004 :         if (*cp == '%')
    5776             :         {
    5777          18 :             appendStringInfoCharMacro(&str, *cp);
    5778          18 :             continue;
    5779             :         }
    5780             : 
    5781             :         /* Parse the optional portions of the format specifier */
    5782      118986 :         cp = text_format_parse_format(cp, end_ptr,
    5783             :                                       &argpos, &widthpos,
    5784             :                                       &flags, &width);
    5785             : 
    5786             :         /*
    5787             :          * Next we should see the main conversion specifier.  Whether or not
    5788             :          * an argument position was present, it's known that at least one
    5789             :          * character remains in the string at this point.  Experience suggests
    5790             :          * that it's worth checking that that character is one of the expected
    5791             :          * ones before we try to fetch arguments, so as to produce the least
    5792             :          * confusing response to a mis-formatted specifier.
    5793             :          */
    5794      118962 :         if (strchr("sIL", *cp) == NULL)
    5795           6 :             ereport(ERROR,
    5796             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5797             :                      errmsg("unrecognized format() type specifier \"%.*s\"",
    5798             :                             pg_mblen(cp), cp),
    5799             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5800             : 
    5801             :         /* If indirect width was specified, get its value */
    5802      118956 :         if (widthpos >= 0)
    5803             :         {
    5804             :             /* Collect the specified or next argument position */
    5805          42 :             if (widthpos > 0)
    5806          36 :                 arg = widthpos;
    5807          42 :             if (arg >= nargs)
    5808           0 :                 ereport(ERROR,
    5809             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5810             :                          errmsg("too few arguments for format()")));
    5811             : 
    5812             :             /* Get the value and type of the selected argument */
    5813          42 :             if (!funcvariadic)
    5814             :             {
    5815          42 :                 value = PG_GETARG_DATUM(arg);
    5816          42 :                 isNull = PG_ARGISNULL(arg);
    5817          42 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5818             :             }
    5819             :             else
    5820             :             {
    5821           0 :                 value = elements[arg - 1];
    5822           0 :                 isNull = nulls[arg - 1];
    5823           0 :                 typid = element_type;
    5824             :             }
    5825          42 :             if (!OidIsValid(typid))
    5826           0 :                 elog(ERROR, "could not determine data type of format() input");
    5827             : 
    5828          42 :             arg++;
    5829             : 
    5830             :             /* We can treat NULL width the same as zero */
    5831          42 :             if (isNull)
    5832           6 :                 width = 0;
    5833          36 :             else if (typid == INT4OID)
    5834          36 :                 width = DatumGetInt32(value);
    5835           0 :             else if (typid == INT2OID)
    5836           0 :                 width = DatumGetInt16(value);
    5837             :             else
    5838             :             {
    5839             :                 /* For less-usual datatypes, convert to text then to int */
    5840             :                 char       *str;
    5841             : 
    5842           0 :                 if (typid != prev_width_type)
    5843             :                 {
    5844             :                     Oid         typoutputfunc;
    5845             :                     bool        typIsVarlena;
    5846             : 
    5847           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5848           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    5849           0 :                     prev_width_type = typid;
    5850             :                 }
    5851             : 
    5852           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    5853             : 
    5854             :                 /* pg_strtoint32 will complain about bad data or overflow */
    5855           0 :                 width = pg_strtoint32(str);
    5856             : 
    5857           0 :                 pfree(str);
    5858             :             }
    5859             :         }
    5860             : 
    5861             :         /* Collect the specified or next argument position */
    5862      118956 :         if (argpos > 0)
    5863         132 :             arg = argpos;
    5864      118956 :         if (arg >= nargs)
    5865          24 :             ereport(ERROR,
    5866             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5867             :                      errmsg("too few arguments for format()")));
    5868             : 
    5869             :         /* Get the value and type of the selected argument */
    5870      118932 :         if (!funcvariadic)
    5871             :         {
    5872      117660 :             value = PG_GETARG_DATUM(arg);
    5873      117660 :             isNull = PG_ARGISNULL(arg);
    5874      117660 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5875             :         }
    5876             :         else
    5877             :         {
    5878        1272 :             value = elements[arg - 1];
    5879        1272 :             isNull = nulls[arg - 1];
    5880        1272 :             typid = element_type;
    5881             :         }
    5882      118932 :         if (!OidIsValid(typid))
    5883           0 :             elog(ERROR, "could not determine data type of format() input");
    5884             : 
    5885      118932 :         arg++;
    5886             : 
    5887             :         /*
    5888             :          * Get the appropriate typOutput function, reusing previous one if
    5889             :          * same type as previous argument.  That's particularly useful in the
    5890             :          * variadic-array case, but often saves work even for ordinary calls.
    5891             :          */
    5892      118932 :         if (typid != prev_type)
    5893             :         {
    5894             :             Oid         typoutputfunc;
    5895             :             bool        typIsVarlena;
    5896             : 
    5897       87886 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5898       87886 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    5899       87886 :             prev_type = typid;
    5900             :         }
    5901             : 
    5902             :         /*
    5903             :          * And now we can format the value.
    5904             :          */
    5905      118932 :         switch (*cp)
    5906             :         {
    5907      118932 :             case 's':
    5908             :             case 'I':
    5909             :             case 'L':
    5910      118932 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    5911             :                                               value, isNull,
    5912             :                                               flags, width);
    5913      118926 :                 break;
    5914           0 :             default:
    5915             :                 /* should not get here, because of previous check */
    5916           0 :                 ereport(ERROR,
    5917             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5918             :                          errmsg("unrecognized format() type specifier \"%.*s\"",
    5919             :                                 pg_mblen(cp), cp),
    5920             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    5921             :                 break;
    5922             :         }
    5923             :     }
    5924             : 
    5925             :     /* Don't need deconstruct_array results anymore. */
    5926       84946 :     if (elements != NULL)
    5927          42 :         pfree(elements);
    5928       84946 :     if (nulls != NULL)
    5929          42 :         pfree(nulls);
    5930             : 
    5931             :     /* Generate results. */
    5932       84946 :     result = cstring_to_text_with_len(str.data, str.len);
    5933       84946 :     pfree(str.data);
    5934             : 
    5935       84946 :     PG_RETURN_TEXT_P(result);
    5936             : }
    5937             : 
    5938             : /*
    5939             :  * Parse contiguous digits as a decimal number.
    5940             :  *
    5941             :  * Returns true if some digits could be parsed.
    5942             :  * The value is returned into *value, and *ptr is advanced to the next
    5943             :  * character to be parsed.
    5944             :  *
    5945             :  * Note parsing invariant: at least one character is known available before
    5946             :  * string end (end_ptr) at entry, and this is still true at exit.
    5947             :  */
    5948             : static bool
    5949      177936 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    5950             : {
    5951      177936 :     bool        found = false;
    5952      177936 :     const char *cp = *ptr;
    5953      177936 :     int         val = 0;
    5954             : 
    5955      298248 :     while (*cp >= '0' && *cp <= '9')
    5956             :     {
    5957      120318 :         int8        digit = (*cp - '0');
    5958             : 
    5959      120318 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
    5960      120318 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
    5961           0 :             ereport(ERROR,
    5962             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5963             :                      errmsg("number is out of range")));
    5964      120318 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5965      120312 :         found = true;
    5966             :     }
    5967             : 
    5968      177930 :     *ptr = cp;
    5969      177930 :     *value = val;
    5970             : 
    5971      177930 :     return found;
    5972             : }
    5973             : 
    5974             : /*
    5975             :  * Parse a format specifier (generally following the SUS printf spec).
    5976             :  *
    5977             :  * We have already advanced over the initial '%', and we are looking for
    5978             :  * [argpos][flags][width]type (but the type character is not consumed here).
    5979             :  *
    5980             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    5981             :  * Output parameters:
    5982             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    5983             :  *  widthpos: argument position for width.  Zero means the argument position
    5984             :  *          was unspecified (ie, take the next arg) and -1 means no width
    5985             :  *          argument (width was omitted or specified as a constant).
    5986             :  *  flags: bitmask of flags.
    5987             :  *  width: directly-specified width value.  Zero means the width was omitted
    5988             :  *          (note it's not necessary to distinguish this case from an explicit
    5989             :  *          zero width value).
    5990             :  *
    5991             :  * The function result is the next character position to be parsed, ie, the
    5992             :  * location where the type character is/should be.
    5993             :  *
    5994             :  * Note parsing invariant: at least one character is known available before
    5995             :  * string end (end_ptr) at entry, and this is still true at exit.
    5996             :  */
    5997             : static const char *
    5998      118986 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    5999             :                          int *argpos, int *widthpos,
    6000             :                          int *flags, int *width)
    6001             : {
    6002      118986 :     const char *cp = start_ptr;
    6003             :     int         n;
    6004             : 
    6005             :     /* set defaults for output parameters */
    6006      118986 :     *argpos = -1;
    6007      118986 :     *widthpos = -1;
    6008      118986 :     *flags = 0;
    6009      118986 :     *width = 0;
    6010             : 
    6011             :     /* try to identify first number */
    6012      118986 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    6013             :     {
    6014       60174 :         if (*cp != '$')
    6015             :         {
    6016             :             /* Must be just a width and a type, so we're done */
    6017       60024 :             *width = n;
    6018       60024 :             return cp;
    6019             :         }
    6020             :         /* The number was argument position */
    6021         150 :         *argpos = n;
    6022             :         /* Explicit 0 for argument index is immediately refused */
    6023         150 :         if (n == 0)
    6024           6 :             ereport(ERROR,
    6025             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6026             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    6027         144 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6028             :     }
    6029             : 
    6030             :     /* Handle flags (only minus is supported now) */
    6031       58980 :     while (*cp == '-')
    6032             :     {
    6033          30 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    6034          30 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6035             :     }
    6036             : 
    6037       58950 :     if (*cp == '*')
    6038             :     {
    6039             :         /* Handle indirect width */
    6040          48 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6041          48 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    6042             :         {
    6043             :             /* number in this position must be closed by $ */
    6044          42 :             if (*cp != '$')
    6045           0 :                 ereport(ERROR,
    6046             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6047             :                          errmsg("width argument position must be ended by \"$\"")));
    6048             :             /* The number was width argument position */
    6049          42 :             *widthpos = n;
    6050             :             /* Explicit 0 for argument index is immediately refused */
    6051          42 :             if (n == 0)
    6052           6 :                 ereport(ERROR,
    6053             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6054             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    6055          36 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
    6056             :         }
    6057             :         else
    6058           6 :             *widthpos = 0;      /* width's argument position is unspecified */
    6059             :     }
    6060             :     else
    6061             :     {
    6062             :         /* Check for direct width specification */
    6063       58902 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    6064          30 :             *width = n;
    6065             :     }
    6066             : 
    6067             :     /* cp should now be pointing at type character */
    6068       58938 :     return cp;
    6069             : }
    6070             : 
    6071             : /*
    6072             :  * Format a %s, %I, or %L conversion
    6073             :  */
    6074             : static void
    6075      118932 : text_format_string_conversion(StringInfo buf, char conversion,
    6076             :                               FmgrInfo *typOutputInfo,
    6077             :                               Datum value, bool isNull,
    6078             :                               int flags, int width)
    6079             : {
    6080             :     char       *str;
    6081             : 
    6082             :     /* Handle NULL arguments before trying to stringify the value. */
    6083      118932 :     if (isNull)
    6084             :     {
    6085         306 :         if (conversion == 's')
    6086         234 :             text_format_append_string(buf, "", flags, width);
    6087          72 :         else if (conversion == 'L')
    6088          66 :             text_format_append_string(buf, "NULL", flags, width);
    6089           6 :         else if (conversion == 'I')
    6090           6 :             ereport(ERROR,
    6091             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    6092             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    6093         300 :         return;
    6094             :     }
    6095             : 
    6096             :     /* Stringify. */
    6097      118626 :     str = OutputFunctionCall(typOutputInfo, value);
    6098             : 
    6099             :     /* Escape. */
    6100      118626 :     if (conversion == 'I')
    6101             :     {
    6102             :         /* quote_identifier may or may not allocate a new string. */
    6103        3106 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    6104             :     }
    6105      115520 :     else if (conversion == 'L')
    6106             :     {
    6107        2578 :         char       *qstr = quote_literal_cstr(str);
    6108             : 
    6109        2578 :         text_format_append_string(buf, qstr, flags, width);
    6110             :         /* quote_literal_cstr() always allocates a new string */
    6111        2578 :         pfree(qstr);
    6112             :     }
    6113             :     else
    6114      112942 :         text_format_append_string(buf, str, flags, width);
    6115             : 
    6116             :     /* Cleanup. */
    6117      118626 :     pfree(str);
    6118             : }
    6119             : 
    6120             : /*
    6121             :  * Append str to buf, padding as directed by flags/width
    6122             :  */
    6123             : static void
    6124      118926 : text_format_append_string(StringInfo buf, const char *str,
    6125             :                           int flags, int width)
    6126             : {
    6127      118926 :     bool        align_to_left = false;
    6128             :     int         len;
    6129             : 
    6130             :     /* fast path for typical easy case */
    6131      118926 :     if (width == 0)
    6132             :     {
    6133       58842 :         appendStringInfoString(buf, str);
    6134       58842 :         return;
    6135             :     }
    6136             : 
    6137       60084 :     if (width < 0)
    6138             :     {
    6139             :         /* Negative width: implicit '-' flag, then take absolute value */
    6140           6 :         align_to_left = true;
    6141             :         /* -INT_MIN is undefined */
    6142           6 :         if (width <= INT_MIN)
    6143           0 :             ereport(ERROR,
    6144             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    6145             :                      errmsg("number is out of range")));
    6146           6 :         width = -width;
    6147             :     }
    6148       60078 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    6149          24 :         align_to_left = true;
    6150             : 
    6151       60084 :     len = pg_mbstrlen(str);
    6152       60084 :     if (align_to_left)
    6153             :     {
    6154             :         /* left justify */
    6155          30 :         appendStringInfoString(buf, str);
    6156          30 :         if (len < width)
    6157          30 :             appendStringInfoSpaces(buf, width - len);
    6158             :     }
    6159             :     else
    6160             :     {
    6161             :         /* right justify */
    6162       60054 :         if (len < width)
    6163       60054 :             appendStringInfoSpaces(buf, width - len);
    6164       60054 :         appendStringInfoString(buf, str);
    6165             :     }
    6166             : }
    6167             : 
    6168             : /*
    6169             :  * text_format_nv - nonvariadic wrapper for text_format function.
    6170             :  *
    6171             :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
    6172             :  * which checks that all built-in functions that share the implementing C
    6173             :  * function take the same number of arguments.
    6174             :  */
    6175             : Datum
    6176          30 : text_format_nv(PG_FUNCTION_ARGS)
    6177             : {
    6178          30 :     return text_format(fcinfo);
    6179             : }
    6180             : 
    6181             : /*
    6182             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    6183             :  * for this use case.
    6184             :  */
    6185             : static inline bool
    6186           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    6187             : {
    6188           0 :     while (len > 0)
    6189             :     {
    6190           0 :         len--;
    6191           0 :         if (s1[len] != s2[len])
    6192           0 :             return false;
    6193             :     }
    6194           0 :     return true;
    6195             : }
    6196             : 
    6197             : /* Expand each Levenshtein distance variant */
    6198             : #include "levenshtein.c"
    6199             : #define LEVENSHTEIN_LESS_EQUAL
    6200             : #include "levenshtein.c"
    6201             : 
    6202             : 
    6203             : /*
    6204             :  * Unicode support
    6205             :  */
    6206             : 
    6207             : static UnicodeNormalizationForm
    6208         186 : unicode_norm_form_from_string(const char *formstr)
    6209             : {
    6210         186 :     UnicodeNormalizationForm form = -1;
    6211             : 
    6212             :     /*
    6213             :      * Might as well check this while we're here.
    6214             :      */
    6215         186 :     if (GetDatabaseEncoding() != PG_UTF8)
    6216           0 :         ereport(ERROR,
    6217             :                 (errcode(ERRCODE_SYNTAX_ERROR),
    6218             :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
    6219             : 
    6220         186 :     if (pg_strcasecmp(formstr, "NFC") == 0)
    6221          66 :         form = UNICODE_NFC;
    6222         120 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
    6223          36 :         form = UNICODE_NFD;
    6224          84 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
    6225          36 :         form = UNICODE_NFKC;
    6226          48 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
    6227          36 :         form = UNICODE_NFKD;
    6228             :     else
    6229          12 :         ereport(ERROR,
    6230             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6231             :                  errmsg("invalid normalization form: %s", formstr)));
    6232             : 
    6233         174 :     return form;
    6234             : }
    6235             : 
    6236             : Datum
    6237          48 : unicode_normalize_func(PG_FUNCTION_ARGS)
    6238             : {
    6239          48 :     text       *input = PG_GETARG_TEXT_PP(0);
    6240          48 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6241             :     UnicodeNormalizationForm form;
    6242             :     int         size;
    6243             :     pg_wchar   *input_chars;
    6244             :     pg_wchar   *output_chars;
    6245             :     unsigned char *p;
    6246             :     text       *result;
    6247             :     int         i;
    6248             : 
    6249          48 :     form = unicode_norm_form_from_string(formstr);
    6250             : 
    6251             :     /* convert to pg_wchar */
    6252          42 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6253          42 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6254          42 :     p = (unsigned char *) VARDATA_ANY(input);
    6255         168 :     for (i = 0; i < size; i++)
    6256             :     {
    6257         126 :         input_chars[i] = utf8_to_unicode(p);
    6258         126 :         p += pg_utf_mblen(p);
    6259             :     }
    6260          42 :     input_chars[i] = (pg_wchar) '\0';
    6261             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6262             : 
    6263             :     /* action */
    6264          42 :     output_chars = unicode_normalize(form, input_chars);
    6265             : 
    6266             :     /* convert back to UTF-8 string */
    6267          42 :     size = 0;
    6268         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6269             :     {
    6270             :         unsigned char buf[4];
    6271             : 
    6272         120 :         unicode_to_utf8(*wp, buf);
    6273         120 :         size += pg_utf_mblen(buf);
    6274             :     }
    6275             : 
    6276          42 :     result = palloc(size + VARHDRSZ);
    6277          42 :     SET_VARSIZE(result, size + VARHDRSZ);
    6278             : 
    6279          42 :     p = (unsigned char *) VARDATA_ANY(result);
    6280         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6281             :     {
    6282         120 :         unicode_to_utf8(*wp, p);
    6283         120 :         p += pg_utf_mblen(p);
    6284             :     }
    6285             :     Assert((char *) p == (char *) result + size + VARHDRSZ);
    6286             : 
    6287          42 :     PG_RETURN_TEXT_P(result);
    6288             : }
    6289             : 
    6290             : /*
    6291             :  * Check whether the string is in the specified Unicode normalization form.
    6292             :  *
    6293             :  * This is done by converting the string to the specified normal form and then
    6294             :  * comparing that to the original string.  To speed that up, we also apply the
    6295             :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
    6296             :  * answer for many strings by just scanning the string once.
    6297             :  *
    6298             :  * This function should generally be optimized for the case where the string
    6299             :  * is in fact normalized.  In that case, we'll end up looking at the entire
    6300             :  * string, so it's probably not worth doing any incremental conversion etc.
    6301             :  */
    6302             : Datum
    6303         138 : unicode_is_normalized(PG_FUNCTION_ARGS)
    6304             : {
    6305         138 :     text       *input = PG_GETARG_TEXT_PP(0);
    6306         138 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6307             :     UnicodeNormalizationForm form;
    6308             :     int         size;
    6309             :     pg_wchar   *input_chars;
    6310             :     pg_wchar   *output_chars;
    6311             :     unsigned char *p;
    6312             :     int         i;
    6313             :     UnicodeNormalizationQC quickcheck;
    6314             :     int         output_size;
    6315             :     bool        result;
    6316             : 
    6317         138 :     form = unicode_norm_form_from_string(formstr);
    6318             : 
    6319             :     /* convert to pg_wchar */
    6320         132 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6321         132 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6322         132 :     p = (unsigned char *) VARDATA_ANY(input);
    6323         504 :     for (i = 0; i < size; i++)
    6324             :     {
    6325         372 :         input_chars[i] = utf8_to_unicode(p);
    6326         372 :         p += pg_utf_mblen(p);
    6327             :     }
    6328         132 :     input_chars[i] = (pg_wchar) '\0';
    6329             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6330             : 
    6331             :     /* quick check (see UAX #15) */
    6332         132 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
    6333         132 :     if (quickcheck == UNICODE_NORM_QC_YES)
    6334          42 :         PG_RETURN_BOOL(true);
    6335          90 :     else if (quickcheck == UNICODE_NORM_QC_NO)
    6336          12 :         PG_RETURN_BOOL(false);
    6337             : 
    6338             :     /* normalize and compare with original */
    6339          78 :     output_chars = unicode_normalize(form, input_chars);
    6340             : 
    6341          78 :     output_size = 0;
    6342         324 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6343         246 :         output_size++;
    6344             : 
    6345         114 :     result = (size == output_size) &&
    6346          36 :         (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
    6347             : 
    6348          78 :     PG_RETURN_BOOL(result);
    6349             : }
    6350             : 
    6351             : /*
    6352             :  * Check if first n chars are hexadecimal digits
    6353             :  */
    6354             : static bool
    6355         156 : isxdigits_n(const char *instr, size_t n)
    6356             : {
    6357         660 :     for (size_t i = 0; i < n; i++)
    6358         570 :         if (!isxdigit((unsigned char) instr[i]))
    6359          66 :             return false;
    6360             : 
    6361          90 :     return true;
    6362             : }
    6363             : 
    6364             : static unsigned int
    6365         504 : hexval(unsigned char c)
    6366             : {
    6367         504 :     if (c >= '0' && c <= '9')
    6368         384 :         return c - '0';
    6369         120 :     if (c >= 'a' && c <= 'f')
    6370          60 :         return c - 'a' + 0xA;
    6371          60 :     if (c >= 'A' && c <= 'F')
    6372          60 :         return c - 'A' + 0xA;
    6373           0 :     elog(ERROR, "invalid hexadecimal digit");
    6374             :     return 0;                   /* not reached */
    6375             : }
    6376             : 
    6377             : /*
    6378             :  * Translate string with hexadecimal digits to number
    6379             :  */
    6380             : static unsigned int
    6381          90 : hexval_n(const char *instr, size_t n)
    6382             : {
    6383          90 :     unsigned int result = 0;
    6384             : 
    6385         594 :     for (size_t i = 0; i < n; i++)
    6386         504 :         result += hexval(instr[i]) << (4 * (n - i - 1));
    6387             : 
    6388          90 :     return result;
    6389             : }
    6390             : 
    6391             : /*
    6392             :  * Replaces Unicode escape sequences by Unicode characters
    6393             :  */
    6394             : Datum
    6395          66 : unistr(PG_FUNCTION_ARGS)
    6396             : {
    6397          66 :     text       *input_text = PG_GETARG_TEXT_PP(0);
    6398             :     char       *instr;
    6399             :     int         len;
    6400             :     StringInfoData str;
    6401             :     text       *result;
    6402          66 :     pg_wchar    pair_first = 0;
    6403             :     char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    6404             : 
    6405          66 :     instr = VARDATA_ANY(input_text);
    6406          66 :     len = VARSIZE_ANY_EXHDR(input_text);
    6407             : 
    6408          66 :     initStringInfo(&str);
    6409             : 
    6410         510 :     while (len > 0)
    6411             :     {
    6412         486 :         if (instr[0] == '\\')
    6413             :         {
    6414         102 :             if (len >= 2 &&
    6415         102 :                 instr[1] == '\\')
    6416             :             {
    6417           6 :                 if (pair_first)
    6418           0 :                     goto invalid_pair;
    6419           6 :                 appendStringInfoChar(&str, '\\');
    6420           6 :                 instr += 2;
    6421           6 :                 len -= 2;
    6422             :             }
    6423          96 :             else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
    6424          66 :                      (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
    6425          30 :             {
    6426             :                 pg_wchar    unicode;
    6427          42 :                 int         offset = instr[1] == 'u' ? 2 : 1;
    6428             : 
    6429          42 :                 unicode = hexval_n(instr + offset, 4);
    6430             : 
    6431          42 :                 if (!is_valid_unicode_codepoint(unicode))
    6432           0 :                     ereport(ERROR,
    6433             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6434             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6435             : 
    6436          42 :                 if (pair_first)
    6437             :                 {
    6438          12 :                     if (is_utf16_surrogate_second(unicode))
    6439             :                     {
    6440           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6441           0 :                         pair_first = 0;
    6442             :                     }
    6443             :                     else
    6444          12 :                         goto invalid_pair;
    6445             :                 }
    6446          30 :                 else if (is_utf16_surrogate_second(unicode))
    6447           0 :                     goto invalid_pair;
    6448             : 
    6449          30 :                 if (is_utf16_surrogate_first(unicode))
    6450          18 :                     pair_first = unicode;
    6451             :                 else
    6452             :                 {
    6453          12 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6454          12 :                     appendStringInfoString(&str, cbuf);
    6455             :                 }
    6456             : 
    6457          30 :                 instr += 4 + offset;
    6458          30 :                 len -= 4 + offset;
    6459             :             }
    6460          54 :             else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
    6461          12 :             {
    6462             :                 pg_wchar    unicode;
    6463             : 
    6464          24 :                 unicode = hexval_n(instr + 2, 6);
    6465             : 
    6466          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6467           6 :                     ereport(ERROR,
    6468             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6469             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6470             : 
    6471          18 :                 if (pair_first)
    6472             :                 {
    6473           6 :                     if (is_utf16_surrogate_second(unicode))
    6474             :                     {
    6475           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6476           0 :                         pair_first = 0;
    6477             :                     }
    6478             :                     else
    6479           6 :                         goto invalid_pair;
    6480             :                 }
    6481          12 :                 else if (is_utf16_surrogate_second(unicode))
    6482           0 :                     goto invalid_pair;
    6483             : 
    6484          12 :                 if (is_utf16_surrogate_first(unicode))
    6485           6 :                     pair_first = unicode;
    6486             :                 else
    6487             :                 {
    6488           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6489           6 :                     appendStringInfoString(&str, cbuf);
    6490             :                 }
    6491             : 
    6492          12 :                 instr += 8;
    6493          12 :                 len -= 8;
    6494             :             }
    6495          30 :             else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
    6496          12 :             {
    6497             :                 pg_wchar    unicode;
    6498             : 
    6499          24 :                 unicode = hexval_n(instr + 2, 8);
    6500             : 
    6501          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6502           6 :                     ereport(ERROR,
    6503             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6504             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6505             : 
    6506          18 :                 if (pair_first)
    6507             :                 {
    6508           6 :                     if (is_utf16_surrogate_second(unicode))
    6509             :                     {
    6510           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6511           0 :                         pair_first = 0;
    6512             :                     }
    6513             :                     else
    6514           6 :                         goto invalid_pair;
    6515             :                 }
    6516          12 :                 else if (is_utf16_surrogate_second(unicode))
    6517           0 :                     goto invalid_pair;
    6518             : 
    6519          12 :                 if (is_utf16_surrogate_first(unicode))
    6520           6 :                     pair_first = unicode;
    6521             :                 else
    6522             :                 {
    6523           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6524           6 :                     appendStringInfoString(&str, cbuf);
    6525             :                 }
    6526             : 
    6527          12 :                 instr += 10;
    6528          12 :                 len -= 10;
    6529             :             }
    6530             :             else
    6531           6 :                 ereport(ERROR,
    6532             :                         (errcode(ERRCODE_SYNTAX_ERROR),
    6533             :                          errmsg("invalid Unicode escape"),
    6534             :                          errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
    6535             :         }
    6536             :         else
    6537             :         {
    6538         384 :             if (pair_first)
    6539           0 :                 goto invalid_pair;
    6540             : 
    6541         384 :             appendStringInfoChar(&str, *instr++);
    6542         384 :             len--;
    6543             :         }
    6544             :     }
    6545             : 
    6546             :     /* unfinished surrogate pair? */
    6547          24 :     if (pair_first)
    6548           6 :         goto invalid_pair;
    6549             : 
    6550          18 :     result = cstring_to_text_with_len(str.data, str.len);
    6551          18 :     pfree(str.data);
    6552             : 
    6553          18 :     PG_RETURN_TEXT_P(result);
    6554             : 
    6555          30 : invalid_pair:
    6556          30 :     ereport(ERROR,
    6557             :             (errcode(ERRCODE_SYNTAX_ERROR),
    6558             :              errmsg("invalid Unicode surrogate pair")));
    6559             :     PG_RETURN_NULL();           /* keep compiler quiet */
    6560             : }

Generated by: LCOV version 1.14