LCOV - code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 1950 2173 89.7 %
Date: 2023-05-31 04:12:22 Functions: 150 163 92.0 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/detoast.h"
      21             : #include "access/toast_compression.h"
      22             : #include "catalog/pg_collation.h"
      23             : #include "catalog/pg_type.h"
      24             : #include "common/hashfn.h"
      25             : #include "common/int.h"
      26             : #include "common/unicode_norm.h"
      27             : #include "funcapi.h"
      28             : #include "lib/hyperloglog.h"
      29             : #include "libpq/pqformat.h"
      30             : #include "miscadmin.h"
      31             : #include "nodes/execnodes.h"
      32             : #include "parser/scansup.h"
      33             : #include "port/pg_bswap.h"
      34             : #include "regex/regex.h"
      35             : #include "utils/builtins.h"
      36             : #include "utils/bytea.h"
      37             : #include "utils/guc.h"
      38             : #include "utils/lsyscache.h"
      39             : #include "utils/memutils.h"
      40             : #include "utils/pg_locale.h"
      41             : #include "utils/sortsupport.h"
      42             : #include "utils/varlena.h"
      43             : 
      44             : 
      45             : /* GUC variable */
      46             : int         bytea_output = BYTEA_OUTPUT_HEX;
      47             : 
      48             : typedef struct varlena VarString;
      49             : 
      50             : /*
      51             :  * State for text_position_* functions.
      52             :  */
      53             : typedef struct
      54             : {
      55             :     bool        is_multibyte_char_in_char;  /* need to check char boundaries? */
      56             : 
      57             :     char       *str1;           /* haystack string */
      58             :     char       *str2;           /* needle string */
      59             :     int         len1;           /* string lengths in bytes */
      60             :     int         len2;
      61             : 
      62             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      63             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      64             :     int         skiptable[256]; /* skip distance for given mismatched char */
      65             : 
      66             :     char       *last_match;     /* pointer to last match in 'str1' */
      67             : 
      68             :     /*
      69             :      * Sometimes we need to convert the byte position of a match to a
      70             :      * character position.  These store the last position that was converted,
      71             :      * so that on the next call, we can continue from that point, rather than
      72             :      * count characters from the very beginning.
      73             :      */
      74             :     char       *refpoint;       /* pointer within original haystack string */
      75             :     int         refpos;         /* 0-based character offset of the same point */
      76             : } TextPositionState;
      77             : 
      78             : typedef struct
      79             : {
      80             :     char       *buf1;           /* 1st string, or abbreviation original string
      81             :                                  * buf */
      82             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      83             :     int         buflen1;        /* Allocated length of buf1 */
      84             :     int         buflen2;        /* Allocated length of buf2 */
      85             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      86             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      87             :     int         last_returned;  /* Last comparison result (cache) */
      88             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      89             :     bool        collate_c;
      90             :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
      91             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      92             :     hyperLogLogState full_card; /* Full key cardinality state */
      93             :     double      prop_card;      /* Required cardinality proportion */
      94             :     pg_locale_t locale;
      95             : } VarStringSortSupport;
      96             : 
      97             : /*
      98             :  * Output data for split_text(): we output either to an array or a table.
      99             :  * tupstore and tupdesc must be set up in advance to output to a table.
     100             :  */
     101             : typedef struct
     102             : {
     103             :     ArrayBuildState *astate;
     104             :     Tuplestorestate *tupstore;
     105             :     TupleDesc   tupdesc;
     106             : } SplitTextOutputData;
     107             : 
     108             : /*
     109             :  * This should be large enough that most strings will fit, but small enough
     110             :  * that we feel comfortable putting it on the stack
     111             :  */
     112             : #define TEXTBUFLEN      1024
     113             : 
     114             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
     115             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
     116             : 
     117             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
     118             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
     119             : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
     120             : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
     121             : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
     122             : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
     123             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
     124             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     125             : static int32 text_length(Datum str);
     126             : static text *text_catenate(text *t1, text *t2);
     127             : static text *text_substring(Datum str,
     128             :                             int32 start,
     129             :                             int32 length,
     130             :                             bool length_not_specified);
     131             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     132             : static int  text_position(text *t1, text *t2, Oid collid);
     133             : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
     134             : static bool text_position_next(TextPositionState *state);
     135             : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
     136             : static char *text_position_get_match_ptr(TextPositionState *state);
     137             : static int  text_position_get_match_pos(TextPositionState *state);
     138             : static void text_position_cleanup(TextPositionState *state);
     139             : static void check_collation_set(Oid collid);
     140             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     141             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     142             : static bytea *bytea_substring(Datum str,
     143             :                               int S,
     144             :                               int L,
     145             :                               bool length_not_specified);
     146             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     147             : static void appendStringInfoText(StringInfo str, const text *t);
     148             : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
     149             : static void split_text_accum_result(SplitTextOutputData *tstate,
     150             :                                     text *field_value,
     151             :                                     text *null_string,
     152             :                                     Oid collation);
     153             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     154             :                                     const char *fldsep, const char *null_string);
     155             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
     156             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     157             :                                      int *value);
     158             : static const char *text_format_parse_format(const char *start_ptr,
     159             :                                             const char *end_ptr,
     160             :                                             int *argpos, int *widthpos,
     161             :                                             int *flags, int *width);
     162             : static void text_format_string_conversion(StringInfo buf, char conversion,
     163             :                                           FmgrInfo *typOutputInfo,
     164             :                                           Datum value, bool isNull,
     165             :                                           int flags, int width);
     166             : static void text_format_append_string(StringInfo buf, const char *str,
     167             :                                       int flags, int width);
     168             : 
     169             : 
     170             : /*****************************************************************************
     171             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     172             :  *****************************************************************************/
     173             : 
     174             : /*
     175             :  * cstring_to_text
     176             :  *
     177             :  * Create a text value from a null-terminated C string.
     178             :  *
     179             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     180             :  */
     181             : text *
     182    23804648 : cstring_to_text(const char *s)
     183             : {
     184    23804648 :     return cstring_to_text_with_len(s, strlen(s));
     185             : }
     186             : 
     187             : /*
     188             :  * cstring_to_text_with_len
     189             :  *
     190             :  * Same as cstring_to_text except the caller specifies the string length;
     191             :  * the string need not be null-terminated.
     192             :  */
     193             : text *
     194    28873258 : cstring_to_text_with_len(const char *s, int len)
     195             : {
     196    28873258 :     text       *result = (text *) palloc(len + VARHDRSZ);
     197             : 
     198    28873258 :     SET_VARSIZE(result, len + VARHDRSZ);
     199    28873258 :     memcpy(VARDATA(result), s, len);
     200             : 
     201    28873258 :     return result;
     202             : }
     203             : 
     204             : /*
     205             :  * text_to_cstring
     206             :  *
     207             :  * Create a palloc'd, null-terminated C string from a text value.
     208             :  *
     209             :  * We support being passed a compressed or toasted text value.
     210             :  * This is a bit bogus since such values shouldn't really be referred to as
     211             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     212             :  * case here, we'd need another routine that did, anyway.
     213             :  */
     214             : char *
     215    18129666 : text_to_cstring(const text *t)
     216             : {
     217             :     /* must cast away the const, unfortunately */
     218    18129666 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
     219    18129666 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
     220             :     char       *result;
     221             : 
     222    18129666 :     result = (char *) palloc(len + 1);
     223    18129666 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     224    18129666 :     result[len] = '\0';
     225             : 
     226    18129666 :     if (tunpacked != t)
     227      128472 :         pfree(tunpacked);
     228             : 
     229    18129666 :     return result;
     230             : }
     231             : 
     232             : /*
     233             :  * text_to_cstring_buffer
     234             :  *
     235             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     236             :  *
     237             :  * The text string is truncated if necessary to fit.  The result is
     238             :  * guaranteed null-terminated (unless dst_len == 0).
     239             :  *
     240             :  * We support being passed a compressed or toasted text value.
     241             :  * This is a bit bogus since such values shouldn't really be referred to as
     242             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     243             :  * case here, we'd need another routine that did, anyway.
     244             :  */
     245             : void
     246         640 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     247             : {
     248             :     /* must cast away the const, unfortunately */
     249         640 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
     250         640 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     251             : 
     252         640 :     if (dst_len > 0)
     253             :     {
     254         640 :         dst_len--;
     255         640 :         if (dst_len >= src_len)
     256         640 :             dst_len = src_len;
     257             :         else                    /* ensure truncation is encoding-safe */
     258           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     259         640 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
     260         640 :         dst[dst_len] = '\0';
     261             :     }
     262             : 
     263         640 :     if (srcunpacked != src)
     264           0 :         pfree(srcunpacked);
     265         640 : }
     266             : 
     267             : 
     268             : /*****************************************************************************
     269             :  *   USER I/O ROUTINES                                                       *
     270             :  *****************************************************************************/
     271             : 
     272             : 
     273             : #define VAL(CH)         ((CH) - '0')
     274             : #define DIG(VAL)        ((VAL) + '0')
     275             : 
     276             : /*
     277             :  *      byteain         - converts from printable representation of byte array
     278             :  *
     279             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     280             :  *      converted to internal form.  '\' must be passed as '\\'.
     281             :  *      ereport(ERROR, ...) if bad form.
     282             :  *
     283             :  *      BUGS:
     284             :  *              The input is scanned twice.
     285             :  *              The error checking of input is minimal.
     286             :  */
     287             : Datum
     288      262846 : byteain(PG_FUNCTION_ARGS)
     289             : {
     290      262846 :     char       *inputText = PG_GETARG_CSTRING(0);
     291      262846 :     Node       *escontext = fcinfo->context;
     292             :     char       *tp;
     293             :     char       *rp;
     294             :     int         bc;
     295             :     bytea      *result;
     296             : 
     297             :     /* Recognize hex input */
     298      262846 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     299             :     {
     300         810 :         size_t      len = strlen(inputText);
     301             : 
     302         810 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     303         810 :         result = palloc(bc);
     304         810 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
     305             :                              escontext);
     306         798 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     307             : 
     308         798 :         PG_RETURN_BYTEA_P(result);
     309             :     }
     310             : 
     311             :     /* Else, it's the traditional escaped style */
     312     4354570 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
     313             :     {
     314     4092546 :         if (tp[0] != '\\')
     315     4091530 :             tp++;
     316        1016 :         else if ((tp[0] == '\\') &&
     317        1016 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     318        1004 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     319        1004 :                  (tp[3] >= '0' && tp[3] <= '7'))
     320        1004 :             tp += 4;
     321          12 :         else if ((tp[0] == '\\') &&
     322          12 :                  (tp[1] == '\\'))
     323           0 :             tp += 2;
     324             :         else
     325             :         {
     326             :             /*
     327             :              * one backslash, not followed by another or ### valid octal
     328             :              */
     329          12 :             ereturn(escontext, (Datum) 0,
     330             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     331             :                      errmsg("invalid input syntax for type %s", "bytea")));
     332             :         }
     333             :     }
     334             : 
     335      262024 :     bc += VARHDRSZ;
     336             : 
     337      262024 :     result = (bytea *) palloc(bc);
     338      262024 :     SET_VARSIZE(result, bc);
     339             : 
     340      262024 :     tp = inputText;
     341      262024 :     rp = VARDATA(result);
     342     4354528 :     while (*tp != '\0')
     343             :     {
     344     4092504 :         if (tp[0] != '\\')
     345     4091500 :             *rp++ = *tp++;
     346        1004 :         else if ((tp[0] == '\\') &&
     347        1004 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     348        1004 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     349        1004 :                  (tp[3] >= '0' && tp[3] <= '7'))
     350             :         {
     351        1004 :             bc = VAL(tp[1]);
     352        1004 :             bc <<= 3;
     353        1004 :             bc += VAL(tp[2]);
     354        1004 :             bc <<= 3;
     355        1004 :             *rp++ = bc + VAL(tp[3]);
     356             : 
     357        1004 :             tp += 4;
     358             :         }
     359           0 :         else if ((tp[0] == '\\') &&
     360           0 :                  (tp[1] == '\\'))
     361             :         {
     362           0 :             *rp++ = '\\';
     363           0 :             tp += 2;
     364             :         }
     365             :         else
     366             :         {
     367             :             /*
     368             :              * We should never get here. The first pass should not allow it.
     369             :              */
     370           0 :             ereturn(escontext, (Datum) 0,
     371             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     372             :                      errmsg("invalid input syntax for type %s", "bytea")));
     373             :         }
     374             :     }
     375             : 
     376      262024 :     PG_RETURN_BYTEA_P(result);
     377             : }
     378             : 
     379             : /*
     380             :  *      byteaout        - converts to printable representation of byte array
     381             :  *
     382             :  *      In the traditional escaped format, non-printable characters are
     383             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     384             :  */
     385             : Datum
     386       13432 : byteaout(PG_FUNCTION_ARGS)
     387             : {
     388       13432 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     389             :     char       *result;
     390             :     char       *rp;
     391             : 
     392       13432 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     393             :     {
     394             :         /* Print hex format */
     395       13048 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     396       13048 :         *rp++ = '\\';
     397       13048 :         *rp++ = 'x';
     398       13048 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     399             :     }
     400         384 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     401             :     {
     402             :         /* Print traditional escaped format */
     403             :         char       *vp;
     404             :         uint64      len;
     405             :         int         i;
     406             : 
     407         384 :         len = 1;                /* empty string has 1 char */
     408         384 :         vp = VARDATA_ANY(vlena);
     409      217660 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     410             :         {
     411      217276 :             if (*vp == '\\')
     412           0 :                 len += 2;
     413      217276 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     414         498 :                 len += 4;
     415             :             else
     416      216778 :                 len++;
     417             :         }
     418             : 
     419             :         /*
     420             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     421             :          * for safety let's check rather than relying on palloc's internal
     422             :          * check.
     423             :          */
     424         384 :         if (len > MaxAllocSize)
     425           0 :             ereport(ERROR,
     426             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     427             :                      errmsg_internal("result of bytea output conversion is too large")));
     428         384 :         rp = result = (char *) palloc(len);
     429             : 
     430         384 :         vp = VARDATA_ANY(vlena);
     431      217660 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     432             :         {
     433      217276 :             if (*vp == '\\')
     434             :             {
     435           0 :                 *rp++ = '\\';
     436           0 :                 *rp++ = '\\';
     437             :             }
     438      217276 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     439         498 :             {
     440             :                 int         val;    /* holds unprintable chars */
     441             : 
     442         498 :                 val = *vp;
     443         498 :                 rp[0] = '\\';
     444         498 :                 rp[3] = DIG(val & 07);
     445         498 :                 val >>= 3;
     446         498 :                 rp[2] = DIG(val & 07);
     447         498 :                 val >>= 3;
     448         498 :                 rp[1] = DIG(val & 03);
     449         498 :                 rp += 4;
     450             :             }
     451             :             else
     452      216778 :                 *rp++ = *vp;
     453             :         }
     454             :     }
     455             :     else
     456             :     {
     457           0 :         elog(ERROR, "unrecognized bytea_output setting: %d",
     458             :              bytea_output);
     459             :         rp = result = NULL;     /* keep compiler quiet */
     460             :     }
     461       13432 :     *rp = '\0';
     462       13432 :     PG_RETURN_CSTRING(result);
     463             : }
     464             : 
     465             : /*
     466             :  *      bytearecv           - converts external binary format to bytea
     467             :  */
     468             : Datum
     469        1038 : bytearecv(PG_FUNCTION_ARGS)
     470             : {
     471        1038 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     472             :     bytea      *result;
     473             :     int         nbytes;
     474             : 
     475        1038 :     nbytes = buf->len - buf->cursor;
     476        1038 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     477        1038 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     478        1038 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     479        1038 :     PG_RETURN_BYTEA_P(result);
     480             : }
     481             : 
     482             : /*
     483             :  *      byteasend           - converts bytea to binary format
     484             :  *
     485             :  * This is a special case: just copy the input...
     486             :  */
     487             : Datum
     488        5604 : byteasend(PG_FUNCTION_ARGS)
     489             : {
     490        5604 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     491             : 
     492        5604 :     PG_RETURN_BYTEA_P(vlena);
     493             : }
     494             : 
     495             : Datum
     496       92774 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     497             : {
     498             :     StringInfo  state;
     499             : 
     500       92774 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     501             : 
     502             :     /* Append the value unless null, preceding it with the delimiter. */
     503       92774 :     if (!PG_ARGISNULL(1))
     504             :     {
     505       77774 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     506       77774 :         bool        isfirst = false;
     507             : 
     508             :         /*
     509             :          * You might think we can just throw away the first delimiter, however
     510             :          * we must keep it as we may be a parallel worker doing partial
     511             :          * aggregation building a state to send to the main process.  We need
     512             :          * to keep the delimiter of every aggregation so that the combine
     513             :          * function can properly join up the strings of two separately
     514             :          * partially aggregated results.  The first delimiter is only stripped
     515             :          * off in the final function.  To know how much to strip off the front
     516             :          * of the string, we store the length of the first delimiter in the
     517             :          * StringInfo's cursor field, which we don't otherwise need here.
     518             :          */
     519       77774 :         if (state == NULL)
     520             :         {
     521         166 :             state = makeStringAggState(fcinfo);
     522         166 :             isfirst = true;
     523             :         }
     524             : 
     525       77774 :         if (!PG_ARGISNULL(2))
     526             :         {
     527       77762 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     528             : 
     529       77762 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
     530       77762 :                                    VARSIZE_ANY_EXHDR(delim));
     531       77762 :             if (isfirst)
     532         160 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
     533             :         }
     534             : 
     535       77774 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
     536       77774 :                                VARSIZE_ANY_EXHDR(value));
     537             :     }
     538             : 
     539             :     /*
     540             :      * The transition type for string_agg() is declared to be "internal",
     541             :      * which is a pass-by-value type the same size as a pointer.
     542             :      */
     543       92774 :     if (state)
     544       92732 :         PG_RETURN_POINTER(state);
     545          42 :     PG_RETURN_NULL();
     546             : }
     547             : /* bytea_string_agg_finalfn - final function for the bytea string_agg() aggregate: returns NULL when the transition state is NULL, otherwise a new bytea holding state->data from state->cursor to the end */
     548             : Datum
     549         152 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     550             : {
     551             :     StringInfo  state;
     552             : 
     553             :     /* cannot be called directly because of internal-type argument */
     554             :     Assert(AggCheckCallContext(fcinfo, NULL));
     555             : 
     556         152 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     557             : 
     558         152 :     if (state != NULL)
     559             :     {
     560             :         /* As per comment in transfn, strip data before the cursor position; only the bytes from state->cursor onward are copied into the result */
     561             :         bytea      *result;
     562         146 :         int         strippedlen = state->len - state->cursor;
     563             : 
     564         146 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
     565         146 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
     566         146 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
     567         146 :         PG_RETURN_BYTEA_P(result);
     568             :     }
     569             :     else
     570           6 :         PG_RETURN_NULL();
     571             : }
     572             : 
     573             : /*
     574             :  *      textin          - converts "..." (a C string argument) to the internal text representation via cstring_to_text()
     575             :  */
     576             : Datum
     577    18095270 : textin(PG_FUNCTION_ARGS)
     578             : {
     579    18095270 :     char       *inputText = PG_GETARG_CSTRING(0);
     580             : 
     581    18095270 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     582             : }
     583             : 
     584             : /*
     585             :  *      textout         - converts internal representation to "..." (a C string) via TextDatumGetCString()
     586             :  */
     587             : Datum
     588     8300560 : textout(PG_FUNCTION_ARGS)
     589             : {
     590     8300560 :     Datum       txt = PG_GETARG_DATUM(0);
     591             : 
     592     8300560 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     593             : }
     594             : 
     595             : /*
     596             :  *      textrecv            - converts external binary format to text; argument 0 is the StringInfo message buffer to read from
     597             :  */
     598             : Datum
     599      106720 : textrecv(PG_FUNCTION_ARGS)
     600             : {
     601      106720 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     602             :     text       *result;
     603             :     char       *str;
     604             :     int         nbytes;
     605             :     /* read everything from the buffer's cursor to its end; nbytes receives the length of the returned string */
     606      106720 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     607             :     /* copy into a text datum, then release the intermediate string */
     608      106720 :     result = cstring_to_text_with_len(str, nbytes);
     609      106720 :     pfree(str);
     610      106720 :     PG_RETURN_TEXT_P(result);
     611             : }
     612             : 
     613             : /*
     614             :  *      textsend            - converts text to binary format: the text's data bytes are written with pq_sendtext() and returned as a bytea
     615             :  */
     616             : Datum
     617       68228 : textsend(PG_FUNCTION_ARGS)
     618             : {
     619       68228 :     text       *t = PG_GETARG_TEXT_PP(0);
     620             :     StringInfoData buf;
     621             : 
     622       68228 :     pq_begintypsend(&buf);
     623       68228 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
     624       68228 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     625             : }
     626             : 
     627             : 
     628             : /*
     629             :  *      unknownin           - converts "..." to internal representation, which is simply a pstrdup'd copy of the input C string
     630             :  */
     631             : Datum
     632           0 : unknownin(PG_FUNCTION_ARGS)
     633             : {
     634           0 :     char       *str = PG_GETARG_CSTRING(0);
     635             : 
     636             :     /* representation is same as cstring */
     637           0 :     PG_RETURN_CSTRING(pstrdup(str));
     638             : }
     639             : 
     640             : /*
     641             :  *      unknownout          - converts internal representation to "..." by returning a pstrdup'd copy of the stored C string
     642             :  */
     643             : Datum
     644         682 : unknownout(PG_FUNCTION_ARGS)
     645             : {
     646             :     /* representation is same as cstring */
     647         682 :     char       *str = PG_GETARG_CSTRING(0);
     648             : 
     649         682 :     PG_RETURN_CSTRING(pstrdup(str));
     650             : }
     651             : 
     652             : /*
     653             :  *      unknownrecv         - converts external binary format to unknown; argument 0 is the StringInfo message buffer to read from
     654             :  */
     655             : Datum
     656           0 : unknownrecv(PG_FUNCTION_ARGS)
     657             : {
     658           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     659             :     char       *str;
     660             :     int         nbytes;
     661             :     /* read everything from the buffer's cursor to its end */
     662           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     663             :     /* representation is same as cstring, so the string is returned as-is; nbytes is filled in above but not used */
     664           0 :     PG_RETURN_CSTRING(str);
     665             : }
     666             : 
     667             : /*
     668             :  *      unknownsend         - converts unknown to binary format: strlen(str) bytes of the C string are written with pq_sendtext() and returned as a bytea
     669             :  */
     670             : Datum
     671           0 : unknownsend(PG_FUNCTION_ARGS)
     672             : {
     673             :     /* representation is same as cstring */
     674           0 :     char       *str = PG_GETARG_CSTRING(0);
     675             :     StringInfoData buf;
     676             : 
     677           0 :     pq_begintypsend(&buf);
     678           0 :     pq_sendtext(&buf, str, strlen(str));
     679           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     680             : }
     681             : 
     682             : 
     683             : /* ========== PUBLIC ROUTINES ========== */
     684             : 
     685             : /*
     686             :  * textlen -
     687             :  *    returns the logical length of a text*, as computed by text_length()
     688             :  *     (which is less than the VARSIZE of the text*)
     689             :  */
     690             : Datum
     691      430540 : textlen(PG_FUNCTION_ARGS)
     692             : {
     693      430540 :     Datum       str = PG_GETARG_DATUM(0);
     694             : 
     695             :     /* try to avoid decompressing argument: hand the raw Datum to text_length() and let it decide */
     696      430540 :     PG_RETURN_INT32(text_length(str));
     697             : }
     698             : 
     699             : /*
     700             :  * text_length -
     701             :  *  Does the real work for textlen()
     702             :  *
     703             :  *  This is broken out so it can be called directly by other string processing
     704             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     705             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     706             :  *  in some cases: when the encoding's max length is one, only the raw datum size is needed.
     707             :  */
     708             : static int32
     709      430552 : text_length(Datum str)
     710             : {
     711             :     /* fastpath when max encoding length is one: length is the raw size minus VARHDRSZ.  NOTE(review): PG_RETURN_INT32 looks like the fmgr return macro, yet this function is declared to return int32 - confirm whether a plain return is intended */
     712      430552 :     if (pg_database_encoding_max_length() == 1)
     713          32 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     714             :     else
     715             :     {
     716      430520 :         text       *t = DatumGetTextPP(str);
     717             :         /* otherwise the length is whatever pg_mbstrlen_with_len() reports for the data bytes */
     718      430520 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
     719             :                                              VARSIZE_ANY_EXHDR(t)));
     720             :     }
     721             : }
     722             : 
     723             : /*
     724             :  * textoctetlen -
     725             :  *    returns the physical length of a text*: toast_raw_datum_size() minus VARHDRSZ
     726             :  *     (which is less than the VARSIZE of the text*)
     727             :  */
     728             : Datum
     729          70 : textoctetlen(PG_FUNCTION_ARGS)
     730             : {
     731          70 :     Datum       str = PG_GETARG_DATUM(0);
     732             : 
     733             :     /* We need not detoast the input at all */
     734          70 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     735             : }
     736             : 
     737             : /*
     738             :  * textcat -
     739             :  *    takes two text* and returns a text* that is the concatenation of
     740             :  *    the two.  The actual work is done by text_catenate().
     741             :  *
     742             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     743             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     744             :  * Allocate space for output in all cases.
     745             :  * XXX - thomas 1997-07-10
     746             :  */
     747             : Datum
     748     2664476 : textcat(PG_FUNCTION_ARGS)
     749             : {
     750     2664476 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     751     2664476 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     752             : 
     753     2664476 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     754             : }
     755             : 
     756             : /*
     757             :  * text_catenate
     758             :  *  Guts of textcat(), broken out so it can be used by other functions
     759             :  *  Returns a newly palloc'd text holding t1's data bytes followed by t2's.
     760             :  * Arguments can be in short-header form, but not compressed or out-of-line
     761             :  */
     762             : static text *
     763     2664556 : text_catenate(text *t1, text *t2)
     764             : {
     765             :     text       *result;
     766             :     int         len1,
     767             :                 len2,
     768             :                 len;
     769             :     char       *ptr;
     770             : 
     771     2664556 :     len1 = VARSIZE_ANY_EXHDR(t1);
     772     2664556 :     len2 = VARSIZE_ANY_EXHDR(t2);
     773             : 
     774             :     /* paranoia: clamp negative lengths to zero ... probably should throw error instead? */
     775     2664556 :     if (len1 < 0)
     776           0 :         len1 = 0;
     777     2664556 :     if (len2 < 0)
     778           0 :         len2 = 0;
     779             : 
     780     2664556 :     len = len1 + len2 + VARHDRSZ;
     781     2664556 :     result = (text *) palloc(len);
     782             : 
     783             :     /* Set size of result string... */
     784     2664556 :     SET_VARSIZE(result, len);
     785             : 
     786             :     /* Fill data field of result string: t1's bytes first, then t2's right after them; empty inputs are skipped */
     787     2664556 :     ptr = VARDATA(result);
     788     2664556 :     if (len1 > 0)
     789     2661300 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     790     2664556 :     if (len2 > 0)
     791     2664346 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     792             : 
     793     2664556 :     return result;
     794             : }
     795             : 
     796             : /*
     797             :  * charlen_to_bytelen()
     798             :  *  Compute the number of bytes occupied by n characters starting at *p
     799             :  *
     800             :  * It is caller's responsibility that there actually are n characters;
     801             :  * the string need not be null-terminated.
     802             :  */
     803             : static int
     804       10920 : charlen_to_bytelen(const char *p, int n)
     805             : {
     806       10920 :     if (pg_database_encoding_max_length() == 1)
     807             :     {
     808             :         /* Optimization for single-byte encodings: n characters are n bytes */
     809         144 :         return n;
     810             :     }
     811             :     else
     812             :     {
     813             :         const char *s;
     814             :         /* step over n characters, advancing by pg_mblen() each time; the distance covered is the byte length */
     815     5906696 :         for (s = p; n > 0; n--)
     816     5895920 :             s += pg_mblen(s);
     817             : 
     818       10776 :         return s - p;
     819             :     }
     820             : }
     821             : 
     822             : /*
     823             :  * text_substr()
     824             :  * Return a substring starting at the specified position.
     825             :  * - thomas 1997-12-31
     826             :  *
     827             :  * Input:
     828             :  *  - string
     829             :  *  - starting position (is one-based)
     830             :  *  - string length
     831             :  *
     832             :  * If the starting position is zero or less, then return from the start of the string
     833             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     834             :  * If the length is less than zero, text_substring() raises an error ("negative substring length not allowed").
     835             :  *
     836             :  * Added multibyte support.
     837             :  * - Tatsuo Ishii 1998-4-21
     838             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     839             :  * Formerly returned the entire string; now returns a portion.
     840             :  * - Thomas Lockhart 1998-12-10
     841             :  * Now uses faster TOAST-slicing interface
     842             :  * - John Gray 2002-02-22
     843             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     844             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     845             :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
     846             :  * S > LC and < LC + 4 sometimes garbage characters are returned.
     847             :  * - Joe Conway 2002-08-10
     848             :  */
     849             : Datum
     850      749534 : text_substr(PG_FUNCTION_ARGS)
     851             : {
     852      749534 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     853             :                                     PG_GETARG_INT32(1),
     854             :                                     PG_GETARG_INT32(2),
     855             :                                     false));
     856             : }
     857             : 
     858             : /*
     859             :  * text_substr_no_len -
     860             :  *    Wrapper to avoid opr_sanity failure due to
     861             :  *    one function accepting a different number of args.  Calls text_substring() with length_not_specified = true, so the substring runs to the end of the string.
     862             :  */
     863             : Datum
     864          36 : text_substr_no_len(PG_FUNCTION_ARGS)
     865             : {
     866          36 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     867             :                                     PG_GETARG_INT32(1),
     868             :                                     -1, true));
     869             : }
     870             : 
     871             : /*
     872             :  * text_substring -
     873             :  *  Does the real work for text_substr() and text_substr_no_len()
     874             :  *
     875             :  *  This is broken out so it can be called directly by other string processing
     876             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     877             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     878             :  *  of it in some cases.
     879             :  *  start is one-based; length is not consulted when length_not_specified is true, and a negative length otherwise raises an error.
     880             :  *  The result is always a freshly palloc'd datum.
     881             :  */
     882             : static text *
     883      789418 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     884             : {
     885      789418 :     int32       eml = pg_database_encoding_max_length();
     886      789418 :     int32       S = start;      /* start position */
     887             :     int32       S1;             /* adjusted start position */
     888             :     int32       L1;             /* adjusted substring length */
     889             :     int32       E;              /* end position */
     890             : 
     891             :     /*
     892             :      * SQL99 says S can be zero or negative, but we still must fetch from the
     893             :      * start of the string.
     894             :      */
     895      789418 :     S1 = Max(S, 1);
     896             : 
     897             :     /* life is easy if the encoding max length is 1 */
     898      789418 :     if (eml == 1)
     899             :     {
     900          18 :         if (length_not_specified)   /* special case - get length to end of
     901             :                                      * string */
     902           0 :             L1 = -1;
     903          18 :         else if (length < 0)
     904             :         {
     905             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     906           0 :             ereport(ERROR,
     907             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     908             :                      errmsg("negative substring length not allowed")));
     909             :             L1 = -1;            /* silence stupider compilers */
     910             :         }
     911          18 :         else if (pg_add_s32_overflow(S, length, &E))
     912             :         {
     913             :             /*
     914             :              * L could be large enough for S + L to overflow, in which case
     915             :              * the substring must run to end of string.
     916             :              */
     917           0 :             L1 = -1;
     918             :         }
     919             :         else
     920             :         {
     921             :             /*
     922             :              * A zero or negative value for the end position can happen if the
     923             :              * start was negative or one. SQL99 says to return a zero-length
     924             :              * string.
     925             :              */
     926          18 :             if (E < 1)
     927           0 :                 return cstring_to_text("");
     928             : 
     929          18 :             L1 = E - S1;
     930             :         }
     931             : 
     932             :         /*
     933             :          * If the start position is past the end of the string, SQL99 says to
     934             :          * return a zero-length string -- DatumGetTextPSlice() will do that
     935             :          * for us.  We need only convert S1 to zero-based starting position.
     936             :          */
     937          18 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     938             :     }
     939      789400 :     else if (eml > 1)
     940             :     {
     941             :         /*
     942             :          * When encoding max length is > 1, we can't get LC (the string's
     943             :          * length in characters) without detoasting, so we'll grab a conservatively large slice now and go
     944             :          * back later to do the right thing
     945             :          */
     946             :         int32       slice_start;
     947             :         int32       slice_size;
     948             :         int32       slice_strlen;
     949             :         text       *slice;
     950             :         int32       E1;
     951             :         int32       i;
     952             :         char       *p;
     953             :         char       *s;
     954             :         text       *ret;
     955             : 
     956             :         /*
     957             :          * We need to start at position zero because there is no way to know
     958             :          * in advance which byte offset corresponds to the supplied start
     959             :          * position.
     960             :          */
     961      789400 :         slice_start = 0;
     962             : 
     963      789400 :         if (length_not_specified)   /* special case - get length to end of
     964             :                                      * string */
     965          76 :             slice_size = L1 = -1;
     966      789324 :         else if (length < 0)
     967             :         {
     968             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     969          12 :             ereport(ERROR,
     970             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     971             :                      errmsg("negative substring length not allowed")));
     972             :             slice_size = L1 = -1;   /* silence stupider compilers */
     973             :         }
     974      789312 :         else if (pg_add_s32_overflow(S, length, &E))
     975             :         {
     976             :             /*
     977             :              * L could be large enough for S + L to overflow, in which case
     978             :              * the substring must run to end of string.
     979             :              */
     980           6 :             slice_size = L1 = -1;
     981             :         }
     982             :         else
     983             :         {
     984             :             /*
     985             :              * A zero or negative value for the end position can happen if the
     986             :              * start was negative or one. SQL99 says to return a zero-length
     987             :              * string.
     988             :              */
     989      789306 :             if (E < 1)
     990           0 :                 return cstring_to_text("");
     991             : 
     992             :             /*
     993             :              * if E is past the end of the string, the tuple toaster will
     994             :              * truncate the length for us
     995             :              */
     996      789306 :             L1 = E - S1;
     997             : 
     998             :             /*
     999             :              * Total slice size in bytes can't be any longer than the start
    1000             :              * position plus substring length times the encoding max length.
    1001             :              * If that overflows, we can just use -1.
    1002             :              */
    1003      789306 :             if (pg_mul_s32_overflow(E, eml, &slice_size))
    1004           6 :                 slice_size = -1;
    1005             :         }
    1006             : 
    1007             :         /*
    1008             :          * If we're working with an untoasted source, no need to do an extra
    1009             :          * copying step: in that case slice simply aliases the input datum.
    1010             :          */
    1011      789388 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
    1012      789334 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
    1013         324 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
    1014             :         else
    1015      789064 :             slice = (text *) DatumGetPointer(str);
    1016             : 
    1017             :         /* see if we got back an empty string; free the slice only if it is a separate copy */
    1018      789388 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
    1019             :         {
    1020           0 :             if (slice != (text *) DatumGetPointer(str))
    1021           0 :                 pfree(slice);
    1022           0 :             return cstring_to_text("");
    1023             :         }
    1024             : 
    1025             :         /* Now we can get the actual length of the slice in MB characters */
    1026      789388 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
    1027      789388 :                                             VARSIZE_ANY_EXHDR(slice));
    1028             : 
    1029             :         /*
    1030             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
    1031             :          * says to return a zero-length string.
    1032             :          */
    1033      789388 :         if (S1 > slice_strlen)
    1034             :         {
    1035          22 :             if (slice != (text *) DatumGetPointer(str))
    1036           0 :                 pfree(slice);
    1037          22 :             return cstring_to_text("");
    1038             :         }
    1039             : 
    1040             :         /*
    1041             :          * Adjust L1 and E1 now that we know the slice string length. Again
    1042             :          * remember that S1 is one based, and slice_start is zero based.
    1043             :          */
    1044      789366 :         if (L1 > -1)
    1045      789306 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
    1046             :         else
    1047          60 :             E1 = slice_start + 1 + slice_strlen;
    1048             : 
    1049             :         /*
    1050             :          * Find the start position in the slice; remember S1 is not zero based
    1051             :          */
    1052      789366 :         p = VARDATA_ANY(slice);
    1053     5610148 :         for (i = 0; i < S1 - 1; i++)
    1054     4820782 :             p += pg_mblen(p);
    1055             : 
    1056             :         /* hang onto a pointer to our start position */
    1057      789366 :         s = p;
    1058             : 
    1059             :         /*
    1060             :          * Count the actual bytes used by the substring of the requested
    1061             :          * length.
    1062             :          */
    1063     9707316 :         for (i = S1; i < E1; i++)
    1064     8917950 :             p += pg_mblen(p);
    1065             :         /* copy the (p - s) bytes of the substring into a new varlena */
    1066      789366 :         ret = (text *) palloc(VARHDRSZ + (p - s));
    1067      789366 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
    1068      789366 :         memcpy(VARDATA(ret), s, (p - s));
    1069             : 
    1070      789366 :         if (slice != (text *) DatumGetPointer(str))
    1071         324 :             pfree(slice);
    1072             : 
    1073      789366 :         return ret;
    1074             :     }
    1075             :     else
    1076           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
    1077             : 
    1078             :     /* not reached: suppress compiler warning */
    1079             :     return NULL;
    1080             : }
    1081             : 
    1082             : /*
    1083             :  * textoverlay
    1084             :  *  Replace specified substring of first string with second
    1085             :  *
    1086             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1087             :  * This code is a direct implementation of what the standard says; the work is done by text_overlay().
    1088             :  */
    1089             : Datum
    1090          28 : textoverlay(PG_FUNCTION_ARGS)
    1091             : {
    1092          28 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1093          28 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1094          28 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1095          28 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1096             : 
    1097          28 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1098             : }
    1099             : /* textoverlay_no_len - variant of textoverlay() without a length argument: the substring length defaults to the length of t2 as returned by text_length() */
    1100             : Datum
    1101          12 : textoverlay_no_len(PG_FUNCTION_ARGS)
    1102             : {
    1103          12 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1104          12 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1105          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1106             :     int         sl;
    1107             : 
    1108          12 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
    1109          12 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1110             : }
    1111             : /* text_overlay - guts of textoverlay()/textoverlay_no_len(): returns the first sp - 1 characters of t1, then t2, then the rest of t1 starting at position sp + sl, built with text_substring() and text_catenate() */
    1112             : static text *
    1113          40 : text_overlay(text *t1, text *t2, int sp, int sl)
    1114             : {
    1115             :     text       *result;
    1116             :     text       *s1;
    1117             :     text       *s2;
    1118             :     int         sp_pl_sl;
    1119             : 
    1120             :     /*
    1121             :      * Check for possible integer-overflow cases.  For zero or negative sp, throw a
    1122             :      * "substring length" error because that's what should be expected
    1123             :      * according to the spec's definition of OVERLAY().
    1124             :      */
    1125          40 :     if (sp <= 0)
    1126           0 :         ereport(ERROR,
    1127             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1128             :                  errmsg("negative substring length not allowed")));
    1129          40 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    1130           0 :         ereport(ERROR,
    1131             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1132             :                  errmsg("integer out of range")));
    1133             :     /* s1 is the part of t1 before sp; s2 is the part of t1 from sp + sl to the end */
    1134          40 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
    1135          40 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    1136          40 :     result = text_catenate(s1, t2);
    1137          40 :     result = text_catenate(result, s2);
    1138             : 
    1139          40 :     return result;
    1140             : }
    1141             : 
    1142             : /*
    1143             :  * textpos -
    1144             :  *    Return the position of the specified substring, as computed by text_position() with the call's collation.
    1145             :  *    Implements the SQL POSITION() function.
    1146             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1147             :  * - thomas 1997-07-27
    1148             :  */
    1149             : Datum
    1150         106 : textpos(PG_FUNCTION_ARGS)
    1151             : {
    1152         106 :     text       *str = PG_GETARG_TEXT_PP(0);
    1153         106 :     text       *search_str = PG_GETARG_TEXT_PP(1);
    1154             : 
    1155         106 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
    1156             : }
    1157             : 
    1158             : /*
    1159             :  * text_position -
    1160             :  *  Does the real work for textpos()
    1161             :  *
    1162             :  * Inputs:
    1163             :  *      t1 - string to be searched
    1164             :  *      t2 - pattern to match within t1
    1165             :  * Result:
    1166             :  *      Character index of the first matched char, starting from 1,
    1167             :  *      or 0 if no match.  An empty t2 always yields 1.
    1168             :  *
    1169             :  *  This is broken out so it can be called directly by other string processing
    1170             :  *  functions.
    1171             :  */
    1172             : static int
    1173         106 : text_position(text *t1, text *t2, Oid collid)
    1174             : {
    1175             :     TextPositionState state;
    1176             :     int         result;
    1177             : 
    1178             :     /* Empty needle always matches at position 1 */
    1179         106 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
    1180          12 :         return 1;
    1181             : 
    1182             :     /* Otherwise, can't match if haystack is shorter than needle */
    1183          94 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
    1184          22 :         return 0;
    1185             :     /* general case: set up a search, look for the first match only, then clean up the search state */
    1186          72 :     text_position_setup(t1, t2, collid, &state);
    1187          72 :     if (!text_position_next(&state))
    1188          24 :         result = 0;
    1189             :     else
    1190          48 :         result = text_position_get_match_pos(&state);
    1191          72 :     text_position_cleanup(&state);
    1192          72 :     return result;
    1193             : }
    1194             : 
    1195             : 
    1196             : /*
    1197             :  * text_position_setup, text_position_next, text_position_cleanup -
    1198             :  *  Component steps of text_position()
    1199             :  *
    1200             :  * These are broken out so that a string can be efficiently searched for
    1201             :  * multiple occurrences of the same pattern.  text_position_next may be
    1202             :  * called multiple times, and it advances to the next match on each call.
    1203             :  * text_position_get_match_ptr() and text_position_get_match_pos() return
    1204             :  * a pointer or 1-based character position of the last match, respectively.
    1205             :  *
    1206             :  * The "state" variable is normally just a local variable in the caller.
    1207             :  *
    1208             :  * NOTE: text_position_next skips over the matched portion.  For example,
    1209             :  * searching for "xx" in "xxx" returns only one match, not two.
    1210             :  */
    1211             : 
    1212             : static void
    1213        2800 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
    1214             : {
    1215        2800 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
    1216        2800 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
    1217        2800 :     pg_locale_t mylocale = 0;
    1218             : 
    1219        2800 :     check_collation_set(collid);
    1220             : 
    1221        2800 :     if (!lc_collate_is_c(collid))
    1222         280 :         mylocale = pg_newlocale_from_collation(collid);
    1223             : 
    1224        2800 :     if (!pg_locale_deterministic(mylocale))
    1225          12 :         ereport(ERROR,
    1226             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1227             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1228             : 
    1229             :     Assert(len1 > 0);
    1230             :     Assert(len2 > 0);
    1231             : 
    1232             :     /*
    1233             :      * Even with a multi-byte encoding, we perform the search using the raw
    1234             :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
    1235             :      * because in UTF-8 the byte sequence of one character cannot contain
    1236             :      * another character.  For other multi-byte encodings, we do the search
    1237             :      * initially as a simple byte search, ignoring multibyte issues, but
    1238             :      * verify afterwards that the match we found is at a character boundary,
    1239             :      * and continue the search if it was a false match.
    1240             :      */
    1241        2788 :     if (pg_database_encoding_max_length() == 1)
    1242          86 :         state->is_multibyte_char_in_char = false;
    1243        2702 :     else if (GetDatabaseEncoding() == PG_UTF8)
    1244        2702 :         state->is_multibyte_char_in_char = false;
    1245             :     else
    1246           0 :         state->is_multibyte_char_in_char = true;
    1247             : 
    1248        2788 :     state->str1 = VARDATA_ANY(t1);
    1249        2788 :     state->str2 = VARDATA_ANY(t2);
    1250        2788 :     state->len1 = len1;
    1251        2788 :     state->len2 = len2;
    1252        2788 :     state->last_match = NULL;
    1253        2788 :     state->refpoint = state->str1;
    1254        2788 :     state->refpos = 0;
    1255             : 
    1256             :     /*
    1257             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1258             :      * notes we use the terminology that the "haystack" is the string to be
    1259             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1260             :      *
    1261             :      * If the needle is empty or bigger than the haystack then there is no
    1262             :      * point in wasting cycles initializing the table.  We also choose not to
    1263             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1264             :      * save anything in that case.
    1265             :      */
    1266        2788 :     if (len1 >= len2 && len2 > 1)
    1267             :     {
    1268        2580 :         int         searchlength = len1 - len2;
    1269             :         int         skiptablemask;
    1270             :         int         last;
    1271             :         int         i;
    1272        2580 :         const char *str2 = state->str2;
    1273             : 
    1274             :         /*
    1275             :          * First we must determine how much of the skip table to use.  The
    1276             :          * declaration of TextPositionState allows up to 256 elements, but for
    1277             :          * short search problems we don't really want to have to initialize so
    1278             :          * many elements --- it would take too long in comparison to the
    1279             :          * actual search time.  So we choose a useful skip table size based on
    1280             :          * the haystack length minus the needle length.  The closer the needle
    1281             :          * length is to the haystack length the less useful skipping becomes.
    1282             :          *
    1283             :          * Note: since we use bit-masking to select table elements, the skip
    1284             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1285             :          */
    1286        2580 :         if (searchlength < 16)
    1287          54 :             skiptablemask = 3;
    1288        2526 :         else if (searchlength < 64)
    1289          16 :             skiptablemask = 7;
    1290        2510 :         else if (searchlength < 128)
    1291          14 :             skiptablemask = 15;
    1292        2496 :         else if (searchlength < 512)
    1293         190 :             skiptablemask = 31;
    1294        2306 :         else if (searchlength < 2048)
    1295        2178 :             skiptablemask = 63;
    1296         128 :         else if (searchlength < 4096)
    1297          58 :             skiptablemask = 127;
    1298             :         else
    1299          70 :             skiptablemask = 255;
    1300        2580 :         state->skiptablemask = skiptablemask;
    1301             : 
    1302             :         /*
    1303             :          * Initialize the skip table.  We set all elements to the needle
    1304             :          * length, since this is the correct skip distance for any character
    1305             :          * not found in the needle.
    1306             :          */
    1307      173964 :         for (i = 0; i <= skiptablemask; i++)
    1308      171384 :             state->skiptable[i] = len2;
    1309             : 
    1310             :         /*
    1311             :          * Now examine the needle.  For each character except the last one,
    1312             :          * set the corresponding table element to the appropriate skip
    1313             :          * distance.  Note that when two characters share the same skip table
    1314             :          * entry, the one later in the needle must determine the skip
    1315             :          * distance.
    1316             :          */
    1317        2580 :         last = len2 - 1;
    1318             : 
    1319       32412 :         for (i = 0; i < last; i++)
    1320       29832 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1321             :     }
    1322        2788 : }
    1323             : 
    1324             : /*
    1325             :  * Advance to the next match, starting from the end of the previous match
    1326             :  * (or the beginning of the string, on first call).  Returns true if a match
    1327             :  * is found.
    1328             :  *
    1329             :  * Note that this refuses to match an empty-string needle.  Most callers
    1330             :  * will have handled that case specially and we'll never see it here.
    1331             :  */
    1332             : static bool
    1333        9598 : text_position_next(TextPositionState *state)
    1334             : {
    1335        9598 :     int         needle_len = state->len2;
    1336             :     char       *start_ptr;
    1337             :     char       *matchptr;
    1338             : 
    1339        9598 :     if (needle_len <= 0)
    1340           0 :         return false;           /* result for empty pattern */
    1341             : 
    1342             :     /* Start from the point right after the previous match. */
    1343        9598 :     if (state->last_match)
    1344        6798 :         start_ptr = state->last_match + needle_len;
    1345             :     else
    1346        2800 :         start_ptr = state->str1;
    1347             : 
    1348        9598 : retry:
    1349        9598 :     matchptr = text_position_next_internal(start_ptr, state);
    1350             : 
    1351        9598 :     if (!matchptr)
    1352        2728 :         return false;
    1353             : 
    1354             :     /*
    1355             :      * Found a match for the byte sequence.  If this is a multibyte encoding,
    1356             :      * where one character's byte sequence can appear inside a longer
    1357             :      * multi-byte character, we need to verify that the match was at a
    1358             :      * character boundary, not in the middle of a multi-byte character.
    1359             :      */
    1360        6870 :     if (state->is_multibyte_char_in_char)
    1361             :     {
    1362             :         /* Walk one character at a time, until we reach the match. */
    1363             : 
    1364             :         /* the search should never move backwards. */
    1365             :         Assert(state->refpoint <= matchptr);
    1366             : 
    1367           0 :         while (state->refpoint < matchptr)
    1368             :         {
    1369             :             /* step to next character. */
    1370           0 :             state->refpoint += pg_mblen(state->refpoint);
    1371           0 :             state->refpos++;
    1372             : 
    1373             :             /*
    1374             :              * If we stepped over the match's start position, then it was a
    1375             :              * false positive, where the byte sequence appeared in the middle
    1376             :              * of a multi-byte character.  Skip it, and continue the search at
    1377             :              * the next character boundary.
    1378             :              */
    1379           0 :             if (state->refpoint > matchptr)
    1380             :             {
    1381           0 :                 start_ptr = state->refpoint;
    1382           0 :                 goto retry;
    1383             :             }
    1384             :         }
    1385             :     }
    1386             : 
    1387        6870 :     state->last_match = matchptr;
    1388        6870 :     return true;
    1389             : }
    1390             : 
    1391             : /*
    1392             :  * Subroutine of text_position_next().  This searches for the raw byte
    1393             :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
    1394             :  * match starting at 'start_ptr', or NULL if no match is found.
    1395             :  */
    1396             : static char *
    1397        9598 : text_position_next_internal(char *start_ptr, TextPositionState *state)
    1398             : {
    1399        9598 :     int         haystack_len = state->len1;
    1400        9598 :     int         needle_len = state->len2;
    1401        9598 :     int         skiptablemask = state->skiptablemask;
    1402        9598 :     const char *haystack = state->str1;
    1403        9598 :     const char *needle = state->str2;
    1404        9598 :     const char *haystack_end = &haystack[haystack_len];
    1405             :     const char *hptr;
    1406             : 
    1407             :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
    1408             : 
    1409        9598 :     if (needle_len == 1)
    1410             :     {
    1411             :         /* No point in using B-M-H for a one-character needle */
    1412         754 :         char        nchar = *needle;
    1413             : 
    1414         754 :         hptr = start_ptr;
    1415        5758 :         while (hptr < haystack_end)
    1416             :         {
    1417        5592 :             if (*hptr == nchar)
    1418         588 :                 return (char *) hptr;
    1419        5004 :             hptr++;
    1420             :         }
    1421             :     }
    1422             :     else
    1423             :     {
    1424        8844 :         const char *needle_last = &needle[needle_len - 1];
    1425             : 
    1426             :         /* Start at startpos plus the length of the needle */
    1427        8844 :         hptr = start_ptr + needle_len - 1;
    1428      232916 :         while (hptr < haystack_end)
    1429             :         {
    1430             :             /* Match the needle scanning *backward* */
    1431             :             const char *nptr;
    1432             :             const char *p;
    1433             : 
    1434      230354 :             nptr = needle_last;
    1435      230354 :             p = hptr;
    1436      320786 :             while (*nptr == *p)
    1437             :             {
    1438             :                 /* Matched it all?  If so, return 1-based position */
    1439       96714 :                 if (nptr == needle)
    1440        6282 :                     return (char *) p;
    1441       90432 :                 nptr--, p--;
    1442             :             }
    1443             : 
    1444             :             /*
    1445             :              * No match, so use the haystack char at hptr to decide how far to
    1446             :              * advance.  If the needle had any occurrence of that character
    1447             :              * (or more precisely, one sharing the same skiptable entry)
    1448             :              * before its last character, then we advance far enough to align
    1449             :              * the last such needle character with that haystack position.
    1450             :              * Otherwise we can advance by the whole needle length.
    1451             :              */
    1452      224072 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1453             :         }
    1454             :     }
    1455             : 
    1456        2728 :     return 0;                   /* not found */
    1457             : }
    1458             : 
    1459             : /*
    1460             :  * Return a pointer to the current match.
    1461             :  *
    1462             :  * The returned pointer points into the original haystack string.
    1463             :  */
    1464             : static char *
    1465        6792 : text_position_get_match_ptr(TextPositionState *state)
    1466             : {
    1467        6792 :     return state->last_match;
    1468             : }
    1469             : 
    1470             : /*
    1471             :  * Return the offset of the current match.
    1472             :  *
    1473             :  * The offset is in characters, 1-based.
    1474             :  */
    1475             : static int
    1476          48 : text_position_get_match_pos(TextPositionState *state)
    1477             : {
    1478             :     /* Convert the byte position to char position. */
    1479          96 :     state->refpos += pg_mbstrlen_with_len(state->refpoint,
    1480          48 :                                           state->last_match - state->refpoint);
    1481          48 :     state->refpoint = state->last_match;
    1482          48 :     return state->refpos + 1;
    1483             : }
    1484             : 
    1485             : /*
    1486             :  * Reset search state to the initial state installed by text_position_setup.
    1487             :  *
    1488             :  * The next call to text_position_next will search from the beginning
    1489             :  * of the string.
    1490             :  */
    1491             : static void
    1492          12 : text_position_reset(TextPositionState *state)
    1493             : {
    1494          12 :     state->last_match = NULL;
    1495          12 :     state->refpoint = state->str1;
    1496          12 :     state->refpos = 0;
    1497          12 : }
    1498             : 
    1499             : static void
    1500        2788 : text_position_cleanup(TextPositionState *state)
    1501             : {
    1502             :     /* no cleanup needed */
    1503        2788 : }
    1504             : 
    1505             : 
    1506             : static void
    1507    16557106 : check_collation_set(Oid collid)
    1508             : {
    1509    16557106 :     if (!OidIsValid(collid))
    1510             :     {
    1511             :         /*
    1512             :          * This typically means that the parser could not resolve a conflict
    1513             :          * of implicit collations, so report it that way.
    1514             :          */
    1515          30 :         ereport(ERROR,
    1516             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1517             :                  errmsg("could not determine which collation to use for string comparison"),
    1518             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1519             :     }
    1520    16557076 : }
    1521             : 
    1522             : /* varstr_cmp()
    1523             :  * Comparison function for text strings with given lengths.
    1524             :  * Includes locale support, but must copy strings to temporary memory
    1525             :  *  to allow null-termination for inputs to strcoll().
    1526             :  * Returns an integer less than, equal to, or greater than zero, indicating
    1527             :  * whether arg1 is less than, equal to, or greater than arg2.
    1528             :  *
    1529             :  * Note: many functions that depend on this are marked leakproof; therefore,
    1530             :  * avoid reporting the actual contents of the input when throwing errors.
    1531             :  * All errors herein should be things that can't happen except on corrupt
    1532             :  * data, anyway; otherwise we will have trouble with indexing strings that
    1533             :  * would cause them.
    1534             :  */
    1535             : int
    1536     9273010 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
    1537             : {
    1538             :     int         result;
    1539             : 
    1540     9273010 :     check_collation_set(collid);
    1541             : 
    1542             :     /*
    1543             :      * Unfortunately, there is no strncoll(), so in the non-C locale case we
    1544             :      * have to do some memory copying.  This turns out to be significantly
    1545             :      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
    1546             :      * optimize relatively-short strings by avoiding palloc/pfree overhead.
    1547             :      */
    1548     9272992 :     if (lc_collate_is_c(collid))
    1549             :     {
    1550     4921398 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1551     4921398 :         if ((result == 0) && (len1 != len2))
    1552      151984 :             result = (len1 < len2) ? -1 : 1;
    1553             :     }
    1554             :     else
    1555             :     {
    1556             :         pg_locale_t mylocale;
    1557             : 
    1558     4351594 :         mylocale = pg_newlocale_from_collation(collid);
    1559             : 
    1560             :         /*
    1561             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1562             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1563             :          * memcmp() followed by strcoll() is only trivially slower than
    1564             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1565             :          * very often, and if it does - for example, because there are many
    1566             :          * equal strings in the input - then we win big by avoiding expensive
    1567             :          * collation-aware comparisons.
    1568             :          */
    1569     4351594 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1570     1723046 :             return 0;
    1571             : 
    1572     2628548 :         result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
    1573             : 
    1574             :         /* Break tie if necessary. */
    1575     2628548 :         if (result == 0 && pg_locale_deterministic(mylocale))
    1576             :         {
    1577           0 :             result = memcmp(arg1, arg2, Min(len1, len2));
    1578           0 :             if ((result == 0) && (len1 != len2))
    1579           0 :                 result = (len1 < len2) ? -1 : 1;
    1580             :         }
    1581             :     }
    1582             : 
    1583     7549946 :     return result;
    1584             : }
    1585             : 
    1586             : /* text_cmp()
    1587             :  * Internal comparison function for text strings.
    1588             :  * Returns -1, 0 or 1
    1589             :  */
    1590             : static int
    1591     7628144 : text_cmp(text *arg1, text *arg2, Oid collid)
    1592             : {
    1593             :     char       *a1p,
    1594             :                *a2p;
    1595             :     int         len1,
    1596             :                 len2;
    1597             : 
    1598     7628144 :     a1p = VARDATA_ANY(arg1);
    1599     7628144 :     a2p = VARDATA_ANY(arg2);
    1600             : 
    1601     7628144 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1602     7628144 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1603             : 
    1604     7628144 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1605             : }
    1606             : 
    1607             : /*
    1608             :  * Comparison functions for text strings.
    1609             :  *
    1610             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1611             :  * be careful to free working copies of toasted datums.  Most places don't
    1612             :  * need to be so careful.
    1613             :  */
    1614             : 
    1615             : Datum
    1616     6835294 : texteq(PG_FUNCTION_ARGS)
    1617             : {
    1618     6835294 :     Oid         collid = PG_GET_COLLATION();
    1619     6835294 :     bool        locale_is_c = false;
    1620     6835294 :     pg_locale_t mylocale = 0;
    1621             :     bool        result;
    1622             : 
    1623     6835294 :     check_collation_set(collid);
    1624             : 
    1625     6835294 :     if (lc_collate_is_c(collid))
    1626      160932 :         locale_is_c = true;
    1627             :     else
    1628     6674362 :         mylocale = pg_newlocale_from_collation(collid);
    1629             : 
    1630     6835294 :     if (locale_is_c || pg_locale_deterministic(mylocale))
    1631     6834810 :     {
    1632     6834810 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1633     6834810 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1634             :         Size        len1,
    1635             :                     len2;
    1636             : 
    1637             :         /*
    1638             :          * Since we only care about equality or not-equality, we can avoid all
    1639             :          * the expense of strcoll() here, and just do bitwise comparison.  In
    1640             :          * fact, we don't even have to do a bitwise comparison if we can show
    1641             :          * the lengths of the strings are unequal; which might save us from
    1642             :          * having to detoast one or both values.
    1643             :          */
    1644     6834810 :         len1 = toast_raw_datum_size(arg1);
    1645     6834810 :         len2 = toast_raw_datum_size(arg2);
    1646     6834810 :         if (len1 != len2)
    1647     2501730 :             result = false;
    1648             :         else
    1649             :         {
    1650     4333080 :             text       *targ1 = DatumGetTextPP(arg1);
    1651     4333080 :             text       *targ2 = DatumGetTextPP(arg2);
    1652             : 
    1653     4333080 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1654             :                              len1 - VARHDRSZ) == 0);
    1655             : 
    1656     4333080 :             PG_FREE_IF_COPY(targ1, 0);
    1657     4333080 :             PG_FREE_IF_COPY(targ2, 1);
    1658             :         }
    1659             :     }
    1660             :     else
    1661             :     {
    1662         484 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1663         484 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1664             : 
    1665         484 :         result = (text_cmp(arg1, arg2, collid) == 0);
    1666             : 
    1667         484 :         PG_FREE_IF_COPY(arg1, 0);
    1668         484 :         PG_FREE_IF_COPY(arg2, 1);
    1669             :     }
    1670             : 
    1671     6835294 :     PG_RETURN_BOOL(result);
    1672             : }
    1673             : 
    1674             : Datum
    1675       19416 : textne(PG_FUNCTION_ARGS)
    1676             : {
    1677       19416 :     Oid         collid = PG_GET_COLLATION();
    1678       19416 :     bool        locale_is_c = false;
    1679       19416 :     pg_locale_t mylocale = 0;
    1680             :     bool        result;
    1681             : 
    1682       19416 :     check_collation_set(collid);
    1683             : 
    1684       19416 :     if (lc_collate_is_c(collid))
    1685          18 :         locale_is_c = true;
    1686             :     else
    1687       19398 :         mylocale = pg_newlocale_from_collation(collid);
    1688             : 
    1689       19416 :     if (locale_is_c || pg_locale_deterministic(mylocale))
    1690       19392 :     {
    1691       19392 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1692       19392 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1693             :         Size        len1,
    1694             :                     len2;
    1695             : 
    1696             :         /* See comment in texteq() */
    1697       19392 :         len1 = toast_raw_datum_size(arg1);
    1698       19392 :         len2 = toast_raw_datum_size(arg2);
    1699       19392 :         if (len1 != len2)
    1700        1818 :             result = true;
    1701             :         else
    1702             :         {
    1703       17574 :             text       *targ1 = DatumGetTextPP(arg1);
    1704       17574 :             text       *targ2 = DatumGetTextPP(arg2);
    1705             : 
    1706       17574 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1707             :                              len1 - VARHDRSZ) != 0);
    1708             : 
    1709       17574 :             PG_FREE_IF_COPY(targ1, 0);
    1710       17574 :             PG_FREE_IF_COPY(targ2, 1);
    1711             :         }
    1712             :     }
    1713             :     else
    1714             :     {
    1715          24 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1716          24 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1717             : 
    1718          24 :         result = (text_cmp(arg1, arg2, collid) != 0);
    1719             : 
    1720          24 :         PG_FREE_IF_COPY(arg1, 0);
    1721          24 :         PG_FREE_IF_COPY(arg2, 1);
    1722             :     }
    1723             : 
    1724       19416 :     PG_RETURN_BOOL(result);
    1725             : }
    1726             : 
    1727             : Datum
    1728      123412 : text_lt(PG_FUNCTION_ARGS)
    1729             : {
    1730      123412 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1731      123412 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1732             :     bool        result;
    1733             : 
    1734      123412 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1735             : 
    1736      123394 :     PG_FREE_IF_COPY(arg1, 0);
    1737      123394 :     PG_FREE_IF_COPY(arg2, 1);
    1738             : 
    1739      123394 :     PG_RETURN_BOOL(result);
    1740             : }
    1741             : 
    1742             : Datum
    1743      324354 : text_le(PG_FUNCTION_ARGS)
    1744             : {
    1745      324354 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1746      324354 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1747             :     bool        result;
    1748             : 
    1749      324354 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1750             : 
    1751      324354 :     PG_FREE_IF_COPY(arg1, 0);
    1752      324354 :     PG_FREE_IF_COPY(arg2, 1);
    1753             : 
    1754      324354 :     PG_RETURN_BOOL(result);
    1755             : }
    1756             : 
    1757             : Datum
    1758      113646 : text_gt(PG_FUNCTION_ARGS)
    1759             : {
    1760      113646 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1761      113646 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1762             :     bool        result;
    1763             : 
    1764      113646 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1765             : 
    1766      113646 :     PG_FREE_IF_COPY(arg1, 0);
    1767      113646 :     PG_FREE_IF_COPY(arg2, 1);
    1768             : 
    1769      113646 :     PG_RETURN_BOOL(result);
    1770             : }
    1771             : 
    1772             : Datum
    1773      184724 : text_ge(PG_FUNCTION_ARGS)
    1774             : {
    1775      184724 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1776      184724 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1777             :     bool        result;
    1778             : 
    1779      184724 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1780             : 
    1781      184724 :     PG_FREE_IF_COPY(arg1, 0);
    1782      184724 :     PG_FREE_IF_COPY(arg2, 1);
    1783             : 
    1784      184724 :     PG_RETURN_BOOL(result);
    1785             : }
    1786             : /*
                     :  * text_starts_with
                     :  *      Report whether the first text argument begins with the second.
                     :  *
                     :  * The arguments are fetched as raw Datums.  After check_collation_set(),
                     :  * a locale is looked up only when lc_collate_is_c() is false (otherwise
                     :  * mylocale stays 0); if pg_locale_deterministic() rejects it, the function
                     :  * raises ERRCODE_FEATURE_NOT_SUPPORTED.
                     :  *
                     :  * If the second argument's size exceeds the first's, the answer is false
                     :  * without looking at the data.  Otherwise a leading substring of arg1 is
                     :  * obtained with text_substring() and compared bytewise, via memcmp(),
                     :  * against arg2's data over VARSIZE_ANY_EXHDR(targ2) bytes.
                     :  *
                     :  * NOTE(review): len1/len2 come from toast_raw_datum_size(), and len2 is
                     :  * also what is passed to text_substring() as the requested length.
                     :  * Presumably both sizes include the varlena header, so the size test
                     :  * stays consistent -- confirm against toast_raw_datum_size().
                     :  */
    1787             : Datum
    1788       37914 : text_starts_with(PG_FUNCTION_ARGS)
    1789             : {
    1790       37914 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1791       37914 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1792       37914 :     Oid         collid = PG_GET_COLLATION();
    1793       37914 :     pg_locale_t mylocale = 0;
    1794             :     bool        result;
    1795             :     Size        len1,
    1796             :                 len2;
    1797             : 
    1798       37914 :     check_collation_set(collid);
    1799             : 
    1800       37914 :     if (!lc_collate_is_c(collid))
    1801       37914 :         mylocale = pg_newlocale_from_collation(collid);
    1802             : 
    1803       37914 :     if (!pg_locale_deterministic(mylocale))
    1804           0 :         ereport(ERROR,
    1805             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1806             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1807             : 
    1808       37914 :     len1 = toast_raw_datum_size(arg1);
    1809       37914 :     len2 = toast_raw_datum_size(arg2);
    1810       37914 :     if (len2 > len1)
    1811           0 :         result = false;
    1812             :     else
    1813             :     {
    1814       37914 :         text       *targ1 = text_substring(arg1, 1, len2, false);
    1815       37914 :         text       *targ2 = DatumGetTextPP(arg2);
    1816             : 
    1817       37914 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1818       37914 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
    1819             : 
    1820       37914 :         PG_FREE_IF_COPY(targ1, 0);
    1821       37914 :         PG_FREE_IF_COPY(targ2, 1);
    1822             :     }
    1823             : 
    1824       37914 :     PG_RETURN_BOOL(result);
    1825             : }
    1826             : /*
                     :  * bttextcmp
                     :  *      SQL-callable three-way comparison of two text values.
                     :  *
                     :  * Returns, as an int32, whatever text_cmp() yields for the two arguments
                     :  * under the collation obtained from PG_GET_COLLATION().  Both arguments
                     :  * are handed to PG_FREE_IF_COPY before the result is returned.
                     :  */
    1827             : Datum
    1828     6565864 : bttextcmp(PG_FUNCTION_ARGS)
    1829             : {
    1830     6565864 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1831     6565864 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1832             :     int32       result;
    1833             : 
    1834     6565864 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1835             : 
    1836     6565864 :     PG_FREE_IF_COPY(arg1, 0);
    1837     6565864 :     PG_FREE_IF_COPY(arg2, 1);
    1838             : 
    1839     6565864 :     PG_RETURN_INT32(result);
    1840             : }
    1841             : /*
                     :  * bttextsortsupport
                     :  *      SQL-callable entry point that sets up sort support for type text.
                     :  *
                     :  * Argument 0 is a SortSupport pointer.  varstr_sortsupport() is invoked
                     :  * for TEXTOID with ssup->ssup_collation while ssup->ssup_cxt is the
                     :  * current memory context; the previous context is restored afterwards.
                     :  * Returns void.
                     :  */
    1842             : Datum
    1843       87410 : bttextsortsupport(PG_FUNCTION_ARGS)
    1844             : {
    1845       87410 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1846       87410 :     Oid         collid = ssup->ssup_collation;
    1847             :     MemoryContext oldcontext;
    1848             : 
    1849       87410 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1850             : 
    1851             :     /* Use generic string SortSupport */
    1852       87410 :     varstr_sortsupport(ssup, TEXTOID, collid);
    1853             : 
    1854       87398 :     MemoryContextSwitchTo(oldcontext);
    1855             : 
    1856       87398 :     PG_RETURN_VOID();
    1857             : }
    1858             : 
    1859             : /*
    1860             :  * Generic sortsupport interface for character type's operator classes.
    1861             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    1862             :  * trailing spaces before comparison).
    1863             :  *
    1864             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    1865             :  * same representation.  Callers that always use the C collation (e.g.
    1866             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    1867             :  * this will not work with any other collation, though.
                     :  *
                     :  * Arguments:
                     :  *  ssup    sort support state to fill in; ssup->abbreviate is read to
                     :  *          learn whether the caller wants abbreviated keys.
                     :  *  typid   type being sorted; only BPCHAROID and NAMEOID are treated
                     :  *          specially here, everything else takes the generic path.
                     :  *  collid  collation to sort by; passed to check_collation_set() first.
                     :  *
                     :  * On return ssup->comparator is always set.  When abbreviation is in use
                     :  * or the collation is not C, ssup->ssup_extra additionally points to a
                     :  * newly palloc'd VarStringSortSupport with two TEXTBUFLEN scratch
                     :  * buffers.  When abbreviation is in use, ssup->abbrev_full_comparator
                     :  * receives the comparator chosen above, and ssup->comparator,
                     :  * ssup->abbrev_converter and ssup->abbrev_abort are set to the
                     :  * abbreviated-key routines.
    1868             :  */
    1869             : void
    1870      163398 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
    1871             : {
    1872      163398 :     bool        abbreviate = ssup->abbreviate;
    1873      163398 :     bool        collate_c = false;
    1874             :     VarStringSortSupport *sss;
    1875      163398 :     pg_locale_t locale = 0;
    1876             : 
    1877      163398 :     check_collation_set(collid);
    1878             : 
    1879             :     /*
    1880             :      * If possible, set ssup->comparator to a function which can be used to
    1881             :      * directly compare two datums.  If we can do this, we'll avoid the
    1882             :      * overhead of a trip through the fmgr layer for every comparison, which
    1883             :      * can be substantial.
    1884             :      *
    1885             :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
    1886             :      * which uses strcoll() to perform comparisons.  We use that for the
    1887             :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
    1888             :      * LC_COLLATE = C, we can make things quite a bit faster with
    1889             :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
    1890             :      * memcmp() rather than strcoll().
    1891             :      */
    1892      163386 :     if (lc_collate_is_c(collid))
    1893             :     {
    1894      112072 :         if (typid == BPCHAROID)
    1895          22 :             ssup->comparator = bpcharfastcmp_c;
    1896      112050 :         else if (typid == NAMEOID)
    1897             :         {
    1898       75038 :             ssup->comparator = namefastcmp_c;
    1899             :             /* Not supporting abbreviation with type NAME, for now */
    1900       75038 :             abbreviate = false;
    1901             :         }
    1902             :         else
    1903       37012 :             ssup->comparator = varstrfastcmp_c;
    1904             : 
    1905      112072 :         collate_c = true;
    1906             :     }
    1907             :     else
    1908             :     {
    1909             :         /*
    1910             :          * We need a collation-sensitive comparison.  To make things faster,
    1911             :          * we'll figure out the collation based on the locale id and cache the
    1912             :          * result.
    1913             :          */
    1914       51314 :         locale = pg_newlocale_from_collation(collid);
    1915             : 
    1916             :         /*
    1917             :          * We use varlenafastcmp_locale except for type NAME.
    1918             :          */
    1919       51314 :         if (typid == NAMEOID)
    1920             :         {
    1921           0 :             ssup->comparator = namefastcmp_locale;
    1922             :             /* Not supporting abbreviation with type NAME, for now */
    1923           0 :             abbreviate = false;
    1924             :         }
    1925             :         else
    1926       51314 :             ssup->comparator = varlenafastcmp_locale;
    1927             :     }
    1928             : 
    1929             :     /*
    1930             :      * Unfortunately, it seems that abbreviation for non-C collations is
    1931             :      * broken on many common platforms; see pg_strxfrm_enabled().
    1932             :      *
    1933             :      * Even apart from the risk of broken locales, it's possible that there
    1934             :      * are platforms where the use of abbreviated keys should be disabled at
    1935             :      * compile time.  Having only 4 byte datums could make worst-case
    1936             :      * performance drastically more likely, for example.  Moreover, macOS's
    1937             :      * strxfrm() implementation is known to not effectively concentrate a
    1938             :      * significant amount of entropy from the original string in earlier
    1939             :      * transformed blobs.  It's possible that other supported platforms are
    1940             :      * similarly encumbered.  So, if we ever get past disabling this
    1941             :      * categorically, we may still want or need to disable it for particular
    1942             :      * platforms.
    1943             :      */
    1944      163386 :     if (!collate_c && !pg_strxfrm_enabled(locale))
    1945           0 :         abbreviate = false;
    1946             : 
    1947             :     /*
    1948             :      * If we're using abbreviated keys, or if we're using a locale-aware
    1949             :      * comparison, we need to initialize a VarStringSortSupport object. Both
    1950             :      * cases will make use of the temporary buffers we initialize here for
    1951             :      * scratch space (and to detect requirement for BpChar semantics from
    1952             :      * caller), and the abbreviation case requires additional state.
    1953             :      */
    1954      163386 :     if (abbreviate || !collate_c)
    1955             :     {
    1956       54858 :         sss = palloc(sizeof(VarStringSortSupport));
    1957       54858 :         sss->buf1 = palloc(TEXTBUFLEN);
    1958       54858 :         sss->buflen1 = TEXTBUFLEN;
    1959       54858 :         sss->buf2 = palloc(TEXTBUFLEN);
    1960       54858 :         sss->buflen2 = TEXTBUFLEN;
    1961             :         /* Start with invalid values */
    1962       54858 :         sss->last_len1 = -1;
    1963       54858 :         sss->last_len2 = -1;
    1964             :         /* Initialize */
    1965       54858 :         sss->last_returned = 0;
    1966       54858 :         sss->locale = locale;
    1967             : 
    1968             :         /*
    1969             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    1970             :          * constantly keep track of the variety of data that buf1 and buf2
    1971             :          * currently contain.
    1972             :          *
    1973             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    1974             :          * conversions and comparisons are batched into two distinct phases,
    1975             :          * but the correctness of caching cannot hinge upon this.  For
    1976             :          * comparison caching, buffer state is only trusted if cache_blob is
    1977             :          * found set to false, whereas strxfrm() caching only trusts the state
    1978             :          * when cache_blob is found set to true.
    1979             :          *
    1980             :          * Arbitrarily initialize cache_blob to true.
    1981             :          */
    1982       54858 :         sss->cache_blob = true;
    1983       54858 :         sss->collate_c = collate_c;
    1984       54858 :         sss->typid = typid;
    1985       54858 :         ssup->ssup_extra = sss;
    1986             : 
    1987             :         /*
    1988             :          * If possible, plan to use the abbreviated keys optimization.  The
    1989             :          * core code may switch back to authoritative comparator should
    1990             :          * abbreviation be aborted.
    1991             :          */
    1992       54858 :         if (abbreviate)
    1993             :         {
    1994       47492 :             sss->prop_card = 0.20;
    1995       47492 :             initHyperLogLog(&sss->abbr_card, 10);
    1996       47492 :             initHyperLogLog(&sss->full_card, 10);
    1997       47492 :             ssup->abbrev_full_comparator = ssup->comparator;
    1998       47492 :             ssup->comparator = ssup_datum_unsigned_cmp;
    1999       47492 :             ssup->abbrev_converter = varstr_abbrev_convert;
    2000       47492 :             ssup->abbrev_abort = varstr_abbrev_abort;
    2001             :         }
    2002             :     }
    2003      163386 : }
    2004             : 
    2005             : /*
    2006             :  * sortsupport comparison func (for C locale case)
                     :  *
                     :  * Compares the data bytes of the two varlena datums with memcmp() over the
                     :  * shorter length; when that prefix is equal and the lengths differ, the
                     :  * shorter value sorts first.  Returns a negative, zero or positive int.
                     :  * ssup is not referenced.  Any pointer returned by DatumGetVarStringPP()
                     :  * that differs from the incoming Datum is pfree'd before returning.
    2007             :  */
    2008             : static int
    2009   114424040 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2010             : {
    2011   114424040 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2012   114424040 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2013             :     char       *a1p,
    2014             :                *a2p;
    2015             :     int         len1,
    2016             :                 len2,
    2017             :                 result;
    2018             : 
    2019   114424040 :     a1p = VARDATA_ANY(arg1);
    2020   114424040 :     a2p = VARDATA_ANY(arg2);
    2021             : 
    2022   114424040 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2023   114424040 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2024             : 
    2025   114424040 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2026   114424040 :     if ((result == 0) && (len1 != len2))
    2027     2763736 :         result = (len1 < len2) ? -1 : 1;    /* common prefix: shorter first */
    2028             : 
    2029             :     /* We can't afford to leak memory here. */
    2030   114424040 :     if (PointerGetDatum(arg1) != x)
    2031           0 :         pfree(arg1);
    2032   114424040 :     if (PointerGetDatum(arg2) != y)
    2033           0 :         pfree(arg2);
    2034             : 
    2035   114424040 :     return result;
    2036             : }
    2037             : 
    2038             : /*
    2039             :  * sortsupport comparison func (for BpChar C locale case)
    2040             :  *
    2041             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    2042             :  * varstr_sortsupport BpChar case, modeled on
    2043             :  * internal_bpchar_pattern_compare().
                     :  *
                     :  * Same shape as varstrfastcmp_c(), except that each length is first
                     :  * reduced with bpchartruelen() before the memcmp() and the length
                     :  * tie-break.  ssup is not referenced.  Any pointer returned by
                     :  * DatumGetBpCharPP() that differs from the incoming Datum is pfree'd.
    2044             :  */
    2045             : static int
    2046          16 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2047             : {
    2048          16 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2049          16 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2050             :     char       *a1p,
    2051             :                *a2p;
    2052             :     int         len1,
    2053             :                 len2,
    2054             :                 result;
    2055             : 
    2056          16 :     a1p = VARDATA_ANY(arg1);
    2057          16 :     a2p = VARDATA_ANY(arg2);
    2058             : 
    2059          16 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2060          16 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2061             : 
    2062          16 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2063          16 :     if ((result == 0) && (len1 != len2))
    2064           0 :         result = (len1 < len2) ? -1 : 1;    /* common prefix: shorter first */
    2065             : 
    2066             :     /* We can't afford to leak memory here. */
    2067          16 :     if (PointerGetDatum(arg1) != x)
    2068           0 :         pfree(arg1);
    2069          16 :     if (PointerGetDatum(arg2) != y)
    2070           0 :         pfree(arg2);
    2071             : 
    2072          16 :     return result;
    2073             : }
    2074             : 
    2075             : /*
    2076             :  * sortsupport comparison func (for NAME C locale case)
                     :  *
                     :  * Returns strncmp() of the two names' strings, bounded by NAMEDATALEN.
                     :  * ssup is not referenced and nothing is allocated or freed here.
    2077             :  */
    2078             : static int
    2079   118524664 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
    2080             : {
    2081   118524664 :     Name        arg1 = DatumGetName(x);
    2082   118524664 :     Name        arg2 = DatumGetName(y);
    2083             : 
    2084   118524664 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
    2085             : }
    2086             : 
    2087             : /*
    2088             :  * sortsupport comparison func (for locale case with all varlena types)
                     :  *
                     :  * Extracts the data pointer and data length of both varlena datums and
                     :  * passes them, together with ssup, to varstrfastcmp_locale(), whose result
                     :  * is returned.  Any pointer returned by DatumGetVarStringPP() that differs
                     :  * from the incoming Datum is pfree'd before returning.
    2089             :  */
    2090             : static int
    2091    37242482 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2092             : {
    2093    37242482 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2094    37242482 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2095             :     char       *a1p,
    2096             :                *a2p;
    2097             :     int         len1,
    2098             :                 len2,
    2099             :                 result;
    2100             : 
    2101    37242482 :     a1p = VARDATA_ANY(arg1);
    2102    37242482 :     a2p = VARDATA_ANY(arg2);
    2103             : 
    2104    37242482 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2105    37242482 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2106             : 
    2107    37242482 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
    2108             : 
    2109             :     /* We can't afford to leak memory here. */
    2110    37242482 :     if (PointerGetDatum(arg1) != x)
    2111           6 :         pfree(arg1);
    2112    37242482 :     if (PointerGetDatum(arg2) != y)
    2113           6 :         pfree(arg2);
    2114             : 
    2115    37242482 :     return result;
    2116             : }
    2117             : 
    2118             : /*
    2119             :  * sortsupport comparison func (for locale case with NAME type)
                     :  *
                     :  * Passes each name's string and its strlen() to varstrfastcmp_locale(),
                     :  * together with ssup, and returns that function's result.
    2120             :  */
    2121             : static int
    2122           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2123             : {
    2124           0 :     Name        arg1 = DatumGetName(x);
    2125           0 :     Name        arg2 = DatumGetName(y);
    2126             : 
    2127           0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
    2128           0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
    2129             :                                 ssup);
    2130             : }
    2131             : 
    2132             : /*
    2133             :  * sortsupport comparison func for locale cases
                     :  *
                     :  * a1p/len1 and a2p/len2 describe the two strings as pointer plus byte
                     :  * count; they are copied into sss->buf1 / sss->buf2 and NUL-terminated
                     :  * there before being handed to pg_strcoll().  ssup->ssup_extra must point
                     :  * to the VarStringSortSupport holding those buffers, the locale, and the
                     :  * cached state (last_len1, last_len2, last_returned, cache_blob).
                     :  *
                     :  * Returns 0 immediately for bytewise-identical inputs.  Otherwise returns
                     :  * either the cached result of the previous call (when both buffers still
                     :  * match the inputs and cache_blob is false) or the pg_strcoll() result,
                     :  * falling back to strcmp() on a tie when pg_locale_deterministic() is
                     :  * true for the locale.  For BPCHAROID the lengths are first reduced with
                     :  * bpchartruelen().
    2134             :  */
    2135             : static int
    2136    37242482 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
    2137             : {
    2138    37242482 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2139             :     int         result;
    2140             :     bool        arg1_match;
    2141             : 
    2142             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2143    37242482 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2144             :     {
    2145             :         /*
    2146             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2147             :          * last_len2.  Existing contents of buffers might still be used by
    2148             :          * next call.
    2149             :          *
    2150             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2151             :          * even though that implies that the memcmp() will usually be
    2152             :          * performed for BpChar callers (though multibyte characters could
    2153             :          * still prevent that from occurring).  The memcmp() is still very
    2154             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2155             :          * (not limited to padding), so we need make no distinction between
    2156             :          * padding space characters and "real" space characters.
    2157             :          */
    2158    13619292 :         return 0;
    2159             :     }
    2160             : 
    2161    23623190 :     if (sss->typid == BPCHAROID)
    2162             :     {
    2163             :         /* Get true number of bytes, ignoring trailing spaces */
    2164       35866 :         len1 = bpchartruelen(a1p, len1);
    2165       35866 :         len2 = bpchartruelen(a2p, len2);
    2166             :     }
    2167             : 
    2168    23623190 :     if (len1 >= sss->buflen1)   /* buf1 must hold len1 bytes plus a NUL */
    2169             :     {
    2170           0 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2171           0 :         sss->buf1 = repalloc(sss->buf1, sss->buflen1);
    2172             :     }
    2173    23623190 :     if (len2 >= sss->buflen2)   /* buf2 must hold len2 bytes plus a NUL */
    2174             :     {
    2175           6 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2176           6 :         sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2177             :     }
    2178             : 
    2179             :     /*
    2180             :      * We're likely to be asked to compare the same strings repeatedly, and
    2181             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2182             :      * comparisons, even though in general there is no reason to think that
    2183             :      * that will work out (every string datum may be unique).  Caching does
    2184             :      * not slow things down measurably when it doesn't work out, and can speed
    2185             :      * things up by rather a lot when it does.  In part, this is because the
    2186             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2187             :      * when the last comparison's result cannot be reused.
    2188             :      */
    2189    23623190 :     arg1_match = true;
    2190    23623190 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2191             :     {
    2192    21009864 :         arg1_match = false;
    2193    21009864 :         memcpy(sss->buf1, a1p, len1);
    2194    21009864 :         sss->buf1[len1] = '\0';
    2195    21009864 :         sss->last_len1 = len1;
    2196             :     }
    2197             : 
    2198             :     /*
    2199             :      * If we're comparing the same two strings as last time, we can return the
    2200             :      * same answer without calling strcoll() again.  This is more likely than
    2201             :      * it seems (at least with moderate to low cardinality sets), because
    2202             :      * quicksort compares the same pivot against many values.
    2203             :      */
    2204    23623190 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2205             :     {
    2206     3920018 :         memcpy(sss->buf2, a2p, len2);
    2207     3920018 :         sss->buf2[len2] = '\0';
    2208     3920018 :         sss->last_len2 = len2;
    2209             :     }
    2210    19703172 :     else if (arg1_match && !sss->cache_blob)
    2211             :     {
    2212             :         /* Use result cached following last actual strcoll() call */
    2213     2226204 :         return sss->last_returned;
    2214             :     }
    2215             : 
    2216    21396986 :     result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
    2217             : 
    2218             :     /* Break tie if necessary. */
    2219    21396986 :     if (result == 0 && pg_locale_deterministic(sss->locale))
    2220           0 :         result = strcmp(sss->buf1, sss->buf2);
    2221             : 
    2222             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2223    21396986 :     sss->cache_blob = false;    /* buffers now hold original strings */
    2224    21396986 :     sss->last_returned = result;
    2225    21396986 :     return result;
    2226             : }
    2227             : 
    2228             : /*
    2229             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    2230             :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    2231             :  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
    2232             :  * stored in reverse order), and treat it as an unsigned integer.  When the "C"
    2233             :  * locale is used, or in case of bytea, just memcpy() from original instead.
    2234             :  */
    2235             : static Datum
    2236     1152366 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2237             : {
    2238     1152366 :     const size_t max_prefix_bytes = sizeof(Datum);
    2239     1152366 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2240     1152366 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2241     1152366 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2242             : 
    2243             :     /* working state */
    2244             :     Datum       res;
    2245             :     char       *pres;
    2246             :     int         len;
    2247             :     uint32      hash;
    2248             : 
    2249     1152366 :     pres = (char *) &res;
    2250             :     /* memset(), so any non-overwritten bytes are NUL */
    2251     1152366 :     memset(pres, 0, max_prefix_bytes);
    2252     1152366 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2253             : 
    2254             :     /* Get number of bytes, ignoring trailing spaces */
    2255     1152366 :     if (sss->typid == BPCHAROID)
    2256        2592 :         len = bpchartruelen(authoritative_data, len);
    2257             : 
    2258             :     /*
    2259             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2260             :      * abbreviate keys.  The full comparator for the C locale is always
    2261             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2262             :      * always force the C collation -- bytea isn't a collatable type, but this
    2263             :      * approach is convenient) to use strxfrm().  This is because bytea
    2264             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2265             :      *
    2266             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2267             :      * strings because abbreviated cmp need not make a distinction between
    2268             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2269             :      * authoritative representation.  Hopefully a comparison at or past one
    2270             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2271             :      * without consulting the authoritative representation; specifically, some
    2272             :      * later non-NUL byte in the longer string can resolve the comparison
    2273             :      * against a subsequent terminating NUL in the shorter string.  There will
    2274             :      * usually be what is effectively a "length-wise" resolution there and
    2275             :      * then.
    2276             :      *
    2277             :      * If that doesn't work out -- if all bytes in the longer string
    2278             :      * positioned at or past the offset of the smaller string's (first)
    2279             :      * terminating NUL are actually representative of NUL bytes in the
    2280             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2281             :      * towards the end of the longer string iff it happens to still be small)
    2282             :      * -- then an authoritative tie-breaker will happen, and do the right
    2283             :      * thing: explicitly consider string length.
    2284             :      */
    2285     1152366 :     if (sss->collate_c)
    2286      512708 :         memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
    2287             :     else
    2288             :     {
    2289             :         Size        bsize;
    2290             : 
    2291             :         /*
    2292             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2293             :          * analogs.
    2294             :          */
    2295             : 
    2296             :         /* By convention, we use buffer 1 to store and NUL-terminate */
    2297      639658 :         if (len >= sss->buflen1)
    2298             :         {
    2299          24 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2300          24 :             sss->buf1 = repalloc(sss->buf1, sss->buflen1);
    2301             :         }
    2302             : 
    2303             :         /* Might be able to reuse strxfrm() blob from last call */
    2304      639658 :         if (sss->last_len1 == len && sss->cache_blob &&
    2305      623826 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2306             :         {
    2307      306354 :             memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
    2308             :             /* No change affecting cardinality, so no hashing required */
    2309      306354 :             goto done;
    2310             :         }
    2311             : 
    2312      333304 :         memcpy(sss->buf1, authoritative_data, len);
    2313             : 
    2314             :         /*
    2315             :          * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings.
    2316             :          */
    2317      333304 :         sss->buf1[len] = '\0';
    2318      333304 :         sss->last_len1 = len;
    2319             : 
    2320      333304 :         if (pg_strxfrm_prefix_enabled(sss->locale))
    2321             :         {
    2322      333304 :             if (sss->buflen2 < max_prefix_bytes)
    2323             :             {
    2324           0 :                 sss->buflen2 = Max(max_prefix_bytes,
    2325             :                                    Min(sss->buflen2 * 2, MaxAllocSize));
    2326           0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2327             :             }
    2328             : 
    2329      333304 :             bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
    2330             :                                       max_prefix_bytes, sss->locale);
    2331      333304 :             sss->last_len2 = bsize;
    2332             :         }
    2333             :         else
    2334             :         {
    2335             :             /*
    2336             :              * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
    2337             :              * again.  The pg_strxfrm() function leaves the result buffer
    2338             :              * content undefined if the result did not fit, so we need to
    2339             :              * retry until everything fits, even though we only need the first
    2340             :              * few bytes in the end.
    2341             :              */
    2342             :             for (;;)
    2343             :             {
    2344           0 :                 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
    2345             :                                    sss->locale);
    2346             : 
    2347           0 :                 sss->last_len2 = bsize;
    2348           0 :                 if (bsize < sss->buflen2)
    2349           0 :                     break;
    2350             : 
    2351             :                 /*
    2352             :                  * Grow buffer and retry.
    2353             :                  */
    2354           0 :                 sss->buflen2 = Max(bsize + 1,
    2355             :                                    Min(sss->buflen2 * 2, MaxAllocSize));
    2356           0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2357             :             }
    2358             :         }
    2359             : 
    2360             :         /*
    2361             :          * Every Datum byte is always compared.  This is safe because the
    2362             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2363             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2364             :          * logically representing termination.
    2365             :          *
    2366             :          * (Actually, even if there were NUL bytes in the blob it would be
    2367             :          * okay.  See remarks on bytea case above.)
    2368             :          */
    2369      333304 :         memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
    2370             :     }
    2371             : 
    2372             :     /*
    2373             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2374             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2375             :      * the worst case, where we do many string transformations for no saving
    2376             :      * in full strcoll()-based comparisons.  These statistics are used by
    2377             :      * varstr_abbrev_abort().
    2378             :      *
    2379             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    2380             :      * in order to compensate for cases where differences are past
    2381             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    2382             :      */
    2383      846012 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2384             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2385             : 
    2386      846012 :     if (len > PG_CACHE_LINE_SIZE)
    2387          46 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2388             : 
    2389      846012 :     addHyperLogLog(&sss->full_card, hash);
    2390             : 
    2391             :     /* Hash abbreviated key */
    2392             : #if SIZEOF_DATUM == 8
    2393             :     {
    2394             :         uint32      lohalf,
    2395             :                     hihalf;
    2396             : 
    2397      846012 :         lohalf = (uint32) res;
    2398      846012 :         hihalf = (uint32) (res >> 32);
    2399      846012 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2400             :     }
    2401             : #else                           /* SIZEOF_DATUM != 8 */
    2402             :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2403             : #endif
    2404             : 
    2405      846012 :     addHyperLogLog(&sss->abbr_card, hash);
    2406             : 
    2407             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2408      846012 :     sss->cache_blob = true;
    2409     1152366 : done:
    2410             : 
    2411             :     /*
    2412             :      * Byteswap on little-endian machines.
    2413             :      *
    2414             :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
    2415             :      * 3-way comparator) works correctly on all platforms.  If we didn't do
    2416             :      * this, the comparator would have to call memcmp() with a pair of
    2417             :      * pointers to the first byte of each abbreviated key, which is slower.
    2418             :      */
    2419     1152366 :     res = DatumBigEndianToNative(res);
    2420             : 
    2421             :     /* Don't leak memory here */
    2422     1152366 :     if (PointerGetDatum(authoritative) != original)
    2423          12 :         pfree(authoritative);
    2424             : 
    2425     1152366 :     return res;
    2426             : }
    2427             : 
    2428             : /*
    2429             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2430             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    2431             :  * should be aborted, based on its projected effectiveness.
    2432             :  */
    2433             : static bool
    2434        3744 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2435             : {
    2436        3744 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2437             :     double      abbrev_distinct,
    2438             :                 key_distinct;
    2439             : 
    2440             :     Assert(ssup->abbreviate);
    2441             : 
    2442             :     /* Have a little patience */
    2443        3744 :     if (memtupcount < 100)
    2444        2300 :         return false;
    2445             : 
    2446        1444 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2447        1444 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2448             : 
    2449             :     /*
    2450             :      * Clamp cardinality estimates to at least one distinct value.  While
    2451             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2452             :      * that might misrepresent costs if we failed to clamp.
    2453             :      */
    2454        1444 :     if (abbrev_distinct <= 1.0)
    2455           0 :         abbrev_distinct = 1.0;
    2456             : 
    2457        1444 :     if (key_distinct <= 1.0)
    2458           0 :         key_distinct = 1.0;
    2459             : 
    2460             :     /*
    2461             :      * In the worst case all abbreviated keys are identical, while at the same
    2462             :      * time there are differences within full key strings not captured in
    2463             :      * abbreviations.
    2464             :      */
    2465             : #ifdef TRACE_SORT
    2466        1444 :     if (trace_sort)
    2467             :     {
    2468           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2469             : 
    2470           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2471             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2472             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2473             :              sss->prop_card);
    2474             :     }
    2475             : #endif
    2476             : 
    2477             :     /*
    2478             :      * If the number of distinct abbreviated keys approximately matches the
    2479             :      * number of distinct authoritative original keys, that's reason enough to
    2480             :      * proceed.  We can win even with a very low cardinality set if most
    2481             :      * tie-breakers only memcmp().  This is by far the most important
    2482             :      * consideration.
    2483             :      *
    2484             :      * While comparisons that are resolved at the abbreviated key level are
    2485             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2486             :      * those two outcomes are so much cheaper than a full strcoll() once
    2487             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2488             :      * cardinality against the overall size of the set in order to more
    2489             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2490             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2491             :      * resolution are equivalent.
    2492             :      */
    2493        1444 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2494             :     {
    2495             :         /*
    2496             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2497             :          * aggressively for next call.
    2498             :          *
    2499             :          * This is useful because the number of comparisons required on
    2500             :          * average increases at a linearithmic rate, and at roughly 10,000
    2501             :          * tuples that factor will start to dominate over the linear costs of
    2502             :          * string transformation (this is a conservative estimate).  The decay
    2503             :          * rate is chosen to be a little less aggressive than halving -- which
    2504             :          * (since we're called at points at which memtupcount has doubled)
    2505             :          * would never see the cost model actually abort past the first call
    2506             :          * following a decay.  This decay rate is mostly a precaution against
    2507             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2508             :          * full key cardinality.  The decay also serves to prevent a marginal
    2509             :          * case from being aborted too late, when too much has already been
    2510             :          * invested in string transformation.
    2511             :          *
    2512             :          * It's possible for sets of several million distinct strings with
    2513             :          * mere tens of thousands of distinct abbreviated keys to still
    2514             :          * benefit very significantly.  This will generally occur provided
    2515             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2516             :          * set's full keys. If it isn't so, we hope to catch that early and
    2517             :          * abort.  If it isn't caught early, by the time the problem is
    2518             :          * apparent it's probably not worth aborting.
    2519             :          */
    2520        1386 :         if (memtupcount > 10000)
    2521           6 :             sss->prop_card *= 0.65;
    2522             : 
    2523        1386 :         return false;
    2524             :     }
    2525             : 
    2526             :     /*
    2527             :      * Abort abbreviation strategy.
    2528             :      *
    2529             :      * The worst case, where all abbreviated keys are identical while all
    2530             :      * original strings differ will typically only see a regression of about
    2531             :      * 10% in execution time for small to medium sized lists of strings.
    2532             :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
    2533             :      * often expect very large improvements, particularly with sets of strings
    2534             :      * of moderately high to high abbreviated cardinality.  There is little to
    2535             :      * lose but much to gain, which our strategy reflects.
    2536             :      */
    2537             : #ifdef TRACE_SORT
    2538          58 :     if (trace_sort)
    2539           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2540             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2541             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2542             : #endif
    2543             : 
    2544          58 :     return true;
    2545             : }
    2546             : 
    2547             : /*
    2548             :  * Generic equalimage support function for character type's operator classes.
    2549             :  * Disables the use of deduplication with nondeterministic collations.
    2550             :  */
    2551             : Datum
    2552       26052 : btvarstrequalimage(PG_FUNCTION_ARGS)
    2553             : {
    2554             :     /* Oid      opcintype = PG_GETARG_OID(0); */
    2555       26052 :     Oid         collid = PG_GET_COLLATION();
    2556             : 
    2557       26052 :     check_collation_set(collid);
    2558             : 
    2559       26052 :     if (lc_collate_is_c(collid) ||
    2560          44 :         collid == DEFAULT_COLLATION_OID ||
    2561          44 :         get_collation_isdeterministic(collid))
    2562       26032 :         PG_RETURN_BOOL(true);
    2563             :     else
    2564          20 :         PG_RETURN_BOOL(false);
    2565             : }
    2566             : 
    2567             : Datum
    2568      229560 : text_larger(PG_FUNCTION_ARGS)
    2569             : {
    2570      229560 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2571      229560 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2572             :     text       *result;
    2573             : 
    2574      229560 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
    2575             : 
    2576      229560 :     PG_RETURN_TEXT_P(result);
    2577             : }
    2578             : 
    2579             : Datum
    2580       86076 : text_smaller(PG_FUNCTION_ARGS)
    2581             : {
    2582       86076 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2583       86076 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2584             :     text       *result;
    2585             : 
    2586       86076 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
    2587             : 
    2588       86076 :     PG_RETURN_TEXT_P(result);
    2589             : }
    2590             : 
    2591             : 
    2592             : /*
    2593             :  * Cross-type comparison functions for types text and name.
    2594             :  */
    2595             : 
    2596             : Datum
    2597      192382 : nameeqtext(PG_FUNCTION_ARGS)
    2598             : {
    2599      192382 :     Name        arg1 = PG_GETARG_NAME(0);
    2600      192382 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2601      192382 :     size_t      len1 = strlen(NameStr(*arg1));
    2602      192382 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2603      192382 :     Oid         collid = PG_GET_COLLATION();
    2604             :     bool        result;
    2605             : 
    2606      192382 :     check_collation_set(collid);
    2607             : 
    2608      192382 :     if (collid == C_COLLATION_OID)
    2609      291990 :         result = (len1 == len2 &&
    2610      129508 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2611             :     else
    2612       29900 :         result = (varstr_cmp(NameStr(*arg1), len1,
    2613       29900 :                              VARDATA_ANY(arg2), len2,
    2614             :                              collid) == 0);
    2615             : 
    2616      192382 :     PG_FREE_IF_COPY(arg2, 1);
    2617             : 
    2618      192382 :     PG_RETURN_BOOL(result);
    2619             : }
    2620             : 
    2621             : Datum
    2622        6786 : texteqname(PG_FUNCTION_ARGS)
    2623             : {
    2624        6786 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2625        6786 :     Name        arg2 = PG_GETARG_NAME(1);
    2626        6786 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2627        6786 :     size_t      len2 = strlen(NameStr(*arg2));
    2628        6786 :     Oid         collid = PG_GET_COLLATION();
    2629             :     bool        result;
    2630             : 
    2631        6786 :     check_collation_set(collid);
    2632             : 
    2633        6786 :     if (collid == C_COLLATION_OID)
    2634         564 :         result = (len1 == len2 &&
    2635         180 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2636             :     else
    2637        6402 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
    2638        6402 :                              NameStr(*arg2), len2,
    2639             :                              collid) == 0);
    2640             : 
    2641        6786 :     PG_FREE_IF_COPY(arg1, 0);
    2642             : 
    2643        6786 :     PG_RETURN_BOOL(result);
    2644             : }
    2645             : 
    2646             : Datum
    2647          36 : namenetext(PG_FUNCTION_ARGS)
    2648             : {
    2649          36 :     Name        arg1 = PG_GETARG_NAME(0);
    2650          36 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2651          36 :     size_t      len1 = strlen(NameStr(*arg1));
    2652          36 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2653          36 :     Oid         collid = PG_GET_COLLATION();
    2654             :     bool        result;
    2655             : 
    2656          36 :     check_collation_set(collid);
    2657             : 
    2658          36 :     if (collid == C_COLLATION_OID)
    2659          18 :         result = !(len1 == len2 &&
    2660           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2661             :     else
    2662          18 :         result = !(varstr_cmp(NameStr(*arg1), len1,
    2663          18 :                               VARDATA_ANY(arg2), len2,
    2664             :                               collid) == 0);
    2665             : 
    2666          36 :     PG_FREE_IF_COPY(arg2, 1);
    2667             : 
    2668          36 :     PG_RETURN_BOOL(result);
    2669             : }
    2670             : 
    2671             : Datum
    2672          18 : textnename(PG_FUNCTION_ARGS)
    2673             : {
    2674          18 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2675          18 :     Name        arg2 = PG_GETARG_NAME(1);
    2676          18 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2677          18 :     size_t      len2 = strlen(NameStr(*arg2));
    2678          18 :     Oid         collid = PG_GET_COLLATION();
    2679             :     bool        result;
    2680             : 
    2681          18 :     check_collation_set(collid);
    2682             : 
    2683          18 :     if (collid == C_COLLATION_OID)
    2684           0 :         result = !(len1 == len2 &&
    2685           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2686             :     else
    2687          18 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
    2688          18 :                               NameStr(*arg2), len2,
    2689             :                               collid) == 0);
    2690             : 
    2691          18 :     PG_FREE_IF_COPY(arg1, 0);
    2692             : 
    2693          18 :     PG_RETURN_BOOL(result);
    2694             : }
    2695             : 
    2696             : Datum
    2697      148574 : btnametextcmp(PG_FUNCTION_ARGS)
    2698             : {
    2699      148574 :     Name        arg1 = PG_GETARG_NAME(0);
    2700      148574 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2701             :     int32       result;
    2702             : 
    2703      297148 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
    2704      297148 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
    2705             :                         PG_GET_COLLATION());
    2706             : 
    2707      148574 :     PG_FREE_IF_COPY(arg2, 1);
    2708             : 
    2709      148574 :     PG_RETURN_INT32(result);
    2710             : }
    2711             : 
    2712             : Datum
    2713           0 : bttextnamecmp(PG_FUNCTION_ARGS)
    2714             : {
    2715           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2716           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2717             :     int32       result;
    2718             : 
    2719           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
    2720           0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
    2721             :                         PG_GET_COLLATION());
    2722             : 
    2723           0 :     PG_FREE_IF_COPY(arg1, 0);
    2724             : 
    2725           0 :     PG_RETURN_INT32(result);
    2726             : }
    2727             : 
    2728             : #define CmpCall(cmpfunc) \
    2729             :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
    2730             :                                           PG_GET_COLLATION(), \
    2731             :                                           PG_GETARG_DATUM(0), \
    2732             :                                           PG_GETARG_DATUM(1)))
    2733             : 
    2734             : Datum
    2735       47414 : namelttext(PG_FUNCTION_ARGS)
    2736             : {
    2737       47414 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
    2738             : }
    2739             : 
    2740             : Datum
    2741           0 : nameletext(PG_FUNCTION_ARGS)
    2742             : {
    2743           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
    2744             : }
    2745             : 
    2746             : Datum
    2747           0 : namegttext(PG_FUNCTION_ARGS)
    2748             : {
    2749           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
    2750             : }
    2751             : 
    2752             : Datum
    2753       45790 : namegetext(PG_FUNCTION_ARGS)
    2754             : {
    2755       45790 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
    2756             : }
    2757             : 
    2758             : Datum
    2759           0 : textltname(PG_FUNCTION_ARGS)
    2760             : {
    2761           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
    2762             : }
    2763             : 
    2764             : Datum
    2765           0 : textlename(PG_FUNCTION_ARGS)
    2766             : {
    2767           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
    2768             : }
    2769             : 
    2770             : Datum
    2771           0 : textgtname(PG_FUNCTION_ARGS)
    2772             : {
    2773           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
    2774             : }
    2775             : 
    2776             : Datum
    2777           0 : textgename(PG_FUNCTION_ARGS)
    2778             : {
    2779           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
    2780             : }
    2781             : 
    2782             : #undef CmpCall
    2783             : 
    2784             : 
    2785             : /*
    2786             :  * The following operators support character-by-character comparison
    2787             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    2788             :  * Note that the regular texteq/textne comparison operators, and regular
    2789             :  * support functions 1 and 2 with "C" collation are assumed to be
    2790             :  * compatible with these!
    2791             :  */
    2792             : 
    2793             : static int
    2794      152080 : internal_text_pattern_compare(text *arg1, text *arg2)
    2795             : {
    2796             :     int         result;
    2797             :     int         len1,
    2798             :                 len2;
    2799             : 
    2800      152080 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2801      152080 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2802             : 
    2803      152080 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    2804      152080 :     if (result != 0)
    2805      152026 :         return result;
    2806          54 :     else if (len1 < len2)
    2807           0 :         return -1;
    2808          54 :     else if (len1 > len2)
    2809          18 :         return 1;
    2810             :     else
    2811          36 :         return 0;
    2812             : }
    2813             : 
    2814             : 
    2815             : Datum
    2816       39538 : text_pattern_lt(PG_FUNCTION_ARGS)
    2817             : {
    2818       39538 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2819       39538 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2820             :     int         result;
    2821             : 
    2822       39538 :     result = internal_text_pattern_compare(arg1, arg2);
    2823             : 
    2824       39538 :     PG_FREE_IF_COPY(arg1, 0);
    2825       39538 :     PG_FREE_IF_COPY(arg2, 1);
    2826             : 
    2827       39538 :     PG_RETURN_BOOL(result < 0);
    2828             : }
    2829             : 
    2830             : 
    2831             : Datum
    2832       37510 : text_pattern_le(PG_FUNCTION_ARGS)
    2833             : {
    2834       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2835       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2836             :     int         result;
    2837             : 
    2838       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2839             : 
    2840       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2841       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2842             : 
    2843       37510 :     PG_RETURN_BOOL(result <= 0);
    2844             : }
    2845             : 
    2846             : 
    2847             : Datum
    2848       37510 : text_pattern_ge(PG_FUNCTION_ARGS)
    2849             : {
    2850       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2851       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2852             :     int         result;
    2853             : 
    2854       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2855             : 
    2856       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2857       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2858             : 
    2859       37510 :     PG_RETURN_BOOL(result >= 0);
    2860             : }
    2861             : 
    2862             : 
    2863             : Datum
    2864       37510 : text_pattern_gt(PG_FUNCTION_ARGS)
    2865             : {
    2866       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2867       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2868             :     int         result;
    2869             : 
    2870       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2871             : 
    2872       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2873       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2874             : 
    2875       37510 :     PG_RETURN_BOOL(result > 0);
    2876             : }
    2877             : 
    2878             : 
    2879             : Datum
    2880          12 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    2881             : {
    2882          12 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2883          12 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2884             :     int         result;
    2885             : 
    2886          12 :     result = internal_text_pattern_compare(arg1, arg2);
    2887             : 
    2888          12 :     PG_FREE_IF_COPY(arg1, 0);
    2889          12 :     PG_FREE_IF_COPY(arg2, 1);
    2890             : 
    2891          12 :     PG_RETURN_INT32(result);
    2892             : }
    2893             : 
    2894             : 
    2895             : Datum
    2896         116 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    2897             : {
    2898         116 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    2899             :     MemoryContext oldcontext;
    2900             : 
    2901         116 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    2902             : 
    2903             :     /* Use generic string SortSupport, forcing "C" collation */
    2904         116 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
    2905             : 
    2906         116 :     MemoryContextSwitchTo(oldcontext);
    2907             : 
    2908         116 :     PG_RETURN_VOID();
    2909             : }
    2910             : 
    2911             : 
    2912             : /*-------------------------------------------------------------
    2913             :  * byteaoctetlen
    2914             :  *
    2915             :  * get the number of bytes contained in an instance of type 'bytea'
    2916             :  *-------------------------------------------------------------
    2917             :  */
    2918             : Datum
    2919         314 : byteaoctetlen(PG_FUNCTION_ARGS)
    2920             : {
    2921         314 :     Datum       str = PG_GETARG_DATUM(0);
    2922             : 
    2923             :     /* We need not detoast the input at all */
    2924         314 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    2925             : }
    2926             : 
    2927             : /*
    2928             :  * byteacat -
    2929             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    2930             :  *    the two.
    2931             :  *
    2932             :  * Cloned from textcat and modified as required.
    2933             :  */
    2934             : Datum
    2935        1520 : byteacat(PG_FUNCTION_ARGS)
    2936             : {
    2937        1520 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2938        1520 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2939             : 
    2940        1520 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    2941             : }
    2942             : 
    2943             : /*
    2944             :  * bytea_catenate
    2945             :  *  Guts of byteacat(), broken out so it can be used by other functions
    2946             :  *
    2947             :  * Arguments can be in short-header form, but not compressed or out-of-line
    2948             :  */
    2949             : static bytea *
    2950        1556 : bytea_catenate(bytea *t1, bytea *t2)
    2951             : {
    2952             :     bytea      *result;
    2953             :     int         len1,
    2954             :                 len2,
    2955             :                 len;
    2956             :     char       *ptr;
    2957             : 
    2958        1556 :     len1 = VARSIZE_ANY_EXHDR(t1);
    2959        1556 :     len2 = VARSIZE_ANY_EXHDR(t2);
    2960             : 
    2961             :     /* paranoia ... probably should throw error instead? */
    2962        1556 :     if (len1 < 0)
    2963           0 :         len1 = 0;
    2964        1556 :     if (len2 < 0)
    2965           0 :         len2 = 0;
    2966             : 
    2967        1556 :     len = len1 + len2 + VARHDRSZ;
    2968        1556 :     result = (bytea *) palloc(len);
    2969             : 
    2970             :     /* Set size of result string... */
    2971        1556 :     SET_VARSIZE(result, len);
    2972             : 
    2973             :     /* Fill data field of result string... */
    2974        1556 :     ptr = VARDATA(result);
    2975        1556 :     if (len1 > 0)
    2976        1556 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    2977        1556 :     if (len2 > 0)
    2978        1538 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    2979             : 
    2980        1556 :     return result;
    2981             : }
    2982             : 
    2983             : #define PG_STR_GET_BYTEA(str_) \
    2984             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    2985             : 
    2986             : /*
    2987             :  * bytea_substr()
    2988             :  * Return a substring starting at the specified position.
    2989             :  * Cloned from text_substr and modified as required.
    2990             :  *
    2991             :  * Input:
    2992             :  *  - string
    2993             :  *  - starting position (is one-based)
    2994             :  *  - string length (optional)
    2995             :  *
    2996             :  * If the starting position is zero or less, then return from the start of the string
    2997             :  * adjusting the length to be consistent with the "negative start" per SQL.
    2998             :  * If the length is less than zero, an ERROR is thrown. If no third argument
    2999             :  * (length) is provided, the length to the end of the string is assumed.
    3000             :  */
    3001             : Datum
    3002          86 : bytea_substr(PG_FUNCTION_ARGS)
    3003             : {
    3004          86 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3005             :                                       PG_GETARG_INT32(1),
    3006             :                                       PG_GETARG_INT32(2),
    3007             :                                       false));
    3008             : }
    3009             : 
    3010             : /*
    3011             :  * bytea_substr_no_len -
    3012             :  *    Wrapper to avoid opr_sanity failure due to
    3013             :  *    one function accepting a different number of args.
    3014             :  */
    3015             : Datum
    3016        3900 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    3017             : {
    3018        3900 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    3019             :                                       PG_GETARG_INT32(1),
    3020             :                                       -1,
    3021             :                                       true));
    3022             : }
    3023             : 
    3024             : static bytea *
    3025        4022 : bytea_substring(Datum str,
    3026             :                 int S,
    3027             :                 int L,
    3028             :                 bool length_not_specified)
    3029             : {
    3030             :     int32       S1;             /* adjusted start position */
    3031             :     int32       L1;             /* adjusted substring length */
    3032             :     int32       E;              /* end position */
    3033             : 
    3034             :     /*
    3035             :      * The logic here should generally match text_substring().
    3036             :      */
    3037        4022 :     S1 = Max(S, 1);
    3038             : 
    3039        4022 :     if (length_not_specified)
    3040             :     {
    3041             :         /*
    3042             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    3043             :          * end of the string if we pass it a negative value for length.
    3044             :          */
    3045        3918 :         L1 = -1;
    3046             :     }
    3047         104 :     else if (L < 0)
    3048             :     {
    3049             :         /* SQL99 says to throw an error for E < S, i.e., negative length */
    3050          12 :         ereport(ERROR,
    3051             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3052             :                  errmsg("negative substring length not allowed")));
    3053             :         L1 = -1;                /* silence stupider compilers */
    3054             :     }
    3055          92 :     else if (pg_add_s32_overflow(S, L, &E))
    3056             :     {
    3057             :         /*
    3058             :          * L could be large enough for S + L to overflow, in which case the
    3059             :          * substring must run to end of string.
    3060             :          */
    3061           6 :         L1 = -1;
    3062             :     }
    3063             :     else
    3064             :     {
    3065             :         /*
    3066             :          * A zero or negative value for the end position can happen if the
    3067             :          * start was negative or one. SQL99 says to return a zero-length
    3068             :          * string.
    3069             :          */
    3070          86 :         if (E < 1)
    3071           0 :             return PG_STR_GET_BYTEA("");
    3072             : 
    3073          86 :         L1 = E - S1;
    3074             :     }
    3075             : 
    3076             :     /*
    3077             :      * If the start position is past the end of the string, SQL99 says to
    3078             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    3079             :      * us.  We need only convert S1 to zero-based starting position.
    3080             :      */
    3081        4010 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    3082             : }
    3083             : 
    3084             : /*
    3085             :  * byteaoverlay
    3086             :  *  Replace specified substring of first string with second
    3087             :  *
    3088             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    3089             :  * This code is a direct implementation of what the standard says.
    3090             :  */
    3091             : Datum
    3092           6 : byteaoverlay(PG_FUNCTION_ARGS)
    3093             : {
    3094           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3095           6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3096           6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3097           6 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    3098             : 
    3099           6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3100             : }
    3101             : 
    3102             : Datum
    3103          12 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    3104             : {
    3105          12 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3106          12 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3107          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3108             :     int         sl;
    3109             : 
    3110          12 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    3111          12 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3112             : }
    3113             : 
    3114             : static bytea *
    3115          18 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    3116             : {
    3117             :     bytea      *result;
    3118             :     bytea      *s1;
    3119             :     bytea      *s2;
    3120             :     int         sp_pl_sl;
    3121             : 
    3122             :     /*
    3123             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    3124             :      * "substring length" error because that's what should be expected
    3125             :      * according to the spec's definition of OVERLAY().
    3126             :      */
    3127          18 :     if (sp <= 0)
    3128           0 :         ereport(ERROR,
    3129             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3130             :                  errmsg("negative substring length not allowed")));
    3131          18 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    3132           0 :         ereport(ERROR,
    3133             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    3134             :                  errmsg("integer out of range")));
    3135             : 
    3136          18 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    3137          18 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    3138          18 :     result = bytea_catenate(s1, t2);
    3139          18 :     result = bytea_catenate(result, s2);
    3140             : 
    3141          18 :     return result;
    3142             : }
    3143             : 
    3144             : /*
    3145             :  * bit_count
    3146             :  */
    3147             : Datum
    3148           6 : bytea_bit_count(PG_FUNCTION_ARGS)
    3149             : {
    3150           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3151             : 
    3152           6 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
    3153             : }
    3154             : 
    3155             : /*
    3156             :  * byteapos -
    3157             :  *    Return the position of the specified substring.
    3158             :  *    Implements the SQL POSITION() function.
    3159             :  * Cloned from textpos and modified as required.
    3160             :  */
    3161             : Datum
    3162           0 : byteapos(PG_FUNCTION_ARGS)
    3163             : {
    3164           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3165           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3166             :     int         pos;
    3167             :     int         px,
    3168             :                 p;
    3169             :     int         len1,
    3170             :                 len2;
    3171             :     char       *p1,
    3172             :                *p2;
    3173             : 
    3174           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3175           0 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3176             : 
    3177           0 :     if (len2 <= 0)
    3178           0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    3179             : 
    3180           0 :     p1 = VARDATA_ANY(t1);
    3181           0 :     p2 = VARDATA_ANY(t2);
    3182             : 
    3183           0 :     pos = 0;
    3184           0 :     px = (len1 - len2);
    3185           0 :     for (p = 0; p <= px; p++)
    3186             :     {
    3187           0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    3188             :         {
    3189           0 :             pos = p + 1;
    3190           0 :             break;
    3191             :         };
    3192           0 :         p1++;
    3193             :     };
    3194             : 
    3195           0 :     PG_RETURN_INT32(pos);
    3196             : }
    3197             : 
    3198             : /*-------------------------------------------------------------
    3199             :  * byteaGetByte
    3200             :  *
    3201             :  * this routine treats "bytea" as an array of bytes.
    3202             :  * It returns the Nth byte (a number between 0 and 255).
    3203             :  *-------------------------------------------------------------
    3204             :  */
    3205             : Datum
    3206          60 : byteaGetByte(PG_FUNCTION_ARGS)
    3207             : {
    3208          60 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3209          60 :     int32       n = PG_GETARG_INT32(1);
    3210             :     int         len;
    3211             :     int         byte;
    3212             : 
    3213          60 :     len = VARSIZE_ANY_EXHDR(v);
    3214             : 
    3215          60 :     if (n < 0 || n >= len)
    3216           6 :         ereport(ERROR,
    3217             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3218             :                  errmsg("index %d out of valid range, 0..%d",
    3219             :                         n, len - 1)));
    3220             : 
    3221          54 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3222             : 
    3223          54 :     PG_RETURN_INT32(byte);
    3224             : }
    3225             : 
    3226             : /*-------------------------------------------------------------
    3227             :  * byteaGetBit
    3228             :  *
    3229             :  * This routine treats a "bytea" type like an array of bits.
    3230             :  * It returns the value of the Nth bit (0 or 1).
    3231             :  *
    3232             :  *-------------------------------------------------------------
    3233             :  */
    3234             : Datum
    3235          12 : byteaGetBit(PG_FUNCTION_ARGS)
    3236             : {
    3237          12 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3238          12 :     int64       n = PG_GETARG_INT64(1);
    3239             :     int         byteNo,
    3240             :                 bitNo;
    3241             :     int         len;
    3242             :     int         byte;
    3243             : 
    3244          12 :     len = VARSIZE_ANY_EXHDR(v);
    3245             : 
    3246          12 :     if (n < 0 || n >= (int64) len * 8)
    3247           6 :         ereport(ERROR,
    3248             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3249             :                  errmsg("index %lld out of valid range, 0..%lld",
    3250             :                         (long long) n, (long long) len * 8 - 1)));
    3251             : 
    3252             :     /* n/8 is now known < len, so safe to cast to int */
    3253           6 :     byteNo = (int) (n / 8);
    3254           6 :     bitNo = (int) (n % 8);
    3255             : 
    3256           6 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3257             : 
    3258           6 :     if (byte & (1 << bitNo))
    3259           6 :         PG_RETURN_INT32(1);
    3260             :     else
    3261           0 :         PG_RETURN_INT32(0);
    3262             : }
    3263             : 
    3264             : /*-------------------------------------------------------------
    3265             :  * byteaSetByte
    3266             :  *
    3267             :  * Given an instance of type 'bytea' creates a new one with
    3268             :  * the Nth byte set to the given value.
    3269             :  *
    3270             :  *-------------------------------------------------------------
    3271             :  */
    3272             : Datum
    3273          12 : byteaSetByte(PG_FUNCTION_ARGS)
    3274             : {
    3275          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3276          12 :     int32       n = PG_GETARG_INT32(1);
    3277          12 :     int32       newByte = PG_GETARG_INT32(2);
    3278             :     int         len;
    3279             : 
    3280          12 :     len = VARSIZE(res) - VARHDRSZ;
    3281             : 
    3282          12 :     if (n < 0 || n >= len)
    3283           6 :         ereport(ERROR,
    3284             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3285             :                  errmsg("index %d out of valid range, 0..%d",
    3286             :                         n, len - 1)));
    3287             : 
    3288             :     /*
    3289             :      * Now set the byte.
    3290             :      */
    3291           6 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3292             : 
    3293           6 :     PG_RETURN_BYTEA_P(res);
    3294             : }
    3295             : 
    3296             : /*-------------------------------------------------------------
    3297             :  * byteaSetBit
    3298             :  *
    3299             :  * Given an instance of type 'bytea' creates a new one with
    3300             :  * the Nth bit set to the given value.
    3301             :  *
    3302             :  *-------------------------------------------------------------
    3303             :  */
    3304             : Datum
    3305          12 : byteaSetBit(PG_FUNCTION_ARGS)
    3306             : {
    3307          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3308          12 :     int64       n = PG_GETARG_INT64(1);
    3309          12 :     int32       newBit = PG_GETARG_INT32(2);
    3310             :     int         len;
    3311             :     int         oldByte,
    3312             :                 newByte;
    3313             :     int         byteNo,
    3314             :                 bitNo;
    3315             : 
    3316          12 :     len = VARSIZE(res) - VARHDRSZ;
    3317             : 
    3318          12 :     if (n < 0 || n >= (int64) len * 8)
    3319           6 :         ereport(ERROR,
    3320             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3321             :                  errmsg("index %lld out of valid range, 0..%lld",
    3322             :                         (long long) n, (long long) len * 8 - 1)));
    3323             : 
    3324             :     /* n/8 is now known < len, so safe to cast to int */
    3325           6 :     byteNo = (int) (n / 8);
    3326           6 :     bitNo = (int) (n % 8);
    3327             : 
    3328             :     /*
    3329             :      * sanity check!
    3330             :      */
    3331           6 :     if (newBit != 0 && newBit != 1)
    3332           0 :         ereport(ERROR,
    3333             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3334             :                  errmsg("new bit must be 0 or 1")));
    3335             : 
    3336             :     /*
    3337             :      * Update the byte.
    3338             :      */
    3339           6 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3340             : 
    3341           6 :     if (newBit == 0)
    3342           6 :         newByte = oldByte & (~(1 << bitNo));
    3343             :     else
    3344           0 :         newByte = oldByte | (1 << bitNo);
    3345             : 
    3346           6 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3347             : 
    3348           6 :     PG_RETURN_BYTEA_P(res);
    3349             : }
    3350             : 
    3351             : 
    3352             : /* text_name()
    3353             :  * Converts a text type to a Name type.
    3354             :  */
    3355             : Datum
    3356       30522 : text_name(PG_FUNCTION_ARGS)
    3357             : {
    3358       30522 :     text       *s = PG_GETARG_TEXT_PP(0);
    3359             :     Name        result;
    3360             :     int         len;
    3361             : 
    3362       30522 :     len = VARSIZE_ANY_EXHDR(s);
    3363             : 
    3364             :     /* Truncate oversize input */
    3365       30522 :     if (len >= NAMEDATALEN)
    3366           6 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3367             : 
    3368             :     /* We use palloc0 here to ensure result is zero-padded */
    3369       30522 :     result = (Name) palloc0(NAMEDATALEN);
    3370       30522 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
    3371             : 
    3372       30522 :     PG_RETURN_NAME(result);
    3373             : }
    3374             : 
    3375             : /* name_text()
    3376             :  * Converts a Name type to a text type.
    3377             :  */
    3378             : Datum
    3379     1042242 : name_text(PG_FUNCTION_ARGS)
    3380             : {
    3381     1042242 :     Name        s = PG_GETARG_NAME(0);
    3382             : 
    3383     1042242 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3384             : }
    3385             : 
    3386             : 
    3387             : /*
    3388             :  * textToQualifiedNameList - convert a text object to list of names
    3389             :  *
    3390             :  * This implements the input parsing needed by nextval() and other
    3391             :  * functions that take a text parameter representing a qualified name.
    3392             :  * We split the name at dots, downcase if not double-quoted, and
    3393             :  * truncate names if they're too long.
    3394             :  */
    3395             : List *
    3396        1374 : textToQualifiedNameList(text *textval)
    3397             : {
    3398             :     char       *rawname;
    3399        1374 :     List       *result = NIL;
    3400             :     List       *namelist;
    3401             :     ListCell   *l;
    3402             : 
    3403             :     /* Convert to C string (handles possible detoasting). */
    3404             :     /* Note we rely on being able to modify rawname below. */
    3405        1374 :     rawname = text_to_cstring(textval);
    3406             : 
    3407        1374 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3408           0 :         ereport(ERROR,
    3409             :                 (errcode(ERRCODE_INVALID_NAME),
    3410             :                  errmsg("invalid name syntax")));
    3411             : 
    3412        1374 :     if (namelist == NIL)
    3413           0 :         ereport(ERROR,
    3414             :                 (errcode(ERRCODE_INVALID_NAME),
    3415             :                  errmsg("invalid name syntax")));
    3416             : 
    3417        2858 :     foreach(l, namelist)
    3418             :     {
    3419        1484 :         char       *curname = (char *) lfirst(l);
    3420             : 
    3421        1484 :         result = lappend(result, makeString(pstrdup(curname)));
    3422             :     }
    3423             : 
    3424        1374 :     pfree(rawname);
    3425        1374 :     list_free(namelist);
    3426             : 
    3427        1374 :     return result;
    3428             : }
    3429             : 
    3430             : /*
    3431             :  * SplitIdentifierString --- parse a string containing identifiers
    3432             :  *
    3433             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3434             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3435             :  * important to avoid memory leaks, so the API is designed to minimize the
    3436             :  * amount of stuff that needs to be allocated and freed.
    3437             :  *
    3438             :  * Inputs:
    3439             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3440             :  *             been modified to contain the separated identifiers.
    3441             :  *  separator: the separator punctuation expected between identifiers
    3442             :  *             (typically '.' or ',').  Whitespace may also appear around
    3443             :  *             identifiers.
    3444             :  * Outputs:
    3445             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3446             :  *            rawstring.  Caller should list_free() this even on error return.
    3447             :  *
    3448             :  * Returns true if okay, false if there is a syntax error in the string.
    3449             :  *
    3450             :  * Note that an empty string is considered okay here, though not in
    3451             :  * textToQualifiedNameList.
    3452             :  */
    3453             : bool
    3454      144856 : SplitIdentifierString(char *rawstring, char separator,
    3455             :                       List **namelist)
    3456             : {
    3457      144856 :     char       *nextp = rawstring;
    3458      144856 :     bool        done = false;
    3459             : 
    3460      144856 :     *namelist = NIL;
    3461             : 
    3462      144862 :     while (scanner_isspace(*nextp))
    3463           6 :         nextp++;                /* skip leading whitespace */
    3464             : 
    3465      144856 :     if (*nextp == '\0')
    3466       19692 :         return true;            /* allow empty string */
    3467             : 
    3468             :     /* At the top of the loop, we are at start of a new identifier. */
    3469             :     do
    3470             :     {
    3471             :         char       *curname;
    3472             :         char       *endp;
    3473             : 
    3474      200614 :         if (*nextp == '"')
    3475             :         {
    3476             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3477       30764 :             curname = nextp + 1;
    3478             :             for (;;)
    3479             :             {
    3480       30768 :                 endp = strchr(nextp + 1, '"');
    3481       30766 :                 if (endp == NULL)
    3482           0 :                     return false;   /* mismatched quotes */
    3483       30766 :                 if (endp[1] != '"')
    3484       30764 :                     break;      /* found end of quoted name */
    3485             :                 /* Collapse adjacent quotes into one quote, and look again */
    3486           2 :                 memmove(endp, endp + 1, strlen(endp));
    3487           2 :                 nextp = endp;
    3488             :             }
    3489             :             /* endp now points at the terminating quote */
    3490       30764 :             nextp = endp + 1;
    3491             :         }
    3492             :         else
    3493             :         {
    3494             :             /* Unquoted name --- extends to separator or whitespace */
    3495             :             char       *downname;
    3496             :             int         len;
    3497             : 
    3498      169850 :             curname = nextp;
    3499     1474152 :             while (*nextp && *nextp != separator &&
    3500     1304304 :                    !scanner_isspace(*nextp))
    3501     1304302 :                 nextp++;
    3502      169850 :             endp = nextp;
    3503      169850 :             if (curname == nextp)
    3504           0 :                 return false;   /* empty unquoted name not allowed */
    3505             : 
    3506             :             /*
    3507             :              * Downcase the identifier, using same code as main lexer does.
    3508             :              *
    3509             :              * XXX because we want to overwrite the input in-place, we cannot
    3510             :              * support a downcasing transformation that increases the string
    3511             :              * length.  This is not a problem given the current implementation
    3512             :              * of downcase_truncate_identifier, but we'll probably have to do
    3513             :              * something about this someday.
    3514             :              */
    3515      169850 :             len = endp - curname;
    3516      169850 :             downname = downcase_truncate_identifier(curname, len, false);
    3517             :             Assert(strlen(downname) <= len);
    3518      169850 :             strncpy(curname, downname, len);    /* strncpy is required here */
    3519      169850 :             pfree(downname);
    3520             :         }
    3521             : 
    3522      200616 :         while (scanner_isspace(*nextp))
    3523           2 :             nextp++;            /* skip trailing whitespace */
    3524             : 
    3525      200614 :         if (*nextp == separator)
    3526             :         {
    3527       75450 :             nextp++;
    3528      121802 :             while (scanner_isspace(*nextp))
    3529       46352 :                 nextp++;        /* skip leading whitespace for next */
    3530             :             /* we expect another name, so done remains false */
    3531             :         }
    3532      125164 :         else if (*nextp == '\0')
    3533      125162 :             done = true;
    3534             :         else
    3535           2 :             return false;       /* invalid syntax */
    3536             : 
    3537             :         /* Now safe to overwrite separator with a null */
    3538      200612 :         *endp = '\0';
    3539             : 
    3540             :         /* Truncate name if it's overlength */
    3541      200612 :         truncate_identifier(curname, strlen(curname), false);
    3542             : 
    3543             :         /*
    3544             :          * Finished isolating current name --- add it to list
    3545             :          */
    3546      200612 :         *namelist = lappend(*namelist, curname);
    3547             : 
    3548             :         /* Loop back if we didn't reach end of string */
    3549      200612 :     } while (!done);
    3550             : 
    3551      125162 :     return true;
    3552             : }
    3553             : 
    3554             : 
    3555             : /*
    3556             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3557             :  *
    3558             :  * This works fine on file names too; the function name is historical.
    3559             :  *
    3560             :  * This is similar to SplitIdentifierString, except that the parsing
    3561             :  * rules are meant to handle pathnames instead of identifiers: there is
    3562             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3563             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3564             :  * last, the returned strings are separately palloc'd rather than being
    3565             :  * pointers into rawstring --- but we still scribble on rawstring.
    3566             :  *
    3567             :  * Inputs:
    3568             :  *  rawstring: the input string; must be modifiable!
    3569             :  *  separator: the separator punctuation expected between directories
    3570             :  *             (typically ',' or ';').  Whitespace may also appear around
    3571             :  *             directories.
    3572             :  * Outputs:
    3573             :  *  namelist: filled with a palloc'd list of directory names.
    3574             :  *            Caller should list_free_deep() this even on error return.
    3575             :  *
    3576             :  * Returns true if okay, false if there is a syntax error in the string.
    3577             :  *
    3578             :  * Note that an empty string is considered okay here.
    3579             :  */
    3580             : bool
    3581        1242 : SplitDirectoriesString(char *rawstring, char separator,
    3582             :                        List **namelist)
    3583             : {
    3584        1242 :     char       *nextp = rawstring;
    3585        1242 :     bool        done = false;
    3586             : 
    3587        1242 :     *namelist = NIL;
    3588             : 
    3589        1242 :     while (scanner_isspace(*nextp))
    3590           0 :         nextp++;                /* skip leading whitespace */
    3591             : 
    3592        1242 :     if (*nextp == '\0')
    3593           2 :         return true;            /* allow empty string */
    3594             : 
    3595             :     /* At the top of the loop, we are at start of a new directory. */
    3596             :     do
    3597             :     {
    3598             :         char       *curname;
    3599             :         char       *endp;
    3600             : 
    3601        1240 :         if (*nextp == '"')
    3602             :         {
    3603             :             /* Quoted name --- collapse quote-quote pairs */
    3604           0 :             curname = nextp + 1;
    3605             :             for (;;)
    3606             :             {
    3607           0 :                 endp = strchr(nextp + 1, '"');
    3608           0 :                 if (endp == NULL)
    3609           0 :                     return false;   /* mismatched quotes */
    3610           0 :                 if (endp[1] != '"')
    3611           0 :                     break;      /* found end of quoted name */
    3612             :                 /* Collapse adjacent quotes into one quote, and look again */
    3613           0 :                 memmove(endp, endp + 1, strlen(endp));
    3614           0 :                 nextp = endp;
    3615             :             }
    3616             :             /* endp now points at the terminating quote */
    3617           0 :             nextp = endp + 1;
    3618             :         }
    3619             :         else
    3620             :         {
    3621             :             /* Unquoted name --- extends to separator or end of string */
    3622        1240 :             curname = endp = nextp;
    3623       20948 :             while (*nextp && *nextp != separator)
    3624             :             {
    3625             :                 /* trailing whitespace should not be included in name */
    3626       19708 :                 if (!scanner_isspace(*nextp))
    3627       19708 :                     endp = nextp + 1;
    3628       19708 :                 nextp++;
    3629             :             }
    3630        1240 :             if (curname == endp)
    3631           0 :                 return false;   /* empty unquoted name not allowed */
    3632             :         }
    3633             : 
    3634        1240 :         while (scanner_isspace(*nextp))
    3635           0 :             nextp++;            /* skip trailing whitespace */
    3636             : 
    3637        1240 :         if (*nextp == separator)
    3638             :         {
    3639           0 :             nextp++;
    3640           0 :             while (scanner_isspace(*nextp))
    3641           0 :                 nextp++;        /* skip leading whitespace for next */
    3642             :             /* we expect another name, so done remains false */
    3643             :         }
    3644        1240 :         else if (*nextp == '\0')
    3645        1240 :             done = true;
    3646             :         else
    3647           0 :             return false;       /* invalid syntax */
    3648             : 
    3649             :         /* Now safe to overwrite separator with a null */
    3650        1240 :         *endp = '\0';
    3651             : 
    3652             :         /* Truncate path if it's overlength */
    3653        1240 :         if (strlen(curname) >= MAXPGPATH)
    3654           0 :             curname[MAXPGPATH - 1] = '\0';
    3655             : 
    3656             :         /*
    3657             :          * Finished isolating current name --- add it to list
    3658             :          */
    3659        1240 :         curname = pstrdup(curname);
    3660        1240 :         canonicalize_path(curname);
    3661        1240 :         *namelist = lappend(*namelist, curname);
    3662             : 
    3663             :         /* Loop back if we didn't reach end of string */
    3664        1240 :     } while (!done);
    3665             : 
    3666        1240 :     return true;
    3667             : }
    3668             : 
    3669             : 
    3670             : /*
    3671             :  * SplitGUCList --- parse a string containing identifiers or file names
    3672             :  *
    3673             :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
    3674             :  * presuming whether the elements will be taken as identifiers or file names.
    3675             :  * We assume the input has already been through flatten_set_variable_args(),
    3676             :  * so that we need never downcase (if appropriate, that was done already).
    3677             :  * Nor do we ever truncate, since we don't know the correct max length.
    3678             :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
    3679             :  * because any embedded whitespace should have led to double-quoting).
    3680             :  * Otherwise the API is identical to SplitIdentifierString.
    3681             :  *
    3682             :  * XXX it's annoying to have so many copies of this string-splitting logic.
    3683             :  * However, it's not clear that having one function with a bunch of option
    3684             :  * flags would be much better.
    3685             :  *
    3686             :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
    3687             :  * Be sure to update that if you have to change this.
    3688             :  *
    3689             :  * Inputs:
    3690             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3691             :  *             been modified to contain the separated identifiers.
    3692             :  *  separator: the separator punctuation expected between identifiers
    3693             :  *             (typically '.' or ',').  Whitespace may also appear around
    3694             :  *             identifiers.
    3695             :  * Outputs:
    3696             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3697             :  *            rawstring.  Caller should list_free() this even on error return.
    3698             :  *
    3699             :  * Returns true if okay, false if there is a syntax error in the string.
    3700             :  */
    3701             : bool
    3702        4900 : SplitGUCList(char *rawstring, char separator,
    3703             :              List **namelist)
    3704             : {
    3705        4900 :     char       *nextp = rawstring;
    3706        4900 :     bool        done = false;
    3707             : 
    3708        4900 :     *namelist = NIL;
    3709             : 
    3710        4900 :     while (scanner_isspace(*nextp))
    3711           0 :         nextp++;                /* skip leading whitespace */
    3712             : 
    3713        4900 :     if (*nextp == '\0')
    3714        4834 :         return true;            /* allow empty string */
    3715             : 
    3716             :     /* At the top of the loop, we are at start of a new identifier. */
    3717             :     do
    3718             :     {
    3719             :         char       *curname;
    3720             :         char       *endp;
    3721             : 
    3722          92 :         if (*nextp == '"')
    3723             :         {
    3724             :             /* Quoted name --- collapse quote-quote pairs */
    3725          24 :             curname = nextp + 1;
    3726             :             for (;;)
    3727             :             {
    3728          36 :                 endp = strchr(nextp + 1, '"');
    3729          30 :                 if (endp == NULL)
    3730           0 :                     return false;   /* mismatched quotes */
    3731          30 :                 if (endp[1] != '"')
    3732          24 :                     break;      /* found end of quoted name */
    3733             :                 /* Collapse adjacent quotes into one quote, and look again */
    3734           6 :                 memmove(endp, endp + 1, strlen(endp));
    3735           6 :                 nextp = endp;
    3736             :             }
    3737             :             /* endp now points at the terminating quote */
    3738          24 :             nextp = endp + 1;
    3739             :         }
    3740             :         else
    3741             :         {
    3742             :             /* Unquoted name --- extends to separator or whitespace */
    3743          68 :             curname = nextp;
    3744         638 :             while (*nextp && *nextp != separator &&
    3745         570 :                    !scanner_isspace(*nextp))
    3746         570 :                 nextp++;
    3747          68 :             endp = nextp;
    3748          68 :             if (curname == nextp)
    3749           0 :                 return false;   /* empty unquoted name not allowed */
    3750             :         }
    3751             : 
    3752          92 :         while (scanner_isspace(*nextp))
    3753           0 :             nextp++;            /* skip trailing whitespace */
    3754             : 
    3755          92 :         if (*nextp == separator)
    3756             :         {
    3757          26 :             nextp++;
    3758          44 :             while (scanner_isspace(*nextp))
    3759          18 :                 nextp++;        /* skip leading whitespace for next */
    3760             :             /* we expect another name, so done remains false */
    3761             :         }
    3762          66 :         else if (*nextp == '\0')
    3763          66 :             done = true;
    3764             :         else
    3765           0 :             return false;       /* invalid syntax */
    3766             : 
    3767             :         /* Now safe to overwrite separator with a null */
    3768          92 :         *endp = '\0';
    3769             : 
    3770             :         /*
    3771             :          * Finished isolating current name --- add it to list
    3772             :          */
    3773          92 :         *namelist = lappend(*namelist, curname);
    3774             : 
    3775             :         /* Loop back if we didn't reach end of string */
    3776          92 :     } while (!done);
    3777             : 
    3778          66 :     return true;
    3779             : }
    3780             : 
    3781             : 
    3782             : /*****************************************************************************
    3783             :  *  Comparison Functions used for bytea
    3784             :  *
    3785             :  * Note: btree indexes need these routines not to leak memory; therefore,
    3786             :  * be careful to free working copies of toasted datums.  Most places don't
    3787             :  * need to be so careful.
    3788             :  *****************************************************************************/
    3789             : 
    3790             : Datum
    3791       10378 : byteaeq(PG_FUNCTION_ARGS)
    3792             : {
    3793       10378 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3794       10378 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3795             :     bool        result;
    3796             :     Size        len1,
    3797             :                 len2;
    3798             : 
    3799             :     /*
    3800             :      * We can use a fast path for unequal lengths, which might save us from
    3801             :      * having to detoast one or both values.
    3802             :      */
    3803       10378 :     len1 = toast_raw_datum_size(arg1);
    3804       10378 :     len2 = toast_raw_datum_size(arg2);
    3805       10378 :     if (len1 != len2)
    3806        4308 :         result = false;
    3807             :     else
    3808             :     {
    3809        6070 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3810        6070 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3811             : 
    3812        6070 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3813             :                          len1 - VARHDRSZ) == 0);
    3814             : 
    3815        6070 :         PG_FREE_IF_COPY(barg1, 0);
    3816        6070 :         PG_FREE_IF_COPY(barg2, 1);
    3817             :     }
    3818             : 
    3819       10378 :     PG_RETURN_BOOL(result);
    3820             : }
    3821             : 
    3822             : Datum
    3823         768 : byteane(PG_FUNCTION_ARGS)
    3824             : {
    3825         768 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3826         768 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3827             :     bool        result;
    3828             :     Size        len1,
    3829             :                 len2;
    3830             : 
    3831             :     /*
    3832             :      * We can use a fast path for unequal lengths, which might save us from
    3833             :      * having to detoast one or both values.
    3834             :      */
    3835         768 :     len1 = toast_raw_datum_size(arg1);
    3836         768 :     len2 = toast_raw_datum_size(arg2);
    3837         768 :     if (len1 != len2)
    3838           0 :         result = true;
    3839             :     else
    3840             :     {
    3841         768 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3842         768 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3843             : 
    3844         768 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3845             :                          len1 - VARHDRSZ) != 0);
    3846             : 
    3847         768 :         PG_FREE_IF_COPY(barg1, 0);
    3848         768 :         PG_FREE_IF_COPY(barg2, 1);
    3849             :     }
    3850             : 
    3851         768 :     PG_RETURN_BOOL(result);
    3852             : }
    3853             : 
    3854             : Datum
    3855        8316 : bytealt(PG_FUNCTION_ARGS)
    3856             : {
    3857        8316 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3858        8316 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3859             :     int         len1,
    3860             :                 len2;
    3861             :     int         cmp;
    3862             : 
    3863        8316 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3864        8316 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3865             : 
    3866        8316 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3867             : 
    3868        8316 :     PG_FREE_IF_COPY(arg1, 0);
    3869        8316 :     PG_FREE_IF_COPY(arg2, 1);
    3870             : 
    3871        8316 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    3872             : }
    3873             : 
    3874             : Datum
    3875        6356 : byteale(PG_FUNCTION_ARGS)
    3876             : {
    3877        6356 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3878        6356 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3879             :     int         len1,
    3880             :                 len2;
    3881             :     int         cmp;
    3882             : 
    3883        6356 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3884        6356 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3885             : 
    3886        6356 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3887             : 
    3888        6356 :     PG_FREE_IF_COPY(arg1, 0);
    3889        6356 :     PG_FREE_IF_COPY(arg2, 1);
    3890             : 
    3891        6356 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    3892             : }
    3893             : 
    3894             : Datum
    3895        6228 : byteagt(PG_FUNCTION_ARGS)
    3896             : {
    3897        6228 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3898        6228 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3899             :     int         len1,
    3900             :                 len2;
    3901             :     int         cmp;
    3902             : 
    3903        6228 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3904        6228 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3905             : 
    3906        6228 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3907             : 
    3908        6228 :     PG_FREE_IF_COPY(arg1, 0);
    3909        6228 :     PG_FREE_IF_COPY(arg2, 1);
    3910             : 
    3911        6228 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    3912             : }
    3913             : 
    3914             : Datum
    3915        5010 : byteage(PG_FUNCTION_ARGS)
    3916             : {
    3917        5010 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3918        5010 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3919             :     int         len1,
    3920             :                 len2;
    3921             :     int         cmp;
    3922             : 
    3923        5010 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3924        5010 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3925             : 
    3926        5010 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3927             : 
    3928        5010 :     PG_FREE_IF_COPY(arg1, 0);
    3929        5010 :     PG_FREE_IF_COPY(arg2, 1);
    3930             : 
    3931        5010 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    3932             : }
    3933             : 
    3934             : Datum
    3935       87600 : byteacmp(PG_FUNCTION_ARGS)
    3936             : {
    3937       87600 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3938       87600 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3939             :     int         len1,
    3940             :                 len2;
    3941             :     int         cmp;
    3942             : 
    3943       87600 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3944       87600 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3945             : 
    3946       87600 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3947       87600 :     if ((cmp == 0) && (len1 != len2))
    3948       14708 :         cmp = (len1 < len2) ? -1 : 1;
    3949             : 
    3950       87600 :     PG_FREE_IF_COPY(arg1, 0);
    3951       87600 :     PG_FREE_IF_COPY(arg2, 1);
    3952             : 
    3953       87600 :     PG_RETURN_INT32(cmp);
    3954             : }
    3955             : 
    3956             : Datum
    3957          40 : bytea_sortsupport(PG_FUNCTION_ARGS)
    3958             : {
    3959          40 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    3960             :     MemoryContext oldcontext;
    3961             : 
    3962          40 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    3963             : 
    3964             :     /* Use generic string SortSupport, forcing "C" collation */
    3965          40 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
    3966             : 
    3967          40 :     MemoryContextSwitchTo(oldcontext);
    3968             : 
    3969          40 :     PG_RETURN_VOID();
    3970             : }
    3971             : 
    3972             : /*
    3973             :  * appendStringInfoText
    3974             :  *
    3975             :  * Append a text to str.
    3976             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
    3977             :  */
    3978             : static void
    3979     1681666 : appendStringInfoText(StringInfo str, const text *t)
    3980             : {
    3981     1681666 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
    3982     1681666 : }
    3983             : 
    3984             : /*
    3985             :  * replace_text
    3986             :  * replace all occurrences of 'old_sub_str' in 'orig_str'
    3987             :  * with 'new_sub_str' to form 'new_str'
    3988             :  *
    3989             :  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
    3990             :  * otherwise returns 'new_str'
    3991             :  */
    3992             : Datum
    3993        2540 : replace_text(PG_FUNCTION_ARGS)
    3994             : {
    3995        2540 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    3996        2540 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    3997        2540 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    3998             :     int         src_text_len;
    3999             :     int         from_sub_text_len;
    4000             :     TextPositionState state;
    4001             :     text       *ret_text;
    4002             :     int         chunk_len;
    4003             :     char       *curr_ptr;
    4004             :     char       *start_ptr;
    4005             :     StringInfoData str;
    4006             :     bool        found;
    4007             : 
    4008        2540 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4009        2540 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
    4010             : 
    4011             :     /* Return unmodified source string if empty source or pattern */
    4012        2540 :     if (src_text_len < 1 || from_sub_text_len < 1)
    4013             :     {
    4014           0 :         PG_RETURN_TEXT_P(src_text);
    4015             :     }
    4016             : 
    4017        2540 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
    4018             : 
    4019        2540 :     found = text_position_next(&state);
    4020             : 
    4021             :     /* When the from_sub_text is not found, there is nothing to do. */
    4022        2540 :     if (!found)
    4023             :     {
    4024         780 :         text_position_cleanup(&state);
    4025         780 :         PG_RETURN_TEXT_P(src_text);
    4026             :     }
    4027        1760 :     curr_ptr = text_position_get_match_ptr(&state);
    4028        1760 :     start_ptr = VARDATA_ANY(src_text);
    4029             : 
    4030        1760 :     initStringInfo(&str);
    4031             : 
    4032             :     do
    4033             :     {
    4034        6268 :         CHECK_FOR_INTERRUPTS();
    4035             : 
    4036             :         /* copy the data skipped over by last text_position_next() */
    4037        6268 :         chunk_len = curr_ptr - start_ptr;
    4038        6268 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4039             : 
    4040        6268 :         appendStringInfoText(&str, to_sub_text);
    4041             : 
    4042        6268 :         start_ptr = curr_ptr + from_sub_text_len;
    4043             : 
    4044        6268 :         found = text_position_next(&state);
    4045        6268 :         if (found)
    4046        4508 :             curr_ptr = text_position_get_match_ptr(&state);
    4047             :     }
    4048        6268 :     while (found);
    4049             : 
    4050             :     /* copy trailing data */
    4051        1760 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4052        1760 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4053             : 
    4054        1760 :     text_position_cleanup(&state);
    4055             : 
    4056        1760 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    4057        1760 :     pfree(str.data);
    4058             : 
    4059        1760 :     PG_RETURN_TEXT_P(ret_text);
    4060             : }
    4061             : 
    4062             : /*
    4063             :  * check_replace_text_has_escape
    4064             :  *
    4065             :  * Returns 0 if text contains no backslashes that need processing.
    4066             :  * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
    4067             :  * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
    4068             :  */
    4069             : static int
    4070       10526 : check_replace_text_has_escape(const text *replace_text)
    4071             : {
    4072       10526 :     int         result = 0;
    4073       10526 :     const char *p = VARDATA_ANY(replace_text);
    4074       10526 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4075             : 
    4076       10570 :     while (p < p_end)
    4077             :     {
    4078             :         /* Find next escape char, if any. */
    4079        9272 :         p = memchr(p, '\\', p_end - p);
    4080        9272 :         if (p == NULL)
    4081        8594 :             break;
    4082         678 :         p++;
    4083             :         /* Note: a backslash at the end doesn't require extra processing. */
    4084         678 :         if (p < p_end)
    4085             :         {
    4086         678 :             if (*p >= '1' && *p <= '9')
    4087         634 :                 return 2;       /* Found a submatch specifier, so done */
    4088          44 :             result = 1;         /* Found some other sequence, keep looking */
    4089          44 :             p++;
    4090             :         }
    4091             :     }
    4092        9892 :     return result;
    4093             : }
    4094             : 
    4095             : /*
    4096             :  * appendStringInfoRegexpSubstr
    4097             :  *
    4098             :  * Append replace_text to str, substituting regexp back references for
    4099             :  * \n escapes.  start_ptr is the start of the match in the source string,
    4100             :  * at logical character position data_pos.
    4101             :  */
    4102             : static void
    4103         212 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    4104             :                              regmatch_t *pmatch,
    4105             :                              char *start_ptr, int data_pos)
    4106             : {
    4107         212 :     const char *p = VARDATA_ANY(replace_text);
    4108         212 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4109             : 
    4110         526 :     while (p < p_end)
    4111             :     {
    4112         470 :         const char *chunk_start = p;
    4113             :         int         so;
    4114             :         int         eo;
    4115             : 
    4116             :         /* Find next escape char, if any. */
    4117         470 :         p = memchr(p, '\\', p_end - p);
    4118         470 :         if (p == NULL)
    4119         150 :             p = p_end;
    4120             : 
    4121             :         /* Copy the text we just scanned over, if any. */
    4122         470 :         if (p > chunk_start)
    4123         294 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    4124             : 
    4125             :         /* Done if at end of string, else advance over escape char. */
    4126         470 :         if (p >= p_end)
    4127         150 :             break;
    4128         320 :         p++;
    4129             : 
    4130         320 :         if (p >= p_end)
    4131             :         {
    4132             :             /* Escape at very end of input.  Treat same as unexpected char */
    4133           6 :             appendStringInfoChar(str, '\\');
    4134           6 :             break;
    4135             :         }
    4136             : 
    4137         314 :         if (*p >= '1' && *p <= '9')
    4138         254 :         {
    4139             :             /* Use the back reference of regexp. */
    4140         254 :             int         idx = *p - '0';
    4141             : 
    4142         254 :             so = pmatch[idx].rm_so;
    4143         254 :             eo = pmatch[idx].rm_eo;
    4144         254 :             p++;
    4145             :         }
    4146          60 :         else if (*p == '&')
    4147             :         {
    4148             :             /* Use the entire matched string. */
    4149          18 :             so = pmatch[0].rm_so;
    4150          18 :             eo = pmatch[0].rm_eo;
    4151          18 :             p++;
    4152             :         }
    4153          42 :         else if (*p == '\\')
    4154             :         {
    4155             :             /* \\ means transfer one \ to output. */
    4156          36 :             appendStringInfoChar(str, '\\');
    4157          36 :             p++;
    4158          36 :             continue;
    4159             :         }
    4160             :         else
    4161             :         {
    4162             :             /*
    4163             :              * If escape char is not followed by any expected char, just treat
    4164             :              * it as ordinary data to copy.  (XXX would it be better to throw
    4165             :              * an error?)
    4166             :              */
    4167           6 :             appendStringInfoChar(str, '\\');
    4168           6 :             continue;
    4169             :         }
    4170             : 
    4171         272 :         if (so >= 0 && eo >= 0)
    4172             :         {
    4173             :             /*
    4174             :              * Copy the text that is back reference of regexp.  Note so and eo
    4175             :              * are counted in characters not bytes.
    4176             :              */
    4177             :             char       *chunk_start;
    4178             :             int         chunk_len;
    4179             : 
    4180             :             Assert(so >= data_pos);
    4181         272 :             chunk_start = start_ptr;
    4182         272 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
    4183         272 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
    4184         272 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    4185             :         }
    4186             :     }
    4187         212 : }
    4188             : 
    4189             : /*
    4190             :  * replace_text_regexp
    4191             :  *
    4192             :  * Replace substring(s) in src_text that match pattern_text with replace_text.
    4193             :  * The replace_text can contain backslash markers to substitute
    4194             :  * (parts of) the matched text.
    4195             :  *
    4196             :  * cflags: regexp compile flags.
    4197             :  * collation: collation to use.
    4198             :  * search_start: the character (not byte) offset in src_text at which to
    4199             :  * begin searching.
    4200             :  * n: if 0, replace all matches; if > 0, replace only the N'th match.
                     :  *
                     :  * The result is built with cstring_to_text_with_len from the accumulated
                     :  * output buffer; src_text is only read.  The wide-character copy of the
                     :  * source and the output buffer are released with pfree before returning.
                     :  *
                     :  * Reports ERRCODE_INVALID_REGULAR_EXPRESSION if pg_regexec returns anything
                     :  * other than REG_OKAY or REG_NOMATCH.
                     :  *
                     :  * NOTE(review): search_start is an int but is compared against the size_t
                     :  * data_len in the main loop, so that comparison is done in unsigned
                     :  * arithmetic; callers are presumably expected to pass a non-negative
                     :  * value -- confirm.
    4201             :  */
    4202             : text *
    4203       10526 : replace_text_regexp(text *src_text, text *pattern_text,
    4204             :                     text *replace_text,
    4205             :                     int cflags, Oid collation,
    4206             :                     int search_start, int n)
    4207             : {
    4208             :     text       *ret_text;
    4209             :     regex_t    *re;
    4210       10526 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4211       10526 :     int         nmatches = 0;
    4212             :     StringInfoData buf;
    4213             :     regmatch_t  pmatch[10];     /* main match, plus \1 to \9 */
    4214       10526 :     int         nmatch = lengthof(pmatch);
    4215             :     pg_wchar   *data;
    4216             :     size_t      data_len;
    4217             :     int         data_pos;
    4218             :     char       *start_ptr;
    4219             :     int         escape_status;
    4220             : 
    4221       10526 :     initStringInfo(&buf);
    4222             : 
    4223             :     /*
                     :      * Convert data string to wide characters.  The array is sized for one
                     :      * pg_wchar per source byte plus one extra slot; data_len is the count
                     :      * returned by pg_mb2wchar_with_len.
                     :      */
    4224       10526 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
    4225       10526 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    4226             : 
    4227             :     /* Check whether replace_text has escapes, especially regexp submatches. */
    4228       10526 :     escape_status = check_replace_text_has_escape(replace_text);
    4229             : 
    4230             :     /* If no regexp submatches (escape_status < 2), we can use REG_NOSUB. */
    4231       10526 :     if (escape_status < 2)
    4232             :     {
    4233        9892 :         cflags |= REG_NOSUB;
    4234             :         /* Also tell pg_regexec we only want the whole-match location. */
    4235        9892 :         nmatch = 1;
    4236             :     }
    4237             : 
    4238             :     /* Prepare the regexp. */
    4239       10526 :     re = RE_compile_and_cache(pattern_text, cflags, collation);
    4240             : 
    4241             :     /* start_ptr points to the data_pos'th character of src_text */
    4242       10526 :     start_ptr = (char *) VARDATA_ANY(src_text);
    4243       10526 :     data_pos = 0;
    4244             : 
    4245       14504 :     while (search_start <= data_len)
    4246             :     {
    4247             :         int         regexec_result;
    4248             : 
    4249       14498 :         CHECK_FOR_INTERRUPTS();
    4250             : 
    4251       14498 :         regexec_result = pg_regexec(re,
    4252             :                                     data,
    4253             :                                     data_len,
    4254             :                                     search_start,
    4255             :                                     NULL,   /* no details */
    4256             :                                     nmatch,
    4257             :                                     pmatch,
    4258             :                                     0);
    4259             : 
    4260       14498 :         if (regexec_result == REG_NOMATCH)
    4261        9164 :             break;
    4262             : 
    4263        5334 :         if (regexec_result != REG_OKAY)
    4264             :         {
    4265             :             char        errMsg[100];
    4266             : 
    4267           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    4268           0 :             ereport(ERROR,
    4269             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    4270             :                      errmsg("regular expression failed: %s", errMsg)));
    4271             :         }
    4272             : 
    4273             :         /*
    4274             :          * Count matches, and decide whether to replace this match.
    4275             :          */
    4276        5334 :         nmatches++;
    4277        5334 :         if (n > 0 && nmatches != n)
    4278             :         {
    4279             :             /*
    4280             :              * No, so advance search_start, but not start_ptr/data_pos. (Thus,
    4281             :              * we treat the matched text as if it weren't matched, and copy it
    4282             :              * to the output later.)  As in the normal case below, step one
                     :              * extra character past a zero-length match.
    4283             :              */
    4284          60 :             search_start = pmatch[0].rm_eo;
    4285          60 :             if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4286           0 :                 search_start++;
    4287          60 :             continue;
    4288             :         }
    4289             : 
    4290             :         /*
    4291             :          * Copy the text to the left of the match position.  Note we are given
    4292             :          * character not byte indexes.
    4293             :          */
    4294        5274 :         if (pmatch[0].rm_so - data_pos > 0)
    4295             :         {
    4296             :             int         chunk_len;
    4297             : 
    4298        5102 :             chunk_len = charlen_to_bytelen(start_ptr,
    4299        5102 :                                            pmatch[0].rm_so - data_pos);
    4300        5102 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4301             : 
    4302             :             /*
    4303             :              * Advance start_ptr over that text, to avoid multiple rescans of
    4304             :              * it if the replace_text contains multiple back-references.
    4305             :              */
    4306        5102 :             start_ptr += chunk_len;
    4307        5102 :             data_pos = pmatch[0].rm_so;
    4308             :         }
    4309             : 
    4310             :         /*
    4311             :          * Copy the replace_text, processing escapes if any are present.
    4312             :          */
    4313        5274 :         if (escape_status > 0)
    4314         212 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    4315             :                                          start_ptr, data_pos);
    4316             :         else
    4317        5062 :             appendStringInfoText(&buf, replace_text);
    4318             : 
    4319             :         /* Advance start_ptr and data_pos over the matched text. */
    4320       10548 :         start_ptr += charlen_to_bytelen(start_ptr,
    4321        5274 :                                         pmatch[0].rm_eo - data_pos);
    4322        5274 :         data_pos = pmatch[0].rm_eo;
    4323             : 
    4324             :         /*
    4325             :          * If we only want to replace one occurrence, we're done.
    4326             :          */
    4327        5274 :         if (n > 0)
    4328        1356 :             break;
    4329             : 
    4330             :         /*
    4331             :          * Advance search position.  Normally we start the next search at the
    4332             :          * end of the previous match; but if the match was of zero length, we
    4333             :          * have to advance by one character, or we'd just find the same match
    4334             :          * again.
    4335             :          */
    4336        3918 :         search_start = data_pos;
    4337        3918 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4338          12 :             search_start++;
    4339             :     }
    4340             : 
    4341             :     /*
    4342             :      * Copy the text to the right of the last match.  start_ptr already
                     :      * points at the first uncopied byte, so the remainder is measured in
                     :      * bytes up to the end of the src_text varlena.
    4343             :      */
    4344       10526 :     if (data_pos < data_len)
    4345             :     {
    4346             :         int         chunk_len;
    4347             : 
    4348       10062 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4349       10062 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4350             :     }
    4351             : 
    4352       10526 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4353       10526 :     pfree(buf.data);
    4354       10526 :     pfree(data);
    4355             : 
    4356       10526 :     return ret_text;
    4357             : }
    4358             : 
    4359             : /*
    4360             :  * split_part
    4361             :  * parse input string based on provided field separator
    4362             :  * return N'th item (1 based, negative counts from end)
                     :  *
                     :  * Arguments: 0 = input string, 1 = field separator, 2 = field number.
                     :  *
                     :  * A field number of zero is rejected with ERRCODE_INVALID_PARAMETER_VALUE.
                     :  * An empty input string yields an empty string.  If the separator is
                     :  * empty, or does not occur in the input, the whole input is returned for
                     :  * field 1 or -1 and an empty string for any other field number.
                     :  * Requesting a field beyond the available ones yields an empty string.
    4363             :  */
    4364             : Datum
    4365         102 : split_part(PG_FUNCTION_ARGS)
    4366             : {
    4367         102 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4368         102 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4369         102 :     int         fldnum = PG_GETARG_INT32(2);
    4370             :     int         inputstring_len;
    4371             :     int         fldsep_len;
    4372             :     TextPositionState state;
    4373             :     char       *start_ptr;
    4374             :     char       *end_ptr;
    4375             :     text       *result_text;
    4376             :     bool        found;
    4377             : 
    4378             :     /* field number is 1 based */
    4379         102 :     if (fldnum == 0)
    4380           6 :         ereport(ERROR,
    4381             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4382             :                  errmsg("field position must not be zero")));
    4383             : 
    4384          96 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4385          96 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4386             : 
    4387             :     /* return empty string for empty input string */
    4388          96 :     if (inputstring_len < 1)
    4389          12 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4390             : 
    4391             :     /* handle empty field separator */
    4392          84 :     if (fldsep_len < 1)
    4393             :     {
    4394             :         /* if first or last field, return input string, else empty string */
    4395          24 :         if (fldnum == 1 || fldnum == -1)
    4396          12 :             PG_RETURN_TEXT_P(inputstring);
    4397             :         else
    4398          12 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4399             :     }
    4400             : 
    4401             :     /* find the first field separator */
    4402          60 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4403             : 
    4404          60 :     found = text_position_next(&state);
    4405             : 
    4406             :     /* special case if fldsep not found at all */
    4407          60 :     if (!found)
    4408             :     {
    4409          12 :         text_position_cleanup(&state);
    4410             :         /* if first or last field, return input string, else empty string */
    4411          12 :         if (fldnum == 1 || fldnum == -1)
    4412           6 :             PG_RETURN_TEXT_P(inputstring);
    4413             :         else
    4414           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4415             :     }
    4416             : 
    4417             :     /*
    4418             :      * take care of a negative field number (i.e. count from the right) by
    4419             :      * converting to a positive field number; we need total number of fields
    4420             :      */
    4421          48 :     if (fldnum < 0)
    4422             :     {
    4423             :         /* we found a fldsep, so there are at least two fields */
    4424          24 :         int         numfields = 2;
    4425             : 
    4426          36 :         while (text_position_next(&state))
    4427          12 :             numfields++;
    4428             : 
    4429             :         /* special case of last field does not require an extra pass */
    4430          24 :         if (fldnum == -1)
    4431             :         {
    4432           6 :             start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
    4433           6 :             end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
    4434           6 :             text_position_cleanup(&state);
    4435           6 :             PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
    4436             :                                                       end_ptr - start_ptr));
    4437             :         }
    4438             : 
    4439             :         /* else, convert fldnum to positive notation: -k becomes numfields + 1 - k */
    4440          18 :         fldnum += numfields + 1;
    4441             : 
    4442             :         /* if nonexistent field, return empty string */
    4443          18 :         if (fldnum <= 0)
    4444             :         {
    4445           6 :             text_position_cleanup(&state);
    4446           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4447             :         }
    4448             : 
    4449             :         /* reset to pointing at first match, but now with positive fldnum */
    4450          12 :         text_position_reset(&state);
    4451          12 :         found = text_position_next(&state);
    4452             :         Assert(found);
    4453             :     }
    4454             : 
    4455             :     /* identify bounds of first field */
    4456          36 :     start_ptr = VARDATA_ANY(inputstring);
    4457          36 :     end_ptr = text_position_get_match_ptr(&state);
    4458             : 
    4459          66 :     while (found && --fldnum > 0)
    4460             :     {
    4461             :         /* identify bounds of next field */
    4462          30 :         start_ptr = end_ptr + fldsep_len;
    4463          30 :         found = text_position_next(&state);
    4464          30 :         if (found)
    4465          18 :             end_ptr = text_position_get_match_ptr(&state);
    4466             :     }
    4467             : 
    4468          36 :     text_position_cleanup(&state);
    4469             : 
    4470          36 :     if (fldnum > 0)
    4471             :     {
    4472             :         /* N'th field separator not found */
    4473             :         /* if last field requested, return it, else empty string */
    4474          12 :         if (fldnum == 1)
    4475             :         {
    4476           6 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);
    4477             : 
    4478           6 :             result_text = cstring_to_text_with_len(start_ptr,
    4479             :                                                    inputstring_len - last_len);
    4480             :         }
    4481             :         else
    4482           6 :             result_text = cstring_to_text("");
    4483             :     }
    4484             :     else
    4485             :     {
    4486             :         /* non-last field requested */
    4487          24 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
    4488             :     }
    4489             : 
    4490          36 :     PG_RETURN_TEXT_P(result_text);
    4491             : }
    4492             : 
    4493             : /*
    4494             :  * Convenience function to return true when two text params are equal.
                     :  *
                     :  * Equality is decided by calling texteq through DirectFunctionCall2Coll,
                     :  * with collid passed as the collation.
    4495             :  */
    4496             : static bool
    4497         348 : text_isequal(text *txt1, text *txt2, Oid collid)
    4498             : {
    4499         348 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
    4500             :                                                 collid,
    4501             :                                                 PointerGetDatum(txt1),
    4502             :                                                 PointerGetDatum(txt2)));
    4503             : }
    4504             : 
    4505             : /*
    4506             :  * text_to_array
    4507             :  * parse input string and return text array of elements,
    4508             :  * based on provided field separator
    4509             :  */
    4510             : Datum
    4511         146 : text_to_array(PG_FUNCTION_ARGS)
    4512             : {
    4513             :     SplitTextOutputData tstate;
    4514             : 
    4515             :     /* For array output, tstate should start as all zeroes */
    4516         146 :     memset(&tstate, 0, sizeof(tstate));
    4517             : 
    4518         146 :     if (!split_text(fcinfo, &tstate))
    4519           6 :         PG_RETURN_NULL();
    4520             : 
    4521         128 :     if (tstate.astate == NULL)
    4522           6 :         PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4523             : 
    4524         122 :     PG_RETURN_DATUM(makeArrayResult(tstate.astate,
    4525             :                                     CurrentMemoryContext));
    4526             : }
    4527             : 
    4528             : /*
    4529             :  * text_to_array_null
    4530             :  * parse input string and return text array of elements,
    4531             :  * based on provided field separator and null string
    4532             :  *
    4533             :  * This is a separate entry point only to prevent the regression tests from
    4534             :  * complaining about different argument sets for the same internal function.
    4535             :  */
    4536             : Datum
    4537          60 : text_to_array_null(PG_FUNCTION_ARGS)
    4538             : {
    4539          60 :     return text_to_array(fcinfo);
    4540             : }
    4541             : 
    4542             : /*
    4543             :  * text_to_table
    4544             :  * parse input string and return table of elements,
    4545             :  * based on provided field separator
    4546             :  */
    4547             : Datum
    4548          84 : text_to_table(PG_FUNCTION_ARGS)
    4549             : {
    4550          84 :     ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
    4551             :     SplitTextOutputData tstate;
    4552             : 
    4553          84 :     tstate.astate = NULL;
    4554          84 :     InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
    4555          84 :     tstate.tupstore = rsi->setResult;
    4556          84 :     tstate.tupdesc = rsi->setDesc;
    4557             : 
    4558          84 :     (void) split_text(fcinfo, &tstate);
    4559             : 
    4560          84 :     return (Datum) 0;
    4561             : }
    4562             : 
    4563             : /*
    4564             :  * text_to_table_null
    4565             :  * parse input string and return table of elements,
    4566             :  * based on provided field separator and null string
    4567             :  *
    4568             :  * This is a separate entry point only to prevent the regression tests from
    4569             :  * complaining about different argument sets for the same internal function.
    4570             :  */
    4571             : Datum
    4572          24 : text_to_table_null(PG_FUNCTION_ARGS)
    4573             : {
    4574          24 :     return text_to_table(fcinfo);
    4575             : }
    4576             : 
    4577             : /*
    4578             :  * Common code for text_to_array, text_to_array_null, text_to_table
    4579             :  * and text_to_table_null functions.
    4580             :  *
    4581             :  * These are not strict so we have to test for null inputs explicitly.
    4582             :  * Returns false if result is to be null, else returns true.
    4583             :  *
    4584             :  * Note that if the result is valid but empty (zero elements), we return
    4585             :  * without changing *tstate --- caller must handle that case, too.
    4586             :  */
    4587             : static bool
    4588         230 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
    4589             : {
    4590             :     text       *inputstring;
    4591             :     text       *fldsep;
    4592             :     text       *null_string;
    4593         230 :     Oid         collation = PG_GET_COLLATION();
    4594             :     int         inputstring_len;
    4595             :     int         fldsep_len;
    4596             :     char       *start_ptr;
    4597             :     text       *result_text;
    4598             : 
    4599             :     /* when input string is NULL, then result is NULL too */
    4600         230 :     if (PG_ARGISNULL(0))
    4601          12 :         return false;
    4602             : 
    4603         218 :     inputstring = PG_GETARG_TEXT_PP(0);
    4604             : 
    4605             :     /* fldsep can be NULL */
    4606         218 :     if (!PG_ARGISNULL(1))
    4607         188 :         fldsep = PG_GETARG_TEXT_PP(1);
    4608             :     else
    4609          30 :         fldsep = NULL;
    4610             : 
    4611             :     /* null_string can be NULL or omitted */
    4612         218 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4613          84 :         null_string = PG_GETARG_TEXT_PP(2);
    4614             :     else
    4615         134 :         null_string = NULL;
    4616             : 
    4617         218 :     if (fldsep != NULL)
    4618             :     {
    4619             :         /*
    4620             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4621             :          * to search for occurrences of fldsep.
    4622             :          */
    4623             :         TextPositionState state;
    4624             : 
    4625         188 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4626         188 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4627             : 
    4628             :         /* return empty set for empty input string */
    4629         188 :         if (inputstring_len < 1)
    4630          60 :             return true;
    4631             : 
    4632             :         /* empty field separator: return input string as a one-element set */
    4633         176 :         if (fldsep_len < 1)
    4634             :         {
    4635          48 :             split_text_accum_result(tstate, inputstring,
    4636             :                                     null_string, collation);
    4637          48 :             return true;
    4638             :         }
    4639             : 
    4640         128 :         text_position_setup(inputstring, fldsep, collation, &state);
    4641             : 
    4642         116 :         start_ptr = VARDATA_ANY(inputstring);
    4643             : 
    4644             :         for (;;)
    4645         464 :         {
    4646             :             bool        found;
    4647             :             char       *end_ptr;
    4648             :             int         chunk_len;
    4649             : 
    4650         580 :             CHECK_FOR_INTERRUPTS();
    4651             : 
    4652         580 :             found = text_position_next(&state);
    4653         580 :             if (!found)
    4654             :             {
    4655             :                 /* fetch last field */
    4656         116 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4657         116 :                 end_ptr = NULL; /* not used, but some compilers complain */
    4658             :             }
    4659             :             else
    4660             :             {
    4661             :                 /* fetch non-last field */
    4662         464 :                 end_ptr = text_position_get_match_ptr(&state);
    4663         464 :                 chunk_len = end_ptr - start_ptr;
    4664             :             }
    4665             : 
    4666             :             /* build a temp text datum to pass to split_text_accum_result */
    4667         580 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4668             : 
    4669             :             /* stash away this field */
    4670         580 :             split_text_accum_result(tstate, result_text,
    4671             :                                     null_string, collation);
    4672             : 
    4673         580 :             pfree(result_text);
    4674             : 
    4675         580 :             if (!found)
    4676         116 :                 break;
    4677             : 
    4678         464 :             start_ptr = end_ptr + fldsep_len;
    4679             :         }
    4680             : 
    4681         116 :         text_position_cleanup(&state);
    4682             :     }
    4683             :     else
    4684             :     {
    4685             :         /*
    4686             :          * When fldsep is NULL, each character in the input string becomes a
    4687             :          * separate element in the result set.  The separator is effectively
    4688             :          * the space between characters.
    4689             :          */
    4690          30 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4691             : 
    4692          30 :         start_ptr = VARDATA_ANY(inputstring);
    4693             : 
    4694         252 :         while (inputstring_len > 0)
    4695             :         {
    4696         222 :             int         chunk_len = pg_mblen(start_ptr);
    4697             : 
    4698         222 :             CHECK_FOR_INTERRUPTS();
    4699             : 
    4700             :             /* build a temp text datum to pass to split_text_accum_result */
    4701         222 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4702             : 
    4703             :             /* stash away this field */
    4704         222 :             split_text_accum_result(tstate, result_text,
    4705             :                                     null_string, collation);
    4706             : 
    4707         222 :             pfree(result_text);
    4708             : 
    4709         222 :             start_ptr += chunk_len;
    4710         222 :             inputstring_len -= chunk_len;
    4711             :         }
    4712             :     }
    4713             : 
    4714         146 :     return true;
    4715             : }
    4716             : 
    4717             : /*
    4718             :  * Add text item to result set (table or array).
    4719             :  *
    4720             :  * This is also responsible for checking to see if the item matches
    4721             :  * the null_string, in which case we should emit NULL instead.
                     :  *
                     :  * The match is tested with text_isequal under the given collation, and
                     :  * only when null_string is non-NULL.  If tstate->tupstore is set, the item
                     :  * is stored as a one-column row in that tuplestore; otherwise it is
                     :  * appended to tstate->astate with accumArrayResult, using
                     :  * CurrentMemoryContext.
    4722             :  */
    4723             : static void
    4724         850 : split_text_accum_result(SplitTextOutputData *tstate,
    4725             :                         text *field_value,
    4726             :                         text *null_string,
    4727             :                         Oid collation)
    4728             : {
    4729         850 :     bool        is_null = false;
    4730             : 
    4731         850 :     if (null_string && text_isequal(field_value, null_string, collation))
    4732          60 :         is_null = true;
    4733             : 
    4734         850 :     if (tstate->tupstore)
    4735             :     {
    4736             :         Datum       values[1];
    4737             :         bool        nulls[1];
    4738             : 
    4739         228 :         values[0] = PointerGetDatum(field_value);
    4740         228 :         nulls[0] = is_null;
    4741             : 
    4742         228 :         tuplestore_putvalues(tstate->tupstore,
    4743             :                              tstate->tupdesc,
    4744             :                              values,
    4745             :                              nulls);
    4746             :     }
    4747             :     else
    4748             :     {
    4749         622 :         tstate->astate = accumArrayResult(tstate->astate,
    4750             :                                           PointerGetDatum(field_value),
    4751             :                                           is_null,
    4752             :                                           TEXTOID,
    4753             :                                           CurrentMemoryContext);
    4754             :     }
    4755         850 : }
    4756             : 
    4757             : /*
    4758             :  * array_to_text
    4759             :  * concatenate Cstring representation of input array elements
    4760             :  * using provided field separator
                     :  *
                     :  * Argument 0 is the array and argument 1 the separator, which is converted
                     :  * to a C string.  The work is done by array_to_text_internal, called here
                     :  * with a NULL null_string.
    4761             :  */
    4762             : Datum
    4763       59664 : array_to_text(PG_FUNCTION_ARGS)
    4764             : {
    4765       59664 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    4766       59664 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4767             : 
    4768       59664 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
    4769             : }
    4770             : 
    4771             : /*
    4772             :  * array_to_text_null
    4773             :  * concatenate Cstring representation of input array elements
    4774             :  * using provided field separator and null string
    4775             :  *
    4776             :  * This version is not strict so we have to test for null inputs explicitly.
                     :  * A NULL array (argument 0) or NULL separator (argument 1) gives a NULL
                     :  * result; a NULL null string (argument 2) is allowed and is handed to
                     :  * array_to_text_internal as a null pointer.
    4777             :  */
    4778             : Datum
    4779          12 : array_to_text_null(PG_FUNCTION_ARGS)
    4780             : {
    4781             :     ArrayType  *v;
    4782             :     char       *fldsep;
    4783             :     char       *null_string;
    4784             : 
    4785             :     /* returns NULL when first or second parameter is NULL */
    4786          12 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    4787           0 :         PG_RETURN_NULL();
    4788             : 
    4789          12 :     v = PG_GETARG_ARRAYTYPE_P(0);
    4790          12 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4791             : 
    4792             :     /* NULL null string is passed through as a null pointer */
    4793          12 :     if (!PG_ARGISNULL(2))
    4794           6 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    4795             :     else
    4796           6 :         null_string = NULL;
    4797             : 
    4798          12 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    4799             : }
    4800             : 
    4801             : /*
    4802             :  * common code for array_to_text and array_to_text_null functions
                     :  *
                     :  * fcinfo:      call info; flinfo->fn_extra caches the element type's
                     :  *              output-function data, allocated in flinfo->fn_mcxt
                     :  * v:           the array to convert
                     :  * fldsep:      string inserted before every emitted item except the first
                     :  * null_string: text emitted for null elements; if NULL, null elements
                     :  *              are skipped entirely
                     :  *
                     :  * Returns a text value; an array with no elements yields an empty string.
    4803             :  */
    4804             : static text *
    4805       59694 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    4806             :                        const char *fldsep, const char *null_string)
    4807             : {
    4808             :     text       *result;
    4809             :     int         nitems,
    4810             :                *dims,
    4811             :                 ndims;
    4812             :     Oid         element_type;
    4813             :     int         typlen;
    4814             :     bool        typbyval;
    4815             :     char        typalign;
    4816             :     StringInfoData buf;
    4817       59694 :     bool        printed = false;    /* true once any item has been appended */
    4818             :     char       *p;
    4819             :     bits8      *bitmap;
    4820             :     int         bitmask;
    4821             :     int         i;
    4822             :     ArrayMetaState *my_extra;
    4823             : 
    4824       59694 :     ndims = ARR_NDIM(v);
    4825       59694 :     dims = ARR_DIMS(v);
    4826       59694 :     nitems = ArrayGetNItems(ndims, dims);
    4827             : 
    4828             :     /* if there are no elements, return an empty string */
    4829       59694 :     if (nitems == 0)
    4830       36048 :         return cstring_to_text_with_len("", 0);
    4831             : 
    4832       23646 :     element_type = ARR_ELEMTYPE(v);
    4833       23646 :     initStringInfo(&buf);
    4834             : 
    4835             :     /*
    4836             :      * We arrange to look up info about element type, including its output
    4837             :      * conversion proc, only once per series of calls, assuming the element
    4838             :      * type doesn't change underneath us.
    4839             :      */
    4840       23646 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4841       23646 :     if (my_extra == NULL)
    4842             :     {
    4843        1354 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4844             :                                                       sizeof(ArrayMetaState));
    4845        1354 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4846        1354 :         my_extra->element_type = ~element_type; /* never equal, so the lookup below runs */
    4847             :     }
    4848             : 
    4849       23646 :     if (my_extra->element_type != element_type)
    4850             :     {
    4851             :         /*
    4852             :          * Get info about element type, including its output conversion proc
    4853             :          */
    4854        1354 :         get_type_io_data(element_type, IOFunc_output,
    4855             :                          &my_extra->typlen, &my_extra->typbyval,
    4856             :                          &my_extra->typalign, &my_extra->typdelim,
    4857             :                          &my_extra->typioparam, &my_extra->typiofunc);
    4858        1354 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    4859        1354 :                       fcinfo->flinfo->fn_mcxt);
    4860        1354 :         my_extra->element_type = element_type;
    4861             :     }
    4862       23646 :     typlen = my_extra->typlen;
    4863       23646 :     typbyval = my_extra->typbyval;
    4864       23646 :     typalign = my_extra->typalign;
    4865             : 
    4866       23646 :     p = ARR_DATA_PTR(v);            /* cursor over the element data */
    4867       23646 :     bitmap = ARR_NULLBITMAP(v);     /* checked against NULL before every use below */
    4868       23646 :     bitmask = 1;                    /* bit of *bitmap belonging to the current item */
    4869             : 
    4870       80826 :     for (i = 0; i < nitems; i++)
    4871             :     {
    4872             :         Datum       itemvalue;
    4873             :         char       *value;
    4874             : 
    4875             :         /* Get source element, checking for NULL (a clear bitmap bit marks a null) */
    4876       57180 :         if (bitmap && (*bitmap & bitmask) == 0)
    4877             :         {
    4878             :             /* if null_string is NULL, we just ignore null elements */
    4879          18 :             if (null_string != NULL)
    4880             :             {
    4881           6 :                 if (printed)
    4882           6 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
    4883             :                 else
    4884           0 :                     appendStringInfoString(&buf, null_string);
    4885           6 :                 printed = true;
    4886             :             }
    4887             :         }                           /* note: p is not advanced for a null element */
    4888             :         else
    4889             :         {
    4890       57162 :             itemvalue = fetch_att(p, typbyval, typlen);
    4891             : 
    4892       57162 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
    4893             : 
    4894       57162 :             if (printed)
    4895       33516 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    4896             :             else
    4897       23646 :                 appendStringInfoString(&buf, value);
    4898       57162 :             printed = true;
    4899             : 
    4900       57162 :             p = att_addlength_pointer(p, typlen, p);
    4901       57162 :             p = (char *) att_align_nominal(p, typalign);
    4902             :         }
    4903             : 
    4904             :         /* advance bitmap pointer if any */
    4905       57180 :         if (bitmap)
    4906             :         {
    4907         108 :             bitmask <<= 1;
    4908         108 :             if (bitmask == 0x100)   /* all 8 bits of this byte have been used */
    4909             :             {
    4910           0 :                 bitmap++;
    4911           0 :                 bitmask = 1;
    4912             :             }
    4913             :         }
    4914             :     }
    4915             : 
    4916       23646 :     result = cstring_to_text_with_len(buf.data, buf.len);
    4917       23646 :     pfree(buf.data);
    4918             : 
    4919       23646 :     return result;
    4920             : }
    4921             : 
    4922             : #define HEXBASE 16
    4923             : /*
    4924             :  * Convert an int32 to a string containing a base 16 (hex) representation of
    4925             :  * the number.
                     :  *
                     :  * The argument is converted to uint32 before formatting.  Digits are
                     :  * lowercase and are written from the end of buf backwards, least
                     :  * significant first.  The do/while always emits at least one digit, so
                     :  * zero yields "0"; the loop stops once the remaining value is zero, so
                     :  * no leading zeros are produced.
    4926             :  */
    4927             : Datum
    4928       38684 : to_hex32(PG_FUNCTION_ARGS)
    4929             : {
    4930       38684 :     uint32      value = (uint32) PG_GETARG_INT32(0);
    4931             :     char       *ptr;
    4932       38684 :     const char *digits = "0123456789abcdef";
    4933             :     char        buf[32];        /* bigger than needed, but reasonable */
    4934             : 
    4935       38684 :     ptr = buf + sizeof(buf) - 1;    /* last byte of buf holds the terminator */
    4936       38684 :     *ptr = '\0';
    4937             : 
    4938             :     do
    4939             :     {
    4940       74558 :         *--ptr = digits[value % HEXBASE];
    4941       74558 :         value /= HEXBASE;
    4942       74558 :     } while (ptr > buf && value);   /* stop at start of buf or when no digits remain */
    4943             : 
    4944       38684 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    4945             : }
    4946             : 
    4947             : /*
    4948             :  * Convert an int64 to a string containing a base 16 (hex) representation of
    4949             :  * the number.
                     :  *
                     :  * The argument is converted to uint64 before formatting.  Digits are
                     :  * lowercase and are written from the end of buf backwards, least
                     :  * significant first.  The do/while always emits at least one digit, so
                     :  * zero yields "0"; the loop stops once the remaining value is zero, so
                     :  * no leading zeros are produced.
    4950             :  */
    4951             : Datum
    4952           6 : to_hex64(PG_FUNCTION_ARGS)
    4953             : {
    4954           6 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    4955             :     char       *ptr;
    4956           6 :     const char *digits = "0123456789abcdef";
    4957             :     char        buf[32];        /* bigger than needed, but reasonable */
    4958             : 
    4959           6 :     ptr = buf + sizeof(buf) - 1;    /* last byte of buf holds the terminator */
    4960           6 :     *ptr = '\0';
    4961             : 
    4962             :     do
    4963             :     {
    4964          48 :         *--ptr = digits[value % HEXBASE];
    4965          48 :         value /= HEXBASE;
    4966          48 :     } while (ptr > buf && value);   /* stop at start of buf or when no digits remain */
    4967             : 
    4968           6 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    4969             : }
    4970             : 
    4971             : /*
    4972             :  * Return the size of a datum, possibly compressed
    4973             :  *
    4974             :  * Works on any data type
                     :  *
                     :  * The argument type's typlen is looked up on the first call and cached in
                     :  * fn_extra (allocated in fn_mcxt) for later calls.  The result is
                     :  * toast_datum_size() for typlen -1, strlen + 1 for typlen -2 (cstring),
                     :  * and typlen itself otherwise.  A typlen of 0 is reported as a failed
                     :  * cache lookup.
    4975             :  */
    4976             : Datum
    4977         122 : pg_column_size(PG_FUNCTION_ARGS)
    4978             : {
    4979         122 :     Datum       value = PG_GETARG_DATUM(0);
    4980             :     int32       result;
    4981             :     int         typlen;
    4982             : 
    4983             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    4984         122 :     if (fcinfo->flinfo->fn_extra == NULL)
    4985             :     {
    4986             :         /* Lookup the datatype of the supplied argument */
    4987         122 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    4988             : 
    4989         122 :         typlen = get_typlen(argtypeid);
    4990         122 :         if (typlen == 0)        /* should not happen */
    4991           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    4992             : 
    4993         122 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4994             :                                                       sizeof(int));
    4995         122 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    4996             :     }
    4997             :     else
    4998           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);   /* reuse the cached typlen */
    4999             : 
    5000         122 :     if (typlen == -1)
    5001             :     {
    5002             :         /* varlena type, possibly toasted */
    5003         122 :         result = toast_datum_size(value);
    5004             :     }
    5005           0 :     else if (typlen == -2)
    5006             :     {
    5007             :         /* cstring */
    5008           0 :         result = strlen(DatumGetCString(value)) + 1;    /* +1 counts the terminator */
    5009             :     }
    5010             :     else
    5011             :     {
    5012             :         /* ordinary fixed-width type */
    5013           0 :         result = typlen;
    5014             :     }
    5015             : 
    5016         122 :     PG_RETURN_INT32(result);
    5017             : }
    5018             : 
    5019             : /*
    5020             :  * Return the compression method stored in the compressed attribute.  Return
    5021             :  * NULL for non varlena type or uncompressed data.
                     :  *
                     :  * The argument type's typlen is looked up on the first call and cached in
                     :  * fn_extra (allocated in fn_mcxt).  For a varlena argument (typlen -1)
                     :  * the result is the text "pglz" or "lz4"; TOAST_INVALID_COMPRESSION_ID
                     :  * yields NULL, and any other id raises an error.
    5022             :  */
    5023             : Datum
    5024         162 : pg_column_compression(PG_FUNCTION_ARGS)
    5025             : {
    5026             :     int         typlen;
    5027             :     char       *result;
    5028             :     ToastCompressionId cmid;
    5029             : 
    5030             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5031         162 :     if (fcinfo->flinfo->fn_extra == NULL)
    5032             :     {
    5033             :         /* Lookup the datatype of the supplied argument */
    5034         108 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5035             : 
    5036         108 :         typlen = get_typlen(argtypeid);
    5037         108 :         if (typlen == 0)        /* should not happen */
    5038           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5039             : 
    5040         108 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5041             :                                                       sizeof(int));
    5042         108 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5043             :     }
    5044             :     else
    5045          54 :         typlen = *((int *) fcinfo->flinfo->fn_extra);   /* reuse the cached typlen */
    5046             : 
    5047         162 :     if (typlen != -1)               /* not a varlena type */
    5048           0 :         PG_RETURN_NULL();
    5049             : 
    5050             :     /* get the compression method id stored in the compressed varlena
                     :      *
                     :      * NOTE(review): the argument is handed over as the raw datum pointer,
                     :      * presumably so that the stored form of the value is inspected --
                     :      * confirm against toast_get_compression_id. */
    5051         162 :     cmid = toast_get_compression_id((struct varlena *)
    5052         162 :                                     DatumGetPointer(PG_GETARG_DATUM(0)));
    5053         162 :     if (cmid == TOAST_INVALID_COMPRESSION_ID)
    5054           6 :         PG_RETURN_NULL();
    5055             : 
    5056             :     /* convert compression method id to compression method name */
    5057         156 :     switch (cmid)
    5058             :     {
    5059          66 :         case TOAST_PGLZ_COMPRESSION_ID:
    5060          66 :             result = "pglz";
    5061          66 :             break;
    5062          90 :         case TOAST_LZ4_COMPRESSION_ID:
    5063          90 :             result = "lz4";
    5064          90 :             break;
    5065           0 :         default:
    5066           0 :             elog(ERROR, "invalid compression method id %d", cmid);
    5067             :     }
    5068             : 
    5069         156 :     PG_RETURN_TEXT_P(cstring_to_text(result));
    5070             : }
    5071             : 
    5072             : /*
    5073             :  * string_agg - Concatenates values and returns string.
    5074             :  *
    5075             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    5076             :  *
    5077             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    5078             :  * actually used at all, and on subsequent calls the delimiter precedes
    5079             :  * the associated value.
    5080             :  */
    5081             : 
    5082             : /* subroutine to initialize state
                     :  *
                     :  * Creates an empty StringInfo in the aggregate memory context reported by
                     :  * AggCheckCallContext and returns it.  Raises an error when fcinfo does
                     :  * not describe an aggregate call.  The memory context that was current
                     :  * on entry is switched back to before returning.
                     :  */
    5083             : static StringInfo
    5084        1974 : makeStringAggState(FunctionCallInfo fcinfo)
    5085             : {
    5086             :     StringInfo  state;
    5087             :     MemoryContext aggcontext;
    5088             :     MemoryContext oldcontext;
    5089             : 
    5090        1974 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    5091             :     {
    5092             :         /* cannot be called directly because of internal-type argument */
    5093           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    5094             :     }
    5095             : 
    5096             :     /*
    5097             :      * Create state in aggregate context.  It'll stay there across subsequent
    5098             :      * calls.
    5099             :      */
    5100        1974 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    5101        1974 :     state = makeStringInfo();
    5102        1974 :     MemoryContextSwitchTo(oldcontext);
    5103             : 
    5104        1974 :     return state;
    5105             : }
    5106             : 
    5107             : Datum
    5108      850216 : string_agg_transfn(PG_FUNCTION_ARGS)
    5109             : {
    5110             :     StringInfo  state;          /* arg 0: running state, or NULL if none exists yet */
    5111             : 
    5112      850216 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5113             : 
    5114             :     /* Append the value unless null, preceding it with the delimiter.
                     :      * Arg 1 is the value and arg 2 the delimiter; the state is only
                     :      * created once a non-null value arrives. */
    5115      850216 :     if (!PG_ARGISNULL(1))
    5116             :     {
    5117      835168 :         text       *value = PG_GETARG_TEXT_PP(1);
    5118      835168 :         bool        isfirst = false;    /* true if this call created the state */
    5119             : 
    5120             :         /*
    5121             :          * You might think we can just throw away the first delimiter, however
    5122             :          * we must keep it as we may be a parallel worker doing partial
    5123             :          * aggregation building a state to send to the main process.  We need
    5124             :          * to keep the delimiter of every aggregation so that the combine
    5125             :          * function can properly join up the strings of two separately
    5126             :          * partially aggregated results.  The first delimiter is only stripped
    5127             :          * off in the final function.  To know how much to strip off the front
    5128             :          * of the string, we store the length of the first delimiter in the
    5129             :          * StringInfo's cursor field, which we don't otherwise need here.
    5130             :          */
    5131      835168 :         if (state == NULL)
    5132             :         {
    5133        1528 :             state = makeStringAggState(fcinfo);
    5134        1528 :             isfirst = true;
    5135             :         }
    5136             : 
    5137      835168 :         if (!PG_ARGISNULL(2))       /* a NULL delimiter appends nothing */
    5138             :         {
    5139      835168 :             text       *delim = PG_GETARG_TEXT_PP(2);
    5140             : 
    5141      835168 :             appendStringInfoText(state, delim);
    5142      835168 :             if (isfirst)
    5143        1528 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);   /* length of the leading delimiter */
    5144             :         }
    5145             : 
    5146      835168 :         appendStringInfoText(state, value);
    5147             :     }
    5148             : 
    5149             :     /*
    5150             :      * The transition type for string_agg() is declared to be "internal",
    5151             :      * which is a pass-by-value type the same size as a pointer.
                     :      *
                     :      * If no state exists at this point, the result is SQL NULL.
    5152             :      */
    5153      850216 :     if (state)
    5154      850132 :         PG_RETURN_POINTER(state);
    5155          84 :     PG_RETURN_NULL();
    5156             : }
    5157             : 
    5158             : /*
    5159             :  * string_agg_combine
    5160             :  *      Aggregate combine function for string_agg(text) and string_agg(bytea)
                     :  *
                     :  * Arg 0 (state1) is the state being accumulated into; arg 1 (state2) is
                     :  * appended to it.  When state1 is NULL, a new state is made with
                     :  * makeStringAggState and filled with a copy of state2's data and cursor.
                     :  * Returns NULL only when both inputs are NULL.  Raises an error when not
                     :  * called in an aggregate context.
    5161             :  */
    5162             : Datum
    5163         160 : string_agg_combine(PG_FUNCTION_ARGS)
    5164             : {
    5165             :     StringInfo  state1;
    5166             :     StringInfo  state2;
    5167             :     MemoryContext agg_context;
    5168             : 
    5169         160 :     if (!AggCheckCallContext(fcinfo, &agg_context))
    5170           0 :         elog(ERROR, "aggregate function called in non-aggregate context");
    5171             : 
    5172         160 :     state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5173         160 :     state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
    5174             : 
    5175         160 :     if (state2 == NULL)
    5176             :     {
    5177             :         /*
    5178             :          * NULL state2 is easy, just return state1, which we know is already
    5179             :          * in the agg_context
    5180             :          */
    5181           0 :         if (state1 == NULL)
    5182           0 :             PG_RETURN_NULL();
    5183           0 :         PG_RETURN_POINTER(state1);
    5184             :     }
    5185             : 
    5186         160 :     if (state1 == NULL)
    5187             :     {
    5188             :         /* We must copy state2's data into the agg_context */
    5189             :         MemoryContext old_context;
    5190             : 
    5191         120 :         old_context = MemoryContextSwitchTo(agg_context);
    5192         120 :         state1 = makeStringAggState(fcinfo);
    5193         120 :         appendBinaryStringInfo(state1, state2->data, state2->len);
    5194         120 :         state1->cursor = state2->cursor;    /* keep state2's leading-delimiter length */
    5195         120 :         MemoryContextSwitchTo(old_context);
    5196             :     }
    5197          40 :     else if (state2->len > 0)       /* an empty state2 leaves state1 untouched */
    5198             :     {
    5199             :         /* Combine ... state1->cursor does not change in this case */
    5200          40 :         appendBinaryStringInfo(state1, state2->data, state2->len);
    5201             :     }
    5202             : 
    5203         160 :     PG_RETURN_POINTER(state1);
    5204             : }
    5205             : 
    5206             : /*
    5207             :  * string_agg_serialize
    5208             :  *      Aggregate serialize function for string_agg(text) and string_agg(bytea)
    5209             :  *
    5210             :  * This is strict, so we need not handle NULL input
                     :  *
                     :  * The resulting bytea holds the state's cursor, sent as a 4-byte integer,
                     :  * followed by the state->len bytes of accumulated data.
    5211             :  */
    5212             : Datum
    5213         160 : string_agg_serialize(PG_FUNCTION_ARGS)
    5214             : {
    5215             :     StringInfo  state;
    5216             :     StringInfoData buf;
    5217             :     bytea      *result;
    5218             : 
    5219             :     /* cannot be called directly because of internal-type argument */
    5220             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5221             : 
    5222         160 :     state = (StringInfo) PG_GETARG_POINTER(0);
    5223             : 
    5224         160 :     pq_begintypsend(&buf);
    5225             : 
    5226             :     /* cursor */
    5227         160 :     pq_sendint(&buf, state->cursor, 4);
    5228             : 
    5229             :     /* data */
    5230         160 :     pq_sendbytes(&buf, state->data, state->len);
    5231             : 
    5232         160 :     result = pq_endtypsend(&buf);
    5233             : 
    5234         160 :     PG_RETURN_BYTEA_P(result);
    5235             : }
    5236             : 
    5237             : /*
    5238             :  * string_agg_deserialize
    5239             :  *      Aggregate deserial function for string_agg(text) and string_agg(bytea)
    5240             :  *
    5241             :  * This is strict, so we need not handle NULL input
                     :  *
                     :  * The input is read in the layout written by string_agg_serialize: a
                     :  * 4-byte cursor followed by the data bytes.  The result is a new state
                     :  * made with makeStringAggState, holding that cursor and a copy of the data.
    5242             :  */
    5243             : Datum
    5244         160 : string_agg_deserialize(PG_FUNCTION_ARGS)
    5245             : {
    5246             :     bytea      *sstate;
    5247             :     StringInfo  result;
    5248             :     StringInfoData buf;
    5249             :     char       *data;
    5250             :     int         datalen;
    5251             : 
    5252             :     /* cannot be called directly because of internal-type argument */
    5253             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5254             : 
    5255         160 :     sstate = PG_GETARG_BYTEA_PP(0);
    5256             : 
    5257             :     /*
    5258             :      * Copy the bytea into a StringInfo so that we can "receive" it using the
    5259             :      * standard recv-function infrastructure.
    5260             :      */
    5261         160 :     initStringInfo(&buf);
    5262         320 :     appendBinaryStringInfo(&buf,
    5263         320 :                            VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate));
    5264             : 
    5265         160 :     result = makeStringAggState(fcinfo);
    5266             : 
    5267             :     /* cursor */
    5268         160 :     result->cursor = pq_getmsgint(&buf, 4);
    5269             : 
    5270             :     /* data */
    5271         160 :     datalen = VARSIZE_ANY_EXHDR(sstate) - 4;    /* everything after the 4-byte cursor */
    5272         160 :     data = (char *) pq_getmsgbytes(&buf, datalen);
    5273         160 :     appendBinaryStringInfo(result, data, datalen);
    5274             : 
    5275         160 :     pq_getmsgend(&buf);
    5276         160 :     pfree(buf.data);            /* the temporary receive buffer is no longer needed */
    5277             : 
    5278         160 :     PG_RETURN_POINTER(result);
    5279             : }
    5280             : 
    5281             : Datum
    5282        1580 : string_agg_finalfn(PG_FUNCTION_ARGS)
    5283             : {
    5284             :     StringInfo  state;          /* arg 0: final transition state, or NULL */
    5285             : 
    5286             :     /* cannot be called directly because of internal-type argument */
    5287             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5288             : 
    5289        1580 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5290             : 
    5291        1580 :     if (state != NULL)
    5292             :     {
    5293             :         /* As per comment in transfn, strip data before the cursor position
                     :          * (cursor is the length of the first delimiter, so the result is
                     :          * the accumulated data without that leading delimiter) */
    5294        1508 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(&state->data[state->cursor],
    5295             :                                                   state->len - state->cursor));
    5296             :     }
    5297             :     else
    5298          72 :         PG_RETURN_NULL();       /* no state: the aggregate result is SQL NULL */
    5299             : }
    5300             : 
    5301             : /*
    5302             :  * Prepare cache with fmgr info for the output functions of the datatypes of
    5303             :  * the arguments of a concat-like function, beginning with argument "argidx".
    5304             :  * (Arguments before that will have corresponding slots in the resulting
    5305             :  * FmgrInfo array, but we don't fill those slots.)
                     :  *
                     :  * The array has PG_NARGS() entries, so entry i belongs to argument i.  It
                     :  * is stored in fcinfo->flinfo->fn_extra and is also returned.  An error is
                     :  * raised if the type of any argument from argidx on cannot be determined.
    5306             :  */
    5307             : static FmgrInfo *
    5308          40 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
    5309             : {
    5310             :     FmgrInfo   *foutcache;
    5311             :     int         i;
    5312             : 
    5313             :     /* We keep the info in fn_mcxt so it survives across calls */
    5314          40 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5315          40 :                                                 PG_NARGS() * sizeof(FmgrInfo));
    5316             : 
    5317         196 :     for (i = argidx; i < PG_NARGS(); i++)
    5318             :     {
    5319             :         Oid         valtype;
    5320             :         Oid         typOutput;
    5321             :         bool        typIsVarlena;   /* filled in by getTypeOutputInfo, not used here */
    5322             : 
    5323         156 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    5324         156 :         if (!OidIsValid(valtype))
    5325           0 :             elog(ERROR, "could not determine data type of concat() input");
    5326             : 
    5327         156 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
    5328         156 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
    5329             :     }
    5330             : 
    5331          40 :     fcinfo->flinfo->fn_extra = foutcache;
    5332             : 
    5333          40 :     return foutcache;
    5334             : }
    5335             : 
    5336             : /*
    5337             :  * Implementation of both concat() and concat_ws().
    5338             :  *
    5339             :  * sepstr is the separator string to place between values.
    5340             :  * argidx identifies the first argument to concatenate (counting from zero);
    5341             :  * note that this must be constant across any one series of calls.
                     :  * fcinfo supplies the arguments and, through flinfo->fn_extra, the cache
                     :  * of output-function info that is built the first time the non-VARIADIC
                     :  * path runs.
    5342             :  *
    5343             :  * Returns NULL if result should be NULL, else text value.
    5344             :  */
    5345             : static text *
    5346          72 : concat_internal(const char *sepstr, int argidx,
    5347             :                 FunctionCallInfo fcinfo)
    5348             : {
    5349             :     text       *result;
    5350             :     StringInfoData str;
    5351             :     FmgrInfo   *foutcache;
    5352          72 :     bool        first_arg = true;   /* cleared once the first non-null value is seen */
    5353             :     int         i;
    5354             : 
    5355             :     /*
    5356             :      * concat(VARIADIC some-array) is essentially equivalent to
    5357             :      * array_to_text(), ie concat the array elements with the given separator.
    5358             :      * So we just pass the case off to that code.
    5359             :      */
    5360          72 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5361             :     {
    5362             :         ArrayType  *arr;
    5363             : 
    5364             :         /* Should have just the one argument */
    5365             :         Assert(argidx == PG_NARGS() - 1);
    5366             : 
    5367             :         /* concat(VARIADIC NULL) is defined as NULL */
    5368          30 :         if (PG_ARGISNULL(argidx))
    5369          12 :             return NULL;
    5370             : 
    5371             :         /*
    5372             :          * Non-null argument had better be an array.  We assume that any call
    5373             :          * context that could let get_fn_expr_variadic return true will have
    5374             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    5375             :          * it should be okay to just Assert that it's an array rather than
    5376             :          * doing a full-fledged error check.
    5377             :          */
    5378             :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    5379             : 
    5380             :         /* OK, safe to fetch the array value */
    5381          18 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    5382             : 
    5383             :         /*
    5384             :          * And serialize the array.  We tell array_to_text to ignore null
    5385             :          * elements, which matches the behavior of the loop below.
                     :          *
                     :          * NOTE(review): array_to_text_internal keeps an ArrayMetaState in
                     :          * fn_extra, while the loop below expects a FmgrInfo array there;
                     :          * presumably a given flinfo always takes the same branch -- confirm.
    5386             :          */
    5387          18 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    5388             :     }
    5389             : 
    5390             :     /* Normal case without explicit VARIADIC marker */
    5391          42 :     initStringInfo(&str);
    5392             : 
    5393             :     /* Get output function info, building it if first time through */
    5394          42 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
    5395          42 :     if (foutcache == NULL)
    5396          40 :         foutcache = build_concat_foutcache(fcinfo, argidx);
    5397             : 
    5398         204 :     for (i = argidx; i < PG_NARGS(); i++)
    5399             :     {
    5400         162 :         if (!PG_ARGISNULL(i))       /* NULL arguments are skipped, separator included */
    5401             :         {
    5402         150 :             Datum       value = PG_GETARG_DATUM(i);
    5403             : 
    5404             :             /* add separator if appropriate */
    5405         150 :             if (first_arg)
    5406          42 :                 first_arg = false;
    5407             :             else
    5408         108 :                 appendStringInfoString(&str, sepstr);
    5409             : 
    5410             :             /* call the appropriate type output function, append the result */
    5411         150 :             appendStringInfoString(&str,
    5412         150 :                                    OutputFunctionCall(&foutcache[i], value));
    5413             :         }
    5414             :     }
    5415             : 
    5416          42 :     result = cstring_to_text_with_len(str.data, str.len);
    5417          42 :     pfree(str.data);
    5418             : 
    5419          42 :     return result;
    5420             : }
    5421             : 
    5422             : /*
    5423             :  * Concatenate all arguments. NULL arguments are ignored.
                     :  *
                     :  * Calls concat_internal with an empty separator, starting at argument 0.
                     :  * A NULL result from concat_internal is returned as SQL NULL.
    5424             :  */
    5425             : Datum
    5426          36 : text_concat(PG_FUNCTION_ARGS)
    5427             : {
    5428             :     text       *result;
    5429             : 
    5430          36 :     result = concat_internal("", 0, fcinfo);
    5431          36 :     if (result == NULL)
    5432           6 :         PG_RETURN_NULL();
    5433          30 :     PG_RETURN_TEXT_P(result);
    5434             : }
    5435             : 
    5436             : /*
    5437             :  * Concatenate all but first argument value with separators. The first
    5438             :  * parameter is used as the separator. NULL arguments are ignored.
                     :  *
                     :  * A NULL separator makes the whole result NULL.  Otherwise the separator
                     :  * is converted to a C string and handed to concat_internal() together
                     :  * with starting argument index 1; concat_internal() emits the separator
                     :  * only between the values it actually appends, so skipped NULL arguments
                     :  * do not produce doubled separators.  A NULL pointer returned by
                     :  * concat_internal() is likewise turned into a SQL NULL.
    5439             :  */
    5440             : Datum
    5441          42 : text_concat_ws(PG_FUNCTION_ARGS)
    5442             : {
    5443             :     char       *sep;
    5444             :     text       *result;
    5445             : 
    5446             :     /* return NULL when separator is NULL */
    5447          42 :     if (PG_ARGISNULL(0))
    5448           6 :         PG_RETURN_NULL();
    5449          36 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
    5450             : 
    5451          36 :     result = concat_internal(sep, 1, fcinfo);
    5452          36 :     if (result == NULL)
    5453           6 :         PG_RETURN_NULL();
    5454          30 :     PG_RETURN_TEXT_P(result);
    5455             : }
    5456             : 
    5457             : /*
    5458             :  * Return first n characters in the string. When n is negative,
    5459             :  * return all but last |n| characters.
                     :  *
                     :  * For negative n, the character count of the string plus n gives the
                     :  * number of characters to keep; the value pg_mbcharcliplen() returns for
                     :  * that count is used as the byte length of the prefix copied into the
                     :  * result.  If |n| is at least the string length, the count is zero or
                     :  * negative.
                     :  * NOTE(review): presumably pg_mbcharcliplen() yields 0 for a non-positive
                     :  * limit, giving an empty result -- confirm against its definition.
                     :  *
                     :  * For non-negative n, the work is delegated to text_substring() with
                     :  * start position 1 and length n, passing argument 0 as a raw datum.
    5460             :  */
    5461             : Datum
    5462        1884 : text_left(PG_FUNCTION_ARGS)
    5463             : {
    5464        1884 :     int         n = PG_GETARG_INT32(1);
    5465             : 
    5466        1884 :     if (n < 0)
    5467             :     {
    5468          30 :         text       *str = PG_GETARG_TEXT_PP(0);
    5469          30 :         const char *p = VARDATA_ANY(str);
    5470          30 :         int         len = VARSIZE_ANY_EXHDR(str);
    5471             :         int         rlen;
    5472             : 
    5473          30 :         n = pg_mbstrlen_with_len(p, len) + n;
    5474          30 :         rlen = pg_mbcharcliplen(p, len, n);
    5475          30 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    5476             :     }
    5477             :     else
    5478        1854 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
    5479             : }
    5480             : 
    5481             : /*
    5482             :  * Return last n characters in the string. When n is negative,
    5483             :  * return all but first |n| characters.
                     :  *
                     :  * Either way n is first turned into the number of leading characters to
                     :  * skip; the value pg_mbcharcliplen() returns for that count is used as
                     :  * the byte offset at which the returned tail of the string starts.
    5484             :  */
    5485             : Datum
    5486          66 : text_right(PG_FUNCTION_ARGS)
    5487             : {
    5488          66 :     text       *str = PG_GETARG_TEXT_PP(0);
    5489          66 :     const char *p = VARDATA_ANY(str);
    5490          66 :     int         len = VARSIZE_ANY_EXHDR(str);
    5491          66 :     int         n = PG_GETARG_INT32(1);
    5492             :     int         off;
    5493             : 
                     :     /*
                     :      * -INT_MIN is undefined, so that one value is clamped to INT_MAX
                     :      * instead of being negated.  The string length is held in an int, so
                     :      * skipping INT_MAX characters already skips the entire string.
                     :      */
    5494          66 :     if (n < 0)
    5495          30 :         n = (n == INT_MIN) ? INT_MAX : -n;
    5496             :     else
    5497          36 :         n = pg_mbstrlen_with_len(p, len) - n;
    5498          66 :     off = pg_mbcharcliplen(p, len, n);
    5499             : 
    5500          66 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    5501             : }
    5502             : 
    5503             : /*
    5504             :  * Return reversed string
                     :  *
                     :  * The result has the same byte length as the input.  It is filled from
                     :  * the back: "dst" starts just past the end of the result's data area and
                     :  * is moved down before each copy, while the input is read front to back.
                     :  *
                     :  * When the database encoding can use more than one byte per character,
                     :  * whole characters (pg_mblen() bytes each) are moved, so the bytes inside
                     :  * a character keep their order; otherwise bytes are reversed one by one.
                     :  *
                     :  * NOTE(review): the multibyte loop trusts pg_mblen() not to report a
                     :  * length that runs past endp, i.e. it assumes validly encoded input.
    5505             :  */
    5506             : Datum
    5507           6 : text_reverse(PG_FUNCTION_ARGS)
    5508             : {
    5509           6 :     text       *str = PG_GETARG_TEXT_PP(0);
    5510           6 :     const char *p = VARDATA_ANY(str);
    5511           6 :     int         len = VARSIZE_ANY_EXHDR(str);
    5512           6 :     const char *endp = p + len;
    5513             :     text       *result;
    5514             :     char       *dst;
    5515             : 
    5516           6 :     result = palloc(len + VARHDRSZ);
    5517           6 :     dst = (char *) VARDATA(result) + len;
    5518           6 :     SET_VARSIZE(result, len + VARHDRSZ);
    5519             : 
    5520           6 :     if (pg_database_encoding_max_length() > 1)
    5521             :     {
    5522             :         /* multibyte version */
    5523          36 :         while (p < endp)
    5524             :         {
    5525             :             int         sz;
    5526             : 
    5527          30 :             sz = pg_mblen(p);
    5528          30 :             dst -= sz;
    5529          30 :             memcpy(dst, p, sz);
    5530          30 :             p += sz;
    5531             :         }
    5532             :     }
    5533             :     else
    5534             :     {
    5535             :         /* single byte version */
    5536           0 :         while (p < endp)
    5537           0 :             *(--dst) = *p++;
    5538             :     }
    5539             : 
    5540           6 :     PG_RETURN_TEXT_P(result);
    5541             : }
    5542             : 
    5543             : 
    5544             : /*
    5545             :  * Support macros for text_format()
                     :  *
                     :  * TEXT_FORMAT_FLAG_MINUS is the bit set in a specifier's flags word when
                     :  * a minus flag was seen.
                     :  *
                     :  * ADVANCE_PARSE_POINTER steps ptr forward by one and raises an
                     :  * "unterminated format() type specifier" error if that reaches or passes
                     :  * end_ptr.  ptr is modified in place, so it must be an lvalue.
    5546             :  */
    5547             : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
    5548             : 
    5549             : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    5550             :     do { \
    5551             :         if (++(ptr) >= (end_ptr)) \
    5552             :             ereport(ERROR, \
    5553             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
    5554             :                      errmsg("unterminated format() type specifier"), \
    5555             :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
    5556             :     } while (0)
    5557             : 
    5558             : /*
    5559             :  * Returns a formatted string
                     :  *
                     :  * Argument 0 is the format string; a NULL format string gives a NULL
                     :  * result.  The values to be formatted come either from the remaining
                     :  * call arguments or, when the call is marked VARIADIC, from the elements
                     :  * of the array passed as argument 1 (a NULL array counts as empty).
                     :  *
                     :  * The format string is scanned byte by byte.  Anything that is not part
                     :  * of a conversion specifier is copied through, and a doubled percent
                     :  * sign produces a single one.  Other specifiers are parsed by
                     :  * text_format_parse_format() and must end in one of the type characters
                     :  * s, I or L; the value is then rendered by
                     :  * text_format_string_conversion().
                     :  *
                     :  * "arg" tracks the next argument position to consume.  An explicit
                     :  * position in the specifier (for the value or for an indirect width)
                     :  * resets it, and every argument fetched advances it by one, so later
                     :  * specifiers without a position continue from there.  Asking for a
                     :  * position beyond the supplied arguments raises "too few arguments for
                     :  * format()".
                     :  *
                     :  * An indirect width argument is read directly when its type is int4 or
                     :  * int2; any other type is converted to text and parsed as an integer.
                     :  * A NULL width counts as zero.
                     :  *
                     :  * The type output function lookups are remembered (separately for
                     :  * values and for width arguments) and reused while consecutive
                     :  * arguments have the same type.
    5560             :  */
    5561             : Datum
    5562       25068 : text_format(PG_FUNCTION_ARGS)
    5563             : {
    5564             :     text       *fmt;
    5565             :     StringInfoData str;
    5566             :     const char *cp;
    5567             :     const char *start_ptr;
    5568             :     const char *end_ptr;
    5569             :     text       *result;
    5570             :     int         arg;
    5571             :     bool        funcvariadic;
    5572             :     int         nargs;
    5573       25068 :     Datum      *elements = NULL;
    5574       25068 :     bool       *nulls = NULL;
    5575       25068 :     Oid         element_type = InvalidOid;
    5576       25068 :     Oid         prev_type = InvalidOid;
    5577       25068 :     Oid         prev_width_type = InvalidOid;
    5578             :     FmgrInfo    typoutputfinfo;
    5579             :     FmgrInfo    typoutputinfo_width;
    5580             : 
    5581             :     /* When format string is null, immediately return null */
    5582       25068 :     if (PG_ARGISNULL(0))
    5583           6 :         PG_RETURN_NULL();
    5584             : 
    5585             :     /* If argument is marked VARIADIC, expand array into elements */
    5586       25062 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5587             :     {
    5588             :         ArrayType  *arr;
    5589             :         int16       elmlen;
    5590             :         bool        elmbyval;
    5591             :         char        elmalign;
    5592             :         int         nitems;
    5593             : 
    5594             :         /* Should have just the one argument */
    5595             :         Assert(PG_NARGS() == 2);
    5596             : 
    5597             :         /* If argument is NULL, we treat it as zero-length array */
    5598          48 :         if (PG_ARGISNULL(1))
    5599           6 :             nitems = 0;
    5600             :         else
    5601             :         {
    5602             :             /*
    5603             :              * Non-null argument had better be an array.  We assume that any
    5604             :              * call context that could let get_fn_expr_variadic return true
    5605             :              * will have checked that a VARIADIC-labeled parameter actually is
    5606             :              * an array.  So it should be okay to just Assert that it's an
    5607             :              * array rather than doing a full-fledged error check.
    5608             :              */
    5609             :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5610             : 
    5611             :             /* OK, safe to fetch the array value */
    5612          42 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5613             : 
    5614             :             /* Get info about array element type */
    5615          42 :             element_type = ARR_ELEMTYPE(arr);
    5616          42 :             get_typlenbyvalalign(element_type,
    5617             :                                  &elmlen, &elmbyval, &elmalign);
    5618             : 
    5619             :             /* Extract all array elements */
    5620          42 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5621             :                               &elements, &nulls, &nitems);
    5622             :         }
    5623             : 
    5624          48 :         nargs = nitems + 1;
    5625          48 :         funcvariadic = true;
    5626             :     }
    5627             :     else
    5628             :     {
    5629             :         /* Non-variadic case, we'll process the arguments individually */
    5630       25014 :         nargs = PG_NARGS();
    5631       25014 :         funcvariadic = false;
    5632             :     }
    5633             : 
    5634             :     /* Setup for main loop. */
    5635       25062 :     fmt = PG_GETARG_TEXT_PP(0);
    5636       25062 :     start_ptr = VARDATA_ANY(fmt);
    5637       25062 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5638       25062 :     initStringInfo(&str);
    5639       25062 :     arg = 1;                    /* next argument position to print */
    5640             : 
    5641             :     /* Scan format string, looking for conversion specifiers. */
    5642      727698 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5643             :     {
    5644             :         int         argpos;
    5645             :         int         widthpos;
    5646             :         int         flags;
    5647             :         int         width;
    5648             :         Datum       value;
    5649             :         bool        isNull;
    5650             :         Oid         typid;
    5651             : 
    5652             :         /*
    5653             :          * If it's not the start of a conversion specifier, just copy it to
    5654             :          * the output buffer.
    5655             :          */
    5656      702696 :         if (*cp != '%')
    5657             :         {
    5658      643614 :             appendStringInfoCharMacro(&str, *cp);
    5659      643632 :             continue;
    5660             :         }
    5661             : 
    5662       59082 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5663             : 
    5664             :         /* Easy case: %% outputs a single % */
    5665       59082 :         if (*cp == '%')
    5666             :         {
    5667          18 :             appendStringInfoCharMacro(&str, *cp);
    5668          18 :             continue;
    5669             :         }
    5670             : 
    5671             :         /* Parse the optional portions of the format specifier */
    5672       59064 :         cp = text_format_parse_format(cp, end_ptr,
    5673             :                                       &argpos, &widthpos,
    5674             :                                       &flags, &width);
    5675             : 
    5676             :         /*
    5677             :          * Next we should see the main conversion specifier.  Whether or not
    5678             :          * an argument position was present, it's known that at least one
    5679             :          * character remains in the string at this point.  Experience suggests
    5680             :          * that it's worth checking that that character is one of the expected
    5681             :          * ones before we try to fetch arguments, so as to produce the least
    5682             :          * confusing response to a mis-formatted specifier.
    5683             :          */
    5684       59040 :         if (strchr("sIL", *cp) == NULL)
    5685           6 :             ereport(ERROR,
    5686             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5687             :                      errmsg("unrecognized format() type specifier \"%.*s\"",
    5688             :                             pg_mblen(cp), cp),
    5689             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5690             : 
    5691             :         /* If indirect width was specified, get its value */
    5692       59034 :         if (widthpos >= 0)
    5693             :         {
    5694             :             /* Collect the specified or next argument position */
    5695          42 :             if (widthpos > 0)
    5696          36 :                 arg = widthpos;
    5697          42 :             if (arg >= nargs)
    5698           0 :                 ereport(ERROR,
    5699             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5700             :                          errmsg("too few arguments for format()")));
    5701             : 
    5702             :             /* Get the value and type of the selected argument */
    5703          42 :             if (!funcvariadic)
    5704             :             {
    5705          42 :                 value = PG_GETARG_DATUM(arg);
    5706          42 :                 isNull = PG_ARGISNULL(arg);
    5707          42 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5708             :             }
    5709             :             else
    5710             :             {
    5711           0 :                 value = elements[arg - 1];
    5712           0 :                 isNull = nulls[arg - 1];
    5713           0 :                 typid = element_type;
    5714             :             }
    5715          42 :             if (!OidIsValid(typid))
    5716           0 :                 elog(ERROR, "could not determine data type of format() input");
    5717             : 
    5718          42 :             arg++;
    5719             : 
    5720             :             /* We can treat NULL width the same as zero */
    5721          42 :             if (isNull)
    5722           6 :                 width = 0;
    5723          36 :             else if (typid == INT4OID)
    5724          36 :                 width = DatumGetInt32(value);
    5725           0 :             else if (typid == INT2OID)
    5726           0 :                 width = DatumGetInt16(value);
    5727             :             else
    5728             :             {
    5729             :                 /*
                     :                  * For less-usual datatypes, convert to text then to int.
                     :                  * Note that this block-local "str" shadows the function-level
                     :                  * StringInfoData "str" until the closing brace.
                     :                  */
    5730             :                 char       *str;
    5731             : 
    5732           0 :                 if (typid != prev_width_type)
    5733             :                 {
    5734             :                     Oid         typoutputfunc;
    5735             :                     bool        typIsVarlena;
    5736             : 
    5737           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5738           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    5739           0 :                     prev_width_type = typid;
    5740             :                 }
    5741             : 
    5742           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    5743             : 
    5744             :                 /* pg_strtoint32 will complain about bad data or overflow */
    5745           0 :                 width = pg_strtoint32(str);
    5746             : 
    5747           0 :                 pfree(str);
    5748             :             }
    5749             :         }
    5750             : 
    5751             :         /* Collect the specified or next argument position */
    5752       59034 :         if (argpos > 0)
    5753         132 :             arg = argpos;
    5754       59034 :         if (arg >= nargs)
    5755          24 :             ereport(ERROR,
    5756             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5757             :                      errmsg("too few arguments for format()")));
    5758             : 
    5759             :         /* Get the value and type of the selected argument */
    5760       59010 :         if (!funcvariadic)
    5761             :         {
    5762       57738 :             value = PG_GETARG_DATUM(arg);
    5763       57738 :             isNull = PG_ARGISNULL(arg);
    5764       57738 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5765             :         }
    5766             :         else
    5767             :         {
    5768        1272 :             value = elements[arg - 1];
    5769        1272 :             isNull = nulls[arg - 1];
    5770        1272 :             typid = element_type;
    5771             :         }
    5772       59010 :         if (!OidIsValid(typid))
    5773           0 :             elog(ERROR, "could not determine data type of format() input");
    5774             : 
    5775       59010 :         arg++;
    5776             : 
    5777             :         /*
    5778             :          * Get the appropriate typOutput function, reusing previous one if
    5779             :          * same type as previous argument.  That's particularly useful in the
    5780             :          * variadic-array case, but often saves work even for ordinary calls.
    5781             :          */
    5782       59010 :         if (typid != prev_type)
    5783             :         {
    5784             :             Oid         typoutputfunc;
    5785             :             bool        typIsVarlena;
    5786             : 
    5787       27960 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5788       27960 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    5789       27960 :             prev_type = typid;
    5790             :         }
    5791             : 
    5792             :         /*
    5793             :          * And now we can format the value.
    5794             :          */
    5795       59010 :         switch (*cp)
    5796             :         {
    5797       59010 :             case 's':
    5798             :             case 'I':
    5799             :             case 'L':
    5800       59010 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    5801             :                                               value, isNull,
    5802             :                                               flags, width);
    5803       59004 :                 break;
    5804           0 :             default:
    5805             :                 /* should not get here, because of previous check */
    5806           0 :                 ereport(ERROR,
    5807             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5808             :                          errmsg("unrecognized format() type specifier \"%.*s\"",
    5809             :                                 pg_mblen(cp), cp),
    5810             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    5811             :                 break;
    5812             :         }
    5813             :     }
    5814             : 
    5815             :     /* Don't need deconstruct_array results anymore. */
    5816       25002 :     if (elements != NULL)
    5817          42 :         pfree(elements);
    5818       25002 :     if (nulls != NULL)
    5819          42 :         pfree(nulls);
    5820             : 
    5821             :     /* Generate results. */
    5822       25002 :     result = cstring_to_text_with_len(str.data, str.len);
    5823       25002 :     pfree(str.data);
    5824             : 
    5825       25002 :     PG_RETURN_TEXT_P(result);
    5826             : }
    5827             : 
    5828             : /*
    5829             :  * Parse contiguous digits as a decimal number.
    5830             :  *
    5831             :  * Returns true if some digits could be parsed.
    5832             :  * The value is returned into *value, and *ptr is advanced to the next
    5833             :  * character to be parsed.
    5834             :  *
                     :  * If no digit is present, *value is set to zero and *ptr keeps its
                     :  * position.  A value that does not fit in a signed 32-bit integer raises
                     :  * "number is out of range"; running into end_ptr while stepping past a
                     :  * digit raises the ADVANCE_PARSE_POINTER error.
                     :  *
    5835             :  * Note parsing invariant: at least one character is known available before
    5836             :  * string end (end_ptr) at entry, and this is still true at exit.
    5837             :  */
    5838             : static bool
    5839      118092 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    5840             : {
    5841      118092 :     bool        found = false;
    5842      118092 :     const char *cp = *ptr;
    5843      118092 :     int         val = 0;
    5844             : 
    5845      118404 :     while (*cp >= '0' && *cp <= '9')
    5846             :     {
    5847         318 :         int8        digit = (*cp - '0');
    5848             : 
    5849         318 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
    5850         318 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
    5851           0 :             ereport(ERROR,
    5852             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5853             :                      errmsg("number is out of range")));
    5854         318 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5855         312 :         found = true;
    5856             :     }
    5857             : 
    5858      118086 :     *ptr = cp;
    5859      118086 :     *value = val;
    5860             : 
    5861      118086 :     return found;
    5862             : }
    5863             : 
    5864             : /*
    5865             :  * Parse a format specifier (generally following the SUS printf spec).
    5866             :  *
    5867             :  * We have already advanced over the initial '%', and we are looking for
    5868             :  * [argpos][flags][width]type (but the type character is not consumed here).
    5869             :  *
    5870             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    5871             :  * Output parameters:
    5872             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    5873             :  *  widthpos: argument position for width.  Zero means the argument position
    5874             :  *          was unspecified (ie, take the next arg) and -1 means no width
    5875             :  *          argument (width was omitted or specified as a constant).
    5876             :  *  flags: bitmask of flags.
    5877             :  *  width: directly-specified width value.  Zero means the width was omitted
    5878             :  *          (note it's not necessary to distinguish this case from an explicit
    5879             :  *          zero width value).
    5880             :  *
    5881             :  * The function result is the next character position to be parsed, ie, the
    5882             :  * location where the type character is/should be.
    5883             :  *
                     :  * A leading number that is not followed by '$' is taken as the width, and
                     :  * parsing stops right there, so no flags are looked for after it.  The
                     :  * minus flag may be repeated.
                     :  *
                     :  * Errors are raised for an explicit argument position of zero (value or
                     :  * width), for a width position after '*' that is not terminated by '$',
                     :  * and, via ADVANCE_PARSE_POINTER, when the specifier runs into end_ptr.
                     :  *
    5884             :  * Note parsing invariant: at least one character is known available before
    5885             :  * string end (end_ptr) at entry, and this is still true at exit.
    5886             :  */
    5887             : static const char *
    5888       59064 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    5889             :                          int *argpos, int *widthpos,
    5890             :                          int *flags, int *width)
    5891             : {
    5892       59064 :     const char *cp = start_ptr;
    5893             :     int         n;
    5894             : 
    5895             :     /* set defaults for output parameters */
    5896       59064 :     *argpos = -1;
    5897       59064 :     *widthpos = -1;
    5898       59064 :     *flags = 0;
    5899       59064 :     *width = 0;
    5900             : 
    5901             :     /* try to identify first number */
    5902       59064 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    5903             :     {
    5904         174 :         if (*cp != '$')
    5905             :         {
    5906             :             /* Must be just a width and a type, so we're done */
    5907          24 :             *width = n;
    5908          24 :             return cp;
    5909             :         }
    5910             :         /* The number was argument position */
    5911         150 :         *argpos = n;
    5912             :         /* Explicit 0 for argument index is immediately refused */
    5913         150 :         if (n == 0)
    5914           6 :             ereport(ERROR,
    5915             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5916             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5917         144 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5918             :     }
    5919             : 
    5920             :     /* Handle flags (only minus is supported now) */
    5921       59058 :     while (*cp == '-')
    5922             :     {
    5923          30 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    5924          30 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5925             :     }
    5926             : 
    5927       59028 :     if (*cp == '*')
    5928             :     {
    5929             :         /* Handle indirect width */
    5930          48 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5931          48 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5932             :         {
    5933             :             /* number in this position must be closed by $ */
    5934          42 :             if (*cp != '$')
    5935           0 :                 ereport(ERROR,
    5936             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5937             :                          errmsg("width argument position must be ended by \"$\"")));
    5938             :             /* The number was width argument position */
    5939          42 :             *widthpos = n;
    5940             :             /* Explicit 0 for argument index is immediately refused */
    5941          42 :             if (n == 0)
    5942           6 :                 ereport(ERROR,
    5943             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5944             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5945          36 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
    5946             :         }
    5947             :         else
    5948           6 :             *widthpos = 0;      /* width's argument position is unspecified */
    5949             :     }
    5950             :     else
    5951             :     {
    5952             :         /* Check for direct width specification */
    5953       58980 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5954          30 :             *width = n;
    5955             :     }
    5956             : 
    5957             :     /* cp should now be pointing at type character */
    5958       59016 :     return cp;
    5959             : }
    5960             : 
    5961             : /*
    5962             :  * Format a %s, %I, or %L conversion
                     :  *
                     :  * The formatted value is appended to buf.  "conversion" is the type
                     :  * character (s, I or L), typOutputInfo the output function used to turn
                     :  * a non-NULL value into a C string, and flags/width are passed on to
                     :  * text_format_append_string() for padding.
                     :  *
                     :  * A NULL value becomes an empty string for s and the unquoted word NULL
                     :  * for L; for I it is an error.  A non-NULL value is stringified first,
                     :  * then passed through quote_identifier() for I or quote_literal_cstr()
                     :  * for L.  The quote_identifier() result is not freed here, since it may
                     :  * or may not be a newly allocated string.
    5963             :  */
    5964             : static void
    5965       59010 : text_format_string_conversion(StringInfo buf, char conversion,
    5966             :                               FmgrInfo *typOutputInfo,
    5967             :                               Datum value, bool isNull,
    5968             :                               int flags, int width)
    5969             : {
    5970             :     char       *str;
    5971             : 
    5972             :     /* Handle NULL arguments before trying to stringify the value. */
    5973       59010 :     if (isNull)
    5974             :     {
    5975         306 :         if (conversion == 's')
    5976         234 :             text_format_append_string(buf, "", flags, width);
    5977          72 :         else if (conversion == 'L')
    5978          66 :             text_format_append_string(buf, "NULL", flags, width);
    5979           6 :         else if (conversion == 'I')
    5980           6 :             ereport(ERROR,
    5981             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    5982             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    5983         300 :         return;
    5984             :     }
    5985             : 
    5986             :     /* Stringify. */
    5987       58704 :     str = OutputFunctionCall(typOutputInfo, value);
    5988             : 
    5989             :     /* Escape. */
    5990       58704 :     if (conversion == 'I')
    5991             :     {
    5992             :         /* quote_identifier may or may not allocate a new string. */
    5993        3106 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    5994             :     }
    5995       55598 :     else if (conversion == 'L')
    5996             :     {
    5997        2578 :         char       *qstr = quote_literal_cstr(str);
    5998             : 
    5999        2578 :         text_format_append_string(buf, qstr, flags, width);
    6000             :         /* quote_literal_cstr() always allocates a new string */
    6001        2578 :         pfree(qstr);
    6002             :     }
    6003             :     else
    6004       53020 :         text_format_append_string(buf, str, flags, width);
    6005             : 
    6006             :     /* Cleanup. */
    6007       58704 :     pfree(str);
    6008             : }
    6009             : 
    6010             : /*
    6011             :  * Append str to buf, padding as directed by flags/width
                     :  *
                     :  * A width of zero means no padding at all.  A negative width requests
                     :  * left alignment, with its absolute value used as the width; INT_MIN is
                     :  * rejected with "number is out of range" because it cannot be negated.
                     :  * For a positive width the text is left-aligned only when
                     :  * TEXT_FORMAT_FLAG_MINUS is set in flags, and right-aligned otherwise.
                     :  *
                     :  * The length compared against width comes from pg_mbstrlen()
                     :  * (presumably a count of characters rather than bytes), and the padding
                     :  * consists of spaces.  A string that is already at least width long is
                     :  * appended unchanged; nothing is ever truncated.
    6012             :  */
    6013             : static void
    6014       59004 : text_format_append_string(StringInfo buf, const char *str,
    6015             :                           int flags, int width)
    6016             : {
    6017       59004 :     bool        align_to_left = false;
    6018             :     int         len;
    6019             : 
    6020             :     /* fast path for typical easy case */
    6021       59004 :     if (width == 0)
    6022             :     {
    6023       58920 :         appendStringInfoString(buf, str);
    6024       58920 :         return;
    6025             :     }
    6026             : 
    6027          84 :     if (width < 0)
    6028             :     {
    6029             :         /* Negative width: implicit '-' flag, then take absolute value */
    6030           6 :         align_to_left = true;
    6031             :         /* -INT_MIN is undefined */
    6032           6 :         if (width <= INT_MIN)
    6033           0 :             ereport(ERROR,
    6034             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    6035             :                      errmsg("number is out of range")));
    6036           6 :         width = -width;
    6037             :     }
    6038          78 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    6039          24 :         align_to_left = true;
    6040             : 
    6041          84 :     len = pg_mbstrlen(str);
    6042          84 :     if (align_to_left)
    6043             :     {
    6044             :         /* left justify */
    6045          30 :         appendStringInfoString(buf, str);
    6046          30 :         if (len < width)
    6047          30 :             appendStringInfoSpaces(buf, width - len);
    6048             :     }
    6049             :     else
    6050             :     {
    6051             :         /* right justify */
    6052          54 :         if (len < width)
    6053          54 :             appendStringInfoSpaces(buf, width - len);
    6054          54 :         appendStringInfoString(buf, str);
    6055             :     }
    6056             : }
    6057             : 
    6058             : /*
    6059             :  * text_format_nv - nonvariadic wrapper for text_format function.
    6060             :  *
    6061             :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
    6062             :  * which checks that all built-in functions that share the implementing C
    6063             :  * function take the same number of arguments.
    6064             :  */
    6065             : Datum
    6066          30 : text_format_nv(PG_FUNCTION_ARGS)
    6067             : {
    6068          30 :     return text_format(fcinfo);
    6069             : }
    6070             : 
    6071             : /*
    6072             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    6073             :  * for this use case.
    6074             :  */
    6075             : static inline bool
    6076           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    6077             : {
    6078           0 :     while (len > 0)
    6079             :     {
    6080           0 :         len--;
    6081           0 :         if (s1[len] != s2[len])
    6082           0 :             return false;
    6083             :     }
    6084           0 :     return true;
    6085             : }
    6086             : 
    6087             : /* Expand each Levenshtein distance variant */
    6088             : #include "levenshtein.c"
    6089             : #define LEVENSHTEIN_LESS_EQUAL
    6090             : #include "levenshtein.c"
    6091             : 
    6092             : 
    6093             : /*
    6094             :  * The following *ClosestMatch() functions can be used to determine whether a
    6095             :  * user-provided string resembles any known valid values, which is useful for
    6096             :  * providing hints in log messages, among other things.  Use these functions
    6097             :  * like so:
    6098             :  *
    6099             :  *      initClosestMatch(&state, source_string, max_distance);
    6100             :  *
    6101             :  *      for (int i = 0; i < num_valid_strings; i++)
    6102             :  *          updateClosestMatch(&state, valid_strings[i]);
    6103             :  *
    6104             :  *      closestMatch = getClosestMatch(&state);
    6105             :  */
    6106             : 
    6107             : /*
    6108             :  * Initialize the given state with the source string and maximum Levenshtein
    6109             :  * distance to consider.
    6110             :  */
    6111             : void
    6112          56 : initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
    6113             : {
    6114             :     Assert(state);
    6115             :     Assert(max_d >= 0);
    6116             : 
    6117          56 :     state->source = source;
    6118          56 :     state->min_d = -1;
    6119          56 :     state->max_d = max_d;
    6120          56 :     state->match = NULL;
    6121          56 : }
    6122             : 
    6123             : /*
    6124             :  * If the candidate string is a closer match than the current one saved (or
    6125             :  * there is no match saved), save it as the closest match.
    6126             :  *
    6127             :  * If the source or candidate string is NULL, empty, or too long, this function
    6128             :  * takes no action.  Likewise, if the Levenshtein distance exceeds the maximum
    6129             :  * allowed or more than half the characters are different, no action is taken.
    6130             :  */
    6131             : void
    6132         334 : updateClosestMatch(ClosestMatchState *state, const char *candidate)
    6133             : {
    6134             :     int         dist;
    6135             : 
    6136             :     Assert(state);
    6137             : 
    6138         334 :     if (state->source == NULL || state->source[0] == '\0' ||
    6139         334 :         candidate == NULL || candidate[0] == '\0')
    6140           0 :         return;
    6141             : 
    6142             :     /*
    6143             :      * To avoid ERROR-ing, we check the lengths here instead of setting
    6144             :      * 'trusted' to false in the call to varstr_levenshtein_less_equal().
    6145             :      */
    6146         334 :     if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
    6147         334 :         strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
    6148           0 :         return;
    6149             : 
    6150         334 :     dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
    6151         334 :                                          candidate, strlen(candidate), 1, 1, 1,
    6152             :                                          state->max_d, true);
    6153         334 :     if (dist <= state->max_d &&
    6154          56 :         dist <= strlen(state->source) / 2 &&
    6155          14 :         (state->min_d == -1 || dist < state->min_d))
    6156             :     {
    6157          14 :         state->min_d = dist;
    6158          14 :         state->match = candidate;
    6159             :     }
    6160             : }
    6161             : 
    6162             : /*
    6163             :  * Return the closest match.  If no suitable candidates were provided via
    6164             :  * updateClosestMatch(), return NULL.
    6165             :  */
    6166             : const char *
    6167          56 : getClosestMatch(ClosestMatchState *state)
    6168             : {
    6169             :     Assert(state);
    6170             : 
    6171          56 :     return state->match;
    6172             : }
    6173             : 
    6174             : 
    6175             : /*
    6176             :  * Unicode support
    6177             :  */
    6178             : 
    6179             : static UnicodeNormalizationForm
    6180         186 : unicode_norm_form_from_string(const char *formstr)
    6181             : {
    6182         186 :     UnicodeNormalizationForm form = -1;
    6183             : 
    6184             :     /*
    6185             :      * Might as well check this while we're here.
    6186             :      */
    6187         186 :     if (GetDatabaseEncoding() != PG_UTF8)
    6188           0 :         ereport(ERROR,
    6189             :                 (errcode(ERRCODE_SYNTAX_ERROR),
    6190             :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
    6191             : 
    6192         186 :     if (pg_strcasecmp(formstr, "NFC") == 0)
    6193          66 :         form = UNICODE_NFC;
    6194         120 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
    6195          36 :         form = UNICODE_NFD;
    6196          84 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
    6197          36 :         form = UNICODE_NFKC;
    6198          48 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
    6199          36 :         form = UNICODE_NFKD;
    6200             :     else
    6201          12 :         ereport(ERROR,
    6202             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6203             :                  errmsg("invalid normalization form: %s", formstr)));
    6204             : 
    6205         174 :     return form;
    6206             : }
    6207             : 
    6208             : Datum
    6209          48 : unicode_normalize_func(PG_FUNCTION_ARGS)
    6210             : {
    6211          48 :     text       *input = PG_GETARG_TEXT_PP(0);
    6212          48 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6213             :     UnicodeNormalizationForm form;
    6214             :     int         size;
    6215             :     pg_wchar   *input_chars;
    6216             :     pg_wchar   *output_chars;
    6217             :     unsigned char *p;
    6218             :     text       *result;
    6219             :     int         i;
    6220             : 
    6221          48 :     form = unicode_norm_form_from_string(formstr);
    6222             : 
    6223             :     /* convert to pg_wchar */
    6224          42 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6225          42 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6226          42 :     p = (unsigned char *) VARDATA_ANY(input);
    6227         168 :     for (i = 0; i < size; i++)
    6228             :     {
    6229         126 :         input_chars[i] = utf8_to_unicode(p);
    6230         126 :         p += pg_utf_mblen(p);
    6231             :     }
    6232          42 :     input_chars[i] = (pg_wchar) '\0';
    6233             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6234             : 
    6235             :     /* action */
    6236          42 :     output_chars = unicode_normalize(form, input_chars);
    6237             : 
    6238             :     /* convert back to UTF-8 string */
    6239          42 :     size = 0;
    6240         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6241             :     {
    6242             :         unsigned char buf[4];
    6243             : 
    6244         120 :         unicode_to_utf8(*wp, buf);
    6245         120 :         size += pg_utf_mblen(buf);
    6246             :     }
    6247             : 
    6248          42 :     result = palloc(size + VARHDRSZ);
    6249          42 :     SET_VARSIZE(result, size + VARHDRSZ);
    6250             : 
    6251          42 :     p = (unsigned char *) VARDATA_ANY(result);
    6252         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6253             :     {
    6254         120 :         unicode_to_utf8(*wp, p);
    6255         120 :         p += pg_utf_mblen(p);
    6256             :     }
    6257             :     Assert((char *) p == (char *) result + size + VARHDRSZ);
    6258             : 
    6259          42 :     PG_RETURN_TEXT_P(result);
    6260             : }
    6261             : 
    6262             : /*
    6263             :  * Check whether the string is in the specified Unicode normalization form.
    6264             :  *
    6265             :  * This is done by converting the string to the specified normal form and then
    6266             :  * comparing that to the original string.  To speed that up, we also apply the
    6267             :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
    6268             :  * answer for many strings by just scanning the string once.
    6269             :  *
    6270             :  * This function should generally be optimized for the case where the string
    6271             :  * is in fact normalized.  In that case, we'll end up looking at the entire
    6272             :  * string, so it's probably not worth doing any incremental conversion etc.
    6273             :  */
    6274             : Datum
    6275         138 : unicode_is_normalized(PG_FUNCTION_ARGS)
    6276             : {
    6277         138 :     text       *input = PG_GETARG_TEXT_PP(0);
    6278         138 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6279             :     UnicodeNormalizationForm form;
    6280             :     int         size;
    6281             :     pg_wchar   *input_chars;
    6282             :     pg_wchar   *output_chars;
    6283             :     unsigned char *p;
    6284             :     int         i;
    6285             :     UnicodeNormalizationQC quickcheck;
    6286             :     int         output_size;
    6287             :     bool        result;
    6288             : 
    6289         138 :     form = unicode_norm_form_from_string(formstr);
    6290             : 
    6291             :     /* convert to pg_wchar */
    6292         132 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6293         132 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6294         132 :     p = (unsigned char *) VARDATA_ANY(input);
    6295         504 :     for (i = 0; i < size; i++)
    6296             :     {
    6297         372 :         input_chars[i] = utf8_to_unicode(p);
    6298         372 :         p += pg_utf_mblen(p);
    6299             :     }
    6300         132 :     input_chars[i] = (pg_wchar) '\0';
    6301             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6302             : 
    6303             :     /* quick check (see UAX #15) */
    6304         132 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
    6305         132 :     if (quickcheck == UNICODE_NORM_QC_YES)
    6306          42 :         PG_RETURN_BOOL(true);
    6307          90 :     else if (quickcheck == UNICODE_NORM_QC_NO)
    6308          12 :         PG_RETURN_BOOL(false);
    6309             : 
    6310             :     /* normalize and compare with original */
    6311          78 :     output_chars = unicode_normalize(form, input_chars);
    6312             : 
    6313          78 :     output_size = 0;
    6314         324 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6315         246 :         output_size++;
    6316             : 
    6317         114 :     result = (size == output_size) &&
    6318          36 :         (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
    6319             : 
    6320          78 :     PG_RETURN_BOOL(result);
    6321             : }
    6322             : 
    6323             : /*
    6324             :  * Check if first n chars are hexadecimal digits
    6325             :  */
    6326             : static bool
    6327         156 : isxdigits_n(const char *instr, size_t n)
    6328             : {
    6329         660 :     for (size_t i = 0; i < n; i++)
    6330         570 :         if (!isxdigit((unsigned char) instr[i]))
    6331          66 :             return false;
    6332             : 
    6333          90 :     return true;
    6334             : }
    6335             : 
    6336             : static unsigned int
    6337         504 : hexval(unsigned char c)
    6338             : {
    6339         504 :     if (c >= '0' && c <= '9')
    6340         384 :         return c - '0';
    6341         120 :     if (c >= 'a' && c <= 'f')
    6342          60 :         return c - 'a' + 0xA;
    6343          60 :     if (c >= 'A' && c <= 'F')
    6344          60 :         return c - 'A' + 0xA;
    6345           0 :     elog(ERROR, "invalid hexadecimal digit");
    6346             :     return 0;                   /* not reached */
    6347             : }
    6348             : 
    6349             : /*
    6350             :  * Translate string with hexadecimal digits to number
    6351             :  */
    6352             : static unsigned int
    6353          90 : hexval_n(const char *instr, size_t n)
    6354             : {
    6355          90 :     unsigned int result = 0;
    6356             : 
    6357         594 :     for (size_t i = 0; i < n; i++)
    6358         504 :         result += hexval(instr[i]) << (4 * (n - i - 1));
    6359             : 
    6360          90 :     return result;
    6361             : }
    6362             : 
    6363             : /*
    6364             :  * Replaces Unicode escape sequences by Unicode characters
    6365             :  */
    6366             : Datum
    6367          66 : unistr(PG_FUNCTION_ARGS)
    6368             : {
    6369          66 :     text       *input_text = PG_GETARG_TEXT_PP(0);
    6370             :     char       *instr;
    6371             :     int         len;
    6372             :     StringInfoData str;
    6373             :     text       *result;
    6374          66 :     pg_wchar    pair_first = 0;
    6375             :     char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    6376             : 
    6377          66 :     instr = VARDATA_ANY(input_text);
    6378          66 :     len = VARSIZE_ANY_EXHDR(input_text);
    6379             : 
    6380          66 :     initStringInfo(&str);
    6381             : 
    6382         510 :     while (len > 0)
    6383             :     {
    6384         486 :         if (instr[0] == '\\')
    6385             :         {
    6386         102 :             if (len >= 2 &&
    6387         102 :                 instr[1] == '\\')
    6388             :             {
    6389           6 :                 if (pair_first)
    6390           0 :                     goto invalid_pair;
    6391           6 :                 appendStringInfoChar(&str, '\\');
    6392           6 :                 instr += 2;
    6393           6 :                 len -= 2;
    6394             :             }
    6395          96 :             else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
    6396          66 :                      (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
    6397          30 :             {
    6398             :                 pg_wchar    unicode;
    6399          42 :                 int         offset = instr[1] == 'u' ? 2 : 1;
    6400             : 
    6401          42 :                 unicode = hexval_n(instr + offset, 4);
    6402             : 
    6403          42 :                 if (!is_valid_unicode_codepoint(unicode))
    6404           0 :                     ereport(ERROR,
    6405             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6406             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6407             : 
    6408          42 :                 if (pair_first)
    6409             :                 {
    6410          12 :                     if (is_utf16_surrogate_second(unicode))
    6411             :                     {
    6412           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6413           0 :                         pair_first = 0;
    6414             :                     }
    6415             :                     else
    6416          12 :                         goto invalid_pair;
    6417             :                 }
    6418          30 :                 else if (is_utf16_surrogate_second(unicode))
    6419           0 :                     goto invalid_pair;
    6420             : 
    6421          30 :                 if (is_utf16_surrogate_first(unicode))
    6422          18 :                     pair_first = unicode;
    6423             :                 else
    6424             :                 {
    6425          12 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6426          12 :                     appendStringInfoString(&str, cbuf);
    6427             :                 }
    6428             : 
    6429          30 :                 instr += 4 + offset;
    6430          30 :                 len -= 4 + offset;
    6431             :             }
    6432          54 :             else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
    6433          12 :             {
    6434             :                 pg_wchar    unicode;
    6435             : 
    6436          24 :                 unicode = hexval_n(instr + 2, 6);
    6437             : 
    6438          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6439           6 :                     ereport(ERROR,
    6440             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6441             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6442             : 
    6443          18 :                 if (pair_first)
    6444             :                 {
    6445           6 :                     if (is_utf16_surrogate_second(unicode))
    6446             :                     {
    6447           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6448           0 :                         pair_first = 0;
    6449             :                     }
    6450             :                     else
    6451           6 :                         goto invalid_pair;
    6452             :                 }
    6453          12 :                 else if (is_utf16_surrogate_second(unicode))
    6454           0 :                     goto invalid_pair;
    6455             : 
    6456          12 :                 if (is_utf16_surrogate_first(unicode))
    6457           6 :                     pair_first = unicode;
    6458             :                 else
    6459             :                 {
    6460           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6461           6 :                     appendStringInfoString(&str, cbuf);
    6462             :                 }
    6463             : 
    6464          12 :                 instr += 8;
    6465          12 :                 len -= 8;
    6466             :             }
    6467          30 :             else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
    6468          12 :             {
    6469             :                 pg_wchar    unicode;
    6470             : 
    6471          24 :                 unicode = hexval_n(instr + 2, 8);
    6472             : 
    6473          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6474           6 :                     ereport(ERROR,
    6475             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6476             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6477             : 
    6478          18 :                 if (pair_first)
    6479             :                 {
    6480           6 :                     if (is_utf16_surrogate_second(unicode))
    6481             :                     {
    6482           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6483           0 :                         pair_first = 0;
    6484             :                     }
    6485             :                     else
    6486           6 :                         goto invalid_pair;
    6487             :                 }
    6488          12 :                 else if (is_utf16_surrogate_second(unicode))
    6489           0 :                     goto invalid_pair;
    6490             : 
    6491          12 :                 if (is_utf16_surrogate_first(unicode))
    6492           6 :                     pair_first = unicode;
    6493             :                 else
    6494             :                 {
    6495           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6496           6 :                     appendStringInfoString(&str, cbuf);
    6497             :                 }
    6498             : 
    6499          12 :                 instr += 10;
    6500          12 :                 len -= 10;
    6501             :             }
    6502             :             else
    6503           6 :                 ereport(ERROR,
    6504             :                         (errcode(ERRCODE_SYNTAX_ERROR),
    6505             :                          errmsg("invalid Unicode escape"),
    6506             :                          errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
    6507             :         }
    6508             :         else
    6509             :         {
    6510         384 :             if (pair_first)
    6511           0 :                 goto invalid_pair;
    6512             : 
    6513         384 :             appendStringInfoChar(&str, *instr++);
    6514         384 :             len--;
    6515             :         }
    6516             :     }
    6517             : 
    6518             :     /* unfinished surrogate pair? */
    6519          24 :     if (pair_first)
    6520           6 :         goto invalid_pair;
    6521             : 
    6522          18 :     result = cstring_to_text_with_len(str.data, str.len);
    6523          18 :     pfree(str.data);
    6524             : 
    6525          18 :     PG_RETURN_TEXT_P(result);
    6526             : 
    6527          30 : invalid_pair:
    6528          30 :     ereport(ERROR,
    6529             :             (errcode(ERRCODE_SYNTAX_ERROR),
    6530             :              errmsg("invalid Unicode surrogate pair")));
    6531             :     PG_RETURN_NULL();           /* keep compiler quiet */
    6532             : }

Generated by: LCOV version 1.14