LCOV - code coverage report
Current view: top level - src/backend/utils/adt - varlena.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 1990 2213 89.9 %
Date: 2024-11-21 08:14:44 Functions: 161 174 92.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/detoast.h"
      21             : #include "access/toast_compression.h"
      22             : #include "catalog/pg_collation.h"
      23             : #include "catalog/pg_type.h"
      24             : #include "common/hashfn.h"
      25             : #include "common/int.h"
      26             : #include "common/unicode_category.h"
      27             : #include "common/unicode_norm.h"
      28             : #include "common/unicode_version.h"
      29             : #include "funcapi.h"
      30             : #include "lib/hyperloglog.h"
      31             : #include "libpq/pqformat.h"
      32             : #include "miscadmin.h"
      33             : #include "nodes/execnodes.h"
      34             : #include "parser/scansup.h"
      35             : #include "port/pg_bswap.h"
      36             : #include "regex/regex.h"
      37             : #include "utils/builtins.h"
      38             : #include "utils/bytea.h"
      39             : #include "utils/guc.h"
      40             : #include "utils/lsyscache.h"
      41             : #include "utils/memutils.h"
      42             : #include "utils/pg_locale.h"
      43             : #include "utils/sortsupport.h"
      44             : #include "utils/varlena.h"
      45             : 
      46             : 
      47             : /* GUC variable */
      48             : int         bytea_output = BYTEA_OUTPUT_HEX;
      49             : 
      50             : typedef struct varlena VarString;
      51             : 
      52             : /*
      53             :  * State for text_position_* functions.
      54             :  */
      55             : typedef struct
      56             : {
      57             :     bool        is_multibyte_char_in_char;  /* need to check char boundaries? */
      58             : 
      59             :     char       *str1;           /* haystack string */
      60             :     char       *str2;           /* needle string */
      61             :     int         len1;           /* string lengths in bytes */
      62             :     int         len2;           /* (len1/len2 presumably pair with str1/str2) */
      63             : 
      64             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      65             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      66             :     int         skiptable[256]; /* skip distance for given mismatched char */
      67             : 
      68             :     char       *last_match;     /* pointer to last match in 'str1' */
      69             : 
      70             :     /*
      71             :      * Sometimes we need to convert the byte position of a match to a
      72             :      * character position.  These store the last position that was converted,
      73             :      * so that on the next call, we can continue from that point, rather than
      74             :      * count characters from the very beginning.
      75             :      */
      76             :     char       *refpoint;       /* pointer within original haystack string */
      77             :     int         refpos;         /* 0-based character offset of the same point */
      78             : } TextPositionState;
      79             : 
      80             : typedef struct
      81             : {
      82             :     char       *buf1;           /* 1st string, or abbreviation original string
      83             :                                  * buf */
      84             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      85             :     int         buflen1;        /* Allocated length of buf1 */
      86             :     int         buflen2;        /* Allocated length of buf2 */
      87             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      88             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      89             :     int         last_returned;  /* Last comparison result (cache) */
      90             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      91             :     bool        collate_c;      /* NOTE(review): presumably set for the C collation; confirm */
      92             :     Oid         typid;          /* Actual datatype (text/bpchar/bytea/name) */
      93             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      94             :     hyperLogLogState full_card; /* Full key cardinality state */
      95             :     double      prop_card;      /* Required cardinality proportion */
      96             :     pg_locale_t locale;         /* NOTE(review): presumably the collation's locale; confirm */
      97             : } VarStringSortSupport;
      98             : 
      99             : /*
     100             :  * Output data for split_text(): we output either to an array or a table.
     101             :  * tupstore and tupdesc must be set up in advance to output to a table.
     102             :  */
     103             : typedef struct
     104             : {
     105             :     ArrayBuildState *astate;    /* presumably the state used for array output */
     106             :     Tuplestorestate *tupstore;  /* table output; must be set up in advance */
     107             :     TupleDesc   tupdesc;        /* table output; must be set up in advance */
     108             : } SplitTextOutputData;
     109             : 
     110             : /*
     111             :  * This should be large enough that most strings will fit, but small enough
     112             :  * that we feel comfortable putting it on the stack
     113             :  */
     114             : #define TEXTBUFLEN      1024
     115             : 
     116             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
     117             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
     118             : 
     119             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
     120             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
     121             : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
     122             : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
     123             : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
     124             : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
     125             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
     126             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     127             : static int32 text_length(Datum str);
     128             : static text *text_catenate(text *t1, text *t2);
     129             : static text *text_substring(Datum str,
     130             :                             int32 start,
     131             :                             int32 length,
     132             :                             bool length_not_specified);
     133             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     134             : static int  text_position(text *t1, text *t2, Oid collid);
     135             : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
     136             : static bool text_position_next(TextPositionState *state);
     137             : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
     138             : static char *text_position_get_match_ptr(TextPositionState *state);
     139             : static int  text_position_get_match_pos(TextPositionState *state);
     140             : static void text_position_cleanup(TextPositionState *state);
     141             : static void check_collation_set(Oid collid);
     142             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     143             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     144             : static bytea *bytea_substring(Datum str,
     145             :                               int S,
     146             :                               int L,
     147             :                               bool length_not_specified);
     148             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     149             : static void appendStringInfoText(StringInfo str, const text *t);
     150             : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
     151             : static void split_text_accum_result(SplitTextOutputData *tstate,
     152             :                                     text *field_value,
     153             :                                     text *null_string,
     154             :                                     Oid collation);
     155             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     156             :                                     const char *fldsep, const char *null_string);
     157             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
     158             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     159             :                                      int *value);
     160             : static const char *text_format_parse_format(const char *start_ptr,
     161             :                                             const char *end_ptr,
     162             :                                             int *argpos, int *widthpos,
     163             :                                             int *flags, int *width);
     164             : static void text_format_string_conversion(StringInfo buf, char conversion,
     165             :                                           FmgrInfo *typOutputInfo,
     166             :                                           Datum value, bool isNull,
     167             :                                           int flags, int width);
     168             : static void text_format_append_string(StringInfo buf, const char *str,
     169             :                                       int flags, int width);
     170             : 
     171             : 
     172             : /*****************************************************************************
     173             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     174             :  *****************************************************************************/
     175             : 
     176             : /*
     177             :  * cstring_to_text
     178             :  *
     179             :  * Create a text value from a null-terminated C string; s must not be NULL
     180             :  * because its length is taken with strlen().  The terminator is not copied.
     181             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     182             :  */
     183             : text *
     184    22523052 : cstring_to_text(const char *s)
     185             : {
     186    22523052 :     return cstring_to_text_with_len(s, strlen(s));  /* strlen() excludes the terminator */
     187             : }
     188             : 
     189             : /*
     190             :  * cstring_to_text_with_len
     191             :  *
     192             :  * Same as cstring_to_text except the caller specifies the string length;
     193             :  * the string need not be null-terminated.  len is not validated here.
     194             :  */
     195             : text *
     196    25145542 : cstring_to_text_with_len(const char *s, int len)
     197             : {
     198    25145542 :     text       *result = (text *) palloc(len + VARHDRSZ);
     199             : 
     200    25145542 :     SET_VARSIZE(result, len + VARHDRSZ);    /* stored size includes the header */
     201    25145542 :     memcpy(VARDATA(result), s, len);        /* exactly len bytes, no terminator */
     202             : 
     203    25145542 :     return result;
     204             : }
     205             : 
     206             : /*
     207             :  * text_to_cstring
     208             :  *
     209             :  * Create a palloc'd, null-terminated C string from a text value.
     210             :  *
     211             :  * We support being passed a compressed or toasted text value.
     212             :  * This is a bit bogus since such values shouldn't really be referred to as
     213             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     214             :  * case here, we'd need another routine that did, anyway.
     215             :  */
     216             : char *
     217    15062244 : text_to_cstring(const text *t)
     218             : {
     219             :     /* must cast away the const, unfortunately */
     220    15062244 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
     221    15062244 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);   /* data length, header excluded */
     222             :     char       *result;
     223             : 
     224    15062244 :     result = (char *) palloc(len + 1);      /* one extra byte for the terminator */
     225    15062244 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     226    15062244 :     result[len] = '\0';
     227             : 
     228    15062244 :     if (tunpacked != t)                     /* a different pointer came back: */
     229       39964 :         pfree(tunpacked);                   /* release that temporary */
     230             : 
     231    15062244 :     return result;
     232             : }
     233             : 
     234             : /*
     235             :  * text_to_cstring_buffer
     236             :  *
     237             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     238             :  *
     239             :  * The text string is truncated if necessary to fit.  The result is
     240             :  * guaranteed null-terminated (unless dst_len == 0).
     241             :  *
     242             :  * We support being passed a compressed or toasted text value.
     243             :  * This is a bit bogus since such values shouldn't really be referred to as
     244             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     245             :  * case here, we'd need another routine that did, anyway.
     246             :  */
     247             : void
     248         952 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     249             : {
     250             :     /* must cast away the const, unfortunately */
     251         952 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
     252         952 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     253             : 
     254         952 :     if (dst_len > 0)            /* with dst_len == 0 nothing is written to dst */
     255             :     {
     256         952 :         dst_len--;              /* keep one byte back for the terminator */
     257         952 :         if (dst_len >= src_len)
     258         952 :             dst_len = src_len;  /* the whole value fits */
     259             :         else                    /* ensure truncation is encoding-safe */
     260           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     261         952 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
     262         952 :         dst[dst_len] = '\0';
     263             :     }
     264             : 
     265         952 :     if (srcunpacked != src)     /* a different pointer came back: free it */
     266           0 :         pfree(srcunpacked);
     267         952 : }
     268             : 
     269             : 
     270             : /*****************************************************************************
     271             :  *   USER I/O ROUTINES                                                       *
     272             :  *****************************************************************************/
     273             : 
     274             : 
     275             : #define VAL(CH)         ((CH) - '0')    /* digit character -> its numeric value */
     276             : #define DIG(VAL)        ((VAL) + '0')   /* value in 0..9 -> digit character */
     277             : 
     278             : /*
     279             :  *      byteain         - converts from printable representation of byte array
     280             :  *
     281             :  *      Input beginning with '\x' is hex and is handed to hex_decode_safe().
     282             :  *      Otherwise '\nnn' (octal, 000-377) and '\\' are unescaped; other bytes
     283             :  *      are copied as is.  Malformed escapes are reported via ereturn().
     284             :  *
     285             :  *      BUGS:
     286             :  *              The input is scanned twice.
     287             :  *              The error checking of input is minimal.
     288             :  */
     289             : Datum
     290      985314 : byteain(PG_FUNCTION_ARGS)
     291             : {
     292      985314 :     char       *inputText = PG_GETARG_CSTRING(0);
     293      985314 :     Node       *escontext = fcinfo->context;
     294             :     char       *tp;
     295             :     char       *rp;
     296             :     int         bc;
     297             :     bytea      *result;
     298             : 
     299             :     /* Recognize hex input */
     300      985314 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     301             :     {
     302      111132 :         size_t      len = strlen(inputText);
     303             : 
     304      111132 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     305      111132 :         result = palloc(bc);
     306      111132 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
     307             :                              escontext);
     308      111120 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     309             : 
     310      111120 :         PG_RETURN_BYTEA_P(result);
     311             :     }
     312             : 
     313             :     /* Else, it's the traditional escaped style */
     314     8101596 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)     /* pass 1: count output bytes */
     315             :     {
     316     7227426 :         if (tp[0] != '\\')
     317     7226410 :             tp++;
     318        1016 :         else if ((tp[0] == '\\') &&
     319        1016 :                  (tp[1] >= '0' && tp[1] <= '3') &&      /* first digit <= 3 keeps the value in one byte */
     320        1004 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     321        1004 :                  (tp[3] >= '0' && tp[3] <= '7'))
     322        1004 :             tp += 4;            /* backslash plus three octal digits -> one byte */
     323          12 :         else if ((tp[0] == '\\') &&
     324          12 :                  (tp[1] == '\\'))
     325           0 :             tp += 2;            /* doubled backslash -> one byte */
     326             :         else
     327             :         {
     328             :             /*
     329             :              * one backslash, not followed by another or ### valid octal
     330             :              */
     331          12 :             ereturn(escontext, (Datum) 0,
     332             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     333             :                      errmsg("invalid input syntax for type %s", "bytea")));
     334             :         }
     335             :     }
     336             : 
     337      874170 :     bc += VARHDRSZ;             /* counted data bytes plus the header */
     338             : 
     339      874170 :     result = (bytea *) palloc(bc);
     340      874170 :     SET_VARSIZE(result, bc);
     341             : 
     342      874170 :     tp = inputText;
     343      874170 :     rp = VARDATA(result);
     344     8101554 :     while (*tp != '\0')         /* pass 2: decode into the result */
     345             :     {
     346     7227384 :         if (tp[0] != '\\')
     347     7226380 :             *rp++ = *tp++;
     348        1004 :         else if ((tp[0] == '\\') &&
     349        1004 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     350        1004 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     351        1004 :                  (tp[3] >= '0' && tp[3] <= '7'))
     352             :         {
     353        1004 :             bc = VAL(tp[1]);    /* bc is reused as scratch for the octal value */
     354        1004 :             bc <<= 3;
     355        1004 :             bc += VAL(tp[2]);
     356        1004 :             bc <<= 3;
     357        1004 :             *rp++ = bc + VAL(tp[3]);
     358             : 
     359        1004 :             tp += 4;
     360             :         }
     361           0 :         else if ((tp[0] == '\\') &&
     362           0 :                  (tp[1] == '\\'))
     363             :         {
     364           0 :             *rp++ = '\\';
     365           0 :             tp += 2;
     366             :         }
     367             :         else
     368             :         {
     369             :             /*
     370             :              * We should never get here. The first pass should not allow it.
     371             :              */
     372           0 :             ereturn(escontext, (Datum) 0,
     373             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     374             :                      errmsg("invalid input syntax for type %s", "bytea")));
     375             :         }
     376             :     }
     377             : 
     378      874170 :     PG_RETURN_BYTEA_P(result);
     379             : }
     380             : 
     381             : /*
     382             :  *      byteaout        - converts to printable representation of byte array
     383             :  *
     384             :  *      In the traditional escaped format, non-printable characters are
     385             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     386             :  */
     387             : Datum
     388      159836 : byteaout(PG_FUNCTION_ARGS)
     389             : {
     390      159836 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     391             :     char       *result;
     392             :     char       *rp;
     393             : 
     394      159836 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     395             :     {
     396             :         /* Print hex format */
     397      159452 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);    /* 2 chars per byte, 2-char prefix, terminator */
     398      159452 :         *rp++ = '\\';
     399      159452 :         *rp++ = 'x';
     400      159452 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);    /* advance past what was written */
     401             :     }
     402         384 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     403             :     {
     404             :         /* Print traditional escaped format */
     405             :         char       *vp;
     406             :         uint64      len;
     407             :         int         i;
     408             : 
     409         384 :         len = 1;                /* empty string has 1 char */
     410         384 :         vp = VARDATA_ANY(vlena);
     411      217660 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)   /* pass 1: size the output */
     412             :         {
     413      217276 :             if (*vp == '\\')
     414           0 :                 len += 2;       /* doubled backslash */
     415      217276 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     416         498 :                 len += 4;       /* backslash plus three octal digits */
     417             :             else
     418      216778 :                 len++;
     419             :         }
     420             : 
     421             :         /*
     422             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     423             :          * for safety let's check rather than relying on palloc's internal
     424             :          * check.
     425             :          */
     426         384 :         if (len > MaxAllocSize)
     427           0 :             ereport(ERROR,
     428             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     429             :                      errmsg_internal("result of bytea output conversion is too large")));
     430         384 :         rp = result = (char *) palloc(len);
     431             : 
     432         384 :         vp = VARDATA_ANY(vlena);
     433      217660 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)   /* pass 2: emit, same tests as pass 1 */
     434             :         {
     435      217276 :             if (*vp == '\\')
     436             :             {
     437           0 :                 *rp++ = '\\';
     438           0 :                 *rp++ = '\\';
     439             :             }
     440      217276 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     441         498 :             {
     442             :                 int         val;    /* holds unprintable chars */
     443             : 
     444         498 :                 val = *vp;
     445         498 :                 rp[0] = '\\';
     446         498 :                 rp[3] = DIG(val & 07);  /* digits are filled in from least significant */
     447         498 :                 val >>= 3;
     448         498 :                 rp[2] = DIG(val & 07);
     449         498 :                 val >>= 3;
     450         498 :                 rp[1] = DIG(val & 03);  /* only two bits remain for the leading digit */
     451         498 :                 rp += 4;
     452             :             }
     453             :             else
     454      216778 :                 *rp++ = *vp;
     455             :         }
     456             :     }
     457             :     else
     458             :     {
     459           0 :         elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
     460             :              bytea_output);
     461             :         rp = result = NULL;     /* keep compiler quiet */
     462             :     }
     463      159836 :     *rp = '\0';                 /* both formats leave rp at the end of the output */
     464      159836 :     PG_RETURN_CSTRING(result);
     465             : }
     466             : 
     467             : /*
     468             :  *      bytearecv           - converts external binary format to bytea
     469             :  */
     470             : Datum
     471      107710 : bytearecv(PG_FUNCTION_ARGS)
     472             : {
     473      107710 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     474             :     bytea      *result;
     475             :     int         nbytes;
     476             : 
     477      107710 :     nbytes = buf->len - buf->cursor;    /* bytes from the cursor to the end of buf */
     478      107710 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     479      107710 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     480      107710 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);  /* fills the data area after the header */
     481      107710 :     PG_RETURN_BYTEA_P(result);
     482             : }
     483             : 
     484             : /*
     485             :  *      byteasend           - converts bytea to binary format
     486             :  *
     487             :  * This is a special case: just copy the input...
     488             :  */
     489             : Datum
     490       68960 : byteasend(PG_FUNCTION_ARGS)
     491             : {
     492       68960 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     493             : 
     494       68960 :     PG_RETURN_BYTEA_P(vlena);   /* returned as fetched; no conversion step */
     495             : }
     496             : 
     497             : Datum
     498      258774 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     499             : {
     500             :     StringInfo  state;
     501             : 
     502      258774 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);    /* arg 0: running state, if any */
     503             : 
     504             :     /* Append the value unless null, preceding it with the delimiter. */
     505      258774 :     if (!PG_ARGISNULL(1))
     506             :     {
     507      243774 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     508      243774 :         bool        isfirst = false;
     509             : 
     510             :         /*
     511             :          * You might think we can just throw away the first delimiter, however
     512             :          * we must keep it as we may be a parallel worker doing partial
     513             :          * aggregation building a state to send to the main process.  We need
     514             :          * to keep the delimiter of every aggregation so that the combine
     515             :          * function can properly join up the strings of two separately
     516             :          * partially aggregated results.  The first delimiter is only stripped
     517             :          * off in the final function.  To know how much to strip off the front
     518             :          * of the string, we store the length of the first delimiter in the
     519             :          * StringInfo's cursor field, which we don't otherwise need here.
     520             :          */
     521      243774 :         if (state == NULL)      /* the state is created by the first non-null value */
     522             :         {
     523         168 :             state = makeStringAggState(fcinfo);
     524         168 :             isfirst = true;
     525             :         }
     526             : 
     527      243774 :         if (!PG_ARGISNULL(2))   /* a null delimiter appends nothing */
     528             :         {
     529      243762 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     530             : 
     531      243762 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
     532      243762 :                                    VARSIZE_ANY_EXHDR(delim));
     533      243762 :             if (isfirst)
     534         162 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);   /* length of the leading delimiter */
     535             :         }
     536             : 
     537      243774 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
     538      243774 :                                VARSIZE_ANY_EXHDR(value));
     539             :     }
     540             : 
     541             :     /*
     542             :      * The transition type for string_agg() is declared to be "internal",
     543             :      * which is a pass-by-value type the same size as a pointer.
     544             :      */
     545      258774 :     if (state)
     546      258732 :         PG_RETURN_POINTER(state);
     547          42 :     PG_RETURN_NULL();           /* still no state: only null values so far */
     548             : }
     549             : 
     550             : Datum
     551         154 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     552             : {
     553             :     StringInfo  state;
     554             : 
     555             :     /* cannot be called directly because of internal-type argument */
     556             :     Assert(AggCheckCallContext(fcinfo, NULL));
     557             : 
     558         154 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     559             : 
     560         154 :     if (state != NULL)
     561             :     {
     562             :         /* As per comment in transfn, strip data before the cursor position */
     563             :         bytea      *result;
     564         148 :         int         strippedlen = state->len - state->cursor;   /* bytes from the cursor onward */
     565             : 
     566         148 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
     567         148 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
     568         148 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
     569         148 :         PG_RETURN_BYTEA_P(result);
     570             :     }
     571             :     else
     572           6 :         PG_RETURN_NULL();       /* a null state yields a null result */
     573             : }
     574             : 
     575             : /*
     576             :  *      textin          - converts cstring to internal representation
     577             :  */
     578             : Datum
     579    19516918 : textin(PG_FUNCTION_ARGS)
     580             : {
     581    19516918 :     char       *inputText = PG_GETARG_CSTRING(0);
     582             : 
     583    19516918 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     584             : }
     585             : 
     586             : /*
     587             :  *      textout         - converts internal representation to cstring
     588             :  */
     589             : Datum
     590     7291800 : textout(PG_FUNCTION_ARGS)
     591             : {
     592     7291800 :     Datum       txt = PG_GETARG_DATUM(0);
     593             : 
     594     7291800 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     595             : }
     596             : 
     597             : /*
     598             :  *      textrecv            - converts external binary format to text
     599             :  */
     600             : Datum
     601          48 : textrecv(PG_FUNCTION_ARGS)
     602             : {
     603          48 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     604             :     text       *result;
     605             :     char       *str;
     606             :     int         nbytes;
     607             : 
     608          48 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     609             : 
     610          48 :     result = cstring_to_text_with_len(str, nbytes);
     611          48 :     pfree(str);
     612          48 :     PG_RETURN_TEXT_P(result);
     613             : }
     614             : 
     615             : /*
     616             :  *      textsend            - converts text to binary format
     617             :  */
     618             : Datum
     619        4912 : textsend(PG_FUNCTION_ARGS)
     620             : {
     621        4912 :     text       *t = PG_GETARG_TEXT_PP(0);
     622             :     StringInfoData buf;
     623             : 
     624        4912 :     pq_begintypsend(&buf);
     625        4912 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
     626        4912 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     627             : }
     628             : 
     629             : 
     630             : /*
     631             :  *      unknownin           - converts cstring to internal representation
     632             :  */
     633             : Datum
     634           0 : unknownin(PG_FUNCTION_ARGS)
     635             : {
     636           0 :     char       *str = PG_GETARG_CSTRING(0);
     637             : 
     638             :     /* representation is same as cstring */
     639           0 :     PG_RETURN_CSTRING(pstrdup(str));
     640             : }
     641             : 
     642             : /*
     643             :  *      unknownout          - converts internal representation to cstring
     644             :  */
     645             : Datum
     646         790 : unknownout(PG_FUNCTION_ARGS)
     647             : {
     648             :     /* representation is same as cstring */
     649         790 :     char       *str = PG_GETARG_CSTRING(0);
     650             : 
     651         790 :     PG_RETURN_CSTRING(pstrdup(str));
     652             : }
     653             : 
     654             : /*
     655             :  *      unknownrecv         - converts external binary format to unknown
     656             :  */
     657             : Datum
     658           0 : unknownrecv(PG_FUNCTION_ARGS)
     659             : {
     660           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     661             :     char       *str;
     662             :     int         nbytes;
     663             : 
     664           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     665             :     /* representation is same as cstring */
     666           0 :     PG_RETURN_CSTRING(str);
     667             : }
     668             : 
     669             : /*
     670             :  *      unknownsend         - converts unknown to binary format
     671             :  */
     672             : Datum
     673           0 : unknownsend(PG_FUNCTION_ARGS)
     674             : {
     675             :     /* representation is same as cstring */
     676           0 :     char       *str = PG_GETARG_CSTRING(0);
     677             :     StringInfoData buf;
     678             : 
     679           0 :     pq_begintypsend(&buf);
     680           0 :     pq_sendtext(&buf, str, strlen(str));
     681           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     682             : }
     683             : 
     684             : 
     685             : /* ========== PUBLIC ROUTINES ========== */
     686             : 
     687             : /*
     688             :  * textlen -
     689             :  *    returns the logical length of a text*
     690             :  *     (which is less than the VARSIZE of the text*)
     691             :  */
     692             : Datum
     693      430688 : textlen(PG_FUNCTION_ARGS)
     694             : {
     695      430688 :     Datum       str = PG_GETARG_DATUM(0);
     696             : 
     697             :     /* try to avoid decompressing argument */
     698      430688 :     PG_RETURN_INT32(text_length(str));
     699             : }
     700             : 
     701             : /*
     702             :  * text_length -
     703             :  *  Does the real work for textlen()
     704             :  *
     705             :  *  This is broken out so it can be called directly by other string processing
     706             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     707             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     708             :  *  in some cases.
     709             :  */
     710             : static int32
     711      430700 : text_length(Datum str)
     712             : {
     713             :     /* fastpath when max encoding length is one */
     714      430700 :     if (pg_database_encoding_max_length() == 1)
     715          20 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     716             :     else
     717             :     {
     718      430680 :         text       *t = DatumGetTextPP(str);
     719             : 
     720      430680 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
     721             :                                              VARSIZE_ANY_EXHDR(t)));
     722             :     }
     723             : }
     724             : 
     725             : /*
     726             :  * textoctetlen -
     727             :  *    returns the physical length of a text*
     728             :  *     (which is less than the VARSIZE of the text*)
     729             :  */
     730             : Datum
     731          70 : textoctetlen(PG_FUNCTION_ARGS)
     732             : {
     733          70 :     Datum       str = PG_GETARG_DATUM(0);
     734             : 
     735             :     /* We need not detoast the input at all */
     736          70 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     737             : }
     738             : 
     739             : /*
     740             :  * textcat -
     741             :  *    takes two text* and returns a text* that is the concatenation of
     742             :  *    the two.
     743             :  *
     744             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     745             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     746             :  * Allocate space for output in all cases.
     747             :  * XXX - thomas 1997-07-10
     748             :  */
     749             : Datum
     750     1804202 : textcat(PG_FUNCTION_ARGS)
     751             : {
     752     1804202 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     753     1804202 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     754             : 
     755     1804202 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     756             : }
     757             : 
     758             : /*
     759             :  * text_catenate
     760             :  *  Guts of textcat(), broken out so it can be used by other functions
     761             :  *
     762             :  * Arguments can be in short-header form, but not compressed or out-of-line
     763             :  */
     764             : static text *
     765     1804282 : text_catenate(text *t1, text *t2)
     766             : {
     767             :     text       *result;
     768             :     int         len1,
     769             :                 len2,
     770             :                 len;
     771             :     char       *ptr;
     772             : 
     773     1804282 :     len1 = VARSIZE_ANY_EXHDR(t1);
     774     1804282 :     len2 = VARSIZE_ANY_EXHDR(t2);
     775             : 
     776             :     /* paranoia ... probably should throw error instead? */
     777     1804282 :     if (len1 < 0)
     778           0 :         len1 = 0;
     779     1804282 :     if (len2 < 0)
     780           0 :         len2 = 0;
     781             : 
     782     1804282 :     len = len1 + len2 + VARHDRSZ;
     783     1804282 :     result = (text *) palloc(len);
     784             : 
     785             :     /* Set size of result string... */
     786     1804282 :     SET_VARSIZE(result, len);
     787             : 
     788             :     /* Fill data field of result string... */
     789     1804282 :     ptr = VARDATA(result);
     790     1804282 :     if (len1 > 0)
     791     1803464 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     792     1804282 :     if (len2 > 0)
     793     1804072 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     794             : 
     795     1804282 :     return result;
     796             : }
     797             : 
     798             : /*
     799             :  * charlen_to_bytelen()
     800             :  *  Compute the number of bytes occupied by n characters starting at *p
     801             :  *
     802             :  * It is caller's responsibility that there actually are n characters;
     803             :  * the string need not be null-terminated.
     804             :  */
     805             : static int
     806       14658 : charlen_to_bytelen(const char *p, int n)
     807             : {
     808       14658 :     if (pg_database_encoding_max_length() == 1)
     809             :     {
     810             :         /* Optimization for single-byte encodings */
     811         180 :         return n;
     812             :     }
     813             :     else
     814             :     {
     815             :         const char *s;
     816             : 
     817     5983078 :         for (s = p; n > 0; n--)
     818     5968600 :             s += pg_mblen(s);
     819             : 
     820       14478 :         return s - p;
     821             :     }
     822             : }
     823             : 
     824             : /*
     825             :  * text_substr()
     826             :  * Return a substring starting at the specified position.
     827             :  * - thomas 1997-12-31
     828             :  *
     829             :  * Input:
     830             :  *  - string
     831             :  *  - starting position (is one-based)
     832             :  *  - string length
     833             :  *
     834             :  * If the starting position is zero or less, then return from the start of the string
     835             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     836             :  * If the length is less than zero, return the remaining string.
     837             :  *
     838             :  * Added multibyte support.
     839             :  * - Tatsuo Ishii 1998-4-21
     840             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     841             :  * Formerly returned the entire string; now returns a portion.
     842             :  * - Thomas Lockhart 1998-12-10
     843             :  * Now uses faster TOAST-slicing interface
     844             :  * - John Gray 2002-02-22
     845             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     846             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     847             :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
     848             :  * S > LC and < LC + 4 sometimes garbage characters are returned.
     849             :  * - Joe Conway 2002-08-10
     850             :  */
     851             : Datum
     852      587804 : text_substr(PG_FUNCTION_ARGS)
     853             : {
     854      587804 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     855             :                                     PG_GETARG_INT32(1),
     856             :                                     PG_GETARG_INT32(2),
     857             :                                     false));
     858             : }
     859             : 
     860             : /*
     861             :  * text_substr_no_len -
     862             :  *    Wrapper to avoid opr_sanity failure due to
     863             :  *    one function accepting a different number of args.
     864             :  */
     865             : Datum
     866          36 : text_substr_no_len(PG_FUNCTION_ARGS)
     867             : {
     868          36 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     869             :                                     PG_GETARG_INT32(1),
     870             :                                     -1, true));
     871             : }
     872             : 
     873             : /*
     874             :  * text_substring -
     875             :  *  Does the real work for text_substr() and text_substr_no_len()
     876             :  *
     877             :  *  This is broken out so it can be called directly by other string processing
     878             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     879             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     880             :  *  of it in some cases.
     881             :  *
     882             :  *  The result is always a freshly palloc'd datum.
     883             :  */
     884             : static text *
     885      627940 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     886             : {
     887      627940 :     int32       eml = pg_database_encoding_max_length();
     888      627940 :     int32       S = start;      /* start position */
     889             :     int32       S1;             /* adjusted start position */
     890             :     int32       L1;             /* adjusted substring length */
     891             :     int32       E;              /* end position */
     892             : 
     893             :     /*
     894             :      * SQL99 says S can be zero or negative (which we don't document), but we
     895             :      * still must fetch from the start of the string.
     896             :      * https://www.postgresql.org/message-id/170905442373.643.11536838320909376197%40wrigleys.postgresql.org
     897             :      */
     898      627940 :     S1 = Max(S, 1);
     899             : 
     900             :     /* life is easy if the encoding max length is 1 */
     901      627940 :     if (eml == 1)
     902             :     {
     903          22 :         if (length_not_specified)   /* special case - get length to end of
     904             :                                      * string */
     905           0 :             L1 = -1;
     906          22 :         else if (length < 0)
     907             :         {
     908             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     909           0 :             ereport(ERROR,
     910             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     911             :                      errmsg("negative substring length not allowed")));
     912             :             L1 = -1;            /* silence stupider compilers */
     913             :         }
     914          22 :         else if (pg_add_s32_overflow(S, length, &E))
     915             :         {
     916             :             /*
     917             :              * L could be large enough for S + L to overflow, in which case
     918             :              * the substring must run to end of string.
     919             :              */
     920           0 :             L1 = -1;
     921             :         }
     922             :         else
     923             :         {
     924             :             /*
     925             :              * A zero or negative value for the end position can happen if the
     926             :              * start was negative or one. SQL99 says to return a zero-length
     927             :              * string.
     928             :              */
     929          22 :             if (E < 1)
     930           0 :                 return cstring_to_text("");
     931             : 
     932          22 :             L1 = E - S1;
     933             :         }
     934             : 
     935             :         /*
     936             :          * If the start position is past the end of the string, SQL99 says to
     937             :          * return a zero-length string -- DatumGetTextPSlice() will do that
     938             :          * for us.  We need only convert S1 to zero-based starting position.
     939             :          */
     940          22 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     941             :     }
     942      627918 :     else if (eml > 1)
     943             :     {
     944             :         /*
     945             :          * When encoding max length is > 1, we can't get LC without
     946             :          * detoasting, so we'll grab a conservatively large slice now and go
     947             :          * back later to do the right thing
     948             :          */
     949             :         int32       slice_start;
     950             :         int32       slice_size;
     951             :         int32       slice_strlen;
     952             :         text       *slice;
     953             :         int32       E1;
     954             :         int32       i;
     955             :         char       *p;
     956             :         char       *s;
     957             :         text       *ret;
     958             : 
     959             :         /*
     960             :          * We need to start at position zero because there is no way to know
     961             :          * in advance which byte offset corresponds to the supplied start
     962             :          * position.
     963             :          */
     964      627918 :         slice_start = 0;
     965             : 
     966      627918 :         if (length_not_specified)   /* special case - get length to end of
     967             :                                      * string */
     968          76 :             slice_size = L1 = -1;
     969      627842 :         else if (length < 0)
     970             :         {
     971             :             /* SQL99 says to throw an error for E < S, i.e., negative length */
     972          12 :             ereport(ERROR,
     973             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
     974             :                      errmsg("negative substring length not allowed")));
     975             :             slice_size = L1 = -1;   /* silence stupider compilers */
     976             :         }
     977      627830 :         else if (pg_add_s32_overflow(S, length, &E))
     978             :         {
     979             :             /*
     980             :              * L could be large enough for S + L to overflow, in which case
     981             :              * the substring must run to end of string.
     982             :              */
     983           6 :             slice_size = L1 = -1;
     984             :         }
     985             :         else
     986             :         {
     987             :             /*
     988             :              * A zero or negative value for the end position can happen if the
     989             :              * start was negative or one. SQL99 says to return a zero-length
     990             :              * string.
     991             :              */
     992      627824 :             if (E < 1)
     993           0 :                 return cstring_to_text("");
     994             : 
     995             :             /*
     996             :              * if E is past the end of the string, the tuple toaster will
     997             :              * truncate the length for us
     998             :              */
     999      627824 :             L1 = E - S1;
    1000             : 
    1001             :             /*
    1002             :              * Total slice size in bytes can't be any longer than the start
    1003             :              * position plus substring length times the encoding max length.
    1004             :              * If that overflows, we can just use -1.
    1005             :              */
    1006      627824 :             if (pg_mul_s32_overflow(E, eml, &slice_size))
    1007           6 :                 slice_size = -1;
    1008             :         }
    1009             : 
    1010             :         /*
    1011             :          * If we're working with an untoasted source, no need to do an extra
    1012             :          * copying step.
    1013             :          */
    1014      627906 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
    1015      627852 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
    1016         324 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
    1017             :         else
    1018      627582 :             slice = (text *) DatumGetPointer(str);
    1019             : 
    1020             :         /* see if we got back an empty string */
    1021      627906 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
    1022             :         {
    1023           0 :             if (slice != (text *) DatumGetPointer(str))
    1024           0 :                 pfree(slice);
    1025           0 :             return cstring_to_text("");
    1026             :         }
    1027             : 
    1028             :         /* Now we can get the actual length of the slice in MB characters */
    1029      627906 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
    1030      627906 :                                             VARSIZE_ANY_EXHDR(slice));
    1031             : 
    1032             :         /*
    1033             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
    1034             :          * says to return a zero-length string.
    1035             :          */
    1036      627906 :         if (S1 > slice_strlen)
    1037             :         {
    1038          22 :             if (slice != (text *) DatumGetPointer(str))
    1039           0 :                 pfree(slice);
    1040          22 :             return cstring_to_text("");
    1041             :         }
    1042             : 
    1043             :         /*
    1044             :          * Adjust L1 and E1 now that we know the slice string length. Again
    1045             :          * remember that S1 is one based, and slice_start is zero based.
    1046             :          */
    1047      627884 :         if (L1 > -1)
    1048      627824 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
    1049             :         else
    1050          60 :             E1 = slice_start + 1 + slice_strlen;
    1051             : 
    1052             :         /*
    1053             :          * Find the start position in the slice; remember S1 is not zero based
    1054             :          */
    1055      627884 :         p = VARDATA_ANY(slice);
    1056     5449230 :         for (i = 0; i < S1 - 1; i++)
    1057     4821346 :             p += pg_mblen(p);
    1058             : 
    1059             :         /* hang onto a pointer to our start position */
    1060      627884 :         s = p;
    1061             : 
    1062             :         /*
    1063             :          * Count the actual bytes used by the substring of the requested
    1064             :          * length.
    1065             :          */
    1066     9786596 :         for (i = S1; i < E1; i++)
    1067     9158712 :             p += pg_mblen(p);
    1068             : 
    1069      627884 :         ret = (text *) palloc(VARHDRSZ + (p - s));
    1070      627884 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
    1071      627884 :         memcpy(VARDATA(ret), s, (p - s));
    1072             : 
    1073      627884 :         if (slice != (text *) DatumGetPointer(str))
    1074         324 :             pfree(slice);
    1075             : 
    1076      627884 :         return ret;
    1077             :     }
    1078             :     else
    1079           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
    1080             : 
    1081             :     /* not reached: suppress compiler warning */
    1082             :     return NULL;
    1083             : }
    1084             : 
    1085             : /*
    1086             :  * textoverlay
    1087             :  *  Replace specified substring of first string with second
    1088             :  *
    1089             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1090             :  * This code is a direct implementation of what the standard says.
    1091             :  */
    1092             : Datum
    1093          28 : textoverlay(PG_FUNCTION_ARGS)
    1094             : {
    1095          28 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1096          28 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1097          28 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1098          28 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1099             : 
    1100          28 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1101             : }
    1102             : 
    1103             : Datum
    1104          12 : textoverlay_no_len(PG_FUNCTION_ARGS)
    1105             : {
    1106          12 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1107          12 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1108          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1109             :     int         sl;
    1110             : 
    1111          12 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
    1112          12 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1113             : }
    1114             : 
    1115             : static text *
    1116          40 : text_overlay(text *t1, text *t2, int sp, int sl)
    1117             : {
    1118             :     text       *result;
    1119             :     text       *s1;
    1120             :     text       *s2;
    1121             :     int         sp_pl_sl;
    1122             : 
    1123             :     /*
    1124             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    1125             :      * "substring length" error because that's what should be expected
    1126             :      * according to the spec's definition of OVERLAY().
    1127             :      */
    1128          40 :     if (sp <= 0)
    1129           0 :         ereport(ERROR,
    1130             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1131             :                  errmsg("negative substring length not allowed")));
    1132          40 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    1133           0 :         ereport(ERROR,
    1134             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1135             :                  errmsg("integer out of range")));
    1136             : 
    1137          40 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
    1138          40 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    1139          40 :     result = text_catenate(s1, t2);
    1140          40 :     result = text_catenate(result, s2);
    1141             : 
    1142          40 :     return result;
    1143             : }
    1144             : 
    1145             : /*
    1146             :  * textpos -
    1147             :  *    Return the position of the specified substring.
    1148             :  *    Implements the SQL POSITION() function.
    1149             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1150             :  * - thomas 1997-07-27
    1151             :  */
    1152             : Datum
    1153         106 : textpos(PG_FUNCTION_ARGS)
    1154             : {
    1155         106 :     text       *str = PG_GETARG_TEXT_PP(0);
    1156         106 :     text       *search_str = PG_GETARG_TEXT_PP(1);
    1157             : 
    1158         106 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
    1159             : }
    1160             : 
    1161             : /*
    1162             :  * text_position -
    1163             :  *  Does the real work for textpos()
    1164             :  *
    1165             :  * Inputs:
    1166             :  *      t1 - string to be searched
    1167             :  *      t2 - pattern to match within t1
                     :  *      collid - collation OID, handed to text_position_setup(), which
                     :  *               raises an error if it is invalid or nondeterministic
    1168             :  * Result:
    1169             :  *      Character index of the first matched char, starting from 1,
    1170             :  *      or 0 if no match.
                     :  *
                     :  *  An empty t2 yields 1 (even if t1 is empty too), and a t1 that is
                     :  *  shorter than t2 yields 0.  Both shortcuts return before
                     :  *  text_position_setup() runs, so collid is not examined in those cases.
    1171             :  *
    1172             :  *  This is broken out so it can be called directly by other string processing
    1173             :  *  functions.
    1174             :  */
    1175             : static int
    1176         106 : text_position(text *t1, text *t2, Oid collid)
    1177             : {
    1178             :     TextPositionState state;
    1179             :     int         result;
    1180             : 
    1181             :     /* Empty needle always matches at position 1 */
    1182         106 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
    1183          12 :         return 1;
    1184             : 
    1185             :     /* Otherwise, can't match if haystack is shorter than needle */
    1186          94 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
    1187          22 :         return 0;
    1188             : 
    1189          72 :     text_position_setup(t1, t2, collid, &state);
    1190          72 :     if (!text_position_next(&state))
    1191          24 :         result = 0;
    1192             :     else
    1193          48 :         result = text_position_get_match_pos(&state);
    1194          72 :     text_position_cleanup(&state);
    1195          72 :     return result;
    1196             : }
    1197             : 
    1198             : 
    1199             : /*
    1200             :  * text_position_setup, text_position_next, text_position_cleanup -
    1201             :  *  Component steps of text_position()
    1202             :  *
    1203             :  * These are broken out so that a string can be efficiently searched for
    1204             :  * multiple occurrences of the same pattern.  text_position_next may be
    1205             :  * called multiple times, and it advances to the next match on each call.
    1206             :  * text_position_get_match_ptr() and text_position_get_match_pos() return
    1207             :  * a pointer or 1-based character position of the last match, respectively.
    1208             :  *
    1209             :  * The "state" variable is normally just a local variable in the caller.
    1210             :  *
    1211             :  * NOTE: text_position_next skips over the matched portion.  For example,
    1212             :  * searching for "xx" in "xxx" returns only one match, not two.
                     :  *
                     :  * text_position_setup raises an error if collid is invalid or names a
                     :  * nondeterministic collation.  The state records pointers into the data
                     :  * of t1 and t2 rather than copies, so both must stay valid for as long as
                     :  * the state is in use.  Both strings are expected to be non-empty (this
                     :  * is only checked by assertions); text_position() ensures that by
                     :  * returning early for an empty needle or a too-short haystack.
    1213             :  */
    1214             : 
    1215             : static void
    1216        1420 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
    1217             : {
    1218        1420 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
    1219        1420 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
    1220             :     pg_locale_t mylocale;
    1221             : 
    1222        1420 :     check_collation_set(collid);
    1223             : 
    1224        1420 :     mylocale = pg_newlocale_from_collation(collid);
    1225             : 
    1226        1420 :     if (!mylocale->deterministic)
    1227          12 :         ereport(ERROR,
    1228             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1229             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1230             : 
    1231             :     Assert(len1 > 0);
    1232             :     Assert(len2 > 0);
    1233             : 
    1234             :     /*
    1235             :      * Even with a multi-byte encoding, we perform the search using the raw
    1236             :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
    1237             :      * because in UTF-8 the byte sequence of one character cannot contain
    1238             :      * another character.  For other multi-byte encodings, we do the search
    1239             :      * initially as a simple byte search, ignoring multibyte issues, but
    1240             :      * verify afterwards that the match we found is at a character boundary,
    1241             :      * and continue the search if it was a false match.
    1242             :      */
    1243        1408 :     if (pg_database_encoding_max_length() == 1)
    1244         108 :         state->is_multibyte_char_in_char = false;
    1245        1300 :     else if (GetDatabaseEncoding() == PG_UTF8)
    1246        1300 :         state->is_multibyte_char_in_char = false;
    1247             :     else
    1248           0 :         state->is_multibyte_char_in_char = true;
    1249             : 
    1250        1408 :     state->str1 = VARDATA_ANY(t1);
    1251        1408 :     state->str2 = VARDATA_ANY(t2);
    1252        1408 :     state->len1 = len1;
    1253        1408 :     state->len2 = len2;
    1254        1408 :     state->last_match = NULL;
    1255        1408 :     state->refpoint = state->str1;
    1256        1408 :     state->refpos = 0;
    1257             : 
    1258             :     /*
    1259             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1260             :      * notes we use the terminology that the "haystack" is the string to be
    1261             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1262             :      *
    1263             :      * If the needle is empty or bigger than the haystack then there is no
    1264             :      * point in wasting cycles initializing the table.  We also choose not to
    1265             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1266             :      * save anything in that case.
                     :      *
                     :      * Note that when this block is skipped, state->skiptablemask and
                     :      * state->skiptable are left unset by this function.
    1267             :      */
    1268        1408 :     if (len1 >= len2 && len2 > 1)
    1269             :     {
    1270        1200 :         int         searchlength = len1 - len2;
    1271             :         int         skiptablemask;
    1272             :         int         last;
    1273             :         int         i;
    1274        1200 :         const char *str2 = state->str2;
    1275             : 
    1276             :         /*
    1277             :          * First we must determine how much of the skip table to use.  The
    1278             :          * declaration of TextPositionState allows up to 256 elements, but for
    1279             :          * short search problems we don't really want to have to initialize so
    1280             :          * many elements --- it would take too long in comparison to the
    1281             :          * actual search time.  So we choose a useful skip table size based on
    1282             :          * the haystack length minus the needle length.  The closer the needle
    1283             :          * length is to the haystack length the less useful skipping becomes.
    1284             :          *
    1285             :          * Note: since we use bit-masking to select table elements, the skip
    1286             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1287             :          */
    1288        1200 :         if (searchlength < 16)
    1289          54 :             skiptablemask = 3;
    1290        1146 :         else if (searchlength < 64)
    1291          16 :             skiptablemask = 7;
    1292        1130 :         else if (searchlength < 128)
    1293          14 :             skiptablemask = 15;
    1294        1116 :         else if (searchlength < 512)
    1295         216 :             skiptablemask = 31;
    1296         900 :         else if (searchlength < 2048)
    1297         702 :             skiptablemask = 63;
    1298         198 :         else if (searchlength < 4096)
    1299         128 :             skiptablemask = 127;
    1300             :         else
    1301          70 :             skiptablemask = 255;
    1302        1200 :         state->skiptablemask = skiptablemask;
    1303             : 
    1304             :         /*
    1305             :          * Initialize the skip table.  We set all elements to the needle
    1306             :          * length, since this is the correct skip distance for any character
    1307             :          * not found in the needle.
    1308             :          */
    1309       87912 :         for (i = 0; i <= skiptablemask; i++)
    1310       86712 :             state->skiptable[i] = len2;
    1311             : 
    1312             :         /*
    1313             :          * Now examine the needle.  For each character except the last one,
    1314             :          * set the corresponding table element to the appropriate skip
    1315             :          * distance.  Note that when two characters share the same skip table
    1316             :          * entry, the one later in the needle must determine the skip
    1317             :          * distance.
    1318             :          */
    1319        1200 :         last = len2 - 1;
    1320             : 
    1321       16382 :         for (i = 0; i < last; i++)
    1322       15182 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1323             :     }
    1324        1408 : }
    1325             : 
    1326             : /*
    1327             :  * Advance to the next match, starting from the end of the previous match
    1328             :  * (or the beginning of the string, on first call).  Returns true if a match
    1329             :  * is found.
    1330             :  *
    1331             :  * Note that this refuses to match an empty-string needle.  Most callers
    1332             :  * will have handled that case specially and we'll never see it here.
                     :  *
                     :  * On success the start of the match is stored in state->last_match; use
                     :  * text_position_get_match_ptr() or text_position_get_match_pos() to read
                     :  * it.  When no match is found, state->last_match is left unchanged.
    1333             :  */
    1334             : static bool
    1335        6744 : text_position_next(TextPositionState *state)
    1336             : {
    1337        6744 :     int         needle_len = state->len2;
    1338             :     char       *start_ptr;
    1339             :     char       *matchptr;
    1340             : 
    1341        6744 :     if (needle_len <= 0)
    1342           0 :         return false;           /* result for empty pattern */
    1343             : 
    1344             :     /* Start from the point right after the previous match. */
    1345        6744 :     if (state->last_match)
    1346        5324 :         start_ptr = state->last_match + needle_len;
    1347             :     else
    1348        1420 :         start_ptr = state->str1;
    1349             : 
    1350        6744 : retry:
    1351        6744 :     matchptr = text_position_next_internal(start_ptr, state);
    1352             : 
    1353        6744 :     if (!matchptr)
    1354        1348 :         return false;
    1355             : 
    1356             :     /*
    1357             :      * Found a match for the byte sequence.  If this is a multibyte encoding,
    1358             :      * where one character's byte sequence can appear inside a longer
    1359             :      * multi-byte character, we need to verify that the match was at a
    1360             :      * character boundary, not in the middle of a multi-byte character.
    1361             :      */
    1362        5396 :     if (state->is_multibyte_char_in_char)
    1363             :     {
    1364             :         /* Walk one character at a time, until we reach the match. */
    1365             : 
    1366             :         /* the search should never move backwards. */
    1367             :         Assert(state->refpoint <= matchptr);
    1368             : 
    1369           0 :         while (state->refpoint < matchptr)
    1370             :         {
    1371             :             /* step to next character. */
    1372           0 :             state->refpoint += pg_mblen(state->refpoint);
    1373           0 :             state->refpos++;
    1374             : 
    1375             :             /*
    1376             :              * If we stepped over the match's start position, then it was a
    1377             :              * false positive, where the byte sequence appeared in the middle
    1378             :              * of a multi-byte character.  Skip it, and continue the search at
    1379             :              * the next character boundary.
    1380             :              */
    1381           0 :             if (state->refpoint > matchptr)
    1382             :             {
    1383           0 :                 start_ptr = state->refpoint;
    1384           0 :                 goto retry;
    1385             :             }
    1386             :         }
    1387             :     }
    1388             : 
    1389        5396 :     state->last_match = matchptr;
    1390        5396 :     return true;
    1391             : }
    1392             : 
    1393             : /*
    1394             :  * Subroutine of text_position_next().  This searches for the raw byte
    1395             :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
    1396             :  * match starting at 'start_ptr', or NULL if no match is found.
                     :  *
                     :  * 'start_ptr' must lie within the haystack or exactly at its end (this is
                     :  * only asserted).  A one-byte needle is found with a plain linear scan;
                     :  * longer needles use the skip table stored in 'state', comparing the
                     :  * needle against the haystack from its last byte backward.
    1397             :  */
    1398             : static char *
    1399        6744 : text_position_next_internal(char *start_ptr, TextPositionState *state)
    1400             : {
    1401        6744 :     int         haystack_len = state->len1;
    1402        6744 :     int         needle_len = state->len2;
    1403        6744 :     int         skiptablemask = state->skiptablemask;
    1404        6744 :     const char *haystack = state->str1;
    1405        6744 :     const char *needle = state->str2;
    1406        6744 :     const char *haystack_end = &haystack[haystack_len];
    1407             :     const char *hptr;
    1408             : 
    1409             :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
    1410             : 
    1411        6744 :     if (needle_len == 1)
    1412             :     {
    1413             :         /* No point in using B-M-H for a one-character needle */
    1414         754 :         char        nchar = *needle;
    1415             : 
    1416         754 :         hptr = start_ptr;
    1417        5758 :         while (hptr < haystack_end)
    1418             :         {
    1419        5592 :             if (*hptr == nchar)
    1420         588 :                 return (char *) hptr;
    1421        5004 :             hptr++;
    1422             :         }
    1423             :     }
    1424             :     else
    1425             :     {
    1426        5990 :         const char *needle_last = &needle[needle_len - 1];
    1427             : 
    1428             :         /* Start where the needle's last byte falls if it begins at start_ptr */
    1429        5990 :         hptr = start_ptr + needle_len - 1;
    1430      156834 :         while (hptr < haystack_end)
    1431             :         {
    1432             :             /* Match the needle scanning *backward* */
    1433             :             const char *nptr;
    1434             :             const char *p;
    1435             : 
    1436      155652 :             nptr = needle_last;
    1437      155652 :             p = hptr;
    1438      227640 :             while (*nptr == *p)
    1439             :             {
    1440             :                 /* Matched it all?  If so, return pointer to start of match */
    1441       76796 :                 if (nptr == needle)
    1442        4808 :                     return (char *) p;
    1443       71988 :                 nptr--, p--;
    1444             :             }
    1445             : 
    1446             :             /*
    1447             :              * No match, so use the haystack char at hptr to decide how far to
    1448             :              * advance.  If the needle had any occurrence of that character
    1449             :              * (or more precisely, one sharing the same skiptable entry)
    1450             :              * before its last character, then we advance far enough to align
    1451             :              * the last such needle character with that haystack position.
    1452             :              * Otherwise we can advance by the whole needle length.
    1453             :              */
    1454      150844 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1455             :         }
    1456             :     }
    1457             : 
    1458        1348 :     return 0;                   /* not found; 0 here is the null pointer */
    1459             : }
    1460             : 
    1461             : /*
    1462             :  * Return a pointer to the current match.
    1463             :  *
    1464             :  * The returned pointer points into the original haystack string.
                     :  *
                     :  * This is simply state->last_match, which is NULL until
                     :  * text_position_next() has found a match (and again after
                     :  * text_position_reset()).
    1465             :  */
    1466             : static char *
    1467        5318 : text_position_get_match_ptr(TextPositionState *state)
    1468             : {
    1469        5318 :     return state->last_match;
    1470             : }
    1471             : 
    1472             : /*
    1473             :  * Return the offset of the current match.
    1474             :  *
    1475             :  * The offset is in characters, 1-based.
                     :  *
                     :  * As a side effect, state->refpoint and state->refpos are advanced to the
                     :  * match, so only the bytes between the previous reference point and the
                     :  * match are counted on each call.
    1476             :  */
    1477             : static int
    1478          48 : text_position_get_match_pos(TextPositionState *state)
    1479             : {
    1480             :     /* Convert the byte position to char position. */
    1481          96 :     state->refpos += pg_mbstrlen_with_len(state->refpoint,
    1482          48 :                                           state->last_match - state->refpoint);
    1483          48 :     state->refpoint = state->last_match;
    1484          48 :     return state->refpos + 1;
    1485             : }
    1486             : 
    1487             : /*
    1488             :  * Reset search state to the initial state installed by text_position_setup.
    1489             :  *
    1490             :  * The next call to text_position_next will search from the beginning
    1491             :  * of the string.
                     :  *
                     :  * Only last_match, refpoint and refpos are rewound; the string pointers,
                     :  * lengths and skip table are left as they are.
    1492             :  */
    1493             : static void
    1494          12 : text_position_reset(TextPositionState *state)
    1495             : {
    1496          12 :     state->last_match = NULL;
    1497          12 :     state->refpoint = state->str1;
    1498          12 :     state->refpos = 0;
    1499          12 : }
    1500             : 
    1501             : static void
    1502        1408 : text_position_cleanup(TextPositionState *state)
    1503             : {
    1504             :     /* no cleanup needed; text_position_setup() leaves nothing in *state to free */
    1505        1408 : }
    1506             : 
    1507             : /*
                     :  * check_collation_set -
                     :  *  Raise an ERRCODE_INDETERMINATE_COLLATION error if collid is not a valid
                     :  *  OID; otherwise do nothing.
                     :  */
    1508             : static void
    1509    12945784 : check_collation_set(Oid collid)
    1510             : {
    1511    12945784 :     if (!OidIsValid(collid))
    1512             :     {
    1513             :         /*
    1514             :          * This typically means that the parser could not resolve a conflict
    1515             :          * of implicit collations, so report it that way.
    1516             :          */
    1517          30 :         ereport(ERROR,
    1518             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1519             :                  errmsg("could not determine which collation to use for string comparison"),
    1520             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
    1521             :     }
    1522    12945754 : }
    1523             : 
    1524             : /*
    1525             :  * varstr_cmp()
    1526             :  *
    1527             :  * Comparison function for text strings with given lengths, using the
    1528             :  * appropriate locale. Returns an integer less than, equal to, or greater than
    1529             :  * zero, indicating whether arg1 is less than, equal to, or greater than arg2.
    1530             :  *
                     :  * arg1/len1 and arg2/len2 give each string as a pointer plus length, and
                     :  * collid selects the collation; an invalid collid raises an error.
                     :  *
                     :  * When the locale's collate_is_c flag is set, the result comes from
                     :  * memcmp(), with the shorter string sorting first if one is a prefix of
                     :  * the other.  Otherwise pg_strncoll() decides; if it reports equality for
                     :  * strings that are not bytewise identical, a deterministic collation
                     :  * breaks the tie with the same memcmp() rule, while a nondeterministic
                     :  * collation reports them as equal.
                     :  *
    1531             :  * Note: many functions that depend on this are marked leakproof; therefore,
    1532             :  * avoid reporting the actual contents of the input when throwing errors.
    1533             :  * All errors herein should be things that can't happen except on corrupt
    1534             :  * data, anyway; otherwise we will have trouble with indexing strings that
    1535             :  * would cause them.
    1536             :  */
    1537             : int
    1538     6424834 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
    1539             : {
    1540             :     int         result;
    1541             :     pg_locale_t mylocale;
    1542             : 
    1543     6424834 :     check_collation_set(collid);
    1544             : 
    1545     6424816 :     mylocale = pg_newlocale_from_collation(collid);
    1546             : 
    1547     6424816 :     if (mylocale->collate_is_c)
    1548             :     {
    1549     3090750 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1550     3090750 :         if ((result == 0) && (len1 != len2))
    1551      112280 :             result = (len1 < len2) ? -1 : 1;
    1552             :     }
    1553             :     else
    1554             :     {
    1555             :         /*
    1556             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1557             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1558             :          * memcmp() followed by strcoll() is only trivially slower than
    1559             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1560             :          * very often, and if it does - for example, because there are many
    1561             :          * equal strings in the input - then we win big by avoiding expensive
    1562             :          * collation-aware comparisons.
    1563             :          */
    1564     3334066 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1565     1278128 :             return 0;
    1566             : 
    1567     2055938 :         result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
    1568             : 
    1569             :         /* Break tie if necessary. */
    1570     2055938 :         if (result == 0 && mylocale->deterministic)
    1571             :         {
    1572           0 :             result = memcmp(arg1, arg2, Min(len1, len2));
    1573           0 :             if ((result == 0) && (len1 != len2))
    1574           0 :                 result = (len1 < len2) ? -1 : 1;
    1575             :         }
    1576             :     }
    1577             : 
    1578     5146688 :     return result;
    1579             : }
    1580             : 
    1581             : /* text_cmp()
    1582             :  * Internal comparison function for text strings.
    1583             :  * Returns whatever varstr_cmp() returns for the two values' data and
                     :  * lengths: negative, zero or positive.  The magnitude is not normalized,
                     :  * since varstr_cmp() can hand back a raw memcmp() result.
    1584             :  */
    1585             : static int
    1586     4851234 : text_cmp(text *arg1, text *arg2, Oid collid)
    1587             : {
    1588             :     char       *a1p,
    1589             :                *a2p;
    1590             :     int         len1,
    1591             :                 len2;
    1592             : 
    1593     4851234 :     a1p = VARDATA_ANY(arg1);
    1594     4851234 :     a2p = VARDATA_ANY(arg2);
    1595             : 
    1596     4851234 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1597     4851234 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1598             : 
    1599     4851234 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1600             : }
    1601             : 
    1602             : /*
    1603             :  * Comparison functions for text strings.
    1604             :  *
    1605             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1606             :  * be careful to free working copies of toasted datums.  Most places don't
    1607             :  * need to be so careful.
                     :  *
                     :  * texteq: for a deterministic collation, equality is decided bytewise
                     :  * (and by size alone when the raw datum sizes differ); for a
                     :  * nondeterministic collation it is decided by text_cmp().  An invalid
                     :  * collation raises an error.
    1608             :  */
    1609             : 
    1610             : Datum
    1611     6123330 : texteq(PG_FUNCTION_ARGS)
    1612             : {
    1613     6123330 :     Oid         collid = PG_GET_COLLATION();
    1614     6123330 :     pg_locale_t mylocale = 0;   /* always reassigned below before use */
    1615             :     bool        result;
    1616             : 
    1617     6123330 :     check_collation_set(collid);
    1618             : 
    1619     6123330 :     mylocale = pg_newlocale_from_collation(collid);
    1620             : 
    1621     6123330 :     if (mylocale->deterministic)
    1622             :     {
    1623     6115034 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1624     6115034 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1625             :         Size        len1,
    1626             :                     len2;
    1627             : 
    1628             :         /*
    1629             :          * Since we only care about equality or not-equality, we can avoid all
    1630             :          * the expense of strcoll() here, and just do bitwise comparison.  In
    1631             :          * fact, we don't even have to do a bitwise comparison if we can show
    1632             :          * the lengths of the strings are unequal; which might save us from
    1633             :          * having to detoast one or both values.
    1634             :          */
    1635     6115034 :         len1 = toast_raw_datum_size(arg1);
    1636     6115034 :         len2 = toast_raw_datum_size(arg2);
    1637     6115034 :         if (len1 != len2)
    1638     2837672 :             result = false;
    1639             :         else
    1640             :         {
    1641     3277362 :             text       *targ1 = DatumGetTextPP(arg1);
    1642     3277362 :             text       *targ2 = DatumGetTextPP(arg2);
    1643             : 
    1644     3277362 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1645             :                              len1 - VARHDRSZ) == 0);
    1646             : 
    1647     3277362 :             PG_FREE_IF_COPY(targ1, 0);
    1648     3277362 :             PG_FREE_IF_COPY(targ2, 1);
    1649             :         }
    1650             :     }
    1651             :     else
    1652             :     {
    1653        8296 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1654        8296 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1655             : 
    1656        8296 :         result = (text_cmp(arg1, arg2, collid) == 0);
    1657             : 
    1658        8296 :         PG_FREE_IF_COPY(arg1, 0);
    1659        8296 :         PG_FREE_IF_COPY(arg2, 1);
    1660             :     }
    1661             : 
    1662     6123330 :     PG_RETURN_BOOL(result);
    1663             : }
    1664             : /*
                     :  * textne - inequality of two text values.
                     :  *
                     :  * Mirrors texteq() with the result inverted: for a deterministic
                     :  * collation the values differ if their raw datum sizes differ or their
                     :  * bytes differ; for a nondeterministic collation text_cmp() decides.  An
                     :  * invalid collation raises an error.
                     :  */
    1665             : Datum
    1666       22536 : textne(PG_FUNCTION_ARGS)
    1667             : {
    1668       22536 :     Oid         collid = PG_GET_COLLATION();
    1669             :     pg_locale_t mylocale;
    1670             :     bool        result;
    1671             : 
    1672       22536 :     check_collation_set(collid);
    1673             : 
    1674       22536 :     mylocale = pg_newlocale_from_collation(collid);
    1675             : 
    1676       22536 :     if (mylocale->deterministic)
    1677             :     {
    1678       22512 :         Datum       arg1 = PG_GETARG_DATUM(0);
    1679       22512 :         Datum       arg2 = PG_GETARG_DATUM(1);
    1680             :         Size        len1,
    1681             :                     len2;
    1682             : 
    1683             :         /* See comment in texteq() */
    1684       22512 :         len1 = toast_raw_datum_size(arg1);
    1685       22512 :         len2 = toast_raw_datum_size(arg2);
    1686       22512 :         if (len1 != len2)
    1687        4310 :             result = true;
    1688             :         else
    1689             :         {
    1690       18202 :             text       *targ1 = DatumGetTextPP(arg1);
    1691       18202 :             text       *targ2 = DatumGetTextPP(arg2);
    1692             : 
    1693       18202 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1694             :                              len1 - VARHDRSZ) != 0);
    1695             : 
    1696       18202 :             PG_FREE_IF_COPY(targ1, 0);
    1697       18202 :             PG_FREE_IF_COPY(targ2, 1);
    1698             :         }
    1699             :     }
    1700             :     else
    1701             :     {
    1702          24 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
    1703          24 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
    1704             : 
    1705          24 :         result = (text_cmp(arg1, arg2, collid) != 0);
    1706             : 
    1707          24 :         PG_FREE_IF_COPY(arg1, 0);
    1708          24 :         PG_FREE_IF_COPY(arg2, 1);
    1709             :     }
    1710             : 
    1711       22536 :     PG_RETURN_BOOL(result);
    1712             : }
    1713             : /*
                     :  * text_lt - true if text_cmp() orders argument 0 before argument 1 under
                     :  * the call's collation (result < 0).  Any working copies of the arguments
                     :  * are released with PG_FREE_IF_COPY before returning.
                     :  */
    1714             : Datum
    1715      208406 : text_lt(PG_FUNCTION_ARGS)
    1716             : {
    1717      208406 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1718      208406 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1719             :     bool        result;
    1720             : 
    1721      208406 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1722             : 
    1723      208388 :     PG_FREE_IF_COPY(arg1, 0);
    1724      208388 :     PG_FREE_IF_COPY(arg2, 1);
    1725             : 
    1726      208388 :     PG_RETURN_BOOL(result);
    1727             : }
    1728             : /*
                     :  * text_le - true if text_cmp() orders argument 0 before or equal to
                     :  * argument 1 under the call's collation (result <= 0).  Any working copies
                     :  * of the arguments are released with PG_FREE_IF_COPY before returning.
                     :  */
    1729             : Datum
    1730      316980 : text_le(PG_FUNCTION_ARGS)
    1731             : {
    1732      316980 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1733      316980 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1734             :     bool        result;
    1735             : 
    1736      316980 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1737             : 
    1738      316980 :     PG_FREE_IF_COPY(arg1, 0);
    1739      316980 :     PG_FREE_IF_COPY(arg2, 1);
    1740             : 
    1741      316980 :     PG_RETURN_BOOL(result);
    1742             : }
    1743             : /*
                     :  * text_gt - true if text_cmp() orders argument 0 after argument 1 under
                     :  * the call's collation (result > 0).  Any working copies of the arguments
                     :  * are released with PG_FREE_IF_COPY before returning.
                     :  */
    1744             : Datum
    1745      195818 : text_gt(PG_FUNCTION_ARGS)
    1746             : {
    1747      195818 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1748      195818 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1749             :     bool        result;
    1750             : 
    1751      195818 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1752             : 
    1753      195818 :     PG_FREE_IF_COPY(arg1, 0);
    1754      195818 :     PG_FREE_IF_COPY(arg2, 1);
    1755             : 
    1756      195818 :     PG_RETURN_BOOL(result);
    1757             : }
    1758             : /*
                     :  * text_ge - true if text_cmp() orders argument 0 after or equal to
                     :  * argument 1 under the call's collation (result >= 0).  Any working copies
                     :  * of the arguments are released with PG_FREE_IF_COPY before returning.
                     :  */
    1759             : Datum
    1760      177902 : text_ge(PG_FUNCTION_ARGS)
    1761             : {
    1762      177902 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1763      177902 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1764             :     bool        result;
    1765             : 
    1766      177902 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1767             : 
    1768      177902 :     PG_FREE_IF_COPY(arg1, 0);
    1769      177902 :     PG_FREE_IF_COPY(arg2, 1);
    1770             : 
    1771      177902 :     PG_RETURN_BOOL(result);
    1772             : }
    1773             : /*
                     :  * text_starts_with -
                     :  *  Tests whether argument 0 begins with argument 1, comparing bytes with
                     :  *  memcmp().
                     :  *
                     :  * Raises an error if the collation is invalid or nondeterministic.  If
                     :  * argument 1's raw datum size exceeds argument 0's, the result is false
                     :  * without fetching either value; otherwise the leading part of argument 0
                     :  * is obtained with text_substring() and compared against argument 1.
                     :  *
                     :  * NOTE(review): len2 comes from toast_raw_datum_size(); texteq() subtracts
                     :  * VARHDRSZ from such a size before using it as a data length, but here it
                     :  * is passed to text_substring() unadjusted.  Whether that argument counts
                     :  * bytes or characters is not visible here -- confirm this is intended.
                     :  *
                     :  * NOTE(review): targ1 is the result of text_substring(), not a detoasted
                     :  * copy of argument 0 -- confirm PG_FREE_IF_COPY(targ1, 0) frees it as
                     :  * intended.
                     :  */
    1774             : Datum
    1775       37914 : text_starts_with(PG_FUNCTION_ARGS)
    1776             : {
    1777       37914 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1778       37914 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1779       37914 :     Oid         collid = PG_GET_COLLATION();
    1780             :     pg_locale_t mylocale;
    1781             :     bool        result;
    1782             :     Size        len1,
    1783             :                 len2;
    1784             : 
    1785       37914 :     check_collation_set(collid);
    1786             : 
    1787       37914 :     mylocale = pg_newlocale_from_collation(collid);
    1788             : 
    1789       37914 :     if (!mylocale->deterministic)
    1790           0 :         ereport(ERROR,
    1791             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1792             :                  errmsg("nondeterministic collations are not supported for substring searches")));
    1793             : 
    1794       37914 :     len1 = toast_raw_datum_size(arg1);
    1795       37914 :     len2 = toast_raw_datum_size(arg2);
    1796       37914 :     if (len2 > len1)
    1797           0 :         result = false;
    1798             :     else
    1799             :     {
    1800       37914 :         text       *targ1 = text_substring(arg1, 1, len2, false);
    1801       37914 :         text       *targ2 = DatumGetTextPP(arg2);
    1802             : 
    1803       37914 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1804       37914 :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
    1805             : 
    1806       37914 :         PG_FREE_IF_COPY(targ1, 0);
    1807       37914 :         PG_FREE_IF_COPY(targ2, 1);
    1808             :     }
    1809             : 
    1810       37914 :     PG_RETURN_BOOL(result);
    1811             : }
    1812             : 
    1813             : Datum
    1814     3628172 : bttextcmp(PG_FUNCTION_ARGS)
    1815             : {
    1816     3628172 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1817     3628172 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1818             :     int32       result;
    1819             : 
    1820     3628172 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1821             : 
    1822     3628172 :     PG_FREE_IF_COPY(arg1, 0);
    1823     3628172 :     PG_FREE_IF_COPY(arg2, 1);
    1824             : 
    1825     3628172 :     PG_RETURN_INT32(result);
    1826             : }
    1827             : 
    1828             : Datum
    1829       78610 : bttextsortsupport(PG_FUNCTION_ARGS)
    1830             : {
    1831       78610 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1832       78610 :     Oid         collid = ssup->ssup_collation;
    1833             :     MemoryContext oldcontext;
    1834             : 
    1835       78610 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1836             : 
    1837             :     /* Use generic string SortSupport */
    1838       78610 :     varstr_sortsupport(ssup, TEXTOID, collid);
    1839             : 
    1840       78598 :     MemoryContextSwitchTo(oldcontext);
    1841             : 
    1842       78598 :     PG_RETURN_VOID();
    1843             : }
    1844             : 
    1845             : /*
    1846             :  * Generic sortsupport interface for character type's operator classes.
    1847             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    1848             :  * trailing spaces before comparison).
    1849             :  *
    1850             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    1851             :  * same representation.  Callers that always use the C collation (e.g.
    1852             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    1853             :  * this will not work with any other collation, though.
    1854             :  */
    1855             : void
    1856      116904 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
    1857             : {
    1858      116904 :     bool        abbreviate = ssup->abbreviate;
    1859      116904 :     bool        collate_c = false;
    1860             :     VarStringSortSupport *sss;
    1861             :     pg_locale_t locale;
    1862             : 
    1863      116904 :     check_collation_set(collid);
    1864             : 
    1865      116892 :     locale = pg_newlocale_from_collation(collid);
    1866             : 
    1867             :     /*
    1868             :      * If possible, set ssup->comparator to a function which can be used to
    1869             :      * directly compare two datums.  If we can do this, we'll avoid the
    1870             :      * overhead of a trip through the fmgr layer for every comparison, which
    1871             :      * can be substantial.
    1872             :      *
    1873             :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
    1874             :      * which uses strcoll() to perform comparisons.  We use that for the
    1875             :      * BpChar case too, but type NAME uses namefastcmp_locale.  However, if
    1876             :      * the collation is C, we can make things quite a bit faster with
    1877             :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, which use plain
    1878             :      * memcmp() (or strncmp(), for NAME) rather than strcoll().
    1879             :      */
    1880      116892 :     if (locale->collate_is_c)
    1881             :     {
    1882       75414 :         if (typid == BPCHAROID)
    1883         308 :             ssup->comparator = bpcharfastcmp_c;
    1884       75106 :         else if (typid == NAMEOID)
    1885             :         {
    1886       37242 :             ssup->comparator = namefastcmp_c;
    1887             :             /* Not supporting abbreviation with type NAME, for now */
    1888       37242 :             abbreviate = false;
    1889             :         }
    1890             :         else
    1891       37864 :             ssup->comparator = varstrfastcmp_c;
    1892             : 
    1893       75414 :         collate_c = true;
    1894             :     }
    1895             :     else
    1896             :     {
    1897             :         /*
    1898             :          * We use varlenafastcmp_locale except for type NAME.
    1899             :          */
    1900       41478 :         if (typid == NAMEOID)
    1901             :         {
    1902           0 :             ssup->comparator = namefastcmp_locale;
    1903             :             /* Not supporting abbreviation with type NAME, for now */
    1904           0 :             abbreviate = false;
    1905             :         }
    1906             :         else
    1907       41478 :             ssup->comparator = varlenafastcmp_locale;
    1908             : 
    1909             :         /*
    1910             :          * Unfortunately, it seems that abbreviation for non-C collations is
    1911             :          * broken on many common platforms; see pg_strxfrm_enabled().
    1912             :          *
    1913             :          * Even apart from the risk of broken locales, it's possible that
    1914             :          * there are platforms where the use of abbreviated keys should be
    1915             :          * disabled at compile time.  Having only 4 byte datums could make
    1916             :          * worst-case performance drastically more likely, for example.
    1917             :          * Moreover, macOS's strxfrm() implementation is known to not
    1918             :          * effectively concentrate a significant amount of entropy from the
    1919             :          * original string in earlier transformed blobs.  It's possible that
    1920             :          * other supported platforms are similarly encumbered.  So, if we ever
    1921             :          * get past disabling this categorically, we may still want or need to
    1922             :          * disable it for particular platforms.
    1923             :          */
    1924       41478 :         if (!pg_strxfrm_enabled(locale))
    1925       40778 :             abbreviate = false;
    1926             :     }
    1927             : 
    1928             :     /*
    1929             :      * If we're using abbreviated keys, or if we're using a locale-aware
    1930             :      * comparison, we need to initialize a VarStringSortSupport object. Both
    1931             :      * cases will make use of the temporary buffers we initialize here for
    1932             :      * scratch space (and to detect requirement for BpChar semantics from
    1933             :      * caller), and the abbreviation case requires additional state.
    1934             :      */
    1935      116892 :     if (abbreviate || !collate_c)
    1936             :     {
    1937       63630 :         sss = palloc(sizeof(VarStringSortSupport));
    1938       63630 :         sss->buf1 = palloc(TEXTBUFLEN);
    1939       63630 :         sss->buflen1 = TEXTBUFLEN;
    1940       63630 :         sss->buf2 = palloc(TEXTBUFLEN);
    1941       63630 :         sss->buflen2 = TEXTBUFLEN;
    1942             :         /* Start with invalid values */
    1943       63630 :         sss->last_len1 = -1;
    1944       63630 :         sss->last_len2 = -1;
    1945             :         /* No comparison result has been cached yet */
    1946       63630 :         sss->last_returned = 0;
    1947       63630 :         if (collate_c)
    1948       22152 :             sss->locale = NULL;
    1949             :         else
    1950       41478 :             sss->locale = locale;
    1951             : 
    1952             :         /*
    1953             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    1954             :          * constantly keep track of the variety of data that buf1 and buf2
    1955             :          * currently contain.
    1956             :          *
    1957             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    1958             :          * conversions and comparisons are batched into two distinct phases,
    1959             :          * but the correctness of caching cannot hinge upon this.  For
    1960             :          * comparison caching, buffer state is only trusted if cache_blob is
    1961             :          * found set to false, whereas strxfrm() caching only trusts the state
    1962             :          * when cache_blob is found set to true.
    1963             :          *
    1964             :          * Arbitrarily initialize cache_blob to true.
    1965             :          */
    1966       63630 :         sss->cache_blob = true;
    1967       63630 :         sss->collate_c = collate_c;
    1968       63630 :         sss->typid = typid;
    1969       63630 :         ssup->ssup_extra = sss;
    1970             : 
    1971             :         /*
    1972             :          * If possible, plan to use the abbreviated keys optimization.  The
    1973             :          * core code may switch back to authoritative comparator should
    1974             :          * abbreviation be aborted.
    1975             :          */
    1976       63630 :         if (abbreviate)
    1977             :         {
    1978       22678 :             sss->prop_card = 0.20;
    1979       22678 :             initHyperLogLog(&sss->abbr_card, 10);
    1980       22678 :             initHyperLogLog(&sss->full_card, 10);
    1981       22678 :             ssup->abbrev_full_comparator = ssup->comparator;
    1982       22678 :             ssup->comparator = ssup_datum_unsigned_cmp;
    1983       22678 :             ssup->abbrev_converter = varstr_abbrev_convert;
    1984       22678 :             ssup->abbrev_abort = varstr_abbrev_abort;
    1985             :         }
    1986             :     }
    1987      116892 : }
    1988             : 
    1989             : /*
    1990             :  * sortsupport comparison func (for C locale case)
    1991             :  */
    1992             : static int
    1993    36554308 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    1994             : {
    1995    36554308 :     VarString  *arg1 = DatumGetVarStringPP(x);
    1996    36554308 :     VarString  *arg2 = DatumGetVarStringPP(y);
    1997             :     char       *a1p,
    1998             :                *a2p;
    1999             :     int         len1,
    2000             :                 len2,
    2001             :                 result;
    2002             : 
    2003    36554308 :     a1p = VARDATA_ANY(arg1);
    2004    36554308 :     a2p = VARDATA_ANY(arg2);
    2005             : 
    2006    36554308 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2007    36554308 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2008             : 
    2009    36554308 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2010    36554308 :     if ((result == 0) && (len1 != len2))
    2011      998812 :         result = (len1 < len2) ? -1 : 1;
    2012             : 
    2013             :     /* We can't afford to leak memory here. */
    2014    36554308 :     if (PointerGetDatum(arg1) != x)
    2015           2 :         pfree(arg1);
    2016    36554308 :     if (PointerGetDatum(arg2) != y)
    2017           2 :         pfree(arg2);
    2018             : 
    2019    36554308 :     return result;
    2020             : }
    2021             : 
    2022             : /*
    2023             :  * sortsupport comparison func (for BpChar C locale case)
    2024             :  *
    2025             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    2026             :  * varstr_sortsupport BpChar case, modeled on
    2027             :  * internal_bpchar_pattern_compare().
    2028             :  */
    2029             : static int
    2030       62420 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    2031             : {
    2032       62420 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2033       62420 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2034             :     char       *a1p,
    2035             :                *a2p;
    2036             :     int         len1,
    2037             :                 len2,
    2038             :                 result;
    2039             : 
    2040       62420 :     a1p = VARDATA_ANY(arg1);
    2041       62420 :     a2p = VARDATA_ANY(arg2);
    2042             : 
    2043       62420 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2044       62420 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2045             : 
    2046       62420 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2047       62420 :     if ((result == 0) && (len1 != len2))
    2048           4 :         result = (len1 < len2) ? -1 : 1;
    2049             : 
    2050             :     /* We can't afford to leak memory here. */
    2051       62420 :     if (PointerGetDatum(arg1) != x)
    2052           0 :         pfree(arg1);
    2053       62420 :     if (PointerGetDatum(arg2) != y)
    2054           0 :         pfree(arg2);
    2055             : 
    2056       62420 :     return result;
    2057             : }
    2058             : 
    2059             : /*
    2060             :  * sortsupport comparison func (for NAME C locale case)
    2061             :  */
    2062             : static int
    2063    33228528 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
    2064             : {
    2065    33228528 :     Name        arg1 = DatumGetName(x);
    2066    33228528 :     Name        arg2 = DatumGetName(y);
    2067             : 
    2068    33228528 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
    2069             : }
    2070             : 
    2071             : /*
    2072             :  * sortsupport comparison func (for locale case with all varlena types)
    2073             :  */
    2074             : static int
    2075    33335450 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2076             : {
    2077    33335450 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2078    33335450 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2079             :     char       *a1p,
    2080             :                *a2p;
    2081             :     int         len1,
    2082             :                 len2,
    2083             :                 result;
    2084             : 
    2085    33335450 :     a1p = VARDATA_ANY(arg1);
    2086    33335450 :     a2p = VARDATA_ANY(arg2);
    2087             : 
    2088    33335450 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2089    33335450 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2090             : 
    2091    33335450 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
    2092             : 
    2093             :     /* We can't afford to leak memory here. */
    2094    33335450 :     if (PointerGetDatum(arg1) != x)
    2095           4 :         pfree(arg1);
    2096    33335450 :     if (PointerGetDatum(arg2) != y)
    2097           4 :         pfree(arg2);
    2098             : 
    2099    33335450 :     return result;
    2100             : }
    2101             : 
    2102             : /*
    2103             :  * sortsupport comparison func (for locale case with NAME type)
    2104             :  */
    2105             : static int
    2106           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2107             : {
    2108           0 :     Name        arg1 = DatumGetName(x);
    2109           0 :     Name        arg2 = DatumGetName(y);
    2110             : 
    2111           0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
    2112           0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
    2113             :                                 ssup);
    2114             : }
    2115             : 
    2116             : /*
    2117             :  * sortsupport comparison func for locale cases
    2118             :  */
    2119             : static int
    2120    33335450 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
    2121             : {
    2122    33335450 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2123             :     int         result;
    2124             :     bool        arg1_match;
    2125             : 
    2126             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2127    33335450 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2128             :     {
    2129             :         /*
    2130             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2131             :          * last_len2.  Existing contents of buffers might still be used by
    2132             :          * next call.
    2133             :          *
    2134             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2135             :          * even though that implies that the memcmp() will usually be
    2136             :          * performed for BpChar callers (though multibyte characters could
    2137             :          * still prevent that from occurring).  The memcmp() is still very
    2138             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2139             :          * (not limited to padding), so we need make no distinction between
    2140             :          * padding space characters and "real" space characters.
    2141             :          */
    2142     9131052 :         return 0;
    2143             :     }
    2144             : 
    2145    24204398 :     if (sss->typid == BPCHAROID)
    2146             :     {
    2147             :         /* Get true number of bytes, ignoring trailing spaces */
    2148       33042 :         len1 = bpchartruelen(a1p, len1);
    2149       33042 :         len2 = bpchartruelen(a2p, len2);
    2150             :     }
    2151             : 
    2152    24204398 :     if (len1 >= sss->buflen1)
    2153             :     {
    2154          14 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2155          14 :         sss->buf1 = repalloc(sss->buf1, sss->buflen1);
    2156             :     }
    2157    24204398 :     if (len2 >= sss->buflen2)
    2158             :     {
    2159          10 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2160          10 :         sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2161             :     }
    2162             : 
    2163             :     /*
    2164             :      * We're likely to be asked to compare the same strings repeatedly, and
    2165             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2166             :      * comparisons, even though in general there is no reason to think that
    2167             :      * that will work out (every string datum may be unique).  Caching does
    2168             :      * not slow things down measurably when it doesn't work out, and can speed
    2169             :      * things up by rather a lot when it does.  In part, this is because the
    2170             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2171             :      * when the last comparison's result cannot be reused.
    2172             :      */
    2173    24204398 :     arg1_match = true;
    2174    24204398 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2175             :     {
    2176    22367292 :         arg1_match = false;
    2177    22367292 :         memcpy(sss->buf1, a1p, len1);
    2178    22367292 :         sss->buf1[len1] = '\0';
    2179    22367292 :         sss->last_len1 = len1;
    2180             :     }
    2181             : 
    2182             :     /*
    2183             :      * If we're comparing the same two strings as last time, we can return the
    2184             :      * same answer without calling strcoll() again.  This is more likely than
    2185             :      * it seems (at least with moderate to low cardinality sets), because
    2186             :      * quicksort compares the same pivot against many values.
    2187             :      */
    2188    24204398 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2189             :     {
    2190     3611592 :         memcpy(sss->buf2, a2p, len2);
    2191     3611592 :         sss->buf2[len2] = '\0';
    2192     3611592 :         sss->last_len2 = len2;
    2193             :     }
    2194    20592806 :     else if (arg1_match && !sss->cache_blob)
    2195             :     {
    2196             :         /* Use result cached following last actual strcoll() call */
    2197     1454106 :         return sss->last_returned;
    2198             :     }
    2199             : 
    2200    22750292 :     result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
    2201             : 
    2202             :     /* For deterministic collations, break ties with strcmp(). */
    2203    22750292 :     if (result == 0 && sss->locale->deterministic)
    2204           0 :         result = strcmp(sss->buf1, sss->buf2);
    2205             : 
    2206             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2207    22750292 :     sss->cache_blob = false;
    2208    22750292 :     sss->last_returned = result;
    2209    22750292 :     return result;
    2210             : }
    2211             : 
    2212             : /*
    2213             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    2214             :  * representation.  Our encoding strategy is simple -- pack the first
    2215             :  * sizeof(Datum) bytes of a strxfrm() blob into a Datum (stored in reverse
    2216             :  * order on little-endian machines), and treat it as an unsigned integer.  When
    2217             :  * the "C" locale is used, or for bytea, just memcpy() from original instead.
    2218             :  */
    2219             : static Datum
    2220      824202 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2221             : {
    2222      824202 :     const size_t max_prefix_bytes = sizeof(Datum);
    2223      824202 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2224      824202 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2225      824202 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2226             : 
    2227             :     /* working state */
    2228             :     Datum       res;
    2229             :     char       *pres;
    2230             :     int         len;
    2231             :     uint32      hash;
    2232             : 
    2233      824202 :     pres = (char *) &res;
    2234             :     /* memset(), so any non-overwritten bytes are NUL */
    2235      824202 :     memset(pres, 0, max_prefix_bytes);
    2236      824202 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2237             : 
    2238             :     /* Get number of bytes, ignoring trailing spaces */
    2239      824202 :     if (sss->typid == BPCHAROID)
    2240        1010 :         len = bpchartruelen(authoritative_data, len);
    2241             : 
    2242             :     /*
    2243             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2244             :      * abbreviate keys.  The full comparator for the C locale is always
    2245             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2246             :      * always force the C collation -- bytea isn't a collatable type, but this
    2247             :      * approach is convenient) to use strxfrm().  This is because bytea
    2248             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2249             :      *
    2250             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2251             :      * strings because abbreviated cmp need not make a distinction between
    2252             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2253             :      * authoritative representation.  Hopefully a comparison at or past one
    2254             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2255             :      * without consulting the authoritative representation; specifically, some
    2256             :      * later non-NUL byte in the longer string can resolve the comparison
    2257             :      * against a subsequent terminating NUL in the shorter string.  There will
    2258             :      * usually be what is effectively a "length-wise" resolution there and
    2259             :      * then.
    2260             :      *
    2261             :      * If that doesn't work out -- if all bytes in the longer string
    2262             :      * positioned at or past the offset of the smaller string's (first)
    2263             :      * terminating NUL are actually representative of NUL bytes in the
    2264             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2265             :      * towards the end of the longer string iff it happens to still be small)
    2266             :      * -- then an authoritative tie-breaker will happen, and do the right
    2267             :      * thing: explicitly consider string length.
    2268             :      */
    2269      824202 :     if (sss->collate_c)
    2270      822384 :         memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
    2271             :     else
    2272             :     {
    2273             :         Size        bsize;
    2274             : 
    2275             :         /*
    2276             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2277             :          * analogs.
    2278             :          */
    2279             : 
    2280             :         /* By convention, buffer 1 holds the NUL-terminated original string */
    2281        1818 :         if (len >= sss->buflen1)
    2282             :         {
    2283           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2284           0 :             sss->buf1 = repalloc(sss->buf1, sss->buflen1);
    2285             :         }
    2286             : 
    2287             :         /* Might be able to reuse strxfrm() blob from last call */
    2288        1818 :         if (sss->last_len1 == len && sss->cache_blob &&
    2289         936 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2290             :         {
    2291         168 :             memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
    2292             :             /* No change affecting cardinality, so no hashing required */
    2293         168 :             goto done;
    2294             :         }
    2295             : 
    2296        1650 :         memcpy(sss->buf1, authoritative_data, len);
    2297             : 
    2298             :         /*
    2299             :          * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings.
    2300             :          */
    2301        1650 :         sss->buf1[len] = '\0';
    2302        1650 :         sss->last_len1 = len;
    2303             : 
    2304        1650 :         if (pg_strxfrm_prefix_enabled(sss->locale))
    2305             :         {
    2306        1650 :             if (sss->buflen2 < max_prefix_bytes)
    2307             :             {
    2308           0 :                 sss->buflen2 = Max(max_prefix_bytes,
    2309             :                                    Min(sss->buflen2 * 2, MaxAllocSize));
    2310           0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2311             :             }
    2312             : 
    2313        1650 :             bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
    2314             :                                       max_prefix_bytes, sss->locale);
    2315        1650 :             sss->last_len2 = bsize;
    2316             :         }
    2317             :         else
    2318             :         {
    2319             :             /*
    2320             :              * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
    2321             :              * again.  The pg_strxfrm() function leaves the result buffer
    2322             :              * content undefined if the result did not fit, so we need to
    2323             :              * retry until everything fits, even though we only need the first
    2324             :              * few bytes in the end.
    2325             :              */
    2326             :             for (;;)
    2327             :             {
    2328           0 :                 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
    2329             :                                    sss->locale);
    2330             : 
    2331           0 :                 sss->last_len2 = bsize;
    2332           0 :                 if (bsize < sss->buflen2)
    2333           0 :                     break;
    2334             : 
    2335             :                 /*
    2336             :                  * Grow buffer and retry.
    2337             :                  */
    2338           0 :                 sss->buflen2 = Max(bsize + 1,
    2339             :                                    Min(sss->buflen2 * 2, MaxAllocSize));
    2340           0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
    2341             :             }
    2342             :         }
    2343             : 
    2344             :         /*
    2345             :          * Every Datum byte is always compared.  This is safe because the
    2346             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2347             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2348             :          * logically representing termination.
    2349             :          *
    2350             :          * (Actually, even if there were NUL bytes in the blob it would be
    2351             :          * okay.  See remarks on bytea case above.)
    2352             :          */
    2353        1650 :         memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
    2354             :     }
    2355             : 
    2356             :     /*
    2357             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2358             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2359             :      * the worst case, where we do many string transformations for no saving
    2360             :      * in full strcoll()-based comparisons.  These statistics are used by
    2361             :      * varstr_abbrev_abort().
    2362             :      *
    2363             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    2364             :      * in order to compensate for cases where differences are past
    2365             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    2366             :      */
    2367      824034 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2368             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2369             : 
    2370      824034 :     if (len > PG_CACHE_LINE_SIZE)
    2371         184 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2372             : 
    2373      824034 :     addHyperLogLog(&sss->full_card, hash);
    2374             : 
    2375             :     /* Hash abbreviated key */
    2376             : #if SIZEOF_DATUM == 8
    2377             :     {
    2378             :         uint32      lohalf,
    2379             :                     hihalf;
    2380             : 
    2381      824034 :         lohalf = (uint32) res;
    2382      824034 :         hihalf = (uint32) (res >> 32);
    2383      824034 :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2384             :     }
    2385             : #else                           /* SIZEOF_DATUM != 8 */
    2386             :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2387             : #endif
    2388             : 
    2389      824034 :     addHyperLogLog(&sss->abbr_card, hash);
    2390             : 
    2391             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2392      824034 :     sss->cache_blob = true;
    2393      824202 : done:
    2394             : 
    2395             :     /*
    2396             :      * Byteswap on little-endian machines.
    2397             :      *
    2398             :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
    2399             :      * 3-way comparator) works correctly on all platforms.  If we didn't do
    2400             :      * this, the comparator would have to call memcmp() with a pair of
    2401             :      * pointers to the first byte of each abbreviated key, which is slower.
    2402             :      */
    2403      824202 :     res = DatumBigEndianToNative(res);
    2404             : 
    2405             :     /* Don't leak memory here */
    2406      824202 :     if (PointerGetDatum(authoritative) != original)
    2407           4 :         pfree(authoritative);
    2408             : 
    2409      824202 :     return res;
    2410             : }
    2411             : 
    2412             : /*
    2413             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2414             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    2415             :  * should be aborted, based on its projected effectiveness.
    2416             :  */
    2417             : static bool
    2418        2190 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2419             : {
    2420        2190 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2421             :     double      abbrev_distinct,
    2422             :                 key_distinct;
    2423             : 
    2424             :     Assert(ssup->abbreviate);
    2425             : 
    2426             :     /* Have a little patience */
    2427        2190 :     if (memtupcount < 100)
    2428        1216 :         return false;
    2429             : 
    2430         974 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2431         974 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2432             : 
    2433             :     /*
    2434             :      * Clamp cardinality estimates to at least one distinct value.  While
    2435             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2436             :      * that might misrepresent costs if we failed to clamp.
    2437             :      */
    2438         974 :     if (abbrev_distinct <= 1.0)
    2439           0 :         abbrev_distinct = 1.0;
    2440             : 
    2441         974 :     if (key_distinct <= 1.0)
    2442           0 :         key_distinct = 1.0;
    2443             : 
    2444             :     /*
    2445             :      * In the worst case all abbreviated keys are identical, while at the same
    2446             :      * time there are differences within full key strings not captured in
    2447             :      * abbreviations.
    2448             :      */
    2449         974 :     if (trace_sort)
    2450             :     {
    2451           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2452             : 
    2453           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2454             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2455             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2456             :              sss->prop_card);
    2457             :     }
    2458             : 
    2459             :     /*
    2460             :      * If the number of distinct abbreviated keys approximately matches the
    2461             :      * number of distinct authoritative original keys, that's reason enough to
    2462             :      * proceed.  We can win even with a very low cardinality set if most
    2463             :      * tie-breakers only memcmp().  This is by far the most important
    2464             :      * consideration.
    2465             :      *
    2466             :      * While comparisons that are resolved at the abbreviated key level are
    2467             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2468             :      * those two outcomes are so much cheaper than a full strcoll() once
    2469             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2470             :      * cardinality against the overall size of the set in order to more
    2471             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2472             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2473             :      * resolution are equivalent.
    2474             :      */
    2475         974 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2476             :     {
    2477             :         /*
    2478             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2479             :          * aggressively for next call.
    2480             :          *
    2481             :          * This is useful because the number of comparisons required on
    2482             :          * average increases at a linearithmic rate, and at roughly 10,000
    2483             :          * tuples that factor will start to dominate over the linear costs of
    2484             :          * string transformation (this is a conservative estimate).  The decay
    2485             :          * rate is chosen to be a little less aggressive than halving -- which
    2486             :          * (since we're called at points at which memtupcount has doubled)
    2487             :          * would never see the cost model actually abort past the first call
    2488             :          * following a decay.  This decay rate is mostly a precaution against
    2489             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2490             :          * full key cardinality.  The decay also serves to prevent a marginal
    2491             :          * case from being aborted too late, when too much has already been
    2492             :          * invested in string transformation.
    2493             :          *
    2494             :          * It's possible for sets of several million distinct strings with
    2495             :          * mere tens of thousands of distinct abbreviated keys to still
    2496             :          * benefit very significantly.  This will generally occur provided
    2497             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2498             :          * set's full keys. If it isn't so, we hope to catch that early and
    2499             :          * abort.  If it isn't caught early, by the time the problem is
    2500             :          * apparent it's probably not worth aborting.
    2501             :          */
    2502         974 :         if (memtupcount > 10000)
    2503           4 :             sss->prop_card *= 0.65;
    2504             : 
    2505         974 :         return false;
    2506             :     }
    2507             : 
    2508             :     /*
    2509             :      * Abort abbreviation strategy.
    2510             :      *
    2511             :      * The worst case, where all abbreviated keys are identical while all
    2512             :      * original strings differ will typically only see a regression of about
    2513             :      * 10% in execution time for small to medium sized lists of strings.
    2514             :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
    2515             :      * often expect very large improvements, particularly with sets of strings
    2516             :      * of moderately high to high abbreviated cardinality.  There is little to
    2517             :      * lose but much to gain, which our strategy reflects.
    2518             :      */
    2519           0 :     if (trace_sort)
    2520           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2521             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2522             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2523             : 
    2524           0 :     return true;
    2525             : }
    2526             : 
    2527             : /*
    2528             :  * Generic equalimage support function for character type's operator classes.
    2529             :  * Disables the use of deduplication with nondeterministic collations.
    2530             :  */
    2531             : Datum
    2532        8422 : btvarstrequalimage(PG_FUNCTION_ARGS)
    2533             : {
    2534             :     /* Oid      opcintype = PG_GETARG_OID(0); */
    2535        8422 :     Oid         collid = PG_GET_COLLATION();
    2536             :     pg_locale_t locale;
    2537             : 
    2538        8422 :     check_collation_set(collid);
    2539             : 
    2540        8422 :     locale = pg_newlocale_from_collation(collid);
    2541             : 
    2542        8422 :     PG_RETURN_BOOL(locale->deterministic);
    2543             : }
    2544             : 
    2545             : Datum
    2546      229560 : text_larger(PG_FUNCTION_ARGS)
    2547             : {
    2548      229560 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2549      229560 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2550             :     text       *result;
    2551             : 
    2552      229560 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
    2553             : 
    2554      229560 :     PG_RETURN_TEXT_P(result);
    2555             : }
    2556             : 
    2557             : Datum
    2558       86076 : text_smaller(PG_FUNCTION_ARGS)
    2559             : {
    2560       86076 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2561       86076 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2562             :     text       *result;
    2563             : 
    2564       86076 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
    2565             : 
    2566       86076 :     PG_RETURN_TEXT_P(result);
    2567             : }
    2568             : 
    2569             : 
    2570             : /*
    2571             :  * Cross-type comparison functions for types text and name.
    2572             :  */
    2573             : 
    2574             : Datum
    2575      202860 : nameeqtext(PG_FUNCTION_ARGS)
    2576             : {
    2577      202860 :     Name        arg1 = PG_GETARG_NAME(0);
    2578      202860 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2579      202860 :     size_t      len1 = strlen(NameStr(*arg1));
    2580      202860 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2581      202860 :     Oid         collid = PG_GET_COLLATION();
    2582             :     bool        result;
    2583             : 
    2584      202860 :     check_collation_set(collid);
    2585             : 
    2586      202860 :     if (collid == C_COLLATION_OID)
    2587      303890 :         result = (len1 == len2 &&
    2588      132294 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2589             :     else
    2590       31264 :         result = (varstr_cmp(NameStr(*arg1), len1,
    2591       31264 :                              VARDATA_ANY(arg2), len2,
    2592             :                              collid) == 0);
    2593             : 
    2594      202860 :     PG_FREE_IF_COPY(arg2, 1);
    2595             : 
    2596      202860 :     PG_RETURN_BOOL(result);
    2597             : }
    2598             : 
    2599             : Datum
    2600        7510 : texteqname(PG_FUNCTION_ARGS)
    2601             : {
    2602        7510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2603        7510 :     Name        arg2 = PG_GETARG_NAME(1);
    2604        7510 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2605        7510 :     size_t      len2 = strlen(NameStr(*arg2));
    2606        7510 :     Oid         collid = PG_GET_COLLATION();
    2607             :     bool        result;
    2608             : 
    2609        7510 :     check_collation_set(collid);
    2610             : 
    2611        7510 :     if (collid == C_COLLATION_OID)
    2612         568 :         result = (len1 == len2 &&
    2613         182 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2614             :     else
    2615        7124 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
    2616        7124 :                              NameStr(*arg2), len2,
    2617             :                              collid) == 0);
    2618             : 
    2619        7510 :     PG_FREE_IF_COPY(arg1, 0);
    2620             : 
    2621        7510 :     PG_RETURN_BOOL(result);
    2622             : }
    2623             : 
    2624             : Datum
    2625          36 : namenetext(PG_FUNCTION_ARGS)
    2626             : {
    2627          36 :     Name        arg1 = PG_GETARG_NAME(0);
    2628          36 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2629          36 :     size_t      len1 = strlen(NameStr(*arg1));
    2630          36 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
    2631          36 :     Oid         collid = PG_GET_COLLATION();
    2632             :     bool        result;
    2633             : 
    2634          36 :     check_collation_set(collid);
    2635             : 
    2636          36 :     if (collid == C_COLLATION_OID)
    2637          18 :         result = !(len1 == len2 &&
    2638           0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
    2639             :     else
    2640          18 :         result = !(varstr_cmp(NameStr(*arg1), len1,
    2641          18 :                               VARDATA_ANY(arg2), len2,
    2642             :                               collid) == 0);
    2643             : 
    2644          36 :     PG_FREE_IF_COPY(arg2, 1);
    2645             : 
    2646          36 :     PG_RETURN_BOOL(result);
    2647             : }
    2648             : 
    2649             : Datum
    2650          18 : textnename(PG_FUNCTION_ARGS)
    2651             : {
    2652          18 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2653          18 :     Name        arg2 = PG_GETARG_NAME(1);
    2654          18 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
    2655          18 :     size_t      len2 = strlen(NameStr(*arg2));
    2656          18 :     Oid         collid = PG_GET_COLLATION();
    2657             :     bool        result;
    2658             : 
    2659          18 :     check_collation_set(collid);
    2660             : 
    2661          18 :     if (collid == C_COLLATION_OID)
    2662           0 :         result = !(len1 == len2 &&
    2663           0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
    2664             :     else
    2665          18 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
    2666          18 :                               NameStr(*arg2), len2,
    2667             :                               collid) == 0);
    2668             : 
    2669          18 :     PG_FREE_IF_COPY(arg1, 0);
    2670             : 
    2671          18 :     PG_RETURN_BOOL(result);
    2672             : }
    2673             : 
    2674             : Datum
    2675      147330 : btnametextcmp(PG_FUNCTION_ARGS)
    2676             : {
    2677      147330 :     Name        arg1 = PG_GETARG_NAME(0);
    2678      147330 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2679             :     int32       result;
    2680             : 
    2681      294660 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
    2682      294660 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
    2683             :                         PG_GET_COLLATION());
    2684             : 
    2685      147330 :     PG_FREE_IF_COPY(arg2, 1);
    2686             : 
    2687      147330 :     PG_RETURN_INT32(result);
    2688             : }
    2689             : 
    2690             : Datum
    2691           0 : bttextnamecmp(PG_FUNCTION_ARGS)
    2692             : {
    2693           0 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2694           0 :     Name        arg2 = PG_GETARG_NAME(1);
    2695             :     int32       result;
    2696             : 
    2697           0 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
    2698           0 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
    2699             :                         PG_GET_COLLATION());
    2700             : 
    2701           0 :     PG_FREE_IF_COPY(arg1, 0);
    2702             : 
    2703           0 :     PG_RETURN_INT32(result);
    2704             : }
    2705             : 
    2706             : #define CmpCall(cmpfunc) \
    2707             :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
    2708             :                                           PG_GET_COLLATION(), \
    2709             :                                           PG_GETARG_DATUM(0), \
    2710             :                                           PG_GETARG_DATUM(1)))
    2711             : 
    2712             : Datum
    2713       49656 : namelttext(PG_FUNCTION_ARGS)
    2714             : {
    2715       49656 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
    2716             : }
    2717             : 
    2718             : Datum
    2719           0 : nameletext(PG_FUNCTION_ARGS)
    2720             : {
    2721           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
    2722             : }
    2723             : 
    2724             : Datum
    2725           0 : namegttext(PG_FUNCTION_ARGS)
    2726             : {
    2727           0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
    2728             : }
    2729             : 
    2730             : Datum
    2731       31608 : namegetext(PG_FUNCTION_ARGS)
    2732             : {
    2733       31608 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
    2734             : }
    2735             : 
    2736             : Datum
    2737           0 : textltname(PG_FUNCTION_ARGS)
    2738             : {
    2739           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
    2740             : }
    2741             : 
    2742             : Datum
    2743           0 : textlename(PG_FUNCTION_ARGS)
    2744             : {
    2745           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
    2746             : }
    2747             : 
    2748             : Datum
    2749           0 : textgtname(PG_FUNCTION_ARGS)
    2750             : {
    2751           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
    2752             : }
    2753             : 
    2754             : Datum
    2755           0 : textgename(PG_FUNCTION_ARGS)
    2756             : {
    2757           0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
    2758             : }
    2759             : 
    2760             : #undef CmpCall
    2761             : 
    2762             : 
    2763             : /*
    2764             :  * The following operators support character-by-character comparison
    2765             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    2766             :  * Note that the regular texteq/textne comparison operators, and regular
    2767             :  * support functions 1 and 2 with "C" collation are assumed to be
    2768             :  * compatible with these!
    2769             :  */
    2770             : 
    2771             : static int
    2772      152158 : internal_text_pattern_compare(text *arg1, text *arg2)
    2773             : {
    2774             :     int         result;
    2775             :     int         len1,
    2776             :                 len2;
    2777             : 
    2778      152158 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2779      152158 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2780             : 
    2781      152158 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    2782      152158 :     if (result != 0)
    2783      152092 :         return result;
    2784          66 :     else if (len1 < len2)
    2785           0 :         return -1;
    2786          66 :     else if (len1 > len2)
    2787          18 :         return 1;
    2788             :     else
    2789          48 :         return 0;
    2790             : }
    2791             : 
    2792             : 
    2793             : Datum
    2794       39580 : text_pattern_lt(PG_FUNCTION_ARGS)
    2795             : {
    2796       39580 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2797       39580 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2798             :     int         result;
    2799             : 
    2800       39580 :     result = internal_text_pattern_compare(arg1, arg2);
    2801             : 
    2802       39580 :     PG_FREE_IF_COPY(arg1, 0);
    2803       39580 :     PG_FREE_IF_COPY(arg2, 1);
    2804             : 
    2805       39580 :     PG_RETURN_BOOL(result < 0);
    2806             : }
    2807             : 
    2808             : 
    2809             : Datum
    2810       37510 : text_pattern_le(PG_FUNCTION_ARGS)
    2811             : {
    2812       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2813       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2814             :     int         result;
    2815             : 
    2816       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2817             : 
    2818       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2819       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2820             : 
    2821       37510 :     PG_RETURN_BOOL(result <= 0);
    2822             : }
    2823             : 
    2824             : 
    2825             : Datum
    2826       37534 : text_pattern_ge(PG_FUNCTION_ARGS)
    2827             : {
    2828       37534 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2829       37534 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2830             :     int         result;
    2831             : 
    2832       37534 :     result = internal_text_pattern_compare(arg1, arg2);
    2833             : 
    2834       37534 :     PG_FREE_IF_COPY(arg1, 0);
    2835       37534 :     PG_FREE_IF_COPY(arg2, 1);
    2836             : 
    2837       37534 :     PG_RETURN_BOOL(result >= 0);
    2838             : }
    2839             : 
    2840             : 
    2841             : Datum
    2842       37510 : text_pattern_gt(PG_FUNCTION_ARGS)
    2843             : {
    2844       37510 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2845       37510 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2846             :     int         result;
    2847             : 
    2848       37510 :     result = internal_text_pattern_compare(arg1, arg2);
    2849             : 
    2850       37510 :     PG_FREE_IF_COPY(arg1, 0);
    2851       37510 :     PG_FREE_IF_COPY(arg2, 1);
    2852             : 
    2853       37510 :     PG_RETURN_BOOL(result > 0);
    2854             : }
    2855             : 
    2856             : 
    2857             : Datum
    2858          24 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    2859             : {
    2860          24 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2861          24 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2862             :     int         result;
    2863             : 
    2864          24 :     result = internal_text_pattern_compare(arg1, arg2);
    2865             : 
    2866          24 :     PG_FREE_IF_COPY(arg1, 0);
    2867          24 :     PG_FREE_IF_COPY(arg2, 1);
    2868             : 
    2869          24 :     PG_RETURN_INT32(result);
    2870             : }
    2871             : 
    2872             : 
    2873             : Datum
    2874         116 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    2875             : {
    2876         116 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    2877             :     MemoryContext oldcontext;
    2878             : 
    2879         116 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    2880             : 
    2881             :     /* Use generic string SortSupport, forcing "C" collation */
    2882         116 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
    2883             : 
    2884         116 :     MemoryContextSwitchTo(oldcontext);
    2885             : 
    2886         116 :     PG_RETURN_VOID();
    2887             : }
    2888             : 
    2889             : 
    2890             : /*-------------------------------------------------------------
    2891             :  * byteaoctetlen
    2892             :  *
    2893             :  * get the number of bytes contained in an instance of type 'bytea'
    2894             :  *-------------------------------------------------------------
    2895             :  */
    2896             : Datum
    2897         458 : byteaoctetlen(PG_FUNCTION_ARGS)
    2898             : {
    2899         458 :     Datum       str = PG_GETARG_DATUM(0);
    2900             : 
    2901             :     /* We need not detoast the input at all */
    2902         458 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    2903             : }
    2904             : 
    2905             : /*
    2906             :  * byteacat -
    2907             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    2908             :  *    the two.
    2909             :  *
    2910             :  * Cloned from textcat and modified as required.
    2911             :  */
    2912             : Datum
    2913        1520 : byteacat(PG_FUNCTION_ARGS)
    2914             : {
    2915        1520 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2916        1520 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2917             : 
    2918        1520 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    2919             : }
    2920             : 
    2921             : /*
    2922             :  * bytea_catenate
    2923             :  *  Guts of byteacat(), broken out so it can be used by other functions
    2924             :  *
    2925             :  * Arguments can be in short-header form, but not compressed or out-of-line
    2926             :  */
    2927             : static bytea *
    2928        1556 : bytea_catenate(bytea *t1, bytea *t2)
    2929             : {
    2930             :     bytea      *result;
    2931             :     int         len1,
    2932             :                 len2,
    2933             :                 len;
    2934             :     char       *ptr;
    2935             : 
    2936        1556 :     len1 = VARSIZE_ANY_EXHDR(t1);
    2937        1556 :     len2 = VARSIZE_ANY_EXHDR(t2);
    2938             : 
    2939             :     /* paranoia ... probably should throw error instead? */
    2940        1556 :     if (len1 < 0)
    2941           0 :         len1 = 0;
    2942        1556 :     if (len2 < 0)
    2943           0 :         len2 = 0;
    2944             : 
    2945        1556 :     len = len1 + len2 + VARHDRSZ;
    2946        1556 :     result = (bytea *) palloc(len);
    2947             : 
    2948             :     /* Set size of result string... */
    2949        1556 :     SET_VARSIZE(result, len);
    2950             : 
    2951             :     /* Fill data field of result string... */
    2952        1556 :     ptr = VARDATA(result);
    2953        1556 :     if (len1 > 0)
    2954        1556 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    2955        1556 :     if (len2 > 0)
    2956        1538 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    2957             : 
    2958        1556 :     return result;
    2959             : }
    2960             : 
    2961             : #define PG_STR_GET_BYTEA(str_) \
    2962             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    2963             : 
    2964             : /*
    2965             :  * bytea_substr()
    2966             :  * Return a substring starting at the specified position.
    2967             :  * Cloned from text_substr and modified as required.
    2968             :  *
    2969             :  * Input:
    2970             :  *  - string
    2971             :  *  - starting position (one-based)
    2972             :  *  - string length (optional)
    2973             :  *
    2974             :  * If the starting position is zero or less, then return from the start of
    2975             :  * the string, adjusting the length to be consistent with the "negative
    2976             :  * start" per SQL.  If the length is less than zero, an ERROR is thrown.  If
    2977             :  * no third argument (length) is provided, the substring runs to the end of
                     :  * the string.
                     :  *
                     :  * This is the three-argument entry point: it fetches the start and length
                     :  * as int32 arguments and hands them to bytea_substring() with
                     :  * length_not_specified = false.  The two-argument form is
                     :  * bytea_substr_no_len().
    2978             :  */
    2979             : Datum
    2980          86 : bytea_substr(PG_FUNCTION_ARGS)
    2981             : {
    2982          86 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    2983             :                                       PG_GETARG_INT32(1),
    2984             :                                       PG_GETARG_INT32(2),
    2985             :                                       false));
    2986             : }
    2987             : 
    2988             : /*
    2989             :  * bytea_substr_no_len -
    2990             :  *    Wrapper to avoid opr_sanity failure due to
    2991             :  *    one function accepting a different number of args.
                     :  *
                     :  *    Two-argument form of bytea_substr(): it calls bytea_substring() with
                     :  *    length_not_specified = true, so the result runs from the start
                     :  *    position to the end of the string.  The -1 passed as the length is
                     :  *    only a placeholder; bytea_substring() does not read its length
                     :  *    argument when length_not_specified is true.
    2992             :  */
    2993             : Datum
    2994        3900 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    2995             : {
    2996        3900 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    2997             :                                       PG_GETARG_INT32(1),
    2998             :                                       -1,
    2999             :                                       true));
    3000             : }
    3001             : 
    3002             : static bytea *
    3003        4022 : bytea_substring(Datum str,
    3004             :                 int S,
    3005             :                 int L,
    3006             :                 bool length_not_specified)
    3007             : {
    3008             :     int32       S1;             /* adjusted start position */
    3009             :     int32       L1;             /* adjusted substring length */
    3010             :     int32       E;              /* end position */
    3011             : 
    3012             :     /*
    3013             :      * The logic here should generally match text_substring().
                     :      *
                     :      * str is the bytea datum to slice, S the one-based start position and
                     :      * L the requested length; L is not examined when length_not_specified
                     :      * is true.  The result is whatever DatumGetByteaPSlice() returns for
                     :      * the adjusted start and length, or an empty bytea when the requested
                     :      * range ends before position 1.  A negative L raises
                     :      * ERRCODE_SUBSTRING_ERROR.
    3014             :      */
    3015        4022 :     S1 = Max(S, 1);
    3016             : 
    3017        4022 :     if (length_not_specified)
    3018             :     {
    3019             :         /*
    3020             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    3021             :          * end of the string if we pass it a negative value for length.
    3022             :          */
    3023        3918 :         L1 = -1;
    3024             :     }
    3025         104 :     else if (L < 0)
    3026             :     {
    3027             :         /* SQL99 says to throw an error for E < S, i.e., negative length */
    3028          12 :         ereport(ERROR,
    3029             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3030             :                  errmsg("negative substring length not allowed")));
    3031             :         L1 = -1;                /* silence stupider compilers */
    3032             :     }
    3033          92 :     else if (pg_add_s32_overflow(S, L, &E))
    3034             :     {
    3035             :         /*
    3036             :          * L could be large enough for S + L to overflow, in which case the
    3037             :          * substring must run to end of string.
    3038             :          */
    3039           6 :         L1 = -1;
    3040             :     }
    3041             :     else
    3042             :     {
    3043             :         /*
    3044             :          * A zero or negative value for the end position can happen if the
    3045             :          * start was negative or one. SQL99 says to return a zero-length
    3046             :          * string.
    3047             :          */
    3048          86 :         if (E < 1)
    3049           0 :             return PG_STR_GET_BYTEA("");
    3050             : 
    3051          86 :         L1 = E - S1;            /* measured from the clamped start S1 */
    3052             :     }
    3053             : 
    3054             :     /*
    3055             :      * If the start position is past the end of the string, SQL99 says to
    3056             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    3057             :      * us.  We need only convert S1 to zero-based starting position.
    3058             :      */
    3059        4010 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    3060             : }
    3061             : 
    3062             : /*
    3063             :  * byteaoverlay
    3064             :  *  Replace specified substring of first string with second
    3065             :  *
    3066             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    3067             :  * This code is a direct implementation of what the standard says.
                     :  *
                     :  * The four arguments are the original bytea, the replacement bytea, the
                     :  * start position of the span to replace and the length of that span.  The
                     :  * actual work is done by bytea_overlay().
    3068             :  */
    3069             : Datum
    3070           6 : byteaoverlay(PG_FUNCTION_ARGS)
    3071             : {
    3072           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3073           6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3074           6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3075           6 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    3076             : 
    3077           6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3078             : }
    3079             : /*
                     :  * byteaoverlay_no_len
                     :  *  Three-argument variant of byteaoverlay(): no span length is passed, so
                     :  *  the length of the replaced span is taken from the length of the
                     :  *  replacement string t2 before calling bytea_overlay().
                     :  */
    3080             : Datum
    3081          12 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    3082             : {
    3083          12 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3084          12 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3085          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    3086             :     int         sl;
    3087             : 
    3088          12 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    3089          12 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    3090             : }
    3091             : 
    3092             : static bytea *
    3093          18 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    3094             : {
    3095             :     bytea      *result;
    3096             :     bytea      *s1;
    3097             :     bytea      *s2;
    3098             :     int         sp_pl_sl;
    3099             : 
    3100             :     /*
    3101             :      * Check for possible integer-overflow cases.  For zero or negative sp,
    3102             :      * throw a "substring length" error because that's what should be
    3103             :      * expected according to the spec's definition of OVERLAY().
                     :      *
                     :      * Once the checks pass, the result is assembled from three pieces: the
                     :      * first sp - 1 bytes of t1, then all of t2, then the part of t1 that
                     :      * starts at position sp + sl and runs to its end.
    3104             :      */
    3105          18 :     if (sp <= 0)
    3106           0 :         ereport(ERROR,
    3107             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    3108             :                  errmsg("negative substring length not allowed")));
    3109          18 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
    3110           0 :         ereport(ERROR,
    3111             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    3112             :                  errmsg("integer out of range")));
    3113             : 
    3114          18 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    3115          18 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    3116          18 :     result = bytea_catenate(s1, t2);
    3117          18 :     result = bytea_catenate(result, s2);
    3118             : 
    3119          18 :     return result;
    3120             : }
    3121             : 
    3122             : /*
    3123             :  * bytea_bit_count
                     :  *  Return, as an int64, the result of pg_popcount() over all data bytes
                     :  *  of the bytea argument (the header is not included).
    3124             :  */
    3125             : Datum
    3126           6 : bytea_bit_count(PG_FUNCTION_ARGS)
    3127             : {
    3128           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3129             : 
    3130           6 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
    3131             : }
    3132             : 
    3133             : /*
    3134             :  * byteapos -
    3135             :  *    Return the position of the specified substring.
    3136             :  *    Implements the SQL POSITION() function.
    3137             :  * Cloned from textpos and modified as required.
                     :  *
                     :  * The first argument is the string to search, the second the pattern.
                     :  * The result is the one-based offset of the first match, 0 if the
                     :  * pattern does not occur, and 1 if the pattern is empty.
                     :  *
                     :  * The scan is a plain byte-wise search: at each candidate offset the
                     :  * first byte is compared before memcmp() is called on the full pattern.
                     :  * If the pattern is longer than the string, px is negative and the loop
                     :  * body never runs.
    3138             :  */
    3139             : Datum
    3140           0 : byteapos(PG_FUNCTION_ARGS)
    3141             : {
    3142           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    3143           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    3144             :     int         pos;
    3145             :     int         px,
    3146             :                 p;
    3147             :     int         len1,
    3148             :                 len2;
    3149             :     char       *p1,
    3150             :                *p2;
    3151             : 
    3152           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
    3153           0 :     len2 = VARSIZE_ANY_EXHDR(t2);
    3154             : 
    3155           0 :     if (len2 <= 0)
    3156           0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    3157             : 
    3158           0 :     p1 = VARDATA_ANY(t1);
    3159           0 :     p2 = VARDATA_ANY(t2);
    3160             : 
    3161           0 :     pos = 0;
    3162           0 :     px = (len1 - len2);         /* last offset where the pattern still fits */
    3163           0 :     for (p = 0; p <= px; p++)
    3164             :     {
    3165           0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    3166             :         {
    3167           0 :             pos = p + 1;
    3168           0 :             break;
    3169             :         };
    3170           0 :         p1++;
    3171             :     };
    3172             : 
    3173           0 :     PG_RETURN_INT32(pos);
    3174             : }
    3175             : 
    3176             : /*-------------------------------------------------------------
    3177             :  * byteaGetByte
    3178             :  *
    3179             :  * this routine treats "bytea" as an array of bytes.
    3180             :  * It returns the Nth byte (a number between 0 and 255).
                     :  *
                     :  * N is zero-based: the valid range is 0 .. length - 1.  Any other
                     :  * value raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.  The byte is read as an
                     :  * unsigned char, so the int32 result is never negative.
    3181             :  *-------------------------------------------------------------
    3182             :  */
    3183             : Datum
    3184          60 : byteaGetByte(PG_FUNCTION_ARGS)
    3185             : {
    3186          60 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3187          60 :     int32       n = PG_GETARG_INT32(1);
    3188             :     int         len;
    3189             :     int         byte;
    3190             : 
    3191          60 :     len = VARSIZE_ANY_EXHDR(v);
    3192             : 
    3193          60 :     if (n < 0 || n >= len)
    3194           6 :         ereport(ERROR,
    3195             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3196             :                  errmsg("index %d out of valid range, 0..%d",
    3197             :                         n, len - 1)));
    3198             : 
    3199          54 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3200             : 
    3201          54 :     PG_RETURN_INT32(byte);
    3202             : }
    3203             : 
    3204             : /*-------------------------------------------------------------
    3205             :  * byteaGetBit
    3206             :  *
    3207             :  * This routine treats a "bytea" type like an array of bits.
    3208             :  * It returns the value of the Nth bit (0 or 1).
    3209             :  *
                     :  * N is a zero-based int64 index; the valid range is 0 .. 8 * length - 1
                     :  * and anything else raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.  Bit N lives
                     :  * in byte N / 8, and within that byte bit number N % 8 is selected with
                     :  * the mask (1 << bitNo), i.e. bit 0 is the least significant bit.
                     :  *
    3210             :  *-------------------------------------------------------------
    3211             :  */
    3212             : Datum
    3213          12 : byteaGetBit(PG_FUNCTION_ARGS)
    3214             : {
    3215          12 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3216          12 :     int64       n = PG_GETARG_INT64(1);
    3217             :     int         byteNo,
    3218             :                 bitNo;
    3219             :     int         len;
    3220             :     int         byte;
    3221             : 
    3222          12 :     len = VARSIZE_ANY_EXHDR(v);
    3223             : 
                     :     /* len * 8 is computed in 64 bits so it cannot overflow an int */
    3224          12 :     if (n < 0 || n >= (int64) len * 8)
    3225           6 :         ereport(ERROR,
    3226             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3227             :                  errmsg("index %lld out of valid range, 0..%lld",
    3228             :                         (long long) n, (long long) len * 8 - 1)));
    3229             : 
    3230             :     /* n/8 is now known < len, so safe to cast to int */
    3231           6 :     byteNo = (int) (n / 8);
    3232           6 :     bitNo = (int) (n % 8);
    3233             : 
    3234           6 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3235             : 
    3236           6 :     if (byte & (1 << bitNo))
    3237           6 :         PG_RETURN_INT32(1);
    3238             :     else
    3239           0 :         PG_RETURN_INT32(0);
    3240             : }
    3241             : 
    3242             : /*-------------------------------------------------------------
    3243             :  * byteaSetByte
    3244             :  *
    3245             :  * Given an instance of type 'bytea' creates a new one with
    3246             :  * the Nth byte set to the given value.
    3247             :  *
                     :  * The argument is fetched with PG_GETARG_BYTEA_P_COPY and the
                     :  * modification is made in that copy, which is then returned.  N is
                     :  * zero-based; the valid range is 0 .. length - 1 and anything else
                     :  * raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.  The new value is not
                     :  * range-checked: it is stored into an unsigned char, so only its
                     :  * low-order eight bits are kept.
                     :  *
    3248             :  *-------------------------------------------------------------
    3249             :  */
    3250             : Datum
    3251          12 : byteaSetByte(PG_FUNCTION_ARGS)
    3252             : {
    3253          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3254          12 :     int32       n = PG_GETARG_INT32(1);
    3255          12 :     int32       newByte = PG_GETARG_INT32(2);
    3256             :     int         len;
    3257             : 
    3258          12 :     len = VARSIZE(res) - VARHDRSZ;
    3259             : 
    3260          12 :     if (n < 0 || n >= len)
    3261           6 :         ereport(ERROR,
    3262             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3263             :                  errmsg("index %d out of valid range, 0..%d",
    3264             :                         n, len - 1)));
    3265             : 
    3266             :     /*
    3267             :      * Now set the byte.
    3268             :      */
    3269           6 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3270             : 
    3271           6 :     PG_RETURN_BYTEA_P(res);
    3272             : }
    3273             : 
    3274             : /*-------------------------------------------------------------
    3275             :  * byteaSetBit
    3276             :  *
    3277             :  * Given an instance of type 'bytea' creates a new one with
    3278             :  * the Nth bit set to the given value.
    3279             :  *
                     :  * The argument is fetched with PG_GETARG_BYTEA_P_COPY and the
                     :  * modification is made in that copy, which is then returned.  N is a
                     :  * zero-based int64 index with valid range 0 .. 8 * length - 1; bit N
                     :  * lives in byte N / 8 and is addressed with the mask (1 << (N % 8)),
                     :  * i.e. bit 0 is the least significant bit.
                     :  *
                     :  * The index is validated first (ERRCODE_ARRAY_SUBSCRIPT_ERROR), then the
                     :  * new value, which must be 0 or 1 (ERRCODE_INVALID_PARAMETER_VALUE).
                     :  *
    3280             :  *-------------------------------------------------------------
    3281             :  */
    3282             : Datum
    3283          12 : byteaSetBit(PG_FUNCTION_ARGS)
    3284             : {
    3285          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3286          12 :     int64       n = PG_GETARG_INT64(1);
    3287          12 :     int32       newBit = PG_GETARG_INT32(2);
    3288             :     int         len;
    3289             :     int         oldByte,
    3290             :                 newByte;
    3291             :     int         byteNo,
    3292             :                 bitNo;
    3293             : 
    3294          12 :     len = VARSIZE(res) - VARHDRSZ;
    3295             : 
    3296          12 :     if (n < 0 || n >= (int64) len * 8)
    3297           6 :         ereport(ERROR,
    3298             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3299             :                  errmsg("index %lld out of valid range, 0..%lld",
    3300             :                         (long long) n, (long long) len * 8 - 1)));
    3301             : 
    3302             :     /* n/8 is now known < len, so safe to cast to int */
    3303           6 :     byteNo = (int) (n / 8);
    3304           6 :     bitNo = (int) (n % 8);
    3305             : 
    3306             :     /*
    3307             :      * sanity check!
    3308             :      */
    3309           6 :     if (newBit != 0 && newBit != 1)
    3310           0 :         ereport(ERROR,
    3311             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3312             :                  errmsg("new bit must be 0 or 1")));
    3313             : 
    3314             :     /*
    3315             :      * Update the byte.
    3316             :      */
    3317           6 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3318             : 
    3319           6 :     if (newBit == 0)
    3320           6 :         newByte = oldByte & (~(1 << bitNo));
    3321             :     else
    3322           0 :         newByte = oldByte | (1 << bitNo);
    3323             : 
    3324           6 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3325             : 
    3326           6 :     PG_RETURN_BYTEA_P(res);
    3327             : }
    3328             : 
    3329             : 
    3330             : /* text_name()
    3331             :  * Converts a text type to a Name type.
                     :  *
                     :  * The result is a freshly allocated, zero-filled buffer of NAMEDATALEN
                     :  * bytes into which the text data is copied.  Input of NAMEDATALEN bytes
                     :  * or more is shortened to the length chosen by pg_mbcliplen() with a
                     :  * limit of NAMEDATALEN - 1 bytes, so at least one trailing zero byte
                     :  * always remains.
    3332             :  */
    3333             : Datum
    3334       30574 : text_name(PG_FUNCTION_ARGS)
    3335             : {
    3336       30574 :     text       *s = PG_GETARG_TEXT_PP(0);
    3337             :     Name        result;
    3338             :     int         len;
    3339             : 
    3340       30574 :     len = VARSIZE_ANY_EXHDR(s);
    3341             : 
    3342             :     /*
                     :      * Truncate oversize input.  NOTE(review): pg_mbcliplen() presumably
                     :      * clips on a multibyte character boundary -- confirm against its
                     :      * definition.
                     :      */
    3343       30574 :     if (len >= NAMEDATALEN)
    3344           6 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3345             : 
    3346             :     /* We use palloc0 here to ensure result is zero-padded */
    3347       30574 :     result = (Name) palloc0(NAMEDATALEN);
    3348       30574 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
    3349             : 
    3350       30574 :     PG_RETURN_NAME(result);
    3351             : }
    3352             : 
    3353             : /* name_text()
    3354             :  * Converts a Name type to a text type.
                     :  *
                     :  * The Name's character data is treated as a C string and handed to
                     :  * cstring_to_text(), whose result is returned.
    3355             :  */
    3356             : Datum
    3357      646124 : name_text(PG_FUNCTION_ARGS)
    3358             : {
    3359      646124 :     Name        s = PG_GETARG_NAME(0);
    3360             : 
    3361      646124 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3362             : }
    3363             : 
    3364             : 
    3365             : /*
    3366             :  * textToQualifiedNameList - convert a text object to list of names
    3367             :  *
    3368             :  * This implements the input parsing needed by nextval() and other
    3369             :  * functions that take a text parameter representing a qualified name.
    3370             :  * We split the name at dots, downcase if not double-quoted, and
    3371             :  * truncate names if they're too long.
                     :  *
                     :  * The parsing itself is done by SplitIdentifierString() with '.' as the
                     :  * separator.  A syntax error, or an input that yields no names at all,
                     :  * raises ERRCODE_INVALID_NAME.
                     :  *
                     :  * The result is a List of String nodes.  Each name is pstrdup'd into the
                     :  * result, so it does not point into the temporary C string, which is
                     :  * freed (together with the intermediate list) before returning.
    3372             :  */
    3373             : List *
    3374        4458 : textToQualifiedNameList(text *textval)
    3375             : {
    3376             :     char       *rawname;
    3377        4458 :     List       *result = NIL;
    3378             :     List       *namelist;
    3379             :     ListCell   *l;
    3380             : 
    3381             :     /* Convert to C string (handles possible detoasting). */
    3382             :     /* Note we rely on being able to modify rawname below. */
    3383        4458 :     rawname = text_to_cstring(textval);
    3384             : 
    3385        4458 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3386           0 :         ereport(ERROR,
    3387             :                 (errcode(ERRCODE_INVALID_NAME),
    3388             :                  errmsg("invalid name syntax")));
    3389             : 
                     :     /* SplitIdentifierString() accepts an empty string; we do not */
    3390        4458 :     if (namelist == NIL)
    3391           0 :         ereport(ERROR,
    3392             :                 (errcode(ERRCODE_INVALID_NAME),
    3393             :                  errmsg("invalid name syntax")));
    3394             : 
    3395        9134 :     foreach(l, namelist)
    3396             :     {
    3397        4676 :         char       *curname = (char *) lfirst(l);
    3398             : 
    3399        4676 :         result = lappend(result, makeString(pstrdup(curname)));
    3400             :     }
    3401             : 
    3402        4458 :     pfree(rawname);
    3403        4458 :     list_free(namelist);
    3404             : 
    3405        4458 :     return result;
    3406             : }
    3407             : 
    3408             : /*
    3409             :  * SplitIdentifierString --- parse a string containing identifiers
    3410             :  *
    3411             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3412             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3413             :  * important to avoid memory leaks, so the API is designed to minimize the
    3414             :  * amount of stuff that needs to be allocated and freed.
    3415             :  *
    3416             :  * Inputs:
    3417             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3418             :  *             been modified to contain the separated identifiers.
    3419             :  *  separator: the separator punctuation expected between identifiers
    3420             :  *             (typically '.' or ',').  Whitespace may also appear around
    3421             :  *             identifiers.
    3422             :  * Outputs:
    3423             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3424             :  *            rawstring.  Caller should list_free() this even on error return.
    3425             :  *
    3426             :  * Returns true if okay, false if there is a syntax error in the string.
    3427             :  *
    3428             :  * Note that an empty string is considered okay here, though not in
    3429             :  * textToQualifiedNameList.
                     :  *
                     :  * Because the list entries point into rawstring, they are valid only as
                     :  * long as rawstring itself is.  On a false return, rawstring may already
                     :  * have been partially modified and namelist may hold the identifiers
                     :  * parsed before the error was detected.
    3430             :  */
    3431             : bool
    3432      241772 : SplitIdentifierString(char *rawstring, char separator,
    3433             :                       List **namelist)
    3434             : {
    3435      241772 :     char       *nextp = rawstring;
    3436      241772 :     bool        done = false;
    3437             : 
    3438      241772 :     *namelist = NIL;
    3439             : 
    3440      241778 :     while (scanner_isspace(*nextp))
    3441           6 :         nextp++;                /* skip leading whitespace */
    3442             : 
    3443      241772 :     if (*nextp == '\0')
    3444       29758 :         return true;            /* allow empty string */
    3445             : 
    3446             :     /* At the top of the loop, we are at start of a new identifier. */
    3447             :     do
    3448             :     {
    3449             :         char       *curname;
    3450             :         char       *endp;
    3451             : 
    3452      385884 :         if (*nextp == '"')
    3453             :         {
    3454             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3455       36320 :             curname = nextp + 1;
    3456             :             for (;;)
    3457             :             {
    3458       36324 :                 endp = strchr(nextp + 1, '"');
    3459       36322 :                 if (endp == NULL)
    3460           0 :                     return false;   /* mismatched quotes */
    3461       36322 :                 if (endp[1] != '"')
    3462       36320 :                     break;      /* found end of quoted name */
    3463             :                 /*
                     :                  * Collapse adjacent quotes into one quote, and look again.
                     :                  * strlen(endp) bytes starting at endp + 1 cover the rest of
                     :                  * the string including its terminating null.  Setting nextp
                     :                  * to endp makes the next search start just past the quote
                     :                  * that was kept.
                     :                  */
    3464           2 :                 memmove(endp, endp + 1, strlen(endp));
    3465           2 :                 nextp = endp;
    3466             :             }
    3467             :             /* endp now points at the terminating quote */
    3468       36320 :             nextp = endp + 1;
    3469             :         }
    3470             :         else
    3471             :         {
    3472             :             /* Unquoted name --- extends to separator or whitespace */
    3473             :             char       *downname;
    3474             :             int         len;
    3475             : 
    3476      349564 :             curname = nextp;
    3477     3140500 :             while (*nextp && *nextp != separator &&
    3478     2790938 :                    !scanner_isspace(*nextp))
    3479     2790936 :                 nextp++;
    3480      349564 :             endp = nextp;
    3481      349564 :             if (curname == nextp)
    3482           0 :                 return false;   /* empty unquoted name not allowed */
    3483             : 
    3484             :             /*
    3485             :              * Downcase the identifier, using same code as main lexer does.
    3486             :              *
    3487             :              * XXX because we want to overwrite the input in-place, we cannot
    3488             :              * support a downcasing transformation that increases the string
    3489             :              * length.  This is not a problem given the current implementation
    3490             :              * of downcase_truncate_identifier, but we'll probably have to do
    3491             :              * something about this someday.
                     :              *
                     :              * The copy back must write at most len bytes: the byte at
                     :              * curname[len] (the separator, whitespace or the terminating
                     :              * null) has not been examined yet, so it must not be overwritten
                     :              * here.  strncpy does exactly that, and zero-fills the remainder
                     :              * if the downcased name came out shorter.
    3492             :              */
    3493      349564 :             len = endp - curname;
    3494      349564 :             downname = downcase_truncate_identifier(curname, len, false);
    3495             :             Assert(strlen(downname) <= len);
    3496      349564 :             strncpy(curname, downname, len);    /* strncpy is required here */
    3497      349564 :             pfree(downname);
    3498             :         }
    3499             : 
    3500      385886 :         while (scanner_isspace(*nextp))
    3501           2 :             nextp++;            /* skip trailing whitespace */
    3502             : 
    3503      385884 :         if (*nextp == separator)
    3504             :         {
    3505      173870 :             nextp++;
    3506      323982 :             while (scanner_isspace(*nextp))
    3507      150112 :                 nextp++;        /* skip leading whitespace for next */
    3508             :             /* we expect another name, so done remains false */
    3509             :         }
    3510      212014 :         else if (*nextp == '\0')
    3511      212012 :             done = true;
    3512             :         else
    3513           2 :             return false;       /* invalid syntax */
    3514             : 
    3515             :         /* Now safe to overwrite separator with a null */
    3516      385882 :         *endp = '\0';
    3517             : 
    3518             :         /* Truncate name if it's overlength */
    3519      385882 :         truncate_identifier(curname, strlen(curname), false);
    3520             : 
    3521             :         /*
    3522             :          * Finished isolating current name --- add it to list
    3523             :          */
    3524      385882 :         *namelist = lappend(*namelist, curname);
    3525             : 
    3526             :         /* Loop back if we didn't reach end of string */
    3527      385882 :     } while (!done);
    3528             : 
    3529      212012 :     return true;
    3530             : }
    3531             : 
    3532             : 
    3533             : /*
    3534             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3535             :  *
    3536             :  * This works fine on file names too; the function name is historical.
    3537             :  *
    3538             :  * This is similar to SplitIdentifierString, except that the parsing
    3539             :  * rules are meant to handle pathnames instead of identifiers: there is
    3540             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3541             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3542             :  * last, the returned strings are separately palloc'd rather than being
    3543             :  * pointers into rawstring --- but we still scribble on rawstring.
    3544             :  *
    3545             :  * Inputs:
    3546             :  *  rawstring: the input string; must be modifiable!
    3547             :  *  separator: the separator punctuation expected between directories
    3548             :  *             (typically ',' or ';').  Whitespace may also appear around
    3549             :  *             directories.
    3550             :  * Outputs:
    3551             :  *  namelist: filled with a palloc'd list of directory names.
    3552             :  *            Caller should list_free_deep() this even on error return.
    3553             :  *
    3554             :  * Returns true if okay, false if there is a syntax error in the string.
    3555             :  *
    3556             :  * Note that an empty string is considered okay here.
    3557             :  */
    3558             : bool
    3559        1586 : SplitDirectoriesString(char *rawstring, char separator,
    3560             :                        List **namelist)
    3561             : {
    3562        1586 :     char       *nextp = rawstring;
    3563        1586 :     bool        done = false;
    3564             : /* start from an empty list; it stays NIL if the string turns out to be empty */
    3565        1586 :     *namelist = NIL;
    3566             : 
    3567        1586 :     while (scanner_isspace(*nextp))
    3568           0 :         nextp++;                /* skip leading whitespace */
    3569             : 
    3570        1586 :     if (*nextp == '\0')
    3571           2 :         return true;            /* allow empty string */
    3572             : 
    3573             :     /* At the top of the loop, nextp is at the start of a new directory name. */
    3574             :     do
    3575             :     {
    3576             :         char       *curname;
    3577             :         char       *endp;
    3578             : 
    3579        1586 :         if (*nextp == '"')
    3580             :         {
    3581             :             /* Quoted name --- starts after the opening quote; collapse quote-quote pairs */
    3582           0 :             curname = nextp + 1;
    3583             :             for (;;)
    3584             :             {
    3585           0 :                 endp = strchr(nextp + 1, '"');
    3586           0 :                 if (endp == NULL)
    3587           0 :                     return false;   /* mismatched quotes */
    3588           0 :                 if (endp[1] != '"')
    3589           0 :                     break;      /* found end of quoted name */
    3590             :                 /* Collapse adjacent quotes into one (the move carries the NUL along), and look again */
    3591           0 :                 memmove(endp, endp + 1, strlen(endp));
    3592           0 :                 nextp = endp;   /* next search resumes after the kept quote */
    3593             :             }
    3594             :             /* endp now points at the terminating quote */
    3595           0 :             nextp = endp + 1;
    3596             :         }
    3597             :         else
    3598             :         {
    3599             :             /* Unquoted name --- extends to separator or end of string */
    3600        1586 :             curname = endp = nextp;
    3601       26602 :             while (*nextp && *nextp != separator)
    3602             :             {
    3603             :                 /* advance endp only past non-whitespace, so the name excludes trailing whitespace */
    3604       25016 :                 if (!scanner_isspace(*nextp))
    3605       25016 :                     endp = nextp + 1;
    3606       25016 :                 nextp++;
    3607             :             }
    3608        1586 :             if (curname == endp)
    3609           0 :                 return false;   /* empty unquoted name not allowed */
    3610             :         }
    3611             : 
    3612        1586 :         while (scanner_isspace(*nextp))
    3613           0 :             nextp++;            /* skip trailing whitespace */
    3614             : 
    3615        1586 :         if (*nextp == separator)
    3616             :         {
    3617           2 :             nextp++;
    3618           2 :             while (scanner_isspace(*nextp))
    3619           0 :                 nextp++;        /* skip leading whitespace for next */
    3620             :             /* we expect another name, so done remains false */
    3621             :         }
    3622        1584 :         else if (*nextp == '\0')
    3623        1584 :             done = true;
    3624             :         else
    3625           0 :             return false;       /* invalid syntax */
    3626             : 
    3627             :         /* Now safe to terminate the name at endp (closing quote, or first char after an unquoted name) */
    3628        1586 :         *endp = '\0';
    3629             : 
    3630             :         /* Truncate path in place to MAXPGPATH - 1 bytes if it's overlength */
    3631        1586 :         if (strlen(curname) >= MAXPGPATH)
    3632           0 :             curname[MAXPGPATH - 1] = '\0';
    3633             : 
    3634             :         /*
    3635             :          * Finished isolating current name --- add a pstrdup'd copy, run through canonicalize_path(), to list
    3636             :          */
    3637        1586 :         curname = pstrdup(curname);
    3638        1586 :         canonicalize_path(curname);
    3639        1586 :         *namelist = lappend(*namelist, curname);
    3640             : 
    3641             :         /* Loop back if we didn't reach end of string */
    3642        1586 :     } while (!done);
    3643             : 
    3644        1584 :     return true;
    3645             : }
    3646             : 
    3647             : 
    3648             : /*
    3649             :  * SplitGUCList --- parse a string containing identifiers or file names
    3650             :  *
    3651             :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
    3652             :  * presuming whether the elements will be taken as identifiers or file names.
    3653             :  * We assume the input has already been through flatten_set_variable_args(),
    3654             :  * so that we need never downcase (if appropriate, that was done already).
    3655             :  * Nor do we ever truncate, since we don't know the correct max length.
    3656             :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
    3657             :  * because any embedded whitespace should have led to double-quoting).
    3658             :  * Otherwise the API is identical to SplitIdentifierString.
    3659             :  *
    3660             :  * XXX it's annoying to have so many copies of this string-splitting logic.
    3661             :  * However, it's not clear that having one function with a bunch of option
    3662             :  * flags would be much better.
    3663             :  *
    3664             :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
    3665             :  * Be sure to update that if you have to change this.
    3666             :  *
    3667             :  * Inputs:
    3668             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3669             :  *             been modified to contain the separated identifiers.
    3670             :  *  separator: the separator punctuation expected between identifiers
    3671             :  *             (typically '.' or ',').  Whitespace may also appear around
    3672             :  *             identifiers.
    3673             :  * Outputs:
    3674             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3675             :  *            rawstring.  Caller should list_free() this even on error return.
    3676             :  *
    3677             :  * Returns true if okay, false if there is a syntax error in the string.
    3678             :  */
    3679             : bool
    3680        3496 : SplitGUCList(char *rawstring, char separator,
    3681             :              List **namelist)
    3682             : {
    3683        3496 :     char       *nextp = rawstring;
    3684        3496 :     bool        done = false;
    3685             : /* start from an empty list; it stays NIL if the string turns out to be empty */
    3686        3496 :     *namelist = NIL;
    3687             : 
    3688        3496 :     while (scanner_isspace(*nextp))
    3689           0 :         nextp++;                /* skip leading whitespace */
    3690             : 
    3691        3496 :     if (*nextp == '\0')
    3692        3422 :         return true;            /* allow empty string */
    3693             : 
    3694             :     /* At the top of the loop, nextp is at the start of a new identifier. */
    3695             :     do
    3696             :     {
    3697             :         char       *curname;
    3698             :         char       *endp;
    3699             : 
    3700         100 :         if (*nextp == '"')
    3701             :         {
    3702             :             /* Quoted name --- starts after the opening quote; collapse quote-quote pairs */
    3703          24 :             curname = nextp + 1;
    3704             :             for (;;)
    3705             :             {
    3706          36 :                 endp = strchr(nextp + 1, '"');
    3707          30 :                 if (endp == NULL)
    3708           0 :                     return false;   /* mismatched quotes */
    3709          30 :                 if (endp[1] != '"')
    3710          24 :                     break;      /* found end of quoted name */
    3711             :                 /* Collapse adjacent quotes into one (the move carries the NUL along), and look again */
    3712           6 :                 memmove(endp, endp + 1, strlen(endp));
    3713           6 :                 nextp = endp;   /* next search resumes after the kept quote */
    3714             :             }
    3715             :             /* endp now points at the terminating quote */
    3716          24 :             nextp = endp + 1;
    3717             :         }
    3718             :         else
    3719             :         {
    3720             :             /* Unquoted name --- extends to separator, whitespace, or end of string */
    3721          76 :             curname = nextp;
    3722         718 :             while (*nextp && *nextp != separator &&
    3723         642 :                    !scanner_isspace(*nextp))
    3724         642 :                 nextp++;
    3725          76 :             endp = nextp;
    3726          76 :             if (curname == nextp)
    3727           0 :                 return false;   /* empty unquoted name not allowed */
    3728             :         }
    3729             : 
    3730         100 :         while (scanner_isspace(*nextp))
    3731           0 :             nextp++;            /* skip trailing whitespace */
    3732             : 
    3733         100 :         if (*nextp == separator)
    3734             :         {
    3735          26 :             nextp++;
    3736          44 :             while (scanner_isspace(*nextp))
    3737          18 :                 nextp++;        /* skip leading whitespace for next */
    3738             :             /* we expect another name, so done remains false */
    3739             :         }
    3740          74 :         else if (*nextp == '\0')
    3741          74 :             done = true;
    3742             :         else
    3743           0 :             return false;       /* invalid syntax */
    3744             : 
    3745             :         /* Now safe to terminate the name at endp (closing quote, or first char after an unquoted name) */
    3746         100 :         *endp = '\0';
    3747             : 
    3748             :         /*
    3749             :          * Finished isolating current name --- add a pointer into rawstring (no copy is made) to list
    3750             :          */
    3751         100 :         *namelist = lappend(*namelist, curname);
    3752             : 
    3753             :         /* Loop back if we didn't reach end of string */
    3754         100 :     } while (!done);
    3755             : 
    3756          74 :     return true;
    3757             : }
    3758             : 
    3759             : 
    3760             : /*****************************************************************************
    3761             :  *  Comparison Functions used for bytea
    3762             :  *
    3763             :  * Note: btree indexes need these routines not to leak memory; therefore,
    3764             :  * be careful to free working copies of toasted datums.  Most places don't
    3765             :  * need to be so careful.
    3766             :  *****************************************************************************/
    3767             : /* byteaeq --- true iff both bytea arguments have the same length and the same bytes */
    3768             : Datum
    3769       10390 : byteaeq(PG_FUNCTION_ARGS)
    3770             : {
    3771       10390 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3772       10390 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3773             :     bool        result;
    3774             :     Size        len1,
    3775             :                 len2;
    3776             : 
    3777             :     /*
    3778             :      * We can use a fast path for unequal lengths, which might save us from
    3779             :      * having to detoast one or both values.
    3780             :      */
    3781       10390 :     len1 = toast_raw_datum_size(arg1);
    3782       10390 :     len2 = toast_raw_datum_size(arg2);
    3783       10390 :     if (len1 != len2)
    3784        4316 :         result = false;
    3785             :     else
    3786             :     {
    3787        6074 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3788        6074 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3789             : /* raw sizes are equal here; subtracting VARHDRSZ gives the data length to compare */
    3790        6074 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3791             :                          len1 - VARHDRSZ) == 0);
    3792             : /* free any detoasted working copies so repeated calls do not leak memory */
    3793        6074 :         PG_FREE_IF_COPY(barg1, 0);
    3794        6074 :         PG_FREE_IF_COPY(barg2, 1);
    3795             :     }
    3796             : 
    3797       10390 :     PG_RETURN_BOOL(result);
    3798             : }
    3799             : /* byteane --- true iff the two bytea arguments differ in length or in any byte */
    3800             : Datum
    3801         768 : byteane(PG_FUNCTION_ARGS)
    3802             : {
    3803         768 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3804         768 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3805             :     bool        result;
    3806             :     Size        len1,
    3807             :                 len2;
    3808             : 
    3809             :     /*
    3810             :      * We can use a fast path for unequal lengths, which might save us from
    3811             :      * having to detoast one or both values.
    3812             :      */
    3813         768 :     len1 = toast_raw_datum_size(arg1);
    3814         768 :     len2 = toast_raw_datum_size(arg2);
    3815         768 :     if (len1 != len2)
    3816           0 :         result = true;
    3817             :     else
    3818             :     {
    3819         768 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3820         768 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3821             : /* raw sizes are equal here; subtracting VARHDRSZ gives the data length to compare */
    3822         768 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3823             :                          len1 - VARHDRSZ) != 0);
    3824             : /* free any detoasted working copies so repeated calls do not leak memory */
    3825         768 :         PG_FREE_IF_COPY(barg1, 0);
    3826         768 :         PG_FREE_IF_COPY(barg2, 1);
    3827             :     }
    3828             : 
    3829         768 :     PG_RETURN_BOOL(result);
    3830             : }
    3831             : /* bytealt --- true iff arg1 sorts strictly before arg2 bytewise (a proper prefix sorts first) */
    3832             : Datum
    3833        8316 : bytealt(PG_FUNCTION_ARGS)
    3834             : {
    3835        8316 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3836        8316 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3837             :     int         len1,
    3838             :                 len2;
    3839             :     int         cmp;
    3840             : 
    3841        8316 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3842        8316 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3843             : /* compare only the common prefix; the lengths break ties in the return expression */
    3844        8316 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3845             : 
    3846        8316 :     PG_FREE_IF_COPY(arg1, 0);
    3847        8316 :     PG_FREE_IF_COPY(arg2, 1);
    3848             : 
    3849        8316 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    3850             : }
    3851             : /* byteale --- true iff arg1 sorts before arg2 bytewise or the two are equal */
    3852             : Datum
    3853        6356 : byteale(PG_FUNCTION_ARGS)
    3854             : {
    3855        6356 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3856        6356 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3857             :     int         len1,
    3858             :                 len2;
    3859             :     int         cmp;
    3860             : 
    3861        6356 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3862        6356 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3863             : /* compare only the common prefix; the lengths break ties in the return expression */
    3864        6356 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3865             : 
    3866        6356 :     PG_FREE_IF_COPY(arg1, 0);
    3867        6356 :     PG_FREE_IF_COPY(arg2, 1);
    3868             : 
    3869        6356 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    3870             : }
    3871             : /* byteagt --- true iff arg1 sorts strictly after arg2 bytewise (a longer string beats its own prefix) */
    3872             : Datum
    3873        6228 : byteagt(PG_FUNCTION_ARGS)
    3874             : {
    3875        6228 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3876        6228 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3877             :     int         len1,
    3878             :                 len2;
    3879             :     int         cmp;
    3880             : 
    3881        6228 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3882        6228 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3883             : /* compare only the common prefix; the lengths break ties in the return expression */
    3884        6228 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3885             : 
    3886        6228 :     PG_FREE_IF_COPY(arg1, 0);
    3887        6228 :     PG_FREE_IF_COPY(arg2, 1);
    3888             : 
    3889        6228 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    3890             : }
    3891             : /* byteage --- true iff arg1 sorts after arg2 bytewise or the two are equal */
    3892             : Datum
    3893        5010 : byteage(PG_FUNCTION_ARGS)
    3894             : {
    3895        5010 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3896        5010 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3897             :     int         len1,
    3898             :                 len2;
    3899             :     int         cmp;
    3900             : 
    3901        5010 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3902        5010 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3903             : /* compare only the common prefix; the lengths break ties in the return expression */
    3904        5010 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3905             : 
    3906        5010 :     PG_FREE_IF_COPY(arg1, 0);
    3907        5010 :     PG_FREE_IF_COPY(arg2, 1);
    3908             : 
    3909        5010 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    3910             : }
    3911             : /* byteacmp --- three-way bytewise comparison: negative, zero, or positive int32 */
    3912             : Datum
    3913       87610 : byteacmp(PG_FUNCTION_ARGS)
    3914             : {
    3915       87610 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3916       87610 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3917             :     int         len1,
    3918             :                 len2;
    3919             :     int         cmp;
    3920             : 
    3921       87610 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3922       87610 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3923             : /* memcmp() result over the common prefix decides; on a tie the shorter value sorts first (-1 / 1) */
    3924       87610 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3925       87610 :     if ((cmp == 0) && (len1 != len2))
    3926       14638 :         cmp = (len1 < len2) ? -1 : 1;
    3927             : 
    3928       87610 :     PG_FREE_IF_COPY(arg1, 0);
    3929       87610 :     PG_FREE_IF_COPY(arg2, 1);
    3930             : 
    3931       87610 :     PG_RETURN_INT32(cmp);
    3932             : }
    3933             : /* bytea_larger --- returns arg1 if it sorts strictly after arg2 bytewise, otherwise arg2 */
    3934             : Datum
    3935          24 : bytea_larger(PG_FUNCTION_ARGS)
    3936             : {
    3937          24 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3938          24 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3939             :     bytea      *result;
    3940             :     int         len1,
    3941             :                 len2;
    3942             :     int         cmp;
    3943             : 
    3944          24 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3945          24 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3946             : /* compare the common prefix; when it is equal the longer value counts as larger */
    3947          24 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3948          24 :     result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
    3949             : /* neither argument is freed here: one of them is handed back as the result */
    3950          24 :     PG_RETURN_BYTEA_P(result);
    3951             : }
    3952             : /* bytea_smaller --- returns arg1 if it sorts strictly before arg2 bytewise, otherwise arg2 */
    3953             : Datum
    3954          24 : bytea_smaller(PG_FUNCTION_ARGS)
    3955             : {
    3956          24 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3957          24 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3958             :     bytea      *result;
    3959             :     int         len1,
    3960             :                 len2;
    3961             :     int         cmp;
    3962             : 
    3963          24 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3964          24 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3965             : /* compare the common prefix; when it is equal the shorter value counts as smaller */
    3966          24 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3967          24 :     result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
    3968             : /* neither argument is freed here: one of them is handed back as the result */
    3969          24 :     PG_RETURN_BYTEA_P(result);
    3970             : }
    3971             : /* bytea_sortsupport --- fill in the SortSupport passed as argument 0 for sorting bytea values */
    3972             : Datum
    3973          20 : bytea_sortsupport(PG_FUNCTION_ARGS)
    3974             : {
    3975          20 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    3976             :     MemoryContext oldcontext;
    3977             : /* run the setup with ssup->ssup_cxt as the current memory context */
    3978          20 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    3979             : 
    3980             :     /* Use generic string SortSupport, forcing "C" collation */
    3981          20 :     varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
    3982             : /* restore the caller's memory context before returning */
    3983          20 :     MemoryContextSwitchTo(oldcontext);
    3984             : 
    3985          20 :     PG_RETURN_VOID();
    3986             : }
    3987             : 
    3988             : /*
    3989             :  * appendStringInfoText
    3990             :  *
    3991             :  * Append the data bytes of text value t (its varlena header excluded) to str.
    3992             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
    3993             :  */
    3994             : static void
    3995     1704110 : appendStringInfoText(StringInfo str, const text *t)
    3996             : {
    3997     1704110 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
    3998     1704110 : }
    3999             : 
    4000             : /*
    4001             :  * replace_text
    4002             :  * replace all occurrences of argument 1 ('from_sub_text') in argument 0
    4003             :  * ('src_text') with argument 2 ('to_sub_text') to form the result
    4004             :  *
    4005             :  * returns 'src_text' itself if it or 'from_sub_text' is empty, or if no match is found;
    4006             :  * otherwise returns a new text value built from the pieces
    4007             :  */
    4008             : Datum
    4009        1160 : replace_text(PG_FUNCTION_ARGS)
    4010             : {
    4011        1160 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    4012        1160 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    4013        1160 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    4014             :     int         src_text_len;
    4015             :     int         from_sub_text_len;
    4016             :     TextPositionState state;
    4017             :     text       *ret_text;
    4018             :     int         chunk_len;
    4019             :     char       *curr_ptr;
    4020             :     char       *start_ptr;
    4021             :     StringInfoData str;
    4022             :     bool        found;
    4023             : 
    4024        1160 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4025        1160 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
    4026             : 
    4027             :     /* Return unmodified source string if empty source or pattern */
    4028        1160 :     if (src_text_len < 1 || from_sub_text_len < 1)
    4029             :     {
    4030           0 :         PG_RETURN_TEXT_P(src_text);
    4031             :     }
    4032             : 
    4033        1160 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
    4034             : 
    4035        1160 :     found = text_position_next(&state);
    4036             : 
    4037             :     /* When the from_sub_text is not found, there is nothing to do. */
    4038        1160 :     if (!found)
    4039             :     {
    4040         276 :         text_position_cleanup(&state);
    4041         276 :         PG_RETURN_TEXT_P(src_text);
    4042             :     }
    4043         884 :     curr_ptr = text_position_get_match_ptr(&state);
    4044         884 :     start_ptr = VARDATA_ANY(src_text);
    4045             : /* curr_ptr = current match, start_ptr = first source byte not yet copied to str */
    4046         884 :     initStringInfo(&str);
    4047             : 
    4048             :     do
    4049             :     {
    4050        4794 :         CHECK_FOR_INTERRUPTS();
    4051             : 
    4052             :         /* copy the data skipped over by last text_position_next() */
    4053        4794 :         chunk_len = curr_ptr - start_ptr;
    4054        4794 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4055             : /* emit the replacement in place of the matched substring */
    4056        4794 :         appendStringInfoText(&str, to_sub_text);
    4057             : /* resume copying just past the match, from_sub_text_len bytes further on */
    4058        4794 :         start_ptr = curr_ptr + from_sub_text_len;
    4059             : 
    4060        4794 :         found = text_position_next(&state);
    4061        4794 :         if (found)
    4062        3910 :             curr_ptr = text_position_get_match_ptr(&state);
    4063             :     }
    4064        4794 :     while (found);
    4065             : 
    4066             :     /* copy trailing data: everything from start_ptr to the end of src_text */
    4067         884 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4068         884 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    4069             : 
    4070         884 :     text_position_cleanup(&state);
    4071             : /* convert the accumulated bytes to a text value, then release the work buffer */
    4072         884 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    4073         884 :     pfree(str.data);
    4074             : 
    4075         884 :     PG_RETURN_TEXT_P(ret_text);
    4076             : }
    4077             : 
    4078             : /*
    4079             :  * check_replace_text_has_escape
    4080             :  *
    4081             :  * Returns 0 if text contains no backslashes that need processing (a lone trailing backslash counts as none).
    4082             :  * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
    4083             :  * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
    4084             :  */
    4085             : static int
    4086       12820 : check_replace_text_has_escape(const text *replace_text)
    4087             : {
    4088       12820 :     int         result = 0;
    4089       12820 :     const char *p = VARDATA_ANY(replace_text);
    4090       12820 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4091             : /* p scans the data bytes of replace_text; p_end is one past the last byte */
    4092       12864 :     while (p < p_end)
    4093             :     {
    4094             :         /* Find next escape char, if any. */
    4095       11898 :         p = memchr(p, '\\', p_end - p);
    4096       11898 :         if (p == NULL)
    4097       11076 :             break;
    4098         822 :         p++;
    4099             :         /* Note: a backslash at the end doesn't require extra processing. */
    4100         822 :         if (p < p_end)
    4101             :         {
    4102         822 :             if (*p >= '1' && *p <= '9')
    4103         778 :                 return 2;       /* Found a submatch specifier, so done */
    4104          44 :             result = 1;         /* Found some other sequence, keep looking */
    4105          44 :             p++;                /* step over the escaped char so it is not rescanned */
    4106             :         }
    4107             :     }
    4108       12042 :     return result;
    4109             : }
    4110             : 
    4111             : /*
    4112             :  * appendStringInfoRegexpSubstr
    4113             :  *
    4114             :  * Append replace_text to str, substituting pmatch[1..9] for \1 .. \9, pmatch[0]
    4115             :  * for \&, and a single backslash for \\.  start_ptr is the start of the match in
    4116             :  * the source string, at logical character position data_pos.
    4117             :  */
    4118             : static void
    4119         236 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    4120             :                              regmatch_t *pmatch,
    4121             :                              char *start_ptr, int data_pos)
    4122             : {
    4123         236 :     const char *p = VARDATA_ANY(replace_text);
    4124         236 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    4125             : /* each iteration copies literal text up to the next backslash, then handles that escape */
    4126         574 :     while (p < p_end)
    4127             :     {
    4128         518 :         const char *chunk_start = p;
    4129             :         int         so;
    4130             :         int         eo;
    4131             : 
    4132             :         /* Find next escape char, if any. */
    4133         518 :         p = memchr(p, '\\', p_end - p);
    4134         518 :         if (p == NULL)
    4135         174 :             p = p_end;
    4136             : 
    4137             :         /* Copy the text we just scanned over, if any. */
    4138         518 :         if (p > chunk_start)
    4139         318 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    4140             : 
    4141             :         /* Done if at end of string, else advance over escape char. */
    4142         518 :         if (p >= p_end)
    4143         174 :             break;
    4144         344 :         p++;
    4145             : 
    4146         344 :         if (p >= p_end)
    4147             :         {
    4148             :             /* Escape at very end of input.  Treat same as unexpected char */
    4149           6 :             appendStringInfoChar(str, '\\');
    4150           6 :             break;
    4151             :         }
    4152             : 
    4153         338 :         if (*p >= '1' && *p <= '9')
    4154         278 :         {
    4155             :             /* Use the back reference of regexp: digit N selects pmatch[N]. */
    4156         278 :             int         idx = *p - '0';
    4157             : 
    4158         278 :             so = pmatch[idx].rm_so;
    4159         278 :             eo = pmatch[idx].rm_eo;
    4160         278 :             p++;
    4161             :         }
    4162          60 :         else if (*p == '&')
    4163             :         {
    4164             :             /* Use the entire matched string. */
    4165          18 :             so = pmatch[0].rm_so;
    4166          18 :             eo = pmatch[0].rm_eo;
    4167          18 :             p++;
    4168             :         }
    4169          42 :         else if (*p == '\\')
    4170             :         {
    4171             :             /* \\ means transfer one \ to output. */
    4172          36 :             appendStringInfoChar(str, '\\');
    4173          36 :             p++;
    4174          36 :             continue;
    4175             :         }
    4176             :         else
    4177             :         {
    4178             :             /*
    4179             :              * If escape char is not followed by any expected char, just treat
    4180             :              * it as ordinary data to copy.  (XXX would it be better to throw
    4181             :              * an error?)  p is not advanced, so the following char is copied by the next iteration.
    4182             :              */
    4183           6 :             appendStringInfoChar(str, '\\');
    4184           6 :             continue;
    4185             :         }
    4186             : /* negative offsets (presumably an unmatched subexpression --- confirm) append nothing */
    4187         296 :         if (so >= 0 && eo >= 0)
    4188             :         {
    4189             :             /*
    4190             :              * Copy the text that is back reference of regexp.  Note so and eo
    4191             :              * are counted in characters not bytes, hence the charlen_to_bytelen() calls.
    4192             :              */
    4193             :             char       *chunk_start;
    4194             :             int         chunk_len;
    4195             : 
    4196             :             Assert(so >= data_pos);
    4197         296 :             chunk_start = start_ptr;
    4198         296 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
    4199         296 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
    4200         296 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    4201             :         }
    4202             :     }
    4203         236 : }
    4204             : 
    4205             : /*
    4206             :  * replace_text_regexp
    4207             :  *
    4208             :  * replace substring(s) in src_text that match pattern with replace_text.
    4209             :  * The replace_text can contain backslash markers to substitute
    4210             :  * (parts of) the matched text.
    4211             :  *
    4212             :  * cflags: regexp compile flags.
    4213             :  * collation: collation to use.
    4214             :  * search_start: the character (not byte) offset in src_text at which to
    4215             :  * begin searching.
    4216             :  * n: if 0, replace all matches; if > 0, replace only the N'th match.
                     :  *
                     :  * The result is a new text value assembled in a StringInfo: the parts of
                     :  * src_text that are not replaced are copied through unchanged, and each
                     :  * replaced match contributes replace_text (with escapes expanded when
                     :  * check_replace_text_has_escape reports any).  When n > 0, matches before
                     :  * the N'th are counted but left in place.
                     :  *
                     :  * If pg_regexec returns anything other than REG_OKAY or REG_NOMATCH, an
                     :  * ERROR with ERRCODE_INVALID_REGULAR_EXPRESSION is raised.
                     :  *
                     :  * The temporary output buffer and the wide-character copy of src_text are
                     :  * pfree'd before returning; src_text itself is not modified.
                     :  *
                     :  * NOTE(review): search_start is an int compared against the size_t
                     :  * data_len in the loop condition; callers presumably guarantee it is
                     :  * non-negative --- confirm against callers.
    4217             :  */
    4218             : text *
    4219       12820 : replace_text_regexp(text *src_text, text *pattern_text,
    4220             :                     text *replace_text,
    4221             :                     int cflags, Oid collation,
    4222             :                     int search_start, int n)
    4223             : {
    4224             :     text       *ret_text;
    4225             :     regex_t    *re;
    4226       12820 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    4227       12820 :     int         nmatches = 0;
    4228             :     StringInfoData buf;
    4229             :     regmatch_t  pmatch[10];     /* main match, plus \1 to \9 */
    4230       12820 :     int         nmatch = lengthof(pmatch);
    4231             :     pg_wchar   *data;
    4232             :     size_t      data_len;
    4233             :     int         data_pos;
    4234             :     char       *start_ptr;
    4235             :     int         escape_status;
    4236             : 
    4237       12820 :     initStringInfo(&buf);
    4238             : 
    4239             :     /*
                     :      * Convert data string to wide characters.  The buffer is sized for
                     :      * one wide character per source byte, plus one extra slot.
                     :      */
    4240       12820 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
    4241       12820 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    4242             : 
    4243             :     /* Check whether replace_text has escapes, especially regexp submatches. */
    4244       12820 :     escape_status = check_replace_text_has_escape(replace_text);
    4245             : 
    4246             :     /* If no regexp submatches, we can use REG_NOSUB. */
    4247       12820 :     if (escape_status < 2)
    4248             :     {
    4249       12042 :         cflags |= REG_NOSUB;
    4250             :         /* Also tell pg_regexec we only want the whole-match location. */
    4251       12042 :         nmatch = 1;
    4252             :     }
    4253             : 
    4254             :     /* Prepare the regexp. */
    4255       12820 :     re = RE_compile_and_cache(pattern_text, cflags, collation);
    4256             : 
    4257             :     /* start_ptr points to the data_pos'th character of src_text */
    4258       12820 :     start_ptr = (char *) VARDATA_ANY(src_text);
    4259       12820 :     data_pos = 0;
    4260             : 
    4261       18380 :     while (search_start <= data_len)
    4262             :     {
    4263             :         int         regexec_result;
    4264             : 
    4265       18374 :         CHECK_FOR_INTERRUPTS();
    4266             : 
    4267       18374 :         regexec_result = pg_regexec(re,
    4268             :                                     data,
    4269             :                                     data_len,
    4270             :                                     search_start,
    4271             :                                     NULL,   /* no details */
    4272             :                                     nmatch,
    4273             :                                     pmatch,
    4274             :                                     0);
    4275             : 
    4276       18374 :         if (regexec_result == REG_NOMATCH)
    4277       11194 :             break;
    4278             : 
    4279        7180 :         if (regexec_result != REG_OKAY)
    4280             :         {
    4281             :             char        errMsg[100];
    4282             : 
    4283           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    4284           0 :             ereport(ERROR,
    4285             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    4286             :                      errmsg("regular expression failed: %s", errMsg)));
    4287             :         }
    4288             : 
    4289             :         /*
    4290             :          * Count matches, and decide whether to replace this match.
    4291             :          */
    4292        7180 :         nmatches++;
    4293        7180 :         if (n > 0 && nmatches != n)
    4294             :         {
    4295             :             /*
    4296             :              * No, so advance search_start, but not start_ptr/data_pos. (Thus,
    4297             :              * we treat the matched text as if it weren't matched, and copy it
    4298             :              * to the output later.)
                     :              *
                     :              * As in the main path below, a zero-length match needs one
                     :              * extra character of advance so the same match is not found
                     :              * again.
    4299             :              */
    4300          60 :             search_start = pmatch[0].rm_eo;
    4301          60 :             if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4302           0 :                 search_start++;
    4303          60 :             continue;
    4304             :         }
    4305             : 
    4306             :         /*
    4307             :          * Copy the text to the left of the match position.  Note we are given
    4308             :          * character not byte indexes.
    4309             :          */
    4310        7120 :         if (pmatch[0].rm_so - data_pos > 0)
    4311             :         {
    4312             :             int         chunk_len;
    4313             : 
    4314        6946 :             chunk_len = charlen_to_bytelen(start_ptr,
    4315        6946 :                                            pmatch[0].rm_so - data_pos);
    4316        6946 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4317             : 
    4318             :             /*
    4319             :              * Advance start_ptr over that text, to avoid multiple rescans of
    4320             :              * it if the replace_text contains multiple back-references.
    4321             :              */
    4322        6946 :             start_ptr += chunk_len;
    4323        6946 :             data_pos = pmatch[0].rm_so;
    4324             :         }
    4325             : 
    4326             :         /*
    4327             :          * Copy the replace_text, processing escapes if any are present.
    4328             :          */
    4329        7120 :         if (escape_status > 0)
    4330         236 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    4331             :                                          start_ptr, data_pos);
    4332             :         else
    4333        6884 :             appendStringInfoText(&buf, replace_text);
    4334             : 
    4335             :         /* Advance start_ptr and data_pos over the matched text. */
    4336       14240 :         start_ptr += charlen_to_bytelen(start_ptr,
    4337        7120 :                                         pmatch[0].rm_eo - data_pos);
    4338        7120 :         data_pos = pmatch[0].rm_eo;
    4339             : 
    4340             :         /*
    4341             :          * If we only want to replace one occurrence, we're done.
    4342             :          */
    4343        7120 :         if (n > 0)
    4344        1620 :             break;
    4345             : 
    4346             :         /*
    4347             :          * Advance search position.  Normally we start the next search at the
    4348             :          * end of the previous match; but if the match was of zero length, we
    4349             :          * have to advance by one character, or we'd just find the same match
    4350             :          * again.
    4351             :          */
    4352        5500 :         search_start = data_pos;
    4353        5500 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4354          12 :             search_start++;
    4355             :     }
    4356             : 
    4357             :     /*
    4358             :      * Copy the text to the right of the last match.
                     :      *
                     :      * start_ptr is the first byte not yet copied, so the remainder is
                     :      * measured in bytes from there to the end of the varlena.  This also
                     :      * picks up any matches that were skipped because they were not the
                     :      * N'th one.
    4359             :      */
    4360       12820 :     if (data_pos < data_len)
    4361             :     {
    4362             :         int         chunk_len;
    4363             : 
    4364       12248 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4365       12248 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4366             :     }
    4367             : 
    4368       12820 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4369       12820 :     pfree(buf.data);
    4370       12820 :     pfree(data);
    4371             : 
    4372       12820 :     return ret_text;
    4373             : }
    4374             : 
    4375             : /*
    4376             :  * split_part
    4377             :  * parse input string based on provided field separator
    4378             :  * return N'th item (1 based, negative counts from end)
                     :  *
                     :  * Arguments (fetched from fcinfo): 0 = input string (text),
                     :  * 1 = field separator (text), 2 = field number (int32).
                     :  *
                     :  * Special cases handled here:
                     :  *  - field number zero raises ERRCODE_INVALID_PARAMETER_VALUE;
                     :  *  - an empty input string yields an empty string;
                     :  *  - an empty separator, or a separator that never occurs, yields the
                     :  *    whole input for field 1 or -1 and an empty string otherwise;
                     :  *  - a field number beyond the available fields (in either direction)
                     :  *    yields an empty string.
                     :  *
                     :  * A negative field number other than -1 is handled by first counting all
                     :  * fields, converting to the equivalent positive number, and then
                     :  * rescanning from the first separator.
    4379             :  */
    4380             : Datum
    4381         102 : split_part(PG_FUNCTION_ARGS)
    4382             : {
    4383         102 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4384         102 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4385         102 :     int         fldnum = PG_GETARG_INT32(2);
    4386             :     int         inputstring_len;
    4387             :     int         fldsep_len;
    4388             :     TextPositionState state;
    4389             :     char       *start_ptr;
    4390             :     char       *end_ptr;
    4391             :     text       *result_text;
    4392             :     bool        found;
    4393             : 
    4394             :     /* field number is 1 based */
    4395         102 :     if (fldnum == 0)
    4396           6 :         ereport(ERROR,
    4397             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4398             :                  errmsg("field position must not be zero")));
    4399             : 
    4400          96 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4401          96 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4402             : 
    4403             :     /* return empty string for empty input string */
    4404          96 :     if (inputstring_len < 1)
    4405          12 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4406             : 
    4407             :     /* handle empty field separator */
    4408          84 :     if (fldsep_len < 1)
    4409             :     {
    4410             :         /* if first or last field, return input string, else empty string */
    4411          24 :         if (fldnum == 1 || fldnum == -1)
    4412          12 :             PG_RETURN_TEXT_P(inputstring);
    4413             :         else
    4414          12 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4415             :     }
    4416             : 
    4417             :     /* find the first field separator */
    4418          60 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
    4419             : 
    4420          60 :     found = text_position_next(&state);
    4421             : 
    4422             :     /* special case if fldsep not found at all */
    4423          60 :     if (!found)
    4424             :     {
    4425          12 :         text_position_cleanup(&state);
    4426             :         /* if first or last field, return input string, else empty string */
    4427          12 :         if (fldnum == 1 || fldnum == -1)
    4428           6 :             PG_RETURN_TEXT_P(inputstring);
    4429             :         else
    4430           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4431             :     }
    4432             : 
    4433             :     /*
    4434             :      * take care of a negative field number (i.e. count from the right) by
    4435             :      * converting to a positive field number; we need total number of fields
    4436             :      */
    4437          48 :     if (fldnum < 0)
    4438             :     {
    4439             :         /* we found a fldsep, so there are at least two fields */
    4440          24 :         int         numfields = 2;
    4441             : 
    4442          36 :         while (text_position_next(&state))
    4443          12 :             numfields++;
    4444             : 
    4445             :         /*
                     :          * special case of last field does not require an extra pass: the
                     :          * search state still refers to the last separator found, so the
                     :          * field is everything after it
                     :          */
    4446          24 :         if (fldnum == -1)
    4447             :         {
    4448           6 :             start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
    4449           6 :             end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
    4450           6 :             text_position_cleanup(&state);
    4451           6 :             PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
    4452             :                                                       end_ptr - start_ptr));
    4453             :         }
    4454             : 
    4455             :         /* else, convert fldnum to positive notation */
    4456          18 :         fldnum += numfields + 1;
    4457             : 
    4458             :         /* if nonexistent field, return empty string */
    4459          18 :         if (fldnum <= 0)
    4460             :         {
    4461           6 :             text_position_cleanup(&state);
    4462           6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4463             :         }
    4464             : 
    4465             :         /* reset to pointing at first match, but now with positive fldnum */
    4466          12 :         text_position_reset(&state);
    4467          12 :         found = text_position_next(&state);
    4468             :         Assert(found);
    4469             :     }
    4470             : 
    4471             :     /*
                     :      * identify bounds of first field, then step forward one separator at
                     :      * a time until fldnum has counted down to zero or no further
                     :      * separator is found
                     :      */
    4472          36 :     start_ptr = VARDATA_ANY(inputstring);
    4473          36 :     end_ptr = text_position_get_match_ptr(&state);
    4474             : 
    4475          66 :     while (found && --fldnum > 0)
    4476             :     {
    4477             :         /* identify bounds of next field */
    4478          30 :         start_ptr = end_ptr + fldsep_len;
    4479          30 :         found = text_position_next(&state);
    4480          30 :         if (found)
    4481          18 :             end_ptr = text_position_get_match_ptr(&state);
    4482             :     }
    4483             : 
    4484          36 :     text_position_cleanup(&state);
    4485             : 
    4486          36 :     if (fldnum > 0)
    4487             :     {
    4488             :         /* N'th field separator not found */
    4489             :         /* if last field requested, return it, else empty string */
    4490          12 :         if (fldnum == 1)
    4491             :         {
    4492           6 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);
    4493             : 
    4494           6 :             result_text = cstring_to_text_with_len(start_ptr,
    4495             :                                                    inputstring_len - last_len);
    4496             :         }
    4497             :         else
    4498           6 :             result_text = cstring_to_text("");
    4499             :     }
    4500             :     else
    4501             :     {
    4502             :         /* non-last field requested */
    4503          24 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
    4504             :     }
    4505             : 
    4506          36 :     PG_RETURN_TEXT_P(result_text);
    4507             : }
    4508             : 
    4509             : /*
    4510             :  * Convenience function to return true when two text params are equal.
                     :  *
                     :  * The comparison is delegated to texteq, invoked through
                     :  * DirectFunctionCall2Coll with collid as the collation, and its Datum
                     :  * result is converted back to a C bool.
    4511             :  */
    4512             : static bool
    4513         348 : text_isequal(text *txt1, text *txt2, Oid collid)
    4514             : {
    4515         348 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
    4516             :                                                 collid,
    4517             :                                                 PointerGetDatum(txt1),
    4518             :                                                 PointerGetDatum(txt2)));
    4519             : }
    4520             : 
    4521             : /*
    4522             :  * text_to_array
    4523             :  * parse input string and return text array of elements,
    4524             :  * based on provided field separator
                     :  *
                     :  * The splitting itself is done by split_text.  Three outcomes are
                     :  * distinguished here:
                     :  *  - split_text returns false: the result is SQL NULL;
                     :  *  - split_text returns true but accumulated nothing (astate is still
                     :  *    NULL): the result is an empty text array;
                     :  *  - otherwise the accumulated state is turned into an array with
                     :  *    makeArrayResult in CurrentMemoryContext.
    4525             :  */
    4526             : Datum
    4527         146 : text_to_array(PG_FUNCTION_ARGS)
    4528             : {
    4529             :     SplitTextOutputData tstate;
    4530             : 
    4531             :     /*
                     :      * For array output, tstate should start as all zeroes; in particular
                     :      * the tupstore field must be unset so that split_text_accum_result
                     :      * takes its array-accumulation branch.
                     :      */
    4532         146 :     memset(&tstate, 0, sizeof(tstate));
    4533             : 
    4534         146 :     if (!split_text(fcinfo, &tstate))
    4535           6 :         PG_RETURN_NULL();
    4536             : 
    4537         128 :     if (tstate.astate == NULL)
    4538           6 :         PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4539             : 
    4540         122 :     PG_RETURN_DATUM(makeArrayResult(tstate.astate,
    4541             :                                     CurrentMemoryContext));
    4542             : }
    4543             : 
    4544             : /*
    4545             :  * text_to_array_null
    4546             :  * parse input string and return text array of elements,
    4547             :  * based on provided field separator and null string
    4548             :  *
    4549             :  * This is a separate entry point only to prevent the regression tests from
    4550             :  * complaining about different argument sets for the same internal function.
                     :  *
                     :  * It simply forwards the same fcinfo to text_to_array; the optional third
                     :  * argument (the null string) is picked up by split_text, which checks
                     :  * PG_NARGS() before looking at it.
    4551             :  */
    4552             : Datum
    4553          60 : text_to_array_null(PG_FUNCTION_ARGS)
    4554             : {
    4555          60 :     return text_to_array(fcinfo);
    4556             : }
    4557             : 
    4558             : /*
    4559             :  * text_to_table
    4560             :  * parse input string and return table of elements,
    4561             :  * based on provided field separator
                     :  *
                     :  * The result is delivered through the ReturnSetInfo in fcinfo->resultinfo:
                     :  * InitMaterializedSRF is called with MAT_SRF_USE_EXPECTED_DESC, and the
                     :  * resulting setResult/setDesc are handed to split_text via tstate, so each
                     :  * field is stored as one row by split_text_accum_result.
                     :  *
                     :  * The boolean result of split_text is deliberately ignored: when it
                     :  * reports a null result, no rows have been added.  The function's own
                     :  * return value is always (Datum) 0.
    4562             :  */
    4563             : Datum
    4564          84 : text_to_table(PG_FUNCTION_ARGS)
    4565             : {
    4566          84 :     ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
    4567             :     SplitTextOutputData tstate;
    4568             : 
    4569          84 :     tstate.astate = NULL;
    4570          84 :     InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
    4571          84 :     tstate.tupstore = rsi->setResult;
    4572          84 :     tstate.tupdesc = rsi->setDesc;
    4573             : 
    4574          84 :     (void) split_text(fcinfo, &tstate);
    4575             : 
    4576          84 :     return (Datum) 0;
    4577             : }
    4578             : 
    4579             : /*
    4580             :  * text_to_table_null
    4581             :  * parse input string and return table of elements,
    4582             :  * based on provided field separator and null string
    4583             :  *
    4584             :  * This is a separate entry point only to prevent the regression tests from
    4585             :  * complaining about different argument sets for the same internal function.
                     :  *
                     :  * It simply forwards the same fcinfo to text_to_table; the optional third
                     :  * argument (the null string) is picked up by split_text, which checks
                     :  * PG_NARGS() before looking at it.
    4586             :  */
    4587             : Datum
    4588          24 : text_to_table_null(PG_FUNCTION_ARGS)
    4589             : {
    4590          24 :     return text_to_table(fcinfo);
    4591             : }
    4592             : 
    4593             : /*
    4594             :  * Common code for text_to_array, text_to_array_null, text_to_table
    4595             :  * and text_to_table_null functions.
    4596             :  *
    4597             :  * These are not strict so we have to test for null inputs explicitly.
    4598             :  * Returns false if result is to be null, else returns true.
    4599             :  *
    4600             :  * Note that if the result is valid but empty (zero elements), we return
    4601             :  * without changing *tstate --- caller must handle that case, too.
                     :  *
                     :  * Arguments are read from fcinfo:
                     :  *  0: input string; if NULL, the result is null (return false).
                     :  *  1: field separator; may be NULL, in which case every character of
                     :  *     the input becomes its own element.
                     :  *  2: optional null string; fields equal to it are emitted as NULL.
                     :  *     Only examined when PG_NARGS() > 2 and the argument is not NULL.
                     :  *
                     :  * Each field is handed to split_text_accum_result, which stores it in
                     :  * whichever output (array or tuplestore) tstate describes.  Temporary
                     :  * per-field text values built here are pfree'd right after that call.
    4602             :  */
    4603             : static bool
    4604         230 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
    4605             : {
    4606             :     text       *inputstring;
    4607             :     text       *fldsep;
    4608             :     text       *null_string;
    4609         230 :     Oid         collation = PG_GET_COLLATION();
    4610             :     int         inputstring_len;
    4611             :     int         fldsep_len;
    4612             :     char       *start_ptr;
    4613             :     text       *result_text;
    4614             : 
    4615             :     /* when input string is NULL, then result is NULL too */
    4616         230 :     if (PG_ARGISNULL(0))
    4617          12 :         return false;
    4618             : 
    4619         218 :     inputstring = PG_GETARG_TEXT_PP(0);
    4620             : 
    4621             :     /* fldsep can be NULL */
    4622         218 :     if (!PG_ARGISNULL(1))
    4623         188 :         fldsep = PG_GETARG_TEXT_PP(1);
    4624             :     else
    4625          30 :         fldsep = NULL;
    4626             : 
    4627             :     /* null_string can be NULL or omitted */
    4628         218 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4629          84 :         null_string = PG_GETARG_TEXT_PP(2);
    4630             :     else
    4631         134 :         null_string = NULL;
    4632             : 
    4633         218 :     if (fldsep != NULL)
    4634             :     {
    4635             :         /*
    4636             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4637             :          * to search for occurrences of fldsep.
    4638             :          */
    4639             :         TextPositionState state;
    4640             : 
    4641         188 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4642         188 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
    4643             : 
    4644             :         /* return empty set for empty input string */
    4645         188 :         if (inputstring_len < 1)
    4646          60 :             return true;
    4647             : 
    4648             :         /* empty field separator: return input string as a one-element set */
    4649         176 :         if (fldsep_len < 1)
    4650             :         {
    4651          48 :             split_text_accum_result(tstate, inputstring,
    4652             :                                     null_string, collation);
    4653          48 :             return true;
    4654             :         }
    4655             : 
    4656         128 :         text_position_setup(inputstring, fldsep, collation, &state);
    4657             : 
    4658         116 :         start_ptr = VARDATA_ANY(inputstring);
    4659             : 
    4660             :         for (;;)
    4661         464 :         {
    4662             :             bool        found;
    4663             :             char       *end_ptr;
    4664             :             int         chunk_len;
    4665             : 
    4666         580 :             CHECK_FOR_INTERRUPTS();
    4667             : 
    4668         580 :             found = text_position_next(&state);
    4669         580 :             if (!found)
    4670             :             {
    4671             :                 /* fetch last field */
    4672         116 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4673         116 :                 end_ptr = NULL; /* not used, but some compilers complain */
    4674             :             }
    4675             :             else
    4676             :             {
    4677             :                 /* fetch non-last field */
    4678         464 :                 end_ptr = text_position_get_match_ptr(&state);
    4679         464 :                 chunk_len = end_ptr - start_ptr;
    4680             :             }
    4681             : 
    4682             :             /* build a temp text datum to pass to split_text_accum_result */
    4683         580 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4684             : 
    4685             :             /* stash away this field */
    4686         580 :             split_text_accum_result(tstate, result_text,
    4687             :                                     null_string, collation);
    4688             : 
    4689         580 :             pfree(result_text);
    4690             : 
    4691         580 :             if (!found)
    4692         116 :                 break;
    4693             : 
    4694         464 :             start_ptr = end_ptr + fldsep_len;
    4695             :         }
    4696             : 
    4697         116 :         text_position_cleanup(&state);
    4698             :     }
    4699             :     else
    4700             :     {
    4701             :         /*
    4702             :          * When fldsep is NULL, each character in the input string becomes a
    4703             :          * separate element in the result set.  The separator is effectively
    4704             :          * the space between characters.
                     :          *
                     :          * The loop steps by pg_mblen() bytes at a time and counts
                     :          * inputstring_len down by the same amount, so an empty input
                     :          * produces no elements at all.
    4705             :          */
    4706          30 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4707             : 
    4708          30 :         start_ptr = VARDATA_ANY(inputstring);
    4709             : 
    4710         252 :         while (inputstring_len > 0)
    4711             :         {
    4712         222 :             int         chunk_len = pg_mblen(start_ptr);
    4713             : 
    4714         222 :             CHECK_FOR_INTERRUPTS();
    4715             : 
    4716             :             /* build a temp text datum to pass to split_text_accum_result */
    4717         222 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4718             : 
    4719             :             /* stash away this field */
    4720         222 :             split_text_accum_result(tstate, result_text,
    4721             :                                     null_string, collation);
    4722             : 
    4723         222 :             pfree(result_text);
    4724             : 
    4725         222 :             start_ptr += chunk_len;
    4726         222 :             inputstring_len -= chunk_len;
    4727             :         }
    4728             :     }
    4729             : 
    4730         146 :     return true;
    4731             : }
    4732             : 
    4733             : /*
    4734             :  * Add text item to result set (table or array).
    4735             :  *
    4736             :  * This is also responsible for checking to see if the item matches
    4737             :  * the null_string, in which case we should emit NULL instead.
                     :  *
                     :  * The output is selected by tstate->tupstore: when it is set, the item is
                     :  * stored as a one-column row via tuplestore_putvalues using
                     :  * tstate->tupdesc; otherwise it is appended to tstate->astate with
                     :  * accumArrayResult (element type TEXTOID, CurrentMemoryContext).
                     :  *
                     :  * null_string may be a null pointer, in which case no item is ever
                     :  * treated as NULL.  The comparison uses text_isequal with the given
                     :  * collation.
    4738             :  */
    4739             : static void
    4740         850 : split_text_accum_result(SplitTextOutputData *tstate,
    4741             :                         text *field_value,
    4742             :                         text *null_string,
    4743             :                         Oid collation)
    4744             : {
    4745         850 :     bool        is_null = false;
    4746             : 
    4747         850 :     if (null_string && text_isequal(field_value, null_string, collation))
    4748          60 :         is_null = true;
    4749             : 
    4750         850 :     if (tstate->tupstore)
    4751             :     {
    4752             :         Datum       values[1];
    4753             :         bool        nulls[1];
    4754             : 
    4755         228 :         values[0] = PointerGetDatum(field_value);
    4756         228 :         nulls[0] = is_null;
    4757             : 
    4758         228 :         tuplestore_putvalues(tstate->tupstore,
    4759             :                              tstate->tupdesc,
    4760             :                              values,
    4761             :                              nulls);
    4762             :     }
    4763             :     else
    4764             :     {
    4765         622 :         tstate->astate = accumArrayResult(tstate->astate,
    4766             :                                           PointerGetDatum(field_value),
    4767             :                                           is_null,
    4768             :                                           TEXTOID,
    4769             :                                           CurrentMemoryContext);
    4770             :     }
    4771         850 : }
    4772             : 
    4773             : /*
    4774             :  * array_to_text
    4775             :  * concatenate Cstring representation of input array elements
    4776             :  * using provided field separator
                     :  *
                     :  * Arguments (fetched from fcinfo): 0 = the array, 1 = field separator
                     :  * (text, converted to a C string).  The work is done by
                     :  * array_to_text_internal, which is passed NULL as the null string.
    4777             :  */
    4778             : Datum
    4779       71730 : array_to_text(PG_FUNCTION_ARGS)
    4780             : {
    4781       71730 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    4782       71730 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4783             : 
    4784       71730 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
    4785             : }
    4786             : 
    4787             : /*
    4788             :  * array_to_text_null
    4789             :  * concatenate Cstring representation of input array elements
    4790             :  * using provided field separator and null string
    4791             :  *
    4792             :  * This version is not strict so we have to test for null inputs explicitly.
                     :  *
                     :  * Arguments (fetched from fcinfo): 0 = the array, 1 = field separator
                     :  * (text), 2 = null string (text).  The result is SQL NULL when argument 0
                     :  * or 1 is NULL.  A NULL argument 2 is not an error: it is forwarded to
                     :  * array_to_text_internal as a null pointer.
    4793             :  */
    4794             : Datum
    4795          12 : array_to_text_null(PG_FUNCTION_ARGS)
    4796             : {
    4797             :     ArrayType  *v;
    4798             :     char       *fldsep;
    4799             :     char       *null_string;
    4800             : 
    4801             :     /* returns NULL when first or second parameter is NULL */
    4802          12 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    4803           0 :         PG_RETURN_NULL();
    4804             : 
    4805          12 :     v = PG_GETARG_ARRAYTYPE_P(0);
    4806          12 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4807             : 
    4808             :     /* NULL null string is passed through as a null pointer */
    4809          12 :     if (!PG_ARGISNULL(2))
    4810           6 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    4811             :     else
    4812           6 :         null_string = NULL;
    4813             : 
    4814          12 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    4815             : }
    4816             : 
    4817             : /*
    4818             :  * common code for array_to_text and array_to_text_null functions
                     :  * (concat_internal also hands its VARIADIC-array case to this routine)
                     :  *
                     :  * Builds a text value holding the output-function representation of each
                     :  * element of "v", with "fldsep" placed between consecutive items.  Null
                     :  * elements are emitted as "null_string", or skipped entirely when
                     :  * null_string is NULL.  An array with zero elements yields an empty string.
                     :  *
                     :  * The element type's I/O metadata is cached in fcinfo->flinfo->fn_extra
                     :  * (allocated in fn_mcxt) and is refreshed only when the array's element
                     :  * type differs from the cached one.
    4819             :  */
    4820             : static text *
    4821       71760 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    4822             :                        const char *fldsep, const char *null_string)
    4823             : {
    4824             :     text       *result;
    4825             :     int         nitems,
    4826             :                *dims,
    4827             :                 ndims;
    4828             :     Oid         element_type;
    4829             :     int         typlen;
    4830             :     bool        typbyval;
    4831             :     char        typalign;
    4832             :     StringInfoData buf;
    4833       71760 :     bool        printed = false;
    4834             :     char       *p;
    4835             :     bits8      *bitmap;
    4836             :     int         bitmask;
    4837             :     int         i;
    4838             :     ArrayMetaState *my_extra;
    4839             : 
    4840       71760 :     ndims = ARR_NDIM(v);
    4841       71760 :     dims = ARR_DIMS(v);
    4842       71760 :     nitems = ArrayGetNItems(ndims, dims);
    4843             : 
    4844             :     /* if there are no elements, return an empty string */
    4845       71760 :     if (nitems == 0)
    4846       47202 :         return cstring_to_text_with_len("", 0);
    4847             : 
    4848       24558 :     element_type = ARR_ELEMTYPE(v);
    4849       24558 :     initStringInfo(&buf);
    4850             : 
    4851             :     /*
    4852             :      * We arrange to look up info about element type, including its output
    4853             :      * conversion proc, only once per series of calls, assuming the element
    4854             :      * type doesn't change underneath us.
                     :      *
                     :      * A freshly allocated cache entry is stamped with ~element_type, which
                     :      * can never equal element_type, so the comparison further down always
                     :      * performs the lookup the first time through.
    4855             :      */
    4856       24558 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4857       24558 :     if (my_extra == NULL)
    4858             :     {
    4859        1410 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4860             :                                                       sizeof(ArrayMetaState));
    4861        1410 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4862        1410 :         my_extra->element_type = ~element_type;
    4863             :     }
    4864             : 
    4865       24558 :     if (my_extra->element_type != element_type)
    4866             :     {
    4867             :         /*
    4868             :          * Get info about element type, including its output conversion proc
    4869             :          */
    4870        1410 :         get_type_io_data(element_type, IOFunc_output,
    4871             :                          &my_extra->typlen, &my_extra->typbyval,
    4872             :                          &my_extra->typalign, &my_extra->typdelim,
    4873             :                          &my_extra->typioparam, &my_extra->typiofunc);
    4874        1410 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    4875        1410 :                       fcinfo->flinfo->fn_mcxt);
    4876        1410 :         my_extra->element_type = element_type;
    4877             :     }
    4878       24558 :     typlen = my_extra->typlen;
    4879       24558 :     typbyval = my_extra->typbyval;
    4880       24558 :     typalign = my_extra->typalign;
    4881             : 
    4882       24558 :     p = ARR_DATA_PTR(v);
    4883       24558 :     bitmap = ARR_NULLBITMAP(v);
    4884       24558 :     bitmask = 1;
    4885             : 
    4886       83948 :     for (i = 0; i < nitems; i++)
    4887             :     {
    4888             :         Datum       itemvalue;
    4889             :         char       *value;
    4890             : 
    4891             :         /*
                     :          * Get source element, checking for NULL.  A clear bit in the null
                     :          * bitmap marks a null element; when there is no bitmap, every
                     :          * element is treated as non-null.  "printed" records whether
                     :          * anything has been emitted yet, so that fldsep goes only between
                     :          * items.  The data pointer p is advanced only for non-null elements.
                     :          */
    4892       59390 :         if (bitmap && (*bitmap & bitmask) == 0)
    4893             :         {
    4894             :             /* if null_string is NULL, we just ignore null elements */
    4895          18 :             if (null_string != NULL)
    4896             :             {
    4897           6 :                 if (printed)
    4898           6 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
    4899             :                 else
    4900           0 :                     appendStringInfoString(&buf, null_string);
    4901           6 :                 printed = true;
    4902             :             }
    4903             :         }
    4904             :         else
    4905             :         {
    4906       59372 :             itemvalue = fetch_att(p, typbyval, typlen);
    4907             : 
    4908       59372 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
    4909             : 
    4910       59372 :             if (printed)
    4911       34814 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    4912             :             else
    4913       24558 :                 appendStringInfoString(&buf, value);
    4914       59372 :             printed = true;
    4915             : 
    4916       59372 :             p = att_addlength_pointer(p, typlen, p);
    4917       59372 :             p = (char *) att_align_nominal(p, typalign);
    4918             :         }
    4919             : 
    4920             :         /*
                     :          * advance bitmap pointer if any: bitmask selects one bit of the
                     :          * current bitmap byte, and once it has been shifted past the eighth
                     :          * bit (0x100) we step to the next byte and start again at bit 0
                     :          */
    4921       59390 :         if (bitmap)
    4922             :         {
    4923         108 :             bitmask <<= 1;
    4924         108 :             if (bitmask == 0x100)
    4925             :             {
    4926           0 :                 bitmap++;
    4927           0 :                 bitmask = 1;
    4928             :             }
    4929             :         }
    4930             :     }
    4931             : 
    4932       24558 :     result = cstring_to_text_with_len(buf.data, buf.len);
    4933       24558 :     pfree(buf.data);
    4934             : 
    4935       24558 :     return result;
    4936             : }
    4937             : 
    4938             : /*
    4939             :  * Workhorse for to_bin, to_oct, and to_hex.  Note that base must be > 1 and <=
    4940             :  * 16.
                     :  *
                     :  * Digits are produced least-significant first and stored backwards from the
                     :  * end of a local buffer, so the finished string reads most-significant digit
                     :  * first.  The loop body always runs at least once, so a value of zero yields
                     :  * the single digit "0".  Digits above 9 are written in lower case, and no
                     :  * leading zeros or prefix are emitted.
    4941             :  */
    4942             : static inline text *
    4943       38750 : convert_to_base(uint64 value, int base)
    4944             : {
    4945       38750 :     const char *digits = "0123456789abcdef";
    4946             : 
    4947             :     /*
                     :      * We size the buffer for to_bin's longest possible return value (one
                     :      * character per bit of a uint64).  The buffer is never NUL-terminated;
                     :      * the result is built from ptr plus an explicit length.
                     :      */
    4948             :     char        buf[sizeof(uint64) * BITS_PER_BYTE];
    4949       38750 :     char       *const end = buf + sizeof(buf);
    4950       38750 :     char       *ptr = end;
    4951             : 
    4952             :     Assert(base > 1);
    4953             :     Assert(base <= 16);
    4954             : 
    4955             :     do
    4956             :     {
    4957       75974 :         *--ptr = digits[value % base];
    4958       75974 :         value /= base;
    4959       75974 :     } while (ptr > buf && value);
    4960             : 
    4961       38750 :     return cstring_to_text_with_len(ptr, end - ptr);
    4962             : }
    4963             : 
    4964             : /*
    4965             :  * Convert an integer to a string containing a base-2 (binary) representation
    4966             :  * of the number.
                     :  *
                     :  * to_bin32 fetches its argument with PG_GETARG_INT32 and to_bin64 with
                     :  * PG_GETARG_INT64.  In both cases the argument is first cast to the unsigned
                     :  * type of the same width, so a negative input is rendered as its 32-bit
                     :  * (resp. 64-bit) two's-complement bit pattern, and the 32-bit variant is not
                     :  * sign-extended when widened to uint64 for convert_to_base.
    4967             :  */
    4968             : Datum
    4969          12 : to_bin32(PG_FUNCTION_ARGS)
    4970             : {
    4971          12 :     uint64      value = (uint32) PG_GETARG_INT32(0);
    4972             : 
    4973          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 2));
    4974             : }
    4975             : Datum
    4976          12 : to_bin64(PG_FUNCTION_ARGS)
    4977             : {
    4978          12 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    4979             : 
    4980          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 2));
    4981             : }
    4982             : 
    4983             : /*
    4984             :  * Convert an integer to a string containing a base-8 (oct) representation of
    4985             :  * the number.
                     :  *
                     :  * to_oct32 fetches its argument with PG_GETARG_INT32 and to_oct64 with
                     :  * PG_GETARG_INT64.  In both cases the argument is first cast to the unsigned
                     :  * type of the same width, so a negative input is rendered from its 32-bit
                     :  * (resp. 64-bit) two's-complement bit pattern, and the 32-bit variant is not
                     :  * sign-extended when widened to uint64 for convert_to_base.
    4986             :  */
    4987             : Datum
    4988          12 : to_oct32(PG_FUNCTION_ARGS)
    4989             : {
    4990          12 :     uint64      value = (uint32) PG_GETARG_INT32(0);
    4991             : 
    4992          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 8));
    4993             : }
    4994             : Datum
    4995          12 : to_oct64(PG_FUNCTION_ARGS)
    4996             : {
    4997          12 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    4998             : 
    4999          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 8));
    5000             : }
    5001             : 
    5002             : /*
    5003             :  * Convert an integer to a string containing a base-16 (hex) representation of
    5004             :  * the number.
                     :  *
                     :  * to_hex32 fetches its argument with PG_GETARG_INT32 and to_hex64 with
                     :  * PG_GETARG_INT64.  In both cases the argument is first cast to the unsigned
                     :  * type of the same width, so a negative input is rendered from its 32-bit
                     :  * (resp. 64-bit) two's-complement bit pattern, and the 32-bit variant is not
                     :  * sign-extended when widened to uint64 for convert_to_base.
    5005             :  */
    5006             : Datum
    5007       38690 : to_hex32(PG_FUNCTION_ARGS)
    5008             : {
    5009       38690 :     uint64      value = (uint32) PG_GETARG_INT32(0);
    5010             : 
    5011       38690 :     PG_RETURN_TEXT_P(convert_to_base(value, 16));
    5012             : }
    5013             : Datum
    5014          12 : to_hex64(PG_FUNCTION_ARGS)
    5015             : {
    5016          12 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    5017             : 
    5018          12 :     PG_RETURN_TEXT_P(convert_to_base(value, 16));
    5019             : }
    5020             : 
    5021             : /*
    5022             :  * Return the size of a datum, possibly compressed
    5023             :  *
    5024             :  * Works on any data type
                     :  *
                     :  * The argument type's typlen is looked up on the first call and cached in
                     :  * fn_extra (allocated in fn_mcxt).  The int32 result is then chosen by
                     :  * typlen: -1 gives toast_datum_size() of the value, -2 gives the string
                     :  * length plus one, and any other value is returned as-is.
    5025             :  */
    5026             : Datum
    5027         122 : pg_column_size(PG_FUNCTION_ARGS)
    5028             : {
    5029         122 :     Datum       value = PG_GETARG_DATUM(0);
    5030             :     int32       result;
    5031             :     int         typlen;
    5032             : 
    5033             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5034         122 :     if (fcinfo->flinfo->fn_extra == NULL)
    5035             :     {
    5036             :         /* Lookup the datatype of the supplied argument */
    5037         122 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5038             : 
    5039         122 :         typlen = get_typlen(argtypeid);
    5040         122 :         if (typlen == 0)        /* should not happen */
    5041           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5042             : 
    5043         122 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5044             :                                                       sizeof(int));
    5045         122 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5046             :     }
    5047             :     else
    5048           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
    5049             : 
    5050         122 :     if (typlen == -1)
    5051             :     {
    5052             :         /* varlena type, possibly toasted */
    5053         122 :         result = toast_datum_size(value);
    5054             :     }
    5055           0 :     else if (typlen == -2)
    5056             :     {
    5057             :         /* cstring: length of the string plus its terminating NUL byte */
    5058           0 :         result = strlen(DatumGetCString(value)) + 1;
    5059             :     }
    5060             :     else
    5061             :     {
    5062             :         /* ordinary fixed-width type */
    5063           0 :         result = typlen;
    5064             :     }
    5065             : 
    5066         122 :     PG_RETURN_INT32(result);
    5067             : }
    5068             : 
    5069             : /*
    5070             :  * Return the compression method stored in the compressed attribute.  Return
    5071             :  * NULL for non varlena type or uncompressed data.
                     :  *
                     :  * The argument type's typlen is looked up on the first call and cached in
                     :  * fn_extra.  For a varlena argument (typlen -1) the compression id is taken
                     :  * from toast_get_compression_id(); TOAST_INVALID_COMPRESSION_ID yields NULL,
                     :  * the pglz and lz4 ids yield the text values "pglz" and "lz4", and any other
                     :  * id raises an error.
    5072             :  */
    5073             : Datum
    5074         162 : pg_column_compression(PG_FUNCTION_ARGS)
    5075             : {
    5076             :     int         typlen;
    5077             :     char       *result;
    5078             :     ToastCompressionId cmid;
    5079             : 
    5080             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5081         162 :     if (fcinfo->flinfo->fn_extra == NULL)
    5082             :     {
    5083             :         /* Lookup the datatype of the supplied argument */
    5084         108 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5085             : 
    5086         108 :         typlen = get_typlen(argtypeid);
    5087         108 :         if (typlen == 0)        /* should not happen */
    5088           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5089             : 
    5090         108 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5091             :                                                       sizeof(int));
    5092         108 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5093             :     }
    5094             :     else
    5095          54 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
    5096             : 
    5097         162 :     if (typlen != -1)
    5098           0 :         PG_RETURN_NULL();
    5099             : 
    5100             :     /*
                     :      * get the compression method id stored in the compressed varlena; the
                     :      * raw argument datum is inspected directly, without PG_GETARG_TEXT_PP
                     :      * or a similar fetch macro
                     :      */
    5101         162 :     cmid = toast_get_compression_id((struct varlena *)
    5102         162 :                                     DatumGetPointer(PG_GETARG_DATUM(0)));
    5103         162 :     if (cmid == TOAST_INVALID_COMPRESSION_ID)
    5104           6 :         PG_RETURN_NULL();
    5105             : 
    5106             :     /* convert compression method id to compression method name */
    5107         156 :     switch (cmid)
    5108             :     {
    5109          66 :         case TOAST_PGLZ_COMPRESSION_ID:
    5110          66 :             result = "pglz";
    5111          66 :             break;
    5112          90 :         case TOAST_LZ4_COMPRESSION_ID:
    5113          90 :             result = "lz4";
    5114          90 :             break;
    5115           0 :         default:
    5116           0 :             elog(ERROR, "invalid compression method id %d", cmid);
    5117             :     }
    5118             : 
    5119         156 :     PG_RETURN_TEXT_P(cstring_to_text(result));
    5120             : }
    5121             : 
    5122             : /*
    5123             :  * Return the chunk_id of the on-disk TOASTed value.  Return NULL if the value
    5124             :  * is un-TOASTed or not on-disk.
                     :  *
                     :  * NULL is also returned when the argument's type is not varlena (typlen is
                     :  * not -1).  The typlen is looked up on the first call and cached in
                     :  * fn_extra.  The returned OID is the va_valueid field of the value's TOAST
                     :  * pointer; the raw argument datum is inspected directly for this.
    5125             :  */
    5126             : Datum
    5127          12 : pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
    5128             : {
    5129             :     int         typlen;
    5130             :     struct varlena *attr;
    5131             :     struct varatt_external toast_pointer;
    5132             : 
    5133             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    5134          12 :     if (fcinfo->flinfo->fn_extra == NULL)
    5135             :     {
    5136             :         /* Lookup the datatype of the supplied argument */
    5137          12 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    5138             : 
    5139          12 :         typlen = get_typlen(argtypeid);
    5140          12 :         if (typlen == 0)        /* should not happen */
    5141           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    5142             : 
    5143          12 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5144             :                                                       sizeof(int));
    5145          12 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    5146             :     }
    5147             :     else
    5148           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
    5149             : 
    5150          12 :     if (typlen != -1)
    5151           0 :         PG_RETURN_NULL();
    5152             : 
    5153          12 :     attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
    5154             : 
    5155          12 :     if (!VARATT_IS_EXTERNAL_ONDISK(attr))
    5156           6 :         PG_RETURN_NULL();
    5157             : 
    5158           6 :     VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
    5159             : 
    5160           6 :     PG_RETURN_OID(toast_pointer.va_valueid);
    5161             : }
    5162             : 
    5163             : /*
    5164             :  * string_agg - Concatenates values and returns string.
    5165             :  *
    5166             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    5167             :  *
    5168             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    5169             :  * actually used at all, and on subsequent calls the delimiter precedes
    5170             :  * the associated value.
    5171             :  */
    5172             : 
    5173             : /*
                     :  * subroutine to initialize state
                     :  *
                     :  * Returns a new StringInfo created by makeStringInfo() while the aggregate
                     :  * memory context reported by AggCheckCallContext() is current; the caller's
                     :  * memory context is restored before returning.  Raises an error when not
                     :  * called in an aggregate context.
                     :  *
                     :  * NOTE(review): the error text names string_agg_transfn, but
                     :  * string_agg_combine and string_agg_deserialize also call this routine.
                     :  */
    5174             : static StringInfo
    5175        2390 : makeStringAggState(FunctionCallInfo fcinfo)
    5176             : {
    5177             :     StringInfo  state;
    5178             :     MemoryContext aggcontext;
    5179             :     MemoryContext oldcontext;
    5180             : 
    5181        2390 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    5182             :     {
    5183             :         /* cannot be called directly because of internal-type argument */
    5184           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    5185             :     }
    5186             : 
    5187             :     /*
    5188             :      * Create state in aggregate context.  It'll stay there across subsequent
    5189             :      * calls.
    5190             :      */
    5191        2390 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    5192        2390 :     state = makeStringInfo();
    5193        2390 :     MemoryContextSwitchTo(oldcontext);
    5194             : 
    5195        2390 :     return state;
    5196             : }
    5197             : 
    5198             : Datum
    5199      861264 : string_agg_transfn(PG_FUNCTION_ARGS)
    5200             : {
    5201             :     StringInfo  state;
    5202             : 
    5203      861264 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5204             : 
    5205             :     /*
                     :      * Transition step: argument 0 is the running state (NULL until the
                     :      * first non-null value arrives), argument 1 the value and argument 2
                     :      * the delimiter.
                     :      *
                     :      * Append the value unless null, preceding it with the delimiter.  A
                     :      * null value leaves the state untouched; a null delimiter means the
                     :      * value is appended with nothing in front of it.
                     :      */
    5206      861264 :     if (!PG_ARGISNULL(1))
    5207             :     {
    5208      846216 :         text       *value = PG_GETARG_TEXT_PP(1);
    5209      846216 :         bool        isfirst = false;
    5210             : 
    5211             :         /*
    5212             :          * You might think we can just throw away the first delimiter, however
    5213             :          * we must keep it as we may be a parallel worker doing partial
    5214             :          * aggregation building a state to send to the main process.  We need
    5215             :          * to keep the delimiter of every aggregation so that the combine
    5216             :          * function can properly join up the strings of two separately
    5217             :          * partially aggregated results.  The first delimiter is only stripped
    5218             :          * off in the final function.  To know how much to strip off the front
    5219             :          * of the string, we store the length of the first delimiter in the
    5220             :          * StringInfo's cursor field, which we don't otherwise need here.
                     :          *
                     :          * If the first delimiter is NULL, cursor is not assigned here and
                     :          * keeps whatever value makeStringInfo() gave it.
    5221             :          */
    5222      846216 :         if (state == NULL)
    5223             :         {
    5224        1942 :             state = makeStringAggState(fcinfo);
    5225        1942 :             isfirst = true;
    5226             :         }
    5227             : 
    5228      846216 :         if (!PG_ARGISNULL(2))
    5229             :         {
    5230      846216 :             text       *delim = PG_GETARG_TEXT_PP(2);
    5231             : 
    5232      846216 :             appendStringInfoText(state, delim);
    5233      846216 :             if (isfirst)
    5234        1942 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
    5235             :         }
    5236             : 
    5237      846216 :         appendStringInfoText(state, value);
    5238             :     }
    5239             : 
    5240             :     /*
    5241             :      * The transition type for string_agg() is declared to be "internal",
    5242             :      * which is a pass-by-value type the same size as a pointer.
                     :      *
                     :      * If no non-null value has been seen yet there is still no state, and
                     :      * SQL NULL is returned instead.
    5243             :      */
    5244      861264 :     if (state)
    5245      861180 :         PG_RETURN_POINTER(state);
    5246          84 :     PG_RETURN_NULL();
    5247             : }
    5248             : 
    5249             : /*
    5250             :  * string_agg_combine
    5251             :  *      Aggregate combine function for string_agg(text) and string_agg(bytea)
                     :  *
                     :  * Merges state2 (argument 1) into state1 (argument 0) and returns state1.
                     :  * Either argument may be NULL: if both are, the result is NULL; if only
                     :  * state2 is, state1 is returned unchanged; if only state1 is, a new state
                     :  * is created holding a copy of state2's bytes and cursor.  Otherwise
                     :  * state2's bytes are appended to state1.  Raises an error when not called
                     :  * in an aggregate context.
    5252             :  */
    5253             : Datum
    5254         160 : string_agg_combine(PG_FUNCTION_ARGS)
    5255             : {
    5256             :     StringInfo  state1;
    5257             :     StringInfo  state2;
    5258             :     MemoryContext agg_context;
    5259             : 
    5260         160 :     if (!AggCheckCallContext(fcinfo, &agg_context))
    5261           0 :         elog(ERROR, "aggregate function called in non-aggregate context");
    5262             : 
    5263         160 :     state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5264         160 :     state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
    5265             : 
    5266         160 :     if (state2 == NULL)
    5267             :     {
    5268             :         /*
    5269             :          * NULL state2 is easy, just return state1, which we know is already
    5270             :          * in the agg_context
    5271             :          */
    5272           0 :         if (state1 == NULL)
    5273           0 :             PG_RETURN_NULL();
    5274           0 :         PG_RETURN_POINTER(state1);
    5275             :     }
    5276             : 
    5277         160 :     if (state1 == NULL)
    5278             :     {
    5279             :         /*
                     :          * We must copy state2's data into the agg_context.  The cursor
                     :          * (length of the leading delimiter) is carried over as well, since
                     :          * state2's bytes become the front of the combined string.
                     :          */
    5280             :         MemoryContext old_context;
    5281             : 
    5282         120 :         old_context = MemoryContextSwitchTo(agg_context);
    5283         120 :         state1 = makeStringAggState(fcinfo);
    5284         120 :         appendBinaryStringInfo(state1, state2->data, state2->len);
    5285         120 :         state1->cursor = state2->cursor;
    5286         120 :         MemoryContextSwitchTo(old_context);
    5287             :     }
    5288          40 :     else if (state2->len > 0)
    5289             :     {
    5290             :         /* Combine ... state1->cursor does not change in this case */
    5291          40 :         appendBinaryStringInfo(state1, state2->data, state2->len);
    5292             :     }
    5293             : 
    5294         160 :     PG_RETURN_POINTER(state1);
    5295             : }
    5296             : 
    5297             : /*
    5298             :  * string_agg_serialize
    5299             :  *      Aggregate serialize function for string_agg(text) and string_agg(bytea)
    5300             :  *
    5301             :  * This is strict, so we need not handle NULL input
                     :  *
                     :  * The resulting bytea holds the state's cursor as a 4-byte integer followed
                     :  * by the state's accumulated data bytes.
    5302             :  */
    5303             : Datum
    5304         160 : string_agg_serialize(PG_FUNCTION_ARGS)
    5305             : {
    5306             :     StringInfo  state;
    5307             :     StringInfoData buf;
    5308             :     bytea      *result;
    5309             : 
    5310             :     /* cannot be called directly because of internal-type argument */
    5311             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5312             : 
    5313         160 :     state = (StringInfo) PG_GETARG_POINTER(0);
    5314             : 
    5315         160 :     pq_begintypsend(&buf);
    5316             : 
    5317             :     /* cursor, sent as a 4-byte integer */
    5318         160 :     pq_sendint(&buf, state->cursor, 4);
    5319             : 
    5320             :     /* data: the accumulated bytes, with no separate length word */
    5321         160 :     pq_sendbytes(&buf, state->data, state->len);
    5322             : 
    5323         160 :     result = pq_endtypsend(&buf);
    5324             : 
    5325         160 :     PG_RETURN_BYTEA_P(result);
    5326             : }
    5327             : 
    5328             : /*
    5329             :  * string_agg_deserialize
    5330             :  *      Aggregate deserial function for string_agg(text) and string_agg(bytea)
    5331             :  *
    5332             :  * This is strict, so we need not handle NULL input
                     :  *
                     :  * Rebuilds a state from the layout written by string_agg_serialize: a
                     :  * 4-byte cursor followed by the data bytes.  The new state is obtained
                     :  * from makeStringAggState().
    5333             :  */
    5334             : Datum
    5335         160 : string_agg_deserialize(PG_FUNCTION_ARGS)
    5336             : {
    5337             :     bytea      *sstate;
    5338             :     StringInfo  result;
    5339             :     StringInfoData buf;
    5340             :     char       *data;
    5341             :     int         datalen;
    5342             : 
    5343             :     /* cannot be called directly because of internal-type argument */
    5344             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5345             : 
    5346         160 :     sstate = PG_GETARG_BYTEA_PP(0);
    5347             : 
    5348             :     /*
    5349             :      * Initialize a StringInfo so that we can "receive" it using the standard
    5350             :      * recv-function infrastructure.
    5351             :      */
    5352         160 :     initReadOnlyStringInfo(&buf, VARDATA_ANY(sstate),
    5353         160 :                            VARSIZE_ANY_EXHDR(sstate));
    5354             : 
    5355         160 :     result = makeStringAggState(fcinfo);
    5356             : 
    5357             :     /* cursor, read back as a 4-byte integer */
    5358         160 :     result->cursor = pq_getmsgint(&buf, 4);
    5359             : 
    5360             :     /* data: everything that follows the 4-byte cursor */
    5361         160 :     datalen = VARSIZE_ANY_EXHDR(sstate) - 4;
    5362         160 :     data = (char *) pq_getmsgbytes(&buf, datalen);
    5363         160 :     appendBinaryStringInfo(result, data, datalen);
    5364             : 
    5365         160 :     pq_getmsgend(&buf);
    5366             : 
    5367         160 :     PG_RETURN_POINTER(result);
    5368             : }
    5369             : 
    5370             : Datum
    5371        1994 : string_agg_finalfn(PG_FUNCTION_ARGS)
    5372             : {
    5373             :     StringInfo  state;
    5374             : 
    5375             :     /*
                     :      * Final step of string_agg: turn the accumulated state into the text
                     :      * result, or return NULL when there is no state.
                     :      *
                     :      * cannot be called directly because of internal-type argument
                     :      */
    5376             :     Assert(AggCheckCallContext(fcinfo, NULL));
    5377             : 
    5378        1994 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    5379             : 
    5380        1994 :     if (state != NULL)
    5381             :     {
    5382             :         /*
                     :          * As per comment in transfn, strip data before the cursor position:
                     :          * the result starts at data[cursor] and is len - cursor bytes long
                     :          */
    5383        1922 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(&state->data[state->cursor],
    5384             :                                                   state->len - state->cursor));
    5385             :     }
    5386             :     else
    5387          72 :         PG_RETURN_NULL();
    5388             : }
    5389             : 
    5390             : /*
    5391             :  * Prepare cache with fmgr info for the output functions of the datatypes of
    5392             :  * the arguments of a concat-like function, beginning with argument "argidx".
    5393             :  * (Arguments before that will have corresponding slots in the resulting
    5394             :  * FmgrInfo array, but we don't fill those slots.)
                     :  *
                     :  * The array has PG_NARGS() entries and is indexed by argument number.  It is
                     :  * stored in fcinfo->flinfo->fn_extra and also returned.  An error is raised
                     :  * if the data type of any argument from argidx onward cannot be determined.
    5395             :  */
    5396             : static FmgrInfo *
    5397          46 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
    5398             : {
    5399             :     FmgrInfo   *foutcache;
    5400             :     int         i;
    5401             : 
    5402             :     /* We keep the info in fn_mcxt so it survives across calls */
    5403          46 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    5404          46 :                                                 PG_NARGS() * sizeof(FmgrInfo));
    5405             : 
    5406         220 :     for (i = argidx; i < PG_NARGS(); i++)
    5407             :     {
    5408             :         Oid         valtype;
    5409             :         Oid         typOutput;
    5410             :         bool        typIsVarlena;
    5411             : 
    5412         174 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    5413         174 :         if (!OidIsValid(valtype))
    5414           0 :             elog(ERROR, "could not determine data type of concat() input");
    5415             : 
    5416         174 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
    5417         174 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
    5418             :     }
    5419             : 
    5420          46 :     fcinfo->flinfo->fn_extra = foutcache;
    5421             : 
    5422          46 :     return foutcache;
    5423             : }
    5424             : 
    5425             : /*
    5426             :  * Implementation of both concat() and concat_ws().
    5427             :  * Non-null arguments are converted with their type output functions and joined.
    5428             :  * sepstr is the separator string to place between values.
    5429             :  * argidx identifies the first argument to concatenate (counting from zero);
    5430             :  * it must be constant across any one series of calls, since the output-function
    5431             :  * cache built when fn_extra is NULL is indexed by argument number and then reused.
    5432             :  * Returns NULL if result should be NULL (e.g. VARIADIC NULL), else text value.
    5433             :  */
    5434             : static text *
    5435         114 : concat_internal(const char *sepstr, int argidx,
    5436             :                 FunctionCallInfo fcinfo)
    5437             : {
    5438             :     text       *result;
    5439             :     StringInfoData str;
    5440             :     FmgrInfo   *foutcache;
    5441         114 :     bool        first_arg = true;   /* no separator before the first value */
    5442             :     int         i;
    5443             : 
    5444             :     /*
    5445             :      * concat(VARIADIC some-array) is essentially equivalent to
    5446             :      * array_to_text(), ie concat the array elements with the given separator.
    5447             :      * So we just pass the case off to that code.
    5448             :      */
    5449         114 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5450             :     {
    5451             :         ArrayType  *arr;
    5452             : 
    5453             :         /* Should have just the one argument */
    5454             :         Assert(argidx == PG_NARGS() - 1);
    5455             : 
    5456             :         /* concat(VARIADIC NULL) is defined as NULL */
    5457          30 :         if (PG_ARGISNULL(argidx))
    5458          12 :             return NULL;
    5459             : 
    5460             :         /*
    5461             :          * Non-null argument had better be an array.  We assume that any call
    5462             :          * context that could let get_fn_expr_variadic return true will have
    5463             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    5464             :          * it should be okay to just Assert that it's an array rather than
    5465             :          * doing a full-fledged error check.
    5466             :          */
    5467             :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    5468             : 
    5469             :         /* OK, safe to fetch the array value */
    5470          18 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    5471             : 
    5472             :         /*
    5473             :          * And serialize the array.  We tell array_to_text to ignore null
    5474             :          * elements, which matches the behavior of the loop below.
    5475             :          */
    5476          18 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    5477             :     }
    5478             : 
    5479             :     /* Normal case without explicit VARIADIC marker */
    5480          84 :     initStringInfo(&str);
    5481             : 
    5482             :     /* Get output function info, building it if first time through */
    5483          84 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
    5484          84 :     if (foutcache == NULL)
    5485          46 :         foutcache = build_concat_foutcache(fcinfo, argidx);
    5486             : 
    5487         372 :     for (i = argidx; i < PG_NARGS(); i++)
    5488             :     {
    5489         288 :         if (!PG_ARGISNULL(i))   /* NULL arguments contribute nothing, not even a separator */
    5490             :         {
    5491         210 :             Datum       value = PG_GETARG_DATUM(i);
    5492             : 
    5493             :             /* add separator if appropriate */
    5494         210 :             if (first_arg)
    5495          78 :                 first_arg = false;
    5496             :             else
    5497         132 :                 appendStringInfoString(&str, sepstr);
    5498             : 
    5499             :             /* call the appropriate type output function, append the result */
    5500         210 :             appendStringInfoString(&str,
    5501         210 :                                    OutputFunctionCall(&foutcache[i], value));
    5502             :         }
    5503             :     }
    5504             : 
    5505          84 :     result = cstring_to_text_with_len(str.data, str.len);
    5506          84 :     pfree(str.data);            /* working buffer is not needed once result is built */
    5507             : 
    5508          84 :     return result;
    5509             : }
    5510             : 
    5511             : /*
    5512             :  * Concatenate all arguments (empty separator, starting at argument 0). NULL arguments are ignored.
    5513             :  */
    5514             : Datum
    5515          36 : text_concat(PG_FUNCTION_ARGS)
    5516             : {
    5517             :     text       *result;
    5518             : 
    5519          36 :     result = concat_internal("", 0, fcinfo);
    5520          36 :     if (result == NULL)         /* concat_internal signals a NULL result this way */
    5521           6 :         PG_RETURN_NULL();
    5522          30 :     PG_RETURN_TEXT_P(result);
    5523             : }
    5524             : 
    5525             : /*
    5526             :  * Concatenate all but first argument value with separators. The first
    5527             :  * parameter is used as the separator (a NULL separator yields NULL). NULL arguments are ignored.
    5528             :  */
    5529             : Datum
    5530          84 : text_concat_ws(PG_FUNCTION_ARGS)
    5531             : {
    5532             :     char       *sep;
    5533             :     text       *result;
    5534             : 
    5535             :     /* return NULL when separator is NULL */
    5536          84 :     if (PG_ARGISNULL(0))
    5537           6 :         PG_RETURN_NULL();
    5538          78 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));    /* concat_internal takes the separator as a C string */
    5539             : 
    5540          78 :     result = concat_internal(sep, 1, fcinfo);   /* values to join start at argument 1 */
    5541          78 :     if (result == NULL)
    5542           6 :         PG_RETURN_NULL();
    5543          72 :     PG_RETURN_TEXT_P(result);
    5544             : }
    5545             : 
    5546             : /*
    5547             :  * Return first n characters in the string. When n is negative,
    5548             :  * return all but last |n| characters.  n is counted in characters, not bytes.
    5549             :  */
    5550             : Datum
    5551        2136 : text_left(PG_FUNCTION_ARGS)
    5552             : {
    5553        2136 :     int         n = PG_GETARG_INT32(1);
    5554             : 
    5555        2136 :     if (n < 0)
    5556             :     {
    5557          30 :         text       *str = PG_GETARG_TEXT_PP(0);
    5558          30 :         const char *p = VARDATA_ANY(str);
    5559          30 :         int         len = VARSIZE_ANY_EXHDR(str);
    5560             :         int         rlen;
    5561             : 
    5562          30 :         n = pg_mbstrlen_with_len(p, len) + n;   /* n < 0 here; presumably (character count) - |n| -- confirm helper */
    5563          30 :         rlen = pg_mbcharcliplen(p, len, n);     /* used below as the byte length of the prefix to return */
    5564          30 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    5565             :     }
    5566             :     else
    5567        2106 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false)); /* n >= 0: delegate, starting at position 1 */
    5568             : }
    5569             : 
    5570             : /*
    5571             :  * Return last n characters in the string. When n is negative,
    5572             :  * return all but first |n| characters.  Either way n is turned into a count of leading characters to skip.
    5573             :  */
    5574             : Datum
    5575          66 : text_right(PG_FUNCTION_ARGS)
    5576             : {
    5577          66 :     text       *str = PG_GETARG_TEXT_PP(0);
    5578          66 :     const char *p = VARDATA_ANY(str);
    5579          66 :     int         len = VARSIZE_ANY_EXHDR(str);
    5580          66 :     int         n = PG_GETARG_INT32(1);
    5581             :     int         off;
    5582             : 
    5583          66 :     if (n < 0)
    5584          30 :         n = -n;                 /* skip |n| characters; NOTE(review): -n is not representable when n is INT_MIN -- confirm handling */
    5585             :     else
    5586          36 :         n = pg_mbstrlen_with_len(p, len) - n;   /* skip everything except the last n characters */
    5587          66 :     off = pg_mbcharcliplen(p, len, n);          /* used below as the byte offset where the suffix starts */
    5588             : 
    5589          66 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    5590             : }
    5591             : 
    5592             : /*
    5593             :  * Return reversed string (the result has the same byte length as the input)
    5594             :  */
    5595             : Datum
    5596           6 : text_reverse(PG_FUNCTION_ARGS)
    5597             : {
    5598           6 :     text       *str = PG_GETARG_TEXT_PP(0);
    5599           6 :     const char *p = VARDATA_ANY(str);
    5600           6 :     int         len = VARSIZE_ANY_EXHDR(str);
    5601           6 :     const char *endp = p + len;
    5602             :     text       *result;
    5603             :     char       *dst;
    5604             : 
    5605           6 :     result = palloc(len + VARHDRSZ);
    5606           6 :     dst = (char *) VARDATA(result) + len;   /* one past the end of the data area; filled back to front */
    5607           6 :     SET_VARSIZE(result, len + VARHDRSZ);
    5608             : 
    5609           6 :     if (pg_database_encoding_max_length() > 1)
    5610             :     {
    5611             :         /* multibyte version */
    5612          36 :         while (p < endp)
    5613             :         {
    5614             :             int         sz;
    5615             : 
    5616          30 :             sz = pg_mblen(p);   /* presumably the byte length of the character at p */
    5617          30 :             dst -= sz;          /* step back to make room for this character */
    5618          30 :             memcpy(dst, p, sz); /* bytes within one character keep their order */
    5619          30 :             p += sz;
    5620             :         }
    5621             :     }
    5622             :     else
    5623             :     {
    5624             :         /* single byte version */
    5625           0 :         while (p < endp)
    5626           0 :             *(--dst) = *p++;
    5627             :     }
    5628             : 
    5629           6 :     PG_RETURN_TEXT_P(result);
    5630             : }
    5631             : 
    5632             : 
    5633             : /*
    5634             :  * Support macros for text_format() and its format-parsing helpers
    5635             :  */
    5636             : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* set in flags when a '-' flag is present */
    5637             : /* Pre-increment ptr; raise an "unterminated format() type specifier" error if it reaches end_ptr. */
    5638             : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    5639             :     do { \
    5640             :         if (++(ptr) >= (end_ptr)) \
    5641             :             ereport(ERROR, \
    5642             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
    5643             :                      errmsg("unterminated format() type specifier"), \
    5644             :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
    5645             :     } while (0)
    5646             : 
    5647             : /*
    5648             :  * Returns a formatted string built from the format text (argument 0) and the arguments after it
    5649             :  */
    5650             : Datum
    5651       30026 : text_format(PG_FUNCTION_ARGS)
    5652             : {
    5653             :     text       *fmt;
    5654             :     StringInfoData str;
    5655             :     const char *cp;
    5656             :     const char *start_ptr;
    5657             :     const char *end_ptr;
    5658             :     text       *result;
    5659             :     int         arg;
    5660             :     bool        funcvariadic;
    5661             :     int         nargs;          /* argument count, including the format string */
    5662       30026 :     Datum      *elements = NULL;    /* VARIADIC array contents, if any */
    5663       30026 :     bool       *nulls = NULL;
    5664       30026 :     Oid         element_type = InvalidOid;
    5665       30026 :     Oid         prev_type = InvalidOid; /* type typoutputfinfo was last set up for */
    5666       30026 :     Oid         prev_width_type = InvalidOid;   /* type typoutputinfo_width was last set up for */
    5667             :     FmgrInfo    typoutputfinfo;
    5668             :     FmgrInfo    typoutputinfo_width;
    5669             : 
    5670             :     /* When format string is null, immediately return null */
    5671       30026 :     if (PG_ARGISNULL(0))
    5672           6 :         PG_RETURN_NULL();
    5673             : 
    5674             :     /* If argument is marked VARIADIC, expand array into elements */
    5675       30020 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    5676             :     {
    5677             :         ArrayType  *arr;
    5678             :         int16       elmlen;
    5679             :         bool        elmbyval;
    5680             :         char        elmalign;
    5681             :         int         nitems;
    5682             : 
    5683             :         /* Should have just the one argument */
    5684             :         Assert(PG_NARGS() == 2);
    5685             : 
    5686             :         /* If argument is NULL, we treat it as zero-length array */
    5687          48 :         if (PG_ARGISNULL(1))
    5688           6 :             nitems = 0;
    5689             :         else
    5690             :         {
    5691             :             /*
    5692             :              * Non-null argument had better be an array.  We assume that any
    5693             :              * call context that could let get_fn_expr_variadic return true
    5694             :              * will have checked that a VARIADIC-labeled parameter actually is
    5695             :              * an array.  So it should be okay to just Assert that it's an
    5696             :              * array rather than doing a full-fledged error check.
    5697             :              */
    5698             :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5699             : 
    5700             :             /* OK, safe to fetch the array value */
    5701          42 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5702             : 
    5703             :             /* Get info about array element type */
    5704          42 :             element_type = ARR_ELEMTYPE(arr);
    5705          42 :             get_typlenbyvalalign(element_type,
    5706             :                                  &elmlen, &elmbyval, &elmalign);
    5707             : 
    5708             :             /* Extract all array elements */
    5709          42 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5710             :                               &elements, &nulls, &nitems);
    5711             :         }
    5712             : 
    5713          48 :         nargs = nitems + 1;     /* count the format string too, as PG_NARGS() does below */
    5714          48 :         funcvariadic = true;
    5715             :     }
    5716             :     else
    5717             :     {
    5718             :         /* Non-variadic case, we'll process the arguments individually */
    5719       29972 :         nargs = PG_NARGS();
    5720       29972 :         funcvariadic = false;
    5721             :     }
    5722             : 
    5723             :     /* Setup for main loop. */
    5724       30020 :     fmt = PG_GETARG_TEXT_PP(0);
    5725       30020 :     start_ptr = VARDATA_ANY(fmt);
    5726       30020 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5727       30020 :     initStringInfo(&str);
    5728       30020 :     arg = 1;                    /* next argument position to print */
    5729             : 
    5730             :     /* Scan format string, looking for conversion specifiers. */
    5731      839984 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5732             :     {
    5733             :         int         argpos;
    5734             :         int         widthpos;
    5735             :         int         flags;
    5736             :         int         width;
    5737             :         Datum       value;
    5738             :         bool        isNull;
    5739             :         Oid         typid;
    5740             : 
    5741             :         /*
    5742             :          * If it's not the start of a conversion specifier, just copy it to
    5743             :          * the output buffer.
    5744             :          */
    5745      810024 :         if (*cp != '%')
    5746             :         {
    5747      748128 :             appendStringInfoCharMacro(&str, *cp);
    5748      748146 :             continue;
    5749             :         }
    5750             : 
    5751       61896 :         ADVANCE_PARSE_POINTER(cp, end_ptr);  /* errors out if '%' is the last character */
    5752             : 
    5753             :         /* Easy case: %% outputs a single % */
    5754       61896 :         if (*cp == '%')
    5755             :         {
    5756          18 :             appendStringInfoCharMacro(&str, *cp);
    5757          18 :             continue;
    5758             :         }
    5759             : 
    5760             :         /* Parse the optional portions of the format specifier */
    5761       61878 :         cp = text_format_parse_format(cp, end_ptr,
    5762             :                                       &argpos, &widthpos,
    5763             :                                       &flags, &width);
    5764             : 
    5765             :         /*
    5766             :          * Next we should see the main conversion specifier.  Whether or not
    5767             :          * an argument position was present, it's known that at least one
    5768             :          * character remains in the string at this point.  Experience suggests
    5769             :          * that it's worth checking that that character is one of the expected
    5770             :          * ones before we try to fetch arguments, so as to produce the least
    5771             :          * confusing response to a mis-formatted specifier.
    5772             :          */
    5773       61854 :         if (strchr("sIL", *cp) == NULL)
    5774           6 :             ereport(ERROR,
    5775             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5776             :                      errmsg("unrecognized format() type specifier \"%.*s\"",
    5777             :                             pg_mblen(cp), cp),
    5778             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5779             : 
    5780             :         /* If indirect width was specified, get its value */
    5781       61848 :         if (widthpos >= 0)
    5782             :         {
    5783             :             /* Collect the specified or next argument position */
    5784          42 :             if (widthpos > 0)
    5785          36 :                 arg = widthpos;
    5786          42 :             if (arg >= nargs)
    5787           0 :                 ereport(ERROR,
    5788             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5789             :                          errmsg("too few arguments for format()")));
    5790             : 
    5791             :             /* Get the value and type of the selected argument */
    5792          42 :             if (!funcvariadic)
    5793             :             {
    5794          42 :                 value = PG_GETARG_DATUM(arg);
    5795          42 :                 isNull = PG_ARGISNULL(arg);
    5796          42 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5797             :             }
    5798             :             else
    5799             :             {
    5800           0 :                 value = elements[arg - 1];  /* array index is zero-based; arg counts from 1 */
    5801           0 :                 isNull = nulls[arg - 1];
    5802           0 :                 typid = element_type;
    5803             :             }
    5804          42 :             if (!OidIsValid(typid))
    5805           0 :                 elog(ERROR, "could not determine data type of format() input");
    5806             : 
    5807          42 :             arg++;              /* value defaults to the argument after the width */
    5808             : 
    5809             :             /* We can treat NULL width the same as zero */
    5810          42 :             if (isNull)
    5811           6 :                 width = 0;
    5812          36 :             else if (typid == INT4OID)
    5813          36 :                 width = DatumGetInt32(value);
    5814           0 :             else if (typid == INT2OID)
    5815           0 :                 width = DatumGetInt16(value);
    5816             :             else
    5817             :             {
    5818             :                 /* For less-usual datatypes, convert to text then to int */
    5819             :                 char       *str;    /* NOTE(review): shadows the function-level StringInfoData str inside this block */
    5820             : 
    5821           0 :                 if (typid != prev_width_type)
    5822             :                 {
    5823             :                     Oid         typoutputfunc;
    5824             :                     bool        typIsVarlena;
    5825             : 
    5826           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5827           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    5828           0 :                     prev_width_type = typid;
    5829             :                 }
    5830             : 
    5831           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    5832             : 
    5833             :                 /* pg_strtoint32 will complain about bad data or overflow */
    5834           0 :                 width = pg_strtoint32(str);
    5835             : 
    5836           0 :                 pfree(str);
    5837             :             }
    5838             :         }
    5839             : 
    5840             :         /* Collect the specified or next argument position */
    5841       61848 :         if (argpos > 0)
    5842         132 :             arg = argpos;
    5843       61848 :         if (arg >= nargs)
    5844          24 :             ereport(ERROR,
    5845             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5846             :                      errmsg("too few arguments for format()")));
    5847             : 
    5848             :         /* Get the value and type of the selected argument */
    5849       61824 :         if (!funcvariadic)
    5850             :         {
    5851       60552 :             value = PG_GETARG_DATUM(arg);
    5852       60552 :             isNull = PG_ARGISNULL(arg);
    5853       60552 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5854             :         }
    5855             :         else
    5856             :         {
    5857        1272 :             value = elements[arg - 1];  /* array index is zero-based; arg counts from 1 */
    5858        1272 :             isNull = nulls[arg - 1];
    5859        1272 :             typid = element_type;
    5860             :         }
    5861       61824 :         if (!OidIsValid(typid))
    5862           0 :             elog(ERROR, "could not determine data type of format() input");
    5863             : 
    5864       61824 :         arg++;                  /* an unnumbered specifier that follows uses the next argument */
    5865             : 
    5866             :         /*
    5867             :          * Get the appropriate typOutput function, reusing previous one if
    5868             :          * same type as previous argument.  That's particularly useful in the
    5869             :          * variadic-array case, but often saves work even for ordinary calls.
    5870             :          */
    5871       61824 :         if (typid != prev_type)
    5872             :         {
    5873             :             Oid         typoutputfunc;
    5874             :             bool        typIsVarlena;
    5875             : 
    5876       30348 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5877       30348 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    5878       30348 :             prev_type = typid;
    5879             :         }
    5880             : 
    5881             :         /*
    5882             :          * And now we can format the value.
    5883             :          */
    5884       61824 :         switch (*cp)
    5885             :         {
    5886       61824 :             case 's':
    5887             :             case 'I':
    5888             :             case 'L':
    5889       61824 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    5890             :                                               value, isNull,
    5891             :                                               flags, width);
    5892       61818 :                 break;
    5893           0 :             default:
    5894             :                 /* should not get here, because of previous check */
    5895           0 :                 ereport(ERROR,
    5896             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5897             :                          errmsg("unrecognized format() type specifier \"%.*s\"",
    5898             :                                 pg_mblen(cp), cp),
    5899             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    5900             :                 break;
    5901             :         }
    5902             :     }
    5903             : 
    5904             :     /* Don't need deconstruct_array results anymore. */
    5905       29960 :     if (elements != NULL)
    5906          42 :         pfree(elements);
    5907       29960 :     if (nulls != NULL)
    5908          42 :         pfree(nulls);
    5909             : 
    5910             :     /* Generate results. */
    5911       29960 :     result = cstring_to_text_with_len(str.data, str.len);
    5912       29960 :     pfree(str.data);
    5913             : 
    5914       29960 :     PG_RETURN_TEXT_P(result);
    5915             : }
    5916             : 
    5917             : /*
    5918             :  * Parse contiguous digits as a decimal number.
    5919             :  *
    5920             :  * Returns true if some digits could be parsed.
    5921             :  * The value is returned into *value (zero if no digits), and *ptr is advanced
    5922             :  * to the next character to be parsed.  Reports an error on int32 overflow.
    5923             :  *
    5924             :  * Note parsing invariant: at least one character is known available before
    5925             :  * string end (end_ptr) at entry, and this is still true at exit.
    5926             :  */
    5927             : static bool
    5928      123720 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    5929             : {
    5930      123720 :     bool        found = false;
    5931      123720 :     const char *cp = *ptr;
    5932      123720 :     int         val = 0;
    5933             : 
    5934      124032 :     while (*cp >= '0' && *cp <= '9')
    5935             :     {
    5936         318 :         int8        digit = (*cp - '0');
    5937             : 
    5938         318 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||     /* val = val * 10 + digit, with overflow checks */
    5939         318 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
    5940           0 :             ereport(ERROR,
    5941             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5942             :                      errmsg("number is out of range")));
    5943         318 :         ADVANCE_PARSE_POINTER(cp, end_ptr);  /* errors if the digits run to the end of the string */
    5944         312 :         found = true;
    5945             :     }
    5946             : 
    5947      123714 :     *ptr = cp;
    5948      123714 :     *value = val;
    5949             : 
    5950      123714 :     return found;
    5951             : }
    5952             : 
    5953             : /*
    5954             :  * Parse a format specifier (generally following the SUS printf spec).
    5955             :  *
    5956             :  * We have already advanced over the initial '%', and we are looking for
    5957             :  * [argpos][flags][width]type (but the type character is not consumed here).
    5958             :  *
    5959             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    5960             :  * Output parameters:
    5961             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    5962             :  *  widthpos: argument position for width.  Zero means the argument position
    5963             :  *          was unspecified (ie, take the next arg) and -1 means no width
    5964             :  *          argument (width was omitted or specified as a constant).
    5965             :  *  flags: bitmask of flags (currently only TEXT_FORMAT_FLAG_MINUS).
    5966             :  *  width: directly-specified width value.  Zero means the width was omitted
    5967             :  *          (note it's not necessary to distinguish this case from an explicit
    5968             :  *          zero width value).
    5969             :  *
    5970             :  * The function result is the next character position to be parsed, ie, the
    5971             :  * location where the type character is/should be.
    5972             :  *
    5973             :  * Note parsing invariant: at least one character is known available before
    5974             :  * string end (end_ptr) at entry, and this is still true at exit.
    5975             :  */
    5976             : static const char *
    5977       61878 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    5978             :                          int *argpos, int *widthpos,
    5979             :                          int *flags, int *width)
    5980             : {
    5981       61878 :     const char *cp = start_ptr;
    5982             :     int         n;
    5983             : 
    5984             :     /* set defaults for output parameters */
    5985       61878 :     *argpos = -1;
    5986       61878 :     *widthpos = -1;
    5987       61878 :     *flags = 0;
    5988       61878 :     *width = 0;
    5989             : 
    5990             :     /* try to identify first number */
    5991       61878 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    5992             :     {
    5993         174 :         if (*cp != '$')
    5994             :         {
    5995             :             /* Must be just a width and a type, so we're done */
    5996          24 :             *width = n;
    5997          24 :             return cp;
    5998             :         }
    5999             :         /* The number was argument position */
    6000         150 :         *argpos = n;
    6001             :         /* Explicit 0 for argument index is immediately refused */
    6002         150 :         if (n == 0)
    6003           6 :             ereport(ERROR,
    6004             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6005             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    6006         144 :         ADVANCE_PARSE_POINTER(cp, end_ptr);  /* step over the '$' */
    6007             :     }
    6008             : 
    6009             :     /* Handle flags (only minus is supported now) */
    6010       61872 :     while (*cp == '-')          /* the loop accepts repeated '-' characters */
    6011             :     {
    6012          30 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    6013          30 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6014             :     }
    6015             : 
    6016       61842 :     if (*cp == '*')
    6017             :     {
    6018             :         /* Handle indirect width */
    6019          48 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    6020          48 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    6021             :         {
    6022             :             /* number in this position must be closed by $ */
    6023          42 :             if (*cp != '$')
    6024           0 :                 ereport(ERROR,
    6025             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6026             :                          errmsg("width argument position must be ended by \"$\"")));
    6027             :             /* The number was width argument position */
    6028          42 :             *widthpos = n;
    6029             :             /* Explicit 0 for argument index is immediately refused */
    6030          42 :             if (n == 0)
    6031           6 :                 ereport(ERROR,
    6032             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6033             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    6034          36 :             ADVANCE_PARSE_POINTER(cp, end_ptr);  /* step over the '$' */
    6035             :         }
    6036             :         else
    6037           6 :             *widthpos = 0;      /* width's argument position is unspecified */
    6038             :     }
    6039             :     else
    6040             :     {
    6041             :         /* Check for direct width specification */
    6042       61794 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    6043          30 :             *width = n;         /* constant width; widthpos stays -1 */
    6044             :     }
    6045             : 
    6046             :     /* cp should now be pointing at type character */
    6047       61830 :     return cp;
    6048             : }
    6049             : 
    6050             : /*
    6051             :  * Format a %s, %I, or %L conversion
                     :  *
                     :  * Appends the formatted argument to buf.  conversion is the type character
                     :  * ('s', 'I' or 'L'); flags and width are handed on unchanged to
                     :  * text_format_append_string(), which does the padding.
                     :  *
                     :  * A NULL argument (isNull) is rendered as an empty string for %s and as
                     :  * the unquoted word NULL for %L; for %I it raises an error.  Otherwise the
                     :  * value is converted to a C string with the output function described by
                     :  * typOutputInfo and then, for %I and %L, passed through quote_identifier()
                     :  * or quote_literal_cstr() respectively before being appended.
    6052             :  */
    6053             : static void
    6054       61824 : text_format_string_conversion(StringInfo buf, char conversion,
    6055             :                               FmgrInfo *typOutputInfo,
    6056             :                               Datum value, bool isNull,
    6057             :                               int flags, int width)
    6058             : {
    6059             :     char       *str;
    6060             : 
    6061             :     /*
                     :      * Handle NULL arguments before trying to stringify the value.  Any
                     :      * conversion other than 's', 'L' or 'I' appends nothing here.
                     :      */
    6062       61824 :     if (isNull)
    6063             :     {
    6064         342 :         if (conversion == 's')
    6065         270 :             text_format_append_string(buf, "", flags, width);
    6066          72 :         else if (conversion == 'L')
    6067          66 :             text_format_append_string(buf, "NULL", flags, width);
    6068           6 :         else if (conversion == 'I')
    6069           6 :             ereport(ERROR,
    6070             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    6071             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    6072         336 :         return;
    6073             :     }
    6074             : 
    6075             :     /* Stringify the value using the type's output function. */
    6076       61482 :     str = OutputFunctionCall(typOutputInfo, value);
    6077             : 
    6078             :     /* Escape according to the conversion type. */
    6079       61482 :     if (conversion == 'I')
    6080             :     {
    6081             :         /*
                     :          * quote_identifier may or may not allocate a new string, so its
                     :          * result is not pfree'd here.
                     :          */
    6082        3106 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    6083             :     }
    6084       58376 :     else if (conversion == 'L')
    6085             :     {
    6086        3232 :         char       *qstr = quote_literal_cstr(str);
    6087             : 
    6088        3232 :         text_format_append_string(buf, qstr, flags, width);
    6089             :         /* quote_literal_cstr() always allocates a new string */
    6090        3232 :         pfree(qstr);
    6091             :     }
    6092             :     else
    6093       55144 :         text_format_append_string(buf, str, flags, width);
    6094             : 
    6095             :     /* Cleanup: release the string produced by the output function. */
    6096       61482 :     pfree(str);
    6097             : }
    6098             : 
    6099             : /*
    6100             :  * Append str to buf, padding as directed by flags/width
                     :  *
                     :  * A width of zero means "no padding".  A negative width requests left
                     :  * justification in a field of -width; a positive width is left-justified
                     :  * only if TEXT_FORMAT_FLAG_MINUS is set in flags, and right-justified
                     :  * otherwise.  Padding is done with spaces, and the field width is compared
                     :  * against pg_mbstrlen(str).  A string that is already at least as long as
                     :  * the field is appended unchanged; it is never truncated.
                     :  *
                     :  * Raises "number is out of range" if width is INT_MIN, since that value
                     :  * cannot be negated.
    6101             :  */
    6102             : static void
    6103       61818 : text_format_append_string(StringInfo buf, const char *str,
    6104             :                           int flags, int width)
    6105             : {
    6106       61818 :     bool        align_to_left = false;
    6107             :     int         len;
    6108             : 
    6109             :     /* fast path for typical easy case: no width given, so no padding */
    6110       61818 :     if (width == 0)
    6111             :     {
    6112       61734 :         appendStringInfoString(buf, str);
    6113       61734 :         return;
    6114             :     }
    6115             : 
    6116          84 :     if (width < 0)
    6117             :     {
    6118             :         /* Negative width: implicit '-' flag, then take absolute value */
    6119           6 :         align_to_left = true;
    6120             :         /* -INT_MIN is undefined, so reject it before negating */
    6121           6 :         if (width <= INT_MIN)
    6122           0 :             ereport(ERROR,
    6123             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    6124             :                      errmsg("number is out of range")));
    6125           6 :         width = -width;
    6126             :     }
    6127          78 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    6128          24 :         align_to_left = true;
    6129             : 
    6130          84 :     len = pg_mbstrlen(str);
    6131          84 :     if (align_to_left)
    6132             :     {
    6133             :         /* left justify: string first, then any padding */
    6134          30 :         appendStringInfoString(buf, str);
    6135          30 :         if (len < width)
    6136          30 :             appendStringInfoSpaces(buf, width - len);
    6137             :     }
    6138             :     else
    6139             :     {
    6140             :         /* right justify: any padding first, then the string */
    6141          54 :         if (len < width)
    6142          54 :             appendStringInfoSpaces(buf, width - len);
    6143          54 :         appendStringInfoString(buf, str);
    6144             :     }
    6145             : }
    6146             : 
    6147             : /*
    6148             :  * text_format_nv - nonvariadic wrapper for text_format function.
    6149             :  *
    6150             :  * Note: this wrapper is necessary to pass the sanity check in opr_sanity,
    6151             :  * which checks that all built-in functions that share the implementing C
    6152             :  * function take the same number of arguments.  It does nothing but hand
                     :  * its fcinfo on to text_format() and return that function's result.
    6153             :  */
    6154             : Datum
    6155        2816 : text_format_nv(PG_FUNCTION_ARGS)
    6156             : {
    6157        2816 :     return text_format(fcinfo);
    6158             : }
    6159             : 
    6160             : /*
    6161             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    6162             :  * for this use case.
                     :  *
                     :  * Compares the first len bytes of s1 and s2, working from the last byte
                     :  * back to the first, and returns false at the first mismatch.  Returns true
                     :  * if all len bytes are equal, which includes the case len <= 0.
    6163             :  */
    6164             : static inline bool
    6165           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    6166             : {
    6167           0 :     while (len > 0)
    6168             :     {
    6169           0 :         len--;
    6170           0 :         if (s1[len] != s2[len])
    6171           0 :             return false;
    6172             :     }
    6173           0 :     return true;
    6174             : }
    6175             : 
    6176             : /*
                     :  * Expand each Levenshtein distance variant.  levenshtein.c is included
                     :  * twice: once as is, and once with LEVENSHTEIN_LESS_EQUAL defined.  The
                     :  * second inclusion presumably supplies varstr_levenshtein_less_equal(),
                     :  * which is called further down in this file -- confirm in levenshtein.c.
                     :  */
    6177             : #include "levenshtein.c"
    6178             : #define LEVENSHTEIN_LESS_EQUAL
    6179             : #include "levenshtein.c"
    6180             : 
    6181             : 
    6182             : /*
    6183             :  * The following *ClosestMatch() functions can be used to determine whether a
    6184             :  * user-provided string resembles any known valid values, which is useful for
    6185             :  * providing hints in log messages, among other things.  Use these functions
    6186             :  * like so:
    6187             :  *
    6188             :  *      initClosestMatch(&state, source_string, max_distance);
    6189             :  *
    6190             :  *      for (int i = 0; i < num_valid_strings; i++)
    6191             :  *          updateClosestMatch(&state, valid_strings[i]);
    6192             :  *
    6193             :  *      closestMatch = getClosestMatch(&state);
    6194             :  */
    6195             : 
    6196             : /*
    6197             :  * Initialize the given state with the source string and maximum Levenshtein
    6198             :  * distance to consider.
                     :  *
                     :  * Only the source pointer is stored; the string is not copied.  min_d is
                     :  * set to -1, which updateClosestMatch() treats as "no match saved yet", and
                     :  * match starts out as NULL.
    6199             :  */
    6200             : void
    6201          56 : initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
    6202             : {
    6203             :     Assert(state);
    6204             :     Assert(max_d >= 0);
    6205             : 
    6206          56 :     state->source = source;
    6207          56 :     state->min_d = -1;
    6208          56 :     state->max_d = max_d;
    6209          56 :     state->match = NULL;
    6210          56 : }
    6211             : 
    6212             : /*
    6213             :  * If the candidate string is a closer match than the current one saved (or
    6214             :  * there is no match saved), save it as the closest match.
    6215             :  *
    6216             :  * If the source or candidate string is NULL, empty, or too long, this function
    6217             :  * takes no action.  Likewise, if the Levenshtein distance exceeds the maximum
    6218             :  * allowed or more than half the characters are different, no action is taken.
                     :  *
                     :  * String lengths are taken with strlen().  The distance is computed with
                     :  * a cost of 1 for each of the three cost arguments.  Only the candidate
                     :  * pointer is saved (the string is not copied), and a candidate replaces the
                     :  * saved match only when its distance is strictly smaller, so of several
                     :  * equally close candidates the first one offered is kept.
    6219             :  */
    6220             : void
    6221         348 : updateClosestMatch(ClosestMatchState *state, const char *candidate)
    6222             : {
    6223             :     int         dist;
    6224             : 
    6225             :     Assert(state);
    6226             : 
    6227         348 :     if (state->source == NULL || state->source[0] == '\0' ||
    6228         348 :         candidate == NULL || candidate[0] == '\0')
    6229           0 :         return;
    6230             : 
    6231             :     /*
    6232             :      * To avoid ERROR-ing, we check the lengths here instead of setting
    6233             :      * 'trusted' to false in the call to varstr_levenshtein_less_equal().
    6234             :      */
    6235         348 :     if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
    6236         348 :         strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
    6237           0 :         return;
    6238             : 
    6239         348 :     dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
    6240         348 :                                          candidate, strlen(candidate), 1, 1, 1,
    6241             :                                          state->max_d, true);
    6242         348 :     if (dist <= state->max_d &&
    6243          56 :         dist <= strlen(state->source) / 2 &&
    6244          14 :         (state->min_d == -1 || dist < state->min_d))
    6245             :     {
    6246          14 :         state->min_d = dist;
    6247          14 :         state->match = candidate;
    6248             :     }
    6249             : }
    6250             : 
    6251             : /*
    6252             :  * Return the closest match.  If no suitable candidates were provided via
    6253             :  * updateClosestMatch(), return NULL.
                     :  *
                     :  * The result is whatever pointer is currently saved in state->match, i.e.
                     :  * one of the candidate pointers passed to updateClosestMatch() or the NULL
                     :  * stored by initClosestMatch().
    6254             :  */
    6255             : const char *
    6256          56 : getClosestMatch(ClosestMatchState *state)
    6257             : {
    6258             :     Assert(state);
    6259             : 
    6260          56 :     return state->match;
    6261             : }
    6262             : 
    6263             : 
    6264             : /*
    6265             :  * Unicode support
    6266             :  */
    6267             : 
                     : /*
                     :  * Translate a normalization form name into a UnicodeNormalizationForm.
                     :  *
                     :  * formstr is compared with pg_strcasecmp() against "NFC", "NFD", "NFKC" and
                     :  * "NFKD".  Any other name raises "invalid normalization form".  An error is
                     :  * also raised, before the name is looked at, if the database encoding is
                     :  * not UTF8.
                     :  */
    6268             : static UnicodeNormalizationForm
    6269         186 : unicode_norm_form_from_string(const char *formstr)
    6270             : {
    6271         186 :     UnicodeNormalizationForm form = -1;
    6272             : 
    6273             :     /*
    6274             :      * Might as well check this while we're here.
    6275             :      */
    6276         186 :     if (GetDatabaseEncoding() != PG_UTF8)
    6277           0 :         ereport(ERROR,
    6278             :                 (errcode(ERRCODE_SYNTAX_ERROR),
    6279             :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
    6280             : 
    6281         186 :     if (pg_strcasecmp(formstr, "NFC") == 0)
    6282          66 :         form = UNICODE_NFC;
    6283         120 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
    6284          36 :         form = UNICODE_NFD;
    6285          84 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
    6286          36 :         form = UNICODE_NFKC;
    6287          48 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
    6288          36 :         form = UNICODE_NFKD;
    6289             :     else
    6290          12 :         ereport(ERROR,
    6291             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6292             :                  errmsg("invalid normalization form: %s", formstr)));
    6293             : 
    6294         174 :     return form;
    6295             : }
    6296             : 
    6297             : /*
    6298             :  * Returns version of Unicode used by Postgres in "major.minor" format (the
    6299             :  * same format as the Unicode version reported by ICU). The third component
    6300             :  * ("update version") never involves additions to the character repertoire and
    6301             :  * is unimportant for most purposes.
    6302             :  *
    6303             :  * See: https://unicode.org/versions/
                     :  *
                     :  * The value returned is the PG_UNICODE_VERSION string, converted to text.
    6304             :  */
    6305             : Datum
    6306           6 : unicode_version(PG_FUNCTION_ARGS)
    6307             : {
    6308           6 :     PG_RETURN_TEXT_P(cstring_to_text(PG_UNICODE_VERSION));
    6309             : }
    6310             : 
    6311             : /*
    6312             :  * Returns version of Unicode used by ICU, if enabled; otherwise NULL.
                     :  *
                     :  * "Enabled" is a build-time property: when USE_ICU is defined the result
                     :  * is the U_UNICODE_VERSION string converted to text, and when it is not
                     :  * defined the function always returns SQL NULL.
    6313             :  */
    6314             : Datum
    6315           2 : icu_unicode_version(PG_FUNCTION_ARGS)
    6316             : {
    6317             : #ifdef USE_ICU
    6318           2 :     PG_RETURN_TEXT_P(cstring_to_text(U_UNICODE_VERSION));
    6319             : #else
    6320             :     PG_RETURN_NULL();
    6321             : #endif
    6322             : }
    6323             : 
    6324             : /*
    6325             :  * Check whether the string contains only assigned Unicode code
    6326             :  * points. Requires that the database encoding is UTF-8.
                     :  *
                     :  * Returns false as soon as a code point whose category is PG_U_UNASSIGNED
                     :  * is found, and true if the end of the string is reached without finding
                     :  * one (so an empty string yields true).
                     :  *
                     :  * NOTE(review): the ereport() below carries no errcode(), unlike the
                     :  * equivalent encoding check in unicode_norm_form_from_string() -- confirm
                     :  * whether that is intentional.
    6327             :  */
    6328             : Datum
    6329          12 : unicode_assigned(PG_FUNCTION_ARGS)
    6330             : {
    6331          12 :     text       *input = PG_GETARG_TEXT_PP(0);
    6332             :     unsigned char *p;
    6333             :     int         size;
    6334             : 
    6335          12 :     if (GetDatabaseEncoding() != PG_UTF8)
    6336           0 :         ereport(ERROR,
    6337             :                 (errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
    6338             : 
    6339             :     /*
                     :      * Count the characters in the input, then decode and classify them
                     :      * one at a time; no pg_wchar array is built here.
                     :      */
    6340          12 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6341          12 :     p = (unsigned char *) VARDATA_ANY(input);
    6342          48 :     for (int i = 0; i < size; i++)
    6343             :     {
    6344          42 :         pg_wchar    uchar = utf8_to_unicode(p);
    6345          42 :         int         category = unicode_category(uchar);
    6346             : 
    6347          42 :         if (category == PG_U_UNASSIGNED)
    6348           6 :             PG_RETURN_BOOL(false);
    6349             : 
    6350          36 :         p += pg_utf_mblen(p);
    6351             :     }
    6352             : 
    6353           6 :     PG_RETURN_BOOL(true);
    6354             : }
    6355             : 
                     : /*
                     :  * Convert a string to the requested Unicode normalization form.
                     :  *
                     :  * Argument 0 is the text to normalize and argument 1 is the name of the
                     :  * form, which is resolved (and the server encoding checked) by
                     :  * unicode_norm_form_from_string().  The input is decoded into a
                     :  * zero-terminated pg_wchar array, normalized with unicode_normalize(), and
                     :  * the result is encoded back to UTF-8 in a newly allocated text value.
                     :  */
    6356             : Datum
    6357          48 : unicode_normalize_func(PG_FUNCTION_ARGS)
    6358             : {
    6359          48 :     text       *input = PG_GETARG_TEXT_PP(0);
    6360          48 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6361             :     UnicodeNormalizationForm form;
    6362             :     int         size;
    6363             :     pg_wchar   *input_chars;
    6364             :     pg_wchar   *output_chars;
    6365             :     unsigned char *p;
    6366             :     text       *result;
    6367             :     int         i;
    6368             : 
    6369          48 :     form = unicode_norm_form_from_string(formstr);
    6370             : 
    6371             :     /* convert to pg_wchar; one extra slot holds the zero terminator */
    6372          42 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6373          42 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6374          42 :     p = (unsigned char *) VARDATA_ANY(input);
    6375         168 :     for (i = 0; i < size; i++)
    6376             :     {
    6377         126 :         input_chars[i] = utf8_to_unicode(p);
    6378         126 :         p += pg_utf_mblen(p);
    6379             :     }
    6380          42 :     input_chars[i] = (pg_wchar) '\0';
    6381             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6382             : 
    6383             :     /* action: normalize the code point array */
    6384          42 :     output_chars = unicode_normalize(form, input_chars);
    6385             : 
    6386             :     /*
                     :      * convert back to UTF-8 string: a first pass over the output adds up
                     :      * the encoded length of each code point so that the result can be
                     :      * allocated at its exact size, and a second pass writes the bytes.
                     :      */
    6387          42 :     size = 0;
    6388         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6389             :     {
    6390             :         unsigned char buf[4];
    6391             : 
    6392         120 :         unicode_to_utf8(*wp, buf);
    6393         120 :         size += pg_utf_mblen(buf);
    6394             :     }
    6395             : 
    6396          42 :     result = palloc(size + VARHDRSZ);
    6397          42 :     SET_VARSIZE(result, size + VARHDRSZ);
    6398             : 
    6399          42 :     p = (unsigned char *) VARDATA_ANY(result);
    6400         162 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6401             :     {
    6402         120 :         unicode_to_utf8(*wp, p);
    6403         120 :         p += pg_utf_mblen(p);
    6404             :     }
    6405             :     Assert((char *) p == (char *) result + size + VARHDRSZ);
    6406             : 
    6407          42 :     PG_RETURN_TEXT_P(result);
    6408             : }
    6409             : 
    6410             : /*
    6411             :  * Check whether the string is in the specified Unicode normalization form.
    6412             :  *
    6413             :  * This is done by converting the string to the specified normal form and then
    6414             :  * comparing that to the original string.  To speed that up, we also apply the
    6415             :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
    6416             :  * answer for many strings by just scanning the string once.
    6417             :  *
    6418             :  * This function should generally be optimized for the case where the string
    6419             :  * is in fact normalized.  In that case, we'll end up looking at the entire
    6420             :  * string, so it's probably not worth doing any incremental conversion etc.
                     :  *
                     :  * Argument 0 is the text to test and argument 1 is the name of the form,
                     :  * which is resolved (and the server encoding checked) by
                     :  * unicode_norm_form_from_string().
    6421             :  */
    6422             : Datum
    6423         138 : unicode_is_normalized(PG_FUNCTION_ARGS)
    6424             : {
    6425         138 :     text       *input = PG_GETARG_TEXT_PP(0);
    6426         138 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
    6427             :     UnicodeNormalizationForm form;
    6428             :     int         size;
    6429             :     pg_wchar   *input_chars;
    6430             :     pg_wchar   *output_chars;
    6431             :     unsigned char *p;
    6432             :     int         i;
    6433             :     UnicodeNormalizationQC quickcheck;
    6434             :     int         output_size;
    6435             :     bool        result;
    6436             : 
    6437         138 :     form = unicode_norm_form_from_string(formstr);
    6438             : 
    6439             :     /* convert to pg_wchar; one extra slot holds the zero terminator */
    6440         132 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
    6441         132 :     input_chars = palloc((size + 1) * sizeof(pg_wchar));
    6442         132 :     p = (unsigned char *) VARDATA_ANY(input);
    6443         504 :     for (i = 0; i < size; i++)
    6444             :     {
    6445         372 :         input_chars[i] = utf8_to_unicode(p);
    6446         372 :         p += pg_utf_mblen(p);
    6447             :     }
    6448         132 :     input_chars[i] = (pg_wchar) '\0';
    6449             :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
    6450             : 
    6451             :     /*
                     :      * quick check (see UAX #15): a definite yes or no is returned at once;
                     :      * any other outcome falls through to the full comparison below.
                     :      */
    6452         132 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
    6453         132 :     if (quickcheck == UNICODE_NORM_QC_YES)
    6454          42 :         PG_RETURN_BOOL(true);
    6455          90 :     else if (quickcheck == UNICODE_NORM_QC_NO)
    6456          12 :         PG_RETURN_BOOL(false);
    6457             : 
    6458             :     /*
                     :      * normalize and compare with original: the input counts as normalized
                     :      * only if the output has the same number of code points and the two
                     :      * pg_wchar arrays are identical.
                     :      */
    6459          78 :     output_chars = unicode_normalize(form, input_chars);
    6460             : 
    6461          78 :     output_size = 0;
    6462         324 :     for (pg_wchar *wp = output_chars; *wp; wp++)
    6463         246 :         output_size++;
    6464             : 
    6465         114 :     result = (size == output_size) &&
    6466          36 :         (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
    6467             : 
    6468          78 :     PG_RETURN_BOOL(result);
    6469             : }
    6470             : 
    6471             : /*
    6472             :  * Check if first n chars are hexadecimal digits
                     :  *
                     :  * Exactly n bytes starting at instr are examined, with no check for a
                     :  * terminating NUL, so the caller must make sure that many bytes are
                     :  * available.  Returns true when n is zero.
    6473             :  */
    6474             : static bool
    6475         156 : isxdigits_n(const char *instr, size_t n)
    6476             : {
    6477         660 :     for (size_t i = 0; i < n; i++)
    6478         570 :         if (!isxdigit((unsigned char) instr[i]))
    6479          66 :             return false;
    6480             : 
    6481          90 :     return true;
    6482             : }
    6483             : 
                     : /*
                     :  * Return the numeric value (0 to 15) of a single hexadecimal digit,
                     :  * accepting both upper- and lower-case letters.  Any other character
                     :  * raises an "invalid hexadecimal digit" error.
                     :  */
    6484             : static unsigned int
    6485         504 : hexval(unsigned char c)
    6486             : {
    6487         504 :     if (c >= '0' && c <= '9')
    6488         384 :         return c - '0';
    6489         120 :     if (c >= 'a' && c <= 'f')
    6490          60 :         return c - 'a' + 0xA;
    6491          60 :     if (c >= 'A' && c <= 'F')
    6492          60 :         return c - 'A' + 0xA;
    6493           0 :     elog(ERROR, "invalid hexadecimal digit");
    6494             :     return 0;                   /* not reached */
    6495             : }
    6496             : 
    6497             : /*
    6498             :  * Translate string with hexadecimal digits to number
                     :  *
                     :  * The first n bytes of instr are read as hexadecimal digits, most
                     :  * significant digit first, and each is converted with hexval(), which
                     :  * raises an error for a non-hex character.  The callers in this file check
                     :  * the digits with isxdigits_n() first and pass n of 4, 6 or 8.
    6499             :  */
    6500             : static unsigned int
    6501          90 : hexval_n(const char *instr, size_t n)
    6502             : {
    6503          90 :     unsigned int result = 0;
    6504             : 
    6505         594 :     for (size_t i = 0; i < n; i++)
    6506         504 :         result += hexval(instr[i]) << (4 * (n - i - 1));
    6507             : 
    6508          90 :     return result;
    6509             : }
    6510             : 
    6511             : /*
    6512             :  * Replaces Unicode escape sequences by Unicode characters
                     :  *
                     :  * The input text is scanned from left to right, using its stored length
                     :  * rather than a terminating NUL.  A doubled backslash produces a single
                     :  * backslash.  A backslash followed by four hex digits, by "u" and four hex
                     :  * digits, by "+" and six hex digits, or by "U" and eight hex digits is
                     :  * decoded to a code point, converted with pg_unicode_to_server() and
                     :  * appended to the result.  A backslash that starts none of these forms
                     :  * raises "invalid Unicode escape".  All other bytes are copied through
                     :  * unchanged.
                     :  *
                     :  * UTF-16 surrogate pairs are supported: a first surrogate is held in
                     :  * pair_first until the next escape, which must be a second surrogate, and
                     :  * the two are then combined with surrogate_pair_to_codepoint().  A first
                     :  * surrogate followed by a literal character, a doubled backslash, an escape
                     :  * that is not a second surrogate, or the end of the input, as well as a
                     :  * second surrogate with no first surrogate before it, raises
                     :  * "invalid Unicode surrogate pair".
    6513             :  */
    6514             : Datum
    6515          66 : unistr(PG_FUNCTION_ARGS)
    6516             : {
    6517          66 :     text       *input_text = PG_GETARG_TEXT_PP(0);
    6518             :     char       *instr;
    6519             :     int         len;
    6520             :     StringInfoData str;
    6521             :     text       *result;
    6522          66 :     pg_wchar    pair_first = 0;
    6523             :     char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    6524             : 
    6525          66 :     instr = VARDATA_ANY(input_text);
    6526          66 :     len = VARSIZE_ANY_EXHDR(input_text);
    6527             : 
    6528          66 :     initStringInfo(&str);
    6529             : 
    6530         510 :     while (len > 0)
    6531             :     {
    6532         486 :         if (instr[0] == '\\')
    6533             :         {
    6534         102 :             if (len >= 2 &&
    6535         102 :                 instr[1] == '\\')
    6536             :             {
    6537           6 :                 if (pair_first)
    6538           0 :                     goto invalid_pair;
    6539           6 :                 appendStringInfoChar(&str, '\\');
    6540           6 :                 instr += 2;
    6541           6 :                 len -= 2;
    6542             :             }
    6543          96 :             else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
    6544          66 :                      (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
    6545          30 :             {
    6546             :                 pg_wchar    unicode;
    6547          42 :                 int         offset = instr[1] == 'u' ? 2 : 1;
    6548             : 
    6549          42 :                 unicode = hexval_n(instr + offset, 4);
    6550             : 
    6551          42 :                 if (!is_valid_unicode_codepoint(unicode))
    6552           0 :                     ereport(ERROR,
    6553             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6554             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6555             : 
    6556          42 :                 if (pair_first)
    6557             :                 {
    6558          12 :                     if (is_utf16_surrogate_second(unicode))
    6559             :                     {
    6560           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6561           0 :                         pair_first = 0;
    6562             :                     }
    6563             :                     else
    6564          12 :                         goto invalid_pair;
    6565             :                 }
    6566          30 :                 else if (is_utf16_surrogate_second(unicode))
    6567           0 :                     goto invalid_pair;
    6568             : 
    6569          30 :                 if (is_utf16_surrogate_first(unicode))
    6570          18 :                     pair_first = unicode;
    6571             :                 else
    6572             :                 {
    6573          12 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6574          12 :                     appendStringInfoString(&str, cbuf);
    6575             :                 }
    6576             : 
    6577          30 :                 instr += 4 + offset;
    6578          30 :                 len -= 4 + offset;
    6579             :             }
    6580          54 :             else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
    6581          12 :             {
    6582             :                 pg_wchar    unicode;
    6583             : 
    6584          24 :                 unicode = hexval_n(instr + 2, 6);
    6585             : 
    6586          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6587           6 :                     ereport(ERROR,
    6588             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6589             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6590             : 
    6591          18 :                 if (pair_first)
    6592             :                 {
    6593           6 :                     if (is_utf16_surrogate_second(unicode))
    6594             :                     {
    6595           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6596           0 :                         pair_first = 0;
    6597             :                     }
    6598             :                     else
    6599           6 :                         goto invalid_pair;
    6600             :                 }
    6601          12 :                 else if (is_utf16_surrogate_second(unicode))
    6602           0 :                     goto invalid_pair;
    6603             : 
    6604          12 :                 if (is_utf16_surrogate_first(unicode))
    6605           6 :                     pair_first = unicode;
    6606             :                 else
    6607             :                 {
    6608           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6609           6 :                     appendStringInfoString(&str, cbuf);
    6610             :                 }
    6611             : 
    6612          12 :                 instr += 8;
    6613          12 :                 len -= 8;
    6614             :             }
    6615          30 :             else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
    6616          12 :             {
    6617             :                 pg_wchar    unicode;
    6618             : 
    6619          24 :                 unicode = hexval_n(instr + 2, 8);
    6620             : 
    6621          24 :                 if (!is_valid_unicode_codepoint(unicode))
    6622           6 :                     ereport(ERROR,
    6623             :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    6624             :                             errmsg("invalid Unicode code point: %04X", unicode));
    6625             : 
    6626          18 :                 if (pair_first)
    6627             :                 {
    6628           6 :                     if (is_utf16_surrogate_second(unicode))
    6629             :                     {
    6630           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
    6631           0 :                         pair_first = 0;
    6632             :                     }
    6633             :                     else
    6634           6 :                         goto invalid_pair;
    6635             :                 }
    6636          12 :                 else if (is_utf16_surrogate_second(unicode))
    6637           0 :                     goto invalid_pair;
    6638             : 
    6639          12 :                 if (is_utf16_surrogate_first(unicode))
    6640           6 :                     pair_first = unicode;
    6641             :                 else
    6642             :                 {
    6643           6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
    6644           6 :                     appendStringInfoString(&str, cbuf);
    6645             :                 }
    6646             : 
    6647          12 :                 instr += 10;
    6648          12 :                 len -= 10;
    6649             :             }
    6650             :             else
    6651           6 :                 ereport(ERROR,
    6652             :                         (errcode(ERRCODE_SYNTAX_ERROR),
    6653             :                          errmsg("invalid Unicode escape"),
    6654             :                          errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
    6655             :         }
    6656             :         else
    6657             :         {
    6658         384 :             if (pair_first)
    6659           0 :                 goto invalid_pair;
    6660             : 
    6661         384 :             appendStringInfoChar(&str, *instr++);
    6662         384 :             len--;
    6663             :         }
    6664             :     }
    6665             : 
    6666             :     /* unfinished surrogate pair?  (input ended right after a first surrogate) */
    6667          24 :     if (pair_first)
    6668           6 :         goto invalid_pair;
    6669             : 
    6670          18 :     result = cstring_to_text_with_len(str.data, str.len);
    6671          18 :     pfree(str.data);
    6672             : 
    6673          18 :     PG_RETURN_TEXT_P(result);
    6674             : 
    6675          30 : invalid_pair:
    6676          30 :     ereport(ERROR,
    6677             :             (errcode(ERRCODE_SYNTAX_ERROR),
    6678             :              errmsg("invalid Unicode surrogate pair")));
    6679             :     PG_RETURN_NULL();           /* keep compiler quiet */
    6680             : }

Generated by: LCOV version 1.14