LCOV - code coverage report
Current view: top level - src/backend/utils/adt - bytea.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 92.4 % 458 423
Test Date: 2026-03-21 19:16:18 Functions: 97.7 % 43 42
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * bytea.c
       4              :  *    Functions for the bytea type.
       5              :  *
       6              :  * Portions Copyright (c) 2025-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/utils/adt/bytea.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/detoast.h"
      18              : #include "common/hashfn.h"
      19              : #include "common/int.h"
      20              : #include "fmgr.h"
      21              : #include "lib/hyperloglog.h"
      22              : #include "libpq/pqformat.h"
      23              : #include "port/pg_bitutils.h"
      24              : #include "port/pg_bswap.h"
      25              : #include "utils/builtins.h"
      26              : #include "utils/bytea.h"
      27              : #include "utils/fmgrprotos.h"
      28              : #include "utils/guc.h"
      29              : #include "utils/memutils.h"
      30              : #include "utils/sortsupport.h"
      31              : #include "utils/uuid.h"
      32              : #include "varatt.h"
      33              : 
/*
 * GUC variable: selects the textual format produced by byteaout().
 * Starts out as hex; byteaout() also understands BYTEA_OUTPUT_ESCAPE.
 */
int         bytea_output = BYTEA_OUTPUT_HEX;

/* Internal helpers shared by the SQL-callable functions in this file */
static bytea *bytea_catenate(bytea *t1, bytea *t2);
static bytea *bytea_substring(Datum str, int S, int L,
                              bool length_not_specified);
static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);

/*
 * Working state for bytea sort support.
 *
 * NOTE(review): the code that fills in and consumes these fields is not in
 * this part of the file; the field comments describe their intended roles.
 */
typedef struct
{
    bool        abbreviate;     /* Should we abbreviate keys? */
    hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
    hyperLogLogState full_card; /* Full key cardinality state */
    double      prop_card;      /* Required cardinality proportion */
} ByteaSortSupport;

/* Static function declarations for sort support */
static int  byteafastcmp(Datum x, Datum y, SortSupport ssup);
static Datum bytea_abbrev_convert(Datum original, SortSupport ssup);
static bool bytea_abbrev_abort(int memtupcount, SortSupport ssup);
      54              : 
      55              : /*
      56              :  * bytea_catenate
      57              :  *  Guts of byteacat(), broken out so it can be used by other functions
      58              :  *
      59              :  * Arguments can be in short-header form, but not compressed or out-of-line
      60              :  */
      61              : static bytea *
      62         1044 : bytea_catenate(bytea *t1, bytea *t2)
      63              : {
      64              :     bytea      *result;
      65              :     int         len1,
      66              :                 len2,
      67              :                 len;
      68              :     char       *ptr;
      69              : 
      70         1044 :     len1 = VARSIZE_ANY_EXHDR(t1);
      71         1044 :     len2 = VARSIZE_ANY_EXHDR(t2);
      72              : 
      73              :     /* paranoia ... probably should throw error instead? */
      74         1044 :     if (len1 < 0)
      75            0 :         len1 = 0;
      76         1044 :     if (len2 < 0)
      77            0 :         len2 = 0;
      78              : 
      79         1044 :     len = len1 + len2 + VARHDRSZ;
      80         1044 :     result = (bytea *) palloc(len);
      81              : 
      82              :     /* Set size of result string... */
      83         1044 :     SET_VARSIZE(result, len);
      84              : 
      85              :     /* Fill data field of result string... */
      86         1044 :     ptr = VARDATA(result);
      87         1044 :     if (len1 > 0)
      88         1044 :         memcpy(ptr, VARDATA_ANY(t1), len1);
      89         1044 :     if (len2 > 0)
      90         1029 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
      91              : 
      92         1044 :     return result;
      93              : }
      94              : 
      95              : #define PG_STR_GET_BYTEA(str_) \
      96              :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
      97              : 
      98              : static bytea *
      99         2810 : bytea_substring(Datum str,
     100              :                 int S,
     101              :                 int L,
     102              :                 bool length_not_specified)
     103              : {
     104              :     int32       S1;             /* adjusted start position */
     105              :     int32       L1;             /* adjusted substring length */
     106              :     int32       E;              /* end position */
     107              : 
     108              :     /*
     109              :      * The logic here should generally match text_substring().
     110              :      */
     111         2810 :     S1 = Max(S, 1);
     112              : 
     113         2810 :     if (length_not_specified)
     114              :     {
     115              :         /*
     116              :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
     117              :          * end of the string if we pass it a negative value for length.
     118              :          */
     119         2640 :         L1 = -1;
     120              :     }
     121          170 :     else if (L < 0)
     122              :     {
     123              :         /* SQL99 says to throw an error for E < S, i.e., negative length */
     124            8 :         ereport(ERROR,
     125              :                 (errcode(ERRCODE_SUBSTRING_ERROR),
     126              :                  errmsg("negative substring length not allowed")));
     127              :         L1 = -1;                /* silence stupider compilers */
     128              :     }
     129          162 :     else if (pg_add_s32_overflow(S, L, &E))
     130              :     {
     131              :         /*
     132              :          * L could be large enough for S + L to overflow, in which case the
     133              :          * substring must run to end of string.
     134              :          */
     135            5 :         L1 = -1;
     136              :     }
     137              :     else
     138              :     {
     139              :         /*
     140              :          * A zero or negative value for the end position can happen if the
     141              :          * start was negative or one. SQL99 says to return a zero-length
     142              :          * string.
     143              :          */
     144          157 :         if (E < 1)
     145            0 :             return PG_STR_GET_BYTEA("");
     146              : 
     147          157 :         L1 = E - S1;
     148              :     }
     149              : 
     150              :     /*
     151              :      * If the start position is past the end of the string, SQL99 says to
     152              :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
     153              :      * us.  We need only convert S1 to zero-based starting position.
     154              :      */
     155         2802 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
     156              : }
     157              : 
     158              : static bytea *
     159           15 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
     160              : {
     161              :     bytea      *result;
     162              :     bytea      *s1;
     163              :     bytea      *s2;
     164              :     int         sp_pl_sl;
     165              : 
     166              :     /*
     167              :      * Check for possible integer-overflow cases.  For negative sp, throw a
     168              :      * "substring length" error because that's what should be expected
     169              :      * according to the spec's definition of OVERLAY().
     170              :      */
     171           15 :     if (sp <= 0)
     172            0 :         ereport(ERROR,
     173              :                 (errcode(ERRCODE_SUBSTRING_ERROR),
     174              :                  errmsg("negative substring length not allowed")));
     175           15 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
     176            0 :         ereport(ERROR,
     177              :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
     178              :                  errmsg("integer out of range")));
     179              : 
     180           15 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
     181           15 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
     182           15 :     result = bytea_catenate(s1, t2);
     183           15 :     result = bytea_catenate(result, s2);
     184              : 
     185           15 :     return result;
     186              : }
     187              : 
     188              : /*****************************************************************************
     189              :  *   USER I/O ROUTINES                                                       *
     190              :  *****************************************************************************/
     191              : 
     192              : #define VAL(CH)         ((CH) - '0')
     193              : #define DIG(VAL)        ((VAL) + '0')
     194              : 
     195              : /*
     196              :  *      byteain         - converts from printable representation of byte array
     197              :  *
     198              :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     199              :  *      converted to internal form.  '\' must be passed as '\\'.
     200              :  */
     201              : Datum
     202       740801 : byteain(PG_FUNCTION_ARGS)
     203              : {
     204       740801 :     char       *inputText = PG_GETARG_CSTRING(0);
     205       740801 :     Node       *escontext = fcinfo->context;
     206       740801 :     size_t      len = strlen(inputText);
     207              :     size_t      bc;
     208              :     char       *tp;
     209              :     char       *rp;
     210              :     bytea      *result;
     211              : 
     212              :     /* Recognize hex input */
     213       740801 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     214              :     {
     215        56385 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     216        56385 :         result = palloc(bc);
     217        56385 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
     218              :                              escontext);
     219        56345 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     220              : 
     221        56345 :         PG_RETURN_BYTEA_P(result);
     222              :     }
     223              : 
     224              :     /* Else, it's the traditional escaped style */
     225       684416 :     result = (bytea *) palloc(len + VARHDRSZ);  /* maximum possible length */
     226              : 
     227       684416 :     tp = inputText;
     228       684416 :     rp = VARDATA(result);
     229      5499483 :     while (*tp != '\0')
     230              :     {
     231      4815075 :         if (tp[0] != '\\')
     232      4814425 :             *rp++ = *tp++;
     233          650 :         else if ((tp[1] >= '0' && tp[1] <= '3') &&
     234          634 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     235          634 :                  (tp[3] >= '0' && tp[3] <= '7'))
     236          634 :         {
     237              :             int         v;
     238              : 
     239          634 :             v = VAL(tp[1]);
     240          634 :             v <<= 3;
     241          634 :             v += VAL(tp[2]);
     242          634 :             v <<= 3;
     243          634 :             *rp++ = v + VAL(tp[3]);
     244              : 
     245          634 :             tp += 4;
     246              :         }
     247           16 :         else if (tp[1] == '\\')
     248              :         {
     249            8 :             *rp++ = '\\';
     250            8 :             tp += 2;
     251              :         }
     252              :         else
     253              :         {
     254              :             /*
     255              :              * one backslash, not followed by another or ### valid octal
     256              :              */
     257            8 :             ereturn(escontext, (Datum) 0,
     258              :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     259              :                      errmsg("invalid input syntax for type %s", "bytea")));
     260              :         }
     261              :     }
     262              : 
     263       684408 :     bc = rp - VARDATA(result);  /* actual length */
     264       684408 :     SET_VARSIZE(result, bc + VARHDRSZ);
     265              : 
     266       684408 :     PG_RETURN_BYTEA_P(result);
     267              : }
     268              : 
     269              : /*
     270              :  *      byteaout        - converts to printable representation of byte array
     271              :  *
     272              :  *      In the traditional escaped format, non-printable characters are
     273              :  *      printed as '\nnn' (octal) and '\' as '\\'.
     274              :  */
     275              : Datum
     276       282995 : byteaout(PG_FUNCTION_ARGS)
     277              : {
     278       282995 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     279              :     char       *result;
     280              :     char       *rp;
     281              : 
     282       282995 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     283              :     {
     284              :         /* Print hex format */
     285       282740 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     286       282740 :         *rp++ = '\\';
     287       282740 :         *rp++ = 'x';
     288       282740 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     289              :     }
     290          255 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     291              :     {
     292              :         /* Print traditional escaped format */
     293              :         char       *vp;
     294              :         uint64      len;
     295              :         int         i;
     296              : 
     297          255 :         len = 1;                /* empty string has 1 char */
     298          255 :         vp = VARDATA_ANY(vlena);
     299       145128 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     300              :         {
     301       144873 :             if (*vp == '\\')
     302            4 :                 len += 2;
     303       144869 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     304          330 :                 len += 4;
     305              :             else
     306       144539 :                 len++;
     307              :         }
     308              : 
     309              :         /*
     310              :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     311              :          * for safety let's check rather than relying on palloc's internal
     312              :          * check.
     313              :          */
     314          255 :         if (len > MaxAllocSize)
     315            0 :             ereport(ERROR,
     316              :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     317              :                      errmsg_internal("result of bytea output conversion is too large")));
     318          255 :         rp = result = (char *) palloc(len);
     319              : 
     320          255 :         vp = VARDATA_ANY(vlena);
     321       145128 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     322              :         {
     323       144873 :             if (*vp == '\\')
     324              :             {
     325            4 :                 *rp++ = '\\';
     326            4 :                 *rp++ = '\\';
     327              :             }
     328       144869 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     329          330 :             {
     330              :                 int         val;    /* holds unprintable chars */
     331              : 
     332          330 :                 val = *vp;
     333          330 :                 rp[0] = '\\';
     334          330 :                 rp[3] = DIG(val & 07);
     335          330 :                 val >>= 3;
     336          330 :                 rp[2] = DIG(val & 07);
     337          330 :                 val >>= 3;
     338          330 :                 rp[1] = DIG(val & 03);
     339          330 :                 rp += 4;
     340              :             }
     341              :             else
     342       144539 :                 *rp++ = *vp;
     343              :         }
     344              :     }
     345              :     else
     346              :     {
     347            0 :         elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
     348              :              bytea_output);
     349              :         rp = result = NULL;     /* keep compiler quiet */
     350              :     }
     351       282995 :     *rp = '\0';
     352       282995 :     PG_RETURN_CSTRING(result);
     353              : }
     354              : 
     355              : /*
     356              :  *      bytearecv           - converts external binary format to bytea
     357              :  */
     358              : Datum
     359        54019 : bytearecv(PG_FUNCTION_ARGS)
     360              : {
     361        54019 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     362              :     bytea      *result;
     363              :     int         nbytes;
     364              : 
     365        54019 :     nbytes = buf->len - buf->cursor;
     366        54019 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     367        54019 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     368        54019 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     369        54019 :     PG_RETURN_BYTEA_P(result);
     370              : }
     371              : 
     372              : /*
     373              :  *      byteasend           - converts bytea to binary format
     374              :  *
     375              :  * This is a special case: just copy the input...
     376              :  */
     377              : Datum
     378        34514 : byteasend(PG_FUNCTION_ARGS)
     379              : {
     380        34514 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     381              : 
     382        34514 :     PG_RETURN_BYTEA_P(vlena);
     383              : }
     384              : 
     385              : Datum
     386       139394 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     387              : {
     388              :     StringInfo  state;
     389              : 
     390       139394 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     391              : 
     392              :     /* Append the value unless null, preceding it with the delimiter. */
     393       139394 :     if (!PG_ARGISNULL(1))
     394              :     {
     395       129394 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     396       129394 :         bool        isfirst = false;
     397              : 
     398              :         /*
     399              :          * You might think we can just throw away the first delimiter, however
     400              :          * we must keep it as we may be a parallel worker doing partial
     401              :          * aggregation building a state to send to the main process.  We need
     402              :          * to keep the delimiter of every aggregation so that the combine
     403              :          * function can properly join up the strings of two separately
     404              :          * partially aggregated results.  The first delimiter is only stripped
     405              :          * off in the final function.  To know how much to strip off the front
     406              :          * of the string, we store the length of the first delimiter in the
     407              :          * StringInfo's cursor field, which we don't otherwise need here.
     408              :          */
     409       129394 :         if (state == NULL)
     410              :         {
     411              :             MemoryContext aggcontext;
     412              :             MemoryContext oldcontext;
     413              : 
     414          118 :             if (!AggCheckCallContext(fcinfo, &aggcontext))
     415              :             {
     416              :                 /* cannot be called directly because of internal-type argument */
     417            0 :                 elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
     418              :             }
     419              : 
     420              :             /*
     421              :              * Create state in aggregate context.  It'll stay there across
     422              :              * subsequent calls.
     423              :              */
     424          118 :             oldcontext = MemoryContextSwitchTo(aggcontext);
     425          118 :             state = makeStringInfo();
     426          118 :             MemoryContextSwitchTo(oldcontext);
     427              : 
     428          118 :             isfirst = true;
     429              :         }
     430              : 
     431       129394 :         if (!PG_ARGISNULL(2))
     432              :         {
     433       129386 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     434              : 
     435       129386 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
     436       129386 :                                    VARSIZE_ANY_EXHDR(delim));
     437       129386 :             if (isfirst)
     438          114 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
     439              :         }
     440              : 
     441       129394 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
     442       129394 :                                VARSIZE_ANY_EXHDR(value));
     443              :     }
     444              : 
     445              :     /*
     446              :      * The transition type for string_agg() is declared to be "internal",
     447              :      * which is a pass-by-value type the same size as a pointer.
     448              :      */
     449       139394 :     if (state)
     450       139365 :         PG_RETURN_POINTER(state);
     451           29 :     PG_RETURN_NULL();
     452              : }
     453              : 
     454              : Datum
     455          102 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     456              : {
     457              :     StringInfo  state;
     458              : 
     459              :     /* cannot be called directly because of internal-type argument */
     460              :     Assert(AggCheckCallContext(fcinfo, NULL));
     461              : 
     462          102 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     463              : 
     464          102 :     if (state != NULL)
     465              :     {
     466              :         /* As per comment in transfn, strip data before the cursor position */
     467              :         bytea      *result;
     468           98 :         int         strippedlen = state->len - state->cursor;
     469              : 
     470           98 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
     471           98 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
     472           98 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
     473           98 :         PG_RETURN_BYTEA_P(result);
     474              :     }
     475              :     else
     476            4 :         PG_RETURN_NULL();
     477              : }
     478              : 
     479              : /*-------------------------------------------------------------
     480              :  * byteaoctetlen
     481              :  *
     482              :  * get the number of bytes contained in an instance of type 'bytea'
     483              :  *-------------------------------------------------------------
     484              :  */
     485              : Datum
     486          805 : byteaoctetlen(PG_FUNCTION_ARGS)
     487              : {
     488          805 :     Datum       str = PG_GETARG_DATUM(0);
     489              : 
     490              :     /* We need not detoast the input at all */
     491          805 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     492              : }
     493              : 
     494              : /*
     495              :  * byteacat -
     496              :  *    takes two bytea* and returns a bytea* that is the concatenation of
     497              :  *    the two.
     498              :  *
     499              :  * Cloned from textcat and modified as required.
     500              :  */
     501              : Datum
     502         1014 : byteacat(PG_FUNCTION_ARGS)
     503              : {
     504         1014 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     505         1014 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     506              : 
     507         1014 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
     508              : }
     509              : 
     510              : /*
     511              :  * byteaoverlay
     512              :  *  Replace specified substring of first string with second
     513              :  *
     514              :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
     515              :  * This code is a direct implementation of what the standard says.
     516              :  */
     517              : Datum
     518            5 : byteaoverlay(PG_FUNCTION_ARGS)
     519              : {
     520            5 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     521            5 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     522            5 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
     523            5 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
     524              : 
     525            5 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
     526              : }
     527              : 
     528              : Datum
     529           10 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
     530              : {
     531           10 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     532           10 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     533           10 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
     534              :     int         sl;
     535              : 
     536           10 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
     537           10 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
     538              : }
     539              : 
     540              : /*
     541              :  * bytea_substr()
     542              :  * Return a substring starting at the specified position.
     543              :  * Cloned from text_substr and modified as required.
     544              :  *
     545              :  * Input:
     546              :  *  - string
     547              :  *  - starting position (is one-based)
     548              :  *  - string length (optional)
     549              :  *
     550              :  * If the starting position is zero or less, then return from the start of the string
     551              :  * adjusting the length to be consistent with the "negative start" per SQL.
     552              :  * If the length is less than zero, an ERROR is thrown. If no third argument
     553              :  * (length) is provided, the length to the end of the string is assumed.
     554              :  */
     555              : Datum
     556          155 : bytea_substr(PG_FUNCTION_ARGS)
     557              : {
     558          155 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
     559              :                                       PG_GETARG_INT32(1),
     560              :                                       PG_GETARG_INT32(2),
     561              :                                       false));
     562              : }
     563              : 
     564              : /*
     565              :  * bytea_substr_no_len -
     566              :  *    Wrapper to avoid opr_sanity failure due to
     567              :  *    one function accepting a different number of args.
     568              :  */
     569              : Datum
     570         2625 : bytea_substr_no_len(PG_FUNCTION_ARGS)
     571              : {
     572         2625 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
     573              :                                       PG_GETARG_INT32(1),
     574              :                                       -1,
     575              :                                       true));
     576              : }
     577              : 
     578              : /*
     579              :  * bit_count
     580              :  */
     581              : Datum
     582            5 : bytea_bit_count(PG_FUNCTION_ARGS)
     583              : {
     584            5 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     585              : 
     586            5 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
     587              : }
     588              : 
     589              : /*
     590              :  * byteapos -
     591              :  *    Return the position of the specified substring.
     592              :  *    Implements the SQL POSITION() function.
     593              :  * Cloned from textpos and modified as required.
                         *
                         * Argument 0 is the value searched, argument 1 the pattern.  The
                         * result is the 1-based offset of the first match, or 0 if there is
                         * no match.  An empty pattern yields 1.
     594              :  */
     595              : Datum
     596           25 : byteapos(PG_FUNCTION_ARGS)
     597              : {
     598           25 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     599           25 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     600              :     int         pos;
     601              :     int         px,
     602              :                 p;
     603              :     int         len1,
     604              :                 len2;
     605              :     char       *p1,
     606              :                *p2;
     607              : 
     608           25 :     len1 = VARSIZE_ANY_EXHDR(t1);
     609           25 :     len2 = VARSIZE_ANY_EXHDR(t2);
     610              : 
     611           25 :     if (len2 <= 0)
     612            5 :         PG_RETURN_INT32(1);     /* result for empty pattern */
     613              : 
     614           20 :     p1 = VARDATA_ANY(t1);
     615           20 :     p2 = VARDATA_ANY(t2);
     616              : 
                            /*
                             * px is the last start offset that still leaves len2 bytes to
                             * compare.  It is negative when the pattern is longer than the
                             * searched value, in which case the loop body never runs and pos
                             * stays 0.
                             */
     617           20 :     pos = 0;
     618           20 :     px = (len1 - len2);
     619           45 :     for (p = 0; p <= px; p++)
     620              :     {
                                /* cheap first-byte test before the full memcmp() */
     621           35 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
     622              :         {
     623           10 :             pos = p + 1;
     624           10 :             break;
     625              :         };
     626           25 :         p1++;
     627              :     };
     628              : 
     629           20 :     PG_RETURN_INT32(pos);
     630              : }
     631              : 
     632              : /*-------------------------------------------------------------
     633              :  * byteaGetByte
     634              :  *
     635              :  * this routine treats "bytea" as an array of bytes.
     636              :  * It returns the Nth byte (a number between 0 and 255).
                         *
                         * N (argument 1, int32) is zero-based.  An N outside 0..len-1
                         * raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.
     637              :  *-------------------------------------------------------------
     638              :  */
     639              : Datum
     640           41 : byteaGetByte(PG_FUNCTION_ARGS)
     641              : {
     642           41 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     643           41 :     int32       n = PG_GETARG_INT32(1);
     644              :     int         len;
     645              :     int         byte;
     646              : 
     647           41 :     len = VARSIZE_ANY_EXHDR(v);
     648              : 
     649           41 :     if (n < 0 || n >= len)
     650            4 :         ereport(ERROR,
     651              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     652              :                  errmsg("index %d out of valid range, 0..%d",
     653              :                         n, len - 1)));
     654              : 
                            /* read through unsigned char so the result is never negative */
     655           37 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
     656              : 
     657           37 :     PG_RETURN_INT32(byte);
     658              : }
     659              : 
     660              : /*-------------------------------------------------------------
     661              :  * byteaGetBit
     662              :  *
     663              :  * This routine treats a "bytea" type like an array of bits.
     664              :  * It returns the value of the Nth bit (0 or 1).
     665              :  *
                         * N (argument 1, int64) is zero-based.  Bit N is bit (N % 8) of
                         * byte (N / 8), tested with the mask (1 << (N % 8)).  An N outside
                         * 0..len*8-1 raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.
                         *
     666              :  *-------------------------------------------------------------
     667              :  */
     668              : Datum
     669            9 : byteaGetBit(PG_FUNCTION_ARGS)
     670              : {
     671            9 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     672            9 :     int64       n = PG_GETARG_INT64(1);
     673              :     int         byteNo,
     674              :                 bitNo;
     675              :     int         len;
     676              :     int         byte;
     677              : 
     678            9 :     len = VARSIZE_ANY_EXHDR(v);
     679              : 
                            /* the bit count is computed in int64 before comparing with n */
     680            9 :     if (n < 0 || n >= (int64) len * 8)
     681            4 :         ereport(ERROR,
     682              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     683              :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
     684              :                         n, (int64) len * 8 - 1)));
     685              : 
     686              :     /* n/8 is now known < len, so safe to cast to int */
     687            5 :     byteNo = (int) (n / 8);
     688            5 :     bitNo = (int) (n % 8);
     689              : 
     690            5 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
     691              : 
     692            5 :     if (byte & (1 << bitNo))
     693            5 :         PG_RETURN_INT32(1);
     694              :     else
     695            0 :         PG_RETURN_INT32(0);
     696              : }
     697              : 
     698              : /*-------------------------------------------------------------
     699              :  * byteaSetByte
     700              :  *
     701              :  * Given an instance of type 'bytea' creates a new one with
     702              :  * the Nth byte set to the given value.
     703              :  *
                         * The input is fetched with PG_GETARG_BYTEA_P_COPY; that copy is
                         * modified and returned.  N (argument 1, int32) is zero-based, and
                         * an N outside 0..len-1 raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.
                         *
                         * The new value (argument 2, int32) is not range-checked.  It is
                         * stored through an unsigned char lvalue, so a value outside the
                         * unsigned char range is reduced by the usual C conversion.
                         *
     704              :  *-------------------------------------------------------------
     705              :  */
     706              : Datum
     707          265 : byteaSetByte(PG_FUNCTION_ARGS)
     708              : {
     709          265 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
     710          265 :     int32       n = PG_GETARG_INT32(1);
     711          265 :     int32       newByte = PG_GETARG_INT32(2);
     712              :     int         len;
     713              : 
     714          265 :     len = VARSIZE(res) - VARHDRSZ;
     715              : 
     716          265 :     if (n < 0 || n >= len)
     717            4 :         ereport(ERROR,
     718              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     719              :                  errmsg("index %d out of valid range, 0..%d",
     720              :                         n, len - 1)));
     721              : 
     722              :     /*
     723              :      * Now set the byte.
     724              :      */
     725          261 :     ((unsigned char *) VARDATA(res))[n] = newByte;
     726              : 
     727          261 :     PG_RETURN_BYTEA_P(res);
     728              : }
     729              : 
     730              : /*-------------------------------------------------------------
     731              :  * byteaSetBit
     732              :  *
     733              :  * Given an instance of type 'bytea' creates a new one with
     734              :  * the Nth bit set to the given value.
     735              :  *
                         * The input is fetched with PG_GETARG_BYTEA_P_COPY; that copy is
                         * modified and returned.  N (argument 1, int64) is zero-based and
                         * addresses bit (N % 8) of byte (N / 8).
                         *
                         * Errors: an N outside 0..len*8-1 raises
                         * ERRCODE_ARRAY_SUBSCRIPT_ERROR; a new bit value other than 0 or 1
                         * raises ERRCODE_INVALID_PARAMETER_VALUE.  The index is checked
                         * first, so it is the error reported when both are invalid.
                         *
     736              :  *-------------------------------------------------------------
     737              :  */
     738              : Datum
     739            9 : byteaSetBit(PG_FUNCTION_ARGS)
     740              : {
     741            9 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
     742            9 :     int64       n = PG_GETARG_INT64(1);
     743            9 :     int32       newBit = PG_GETARG_INT32(2);
     744              :     int         len;
     745              :     int         oldByte,
     746              :                 newByte;
     747              :     int         byteNo,
     748              :                 bitNo;
     749              : 
     750            9 :     len = VARSIZE(res) - VARHDRSZ;
     751              : 
     752            9 :     if (n < 0 || n >= (int64) len * 8)
     753            4 :         ereport(ERROR,
     754              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     755              :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
     756              :                         n, (int64) len * 8 - 1)));
     757              : 
     758              :     /* n/8 is now known < len, so safe to cast to int */
     759            5 :     byteNo = (int) (n / 8);
     760            5 :     bitNo = (int) (n % 8);
     761              : 
     762              :     /*
     763              :      * sanity check!
     764              :      */
     765            5 :     if (newBit != 0 && newBit != 1)
     766            0 :         ereport(ERROR,
     767              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     768              :                  errmsg("new bit must be 0 or 1")));
     769              : 
     770              :     /*
     771              :      * Update the byte.
     772              :      */
     773            5 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
     774              : 
                            /* clear the target bit for 0, set it for 1; other bits are kept */
     775            5 :     if (newBit == 0)
     776            5 :         newByte = oldByte & (~(1 << bitNo));
     777              :     else
     778            0 :         newByte = oldByte | (1 << bitNo);
     779              : 
     780            5 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
     781              : 
     782            5 :     PG_RETURN_BYTEA_P(res);
     783              : }
     784              : 
     785              : /*
     786              :  * Return reversed bytea
                         *
                         * Allocates a new value of the same data length with palloc() and
                         * fills it from its last byte backwards while reading the input
                         * from its first byte forwards.
     787              :  */
     788              : Datum
     789           15 : bytea_reverse(PG_FUNCTION_ARGS)
     790              : {
     791           15 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     792           15 :     const char *p = VARDATA_ANY(v);
     793           15 :     int         len = VARSIZE_ANY_EXHDR(v);
     794           15 :     const char *endp = p + len;
     795           15 :     bytea      *result = palloc(len + VARHDRSZ);
                            /* dst starts one past the end of the output data area */
     796           15 :     char       *dst = (char *) VARDATA(result) + len;
     797              : 
     798           15 :     SET_VARSIZE(result, len + VARHDRSZ);
     799              : 
     800           30 :     while (p < endp)
     801           15 :         *(--dst) = *p++;
     802              : 
     803           15 :     PG_RETURN_BYTEA_P(result);
     804              : }
     805              : 
     806              : 
     807              : /*****************************************************************************
     808              :  *  Comparison Functions used for bytea
     809              :  *
     810              :  * Note: btree indexes need these routines not to leak memory; therefore,
     811              :  * be careful to free working copies of toasted datums.  Most places don't
     812              :  * need to be so careful.
     813              :  *****************************************************************************/
     814              : 
     815              : Datum
     816         6592 : byteaeq(PG_FUNCTION_ARGS)
     817              : {
                            /*
                             * byteaeq - equality test for two bytea values.
                             *
                             * Returns true only if both sizes match and the data bytes compare
                             * equal with memcmp().
                             */
     818         6592 :     Datum       arg1 = PG_GETARG_DATUM(0);
     819         6592 :     Datum       arg2 = PG_GETARG_DATUM(1);
     820              :     bool        result;
     821              :     Size        len1,
     822              :                 len2;
     823              : 
     824              :     /*
     825              :      * We can use a fast path for unequal lengths, which might save us from
     826              :      * having to detoast one or both values.
     827              :      */
     828         6592 :     len1 = toast_raw_datum_size(arg1);
     829         6592 :     len2 = toast_raw_datum_size(arg2);
     830         6592 :     if (len1 != len2)
     831         2340 :         result = false;
     832              :     else
     833              :     {
     834         4252 :         bytea      *barg1 = DatumGetByteaPP(arg1);
     835         4252 :         bytea      *barg2 = DatumGetByteaPP(arg2);
     836              : 
                                /*
                                 * NOTE(review): len1 is presumably the raw size including the
                                 * VARHDRSZ header, hence the subtraction -- confirm against
                                 * toast_raw_datum_size().
                                 */
     837         4252 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
     838              :                          len1 - VARHDRSZ) == 0);
     839              : 
     840         4252 :         PG_FREE_IF_COPY(barg1, 0);
     841         4252 :         PG_FREE_IF_COPY(barg2, 1);
     842              :     }
     843              : 
     844         6592 :     PG_RETURN_BOOL(result);
     845              : }
     846              : 
     847              : Datum
     848          520 : byteane(PG_FUNCTION_ARGS)
     849              : {
                            /*
                             * byteane - inequality test for two bytea values.
                             *
                             * Returns true if the sizes differ, or if the sizes match but
                             * memcmp() finds a difference in the data bytes.
                             */
     850          520 :     Datum       arg1 = PG_GETARG_DATUM(0);
     851          520 :     Datum       arg2 = PG_GETARG_DATUM(1);
     852              :     bool        result;
     853              :     Size        len1,
     854              :                 len2;
     855              : 
     856              :     /*
     857              :      * We can use a fast path for unequal lengths, which might save us from
     858              :      * having to detoast one or both values.
     859              :      */
     860          520 :     len1 = toast_raw_datum_size(arg1);
     861          520 :     len2 = toast_raw_datum_size(arg2);
     862          520 :     if (len1 != len2)
     863            8 :         result = true;
     864              :     else
     865              :     {
     866          512 :         bytea      *barg1 = DatumGetByteaPP(arg1);
     867          512 :         bytea      *barg2 = DatumGetByteaPP(arg2);
     868              : 
                                /*
                                 * NOTE(review): len1 is presumably the raw size including the
                                 * VARHDRSZ header, hence the subtraction -- confirm against
                                 * toast_raw_datum_size().
                                 */
     869          512 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
     870              :                          len1 - VARHDRSZ) != 0);
     871              : 
     872          512 :         PG_FREE_IF_COPY(barg1, 0);
     873          512 :         PG_FREE_IF_COPY(barg2, 1);
     874              :     }
     875              : 
     876          520 :     PG_RETURN_BOOL(result);
     877              : }
     878              : 
     879              : Datum
     880         4636 : bytealt(PG_FUNCTION_ARGS)
     881              : {
                            /*
                             * bytealt - "less than" for bytea.
                             *
                             * The common prefix (Min(len1, len2) bytes) is compared with
                             * memcmp(); if it ties, the shorter value is the lesser one.
                             */
     882         4636 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     883         4636 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     884              :     int         len1,
     885              :                 len2;
     886              :     int         cmp;
     887              : 
     888         4636 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     889         4636 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     890              : 
     891         4636 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     892              : 
                            /* only cmp, len1 and len2 are used after this point */
     893         4636 :     PG_FREE_IF_COPY(arg1, 0);
     894         4636 :     PG_FREE_IF_COPY(arg2, 1);
     895              : 
     896         4636 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
     897              : }
     898              : 
     899              : Datum
     900         3578 : byteale(PG_FUNCTION_ARGS)
     901              : {
                            /*
                             * byteale - "less than or equal" for bytea.
                             *
                             * The common prefix (Min(len1, len2) bytes) is compared with
                             * memcmp(); if it ties, the result is true when the first value
                             * is not longer than the second.
                             */
     902         3578 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     903         3578 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     904              :     int         len1,
     905              :                 len2;
     906              :     int         cmp;
     907              : 
     908         3578 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     909         3578 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     910              : 
     911         3578 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     912              : 
                            /* only cmp, len1 and len2 are used after this point */
     913         3578 :     PG_FREE_IF_COPY(arg1, 0);
     914         3578 :     PG_FREE_IF_COPY(arg2, 1);
     915              : 
     916         3578 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
     917              : }
     918              : 
     919              : Datum
     920         3646 : byteagt(PG_FUNCTION_ARGS)
     921              : {
                            /*
                             * byteagt - "greater than" for bytea.
                             *
                             * The common prefix (Min(len1, len2) bytes) is compared with
                             * memcmp(); if it ties, the longer value is the greater one.
                             */
     922         3646 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     923         3646 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     924              :     int         len1,
     925              :                 len2;
     926              :     int         cmp;
     927              : 
     928         3646 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     929         3646 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     930              : 
     931         3646 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     932              : 
                            /* only cmp, len1 and len2 are used after this point */
     933         3646 :     PG_FREE_IF_COPY(arg1, 0);
     934         3646 :     PG_FREE_IF_COPY(arg2, 1);
     935              : 
     936         3646 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
     937              : }
     938              : 
     939              : Datum
     940         2834 : byteage(PG_FUNCTION_ARGS)
     941              : {
                            /*
                             * byteage - "greater than or equal" for bytea.
                             *
                             * The common prefix (Min(len1, len2) bytes) is compared with
                             * memcmp(); if it ties, the result is true when the first value
                             * is not shorter than the second.
                             */
     942         2834 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     943         2834 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     944              :     int         len1,
     945              :                 len2;
     946              :     int         cmp;
     947              : 
     948         2834 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     949         2834 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     950              : 
     951         2834 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     952              : 
                            /* only cmp, len1 and len2 are used after this point */
     953         2834 :     PG_FREE_IF_COPY(arg1, 0);
     954         2834 :     PG_FREE_IF_COPY(arg2, 1);
     955              : 
     956         2834 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
     957              : }
     958              : 
     959              : Datum
     960        47296 : byteacmp(PG_FUNCTION_ARGS)
     961              : {
                            /*
                             * byteacmp - three-way comparison of two bytea values.
                             *
                             * Returns the memcmp() result over the common prefix when it is
                             * nonzero.  If the prefix ties, returns -1 when the first value is
                             * shorter, 1 when it is longer, and 0 when the lengths are equal.
                             */
     962        47296 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     963        47296 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     964              :     int         len1,
     965              :                 len2;
     966              :     int         cmp;
     967              : 
     968        47296 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     969        47296 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     970              : 
     971        47296 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
                            /* equal prefix: the length decides */
     972        47296 :     if ((cmp == 0) && (len1 != len2))
     973         1700 :         cmp = (len1 < len2) ? -1 : 1;
     974              : 
     975        47296 :     PG_FREE_IF_COPY(arg1, 0);
     976        47296 :     PG_FREE_IF_COPY(arg2, 1);
     977              : 
     978        47296 :     PG_RETURN_INT32(cmp);
     979              : }
     980              : 
     981              : Datum
     982           16 : bytea_larger(PG_FUNCTION_ARGS)
     983              : {
                            /*
                             * bytea_larger - return the greater of two bytea values.
                             *
                             * Uses the same ordering as the comparison operators: memcmp() on
                             * the common prefix, then length.  When the values compare equal,
                             * arg2 is returned.  The result is one of the argument pointers
                             * itself; nothing is copied or freed here.
                             */
     984           16 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     985           16 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     986              :     bytea      *result;
     987              :     int         len1,
     988              :                 len2;
     989              :     int         cmp;
     990              : 
     991           16 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     992           16 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     993              : 
     994           16 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     995           16 :     result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
     996              : 
     997           16 :     PG_RETURN_BYTEA_P(result);
     998              : }
     999              : 
    1000              : Datum
    1001           16 : bytea_smaller(PG_FUNCTION_ARGS)
    1002              : {
                            /*
                             * bytea_smaller - return the lesser of two bytea values.
                             *
                             * Uses the same ordering as the comparison operators: memcmp() on
                             * the common prefix, then length.  When the values compare equal,
                             * arg2 is returned.  The result is one of the argument pointers
                             * itself; nothing is copied or freed here.
                             */
    1003           16 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    1004           16 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    1005              :     bytea      *result;
    1006              :     int         len1,
    1007              :                 len2;
    1008              :     int         cmp;
    1009              : 
    1010           16 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1011           16 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1012              : 
    1013           16 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    1014           16 :     result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
    1015              : 
    1016           16 :     PG_RETURN_BYTEA_P(result);
    1017              : }
    1018              : 
    1019              : /*
    1020              :  * sortsupport comparison func
                         *
                         * Compares the bytea values in datums x and y: memcmp() over the
                         * common prefix, with the shorter value sorting first on a tie.
                         * Returns a negative, zero or positive int accordingly.  The ssup
                         * argument is not used in the body.
    1021              :  */
    1022              : static int
    1023         8054 : byteafastcmp(Datum x, Datum y, SortSupport ssup)
    1024              : {
    1025         8054 :     bytea      *arg1 = DatumGetByteaPP(x);
    1026         8054 :     bytea      *arg2 = DatumGetByteaPP(y);
    1027              :     char       *a1p,
    1028              :                *a2p;
    1029              :     int         len1,
    1030              :                 len2,
    1031              :                 result;
    1032              : 
    1033         8054 :     a1p = VARDATA_ANY(arg1);
    1034         8054 :     a2p = VARDATA_ANY(arg2);
    1035              : 
    1036         8054 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1037         8054 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1038              : 
    1039         8054 :     result = memcmp(a1p, a2p, Min(len1, len2));
    1040         8054 :     if ((result == 0) && (len1 != len2))
    1041          252 :         result = (len1 < len2) ? -1 : 1;
    1042              : 
    1043              :     /* We can't afford to leak memory here. */
                            /* a pointer that differs from the input datum is treated as a copy */
    1044         8054 :     if (PointerGetDatum(arg1) != x)
    1045            0 :         pfree(arg1);
    1046         8054 :     if (PointerGetDatum(arg2) != y)
    1047            0 :         pfree(arg2);
    1048              : 
    1049         8054 :     return result;
    1050              : }
    1051              : 
    1052              : /*
    1053              :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    1054              :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    1055              :  * of the bytea data into a Datum (on little-endian machines, the bytes are
    1056              :  * stored in reverse order), and treat it as an unsigned integer.
                         *
                         * As a side effect, a hash of the full key and a hash of the
                         * abbreviated key are added to the two HyperLogLog counters kept in
                         * the ByteaSortSupport that ssup->ssup_extra points to.
    1057              :  */
    1058              : static Datum
    1059           12 : bytea_abbrev_convert(Datum original, SortSupport ssup)
    1060              : {
    1061           12 :     const size_t max_prefix_bytes = sizeof(Datum);
    1062           12 :     ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
    1063           12 :     bytea      *authoritative = DatumGetByteaPP(original);
    1064           12 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    1065              :     Datum       res;
    1066              :     char       *pres;
    1067              :     int         len;
    1068              :     uint32      hash;
    1069              : 
    1070           12 :     pres = (char *) &res;
    1071              : 
    1072              :     /* memset(), so any non-overwritten bytes are NUL */
    1073           12 :     memset(pres, 0, max_prefix_bytes);
    1074           12 :     len = VARSIZE_ANY_EXHDR(authoritative);
    1075              : 
    1076              :     /*
    1077              :      * Short byteas will have terminating NUL bytes in the abbreviated datum.
    1078              :      * Abbreviated comparison need not make a distinction between these NUL
    1079              :      * bytes, and NUL bytes representing actual NULs in the authoritative
    1080              :      * representation.
    1081              :      *
    1082              :      * Hopefully a comparison at or past one abbreviated key's terminating NUL
    1083              :      * byte will resolve the comparison without consulting the authoritative
    1084              :      * representation; specifically, some later non-NUL byte in the longer
    1085              :      * bytea can resolve the comparison against a subsequent terminating NUL
    1086              :      * in the shorter bytea.  There will usually be what is effectively a
    1087              :      * "length-wise" resolution there and then.
    1088              :      *
    1089              :      * If that doesn't work out -- if all bytes in the longer bytea positioned
    1090              :      * at or past the offset of the smaller bytea (first) terminating NUL are
    1091              :      * actually representative of NUL bytes in the authoritative binary bytea
    1092              :      * (perhaps with some *terminating* NUL bytes towards the end of the
    1093              :      * longer bytea iff it happens to still be small) -- then an authoritative
    1094              :      * tie-breaker will happen, and do the right thing: explicitly consider
    1095              :      * bytea length.
    1096              :      */
    1097           12 :     memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
    1098              : 
    1099              :     /*
    1100              :      * Maintain approximate cardinality of both abbreviated keys and original,
    1101              :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    1102              :      * the worst case, where we do many string abbreviations for no saving in
    1103              :      * full memcmp()-based comparisons.  These statistics are used by
    1104              :      * bytea_abbrev_abort().
    1105              :      *
    1106              :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    1107              :      * in order to compensate for cases where differences are past
    1108              :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    1109              :      */
    1110           12 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    1111              :                                    Min(len, PG_CACHE_LINE_SIZE)));
    1112              : 
    1113           12 :     if (len > PG_CACHE_LINE_SIZE)
    1114            0 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    1115              : 
    1116           12 :     addHyperLogLog(&bss->full_card, hash);
    1117              : 
    1118              :     /* Hash abbreviated key */
    1119              :     {
    1120              :         uint32      tmp;
    1121              : 
                                /* fold the two 32-bit halves of the datum together before hashing */
    1122           12 :         tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
    1123           12 :         hash = DatumGetUInt32(hash_uint32(tmp));
    1124              :     }
    1125              : 
    1126           12 :     addHyperLogLog(&bss->abbr_card, hash);
    1127              : 
    1128              :     /*
    1129              :      * Byteswap on little-endian machines.
    1130              :      *
    1131              :      * This is needed so that ssup_datum_unsigned_cmp() works correctly on all
    1132              :      * platforms.
    1133              :      */
    1134           12 :     res = DatumBigEndianToNative(res);
    1135              : 
    1136              :     /* Don't leak memory here */
    1137           12 :     if (PointerGetDatum(authoritative) != original)
    1138            0 :         pfree(authoritative);
    1139              : 
    1140           12 :     return res;
    1141              : }
    1142              : 
    1143              : /*
    1144              :  * Callback for estimating effectiveness of abbreviated key optimization, using
    1145              :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    1146              :  * should be aborted, based on its projected effectiveness.
    1147              :  *
    1148              :  * This is based on varstr_abbrev_abort(), but some comments have been elided
    1149              :  * for brevity. See there for more details.
                         *
                         * Returns false (keep abbreviating) while memtupcount is below 100,
                         * and whenever the estimated number of distinct abbreviated keys
                         * exceeds key_distinct * prop_card.  Otherwise returns true (abort).
                         * In the "continue" case prop_card is multiplied by 0.65 once
                         * memtupcount exceeds 10000, which lowers the bar for later calls.
    1150              :  */
    1151              : static bool
    1152            0 : bytea_abbrev_abort(int memtupcount, SortSupport ssup)
    1153              : {
    1154            0 :     ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
    1155              :     double      abbrev_distinct,
    1156              :                 key_distinct;
    1157              : 
    1158              :     Assert(ssup->abbreviate);
    1159              : 
    1160              :     /* Have a little patience */
    1161            0 :     if (memtupcount < 100)
    1162            0 :         return false;
    1163              : 
    1164            0 :     abbrev_distinct = estimateHyperLogLog(&bss->abbr_card);
    1165            0 :     key_distinct = estimateHyperLogLog(&bss->full_card);
    1166              : 
    1167              :     /*
    1168              :      * Clamp cardinality estimates to at least one distinct value.  While
    1169              :      * NULLs are generally disregarded, if only NULL values were seen so far,
    1170              :      * that might misrepresent costs if we failed to clamp.
    1171              :      */
    1172            0 :     if (abbrev_distinct < 1.0)
    1173            0 :         abbrev_distinct = 1.0;
    1174              : 
    1175            0 :     if (key_distinct < 1.0)
    1176            0 :         key_distinct = 1.0;
    1177              : 
                            /* diagnostics only; has no effect on the decision below */
    1178            0 :     if (trace_sort)
    1179              :     {
    1180            0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    1181              : 
    1182            0 :         elog(LOG, "bytea_abbrev: abbrev_distinct after %d: %f "
    1183              :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    1184              :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    1185              :              bss->prop_card);
    1186              :     }
    1187              : 
    1188              :     /*
    1189              :      * If the number of distinct abbreviated keys approximately matches the
    1190              :      * number of distinct original keys, continue with abbreviation.
    1191              :      */
    1192            0 :     if (abbrev_distinct > key_distinct * bss->prop_card)
    1193              :     {
    1194              :         /*
    1195              :          * Decay required cardinality aggressively after 10,000 tuples.
    1196              :          */
    1197            0 :         if (memtupcount > 10000)
    1198            0 :             bss->prop_card *= 0.65;
    1199              : 
    1200            0 :         return false;
    1201              :     }
    1202              : 
    1203              :     /*
    1204              :      * Abort abbreviation strategy.
    1205              :      */
    1206            0 :     if (trace_sort)
    1207            0 :         elog(LOG, "bytea_abbrev: aborted abbreviation at %d "
    1208              :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    1209              :              memtupcount, abbrev_distinct, key_distinct, bss->prop_card);
    1210              : 
    1211            0 :     return true;
    1212              : }
    1213              : 
    1214              : Datum
    1215           35 : bytea_sortsupport(PG_FUNCTION_ARGS)
    1216              : {
                            /*
                             * bytea_sortsupport - fill in the SortSupport struct passed as
                             * argument 0 for sorting bytea values.
                             *
                             * byteafastcmp is always installed as the comparator.  When
                             * ssup->abbreviate is set, it becomes the full comparator instead,
                             * ssup_datum_unsigned_cmp takes over as the comparator, and the
                             * abbreviation callbacks and their ByteaSortSupport state are
                             * installed.  The state is allocated while ssup->ssup_cxt is the
                             * current memory context; the previous context is restored before
                             * returning.
                             */
    1217           35 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1218              :     MemoryContext oldcontext;
    1219              : 
    1220           35 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1221              : 
    1222           35 :     ssup->comparator = byteafastcmp;
    1223              : 
    1224              :     /*
    1225              :      * Set up abbreviation support if requested.
    1226              :      */
    1227           35 :     if (ssup->abbreviate)
    1228              :     {
    1229              :         ByteaSortSupport *bss;
    1230              : 
    1231           12 :         bss = palloc_object(ByteaSortSupport);
    1232           12 :         bss->abbreviate = true;
                                /* initial cardinality ratio tested by bytea_abbrev_abort() */
    1233           12 :         bss->prop_card = 0.20;
    1234           12 :         initHyperLogLog(&bss->abbr_card, 10);
    1235           12 :         initHyperLogLog(&bss->full_card, 10);
    1236              : 
    1237           12 :         ssup->ssup_extra = bss;
    1238           12 :         ssup->abbrev_full_comparator = ssup->comparator;
    1239           12 :         ssup->comparator = ssup_datum_unsigned_cmp;
    1240           12 :         ssup->abbrev_converter = bytea_abbrev_convert;
    1241           12 :         ssup->abbrev_abort = bytea_abbrev_abort;
    1242              :     }
    1243              : 
    1244           35 :     MemoryContextSwitchTo(oldcontext);
    1245              : 
    1246           35 :     PG_RETURN_VOID();
    1247              : }
    1248              : 
    1249              : /* Cast bytea -> int2 */
    1250              : Datum
    1251           29 : bytea_int2(PG_FUNCTION_ARGS)
    1252              : {
    1253           29 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1254           29 :     int         len = VARSIZE_ANY_EXHDR(v);
    1255              :     uint16      result;
    1256              : 
    1257              :     /* Check that the byte array is not too long */
    1258           29 :     if (len > sizeof(result))
    1259            4 :         ereport(ERROR,
    1260              :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1261              :                 errmsg("smallint out of range"));
    1262              : 
    1263              :     /* Convert it to an integer; most significant bytes come first */
    1264           25 :     result = 0;
    1265           60 :     for (int i = 0; i < len; i++)
    1266              :     {
    1267           35 :         result <<= BITS_PER_BYTE;
    1268           35 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1269              :     }
    1270              : 
    1271           25 :     PG_RETURN_INT16(result);
    1272              : }
    1273              : 
    1274              : /* Cast bytea -> int4 */
    1275              : Datum
    1276           29 : bytea_int4(PG_FUNCTION_ARGS)
    1277              : {
    1278           29 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1279           29 :     int         len = VARSIZE_ANY_EXHDR(v);
    1280              :     uint32      result;
    1281              : 
    1282              :     /* Check that the byte array is not too long */
    1283           29 :     if (len > sizeof(result))
    1284            4 :         ereport(ERROR,
    1285              :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1286              :                 errmsg("integer out of range"));
    1287              : 
    1288              :     /* Convert it to an integer; most significant bytes come first */
    1289           25 :     result = 0;
    1290           90 :     for (int i = 0; i < len; i++)
    1291              :     {
    1292           65 :         result <<= BITS_PER_BYTE;
    1293           65 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1294              :     }
    1295              : 
    1296           25 :     PG_RETURN_INT32(result);
    1297              : }
    1298              : 
    1299              : /* Cast bytea -> int8 */
    1300              : Datum
    1301           29 : bytea_int8(PG_FUNCTION_ARGS)
    1302              : {
    1303           29 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1304           29 :     int         len = VARSIZE_ANY_EXHDR(v);
    1305              :     uint64      result;
    1306              : 
    1307              :     /* Check that the byte array is not too long */
    1308           29 :     if (len > sizeof(result))
    1309            4 :         ereport(ERROR,
    1310              :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1311              :                 errmsg("bigint out of range"));
    1312              : 
    1313              :     /* Convert it to an integer; most significant bytes come first */
    1314           25 :     result = 0;
    1315          150 :     for (int i = 0; i < len; i++)
    1316              :     {
    1317          125 :         result <<= BITS_PER_BYTE;
    1318          125 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1319              :     }
    1320              : 
    1321           25 :     PG_RETURN_INT64(result);
    1322              : }
    1323              : 
    1324              : /* Cast int2 -> bytea; can just use int2send() */
    1325              : Datum
    1326           10 : int2_bytea(PG_FUNCTION_ARGS)
    1327              : {
    1328           10 :     return int2send(fcinfo);
    1329              : }
    1330              : 
    1331              : /* Cast int4 -> bytea; can just use int4send() */
    1332              : Datum
    1333        20490 : int4_bytea(PG_FUNCTION_ARGS)
    1334              : {
    1335        20490 :     return int4send(fcinfo);
    1336              : }
    1337              : 
    1338              : /* Cast int8 -> bytea; can just use int8send() */
    1339              : Datum
    1340           10 : int8_bytea(PG_FUNCTION_ARGS)
    1341              : {
    1342           10 :     return int8send(fcinfo);
    1343              : }
    1344              : 
    1345              : /* Cast bytea -> uuid */
    1346              : Datum
    1347           13 : bytea_uuid(PG_FUNCTION_ARGS)
    1348              : {
    1349           13 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1350           13 :     int         len = VARSIZE_ANY_EXHDR(v);
    1351              :     pg_uuid_t  *uuid;
    1352              : 
    1353           13 :     if (len != UUID_LEN)
    1354            4 :         ereport(ERROR,
    1355              :                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
    1356              :                  errmsg("invalid input length for type %s", "uuid"),
    1357              :                  errdetail("Expected %d bytes, got %d.", UUID_LEN, len)));
    1358              : 
    1359            9 :     uuid = palloc_object(pg_uuid_t);
    1360            9 :     memcpy(uuid->data, VARDATA_ANY(v), UUID_LEN);
    1361            9 :     PG_RETURN_UUID_P(uuid);
    1362              : }
    1363              : 
    1364              : /* Cast uuid -> bytea; can just use uuid_send() */
    1365              : Datum
    1366            9 : uuid_bytea(PG_FUNCTION_ARGS)
    1367              : {
    1368            9 :     return uuid_send(fcinfo);
    1369              : }
        

Generated by: LCOV version 2.0-1