LCOV - code coverage report
Current view: top level - src/backend/utils/adt - bytea.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 92.0 % 448 412
Test Date: 2026-03-01 15:14:58 Functions: 97.6 % 41 40
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * bytea.c
       4              :  *    Functions for the bytea type.
       5              :  *
       6              :  * Portions Copyright (c) 2025-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/utils/adt/bytea.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/detoast.h"
      18              : #include "common/hashfn.h"
      19              : #include "common/int.h"
      20              : #include "fmgr.h"
      21              : #include "lib/hyperloglog.h"
      22              : #include "libpq/pqformat.h"
      23              : #include "port/pg_bitutils.h"
      24              : #include "port/pg_bswap.h"
      25              : #include "utils/builtins.h"
      26              : #include "utils/bytea.h"
      27              : #include "utils/fmgrprotos.h"
      28              : #include "utils/guc.h"
      29              : #include "utils/memutils.h"
      30              : #include "utils/sortsupport.h"
      31              : #include "varatt.h"
      32              : 
      33              : /* GUC variable */
      34              : int         bytea_output = BYTEA_OUTPUT_HEX;
      35              : 
      36              : static bytea *bytea_catenate(bytea *t1, bytea *t2);
      37              : static bytea *bytea_substring(Datum str, int S, int L,
      38              :                               bool length_not_specified);
      39              : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
      40              : 
      41              : typedef struct
      42              : {
      43              :     bool        abbreviate;     /* Should we abbreviate keys? */
      44              :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      45              :     hyperLogLogState full_card; /* Full key cardinality state */
      46              :     double      prop_card;      /* Required cardinality proportion */
      47              : } ByteaSortSupport;
      48              : 
      49              : /* Static function declarations for sort support */
      50              : static int  byteafastcmp(Datum x, Datum y, SortSupport ssup);
      51              : static Datum bytea_abbrev_convert(Datum original, SortSupport ssup);
      52              : static bool bytea_abbrev_abort(int memtupcount, SortSupport ssup);
      53              : 
      54              : /*
      55              :  * bytea_catenate
      56              :  *  Guts of byteacat(), broken out so it can be used by other functions
      57              :  *
      58              :  * Arguments can be in short-header form, but not compressed or out-of-line
      59              :  */
      60              : static bytea *
      61          779 : bytea_catenate(bytea *t1, bytea *t2)
      62              : {
      63              :     bytea      *result;
      64              :     int         len1,
      65              :                 len2,
      66              :                 len;
      67              :     char       *ptr;
      68              : 
      69          779 :     len1 = VARSIZE_ANY_EXHDR(t1);
      70          779 :     len2 = VARSIZE_ANY_EXHDR(t2);
      71              : 
      72              :     /* paranoia ... probably should throw error instead? */
      73          779 :     if (len1 < 0)
      74            0 :         len1 = 0;
      75          779 :     if (len2 < 0)
      76            0 :         len2 = 0;
      77              : 
      78          779 :     len = len1 + len2 + VARHDRSZ;
      79          779 :     result = (bytea *) palloc(len);
      80              : 
      81              :     /* Set size of result string... */
      82          779 :     SET_VARSIZE(result, len);
      83              : 
      84              :     /* Fill data field of result string... */
      85          779 :     ptr = VARDATA(result);
      86          779 :     if (len1 > 0)
      87          779 :         memcpy(ptr, VARDATA_ANY(t1), len1);
      88          779 :     if (len2 > 0)
      89          770 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
      90              : 
      91          779 :     return result;
      92              : }
      93              : 
      94              : #define PG_STR_GET_BYTEA(str_) \
      95              :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
      96              : 
      97              : static bytea *
      98         2101 : bytea_substring(Datum str,
      99              :                 int S,
     100              :                 int L,
     101              :                 bool length_not_specified)
     102              : {
     103              :     int32       S1;             /* adjusted start position */
     104              :     int32       L1;             /* adjusted substring length */
     105              :     int32       E;              /* end position */
     106              : 
     107              :     /*
     108              :      * The logic here should generally match text_substring().
     109              :      */
     110         2101 :     S1 = Max(S, 1);
     111              : 
     112         2101 :     if (length_not_specified)
     113              :     {
     114              :         /*
     115              :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
     116              :          * end of the string if we pass it a negative value for length.
     117              :          */
     118         1977 :         L1 = -1;
     119              :     }
     120          124 :     else if (L < 0)
     121              :     {
     122              :         /* SQL99 says to throw an error for E < S, i.e., negative length */
     123            6 :         ereport(ERROR,
     124              :                 (errcode(ERRCODE_SUBSTRING_ERROR),
     125              :                  errmsg("negative substring length not allowed")));
     126              :         L1 = -1;                /* silence stupider compilers */
     127              :     }
     128          118 :     else if (pg_add_s32_overflow(S, L, &E))
     129              :     {
     130              :         /*
     131              :          * L could be large enough for S + L to overflow, in which case the
     132              :          * substring must run to end of string.
     133              :          */
     134            3 :         L1 = -1;
     135              :     }
     136              :     else
     137              :     {
     138              :         /*
     139              :          * A zero or negative value for the end position can happen if the
     140              :          * start was negative or one. SQL99 says to return a zero-length
     141              :          * string.
     142              :          */
     143          115 :         if (E < 1)
     144            0 :             return PG_STR_GET_BYTEA("");
     145              : 
     146          115 :         L1 = E - S1;
     147              :     }
     148              : 
     149              :     /*
     150              :      * If the start position is past the end of the string, SQL99 says to
     151              :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
     152              :      * us.  We need only convert S1 to zero-based starting position.
     153              :      */
     154         2095 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
     155              : }
     156              : 
     157              : static bytea *
     158            9 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
     159              : {
     160              :     bytea      *result;
     161              :     bytea      *s1;
     162              :     bytea      *s2;
     163              :     int         sp_pl_sl;
     164              : 
     165              :     /*
     166              :      * Check for possible integer-overflow cases.  For negative sp, throw a
     167              :      * "substring length" error because that's what should be expected
     168              :      * according to the spec's definition of OVERLAY().
     169              :      */
     170            9 :     if (sp <= 0)
     171            0 :         ereport(ERROR,
     172              :                 (errcode(ERRCODE_SUBSTRING_ERROR),
     173              :                  errmsg("negative substring length not allowed")));
     174            9 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
     175            0 :         ereport(ERROR,
     176              :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
     177              :                  errmsg("integer out of range")));
     178              : 
     179            9 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
     180            9 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
     181            9 :     result = bytea_catenate(s1, t2);
     182            9 :     result = bytea_catenate(result, s2);
     183              : 
     184            9 :     return result;
     185              : }
     186              : 
     187              : /*****************************************************************************
     188              :  *   USER I/O ROUTINES                                                       *
     189              :  *****************************************************************************/
     190              : 
     191              : #define VAL(CH)         ((CH) - '0')
     192              : #define DIG(VAL)        ((VAL) + '0')
     193              : 
     194              : /*
     195              :  *      byteain         - converts from printable representation of byte array
     196              :  *
     197              :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     198              :  *      converted to internal form.  '\' must be passed as '\\'.
     199              :  */
     200              : Datum
     201       693941 : byteain(PG_FUNCTION_ARGS)
     202              : {
     203       693941 :     char       *inputText = PG_GETARG_CSTRING(0);
     204       693941 :     Node       *escontext = fcinfo->context;
     205       693941 :     size_t      len = strlen(inputText);
     206              :     size_t      bc;
     207              :     char       *tp;
     208              :     char       *rp;
     209              :     bytea      *result;
     210              : 
     211              :     /* Recognize hex input */
     212       693941 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     213              :     {
     214        56110 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     215        56110 :         result = palloc(bc);
     216        56110 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
     217              :                              escontext);
     218        56080 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     219              : 
     220        56080 :         PG_RETURN_BYTEA_P(result);
     221              :     }
     222              : 
     223              :     /* Else, it's the traditional escaped style */
     224       637831 :     result = (bytea *) palloc(len + VARHDRSZ);  /* maximum possible length */
     225              : 
     226       637831 :     tp = inputText;
     227       637831 :     rp = VARDATA(result);
     228      4787485 :     while (*tp != '\0')
     229              :     {
     230      4149660 :         if (tp[0] != '\\')
     231      4149136 :             *rp++ = *tp++;
     232          524 :         else if ((tp[1] >= '0' && tp[1] <= '3') &&
     233          512 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     234          512 :                  (tp[3] >= '0' && tp[3] <= '7'))
     235          512 :         {
     236              :             int         v;
     237              : 
     238          512 :             v = VAL(tp[1]);
     239          512 :             v <<= 3;
     240          512 :             v += VAL(tp[2]);
     241          512 :             v <<= 3;
     242          512 :             *rp++ = v + VAL(tp[3]);
     243              : 
     244          512 :             tp += 4;
     245              :         }
     246           12 :         else if (tp[1] == '\\')
     247              :         {
     248            6 :             *rp++ = '\\';
     249            6 :             tp += 2;
     250              :         }
     251              :         else
     252              :         {
     253              :             /*
     254              :              * one backslash, not followed by another or ### valid octal
     255              :              */
     256            6 :             ereturn(escontext, (Datum) 0,
     257              :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     258              :                      errmsg("invalid input syntax for type %s", "bytea")));
     259              :         }
     260              :     }
     261              : 
     262       637825 :     bc = rp - VARDATA(result);  /* actual length */
     263       637825 :     SET_VARSIZE(result, bc + VARHDRSZ);
     264              : 
     265       637825 :     PG_RETURN_BYTEA_P(result);
     266              : }
     267              : 
     268              : /*
     269              :  *      byteaout        - converts to printable representation of byte array
     270              :  *
     271              :  *      In the traditional escaped format, non-printable characters are
     272              :  *      printed as '\nnn' (octal) and '\' as '\\'.
     273              :  */
     274              : Datum
     275       280631 : byteaout(PG_FUNCTION_ARGS)
     276              : {
     277       280631 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     278              :     char       *result;
     279              :     char       *rp;
     280              : 
     281       280631 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     282              :     {
     283              :         /* Print hex format */
     284       280436 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     285       280436 :         *rp++ = '\\';
     286       280436 :         *rp++ = 'x';
     287       280436 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     288              :     }
     289          195 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     290              :     {
     291              :         /* Print traditional escaped format */
     292              :         char       *vp;
     293              :         uint64      len;
     294              :         int         i;
     295              : 
     296          195 :         len = 1;                /* empty string has 1 char */
     297          195 :         vp = VARDATA_ANY(vlena);
     298       108860 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     299              :         {
     300       108665 :             if (*vp == '\\')
     301            3 :                 len += 2;
     302       108662 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     303          249 :                 len += 4;
     304              :             else
     305       108413 :                 len++;
     306              :         }
     307              : 
     308              :         /*
     309              :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     310              :          * for safety let's check rather than relying on palloc's internal
     311              :          * check.
     312              :          */
     313          195 :         if (len > MaxAllocSize)
     314            0 :             ereport(ERROR,
     315              :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     316              :                      errmsg_internal("result of bytea output conversion is too large")));
     317          195 :         rp = result = (char *) palloc(len);
     318              : 
     319          195 :         vp = VARDATA_ANY(vlena);
     320       108860 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     321              :         {
     322       108665 :             if (*vp == '\\')
     323              :             {
     324            3 :                 *rp++ = '\\';
     325            3 :                 *rp++ = '\\';
     326              :             }
     327       108662 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     328          249 :             {
     329              :                 int         val;    /* holds unprintable chars */
     330              : 
     331          249 :                 val = *vp;
     332          249 :                 rp[0] = '\\';
     333          249 :                 rp[3] = DIG(val & 07);
     334          249 :                 val >>= 3;
     335          249 :                 rp[2] = DIG(val & 07);
     336          249 :                 val >>= 3;
     337          249 :                 rp[1] = DIG(val & 03);
     338          249 :                 rp += 4;
     339              :             }
     340              :             else
     341       108413 :                 *rp++ = *vp;
     342              :         }
     343              :     }
     344              :     else
     345              :     {
     346            0 :         elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
     347              :              bytea_output);
     348              :         rp = result = NULL;     /* keep compiler quiet */
     349              :     }
     350       280631 :     *rp = '\0';
     351       280631 :     PG_RETURN_CSTRING(result);
     352              : }
     353              : 
     354              : /*
     355              :  *      bytearecv           - converts external binary format to bytea
     356              :  */
     357              : Datum
     358        53855 : bytearecv(PG_FUNCTION_ARGS)
     359              : {
     360        53855 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     361              :     bytea      *result;
     362              :     int         nbytes;
     363              : 
     364        53855 :     nbytes = buf->len - buf->cursor;
     365        53855 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     366        53855 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     367        53855 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     368        53855 :     PG_RETURN_BYTEA_P(result);
     369              : }
     370              : 
     371              : /*
     372              :  *      byteasend           - converts bytea to binary format
     373              :  *
     374              :  * This is a special case: just copy the input...
     375              :  */
     376              : Datum
     377        34397 : byteasend(PG_FUNCTION_ARGS)
     378              : {
     379        34397 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     380              : 
     381        34397 :     PG_RETURN_BYTEA_P(vlena);
     382              : }
     383              : 
     384              : Datum
     385       129387 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     386              : {
     387              :     StringInfo  state;
     388              : 
     389       129387 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     390              : 
     391              :     /* Append the value unless null, preceding it with the delimiter. */
     392       129387 :     if (!PG_ARGISNULL(1))
     393              :     {
     394       121887 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     395       121887 :         bool        isfirst = false;
     396              : 
     397              :         /*
     398              :          * You might think we can just throw away the first delimiter, however
     399              :          * we must keep it as we may be a parallel worker doing partial
     400              :          * aggregation building a state to send to the main process.  We need
     401              :          * to keep the delimiter of every aggregation so that the combine
     402              :          * function can properly join up the strings of two separately
     403              :          * partially aggregated results.  The first delimiter is only stripped
     404              :          * off in the final function.  To know how much to strip off the front
     405              :          * of the string, we store the length of the first delimiter in the
     406              :          * StringInfo's cursor field, which we don't otherwise need here.
     407              :          */
     408       121887 :         if (state == NULL)
     409              :         {
     410              :             MemoryContext aggcontext;
     411              :             MemoryContext oldcontext;
     412              : 
     413          104 :             if (!AggCheckCallContext(fcinfo, &aggcontext))
     414              :             {
     415              :                 /* cannot be called directly because of internal-type argument */
     416            0 :                 elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
     417              :             }
     418              : 
     419              :             /*
     420              :              * Create state in aggregate context.  It'll stay there across
     421              :              * subsequent calls.
     422              :              */
     423          104 :             oldcontext = MemoryContextSwitchTo(aggcontext);
     424          104 :             state = makeStringInfo();
     425          104 :             MemoryContextSwitchTo(oldcontext);
     426              : 
     427          104 :             isfirst = true;
     428              :         }
     429              : 
     430       121887 :         if (!PG_ARGISNULL(2))
     431              :         {
     432       121881 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     433              : 
     434       121881 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
     435       121881 :                                    VARSIZE_ANY_EXHDR(delim));
     436       121881 :             if (isfirst)
     437          101 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
     438              :         }
     439              : 
     440       121887 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
     441       121887 :                                VARSIZE_ANY_EXHDR(value));
     442              :     }
     443              : 
     444              :     /*
     445              :      * The transition type for string_agg() is declared to be "internal",
     446              :      * which is a pass-by-value type the same size as a pointer.
     447              :      */
     448       129387 :     if (state)
     449       129363 :         PG_RETURN_POINTER(state);
     450           24 :     PG_RETURN_NULL();
     451              : }
     452              : 
     453              : Datum
     454           77 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     455              : {
     456              :     StringInfo  state;
     457              : 
     458              :     /* cannot be called directly because of internal-type argument */
     459              :     Assert(AggCheckCallContext(fcinfo, NULL));
     460              : 
     461           77 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     462              : 
     463           77 :     if (state != NULL)
     464              :     {
     465              :         /* As per comment in transfn, strip data before the cursor position */
     466              :         bytea      *result;
     467           74 :         int         strippedlen = state->len - state->cursor;
     468              : 
     469           74 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
     470           74 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
     471           74 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
     472           74 :         PG_RETURN_BYTEA_P(result);
     473              :     }
     474              :     else
     475            3 :         PG_RETURN_NULL();
     476              : }
     477              : 
     478              : /*-------------------------------------------------------------
     479              :  * byteaoctetlen
     480              :  *
     481              :  * get the number of bytes contained in an instance of type 'bytea'
     482              :  *-------------------------------------------------------------
     483              :  */
     484              : Datum
     485          607 : byteaoctetlen(PG_FUNCTION_ARGS)
     486              : {
     487          607 :     Datum       str = PG_GETARG_DATUM(0);
     488              : 
     489              :     /* We need not detoast the input at all */
     490          607 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     491              : }
     492              : 
     493              : /*
     494              :  * byteacat -
     495              :  *    takes two bytea* and returns a bytea* that is the concatenation of
     496              :  *    the two.
     497              :  *
     498              :  * Cloned from textcat and modified as required.
     499              :  */
     500              : Datum
     501          761 : byteacat(PG_FUNCTION_ARGS)
     502              : {
     503          761 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     504          761 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     505              : 
     506          761 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
     507              : }
     508              : 
     509              : /*
     510              :  * byteaoverlay
     511              :  *  Replace specified substring of first string with second
     512              :  *
     513              :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
     514              :  * This code is a direct implementation of what the standard says.
     515              :  */
     516              : Datum
     517            3 : byteaoverlay(PG_FUNCTION_ARGS)
     518              : {
     519            3 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     520            3 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     521            3 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
     522            3 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
     523              : 
     524            3 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
     525              : }
     526              : 
     527              : Datum
     528            6 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
     529              : {
     530            6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     531            6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     532            6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
     533              :     int         sl;
     534              : 
     535            6 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
     536            6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
     537              : }
     538              : 
     539              : /*
     540              :  * bytea_substr()
     541              :  * Return a substring starting at the specified position.
     542              :  * Cloned from text_substr and modified as required.
     543              :  *
     544              :  * Input:
     545              :  *  - string
     546              :  *  - starting position (is one-based)
     547              :  *  - string length (optional)
     548              :  *
     549              :  * If the starting position is zero or less, then return from the start of the string
     550              :  * adjusting the length to be consistent with the "negative start" per SQL.
     551              :  * If the length is less than zero, an ERROR is thrown. If no third argument
     552              :  * (length) is provided, the length to the end of the string is assumed.
     553              :  */
     554              : Datum
     555          115 : bytea_substr(PG_FUNCTION_ARGS)
     556              : {
     557          115 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
     558              :                                       PG_GETARG_INT32(1),
     559              :                                       PG_GETARG_INT32(2),
     560              :                                       false));
     561              : }
     562              : 
     563              : /*
     564              :  * bytea_substr_no_len -
     565              :  *    Wrapper to avoid opr_sanity failure due to
     566              :  *    one function accepting a different number of args.
     567              :  */
     568              : Datum
     569         1968 : bytea_substr_no_len(PG_FUNCTION_ARGS)
     570              : {
     571         1968 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
     572              :                                       PG_GETARG_INT32(1),
     573              :                                       -1,
     574              :                                       true));
     575              : }
     576              : 
     577              : /*
     578              :  * bit_count
     579              :  */
     580              : Datum
     581            3 : bytea_bit_count(PG_FUNCTION_ARGS)
     582              : {
     583            3 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     584              : 
     585            3 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
     586              : }
     587              : 
     588              : /*
     589              :  * byteapos -
     590              :  *    Return the position of the specified substring.
     591              :  *    Implements the SQL POSITION() function.
     592              :  * Cloned from textpos and modified as required.
     593              :  */
     594              : Datum
     595           15 : byteapos(PG_FUNCTION_ARGS)
     596              : {
     597           15 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     598           15 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     599              :     int         pos;
     600              :     int         px,
     601              :                 p;
     602              :     int         len1,
     603              :                 len2;
     604              :     char       *p1,
     605              :                *p2;
     606              : 
     607           15 :     len1 = VARSIZE_ANY_EXHDR(t1);
     608           15 :     len2 = VARSIZE_ANY_EXHDR(t2);
     609              : 
     610           15 :     if (len2 <= 0)
     611            3 :         PG_RETURN_INT32(1);     /* result for empty pattern */
     612              : 
     613           12 :     p1 = VARDATA_ANY(t1);
     614           12 :     p2 = VARDATA_ANY(t2);
     615              : 
     616           12 :     pos = 0;
     617           12 :     px = (len1 - len2);
     618           27 :     for (p = 0; p <= px; p++)
     619              :     {
     620           21 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
     621              :         {
     622            6 :             pos = p + 1;
     623            6 :             break;
     624              :         };
     625           15 :         p1++;
     626              :     };
     627              : 
     628           12 :     PG_RETURN_INT32(pos);
     629              : }
     630              : 
     631              : /*-------------------------------------------------------------
     632              :  * byteaGetByte
     633              :  *
     634              :  * this routine treats "bytea" as an array of bytes.
     635              :  * It returns the Nth byte (a number between 0 and 255).
     636              :  *-------------------------------------------------------------
     637              :  */
     638              : Datum
     639           38 : byteaGetByte(PG_FUNCTION_ARGS)
     640              : {
     641           38 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     642           38 :     int32       n = PG_GETARG_INT32(1);
     643              :     int         len;
     644              :     int         byte;
     645              : 
     646           38 :     len = VARSIZE_ANY_EXHDR(v);
     647              : 
     648           38 :     if (n < 0 || n >= len)
     649            3 :         ereport(ERROR,
     650              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     651              :                  errmsg("index %d out of valid range, 0..%d",
     652              :                         n, len - 1)));
     653              : 
     654           35 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
     655              : 
     656           35 :     PG_RETURN_INT32(byte);
     657              : }
     658              : 
     659              : /*-------------------------------------------------------------
     660              :  * byteaGetBit
     661              :  *
     662              :  * This routine treats a "bytea" type like an array of bits.
     663              :  * It returns the value of the Nth bit (0 or 1).
     664              :  *
     665              :  *-------------------------------------------------------------
     666              :  */
     667              : Datum
     668            6 : byteaGetBit(PG_FUNCTION_ARGS)
     669              : {
     670            6 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     671            6 :     int64       n = PG_GETARG_INT64(1);
     672              :     int         byteNo,
     673              :                 bitNo;
     674              :     int         len;
     675              :     int         byte;
     676              : 
     677            6 :     len = VARSIZE_ANY_EXHDR(v);
     678              : 
     679            6 :     if (n < 0 || n >= (int64) len * 8)
     680            3 :         ereport(ERROR,
     681              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     682              :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
     683              :                         n, (int64) len * 8 - 1)));
     684              : 
     685              :     /* n/8 is now known < len, so safe to cast to int */
     686            3 :     byteNo = (int) (n / 8);
     687            3 :     bitNo = (int) (n % 8);
     688              : 
     689            3 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
     690              : 
     691            3 :     if (byte & (1 << bitNo))
     692            3 :         PG_RETURN_INT32(1);
     693              :     else
     694            0 :         PG_RETURN_INT32(0);
     695              : }
     696              : 
     697              : /*-------------------------------------------------------------
     698              :  * byteaSetByte
     699              :  *
     700              :  * Given an instance of type 'bytea' creates a new one with
     701              :  * the Nth byte set to the given value.
     702              :  *
     703              :  *-------------------------------------------------------------
     704              :  */
     705              : Datum
     706            6 : byteaSetByte(PG_FUNCTION_ARGS)
     707              : {
     708            6 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
     709            6 :     int32       n = PG_GETARG_INT32(1);
     710            6 :     int32       newByte = PG_GETARG_INT32(2);
     711              :     int         len;
     712              : 
     713            6 :     len = VARSIZE(res) - VARHDRSZ;
     714              : 
     715            6 :     if (n < 0 || n >= len)
     716            3 :         ereport(ERROR,
     717              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     718              :                  errmsg("index %d out of valid range, 0..%d",
     719              :                         n, len - 1)));
     720              : 
     721              :     /*
     722              :      * Now set the byte.
     723              :      */
     724            3 :     ((unsigned char *) VARDATA(res))[n] = newByte;
     725              : 
     726            3 :     PG_RETURN_BYTEA_P(res);
     727              : }
     728              : 
     729              : /*-------------------------------------------------------------
     730              :  * byteaSetBit
     731              :  *
     732              :  * Given an instance of type 'bytea' creates a new one with
     733              :  * the Nth bit set to the given value.
     734              :  *
     735              :  *-------------------------------------------------------------
     736              :  */
     737              : Datum
     738            6 : byteaSetBit(PG_FUNCTION_ARGS)
     739              : {
     740            6 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
     741            6 :     int64       n = PG_GETARG_INT64(1);
     742            6 :     int32       newBit = PG_GETARG_INT32(2);
     743              :     int         len;
     744              :     int         oldByte,
     745              :                 newByte;
     746              :     int         byteNo,
     747              :                 bitNo;
     748              : 
     749            6 :     len = VARSIZE(res) - VARHDRSZ;
     750              : 
     751            6 :     if (n < 0 || n >= (int64) len * 8)
     752            3 :         ereport(ERROR,
     753              :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     754              :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
     755              :                         n, (int64) len * 8 - 1)));
     756              : 
     757              :     /* n/8 is now known < len, so safe to cast to int */
     758            3 :     byteNo = (int) (n / 8);
     759            3 :     bitNo = (int) (n % 8);
     760              : 
     761              :     /*
     762              :      * sanity check!
     763              :      */
     764            3 :     if (newBit != 0 && newBit != 1)
     765            0 :         ereport(ERROR,
     766              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     767              :                  errmsg("new bit must be 0 or 1")));
     768              : 
     769              :     /*
     770              :      * Update the byte.
     771              :      */
     772            3 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
     773              : 
     774            3 :     if (newBit == 0)
     775            3 :         newByte = oldByte & (~(1 << bitNo));
     776              :     else
     777            0 :         newByte = oldByte | (1 << bitNo);
     778              : 
     779            3 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
     780              : 
     781            3 :     PG_RETURN_BYTEA_P(res);
     782              : }
     783              : 
     784              : /*
     785              :  * Return reversed bytea
     786              :  */
     787              : Datum
     788            9 : bytea_reverse(PG_FUNCTION_ARGS)
     789              : {
     790            9 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     791            9 :     const char *p = VARDATA_ANY(v);
     792            9 :     int         len = VARSIZE_ANY_EXHDR(v);
     793            9 :     const char *endp = p + len;
     794            9 :     bytea      *result = palloc(len + VARHDRSZ);
     795            9 :     char       *dst = (char *) VARDATA(result) + len;
     796              : 
     797            9 :     SET_VARSIZE(result, len + VARHDRSZ);
     798              : 
     799           18 :     while (p < endp)
     800            9 :         *(--dst) = *p++;
     801              : 
     802            9 :     PG_RETURN_BYTEA_P(result);
     803              : }
     804              : 
     805              : 
     806              : /*****************************************************************************
     807              :  *  Comparison Functions used for bytea
     808              :  *
     809              :  * Note: btree indexes need these routines not to leak memory; therefore,
     810              :  * be careful to free working copies of toasted datums.  Most places don't
     811              :  * need to be so careful.
     812              :  *****************************************************************************/
     813              : 
     814              : Datum
     815         5479 : byteaeq(PG_FUNCTION_ARGS)
     816              : {
     817         5479 :     Datum       arg1 = PG_GETARG_DATUM(0);
     818         5479 :     Datum       arg2 = PG_GETARG_DATUM(1);
     819              :     bool        result;
     820              :     Size        len1,
     821              :                 len2;
     822              : 
     823              :     /*
     824              :      * We can use a fast path for unequal lengths, which might save us from
     825              :      * having to detoast one or both values.
     826              :      */
     827         5479 :     len1 = toast_raw_datum_size(arg1);
     828         5479 :     len2 = toast_raw_datum_size(arg2);
     829         5479 :     if (len1 != len2)
     830         2305 :         result = false;
     831              :     else
     832              :     {
     833         3174 :         bytea      *barg1 = DatumGetByteaPP(arg1);
     834         3174 :         bytea      *barg2 = DatumGetByteaPP(arg2);
     835              : 
     836         3174 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
     837              :                          len1 - VARHDRSZ) == 0);
     838              : 
     839         3174 :         PG_FREE_IF_COPY(barg1, 0);
     840         3174 :         PG_FREE_IF_COPY(barg2, 1);
     841              :     }
     842              : 
     843         5479 :     PG_RETURN_BOOL(result);
     844              : }
     845              : 
     846              : Datum
     847          384 : byteane(PG_FUNCTION_ARGS)
     848              : {
     849          384 :     Datum       arg1 = PG_GETARG_DATUM(0);
     850          384 :     Datum       arg2 = PG_GETARG_DATUM(1);
     851              :     bool        result;
     852              :     Size        len1,
     853              :                 len2;
     854              : 
     855              :     /*
     856              :      * We can use a fast path for unequal lengths, which might save us from
     857              :      * having to detoast one or both values.
     858              :      */
     859          384 :     len1 = toast_raw_datum_size(arg1);
     860          384 :     len2 = toast_raw_datum_size(arg2);
     861          384 :     if (len1 != len2)
     862            0 :         result = true;
     863              :     else
     864              :     {
     865          384 :         bytea      *barg1 = DatumGetByteaPP(arg1);
     866          384 :         bytea      *barg2 = DatumGetByteaPP(arg2);
     867              : 
     868          384 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
     869              :                          len1 - VARHDRSZ) != 0);
     870              : 
     871          384 :         PG_FREE_IF_COPY(barg1, 0);
     872          384 :         PG_FREE_IF_COPY(barg2, 1);
     873              :     }
     874              : 
     875          384 :     PG_RETURN_BOOL(result);
     876              : }
     877              : 
     878              : Datum
     879         4126 : bytealt(PG_FUNCTION_ARGS)
     880              : {
     881         4126 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     882         4126 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     883              :     int         len1,
     884              :                 len2;
     885              :     int         cmp;
     886              : 
     887         4126 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     888         4126 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     889              : 
     890         4126 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     891              : 
     892         4126 :     PG_FREE_IF_COPY(arg1, 0);
     893         4126 :     PG_FREE_IF_COPY(arg2, 1);
     894              : 
     895         4126 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
     896              : }
     897              : 
     898              : Datum
     899         3178 : byteale(PG_FUNCTION_ARGS)
     900              : {
     901         3178 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     902         3178 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     903              :     int         len1,
     904              :                 len2;
     905              :     int         cmp;
     906              : 
     907         3178 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     908         3178 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     909              : 
     910         3178 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     911              : 
     912         3178 :     PG_FREE_IF_COPY(arg1, 0);
     913         3178 :     PG_FREE_IF_COPY(arg2, 1);
     914              : 
     915         3178 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
     916              : }
     917              : 
     918              : Datum
     919         3136 : byteagt(PG_FUNCTION_ARGS)
     920              : {
     921         3136 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     922         3136 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     923              :     int         len1,
     924              :                 len2;
     925              :     int         cmp;
     926              : 
     927         3136 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     928         3136 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     929              : 
     930         3136 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     931              : 
     932         3136 :     PG_FREE_IF_COPY(arg1, 0);
     933         3136 :     PG_FREE_IF_COPY(arg2, 1);
     934              : 
     935         3136 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
     936              : }
     937              : 
     938              : Datum
     939         2527 : byteage(PG_FUNCTION_ARGS)
     940              : {
     941         2527 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     942         2527 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     943              :     int         len1,
     944              :                 len2;
     945              :     int         cmp;
     946              : 
     947         2527 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     948         2527 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     949              : 
     950         2527 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     951              : 
     952         2527 :     PG_FREE_IF_COPY(arg1, 0);
     953         2527 :     PG_FREE_IF_COPY(arg2, 1);
     954              : 
     955         2527 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
     956              : }
     957              : 
     958              : Datum
     959        47296 : byteacmp(PG_FUNCTION_ARGS)
     960              : {
     961        47296 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     962        47296 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     963              :     int         len1,
     964              :                 len2;
     965              :     int         cmp;
     966              : 
     967        47296 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     968        47296 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     969              : 
     970        47296 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     971        47296 :     if ((cmp == 0) && (len1 != len2))
     972         1700 :         cmp = (len1 < len2) ? -1 : 1;
     973              : 
     974        47296 :     PG_FREE_IF_COPY(arg1, 0);
     975        47296 :     PG_FREE_IF_COPY(arg2, 1);
     976              : 
     977        47296 :     PG_RETURN_INT32(cmp);
     978              : }
     979              : 
     980              : Datum
     981           12 : bytea_larger(PG_FUNCTION_ARGS)
     982              : {
     983           12 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     984           12 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     985              :     bytea      *result;
     986              :     int         len1,
     987              :                 len2;
     988              :     int         cmp;
     989              : 
     990           12 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     991           12 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     992              : 
     993           12 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     994           12 :     result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
     995              : 
     996           12 :     PG_RETURN_BYTEA_P(result);
     997              : }
     998              : 
     999              : Datum
    1000           12 : bytea_smaller(PG_FUNCTION_ARGS)
    1001              : {
    1002           12 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    1003           12 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    1004              :     bytea      *result;
    1005              :     int         len1,
    1006              :                 len2;
    1007              :     int         cmp;
    1008              : 
    1009           12 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1010           12 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1011              : 
    1012           12 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    1013           12 :     result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
    1014              : 
    1015           12 :     PG_RETURN_BYTEA_P(result);
    1016              : }
    1017              : 
    1018              : /*
    1019              :  * sortsupport comparison func
    1020              :  */
    1021              : static int
    1022        10132 : byteafastcmp(Datum x, Datum y, SortSupport ssup)
    1023              : {
    1024        10132 :     bytea      *arg1 = DatumGetByteaPP(x);
    1025        10132 :     bytea      *arg2 = DatumGetByteaPP(y);
    1026              :     char       *a1p,
    1027              :                *a2p;
    1028              :     int         len1,
    1029              :                 len2,
    1030              :                 result;
    1031              : 
    1032        10132 :     a1p = VARDATA_ANY(arg1);
    1033        10132 :     a2p = VARDATA_ANY(arg2);
    1034              : 
    1035        10132 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1036        10132 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1037              : 
    1038        10132 :     result = memcmp(a1p, a2p, Min(len1, len2));
    1039        10132 :     if ((result == 0) && (len1 != len2))
    1040          415 :         result = (len1 < len2) ? -1 : 1;
    1041              : 
    1042              :     /* We can't afford to leak memory here. */
    1043        10132 :     if (PointerGetDatum(arg1) != x)
    1044            0 :         pfree(arg1);
    1045        10132 :     if (PointerGetDatum(arg2) != y)
    1046            0 :         pfree(arg2);
    1047              : 
    1048        10132 :     return result;
    1049              : }
    1050              : 
    1051              : /*
    1052              :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    1053              :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    1054              :  * of the bytea data into a Datum (on little-endian machines, the bytes are
    1055              :  * stored in reverse order), and treat it as an unsigned integer.
    1056              :  */
    1057              : static Datum
    1058           12 : bytea_abbrev_convert(Datum original, SortSupport ssup)
    1059              : {
    1060           12 :     const size_t max_prefix_bytes = sizeof(Datum);
    1061           12 :     ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
    1062           12 :     bytea      *authoritative = DatumGetByteaPP(original);
    1063           12 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    1064              :     Datum       res;
    1065              :     char       *pres;
    1066              :     int         len;
    1067              :     uint32      hash;
    1068              : 
    1069           12 :     pres = (char *) &res;
    1070              : 
    1071              :     /* memset(), so any non-overwritten bytes are NUL */
    1072           12 :     memset(pres, 0, max_prefix_bytes);
    1073           12 :     len = VARSIZE_ANY_EXHDR(authoritative);
    1074              : 
    1075              :     /*
    1076              :      * Short byteas will have terminating NUL bytes in the abbreviated datum.
    1077              :      * Abbreviated comparison need not make a distinction between these NUL
    1078              :      * bytes, and NUL bytes representing actual NULs in the authoritative
    1079              :      * representation.
    1080              :      *
    1081              :      * Hopefully a comparison at or past one abbreviated key's terminating NUL
    1082              :      * byte will resolve the comparison without consulting the authoritative
    1083              :      * representation; specifically, some later non-NUL byte in the longer
    1084              :      * bytea can resolve the comparison against a subsequent terminating NUL
    1085              :      * in the shorter bytea.  There will usually be what is effectively a
    1086              :      * "length-wise" resolution there and then.
    1087              :      *
    1088              :      * If that doesn't work out -- if all bytes in the longer bytea positioned
    1089              :      * at or past the offset of the smaller bytea (first) terminating NUL are
    1090              :      * actually representative of NUL bytes in the authoritative binary bytea
    1091              :      * (perhaps with some *terminating* NUL bytes towards the end of the
    1092              :      * longer bytea iff it happens to still be small) -- then an authoritative
    1093              :      * tie-breaker will happen, and do the right thing: explicitly consider
    1094              :      * bytea length.
    1095              :      */
    1096           12 :     memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
    1097              : 
    1098              :     /*
    1099              :      * Maintain approximate cardinality of both abbreviated keys and original,
    1100              :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    1101              :      * the worst case, where we do many string abbreviations for no saving in
    1102              :      * full memcmp()-based comparisons.  These statistics are used by
    1103              :      * bytea_abbrev_abort().
    1104              :      *
    1105              :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    1106              :      * in order to compensate for cases where differences are past
    1107              :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    1108              :      */
    1109           12 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    1110              :                                    Min(len, PG_CACHE_LINE_SIZE)));
    1111              : 
    1112           12 :     if (len > PG_CACHE_LINE_SIZE)
    1113            0 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    1114              : 
    1115           12 :     addHyperLogLog(&bss->full_card, hash);
    1116              : 
    1117              :     /* Hash abbreviated key */
    1118              :     {
    1119              :         uint32      tmp;
    1120              : 
    1121           12 :         tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
    1122           12 :         hash = DatumGetUInt32(hash_uint32(tmp));
    1123              :     }
    1124              : 
    1125           12 :     addHyperLogLog(&bss->abbr_card, hash);
    1126              : 
    1127              :     /*
    1128              :      * Byteswap on little-endian machines.
    1129              :      *
    1130              :      * This is needed so that ssup_datum_unsigned_cmp() works correctly on all
    1131              :      * platforms.
    1132              :      */
    1133           12 :     res = DatumBigEndianToNative(res);
    1134              : 
    1135              :     /* Don't leak memory here */
    1136           12 :     if (PointerGetDatum(authoritative) != original)
    1137            0 :         pfree(authoritative);
    1138              : 
    1139           12 :     return res;
    1140              : }
    1141              : 
    1142              : /*
    1143              :  * Callback for estimating effectiveness of abbreviated key optimization, using
    1144              :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    1145              :  * should be aborted, based on its projected effectiveness.
    1146              :  *
    1147              :  * This is based on varstr_abbrev_abort(), but some comments have been elided
    1148              :  * for brevity. See there for more details.
    1149              :  */
    1150              : static bool
    1151            0 : bytea_abbrev_abort(int memtupcount, SortSupport ssup)
    1152              : {
    1153            0 :     ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
    1154              :     double      abbrev_distinct,
    1155              :                 key_distinct;
    1156              : 
    1157              :     Assert(ssup->abbreviate);
    1158              : 
    1159              :     /* Have a little patience */
    1160            0 :     if (memtupcount < 100)
    1161            0 :         return false;
    1162              : 
    1163            0 :     abbrev_distinct = estimateHyperLogLog(&bss->abbr_card);
    1164            0 :     key_distinct = estimateHyperLogLog(&bss->full_card);
    1165              : 
    1166              :     /*
    1167              :      * Clamp cardinality estimates to at least one distinct value.  While
    1168              :      * NULLs are generally disregarded, if only NULL values were seen so far,
    1169              :      * that might misrepresent costs if we failed to clamp.
    1170              :      */
    1171            0 :     if (abbrev_distinct < 1.0)
    1172            0 :         abbrev_distinct = 1.0;
    1173              : 
    1174            0 :     if (key_distinct < 1.0)
    1175            0 :         key_distinct = 1.0;
    1176              : 
    1177            0 :     if (trace_sort)
    1178              :     {
    1179            0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    1180              : 
    1181            0 :         elog(LOG, "bytea_abbrev: abbrev_distinct after %d: %f "
    1182              :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    1183              :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    1184              :              bss->prop_card);
    1185              :     }
    1186              : 
    1187              :     /*
    1188              :      * If the number of distinct abbreviated keys approximately matches the
    1189              :      * number of distinct original keys, continue with abbreviation.
    1190              :      */
    1191            0 :     if (abbrev_distinct > key_distinct * bss->prop_card)
    1192              :     {
    1193              :         /*
    1194              :          * Decay required cardinality aggressively after 10,000 tuples.
    1195              :          */
    1196            0 :         if (memtupcount > 10000)
    1197            0 :             bss->prop_card *= 0.65;
    1198              : 
    1199            0 :         return false;
    1200              :     }
    1201              : 
    1202              :     /*
    1203              :      * Abort abbreviation strategy.
    1204              :      */
    1205            0 :     if (trace_sort)
    1206            0 :         elog(LOG, "bytea_abbrev: aborted abbreviation at %d "
    1207              :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    1208              :              memtupcount, abbrev_distinct, key_distinct, bss->prop_card);
    1209              : 
    1210            0 :     return true;
    1211              : }
    1212              : /* Sort support entry point for bytea: installs comparison callbacks on the SortSupport passed as argument 0. */
    1213              : Datum
    1214           72 : bytea_sortsupport(PG_FUNCTION_ARGS)
    1215              : {
    1216           72 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1217              :     MemoryContext oldcontext;
    1218              : 
    1219           72 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1220              :     /* Default to the full comparator; the abbreviation branch below replaces it. */
    1221           72 :     ssup->comparator = byteafastcmp;
    1222              : 
    1223              :     /*
    1224              :      * Set up abbreviation support if requested.  The state is allocated while ssup->ssup_cxt is current and is stored in ssup->ssup_extra.
    1225              :      */
    1226           72 :     if (ssup->abbreviate)
    1227              :     {
    1228              :         ByteaSortSupport *bss;
    1229              : 
    1230           10 :         bss = palloc_object(ByteaSortSupport);
    1231           10 :         bss->abbreviate = true;
    1232           10 :         bss->prop_card = 0.20;
    1233           10 :         initHyperLogLog(&bss->abbr_card, 10);
    1234           10 :         initHyperLogLog(&bss->full_card, 10);
    1235              :         /* Keep the comparator set above as the full comparator, then compare abbreviated keys with ssup_datum_unsigned_cmp. */
    1236           10 :         ssup->ssup_extra = bss;
    1237           10 :         ssup->abbrev_full_comparator = ssup->comparator;
    1238           10 :         ssup->comparator = ssup_datum_unsigned_cmp;
    1239           10 :         ssup->abbrev_converter = bytea_abbrev_convert;
    1240           10 :         ssup->abbrev_abort = bytea_abbrev_abort;
    1241              :     }
    1242              : 
    1243           72 :     MemoryContextSwitchTo(oldcontext);
    1244              : 
    1245           72 :     PG_RETURN_VOID();
    1246              : }
    1247              : 
    1248              : /* Cast bytea -> int2: builds the value from at most sizeof(uint16) bytes; longer input raises "smallint out of range" */
    1249              : Datum
    1250           18 : bytea_int2(PG_FUNCTION_ARGS)
    1251              : {
    1252           18 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1253           18 :     int         len = VARSIZE_ANY_EXHDR(v);
    1254              :     uint16      result;
    1255              : 
    1256              :     /* Reject input that has more bytes than the result type can hold */
    1257           18 :     if (len > sizeof(result))
    1258            3 :         ereport(ERROR,
    1259              :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1260              :                 errmsg("smallint out of range"));
    1261              : 
    1262              :     /* Accumulate the bytes, most significant first; an empty input leaves the result at 0 */
    1263           15 :     result = 0;
    1264           36 :     for (int i = 0; i < len; i++)
    1265              :     {
    1266           21 :         result <<= BITS_PER_BYTE;
    1267           21 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1268              :     }
    1269              :     /* NOTE(review): the unsigned accumulator is returned via PG_RETURN_INT16; presumably the bit pattern is reinterpreted as signed -- confirm */
    1270           15 :     PG_RETURN_INT16(result);
    1271              : }
    1272              : 
    1273              : /* Cast bytea -> int4: builds the value from at most sizeof(uint32) bytes; longer input raises "integer out of range" */
    1274              : Datum
    1275           18 : bytea_int4(PG_FUNCTION_ARGS)
    1276              : {
    1277           18 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1278           18 :     int         len = VARSIZE_ANY_EXHDR(v);
    1279              :     uint32      result;
    1280              : 
    1281              :     /* Reject input that has more bytes than the result type can hold */
    1282           18 :     if (len > sizeof(result))
    1283            3 :         ereport(ERROR,
    1284              :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1285              :                 errmsg("integer out of range"));
    1286              : 
    1287              :     /* Accumulate the bytes, most significant first; an empty input leaves the result at 0 */
    1288           15 :     result = 0;
    1289           54 :     for (int i = 0; i < len; i++)
    1290              :     {
    1291           39 :         result <<= BITS_PER_BYTE;
    1292           39 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1293              :     }
    1294              :     /* NOTE(review): the unsigned accumulator is returned via PG_RETURN_INT32; presumably the bit pattern is reinterpreted as signed -- confirm */
    1295           15 :     PG_RETURN_INT32(result);
    1296              : }
    1297              : 
    1298              : /* Cast bytea -> int8: builds the value from at most sizeof(uint64) bytes; longer input raises "bigint out of range" */
    1299              : Datum
    1300           18 : bytea_int8(PG_FUNCTION_ARGS)
    1301              : {
    1302           18 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1303           18 :     int         len = VARSIZE_ANY_EXHDR(v);
    1304              :     uint64      result;
    1305              : 
    1306              :     /* Reject input that has more bytes than the result type can hold */
    1307           18 :     if (len > sizeof(result))
    1308            3 :         ereport(ERROR,
    1309              :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1310              :                 errmsg("bigint out of range"));
    1311              : 
    1312              :     /* Accumulate the bytes, most significant first; an empty input leaves the result at 0 */
    1313           15 :     result = 0;
    1314           90 :     for (int i = 0; i < len; i++)
    1315              :     {
    1316           75 :         result <<= BITS_PER_BYTE;
    1317           75 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1318              :     }
    1319              :     /* NOTE(review): the unsigned accumulator is returned via PG_RETURN_INT64; presumably the bit pattern is reinterpreted as signed -- confirm */
    1320           15 :     PG_RETURN_INT64(result);
    1321              : }
    1322              : 
    1323              : /* Cast int2 -> bytea; forwards fcinfo unchanged to int2send() and returns its result */
    1324              : Datum
    1325            6 : int2_bytea(PG_FUNCTION_ARGS)
    1326              : {
    1327            6 :     return int2send(fcinfo);
    1328              : }
    1329              : 
    1330              : /* Cast int4 -> bytea; forwards fcinfo unchanged to int4send() and returns its result */
    1331              : Datum
    1332        20486 : int4_bytea(PG_FUNCTION_ARGS)
    1333              : {
    1334        20486 :     return int4send(fcinfo);
    1335              : }
    1336              : 
    1337              : /* Cast int8 -> bytea; forwards fcinfo unchanged to int8send() and returns its result */
    1338              : Datum
    1339            6 : int8_bytea(PG_FUNCTION_ARGS)
    1340              : {
    1341            6 :     return int8send(fcinfo);
    1342              : }
        

Generated by: LCOV version 2.0-1