LCOV - code coverage report
Current view: top level - src/backend/utils/adt - bytea.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 412 448 92.0 %
Date: 2025-12-23 14:18:26 Functions: 40 41 97.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bytea.c
       4             :  *    Functions for the bytea type.
       5             :  *
       6             :  * Portions Copyright (c) 2025, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/bytea.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/detoast.h"
      18             : #include "common/hashfn.h"
      19             : #include "common/int.h"
      20             : #include "fmgr.h"
      21             : #include "lib/hyperloglog.h"
      22             : #include "libpq/pqformat.h"
      23             : #include "port/pg_bitutils.h"
      24             : #include "port/pg_bswap.h"
      25             : #include "utils/builtins.h"
      26             : #include "utils/bytea.h"
      27             : #include "utils/fmgrprotos.h"
      28             : #include "utils/guc.h"
      29             : #include "utils/memutils.h"
      30             : #include "utils/sortsupport.h"
      31             : #include "varatt.h"
      32             : 
      33             : /* GUC variable */
      34             : int         bytea_output = BYTEA_OUTPUT_HEX;
      35             : 
      36             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
      37             : static bytea *bytea_substring(Datum str, int S, int L,
      38             :                               bool length_not_specified);
      39             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
      40             : 
      41             : typedef struct
      42             : {
      43             :     bool        abbreviate;     /* Should we abbreviate keys? */
      44             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      45             :     hyperLogLogState full_card; /* Full key cardinality state */
      46             :     double      prop_card;      /* Required cardinality proportion */
      47             : } ByteaSortSupport;
      48             : 
      49             : /* Static function declarations for sort support */
      50             : static int  byteafastcmp(Datum x, Datum y, SortSupport ssup);
      51             : static Datum bytea_abbrev_convert(Datum original, SortSupport ssup);
      52             : static bool bytea_abbrev_abort(int memtupcount, SortSupport ssup);
      53             : 
      54             : /*
      55             :  * bytea_catenate
      56             :  *  Guts of byteacat(), broken out so it can be used by other functions
      57             :  *
      58             :  * Arguments can be in short-header form, but not compressed or out-of-line
      59             :  */
      60             : static bytea *
      61        1558 : bytea_catenate(bytea *t1, bytea *t2)
      62             : {
      63             :     bytea      *result;
      64             :     int         len1,
      65             :                 len2,
      66             :                 len;
      67             :     char       *ptr;
      68             : 
      69        1558 :     len1 = VARSIZE_ANY_EXHDR(t1);
      70        1558 :     len2 = VARSIZE_ANY_EXHDR(t2);
      71             : 
      72             :     /* paranoia ... probably should throw error instead? */
      73        1558 :     if (len1 < 0)
      74           0 :         len1 = 0;
      75        1558 :     if (len2 < 0)
      76           0 :         len2 = 0;
      77             : 
      78        1558 :     len = len1 + len2 + VARHDRSZ;
      79        1558 :     result = (bytea *) palloc(len);
      80             : 
      81             :     /* Set size of result string... */
      82        1558 :     SET_VARSIZE(result, len);
      83             : 
      84             :     /* Fill data field of result string... */
      85        1558 :     ptr = VARDATA(result);
      86        1558 :     if (len1 > 0)
      87        1558 :         memcpy(ptr, VARDATA_ANY(t1), len1);
      88        1558 :     if (len2 > 0)
      89        1540 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
      90             : 
      91        1558 :     return result;
      92             : }
      93             : 
      94             : #define PG_STR_GET_BYTEA(str_) \
      95             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
      96             : 
      97             : static bytea *
      98        4058 : bytea_substring(Datum str,
      99             :                 int S,
     100             :                 int L,
     101             :                 bool length_not_specified)
     102             : {
     103             :     int32       S1;             /* adjusted start position */
     104             :     int32       L1;             /* adjusted substring length */
     105             :     int32       E;              /* end position */
     106             : 
     107             :     /*
     108             :      * The logic here should generally match text_substring().
     109             :      */
     110        4058 :     S1 = Max(S, 1);
     111             : 
     112        4058 :     if (length_not_specified)
     113             :     {
     114             :         /*
     115             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
     116             :          * end of the string if we pass it a negative value for length.
     117             :          */
     118        3954 :         L1 = -1;
     119             :     }
     120         104 :     else if (L < 0)
     121             :     {
     122             :         /* SQL99 says to throw an error for E < S, i.e., negative length */
     123          12 :         ereport(ERROR,
     124             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
     125             :                  errmsg("negative substring length not allowed")));
     126             :         L1 = -1;                /* silence stupider compilers */
     127             :     }
     128          92 :     else if (pg_add_s32_overflow(S, L, &E))
     129             :     {
     130             :         /*
     131             :          * L could be large enough for S + L to overflow, in which case the
     132             :          * substring must run to end of string.
     133             :          */
     134           6 :         L1 = -1;
     135             :     }
     136             :     else
     137             :     {
     138             :         /*
     139             :          * A zero or negative value for the end position can happen if the
     140             :          * start was negative or one. SQL99 says to return a zero-length
     141             :          * string.
     142             :          */
     143          86 :         if (E < 1)
     144           0 :             return PG_STR_GET_BYTEA("");
     145             : 
     146          86 :         L1 = E - S1;
     147             :     }
     148             : 
     149             :     /*
     150             :      * If the start position is past the end of the string, SQL99 says to
     151             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
     152             :      * us.  We need only convert S1 to zero-based starting position.
     153             :      */
     154        4046 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
     155             : }
     156             : 
     157             : static bytea *
     158          18 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
     159             : {
     160             :     bytea      *result;
     161             :     bytea      *s1;
     162             :     bytea      *s2;
     163             :     int         sp_pl_sl;
     164             : 
     165             :     /*
     166             :      * Check for possible integer-overflow cases.  For negative sp, throw a
     167             :      * "substring length" error because that's what should be expected
     168             :      * according to the spec's definition of OVERLAY().
     169             :      */
     170          18 :     if (sp <= 0)
     171           0 :         ereport(ERROR,
     172             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
     173             :                  errmsg("negative substring length not allowed")));
     174          18 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
     175           0 :         ereport(ERROR,
     176             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
     177             :                  errmsg("integer out of range")));
     178             : 
     179          18 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
     180          18 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
     181          18 :     result = bytea_catenate(s1, t2);
     182          18 :     result = bytea_catenate(result, s2);
     183             : 
     184          18 :     return result;
     185             : }
     186             : 
     187             : /*****************************************************************************
     188             :  *   USER I/O ROUTINES                                                       *
     189             :  *****************************************************************************/
     190             : 
     191             : #define VAL(CH)         ((CH) - '0')
     192             : #define DIG(VAL)        ((VAL) + '0')
     193             : 
     194             : /*
     195             :  *      byteain         - converts from printable representation of byte array
     196             :  *
     197             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     198             :  *      converted to internal form.  '\' must be passed as '\\'.
     199             :  */
     200             : Datum
     201     1387674 : byteain(PG_FUNCTION_ARGS)
     202             : {
     203     1387674 :     char       *inputText = PG_GETARG_CSTRING(0);
     204     1387674 :     Node       *escontext = fcinfo->context;
     205     1387674 :     size_t      len = strlen(inputText);
     206             :     size_t      bc;
     207             :     char       *tp;
     208             :     char       *rp;
     209             :     bytea      *result;
     210             : 
     211             :     /* Recognize hex input */
     212     1387674 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     213             :     {
     214      111874 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     215      111874 :         result = palloc(bc);
     216      111874 :         bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
     217             :                              escontext);
     218      111814 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     219             : 
     220      111814 :         PG_RETURN_BYTEA_P(result);
     221             :     }
     222             : 
     223             :     /* Else, it's the traditional escaped style */
     224     1275800 :     result = (bytea *) palloc(len + VARHDRSZ);  /* maximum possible length */
     225             : 
     226     1275800 :     tp = inputText;
     227     1275800 :     rp = VARDATA(result);
     228     9573702 :     while (*tp != '\0')
     229             :     {
     230     8297914 :         if (tp[0] != '\\')
     231     8296866 :             *rp++ = *tp++;
     232        1048 :         else if ((tp[1] >= '0' && tp[1] <= '3') &&
     233        1024 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     234        1024 :                  (tp[3] >= '0' && tp[3] <= '7'))
     235        1024 :         {
     236             :             int         v;
     237             : 
     238        1024 :             v = VAL(tp[1]);
     239        1024 :             v <<= 3;
     240        1024 :             v += VAL(tp[2]);
     241        1024 :             v <<= 3;
     242        1024 :             *rp++ = v + VAL(tp[3]);
     243             : 
     244        1024 :             tp += 4;
     245             :         }
     246          24 :         else if (tp[1] == '\\')
     247             :         {
     248          12 :             *rp++ = '\\';
     249          12 :             tp += 2;
     250             :         }
     251             :         else
     252             :         {
     253             :             /*
     254             :              * one backslash, not followed by another or ### valid octal
     255             :              */
     256          12 :             ereturn(escontext, (Datum) 0,
     257             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     258             :                      errmsg("invalid input syntax for type %s", "bytea")));
     259             :         }
     260             :     }
     261             : 
     262     1275788 :     bc = rp - VARDATA(result);  /* actual length */
     263     1275788 :     SET_VARSIZE(result, bc + VARHDRSZ);
     264             : 
     265     1275788 :     PG_RETURN_BYTEA_P(result);
     266             : }
     267             : 
     268             : /*
     269             :  *      byteaout        - converts to printable representation of byte array
     270             :  *
     271             :  *      In the traditional escaped format, non-printable characters are
     272             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     273             :  */
     274             : Datum
     275      560570 : byteaout(PG_FUNCTION_ARGS)
     276             : {
     277      560570 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     278             :     char       *result;
     279             :     char       *rp;
     280             : 
     281      560570 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     282             :     {
     283             :         /* Print hex format */
     284      560180 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     285      560180 :         *rp++ = '\\';
     286      560180 :         *rp++ = 'x';
     287      560180 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     288             :     }
     289         390 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     290             :     {
     291             :         /* Print traditional escaped format */
     292             :         char       *vp;
     293             :         uint64      len;
     294             :         int         i;
     295             : 
     296         390 :         len = 1;                /* empty string has 1 char */
     297         390 :         vp = VARDATA_ANY(vlena);
     298      217720 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     299             :         {
     300      217330 :             if (*vp == '\\')
     301           6 :                 len += 2;
     302      217324 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     303         498 :                 len += 4;
     304             :             else
     305      216826 :                 len++;
     306             :         }
     307             : 
     308             :         /*
     309             :          * In principle len can't overflow uint32 if the input fit in 1GB, but
     310             :          * for safety let's check rather than relying on palloc's internal
     311             :          * check.
     312             :          */
     313         390 :         if (len > MaxAllocSize)
     314           0 :             ereport(ERROR,
     315             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     316             :                      errmsg_internal("result of bytea output conversion is too large")));
     317         390 :         rp = result = (char *) palloc(len);
     318             : 
     319         390 :         vp = VARDATA_ANY(vlena);
     320      217720 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     321             :         {
     322      217330 :             if (*vp == '\\')
     323             :             {
     324           6 :                 *rp++ = '\\';
     325           6 :                 *rp++ = '\\';
     326             :             }
     327      217324 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     328         498 :             {
     329             :                 int         val;    /* holds unprintable chars */
     330             : 
     331         498 :                 val = *vp;
     332         498 :                 rp[0] = '\\';
     333         498 :                 rp[3] = DIG(val & 07);
     334         498 :                 val >>= 3;
     335         498 :                 rp[2] = DIG(val & 07);
     336         498 :                 val >>= 3;
     337         498 :                 rp[1] = DIG(val & 03);
     338         498 :                 rp += 4;
     339             :             }
     340             :             else
     341      216826 :                 *rp++ = *vp;
     342             :         }
     343             :     }
     344             :     else
     345             :     {
     346           0 :         elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
     347             :              bytea_output);
     348             :         rp = result = NULL;     /* keep compiler quiet */
     349             :     }
     350      560570 :     *rp = '\0';
     351      560570 :     PG_RETURN_CSTRING(result);
     352             : }
     353             : 
     354             : /*
     355             :  *      bytearecv           - converts external binary format to bytea
     356             :  */
     357             : Datum
     358      107710 : bytearecv(PG_FUNCTION_ARGS)
     359             : {
     360      107710 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     361             :     bytea      *result;
     362             :     int         nbytes;
     363             : 
     364      107710 :     nbytes = buf->len - buf->cursor;
     365      107710 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     366      107710 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     367      107710 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     368      107710 :     PG_RETURN_BYTEA_P(result);
     369             : }
     370             : 
     371             : /*
     372             :  *      byteasend           - converts bytea to binary format
     373             :  *
     374             :  * This is a special case: just copy the input...
     375             :  */
     376             : Datum
     377       68826 : byteasend(PG_FUNCTION_ARGS)
     378             : {
     379       68826 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     380             : 
     381       68826 :     PG_RETURN_BYTEA_P(vlena);
     382             : }
     383             : 
     384             : Datum
     385      258774 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     386             : {
     387             :     StringInfo  state;
     388             : 
     389      258774 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     390             : 
     391             :     /* Append the value unless null, preceding it with the delimiter. */
     392      258774 :     if (!PG_ARGISNULL(1))
     393             :     {
     394      243774 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     395      243774 :         bool        isfirst = false;
     396             : 
     397             :         /*
     398             :          * You might think we can just throw away the first delimiter, however
     399             :          * we must keep it as we may be a parallel worker doing partial
     400             :          * aggregation building a state to send to the main process.  We need
     401             :          * to keep the delimiter of every aggregation so that the combine
     402             :          * function can properly join up the strings of two separately
     403             :          * partially aggregated results.  The first delimiter is only stripped
     404             :          * off in the final function.  To know how much to strip off the front
     405             :          * of the string, we store the length of the first delimiter in the
     406             :          * StringInfo's cursor field, which we don't otherwise need here.
     407             :          */
     408      243774 :         if (state == NULL)
     409             :         {
     410             :             MemoryContext aggcontext;
     411             :             MemoryContext oldcontext;
     412             : 
     413         188 :             if (!AggCheckCallContext(fcinfo, &aggcontext))
     414             :             {
     415             :                 /* cannot be called directly because of internal-type argument */
     416           0 :                 elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
     417             :             }
     418             : 
     419             :             /*
     420             :              * Create state in aggregate context.  It'll stay there across
     421             :              * subsequent calls.
     422             :              */
     423         188 :             oldcontext = MemoryContextSwitchTo(aggcontext);
     424         188 :             state = makeStringInfo();
     425         188 :             MemoryContextSwitchTo(oldcontext);
     426             : 
     427         188 :             isfirst = true;
     428             :         }
     429             : 
     430      243774 :         if (!PG_ARGISNULL(2))
     431             :         {
     432      243762 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     433             : 
     434      243762 :             appendBinaryStringInfo(state, VARDATA_ANY(delim),
     435      243762 :                                    VARSIZE_ANY_EXHDR(delim));
     436      243762 :             if (isfirst)
     437         182 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
     438             :         }
     439             : 
     440      243774 :         appendBinaryStringInfo(state, VARDATA_ANY(value),
     441      243774 :                                VARSIZE_ANY_EXHDR(value));
     442             :     }
     443             : 
     444             :     /*
     445             :      * The transition type for string_agg() is declared to be "internal",
     446             :      * which is a pass-by-value type the same size as a pointer.
     447             :      */
     448      258774 :     if (state)
     449      258728 :         PG_RETURN_POINTER(state);
     450          46 :     PG_RETURN_NULL();
     451             : }
     452             : 
     453             : Datum
     454         154 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     455             : {
     456             :     StringInfo  state;
     457             : 
     458             :     /* cannot be called directly because of internal-type argument */
     459             :     Assert(AggCheckCallContext(fcinfo, NULL));
     460             : 
     461         154 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     462             : 
     463         154 :     if (state != NULL)
     464             :     {
     465             :         /* As per comment in transfn, strip data before the cursor position */
     466             :         bytea      *result;
     467         148 :         int         strippedlen = state->len - state->cursor;
     468             : 
     469         148 :         result = (bytea *) palloc(strippedlen + VARHDRSZ);
     470         148 :         SET_VARSIZE(result, strippedlen + VARHDRSZ);
     471         148 :         memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
     472         148 :         PG_RETURN_BYTEA_P(result);
     473             :     }
     474             :     else
     475           6 :         PG_RETURN_NULL();
     476             : }
     477             : 
     478             : /*-------------------------------------------------------------
     479             :  * byteaoctetlen
     480             :  *
     481             :  * get the number of bytes contained in an instance of type 'bytea'
     482             :  *-------------------------------------------------------------
     483             :  */
     484             : Datum
     485         650 : byteaoctetlen(PG_FUNCTION_ARGS)
     486             : {
     487         650 :     Datum       str = PG_GETARG_DATUM(0);
     488             : 
     489             :     /* We need not detoast the input at all */
     490         650 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     491             : }
     492             : 
     493             : /*
     494             :  * byteacat -
     495             :  *    takes two bytea* and returns a bytea* that is the concatenation of
     496             :  *    the two.
     497             :  *
     498             :  * Cloned from textcat and modified as required.
     499             :  */
     500             : Datum
     501        1522 : byteacat(PG_FUNCTION_ARGS)
     502             : {
     503        1522 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     504        1522 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     505             : 
     506        1522 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
     507             : }
     508             : 
     509             : /*
     510             :  * byteaoverlay
     511             :  *  Replace specified substring of first string with second
     512             :  *
     513             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
     514             :  * This code is a direct implementation of what the standard says.
     515             :  */
     516             : Datum
     517           6 : byteaoverlay(PG_FUNCTION_ARGS)
     518             : {
     519           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     520           6 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     521           6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
     522           6 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
     523             : 
     524           6 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
     525             : }
     526             : 
     527             : Datum
     528          12 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
     529             : {
     530          12 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     531          12 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     532          12 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
     533             :     int         sl;
     534             : 
     535          12 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
     536          12 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
     537             : }
     538             : 
     539             : /*
     540             :  * bytea_substr()
     541             :  * Return a substring starting at the specified position.
     542             :  * Cloned from text_substr and modified as required.
     543             :  *
     544             :  * Input:
     545             :  *  - string
     546             :  *  - starting position (is one-based)
     547             :  *  - string length (optional)
     548             :  *
     549             :  * If the starting position is zero or less, then return from the start of the string
     550             :  * adjusting the length to be consistent with the "negative start" per SQL.
     551             :  * If the length is less than zero, an ERROR is thrown. If no third argument
     552             :  * (length) is provided, the length to the end of the string is assumed.
     553             :  */
     554             : Datum
     555          86 : bytea_substr(PG_FUNCTION_ARGS)
     556             : {
     557          86 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
     558             :                                       PG_GETARG_INT32(1),
     559             :                                       PG_GETARG_INT32(2),
     560             :                                       false));
     561             : }
     562             : 
     563             : /*
     564             :  * bytea_substr_no_len -
     565             :  *    Wrapper to avoid opr_sanity failure due to
     566             :  *    one function accepting a different number of args.
     567             :  */
     568             : Datum
     569        3936 : bytea_substr_no_len(PG_FUNCTION_ARGS)
     570             : {
     571        3936 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
     572             :                                       PG_GETARG_INT32(1),
     573             :                                       -1,
     574             :                                       true));
     575             : }
     576             : 
     577             : /*
     578             :  * bit_count
     579             :  */
     580             : Datum
     581           6 : bytea_bit_count(PG_FUNCTION_ARGS)
     582             : {
     583           6 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     584             : 
     585           6 :     PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
     586             : }
     587             : 
     588             : /*
     589             :  * byteapos -
     590             :  *    Return the position of the specified substring.
     591             :  *    Implements the SQL POSITION() function.
     592             :  * Cloned from textpos and modified as required.
     593             :  */
     594             : Datum
     595          30 : byteapos(PG_FUNCTION_ARGS)
     596             : {
     597          30 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
     598          30 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
     599             :     int         pos;
     600             :     int         px,
     601             :                 p;
     602             :     int         len1,
     603             :                 len2;
     604             :     char       *p1,
     605             :                *p2;
     606             : 
     607          30 :     len1 = VARSIZE_ANY_EXHDR(t1);
     608          30 :     len2 = VARSIZE_ANY_EXHDR(t2);
     609             : 
     610          30 :     if (len2 <= 0)
     611           6 :         PG_RETURN_INT32(1);     /* result for empty pattern */
     612             : 
     613          24 :     p1 = VARDATA_ANY(t1);
     614          24 :     p2 = VARDATA_ANY(t2);
     615             : 
     616          24 :     pos = 0;
     617          24 :     px = (len1 - len2);
     618          54 :     for (p = 0; p <= px; p++)
     619             :     {
     620          42 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
     621             :         {
     622          12 :             pos = p + 1;
     623          12 :             break;
     624             :         };
     625          30 :         p1++;
     626             :     };
     627             : 
     628          24 :     PG_RETURN_INT32(pos);
     629             : }
     630             : 
     631             : /*-------------------------------------------------------------
     632             :  * byteaGetByte
     633             :  *
     634             :  * this routine treats "bytea" as an array of bytes.
     635             :  * It returns the Nth byte (a number between 0 and 255).
     636             :  *-------------------------------------------------------------
     637             :  */
     638             : Datum
     639          76 : byteaGetByte(PG_FUNCTION_ARGS)
     640             : {
     641          76 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     642          76 :     int32       n = PG_GETARG_INT32(1);
     643             :     int         len;
     644             :     int         byte;
     645             : 
     646          76 :     len = VARSIZE_ANY_EXHDR(v);
     647             : 
     648          76 :     if (n < 0 || n >= len)
     649           6 :         ereport(ERROR,
     650             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     651             :                  errmsg("index %d out of valid range, 0..%d",
     652             :                         n, len - 1)));
     653             : 
     654          70 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
     655             : 
     656          70 :     PG_RETURN_INT32(byte);
     657             : }
     658             : 
     659             : /*-------------------------------------------------------------
     660             :  * byteaGetBit
     661             :  *
     662             :  * This routine treats a "bytea" type like an array of bits.
     663             :  * It returns the value of the Nth bit (0 or 1).
     664             :  *
     665             :  *-------------------------------------------------------------
     666             :  */
     667             : Datum
     668          12 : byteaGetBit(PG_FUNCTION_ARGS)
     669             : {
     670          12 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     671          12 :     int64       n = PG_GETARG_INT64(1);
     672             :     int         byteNo,
     673             :                 bitNo;
     674             :     int         len;
     675             :     int         byte;
     676             : 
     677          12 :     len = VARSIZE_ANY_EXHDR(v);
     678             : 
     679          12 :     if (n < 0 || n >= (int64) len * 8)
     680           6 :         ereport(ERROR,
     681             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     682             :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
     683             :                         n, (int64) len * 8 - 1)));
     684             : 
     685             :     /* n/8 is now known < len, so safe to cast to int */
     686           6 :     byteNo = (int) (n / 8);
     687           6 :     bitNo = (int) (n % 8);
     688             : 
     689           6 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
     690             : 
     691           6 :     if (byte & (1 << bitNo))
     692           6 :         PG_RETURN_INT32(1);
     693             :     else
     694           0 :         PG_RETURN_INT32(0);
     695             : }
     696             : 
     697             : /*-------------------------------------------------------------
     698             :  * byteaSetByte
     699             :  *
     700             :  * Given an instance of type 'bytea' creates a new one with
     701             :  * the Nth byte set to the given value.
     702             :  *
     703             :  *-------------------------------------------------------------
     704             :  */
     705             : Datum
     706          12 : byteaSetByte(PG_FUNCTION_ARGS)
     707             : {
     708          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
     709          12 :     int32       n = PG_GETARG_INT32(1);
     710          12 :     int32       newByte = PG_GETARG_INT32(2);
     711             :     int         len;
     712             : 
     713          12 :     len = VARSIZE(res) - VARHDRSZ;
     714             : 
     715          12 :     if (n < 0 || n >= len)
     716           6 :         ereport(ERROR,
     717             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     718             :                  errmsg("index %d out of valid range, 0..%d",
     719             :                         n, len - 1)));
     720             : 
     721             :     /*
     722             :      * Now set the byte.
     723             :      */
     724           6 :     ((unsigned char *) VARDATA(res))[n] = newByte;
     725             : 
     726           6 :     PG_RETURN_BYTEA_P(res);
     727             : }
     728             : 
     729             : /*-------------------------------------------------------------
     730             :  * byteaSetBit
     731             :  *
     732             :  * Given an instance of type 'bytea' creates a new one with
     733             :  * the Nth bit set to the given value.
     734             :  *
     735             :  *-------------------------------------------------------------
     736             :  */
     737             : Datum
     738          12 : byteaSetBit(PG_FUNCTION_ARGS)
     739             : {
     740          12 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
     741          12 :     int64       n = PG_GETARG_INT64(1);
     742          12 :     int32       newBit = PG_GETARG_INT32(2);
     743             :     int         len;
     744             :     int         oldByte,
     745             :                 newByte;
     746             :     int         byteNo,
     747             :                 bitNo;
     748             : 
     749          12 :     len = VARSIZE(res) - VARHDRSZ;
     750             : 
     751          12 :     if (n < 0 || n >= (int64) len * 8)
     752           6 :         ereport(ERROR,
     753             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
     754             :                  errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
     755             :                         n, (int64) len * 8 - 1)));
     756             : 
     757             :     /* n/8 is now known < len, so safe to cast to int */
     758           6 :     byteNo = (int) (n / 8);
     759           6 :     bitNo = (int) (n % 8);
     760             : 
     761             :     /*
     762             :      * sanity check!
     763             :      */
     764           6 :     if (newBit != 0 && newBit != 1)
     765           0 :         ereport(ERROR,
     766             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     767             :                  errmsg("new bit must be 0 or 1")));
     768             : 
     769             :     /*
     770             :      * Update the byte.
     771             :      */
     772           6 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
     773             : 
     774           6 :     if (newBit == 0)
     775           6 :         newByte = oldByte & (~(1 << bitNo));
     776             :     else
     777           0 :         newByte = oldByte | (1 << bitNo);
     778             : 
     779           6 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
     780             : 
     781           6 :     PG_RETURN_BYTEA_P(res);
     782             : }
     783             : 
     784             : /*
     785             :  * Return reversed bytea
     786             :  */
     787             : Datum
     788          18 : bytea_reverse(PG_FUNCTION_ARGS)
     789             : {
     790          18 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
     791          18 :     const char *p = VARDATA_ANY(v);
     792          18 :     int         len = VARSIZE_ANY_EXHDR(v);
     793          18 :     const char *endp = p + len;
     794          18 :     bytea      *result = palloc(len + VARHDRSZ);
     795          18 :     char       *dst = (char *) VARDATA(result) + len;
     796             : 
     797          18 :     SET_VARSIZE(result, len + VARHDRSZ);
     798             : 
     799          36 :     while (p < endp)
     800          18 :         *(--dst) = *p++;
     801             : 
     802          18 :     PG_RETURN_BYTEA_P(result);
     803             : }
     804             : 
     805             : 
     806             : /*****************************************************************************
     807             :  *  Comparison Functions used for bytea
     808             :  *
     809             :  * Note: btree indexes need these routines not to leak memory; therefore,
     810             :  * be careful to free working copies of toasted datums.  Most places don't
     811             :  * need to be so careful.
     812             :  *****************************************************************************/
     813             : 
     814             : Datum
     815       10478 : byteaeq(PG_FUNCTION_ARGS)
     816             : {
     817       10478 :     Datum       arg1 = PG_GETARG_DATUM(0);
     818       10478 :     Datum       arg2 = PG_GETARG_DATUM(1);
     819             :     bool        result;
     820             :     Size        len1,
     821             :                 len2;
     822             : 
     823             :     /*
     824             :      * We can use a fast path for unequal lengths, which might save us from
     825             :      * having to detoast one or both values.
     826             :      */
     827       10478 :     len1 = toast_raw_datum_size(arg1);
     828       10478 :     len2 = toast_raw_datum_size(arg2);
     829       10478 :     if (len1 != len2)
     830        4400 :         result = false;
     831             :     else
     832             :     {
     833        6078 :         bytea      *barg1 = DatumGetByteaPP(arg1);
     834        6078 :         bytea      *barg2 = DatumGetByteaPP(arg2);
     835             : 
     836        6078 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
     837             :                          len1 - VARHDRSZ) == 0);
     838             : 
     839        6078 :         PG_FREE_IF_COPY(barg1, 0);
     840        6078 :         PG_FREE_IF_COPY(barg2, 1);
     841             :     }
     842             : 
     843       10478 :     PG_RETURN_BOOL(result);
     844             : }
     845             : 
     846             : Datum
     847         768 : byteane(PG_FUNCTION_ARGS)
     848             : {
     849         768 :     Datum       arg1 = PG_GETARG_DATUM(0);
     850         768 :     Datum       arg2 = PG_GETARG_DATUM(1);
     851             :     bool        result;
     852             :     Size        len1,
     853             :                 len2;
     854             : 
     855             :     /*
     856             :      * We can use a fast path for unequal lengths, which might save us from
     857             :      * having to detoast one or both values.
     858             :      */
     859         768 :     len1 = toast_raw_datum_size(arg1);
     860         768 :     len2 = toast_raw_datum_size(arg2);
     861         768 :     if (len1 != len2)
     862           0 :         result = true;
     863             :     else
     864             :     {
     865         768 :         bytea      *barg1 = DatumGetByteaPP(arg1);
     866         768 :         bytea      *barg2 = DatumGetByteaPP(arg2);
     867             : 
     868         768 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
     869             :                          len1 - VARHDRSZ) != 0);
     870             : 
     871         768 :         PG_FREE_IF_COPY(barg1, 0);
     872         768 :         PG_FREE_IF_COPY(barg2, 1);
     873             :     }
     874             : 
     875         768 :     PG_RETURN_BOOL(result);
     876             : }
     877             : 
     878             : Datum
     879        8252 : bytealt(PG_FUNCTION_ARGS)
     880             : {
     881        8252 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     882        8252 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     883             :     int         len1,
     884             :                 len2;
     885             :     int         cmp;
     886             : 
     887        8252 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     888        8252 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     889             : 
     890        8252 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     891             : 
     892        8252 :     PG_FREE_IF_COPY(arg1, 0);
     893        8252 :     PG_FREE_IF_COPY(arg2, 1);
     894             : 
     895        8252 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
     896             : }
     897             : 
     898             : Datum
     899        6356 : byteale(PG_FUNCTION_ARGS)
     900             : {
     901        6356 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     902        6356 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     903             :     int         len1,
     904             :                 len2;
     905             :     int         cmp;
     906             : 
     907        6356 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     908        6356 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     909             : 
     910        6356 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     911             : 
     912        6356 :     PG_FREE_IF_COPY(arg1, 0);
     913        6356 :     PG_FREE_IF_COPY(arg2, 1);
     914             : 
     915        6356 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
     916             : }
     917             : 
     918             : Datum
     919        6272 : byteagt(PG_FUNCTION_ARGS)
     920             : {
     921        6272 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     922        6272 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     923             :     int         len1,
     924             :                 len2;
     925             :     int         cmp;
     926             : 
     927        6272 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     928        6272 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     929             : 
     930        6272 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     931             : 
     932        6272 :     PG_FREE_IF_COPY(arg1, 0);
     933        6272 :     PG_FREE_IF_COPY(arg2, 1);
     934             : 
     935        6272 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
     936             : }
     937             : 
     938             : Datum
     939        5054 : byteage(PG_FUNCTION_ARGS)
     940             : {
     941        5054 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     942        5054 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     943             :     int         len1,
     944             :                 len2;
     945             :     int         cmp;
     946             : 
     947        5054 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     948        5054 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     949             : 
     950        5054 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     951             : 
     952        5054 :     PG_FREE_IF_COPY(arg1, 0);
     953        5054 :     PG_FREE_IF_COPY(arg2, 1);
     954             : 
     955        5054 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
     956             : }
     957             : 
     958             : Datum
     959       94592 : byteacmp(PG_FUNCTION_ARGS)
     960             : {
     961       94592 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     962       94592 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     963             :     int         len1,
     964             :                 len2;
     965             :     int         cmp;
     966             : 
     967       94592 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     968       94592 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     969             : 
     970       94592 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     971       94592 :     if ((cmp == 0) && (len1 != len2))
     972        3400 :         cmp = (len1 < len2) ? -1 : 1;
     973             : 
     974       94592 :     PG_FREE_IF_COPY(arg1, 0);
     975       94592 :     PG_FREE_IF_COPY(arg2, 1);
     976             : 
     977       94592 :     PG_RETURN_INT32(cmp);
     978             : }
     979             : 
     980             : Datum
     981          24 : bytea_larger(PG_FUNCTION_ARGS)
     982             : {
     983          24 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
     984          24 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
     985             :     bytea      *result;
     986             :     int         len1,
     987             :                 len2;
     988             :     int         cmp;
     989             : 
     990          24 :     len1 = VARSIZE_ANY_EXHDR(arg1);
     991          24 :     len2 = VARSIZE_ANY_EXHDR(arg2);
     992             : 
     993          24 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
     994          24 :     result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
     995             : 
     996          24 :     PG_RETURN_BYTEA_P(result);
     997             : }
     998             : 
     999             : Datum
    1000          24 : bytea_smaller(PG_FUNCTION_ARGS)
    1001             : {
    1002          24 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    1003          24 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    1004             :     bytea      *result;
    1005             :     int         len1,
    1006             :                 len2;
    1007             :     int         cmp;
    1008             : 
    1009          24 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1010          24 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1011             : 
    1012          24 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    1013          24 :     result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
    1014             : 
    1015          24 :     PG_RETURN_BYTEA_P(result);
    1016             : }
    1017             : 
    1018             : /*
    1019             :  * sortsupport comparison func
    1020             :  */
    1021             : static int
    1022       12016 : byteafastcmp(Datum x, Datum y, SortSupport ssup)
    1023             : {
    1024       12016 :     bytea      *arg1 = DatumGetByteaPP(x);
    1025       12016 :     bytea      *arg2 = DatumGetByteaPP(y);
    1026             :     char       *a1p,
    1027             :                *a2p;
    1028             :     int         len1,
    1029             :                 len2,
    1030             :                 result;
    1031             : 
    1032       12016 :     a1p = VARDATA_ANY(arg1);
    1033       12016 :     a2p = VARDATA_ANY(arg2);
    1034             : 
    1035       12016 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1036       12016 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1037             : 
    1038       12016 :     result = memcmp(a1p, a2p, Min(len1, len2));
    1039       12016 :     if ((result == 0) && (len1 != len2))
    1040         558 :         result = (len1 < len2) ? -1 : 1;
    1041             : 
    1042             :     /* We can't afford to leak memory here. */
    1043       12016 :     if (PointerGetDatum(arg1) != x)
    1044           0 :         pfree(arg1);
    1045       12016 :     if (PointerGetDatum(arg2) != y)
    1046           0 :         pfree(arg2);
    1047             : 
    1048       12016 :     return result;
    1049             : }
    1050             : 
    1051             : /*
    1052             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    1053             :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    1054             :  * of the bytea data into a Datum (on little-endian machines, the bytes are
    1055             :  * stored in reverse order), and treat it as an unsigned integer.
    1056             :  */
    1057             : static Datum
    1058          24 : bytea_abbrev_convert(Datum original, SortSupport ssup)
    1059             : {
    1060          24 :     const size_t max_prefix_bytes = sizeof(Datum);
    1061          24 :     ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
    1062          24 :     bytea      *authoritative = DatumGetByteaPP(original);
    1063          24 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    1064             :     Datum       res;
    1065             :     char       *pres;
    1066             :     int         len;
    1067             :     uint32      hash;
    1068             : 
    1069          24 :     pres = (char *) &res;
    1070             : 
    1071             :     /* memset(), so any non-overwritten bytes are NUL */
    1072          24 :     memset(pres, 0, max_prefix_bytes);
    1073          24 :     len = VARSIZE_ANY_EXHDR(authoritative);
    1074             : 
    1075             :     /*
    1076             :      * Short byteas will have terminating NUL bytes in the abbreviated datum.
    1077             :      * Abbreviated comparison need not make a distinction between these NUL
    1078             :      * bytes, and NUL bytes representing actual NULs in the authoritative
    1079             :      * representation.
    1080             :      *
    1081             :      * Hopefully a comparison at or past one abbreviated key's terminating NUL
    1082             :      * byte will resolve the comparison without consulting the authoritative
    1083             :      * representation; specifically, some later non-NUL byte in the longer
    1084             :      * bytea can resolve the comparison against a subsequent terminating NUL
    1085             :      * in the shorter bytea.  There will usually be what is effectively a
    1086             :      * "length-wise" resolution there and then.
    1087             :      *
    1088             :      * If that doesn't work out -- if all bytes in the longer bytea positioned
    1089             :      * at or past the offset of the smaller bytea (first) terminating NUL are
    1090             :      * actually representative of NUL bytes in the authoritative binary bytea
    1091             :      * (perhaps with some *terminating* NUL bytes towards the end of the
    1092             :      * longer bytea iff it happens to still be small) -- then an authoritative
    1093             :      * tie-breaker will happen, and do the right thing: explicitly consider
    1094             :      * bytea length.
    1095             :      */
    1096          24 :     memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
    1097             : 
    1098             :     /*
    1099             :      * Maintain approximate cardinality of both abbreviated keys and original,
    1100             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    1101             :      * the worst case, where we do many string abbreviations for no saving in
    1102             :      * full memcmp()-based comparisons.  These statistics are used by
    1103             :      * bytea_abbrev_abort().
    1104             :      *
    1105             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    1106             :      * in order to compensate for cases where differences are past
    1107             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    1108             :      */
    1109          24 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    1110             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    1111             : 
    1112          24 :     if (len > PG_CACHE_LINE_SIZE)
    1113           0 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    1114             : 
    1115          24 :     addHyperLogLog(&bss->full_card, hash);
    1116             : 
    1117             :     /* Hash abbreviated key */
    1118             :     {
    1119             :         uint32      tmp;
    1120             : 
    1121          24 :         tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
    1122          24 :         hash = DatumGetUInt32(hash_uint32(tmp));
    1123             :     }
    1124             : 
    1125          24 :     addHyperLogLog(&bss->abbr_card, hash);
    1126             : 
    1127             :     /*
    1128             :      * Byteswap on little-endian machines.
    1129             :      *
    1130             :      * This is needed so that ssup_datum_unsigned_cmp() works correctly on all
    1131             :      * platforms.
    1132             :      */
    1133          24 :     res = DatumBigEndianToNative(res);
    1134             : 
    1135             :     /* Don't leak memory here */
    1136          24 :     if (PointerGetDatum(authoritative) != original)
    1137           0 :         pfree(authoritative);
    1138             : 
    1139          24 :     return res;
    1140             : }
    1141             : 
    1142             : /*
    1143             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    1144             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    1145             :  * should be aborted, based on its projected effectiveness.
    1146             :  *
    1147             :  * This is based on varstr_abbrev_abort(), but some comments have been elided
    1148             :  * for brevity. See there for more details.
    1149             :  */
    1150             : static bool
    1151           0 : bytea_abbrev_abort(int memtupcount, SortSupport ssup)
    1152             : {
    1153           0 :     ByteaSortSupport *bss = (ByteaSortSupport *) ssup->ssup_extra;
    1154             :     double      abbrev_distinct,
    1155             :                 key_distinct;
    1156             : 
    1157             :     Assert(ssup->abbreviate);
    1158             : 
    1159             :     /* Have a little patience */
    1160           0 :     if (memtupcount < 100)
    1161           0 :         return false;
    1162             : 
    1163           0 :     abbrev_distinct = estimateHyperLogLog(&bss->abbr_card);
    1164           0 :     key_distinct = estimateHyperLogLog(&bss->full_card);
    1165             : 
    1166             :     /*
    1167             :      * Clamp cardinality estimates to at least one distinct value.  While
    1168             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    1169             :      * that might misrepresent costs if we failed to clamp.
    1170             :      */
    1171           0 :     if (abbrev_distinct < 1.0)
    1172           0 :         abbrev_distinct = 1.0;
    1173             : 
    1174           0 :     if (key_distinct < 1.0)
    1175           0 :         key_distinct = 1.0;
    1176             : 
    1177           0 :     if (trace_sort)
    1178             :     {
    1179           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    1180             : 
    1181           0 :         elog(LOG, "bytea_abbrev: abbrev_distinct after %d: %f "
    1182             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    1183             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    1184             :              bss->prop_card);
    1185             :     }
    1186             : 
    1187             :     /*
    1188             :      * If the number of distinct abbreviated keys approximately matches the
    1189             :      * number of distinct original keys, continue with abbreviation.
    1190             :      */
    1191           0 :     if (abbrev_distinct > key_distinct * bss->prop_card)
    1192             :     {
    1193             :         /*
    1194             :          * Decay required cardinality aggressively after 10,000 tuples.
    1195             :          */
    1196           0 :         if (memtupcount > 10000)
    1197           0 :             bss->prop_card *= 0.65;
    1198             : 
    1199           0 :         return false;
    1200             :     }
    1201             : 
    1202             :     /*
    1203             :      * Abort abbreviation strategy.
    1204             :      */
    1205           0 :     if (trace_sort)
    1206           0 :         elog(LOG, "bytea_abbrev: aborted abbreviation at %d "
    1207             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    1208             :              memtupcount, abbrev_distinct, key_distinct, bss->prop_card);
    1209             : 
    1210           0 :     return true;
    1211             : }
    1212             : 
    1213             : Datum
    1214          54 : bytea_sortsupport(PG_FUNCTION_ARGS)
    1215             : {
    1216          54 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1217             :     MemoryContext oldcontext;
    1218             : 
    1219          54 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1220             : 
    1221          54 :     ssup->comparator = byteafastcmp;
    1222             : 
    1223             :     /*
    1224             :      * Set up abbreviation support if requested.
    1225             :      */
    1226          54 :     if (ssup->abbreviate)
    1227             :     {
    1228             :         ByteaSortSupport *bss;
    1229             : 
    1230          20 :         bss = palloc_object(ByteaSortSupport);
    1231          20 :         bss->abbreviate = true;
    1232          20 :         bss->prop_card = 0.20;
    1233          20 :         initHyperLogLog(&bss->abbr_card, 10);
    1234          20 :         initHyperLogLog(&bss->full_card, 10);
    1235             : 
    1236          20 :         ssup->ssup_extra = bss;
    1237          20 :         ssup->abbrev_full_comparator = ssup->comparator;
    1238          20 :         ssup->comparator = ssup_datum_unsigned_cmp;
    1239          20 :         ssup->abbrev_converter = bytea_abbrev_convert;
    1240          20 :         ssup->abbrev_abort = bytea_abbrev_abort;
    1241             :     }
    1242             : 
    1243          54 :     MemoryContextSwitchTo(oldcontext);
    1244             : 
    1245          54 :     PG_RETURN_VOID();
    1246             : }
    1247             : 
    1248             : /* Cast bytea -> int2 */
    1249             : Datum
    1250          36 : bytea_int2(PG_FUNCTION_ARGS)
    1251             : {
    1252          36 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1253          36 :     int         len = VARSIZE_ANY_EXHDR(v);
    1254             :     uint16      result;
    1255             : 
    1256             :     /* Check that the byte array is not too long */
    1257          36 :     if (len > sizeof(result))
    1258           6 :         ereport(ERROR,
    1259             :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1260             :                 errmsg("smallint out of range"));
    1261             : 
    1262             :     /* Convert it to an integer; most significant bytes come first */
    1263          30 :     result = 0;
    1264          72 :     for (int i = 0; i < len; i++)
    1265             :     {
    1266          42 :         result <<= BITS_PER_BYTE;
    1267          42 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1268             :     }
    1269             : 
    1270          30 :     PG_RETURN_INT16(result);
    1271             : }
    1272             : 
    1273             : /* Cast bytea -> int4 */
    1274             : Datum
    1275          36 : bytea_int4(PG_FUNCTION_ARGS)
    1276             : {
    1277          36 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1278          36 :     int         len = VARSIZE_ANY_EXHDR(v);
    1279             :     uint32      result;
    1280             : 
    1281             :     /* Check that the byte array is not too long */
    1282          36 :     if (len > sizeof(result))
    1283           6 :         ereport(ERROR,
    1284             :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1285             :                 errmsg("integer out of range"));
    1286             : 
    1287             :     /* Convert it to an integer; most significant bytes come first */
    1288          30 :     result = 0;
    1289         108 :     for (int i = 0; i < len; i++)
    1290             :     {
    1291          78 :         result <<= BITS_PER_BYTE;
    1292          78 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1293             :     }
    1294             : 
    1295          30 :     PG_RETURN_INT32(result);
    1296             : }
    1297             : 
    1298             : /* Cast bytea -> int8 */
    1299             : Datum
    1300          36 : bytea_int8(PG_FUNCTION_ARGS)
    1301             : {
    1302          36 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    1303          36 :     int         len = VARSIZE_ANY_EXHDR(v);
    1304             :     uint64      result;
    1305             : 
    1306             :     /* Check that the byte array is not too long */
    1307          36 :     if (len > sizeof(result))
    1308           6 :         ereport(ERROR,
    1309             :                 errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1310             :                 errmsg("bigint out of range"));
    1311             : 
    1312             :     /* Convert it to an integer; most significant bytes come first */
    1313          30 :     result = 0;
    1314         180 :     for (int i = 0; i < len; i++)
    1315             :     {
    1316         150 :         result <<= BITS_PER_BYTE;
    1317         150 :         result |= ((unsigned char *) VARDATA_ANY(v))[i];
    1318             :     }
    1319             : 
    1320          30 :     PG_RETURN_INT64(result);
    1321             : }
    1322             : 
    1323             : /* Cast int2 -> bytea; can just use int2send() */
    1324             : Datum
    1325          12 : int2_bytea(PG_FUNCTION_ARGS)
    1326             : {
    1327          12 :     return int2send(fcinfo);
    1328             : }
    1329             : 
    1330             : /* Cast int4 -> bytea; can just use int4send() */
    1331             : Datum
    1332       40972 : int4_bytea(PG_FUNCTION_ARGS)
    1333             : {
    1334       40972 :     return int4send(fcinfo);
    1335             : }
    1336             : 
    1337             : /* Cast int8 -> bytea; can just use int8send() */
    1338             : Datum
    1339          12 : int8_bytea(PG_FUNCTION_ARGS)
    1340             : {
    1341          12 :     return int8send(fcinfo);
    1342             : }

Generated by: LCOV version 1.16