LCOV - code coverage report
Current view: top level - src/backend/utils/adt - uuid.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 127 144 88.2 %
Date: 2023-12-11 14:10:49 Functions: 18 20 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * uuid.c
       4             :  *    Functions for the built-in type "uuid".
       5             :  *
       6             :  * Copyright (c) 2007-2023, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/adt/uuid.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : 
      16             : #include "common/hashfn.h"
      17             : #include "lib/hyperloglog.h"
      18             : #include "libpq/pqformat.h"
      19             : #include "port/pg_bswap.h"
      20             : #include "utils/builtins.h"
      21             : #include "utils/guc.h"
      22             : #include "utils/sortsupport.h"
      23             : #include "utils/uuid.h"
      24             : 
      25             : /* sortsupport for uuid */
      26             : typedef struct
      27             : {
      28             :     int64       input_count;    /* number of non-null values seen */
      29             :     bool        estimating;     /* true if estimating cardinality */
      30             : 
      31             :     hyperLogLogState abbr_card; /* cardinality estimator */
      32             : } uuid_sortsupport_state;
      33             : 
      34             : static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
      35             : static int  uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
      36             : static int  uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
      37             : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
      38             : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
      39             : 
      40             : Datum
      41      585848 : uuid_in(PG_FUNCTION_ARGS)
      42             : {
      43      585848 :     char       *uuid_str = PG_GETARG_CSTRING(0);
      44             :     pg_uuid_t  *uuid;
      45             : 
      46      585848 :     uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
      47      585848 :     string_to_uuid(uuid_str, uuid, fcinfo->context);
      48      585812 :     PG_RETURN_UUID_P(uuid);
      49             : }
      50             : 
      51             : Datum
      52        4662 : uuid_out(PG_FUNCTION_ARGS)
      53             : {
      54        4662 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
      55             :     static const char hex_chars[] = "0123456789abcdef";
      56             :     StringInfoData buf;
      57             :     int         i;
      58             : 
      59        4662 :     initStringInfo(&buf);
      60       79254 :     for (i = 0; i < UUID_LEN; i++)
      61             :     {
      62             :         int         hi;
      63             :         int         lo;
      64             : 
      65             :         /*
      66             :          * We print uuid values as a string of 8, 4, 4, 4, and then 12
      67             :          * hexadecimal characters, with each group is separated by a hyphen
      68             :          * ("-"). Therefore, add the hyphens at the appropriate places here.
      69             :          */
      70       74592 :         if (i == 4 || i == 6 || i == 8 || i == 10)
      71       18648 :             appendStringInfoChar(&buf, '-');
      72             : 
      73       74592 :         hi = uuid->data[i] >> 4;
      74       74592 :         lo = uuid->data[i] & 0x0F;
      75             : 
      76       74592 :         appendStringInfoChar(&buf, hex_chars[hi]);
      77       74592 :         appendStringInfoChar(&buf, hex_chars[lo]);
      78             :     }
      79             : 
      80        4662 :     PG_RETURN_CSTRING(buf.data);
      81             : }
      82             : 
      83             : /*
      84             :  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
      85             :  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
      86             :  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
      87             :  * digits, is the only one used for output.)
      88             :  */
      89             : static void
      90      585848 : string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
      91             : {
      92      585848 :     const char *src = source;
      93      585848 :     bool        braces = false;
      94             :     int         i;
      95             : 
      96      585848 :     if (src[0] == '{')
      97             :     {
      98          24 :         src++;
      99          24 :         braces = true;
     100             :     }
     101             : 
     102     9959002 :     for (i = 0; i < UUID_LEN; i++)
     103             :     {
     104             :         char        str_buf[3];
     105             : 
     106     9373190 :         if (src[0] == '\0' || src[1] == '\0')
     107          36 :             goto syntax_error;
     108     9373178 :         memcpy(str_buf, src, 2);
     109     9373178 :         if (!isxdigit((unsigned char) str_buf[0]) ||
     110     9373166 :             !isxdigit((unsigned char) str_buf[1]))
     111          24 :             goto syntax_error;
     112             : 
     113     9373154 :         str_buf[2] = '\0';
     114     9373154 :         uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
     115     9373154 :         src += 2;
     116     9373154 :         if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
     117     1935218 :             src++;
     118             :     }
     119             : 
     120      585812 :     if (braces)
     121             :     {
     122          18 :         if (*src != '}')
     123           6 :             goto syntax_error;
     124          12 :         src++;
     125             :     }
     126             : 
     127      585806 :     if (*src != '\0')
     128           6 :         goto syntax_error;
     129             : 
     130      585800 :     return;
     131             : 
     132          48 : syntax_error:
     133          48 :     ereturn(escontext,,
     134             :             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     135             :              errmsg("invalid input syntax for type %s: \"%s\"",
     136             :                     "uuid", source)));
     137             : }
     138             : 
     139             : Datum
     140           0 : uuid_recv(PG_FUNCTION_ARGS)
     141             : {
     142           0 :     StringInfo  buffer = (StringInfo) PG_GETARG_POINTER(0);
     143             :     pg_uuid_t  *uuid;
     144             : 
     145           0 :     uuid = (pg_uuid_t *) palloc(UUID_LEN);
     146           0 :     memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
     147           0 :     PG_RETURN_POINTER(uuid);
     148             : }
     149             : 
     150             : Datum
     151           0 : uuid_send(PG_FUNCTION_ARGS)
     152             : {
     153           0 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
     154             :     StringInfoData buffer;
     155             : 
     156           0 :     pq_begintypsend(&buffer);
     157           0 :     pq_sendbytes(&buffer, uuid->data, UUID_LEN);
     158           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
     159             : }
     160             : 
     161             : /* internal uuid compare function */
     162             : static int
     163    41857528 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
     164             : {
     165    41857528 :     return memcmp(arg1->data, arg2->data, UUID_LEN);
     166             : }
     167             : 
     168             : Datum
     169       84558 : uuid_lt(PG_FUNCTION_ARGS)
     170             : {
     171       84558 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     172       84558 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     173             : 
     174       84558 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
     175             : }
     176             : 
     177             : Datum
     178       17046 : uuid_le(PG_FUNCTION_ARGS)
     179             : {
     180       17046 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     181       17046 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     182             : 
     183       17046 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
     184             : }
     185             : 
     186             : Datum
     187      154438 : uuid_eq(PG_FUNCTION_ARGS)
     188             : {
     189      154438 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     190      154438 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     191             : 
     192      154438 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
     193             : }
     194             : 
     195             : Datum
     196       12920 : uuid_ge(PG_FUNCTION_ARGS)
     197             : {
     198       12920 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     199       12920 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     200             : 
     201       12920 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
     202             : }
     203             : 
     204             : Datum
     205       16290 : uuid_gt(PG_FUNCTION_ARGS)
     206             : {
     207       16290 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     208       16290 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     209             : 
     210       16290 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
     211             : }
     212             : 
     213             : Datum
     214          18 : uuid_ne(PG_FUNCTION_ARGS)
     215             : {
     216          18 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     217          18 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     218             : 
     219          18 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
     220             : }
     221             : 
     222             : /* handler for btree index operator */
     223             : Datum
     224        9272 : uuid_cmp(PG_FUNCTION_ARGS)
     225             : {
     226        9272 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     227        9272 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     228             : 
     229        9272 :     PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
     230             : }
     231             : 
     232             : /*
     233             :  * Sort support strategy routine
     234             :  */
     235             : Datum
     236         354 : uuid_sortsupport(PG_FUNCTION_ARGS)
     237             : {
     238         354 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
     239             : 
     240         354 :     ssup->comparator = uuid_fast_cmp;
     241         354 :     ssup->ssup_extra = NULL;
     242             : 
     243         354 :     if (ssup->abbreviate)
     244             :     {
     245             :         uuid_sortsupport_state *uss;
     246             :         MemoryContext oldcontext;
     247             : 
     248         278 :         oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
     249             : 
     250         278 :         uss = palloc(sizeof(uuid_sortsupport_state));
     251         278 :         uss->input_count = 0;
     252         278 :         uss->estimating = true;
     253         278 :         initHyperLogLog(&uss->abbr_card, 10);
     254             : 
     255         278 :         ssup->ssup_extra = uss;
     256             : 
     257         278 :         ssup->comparator = ssup_datum_unsigned_cmp;
     258         278 :         ssup->abbrev_converter = uuid_abbrev_convert;
     259         278 :         ssup->abbrev_abort = uuid_abbrev_abort;
     260         278 :         ssup->abbrev_full_comparator = uuid_fast_cmp;
     261             : 
     262         278 :         MemoryContextSwitchTo(oldcontext);
     263             :     }
     264             : 
     265         354 :     PG_RETURN_VOID();
     266             : }
     267             : 
     268             : /*
     269             :  * SortSupport comparison func
     270             :  */
     271             : static int
     272    41562986 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
     273             : {
     274    41562986 :     pg_uuid_t  *arg1 = DatumGetUUIDP(x);
     275    41562986 :     pg_uuid_t  *arg2 = DatumGetUUIDP(y);
     276             : 
     277    41562986 :     return uuid_internal_cmp(arg1, arg2);
     278             : }
     279             : 
     280             : /*
     281             :  * Callback for estimating effectiveness of abbreviated key optimization.
     282             :  *
     283             :  * We pay no attention to the cardinality of the non-abbreviated data, because
     284             :  * there is no equality fast-path within authoritative uuid comparator.
     285             :  */
     286             : static bool
     287        2298 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
     288             : {
     289        2298 :     uuid_sortsupport_state *uss = ssup->ssup_extra;
     290             :     double      abbr_card;
     291             : 
     292        2298 :     if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
     293        2106 :         return false;
     294             : 
     295         192 :     abbr_card = estimateHyperLogLog(&uss->abbr_card);
     296             : 
     297             :     /*
     298             :      * If we have >100k distinct values, then even if we were sorting many
     299             :      * billion rows we'd likely still break even, and the penalty of undoing
     300             :      * that many rows of abbrevs would probably not be worth it.  Stop even
     301             :      * counting at that point.
     302             :      */
     303         192 :     if (abbr_card > 100000.0)
     304             :     {
     305             : #ifdef TRACE_SORT
     306           0 :         if (trace_sort)
     307           0 :             elog(LOG,
     308             :                  "uuid_abbrev: estimation ends at cardinality %f"
     309             :                  " after " INT64_FORMAT " values (%d rows)",
     310             :                  abbr_card, uss->input_count, memtupcount);
     311             : #endif
     312           0 :         uss->estimating = false;
     313           0 :         return false;
     314             :     }
     315             : 
     316             :     /*
     317             :      * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
     318             :      * fudge factor allows us to abort earlier on genuinely pathological data
     319             :      * where we've had exactly one abbreviated value in the first 2k
     320             :      * (non-null) rows.
     321             :      */
     322         192 :     if (abbr_card < uss->input_count / 2000.0 + 0.5)
     323             :     {
     324             : #ifdef TRACE_SORT
     325          96 :         if (trace_sort)
     326           0 :             elog(LOG,
     327             :                  "uuid_abbrev: aborting abbreviation at cardinality %f"
     328             :                  " below threshold %f after " INT64_FORMAT " values (%d rows)",
     329             :                  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
     330             :                  memtupcount);
     331             : #endif
     332          96 :         return true;
     333             :     }
     334             : 
     335             : #ifdef TRACE_SORT
     336          96 :     if (trace_sort)
     337           0 :         elog(LOG,
     338             :              "uuid_abbrev: cardinality %f after " INT64_FORMAT
     339             :              " values (%d rows)", abbr_card, uss->input_count, memtupcount);
     340             : #endif
     341             : 
     342          96 :     return false;
     343             : }
     344             : 
     345             : /*
     346             :  * Conversion routine for sortsupport.  Converts original uuid representation
     347             :  * to abbreviated key representation.  Our encoding strategy is simple -- pack
     348             :  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
     349             :  * machines, the bytes are stored in reverse order), and treat it as an
     350             :  * unsigned integer.
     351             :  */
     352             : static Datum
     353     3384090 : uuid_abbrev_convert(Datum original, SortSupport ssup)
     354             : {
     355     3384090 :     uuid_sortsupport_state *uss = ssup->ssup_extra;
     356     3384090 :     pg_uuid_t  *authoritative = DatumGetUUIDP(original);
     357             :     Datum       res;
     358             : 
     359     3384090 :     memcpy(&res, authoritative->data, sizeof(Datum));
     360     3384090 :     uss->input_count += 1;
     361             : 
     362     3384090 :     if (uss->estimating)
     363             :     {
     364             :         uint32      tmp;
     365             : 
     366             : #if SIZEOF_DATUM == 8
     367     3384090 :         tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
     368             : #else                           /* SIZEOF_DATUM != 8 */
     369             :         tmp = (uint32) res;
     370             : #endif
     371             : 
     372     3384090 :         addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
     373             :     }
     374             : 
     375             :     /*
     376             :      * Byteswap on little-endian machines.
     377             :      *
     378             :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
     379             :      * 3-way comparator) works correctly on all platforms.  If we didn't do
     380             :      * this, the comparator would have to call memcmp() with a pair of
     381             :      * pointers to the first byte of each abbreviated key, which is slower.
     382             :      */
     383     3384090 :     res = DatumBigEndianToNative(res);
     384             : 
     385     3384090 :     return res;
     386             : }
     387             : 
     388             : /* hash index support */
     389             : Datum
     390        2392 : uuid_hash(PG_FUNCTION_ARGS)
     391             : {
     392        2392 :     pg_uuid_t  *key = PG_GETARG_UUID_P(0);
     393             : 
     394        2392 :     return hash_any(key->data, UUID_LEN);
     395             : }
     396             : 
     397             : Datum
     398          60 : uuid_hash_extended(PG_FUNCTION_ARGS)
     399             : {
     400          60 :     pg_uuid_t  *key = PG_GETARG_UUID_P(0);
     401             : 
     402          60 :     return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
     403             : }
     404             : 
     405             : Datum
     406          12 : gen_random_uuid(PG_FUNCTION_ARGS)
     407             : {
     408          12 :     pg_uuid_t  *uuid = palloc(UUID_LEN);
     409             : 
     410          12 :     if (!pg_strong_random(uuid, UUID_LEN))
     411           0 :         ereport(ERROR,
     412             :                 (errcode(ERRCODE_INTERNAL_ERROR),
     413             :                  errmsg("could not generate random values")));
     414             : 
     415             :     /*
     416             :      * Set magic numbers for a "version 4" (pseudorandom) UUID, see
     417             :      * http://tools.ietf.org/html/rfc4122#section-4.4
     418             :      */
     419          12 :     uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40;    /* time_hi_and_version */
     420          12 :     uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;    /* clock_seq_hi_and_reserved */
     421             : 
     422          12 :     PG_RETURN_UUID_P(uuid);
     423             : }

Generated by: LCOV version 1.14