LCOV - code coverage report
Current view: top level - src/backend/utils/adt - uuid.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 152 169 89.9 %
Date: 2024-04-25 09:11:48 Functions: 20 22 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * uuid.c
       4             :  *    Functions for the built-in type "uuid".
       5             :  *
       6             :  * Copyright (c) 2007-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/adt/uuid.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : 
      16             : #include "common/hashfn.h"
      17             : #include "lib/hyperloglog.h"
      18             : #include "libpq/pqformat.h"
      19             : #include "port/pg_bswap.h"
      20             : #include "utils/fmgrprotos.h"
      21             : #include "utils/guc.h"
      22             : #include "utils/sortsupport.h"
      23             : #include "utils/timestamp.h"
      24             : #include "utils/uuid.h"
      25             : 
      26             : /* sortsupport for uuid */
      27             : typedef struct
      28             : {
      29             :     int64       input_count;    /* number of non-null values seen */
      30             :     bool        estimating;     /* true if estimating cardinality */
      31             : 
      32             :     hyperLogLogState abbr_card; /* cardinality estimator */
      33             : } uuid_sortsupport_state;
      34             : 
      35             : static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
      36             : static int  uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
      37             : static int  uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
      38             : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
      39             : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
      40             : 
      41             : Datum
      42      585872 : uuid_in(PG_FUNCTION_ARGS)
      43             : {
      44      585872 :     char       *uuid_str = PG_GETARG_CSTRING(0);
      45             :     pg_uuid_t  *uuid;
      46             : 
      47      585872 :     uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
      48      585872 :     string_to_uuid(uuid_str, uuid, fcinfo->context);
      49      585836 :     PG_RETURN_UUID_P(uuid);
      50             : }
      51             : 
      52             : Datum
      53        4662 : uuid_out(PG_FUNCTION_ARGS)
      54             : {
      55        4662 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
      56             :     static const char hex_chars[] = "0123456789abcdef";
      57             :     char       *buf,
      58             :                *p;
      59             :     int         i;
      60             : 
      61             :     /* counts for the four hyphens and the zero-terminator */
      62        4662 :     buf = palloc(2 * UUID_LEN + 5);
      63        4662 :     p = buf;
      64       79254 :     for (i = 0; i < UUID_LEN; i++)
      65             :     {
      66             :         int         hi;
      67             :         int         lo;
      68             : 
      69             :         /*
      70             :          * We print uuid values as a string of 8, 4, 4, 4, and then 12
      71             :          * hexadecimal characters, with each group is separated by a hyphen
      72             :          * ("-"). Therefore, add the hyphens at the appropriate places here.
      73             :          */
      74       74592 :         if (i == 4 || i == 6 || i == 8 || i == 10)
      75       18648 :             *p++ = '-';
      76             : 
      77       74592 :         hi = uuid->data[i] >> 4;
      78       74592 :         lo = uuid->data[i] & 0x0F;
      79             : 
      80       74592 :         *p++ = hex_chars[hi];
      81       74592 :         *p++ = hex_chars[lo];
      82             :     }
      83        4662 :     *p = '\0';
      84             : 
      85        4662 :     PG_RETURN_CSTRING(buf);
      86             : }
      87             : 
      88             : /*
      89             :  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
      90             :  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
      91             :  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
      92             :  * digits, is the only one used for output.)
      93             :  */
      94             : static void
      95      585872 : string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
      96             : {
      97      585872 :     const char *src = source;
      98      585872 :     bool        braces = false;
      99             :     int         i;
     100             : 
     101      585872 :     if (src[0] == '{')
     102             :     {
     103          24 :         src++;
     104          24 :         braces = true;
     105             :     }
     106             : 
     107     9959410 :     for (i = 0; i < UUID_LEN; i++)
     108             :     {
     109             :         char        str_buf[3];
     110             : 
     111     9373574 :         if (src[0] == '\0' || src[1] == '\0')
     112          36 :             goto syntax_error;
     113     9373562 :         memcpy(str_buf, src, 2);
     114     9373562 :         if (!isxdigit((unsigned char) str_buf[0]) ||
     115     9373550 :             !isxdigit((unsigned char) str_buf[1]))
     116          24 :             goto syntax_error;
     117             : 
     118     9373538 :         str_buf[2] = '\0';
     119     9373538 :         uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
     120     9373538 :         src += 2;
     121     9373538 :         if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
     122     1935314 :             src++;
     123             :     }
     124             : 
     125      585836 :     if (braces)
     126             :     {
     127          18 :         if (*src != '}')
     128           6 :             goto syntax_error;
     129          12 :         src++;
     130             :     }
     131             : 
     132      585830 :     if (*src != '\0')
     133           6 :         goto syntax_error;
     134             : 
     135      585824 :     return;
     136             : 
     137          48 : syntax_error:
     138          48 :     ereturn(escontext,,
     139             :             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     140             :              errmsg("invalid input syntax for type %s: \"%s\"",
     141             :                     "uuid", source)));
     142             : }
     143             : 
     144             : Datum
     145           0 : uuid_recv(PG_FUNCTION_ARGS)
     146             : {
     147           0 :     StringInfo  buffer = (StringInfo) PG_GETARG_POINTER(0);
     148             :     pg_uuid_t  *uuid;
     149             : 
     150           0 :     uuid = (pg_uuid_t *) palloc(UUID_LEN);
     151           0 :     memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
     152           0 :     PG_RETURN_POINTER(uuid);
     153             : }
     154             : 
     155             : Datum
     156           0 : uuid_send(PG_FUNCTION_ARGS)
     157             : {
     158           0 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
     159             :     StringInfoData buffer;
     160             : 
     161           0 :     pq_begintypsend(&buffer);
     162           0 :     pq_sendbytes(&buffer, uuid->data, UUID_LEN);
     163           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
     164             : }
     165             : 
     166             : /* internal uuid compare function */
     167             : static int
     168    41771586 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
     169             : {
     170    41771586 :     return memcmp(arg1->data, arg2->data, UUID_LEN);
     171             : }
     172             : 
     173             : Datum
     174       83346 : uuid_lt(PG_FUNCTION_ARGS)
     175             : {
     176       83346 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     177       83346 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     178             : 
     179       83346 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
     180             : }
     181             : 
     182             : Datum
     183       17046 : uuid_le(PG_FUNCTION_ARGS)
     184             : {
     185       17046 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     186       17046 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     187             : 
     188       17046 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
     189             : }
     190             : 
     191             : Datum
     192      154434 : uuid_eq(PG_FUNCTION_ARGS)
     193             : {
     194      154434 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     195      154434 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     196             : 
     197      154434 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
     198             : }
     199             : 
     200             : Datum
     201       12556 : uuid_ge(PG_FUNCTION_ARGS)
     202             : {
     203       12556 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     204       12556 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     205             : 
     206       12556 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
     207             : }
     208             : 
     209             : Datum
     210       16290 : uuid_gt(PG_FUNCTION_ARGS)
     211             : {
     212       16290 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     213       16290 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     214             : 
     215       16290 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
     216             : }
     217             : 
     218             : Datum
     219          18 : uuid_ne(PG_FUNCTION_ARGS)
     220             : {
     221          18 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     222          18 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     223             : 
     224          18 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
     225             : }
     226             : 
     227             : /* handler for btree index operator */
     228             : Datum
     229        9272 : uuid_cmp(PG_FUNCTION_ARGS)
     230             : {
     231        9272 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     232        9272 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     233             : 
     234        9272 :     PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
     235             : }
     236             : 
     237             : /*
     238             :  * Sort support strategy routine
     239             :  */
     240             : Datum
     241         350 : uuid_sortsupport(PG_FUNCTION_ARGS)
     242             : {
     243         350 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
     244             : 
     245         350 :     ssup->comparator = uuid_fast_cmp;
     246         350 :     ssup->ssup_extra = NULL;
     247             : 
     248         350 :     if (ssup->abbreviate)
     249             :     {
     250             :         uuid_sortsupport_state *uss;
     251             :         MemoryContext oldcontext;
     252             : 
     253         278 :         oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
     254             : 
     255         278 :         uss = palloc(sizeof(uuid_sortsupport_state));
     256         278 :         uss->input_count = 0;
     257         278 :         uss->estimating = true;
     258         278 :         initHyperLogLog(&uss->abbr_card, 10);
     259             : 
     260         278 :         ssup->ssup_extra = uss;
     261             : 
     262         278 :         ssup->comparator = ssup_datum_unsigned_cmp;
     263         278 :         ssup->abbrev_converter = uuid_abbrev_convert;
     264         278 :         ssup->abbrev_abort = uuid_abbrev_abort;
     265         278 :         ssup->abbrev_full_comparator = uuid_fast_cmp;
     266             : 
     267         278 :         MemoryContextSwitchTo(oldcontext);
     268             :     }
     269             : 
     270         350 :     PG_RETURN_VOID();
     271             : }
     272             : 
     273             : /*
     274             :  * SortSupport comparison func
     275             :  */
     276             : static int
     277    41478624 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
     278             : {
     279    41478624 :     pg_uuid_t  *arg1 = DatumGetUUIDP(x);
     280    41478624 :     pg_uuid_t  *arg2 = DatumGetUUIDP(y);
     281             : 
     282    41478624 :     return uuid_internal_cmp(arg1, arg2);
     283             : }
     284             : 
     285             : /*
     286             :  * Callback for estimating effectiveness of abbreviated key optimization.
     287             :  *
     288             :  * We pay no attention to the cardinality of the non-abbreviated data, because
     289             :  * there is no equality fast-path within authoritative uuid comparator.
     290             :  */
     291             : static bool
     292        2322 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
     293             : {
     294        2322 :     uuid_sortsupport_state *uss = ssup->ssup_extra;
     295             :     double      abbr_card;
     296             : 
     297        2322 :     if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
     298        2130 :         return false;
     299             : 
     300         192 :     abbr_card = estimateHyperLogLog(&uss->abbr_card);
     301             : 
     302             :     /*
     303             :      * If we have >100k distinct values, then even if we were sorting many
     304             :      * billion rows we'd likely still break even, and the penalty of undoing
     305             :      * that many rows of abbrevs would probably not be worth it.  Stop even
     306             :      * counting at that point.
     307             :      */
     308         192 :     if (abbr_card > 100000.0)
     309             :     {
     310             : #ifdef TRACE_SORT
     311           0 :         if (trace_sort)
     312           0 :             elog(LOG,
     313             :                  "uuid_abbrev: estimation ends at cardinality %f"
     314             :                  " after " INT64_FORMAT " values (%d rows)",
     315             :                  abbr_card, uss->input_count, memtupcount);
     316             : #endif
     317           0 :         uss->estimating = false;
     318           0 :         return false;
     319             :     }
     320             : 
     321             :     /*
     322             :      * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
     323             :      * fudge factor allows us to abort earlier on genuinely pathological data
     324             :      * where we've had exactly one abbreviated value in the first 2k
     325             :      * (non-null) rows.
     326             :      */
     327         192 :     if (abbr_card < uss->input_count / 2000.0 + 0.5)
     328             :     {
     329             : #ifdef TRACE_SORT
     330          96 :         if (trace_sort)
     331           0 :             elog(LOG,
     332             :                  "uuid_abbrev: aborting abbreviation at cardinality %f"
     333             :                  " below threshold %f after " INT64_FORMAT " values (%d rows)",
     334             :                  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
     335             :                  memtupcount);
     336             : #endif
     337          96 :         return true;
     338             :     }
     339             : 
     340             : #ifdef TRACE_SORT
     341          96 :     if (trace_sort)
     342           0 :         elog(LOG,
     343             :              "uuid_abbrev: cardinality %f after " INT64_FORMAT
     344             :              " values (%d rows)", abbr_card, uss->input_count, memtupcount);
     345             : #endif
     346             : 
     347          96 :     return false;
     348             : }
     349             : 
     350             : /*
     351             :  * Conversion routine for sortsupport.  Converts original uuid representation
     352             :  * to abbreviated key representation.  Our encoding strategy is simple -- pack
     353             :  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
     354             :  * machines, the bytes are stored in reverse order), and treat it as an
     355             :  * unsigned integer.
     356             :  */
     357             : static Datum
     358     3384090 : uuid_abbrev_convert(Datum original, SortSupport ssup)
     359             : {
     360     3384090 :     uuid_sortsupport_state *uss = ssup->ssup_extra;
     361     3384090 :     pg_uuid_t  *authoritative = DatumGetUUIDP(original);
     362             :     Datum       res;
     363             : 
     364     3384090 :     memcpy(&res, authoritative->data, sizeof(Datum));
     365     3384090 :     uss->input_count += 1;
     366             : 
     367     3384090 :     if (uss->estimating)
     368             :     {
     369             :         uint32      tmp;
     370             : 
     371             : #if SIZEOF_DATUM == 8
     372     3384090 :         tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
     373             : #else                           /* SIZEOF_DATUM != 8 */
     374             :         tmp = (uint32) res;
     375             : #endif
     376             : 
     377     3384090 :         addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
     378             :     }
     379             : 
     380             :     /*
     381             :      * Byteswap on little-endian machines.
     382             :      *
     383             :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
     384             :      * 3-way comparator) works correctly on all platforms.  If we didn't do
     385             :      * this, the comparator would have to call memcmp() with a pair of
     386             :      * pointers to the first byte of each abbreviated key, which is slower.
     387             :      */
     388     3384090 :     res = DatumBigEndianToNative(res);
     389             : 
     390     3384090 :     return res;
     391             : }
     392             : 
     393             : /* hash index support */
     394             : Datum
     395        2388 : uuid_hash(PG_FUNCTION_ARGS)
     396             : {
     397        2388 :     pg_uuid_t  *key = PG_GETARG_UUID_P(0);
     398             : 
     399        2388 :     return hash_any(key->data, UUID_LEN);
     400             : }
     401             : 
     402             : Datum
     403          60 : uuid_hash_extended(PG_FUNCTION_ARGS)
     404             : {
     405          60 :     pg_uuid_t  *key = PG_GETARG_UUID_P(0);
     406             : 
     407          60 :     return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
     408             : }
     409             : 
     410             : Datum
     411          24 : gen_random_uuid(PG_FUNCTION_ARGS)
     412             : {
     413          24 :     pg_uuid_t  *uuid = palloc(UUID_LEN);
     414             : 
     415          24 :     if (!pg_strong_random(uuid, UUID_LEN))
     416           0 :         ereport(ERROR,
     417             :                 (errcode(ERRCODE_INTERNAL_ERROR),
     418             :                  errmsg("could not generate random values")));
     419             : 
     420             :     /*
     421             :      * Set magic numbers for a "version 4" (pseudorandom) UUID, see
     422             :      * http://tools.ietf.org/html/rfc4122#section-4.4
     423             :      */
     424          24 :     uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40;    /* time_hi_and_version */
     425          24 :     uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;    /* clock_seq_hi_and_reserved */
     426             : 
     427          24 :     PG_RETURN_UUID_P(uuid);
     428             : }
     429             : 
     430             : #define UUIDV1_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
     431             : 
     432             : /*
     433             :  * Extract timestamp from UUID.
     434             :  *
     435             :  * Returns null if not RFC 4122 variant or not a version that has a timestamp.
     436             :  */
     437             : Datum
     438          18 : uuid_extract_timestamp(PG_FUNCTION_ARGS)
     439             : {
     440          18 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
     441             :     int         version;
     442             :     uint64      tms;
     443             :     TimestampTz ts;
     444             : 
     445             :     /* check if RFC 4122 variant */
     446          18 :     if ((uuid->data[8] & 0xc0) != 0x80)
     447           6 :         PG_RETURN_NULL();
     448             : 
     449          12 :     version = uuid->data[6] >> 4;
     450             : 
     451          12 :     if (version == 1)
     452             :     {
     453           6 :         tms = ((uint64) uuid->data[0] << 24)
     454           6 :             + ((uint64) uuid->data[1] << 16)
     455           6 :             + ((uint64) uuid->data[2] << 8)
     456           6 :             + ((uint64) uuid->data[3])
     457           6 :             + ((uint64) uuid->data[4] << 40)
     458           6 :             + ((uint64) uuid->data[5] << 32)
     459           6 :             + (((uint64) uuid->data[6] & 0xf) << 56)
     460           6 :             + ((uint64) uuid->data[7] << 48);
     461             : 
     462             :         /* convert 100-ns intervals to us, then adjust */
     463           6 :         ts = (TimestampTz) (tms / 10) -
     464             :             ((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
     465             : 
     466           6 :         PG_RETURN_TIMESTAMPTZ(ts);
     467             :     }
     468             : 
     469             :     /* not a timestamp-containing UUID version */
     470           6 :     PG_RETURN_NULL();
     471             : }
     472             : 
     473             : /*
     474             :  * Extract UUID version.
     475             :  *
     476             :  * Returns null if not RFC 4122 variant.
     477             :  */
     478             : Datum
     479          18 : uuid_extract_version(PG_FUNCTION_ARGS)
     480             : {
     481          18 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
     482             :     uint16      version;
     483             : 
     484             :     /* check if RFC 4122 variant */
     485          18 :     if ((uuid->data[8] & 0xc0) != 0x80)
     486           6 :         PG_RETURN_NULL();
     487             : 
     488          12 :     version = uuid->data[6] >> 4;
     489             : 
     490          12 :     PG_RETURN_UINT16(version);
     491             : }

Generated by: LCOV version 1.14