Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * uuid.c
4 : * Functions for the built-in type "uuid".
5 : *
6 : * Copyright (c) 2007-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/adt/uuid.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 :
16 : #include "common/hashfn.h"
17 : #include "lib/hyperloglog.h"
18 : #include "libpq/pqformat.h"
19 : #include "port/pg_bswap.h"
20 : #include "utils/fmgrprotos.h"
21 : #include "utils/guc.h"
22 : #include "utils/sortsupport.h"
23 : #include "utils/timestamp.h"
24 : #include "utils/uuid.h"
25 :
26 : /* sortsupport for uuid */
27 : typedef struct
28 : {
29 : int64 input_count; /* number of non-null values seen */
30 : bool estimating; /* true if estimating cardinality */
31 :
32 : hyperLogLogState abbr_card; /* cardinality estimator */
33 : } uuid_sortsupport_state;
34 :
35 : static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
36 : static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
37 : static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
38 : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
39 : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
40 :
41 : Datum
42 585872 : uuid_in(PG_FUNCTION_ARGS)
43 : {
44 585872 : char *uuid_str = PG_GETARG_CSTRING(0);
45 : pg_uuid_t *uuid;
46 :
47 585872 : uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
48 585872 : string_to_uuid(uuid_str, uuid, fcinfo->context);
49 585836 : PG_RETURN_UUID_P(uuid);
50 : }
51 :
52 : Datum
53 4662 : uuid_out(PG_FUNCTION_ARGS)
54 : {
55 4662 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
56 : static const char hex_chars[] = "0123456789abcdef";
57 : char *buf,
58 : *p;
59 : int i;
60 :
61 : /* counts for the four hyphens and the zero-terminator */
62 4662 : buf = palloc(2 * UUID_LEN + 5);
63 4662 : p = buf;
64 79254 : for (i = 0; i < UUID_LEN; i++)
65 : {
66 : int hi;
67 : int lo;
68 :
69 : /*
70 : * We print uuid values as a string of 8, 4, 4, 4, and then 12
71 : * hexadecimal characters, with each group is separated by a hyphen
72 : * ("-"). Therefore, add the hyphens at the appropriate places here.
73 : */
74 74592 : if (i == 4 || i == 6 || i == 8 || i == 10)
75 18648 : *p++ = '-';
76 :
77 74592 : hi = uuid->data[i] >> 4;
78 74592 : lo = uuid->data[i] & 0x0F;
79 :
80 74592 : *p++ = hex_chars[hi];
81 74592 : *p++ = hex_chars[lo];
82 : }
83 4662 : *p = '\0';
84 :
85 4662 : PG_RETURN_CSTRING(buf);
86 : }
87 :
88 : /*
89 : * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
90 : * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
91 : * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
92 : * digits, is the only one used for output.)
93 : */
94 : static void
95 585872 : string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
96 : {
97 585872 : const char *src = source;
98 585872 : bool braces = false;
99 : int i;
100 :
101 585872 : if (src[0] == '{')
102 : {
103 24 : src++;
104 24 : braces = true;
105 : }
106 :
107 9959410 : for (i = 0; i < UUID_LEN; i++)
108 : {
109 : char str_buf[3];
110 :
111 9373574 : if (src[0] == '\0' || src[1] == '\0')
112 36 : goto syntax_error;
113 9373562 : memcpy(str_buf, src, 2);
114 9373562 : if (!isxdigit((unsigned char) str_buf[0]) ||
115 9373550 : !isxdigit((unsigned char) str_buf[1]))
116 24 : goto syntax_error;
117 :
118 9373538 : str_buf[2] = '\0';
119 9373538 : uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
120 9373538 : src += 2;
121 9373538 : if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
122 1935314 : src++;
123 : }
124 :
125 585836 : if (braces)
126 : {
127 18 : if (*src != '}')
128 6 : goto syntax_error;
129 12 : src++;
130 : }
131 :
132 585830 : if (*src != '\0')
133 6 : goto syntax_error;
134 :
135 585824 : return;
136 :
137 48 : syntax_error:
138 48 : ereturn(escontext,,
139 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
140 : errmsg("invalid input syntax for type %s: \"%s\"",
141 : "uuid", source)));
142 : }
143 :
144 : Datum
145 0 : uuid_recv(PG_FUNCTION_ARGS)
146 : {
147 0 : StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
148 : pg_uuid_t *uuid;
149 :
150 0 : uuid = (pg_uuid_t *) palloc(UUID_LEN);
151 0 : memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
152 0 : PG_RETURN_POINTER(uuid);
153 : }
154 :
155 : Datum
156 0 : uuid_send(PG_FUNCTION_ARGS)
157 : {
158 0 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
159 : StringInfoData buffer;
160 :
161 0 : pq_begintypsend(&buffer);
162 0 : pq_sendbytes(&buffer, uuid->data, UUID_LEN);
163 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
164 : }
165 :
166 : /* internal uuid compare function */
167 : static int
168 41771586 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
169 : {
170 41771586 : return memcmp(arg1->data, arg2->data, UUID_LEN);
171 : }
172 :
173 : Datum
174 83346 : uuid_lt(PG_FUNCTION_ARGS)
175 : {
176 83346 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
177 83346 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
178 :
179 83346 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
180 : }
181 :
182 : Datum
183 17046 : uuid_le(PG_FUNCTION_ARGS)
184 : {
185 17046 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
186 17046 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
187 :
188 17046 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
189 : }
190 :
191 : Datum
192 154434 : uuid_eq(PG_FUNCTION_ARGS)
193 : {
194 154434 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
195 154434 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
196 :
197 154434 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
198 : }
199 :
200 : Datum
201 12556 : uuid_ge(PG_FUNCTION_ARGS)
202 : {
203 12556 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
204 12556 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
205 :
206 12556 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
207 : }
208 :
209 : Datum
210 16290 : uuid_gt(PG_FUNCTION_ARGS)
211 : {
212 16290 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
213 16290 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
214 :
215 16290 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
216 : }
217 :
218 : Datum
219 18 : uuid_ne(PG_FUNCTION_ARGS)
220 : {
221 18 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
222 18 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
223 :
224 18 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
225 : }
226 :
227 : /* handler for btree index operator */
228 : Datum
229 9272 : uuid_cmp(PG_FUNCTION_ARGS)
230 : {
231 9272 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
232 9272 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
233 :
234 9272 : PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
235 : }
236 :
237 : /*
238 : * Sort support strategy routine
239 : */
240 : Datum
241 350 : uuid_sortsupport(PG_FUNCTION_ARGS)
242 : {
243 350 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
244 :
245 350 : ssup->comparator = uuid_fast_cmp;
246 350 : ssup->ssup_extra = NULL;
247 :
248 350 : if (ssup->abbreviate)
249 : {
250 : uuid_sortsupport_state *uss;
251 : MemoryContext oldcontext;
252 :
253 278 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
254 :
255 278 : uss = palloc(sizeof(uuid_sortsupport_state));
256 278 : uss->input_count = 0;
257 278 : uss->estimating = true;
258 278 : initHyperLogLog(&uss->abbr_card, 10);
259 :
260 278 : ssup->ssup_extra = uss;
261 :
262 278 : ssup->comparator = ssup_datum_unsigned_cmp;
263 278 : ssup->abbrev_converter = uuid_abbrev_convert;
264 278 : ssup->abbrev_abort = uuid_abbrev_abort;
265 278 : ssup->abbrev_full_comparator = uuid_fast_cmp;
266 :
267 278 : MemoryContextSwitchTo(oldcontext);
268 : }
269 :
270 350 : PG_RETURN_VOID();
271 : }
272 :
273 : /*
274 : * SortSupport comparison func
275 : */
276 : static int
277 41478624 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
278 : {
279 41478624 : pg_uuid_t *arg1 = DatumGetUUIDP(x);
280 41478624 : pg_uuid_t *arg2 = DatumGetUUIDP(y);
281 :
282 41478624 : return uuid_internal_cmp(arg1, arg2);
283 : }
284 :
285 : /*
286 : * Callback for estimating effectiveness of abbreviated key optimization.
287 : *
288 : * We pay no attention to the cardinality of the non-abbreviated data, because
289 : * there is no equality fast-path within authoritative uuid comparator.
290 : */
291 : static bool
292 2322 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
293 : {
294 2322 : uuid_sortsupport_state *uss = ssup->ssup_extra;
295 : double abbr_card;
296 :
297 2322 : if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
298 2130 : return false;
299 :
300 192 : abbr_card = estimateHyperLogLog(&uss->abbr_card);
301 :
302 : /*
303 : * If we have >100k distinct values, then even if we were sorting many
304 : * billion rows we'd likely still break even, and the penalty of undoing
305 : * that many rows of abbrevs would probably not be worth it. Stop even
306 : * counting at that point.
307 : */
308 192 : if (abbr_card > 100000.0)
309 : {
310 : #ifdef TRACE_SORT
311 0 : if (trace_sort)
312 0 : elog(LOG,
313 : "uuid_abbrev: estimation ends at cardinality %f"
314 : " after " INT64_FORMAT " values (%d rows)",
315 : abbr_card, uss->input_count, memtupcount);
316 : #endif
317 0 : uss->estimating = false;
318 0 : return false;
319 : }
320 :
321 : /*
322 : * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
323 : * fudge factor allows us to abort earlier on genuinely pathological data
324 : * where we've had exactly one abbreviated value in the first 2k
325 : * (non-null) rows.
326 : */
327 192 : if (abbr_card < uss->input_count / 2000.0 + 0.5)
328 : {
329 : #ifdef TRACE_SORT
330 96 : if (trace_sort)
331 0 : elog(LOG,
332 : "uuid_abbrev: aborting abbreviation at cardinality %f"
333 : " below threshold %f after " INT64_FORMAT " values (%d rows)",
334 : abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
335 : memtupcount);
336 : #endif
337 96 : return true;
338 : }
339 :
340 : #ifdef TRACE_SORT
341 96 : if (trace_sort)
342 0 : elog(LOG,
343 : "uuid_abbrev: cardinality %f after " INT64_FORMAT
344 : " values (%d rows)", abbr_card, uss->input_count, memtupcount);
345 : #endif
346 :
347 96 : return false;
348 : }
349 :
350 : /*
351 : * Conversion routine for sortsupport. Converts original uuid representation
352 : * to abbreviated key representation. Our encoding strategy is simple -- pack
353 : * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
354 : * machines, the bytes are stored in reverse order), and treat it as an
355 : * unsigned integer.
356 : */
357 : static Datum
358 3384090 : uuid_abbrev_convert(Datum original, SortSupport ssup)
359 : {
360 3384090 : uuid_sortsupport_state *uss = ssup->ssup_extra;
361 3384090 : pg_uuid_t *authoritative = DatumGetUUIDP(original);
362 : Datum res;
363 :
364 3384090 : memcpy(&res, authoritative->data, sizeof(Datum));
365 3384090 : uss->input_count += 1;
366 :
367 3384090 : if (uss->estimating)
368 : {
369 : uint32 tmp;
370 :
371 : #if SIZEOF_DATUM == 8
372 3384090 : tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
373 : #else /* SIZEOF_DATUM != 8 */
374 : tmp = (uint32) res;
375 : #endif
376 :
377 3384090 : addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
378 : }
379 :
380 : /*
381 : * Byteswap on little-endian machines.
382 : *
383 : * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
384 : * 3-way comparator) works correctly on all platforms. If we didn't do
385 : * this, the comparator would have to call memcmp() with a pair of
386 : * pointers to the first byte of each abbreviated key, which is slower.
387 : */
388 3384090 : res = DatumBigEndianToNative(res);
389 :
390 3384090 : return res;
391 : }
392 :
393 : /* hash index support */
394 : Datum
395 2388 : uuid_hash(PG_FUNCTION_ARGS)
396 : {
397 2388 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
398 :
399 2388 : return hash_any(key->data, UUID_LEN);
400 : }
401 :
402 : Datum
403 60 : uuid_hash_extended(PG_FUNCTION_ARGS)
404 : {
405 60 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
406 :
407 60 : return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
408 : }
409 :
410 : Datum
411 24 : gen_random_uuid(PG_FUNCTION_ARGS)
412 : {
413 24 : pg_uuid_t *uuid = palloc(UUID_LEN);
414 :
415 24 : if (!pg_strong_random(uuid, UUID_LEN))
416 0 : ereport(ERROR,
417 : (errcode(ERRCODE_INTERNAL_ERROR),
418 : errmsg("could not generate random values")));
419 :
420 : /*
421 : * Set magic numbers for a "version 4" (pseudorandom) UUID, see
422 : * http://tools.ietf.org/html/rfc4122#section-4.4
423 : */
424 24 : uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
425 24 : uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
426 :
427 24 : PG_RETURN_UUID_P(uuid);
428 : }
429 :
430 : #define UUIDV1_EPOCH_JDATE 2299161 /* == date2j(1582,10,15) */
431 :
432 : /*
433 : * Extract timestamp from UUID.
434 : *
435 : * Returns null if not RFC 4122 variant or not a version that has a timestamp.
436 : */
437 : Datum
438 18 : uuid_extract_timestamp(PG_FUNCTION_ARGS)
439 : {
440 18 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
441 : int version;
442 : uint64 tms;
443 : TimestampTz ts;
444 :
445 : /* check if RFC 4122 variant */
446 18 : if ((uuid->data[8] & 0xc0) != 0x80)
447 6 : PG_RETURN_NULL();
448 :
449 12 : version = uuid->data[6] >> 4;
450 :
451 12 : if (version == 1)
452 : {
453 6 : tms = ((uint64) uuid->data[0] << 24)
454 6 : + ((uint64) uuid->data[1] << 16)
455 6 : + ((uint64) uuid->data[2] << 8)
456 6 : + ((uint64) uuid->data[3])
457 6 : + ((uint64) uuid->data[4] << 40)
458 6 : + ((uint64) uuid->data[5] << 32)
459 6 : + (((uint64) uuid->data[6] & 0xf) << 56)
460 6 : + ((uint64) uuid->data[7] << 48);
461 :
462 : /* convert 100-ns intervals to us, then adjust */
463 6 : ts = (TimestampTz) (tms / 10) -
464 : ((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
465 :
466 6 : PG_RETURN_TIMESTAMPTZ(ts);
467 : }
468 :
469 : /* not a timestamp-containing UUID version */
470 6 : PG_RETURN_NULL();
471 : }
472 :
473 : /*
474 : * Extract UUID version.
475 : *
476 : * Returns null if not RFC 4122 variant.
477 : */
478 : Datum
479 18 : uuid_extract_version(PG_FUNCTION_ARGS)
480 : {
481 18 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
482 : uint16 version;
483 :
484 : /* check if RFC 4122 variant */
485 18 : if ((uuid->data[8] & 0xc0) != 0x80)
486 6 : PG_RETURN_NULL();
487 :
488 12 : version = uuid->data[6] >> 4;
489 :
490 12 : PG_RETURN_UINT16(version);
491 : }
|