Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * uuid.c
4 : * Functions for the built-in type "uuid".
5 : *
6 : * Copyright (c) 2007-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/adt/uuid.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 :
16 : #include <limits.h>
17 : #include <time.h> /* for clock_gettime() */
18 :
19 : #include "common/hashfn.h"
20 : #include "lib/hyperloglog.h"
21 : #include "libpq/pqformat.h"
22 : #include "port/pg_bswap.h"
23 : #include "utils/fmgrprotos.h"
24 : #include "utils/guc.h"
25 : #include "utils/skipsupport.h"
26 : #include "utils/sortsupport.h"
27 : #include "utils/timestamp.h"
28 : #include "utils/uuid.h"
29 :
/* helper macros: unit conversion factors, all 64-bit constants */
#define NS_PER_S INT64CONST(1000000000)	/* nanoseconds per second */
#define NS_PER_MS INT64CONST(1000000)	/* nanoseconds per millisecond */
#define NS_PER_US INT64CONST(1000)	/* nanoseconds per microsecond */
#define US_PER_MS INT64CONST(1000)	/* microseconds per millisecond */

/*
 * UUID version 7 uses 12 bits in "rand_a" to store 1/4096 (or 2^12) fractions of
 * sub-millisecond. While most Unix-like platforms provide nanosecond-precision
 * timestamps, some systems only offer microsecond precision, limiting us to 10
 * bits of sub-millisecond information. For example, on macOS, real time is
 * truncated to microseconds. Additionally, MSVC uses the ported version of
 * gettimeofday() that returns microsecond precision.
 *
 * On systems with only 10 bits of sub-millisecond precision, we still use
 * 1/4096 parts of a millisecond, but fill lower 2 bits with random numbers
 * (see generate_uuidv7() for details).
 *
 * SUBMS_MINIMAL_STEP_NS defines the minimum number of nanoseconds that guarantees
 * an increase in the UUID's clock precision.  It is computed as one nanosecond
 * more than the (integer) length of a 1/2^SUBMS_MINIMAL_STEP_BITS fraction of
 * a millisecond.
 */
#if defined(__darwin__) || defined(_MSC_VER)
#define SUBMS_MINIMAL_STEP_BITS 10
#else
#define SUBMS_MINIMAL_STEP_BITS 12
#endif
/* number of "rand_a" bits that hold the sub-millisecond fraction */
#define SUBMS_BITS 12
#define SUBMS_MINIMAL_STEP_NS ((NS_PER_MS / (1 << SUBMS_MINIMAL_STEP_BITS)) + 1)
58 :
/*
 * sortsupport for uuid
 *
 * Working state for abbreviated-key sorting.  It is allocated by
 * uuid_sortsupport() when abbreviation is requested, updated by
 * uuid_abbrev_convert() for every converted value, and consulted by
 * uuid_abbrev_abort() to decide whether abbreviation is paying off.
 */
typedef struct
{
	int64		input_count;	/* number of non-null values seen */
	bool		estimating;		/* true if estimating cardinality */

	hyperLogLogState abbr_card; /* cardinality estimator */
} uuid_sortsupport_state;
67 :
68 : static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
69 : static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
70 : static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
71 : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
72 : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
73 : static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version);
74 : static inline int64 get_real_time_ns_ascending();
75 : static pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
76 :
77 : Datum
78 586552 : uuid_in(PG_FUNCTION_ARGS)
79 : {
80 586552 : char *uuid_str = PG_GETARG_CSTRING(0);
81 : pg_uuid_t *uuid;
82 :
83 586552 : uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
84 586552 : string_to_uuid(uuid_str, uuid, fcinfo->context);
85 586516 : PG_RETURN_UUID_P(uuid);
86 : }
87 :
88 : Datum
89 6600 : uuid_out(PG_FUNCTION_ARGS)
90 : {
91 6600 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
92 : static const char hex_chars[] = "0123456789abcdef";
93 : char *buf,
94 : *p;
95 : int i;
96 :
97 : /* counts for the four hyphens and the zero-terminator */
98 6600 : buf = palloc(2 * UUID_LEN + 5);
99 6600 : p = buf;
100 112200 : for (i = 0; i < UUID_LEN; i++)
101 : {
102 : int hi;
103 : int lo;
104 :
105 : /*
106 : * We print uuid values as a string of 8, 4, 4, 4, and then 12
107 : * hexadecimal characters, with each group is separated by a hyphen
108 : * ("-"). Therefore, add the hyphens at the appropriate places here.
109 : */
110 105600 : if (i == 4 || i == 6 || i == 8 || i == 10)
111 26400 : *p++ = '-';
112 :
113 105600 : hi = uuid->data[i] >> 4;
114 105600 : lo = uuid->data[i] & 0x0F;
115 :
116 105600 : *p++ = hex_chars[hi];
117 105600 : *p++ = hex_chars[lo];
118 : }
119 6600 : *p = '\0';
120 :
121 6600 : PG_RETURN_CSTRING(buf);
122 : }
123 :
124 : /*
125 : * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
126 : * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
127 : * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
128 : * digits, is the only one used for output.)
129 : */
130 : static void
131 586552 : string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
132 : {
133 586552 : const char *src = source;
134 586552 : bool braces = false;
135 : int i;
136 :
137 586552 : if (src[0] == '{')
138 : {
139 24 : src++;
140 24 : braces = true;
141 : }
142 :
143 9970970 : for (i = 0; i < UUID_LEN; i++)
144 : {
145 : char str_buf[3];
146 :
147 9384454 : if (src[0] == '\0' || src[1] == '\0')
148 36 : goto syntax_error;
149 9384442 : memcpy(str_buf, src, 2);
150 9384442 : if (!isxdigit((unsigned char) str_buf[0]) ||
151 9384430 : !isxdigit((unsigned char) str_buf[1]))
152 24 : goto syntax_error;
153 :
154 9384418 : str_buf[2] = '\0';
155 9384418 : uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
156 9384418 : src += 2;
157 9384418 : if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
158 1937962 : src++;
159 : }
160 :
161 586516 : if (braces)
162 : {
163 18 : if (*src != '}')
164 6 : goto syntax_error;
165 12 : src++;
166 : }
167 :
168 586510 : if (*src != '\0')
169 6 : goto syntax_error;
170 :
171 586504 : return;
172 :
173 48 : syntax_error:
174 48 : ereturn(escontext,,
175 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
176 : errmsg("invalid input syntax for type %s: \"%s\"",
177 : "uuid", source)));
178 : }
179 :
180 : Datum
181 0 : uuid_recv(PG_FUNCTION_ARGS)
182 : {
183 0 : StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
184 : pg_uuid_t *uuid;
185 :
186 0 : uuid = (pg_uuid_t *) palloc(UUID_LEN);
187 0 : memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
188 0 : PG_RETURN_POINTER(uuid);
189 : }
190 :
191 : Datum
192 0 : uuid_send(PG_FUNCTION_ARGS)
193 : {
194 0 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
195 : StringInfoData buffer;
196 :
197 0 : pq_begintypsend(&buffer);
198 0 : pq_sendbytes(&buffer, uuid->data, UUID_LEN);
199 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
200 : }
201 :
202 : /* internal uuid compare function */
203 : static int
204 41775722 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
205 : {
206 41775722 : return memcmp(arg1->data, arg2->data, UUID_LEN);
207 : }
208 :
209 : Datum
210 84494 : uuid_lt(PG_FUNCTION_ARGS)
211 : {
212 84494 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
213 84494 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
214 :
215 84494 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
216 : }
217 :
218 : Datum
219 17046 : uuid_le(PG_FUNCTION_ARGS)
220 : {
221 17046 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
222 17046 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
223 :
224 17046 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
225 : }
226 :
227 : Datum
228 154596 : uuid_eq(PG_FUNCTION_ARGS)
229 : {
230 154596 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
231 154596 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
232 :
233 154596 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
234 : }
235 :
236 : Datum
237 12374 : uuid_ge(PG_FUNCTION_ARGS)
238 : {
239 12374 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
240 12374 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
241 :
242 12374 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
243 : }
244 :
245 : Datum
246 16386 : uuid_gt(PG_FUNCTION_ARGS)
247 : {
248 16386 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
249 16386 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
250 :
251 16386 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
252 : }
253 :
254 : Datum
255 18 : uuid_ne(PG_FUNCTION_ARGS)
256 : {
257 18 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
258 18 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
259 :
260 18 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
261 : }
262 :
263 : /* handler for btree index operator */
264 : Datum
265 9308 : uuid_cmp(PG_FUNCTION_ARGS)
266 : {
267 9308 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
268 9308 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
269 :
270 9308 : PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
271 : }
272 :
273 : /*
274 : * Sort support strategy routine
275 : */
276 : Datum
277 384 : uuid_sortsupport(PG_FUNCTION_ARGS)
278 : {
279 384 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
280 :
281 384 : ssup->comparator = uuid_fast_cmp;
282 384 : ssup->ssup_extra = NULL;
283 :
284 384 : if (ssup->abbreviate)
285 : {
286 : uuid_sortsupport_state *uss;
287 : MemoryContext oldcontext;
288 :
289 308 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
290 :
291 308 : uss = palloc(sizeof(uuid_sortsupport_state));
292 308 : uss->input_count = 0;
293 308 : uss->estimating = true;
294 308 : initHyperLogLog(&uss->abbr_card, 10);
295 :
296 308 : ssup->ssup_extra = uss;
297 :
298 308 : ssup->comparator = ssup_datum_unsigned_cmp;
299 308 : ssup->abbrev_converter = uuid_abbrev_convert;
300 308 : ssup->abbrev_abort = uuid_abbrev_abort;
301 308 : ssup->abbrev_full_comparator = uuid_fast_cmp;
302 :
303 308 : MemoryContextSwitchTo(oldcontext);
304 : }
305 :
306 384 : PG_RETURN_VOID();
307 : }
308 :
309 : /*
310 : * SortSupport comparison func
311 : */
312 : static int
313 41481500 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
314 : {
315 41481500 : pg_uuid_t *arg1 = DatumGetUUIDP(x);
316 41481500 : pg_uuid_t *arg2 = DatumGetUUIDP(y);
317 :
318 41481500 : return uuid_internal_cmp(arg1, arg2);
319 : }
320 :
321 : /*
322 : * Callback for estimating effectiveness of abbreviated key optimization.
323 : *
324 : * We pay no attention to the cardinality of the non-abbreviated data, because
325 : * there is no equality fast-path within authoritative uuid comparator.
326 : */
327 : static bool
328 2322 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
329 : {
330 2322 : uuid_sortsupport_state *uss = ssup->ssup_extra;
331 : double abbr_card;
332 :
333 2322 : if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
334 2130 : return false;
335 :
336 192 : abbr_card = estimateHyperLogLog(&uss->abbr_card);
337 :
338 : /*
339 : * If we have >100k distinct values, then even if we were sorting many
340 : * billion rows we'd likely still break even, and the penalty of undoing
341 : * that many rows of abbrevs would probably not be worth it. Stop even
342 : * counting at that point.
343 : */
344 192 : if (abbr_card > 100000.0)
345 : {
346 0 : if (trace_sort)
347 0 : elog(LOG,
348 : "uuid_abbrev: estimation ends at cardinality %f"
349 : " after " INT64_FORMAT " values (%d rows)",
350 : abbr_card, uss->input_count, memtupcount);
351 0 : uss->estimating = false;
352 0 : return false;
353 : }
354 :
355 : /*
356 : * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
357 : * fudge factor allows us to abort earlier on genuinely pathological data
358 : * where we've had exactly one abbreviated value in the first 2k
359 : * (non-null) rows.
360 : */
361 192 : if (abbr_card < uss->input_count / 2000.0 + 0.5)
362 : {
363 96 : if (trace_sort)
364 0 : elog(LOG,
365 : "uuid_abbrev: aborting abbreviation at cardinality %f"
366 : " below threshold %f after " INT64_FORMAT " values (%d rows)",
367 : abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
368 : memtupcount);
369 96 : return true;
370 : }
371 :
372 96 : if (trace_sort)
373 0 : elog(LOG,
374 : "uuid_abbrev: cardinality %f after " INT64_FORMAT
375 : " values (%d rows)", abbr_card, uss->input_count, memtupcount);
376 :
377 96 : return false;
378 : }
379 :
380 : /*
381 : * Conversion routine for sortsupport. Converts original uuid representation
382 : * to abbreviated key representation. Our encoding strategy is simple -- pack
383 : * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
384 : * machines, the bytes are stored in reverse order), and treat it as an
385 : * unsigned integer.
386 : */
387 : static Datum
388 3384150 : uuid_abbrev_convert(Datum original, SortSupport ssup)
389 : {
390 3384150 : uuid_sortsupport_state *uss = ssup->ssup_extra;
391 3384150 : pg_uuid_t *authoritative = DatumGetUUIDP(original);
392 : Datum res;
393 :
394 3384150 : memcpy(&res, authoritative->data, sizeof(Datum));
395 3384150 : uss->input_count += 1;
396 :
397 3384150 : if (uss->estimating)
398 : {
399 : uint32 tmp;
400 :
401 : #if SIZEOF_DATUM == 8
402 3384150 : tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
403 : #else /* SIZEOF_DATUM != 8 */
404 : tmp = (uint32) res;
405 : #endif
406 :
407 3384150 : addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
408 : }
409 :
410 : /*
411 : * Byteswap on little-endian machines.
412 : *
413 : * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
414 : * 3-way comparator) works correctly on all platforms. If we didn't do
415 : * this, the comparator would have to call memcmp() with a pair of
416 : * pointers to the first byte of each abbreviated key, which is slower.
417 : */
418 3384150 : res = DatumBigEndianToNative(res);
419 :
420 3384150 : return res;
421 : }
422 :
423 : static Datum
424 0 : uuid_decrement(Relation rel, Datum existing, bool *underflow)
425 : {
426 : pg_uuid_t *uuid;
427 :
428 0 : uuid = (pg_uuid_t *) palloc(UUID_LEN);
429 0 : memcpy(uuid, DatumGetUUIDP(existing), UUID_LEN);
430 0 : for (int i = UUID_LEN - 1; i >= 0; i--)
431 : {
432 0 : if (uuid->data[i] > 0)
433 : {
434 0 : uuid->data[i]--;
435 0 : *underflow = false;
436 0 : return UUIDPGetDatum(uuid);
437 : }
438 0 : uuid->data[i] = UCHAR_MAX;
439 : }
440 :
441 0 : pfree(uuid); /* cannot leak memory */
442 :
443 : /* return value is undefined */
444 0 : *underflow = true;
445 0 : return (Datum) 0;
446 : }
447 :
448 : static Datum
449 0 : uuid_increment(Relation rel, Datum existing, bool *overflow)
450 : {
451 : pg_uuid_t *uuid;
452 :
453 0 : uuid = (pg_uuid_t *) palloc(UUID_LEN);
454 0 : memcpy(uuid, DatumGetUUIDP(existing), UUID_LEN);
455 0 : for (int i = UUID_LEN - 1; i >= 0; i--)
456 : {
457 0 : if (uuid->data[i] < UCHAR_MAX)
458 : {
459 0 : uuid->data[i]++;
460 0 : *overflow = false;
461 0 : return UUIDPGetDatum(uuid);
462 : }
463 0 : uuid->data[i] = 0;
464 : }
465 :
466 0 : pfree(uuid); /* cannot leak memory */
467 :
468 : /* return value is undefined */
469 0 : *overflow = true;
470 0 : return (Datum) 0;
471 : }
472 :
473 : Datum
474 0 : uuid_skipsupport(PG_FUNCTION_ARGS)
475 : {
476 0 : SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0);
477 0 : pg_uuid_t *uuid_min = palloc(UUID_LEN);
478 0 : pg_uuid_t *uuid_max = palloc(UUID_LEN);
479 :
480 0 : memset(uuid_min->data, 0x00, UUID_LEN);
481 0 : memset(uuid_max->data, 0xFF, UUID_LEN);
482 :
483 0 : sksup->decrement = uuid_decrement;
484 0 : sksup->increment = uuid_increment;
485 0 : sksup->low_elem = UUIDPGetDatum(uuid_min);
486 0 : sksup->high_elem = UUIDPGetDatum(uuid_max);
487 :
488 0 : PG_RETURN_VOID();
489 : }
490 :
491 : /* hash index support */
492 : Datum
493 2632 : uuid_hash(PG_FUNCTION_ARGS)
494 : {
495 2632 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
496 :
497 2632 : return hash_any(key->data, UUID_LEN);
498 : }
499 :
500 : Datum
501 60 : uuid_hash_extended(PG_FUNCTION_ARGS)
502 : {
503 60 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
504 :
505 60 : return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
506 : }
507 :
508 : /*
509 : * Set the given UUID version and the variant bits
510 : */
511 : static inline void
512 53640 : uuid_set_version(pg_uuid_t *uuid, unsigned char version)
513 : {
514 : /* set version field, top four bits */
515 53640 : uuid->data[6] = (uuid->data[6] & 0x0f) | (version << 4);
516 :
517 : /* set variant field, top two bits are 1, 0 */
518 53640 : uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
519 53640 : }
520 :
521 : /*
522 : * Generate UUID version 4.
523 : *
524 : * All UUID bytes are filled with strong random numbers except version and
525 : * variant bits.
526 : */
527 : Datum
528 42 : gen_random_uuid(PG_FUNCTION_ARGS)
529 : {
530 42 : pg_uuid_t *uuid = palloc(UUID_LEN);
531 :
532 42 : if (!pg_strong_random(uuid, UUID_LEN))
533 0 : ereport(ERROR,
534 : (errcode(ERRCODE_INTERNAL_ERROR),
535 : errmsg("could not generate random values")));
536 :
537 : /*
538 : * Set magic numbers for a "version 4" (pseudorandom) UUID and variant,
539 : * see https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4
540 : */
541 42 : uuid_set_version(uuid, 4);
542 :
543 42 : PG_RETURN_UUID_P(uuid);
544 : }
545 :
546 : /*
547 : * Get the current timestamp with nanosecond precision for UUID generation.
548 : * The returned timestamp is ensured to be at least SUBMS_MINIMAL_STEP greater
549 : * than the previous returned timestamp (on this backend).
550 : */
551 : static inline int64
552 53598 : get_real_time_ns_ascending()
553 : {
554 : static int64 previous_ns = 0;
555 : int64 ns;
556 :
557 : /* Get the current real timestamp */
558 :
559 : #ifdef _MSC_VER
560 : struct timeval tmp;
561 :
562 : gettimeofday(&tmp, NULL);
563 : ns = tmp.tv_sec * NS_PER_S + tmp.tv_usec * NS_PER_US;
564 : #else
565 : struct timespec tmp;
566 :
567 : /*
568 : * We don't use gettimeofday(), instead use clock_gettime() with
569 : * CLOCK_REALTIME where available in order to get a high-precision
570 : * (nanoseconds) real timestamp.
571 : *
572 : * Note while a timestamp returned by clock_gettime() with CLOCK_REALTIME
573 : * is nanosecond-precision on most Unix-like platforms, on some platforms
574 : * such as macOS it's restricted to microsecond-precision.
575 : */
576 53598 : clock_gettime(CLOCK_REALTIME, &tmp);
577 53598 : ns = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
578 : #endif
579 :
580 : /* Guarantee the minimal step advancement of the timestamp */
581 53598 : if (previous_ns + SUBMS_MINIMAL_STEP_NS >= ns)
582 0 : ns = previous_ns + SUBMS_MINIMAL_STEP_NS;
583 53598 : previous_ns = ns;
584 :
585 53598 : return ns;
586 : }
587 :
588 : /*
589 : * Generate UUID version 7 per RFC 9562, with the given timestamp.
590 : *
591 : * UUID version 7 consists of a Unix timestamp in milliseconds (48 bits) and
592 : * 74 random bits, excluding the required version and variant bits. To ensure
593 : * monotonicity in scenarios of high-frequency UUID generation, we employ the
594 : * method "Replace Leftmost Random Bits with Increased Clock Precision (Method 3)",
595 : * described in the RFC. This method utilizes 12 bits from the "rand_a" bits
596 : * to store a 1/4096 (or 2^12) fraction of sub-millisecond precision.
597 : *
598 : * unix_ts_ms is a number of milliseconds since start of the UNIX epoch,
599 : * and sub_ms is a number of nanoseconds within millisecond. These values are
600 : * used for time-dependent bits of UUID.
601 : *
602 : * NB: all numbers here are unsigned, unix_ts_ms cannot be negative per RFC.
603 : */
604 : static pg_uuid_t *
605 53598 : generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms)
606 : {
607 53598 : pg_uuid_t *uuid = palloc(UUID_LEN);
608 : uint32 increased_clock_precision;
609 :
610 : /* Fill in time part */
611 53598 : uuid->data[0] = (unsigned char) (unix_ts_ms >> 40);
612 53598 : uuid->data[1] = (unsigned char) (unix_ts_ms >> 32);
613 53598 : uuid->data[2] = (unsigned char) (unix_ts_ms >> 24);
614 53598 : uuid->data[3] = (unsigned char) (unix_ts_ms >> 16);
615 53598 : uuid->data[4] = (unsigned char) (unix_ts_ms >> 8);
616 53598 : uuid->data[5] = (unsigned char) unix_ts_ms;
617 :
618 : /*
619 : * sub-millisecond timestamp fraction (SUBMS_BITS bits, not
620 : * SUBMS_MINIMAL_STEP_BITS)
621 : */
622 53598 : increased_clock_precision = (sub_ms * (1 << SUBMS_BITS)) / NS_PER_MS;
623 :
624 : /* Fill the increased clock precision to "rand_a" bits */
625 53598 : uuid->data[6] = (unsigned char) (increased_clock_precision >> 8);
626 53598 : uuid->data[7] = (unsigned char) (increased_clock_precision);
627 :
628 : /* fill everything after the increased clock precision with random bytes */
629 53598 : if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
630 0 : ereport(ERROR,
631 : (errcode(ERRCODE_INTERNAL_ERROR),
632 : errmsg("could not generate random values")));
633 :
634 : #if SUBMS_MINIMAL_STEP_BITS == 10
635 :
636 : /*
637 : * On systems that have only 10 bits of sub-ms precision, 2 least
638 : * significant are dependent on other time-specific bits, and they do not
639 : * contribute to uniqueness. To make these bit random we mix in two bits
640 : * from CSPRNG. SUBMS_MINIMAL_STEP is chosen so that we still guarantee
641 : * monotonicity despite altering these bits.
642 : */
643 : uuid->data[7] = uuid->data[7] ^ (uuid->data[8] >> 6);
644 : #endif
645 :
646 : /*
647 : * Set magic numbers for a "version 7" (pseudorandom) UUID and variant,
648 : * see https://www.rfc-editor.org/rfc/rfc9562#name-version-field
649 : */
650 53598 : uuid_set_version(uuid, 7);
651 :
652 53598 : return uuid;
653 : }
654 :
655 : /*
656 : * Generate UUID version 7 with the current timestamp.
657 : */
658 : Datum
659 78 : uuidv7(PG_FUNCTION_ARGS)
660 : {
661 78 : int64 ns = get_real_time_ns_ascending();
662 78 : pg_uuid_t *uuid = generate_uuidv7(ns / NS_PER_MS, ns % NS_PER_MS);
663 :
664 78 : PG_RETURN_UUID_P(uuid);
665 : }
666 :
667 : /*
668 : * Similar to uuidv7() but with the timestamp adjusted by the given interval.
669 : */
670 : Datum
671 53520 : uuidv7_interval(PG_FUNCTION_ARGS)
672 : {
673 53520 : Interval *shift = PG_GETARG_INTERVAL_P(0);
674 : TimestampTz ts;
675 : pg_uuid_t *uuid;
676 53520 : int64 ns = get_real_time_ns_ascending();
677 : int64 us;
678 :
679 : /*
680 : * Shift the current timestamp by the given interval. To calculate time
681 : * shift correctly, we convert the UNIX epoch to TimestampTz and use
682 : * timestamptz_pl_interval(). This calculation is done with microsecond
683 : * precision.
684 : */
685 :
686 53520 : ts = (TimestampTz) (ns / NS_PER_US) -
687 : (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
688 :
689 : /* Compute time shift */
690 53520 : ts = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval,
691 : TimestampTzGetDatum(ts),
692 : IntervalPGetDatum(shift)));
693 :
694 : /* Convert a TimestampTz value back to an UNIX epoch timestamp */
695 53520 : us = ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
696 :
697 : /* Generate an UUIDv7 */
698 53520 : uuid = generate_uuidv7(us / US_PER_MS, (us % US_PER_MS) * NS_PER_US + ns % NS_PER_US);
699 :
700 53520 : PG_RETURN_UUID_P(uuid);
701 : }
702 :
/*
 * Start of a Gregorian epoch == date2j(1582,10,15)
 * We make it a 64-bit constant because it's used in overflow-prone
 * computations (see the version 1 branch of uuid_extract_timestamp()).
 */
#define GREGORIAN_EPOCH_JDATE INT64CONST(2299161)
708 :
709 : /*
710 : * Extract timestamp from UUID.
711 : *
712 : * Returns null if not RFC 9562 variant or not a version that has a timestamp.
713 : */
714 : Datum
715 53538 : uuid_extract_timestamp(PG_FUNCTION_ARGS)
716 : {
717 53538 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
718 : int version;
719 : uint64 tms;
720 : TimestampTz ts;
721 :
722 : /* check if RFC 9562 variant */
723 53538 : if ((uuid->data[8] & 0xc0) != 0x80)
724 6 : PG_RETURN_NULL();
725 :
726 53532 : version = uuid->data[6] >> 4;
727 :
728 53532 : if (version == 1)
729 : {
730 6 : tms = ((uint64) uuid->data[0] << 24)
731 6 : + ((uint64) uuid->data[1] << 16)
732 6 : + ((uint64) uuid->data[2] << 8)
733 6 : + ((uint64) uuid->data[3])
734 6 : + ((uint64) uuid->data[4] << 40)
735 6 : + ((uint64) uuid->data[5] << 32)
736 6 : + (((uint64) uuid->data[6] & 0xf) << 56)
737 6 : + ((uint64) uuid->data[7] << 48);
738 :
739 : /* convert 100-ns intervals to us, then adjust */
740 6 : ts = (TimestampTz) (tms / 10) -
741 : ((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
742 6 : PG_RETURN_TIMESTAMPTZ(ts);
743 : }
744 :
745 53526 : if (version == 7)
746 : {
747 53520 : tms = (uuid->data[5])
748 53520 : + (((uint64) uuid->data[4]) << 8)
749 53520 : + (((uint64) uuid->data[3]) << 16)
750 53520 : + (((uint64) uuid->data[2]) << 24)
751 53520 : + (((uint64) uuid->data[1]) << 32)
752 53520 : + (((uint64) uuid->data[0]) << 40);
753 :
754 : /* convert ms to us, then adjust */
755 53520 : ts = (TimestampTz) (tms * NS_PER_US) -
756 : (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
757 :
758 53520 : PG_RETURN_TIMESTAMPTZ(ts);
759 : }
760 :
761 : /* not a timestamp-containing UUID version */
762 6 : PG_RETURN_NULL();
763 : }
764 :
765 : /*
766 : * Extract UUID version.
767 : *
768 : * Returns null if not RFC 9562 variant.
769 : */
770 : Datum
771 30 : uuid_extract_version(PG_FUNCTION_ARGS)
772 : {
773 30 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
774 : uint16 version;
775 :
776 : /* check if RFC 9562 variant */
777 30 : if ((uuid->data[8] & 0xc0) != 0x80)
778 6 : PG_RETURN_NULL();
779 :
780 24 : version = uuid->data[6] >> 4;
781 :
782 24 : PG_RETURN_UINT16(version);
783 : }
|