1 /*-------------------------------------------------------------------------
4 * Functions for the built-in type "uuid".
6 * Copyright (c) 2007-2022, PostgreSQL Global Development Group
9 * src/backend/utils/adt/uuid.c
11 *-------------------------------------------------------------------------
16 #include "common/hashfn.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/uuid.h"
25 /* sortsupport for uuid */
28 int64 input_count
; /* number of non-null values seen */
29 bool estimating
; /* true if estimating cardinality */
31 hyperLogLogState abbr_card
; /* cardinality estimator */
32 } uuid_sortsupport_state
;
34 static void string_to_uuid(const char *source
, pg_uuid_t
*uuid
);
35 static int uuid_internal_cmp(const pg_uuid_t
*arg1
, const pg_uuid_t
*arg2
);
36 static int uuid_fast_cmp(Datum x
, Datum y
, SortSupport ssup
);
37 static int uuid_cmp_abbrev(Datum x
, Datum y
, SortSupport ssup
);
38 static bool uuid_abbrev_abort(int memtupcount
, SortSupport ssup
);
39 static Datum
uuid_abbrev_convert(Datum original
, SortSupport ssup
);
42 uuid_in(PG_FUNCTION_ARGS
)
44 char *uuid_str
= PG_GETARG_CSTRING(0);
47 uuid
= (pg_uuid_t
*) palloc(sizeof(*uuid
));
48 string_to_uuid(uuid_str
, uuid
);
49 PG_RETURN_UUID_P(uuid
);
53 uuid_out(PG_FUNCTION_ARGS
)
55 pg_uuid_t
*uuid
= PG_GETARG_UUID_P(0);
56 static const char hex_chars
[] = "0123456789abcdef";
61 for (i
= 0; i
< UUID_LEN
; i
++)
67 * We print uuid values as a string of 8, 4, 4, 4, and then 12
68 * hexadecimal characters, with each group is separated by a hyphen
69 * ("-"). Therefore, add the hyphens at the appropriate places here.
71 if (i
== 4 || i
== 6 || i
== 8 || i
== 10)
72 appendStringInfoChar(&buf
, '-');
74 hi
= uuid
->data
[i
] >> 4;
75 lo
= uuid
->data
[i
] & 0x0F;
77 appendStringInfoChar(&buf
, hex_chars
[hi
]);
78 appendStringInfoChar(&buf
, hex_chars
[lo
]);
81 PG_RETURN_CSTRING(buf
.data
);
85 * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
86 * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
87 * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
88 * digits, is the only one used for output.)
91 string_to_uuid(const char *source
, pg_uuid_t
*uuid
)
93 const char *src
= source
;
103 for (i
= 0; i
< UUID_LEN
; i
++)
107 if (src
[0] == '\0' || src
[1] == '\0')
109 memcpy(str_buf
, src
, 2);
110 if (!isxdigit((unsigned char) str_buf
[0]) ||
111 !isxdigit((unsigned char) str_buf
[1]))
115 uuid
->data
[i
] = (unsigned char) strtoul(str_buf
, NULL
, 16);
117 if (src
[0] == '-' && (i
% 2) == 1 && i
< UUID_LEN
- 1)
135 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
136 errmsg("invalid input syntax for type %s: \"%s\"",
141 uuid_recv(PG_FUNCTION_ARGS
)
143 StringInfo buffer
= (StringInfo
) PG_GETARG_POINTER(0);
146 uuid
= (pg_uuid_t
*) palloc(UUID_LEN
);
147 memcpy(uuid
->data
, pq_getmsgbytes(buffer
, UUID_LEN
), UUID_LEN
);
148 PG_RETURN_POINTER(uuid
);
152 uuid_send(PG_FUNCTION_ARGS
)
154 pg_uuid_t
*uuid
= PG_GETARG_UUID_P(0);
155 StringInfoData buffer
;
157 pq_begintypsend(&buffer
);
158 pq_sendbytes(&buffer
, (char *) uuid
->data
, UUID_LEN
);
159 PG_RETURN_BYTEA_P(pq_endtypsend(&buffer
));
162 /* internal uuid compare function */
164 uuid_internal_cmp(const pg_uuid_t
*arg1
, const pg_uuid_t
*arg2
)
166 return memcmp(arg1
->data
, arg2
->data
, UUID_LEN
);
170 uuid_lt(PG_FUNCTION_ARGS
)
172 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
173 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
175 PG_RETURN_BOOL(uuid_internal_cmp(arg1
, arg2
) < 0);
179 uuid_le(PG_FUNCTION_ARGS
)
181 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
182 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
184 PG_RETURN_BOOL(uuid_internal_cmp(arg1
, arg2
) <= 0);
188 uuid_eq(PG_FUNCTION_ARGS
)
190 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
191 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
193 PG_RETURN_BOOL(uuid_internal_cmp(arg1
, arg2
) == 0);
197 uuid_ge(PG_FUNCTION_ARGS
)
199 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
200 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
202 PG_RETURN_BOOL(uuid_internal_cmp(arg1
, arg2
) >= 0);
206 uuid_gt(PG_FUNCTION_ARGS
)
208 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
209 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
211 PG_RETURN_BOOL(uuid_internal_cmp(arg1
, arg2
) > 0);
215 uuid_ne(PG_FUNCTION_ARGS
)
217 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
218 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
220 PG_RETURN_BOOL(uuid_internal_cmp(arg1
, arg2
) != 0);
223 /* handler for btree index operator */
225 uuid_cmp(PG_FUNCTION_ARGS
)
227 pg_uuid_t
*arg1
= PG_GETARG_UUID_P(0);
228 pg_uuid_t
*arg2
= PG_GETARG_UUID_P(1);
230 PG_RETURN_INT32(uuid_internal_cmp(arg1
, arg2
));
234 * Sort support strategy routine
237 uuid_sortsupport(PG_FUNCTION_ARGS
)
239 SortSupport ssup
= (SortSupport
) PG_GETARG_POINTER(0);
241 ssup
->comparator
= uuid_fast_cmp
;
242 ssup
->ssup_extra
= NULL
;
244 if (ssup
->abbreviate
)
246 uuid_sortsupport_state
*uss
;
247 MemoryContext oldcontext
;
249 oldcontext
= MemoryContextSwitchTo(ssup
->ssup_cxt
);
251 uss
= palloc(sizeof(uuid_sortsupport_state
));
252 uss
->input_count
= 0;
253 uss
->estimating
= true;
254 initHyperLogLog(&uss
->abbr_card
, 10);
256 ssup
->ssup_extra
= uss
;
258 ssup
->comparator
= uuid_cmp_abbrev
;
259 ssup
->abbrev_converter
= uuid_abbrev_convert
;
260 ssup
->abbrev_abort
= uuid_abbrev_abort
;
261 ssup
->abbrev_full_comparator
= uuid_fast_cmp
;
263 MemoryContextSwitchTo(oldcontext
);
270 * SortSupport comparison func
273 uuid_fast_cmp(Datum x
, Datum y
, SortSupport ssup
)
275 pg_uuid_t
*arg1
= DatumGetUUIDP(x
);
276 pg_uuid_t
*arg2
= DatumGetUUIDP(y
);
278 return uuid_internal_cmp(arg1
, arg2
);
282 * Abbreviated key comparison func
285 uuid_cmp_abbrev(Datum x
, Datum y
, SortSupport ssup
)
296 * Callback for estimating effectiveness of abbreviated key optimization.
298 * We pay no attention to the cardinality of the non-abbreviated data, because
299 * there is no equality fast-path within authoritative uuid comparator.
302 uuid_abbrev_abort(int memtupcount
, SortSupport ssup
)
304 uuid_sortsupport_state
*uss
= ssup
->ssup_extra
;
307 if (memtupcount
< 10000 || uss
->input_count
< 10000 || !uss
->estimating
)
310 abbr_card
= estimateHyperLogLog(&uss
->abbr_card
);
313 * If we have >100k distinct values, then even if we were sorting many
314 * billion rows we'd likely still break even, and the penalty of undoing
315 * that many rows of abbrevs would probably not be worth it. Stop even
316 * counting at that point.
318 if (abbr_card
> 100000.0)
323 "uuid_abbrev: estimation ends at cardinality %f"
324 " after " INT64_FORMAT
" values (%d rows)",
325 abbr_card
, uss
->input_count
, memtupcount
);
327 uss
->estimating
= false;
332 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
333 * fudge factor allows us to abort earlier on genuinely pathological data
334 * where we've had exactly one abbreviated value in the first 2k
337 if (abbr_card
< uss
->input_count
/ 2000.0 + 0.5)
342 "uuid_abbrev: aborting abbreviation at cardinality %f"
343 " below threshold %f after " INT64_FORMAT
" values (%d rows)",
344 abbr_card
, uss
->input_count
/ 2000.0 + 0.5, uss
->input_count
,
353 "uuid_abbrev: cardinality %f after " INT64_FORMAT
354 " values (%d rows)", abbr_card
, uss
->input_count
, memtupcount
);
361 * Conversion routine for sortsupport. Converts original uuid representation
362 * to abbreviated key representation. Our encoding strategy is simple -- pack
363 * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
364 * machines, the bytes are stored in reverse order), and treat it as an
368 uuid_abbrev_convert(Datum original
, SortSupport ssup
)
370 uuid_sortsupport_state
*uss
= ssup
->ssup_extra
;
371 pg_uuid_t
*authoritative
= DatumGetUUIDP(original
);
374 memcpy(&res
, authoritative
->data
, sizeof(Datum
));
375 uss
->input_count
+= 1;
381 #if SIZEOF_DATUM == 8
382 tmp
= (uint32
) res
^ (uint32
) ((uint64
) res
>> 32);
383 #else /* SIZEOF_DATUM != 8 */
387 addHyperLogLog(&uss
->abbr_card
, DatumGetUInt32(hash_uint32(tmp
)));
391 * Byteswap on little-endian machines.
393 * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
394 * comparator) works correctly on all platforms. If we didn't do this,
395 * the comparator would have to call memcmp() with a pair of pointers to
396 * the first byte of each abbreviated key, which is slower.
398 res
= DatumBigEndianToNative(res
);
403 /* hash index support */
405 uuid_hash(PG_FUNCTION_ARGS
)
407 pg_uuid_t
*key
= PG_GETARG_UUID_P(0);
409 return hash_any(key
->data
, UUID_LEN
);
413 uuid_hash_extended(PG_FUNCTION_ARGS
)
415 pg_uuid_t
*key
= PG_GETARG_UUID_P(0);
417 return hash_any_extended(key
->data
, UUID_LEN
, PG_GETARG_INT64(1));
421 gen_random_uuid(PG_FUNCTION_ARGS
)
423 pg_uuid_t
*uuid
= palloc(UUID_LEN
);
425 if (!pg_strong_random(uuid
, UUID_LEN
))
427 (errcode(ERRCODE_INTERNAL_ERROR
),
428 errmsg("could not generate random values")));
431 * Set magic numbers for a "version 4" (pseudorandom) UUID, see
432 * http://tools.ietf.org/html/rfc4122#section-4.4
434 uuid
->data
[6] = (uuid
->data
[6] & 0x0f) | 0x40; /* time_hi_and_version */
435 uuid
->data
[8] = (uuid
->data
[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
437 PG_RETURN_UUID_P(uuid
);