Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_ndistinct.c
4 : * pg_ndistinct data type support.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/pg_ndistinct.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "common/int.h"
18 : #include "common/jsonapi.h"
19 : #include "lib/stringinfo.h"
20 : #include "mb/pg_wchar.h"
21 : #include "nodes/miscnodes.h"
22 : #include "statistics/extended_stats_internal.h"
23 : #include "statistics/statistics_format.h"
24 : #include "utils/builtins.h"
25 : #include "utils/fmgrprotos.h"
26 :
27 : /* Parsing state data: what the semantic callbacks expect to see next */
28 : typedef enum
29 : {
30 : NDIST_EXPECT_START = 0, /* initial state; the outer array must open */
31 : NDIST_EXPECT_ITEM, /* in the outer array; an item object or its end */
32 : NDIST_EXPECT_KEY, /* in an item object; a key or the object's end */
33 : NDIST_EXPECT_ATTNUM_LIST, /* after the "attributes" key; an array must open */
34 : NDIST_EXPECT_ATTNUM, /* in the attributes array; a number or its end */
35 : NDIST_EXPECT_NDISTINCT, /* after the "ndistinct" key; an integer scalar */
36 : NDIST_EXPECT_COMPLETE, /* outer array closed with at least one item */
37 : } NDistinctSemanticState;
38 :
39 : typedef struct
40 : {
41 : const char *str; /* Input string, quoted in error messages */
42 : NDistinctSemanticState state; /* What the callbacks expect next */
43 :
44 : List *distinct_items; /* Accumulated complete MVNDistinctItems */
45 : Node *escontext; /* Error context handed to errsave() */
46 :
47 : bool found_attributes; /* Current item has "attributes" key */
48 : bool found_ndistinct; /* Current item has "ndistinct" key */
49 : List *attnum_list; /* Attribute numbers of the current item */
50 : int32 ndistinct; /* "ndistinct" value parsed for the current item */
51 : } NDistinctParseState;
52 :
53 : /*
54 : * Invoked at the start of each MVNDistinctItem.
55 : *
56 : * The entire JSON document should be one array of MVNDistinctItem objects.
57 : * If we are anywhere else in the document, it is an error.
58 : */
59 : static JsonParseErrorType
60 288 : ndistinct_object_start(void *state)
61 : {
62 288 : NDistinctParseState *parse = state;
63 :
64 288 : switch (parse->state)
65 : {
66 258 : case NDIST_EXPECT_ITEM:
67 : /* Now we expect to see attributes/ndistinct keys */
68 258 : parse->state = NDIST_EXPECT_KEY;
69 258 : return JSON_SUCCESS;
70 :
71 12 : case NDIST_EXPECT_START:
72 : /* pg_ndistinct must begin with a '[' */
73 12 : errsave(parse->escontext,
74 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
75 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
76 : errdetail("Initial element must be an array."));
77 6 : break;
78 :
79 0 : case NDIST_EXPECT_KEY:
80 : /* In an object, expecting key */
81 0 : errsave(parse->escontext,
82 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
83 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
84 : errdetail("A key was expected."));
85 0 : break;
86 :
87 6 : case NDIST_EXPECT_ATTNUM_LIST:
88 : /* Just followed an "attributes" key */
89 6 : errsave(parse->escontext,
90 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
91 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
92 : errdetail("Value of \"%s\" must be an array of attribute numbers.",
93 : PG_NDISTINCT_KEY_ATTRIBUTES));
94 3 : break;
95 :
96 6 : case NDIST_EXPECT_ATTNUM:
97 : /* In an attribute number list, expect only scalar integers */
98 6 : errsave(parse->escontext,
99 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
100 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
101 : errdetail("Attribute lists can only contain attribute numbers."));
102 3 : break;
103 :
104 6 : case NDIST_EXPECT_NDISTINCT:
105 : /* Just followed an "ndistinct" key */
106 6 : errsave(parse->escontext,
107 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
108 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
109 : errdetail("Value of \"%s\" must be an integer.",
110 : PG_NDISTINCT_KEY_NDISTINCT));
111 3 : break;
112 :
113 0 : default:
114 0 : elog(ERROR,
115 : "object start of \"%s\" found in unexpected parse state: %d.",
116 : "pg_ndistinct", (int) parse->state);
117 : break;
118 : }
119 :
120 15 : return JSON_SEM_ACTION_FAILED;
121 : }
122 :
123 : /*
124 : * Invoked at the end of an object.
125 : *
126 : * Check to ensure that it was a complete MVNDistinctItem
127 : */
128 : static JsonParseErrorType
129 108 : ndistinct_object_end(void *state)
130 : {
131 108 : NDistinctParseState *parse = state;
132 :
133 108 : int natts = 0;
134 :
135 : MVNDistinctItem *item;
136 :
137 108 : if (parse->state != NDIST_EXPECT_KEY)
138 0 : elog(ERROR,
139 : "object end of \"%s\" found in unexpected parse state: %d.",
140 : "pg_ndistinct", (int) parse->state);
141 :
142 108 : if (!parse->found_attributes)
143 : {
144 6 : errsave(parse->escontext,
145 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
146 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
147 : errdetail("Item must contain \"%s\" key.",
148 : PG_NDISTINCT_KEY_ATTRIBUTES));
149 3 : return JSON_SEM_ACTION_FAILED;
150 : }
151 :
152 102 : if (!parse->found_ndistinct)
153 : {
154 6 : errsave(parse->escontext,
155 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
156 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
157 : errdetail("Item must contain \"%s\" key.",
158 : PG_NDISTINCT_KEY_NDISTINCT));
159 3 : return JSON_SEM_ACTION_FAILED;
160 : }
161 :
162 : /*
163 : * We need at least two attribute numbers for a ndistinct item, anything
164 : * less is malformed.
165 : */
166 96 : natts = list_length(parse->attnum_list);
167 96 : if ((natts < 2) || (natts > STATS_MAX_DIMENSIONS))
168 : {
169 12 : errsave(parse->escontext,
170 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
171 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
172 : errdetail("The \"%s\" key must contain an array of at least %d and no more than %d attributes.",
173 : PG_NDISTINCT_KEY_ATTRIBUTES, 2, STATS_MAX_DIMENSIONS));
174 6 : return JSON_SEM_ACTION_FAILED;
175 : }
176 :
177 : /* Create the MVNDistinctItem */
178 84 : item = palloc_object(MVNDistinctItem);
179 84 : item->nattributes = natts;
180 84 : item->attributes = palloc0(natts * sizeof(AttrNumber));
181 84 : item->ndistinct = (double) parse->ndistinct;
182 :
183 279 : for (int i = 0; i < natts; i++)
184 195 : item->attributes[i] = (AttrNumber) list_nth_int(parse->attnum_list, i);
185 :
186 84 : parse->distinct_items = lappend(parse->distinct_items, (void *) item);
187 :
188 : /* reset item state vars */
189 84 : list_free(parse->attnum_list);
190 84 : parse->attnum_list = NIL;
191 84 : parse->ndistinct = 0;
192 84 : parse->found_attributes = false;
193 84 : parse->found_ndistinct = false;
194 :
195 : /* Now we are looking for the next MVNDistinctItem */
196 84 : parse->state = NDIST_EXPECT_ITEM;
197 84 : return JSON_SUCCESS;
198 : }
199 :
200 :
201 : /*
202 : * Invoked at the start of an array.
203 : *
204 : * ndistinct input format has two types of arrays, the outer MVNDistinctItem
205 : * array and the attribute number array within each MVNDistinctItem.
206 : */
207 : static JsonParseErrorType
208 459 : ndistinct_array_start(void *state)
209 : {
210 459 : NDistinctParseState *parse = state;
211 :
212 459 : switch (parse->state)
213 : {
214 213 : case NDIST_EXPECT_ATTNUM_LIST:
215 213 : parse->state = NDIST_EXPECT_ATTNUM;
216 213 : break;
217 :
218 228 : case NDIST_EXPECT_START:
219 228 : parse->state = NDIST_EXPECT_ITEM;
220 228 : break;
221 :
222 18 : default:
223 18 : errsave(parse->escontext,
224 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
225 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
226 : errdetail("Array has been found at an unexpected location."));
227 9 : return JSON_SEM_ACTION_FAILED;
228 : }
229 :
230 441 : return JSON_SUCCESS;
231 : }
232 :
233 :
234 : /*
235 : * Invoked at the end of an array.
236 : *
237 : * Arrays can never be empty.
238 : */
239 : static JsonParseErrorType
240 222 : ndistinct_array_end(void *state)
241 : {
242 222 : NDistinctParseState *parse = state;
243 :
244 222 : switch (parse->state)
245 : {
246 174 : case NDIST_EXPECT_ATTNUM:
247 174 : if (list_length(parse->attnum_list) > 0)
248 : {
249 : /*
250 : * The attribute number list is complete, look for more
251 : * MVNDistinctItem keys.
252 : */
253 168 : parse->state = NDIST_EXPECT_KEY;
254 168 : return JSON_SUCCESS;
255 : }
256 :
257 6 : errsave(parse->escontext,
258 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
259 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
260 : errdetail("The \"%s\" key must be a non-empty array.",
261 : PG_NDISTINCT_KEY_ATTRIBUTES));
262 3 : break;
263 :
264 48 : case NDIST_EXPECT_ITEM:
265 48 : if (list_length(parse->distinct_items) > 0)
266 : {
267 : /* Item list is complete, we are done. */
268 42 : parse->state = NDIST_EXPECT_COMPLETE;
269 42 : return JSON_SUCCESS;
270 : }
271 :
272 6 : errsave(parse->escontext,
273 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
274 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
275 : errdetail("Item array cannot be empty."));
276 3 : break;
277 :
278 0 : default:
279 :
280 : /*
281 : * This can only happen if a case was missed in
282 : * ndistinct_array_start().
283 : */
284 0 : elog(ERROR,
285 : "array end of \"%s\" found in unexpected parse state: %d.",
286 : "pg_ndistinct", (int) parse->state);
287 : break;
288 : }
289 :
290 6 : return JSON_SEM_ACTION_FAILED;
291 : }
292 :
293 : /*
294 : * Invoked at the start of a key/value field.
295 : *
296 : * The valid keys for the MVNDistinctItem object are:
297 : * - attributes
298 : * - ndistinct
299 : */
300 : static JsonParseErrorType
301 408 : ndistinct_object_field_start(void *state, char *fname, bool isnull)
302 : {
303 408 : NDistinctParseState *parse = state;
304 :
305 408 : if (strcmp(fname, PG_NDISTINCT_KEY_ATTRIBUTES) == 0)
306 : {
307 246 : if (parse->found_attributes)
308 : {
309 6 : errsave(parse->escontext,
310 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
311 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
312 : errdetail("Multiple \"%s\" keys are not allowed.",
313 : PG_NDISTINCT_KEY_ATTRIBUTES));
314 3 : return JSON_SEM_ACTION_FAILED;
315 : }
316 240 : parse->found_attributes = true;
317 240 : parse->state = NDIST_EXPECT_ATTNUM_LIST;
318 240 : return JSON_SUCCESS;
319 : }
320 :
321 162 : if (strcmp(fname, PG_NDISTINCT_KEY_NDISTINCT) == 0)
322 : {
323 150 : if (parse->found_ndistinct)
324 : {
325 6 : errsave(parse->escontext,
326 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
327 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
328 : errdetail("Multiple \"%s\" keys are not allowed.",
329 : PG_NDISTINCT_KEY_NDISTINCT));
330 3 : return JSON_SEM_ACTION_FAILED;
331 : }
332 144 : parse->found_ndistinct = true;
333 144 : parse->state = NDIST_EXPECT_NDISTINCT;
334 144 : return JSON_SUCCESS;
335 : }
336 :
337 12 : errsave(parse->escontext,
338 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
339 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
340 : errdetail("Only allowed keys are \"%s\" and \"%s\".",
341 : PG_NDISTINCT_KEY_ATTRIBUTES,
342 : PG_NDISTINCT_KEY_NDISTINCT));
343 6 : return JSON_SEM_ACTION_FAILED;
344 : }
345 :
346 : /*
347 : * Invoked at the start of an array element.
348 : *
349 : * The overall structure of the datatype is an array, but there are also
350 : * arrays as the value of every attributes key.
351 : */
352 : static JsonParseErrorType
353 726 : ndistinct_array_element_start(void *state, bool isnull)
354 : {
355 726 : const NDistinctParseState *parse = state;
356 :
357 726 : switch (parse->state)
358 : {
359 462 : case NDIST_EXPECT_ATTNUM:
360 462 : if (!isnull)
361 456 : return JSON_SUCCESS;
362 :
363 6 : errsave(parse->escontext,
364 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
365 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
366 : errdetail("Attribute number array cannot be null."));
367 3 : break;
368 :
369 264 : case NDIST_EXPECT_ITEM:
370 264 : if (!isnull)
371 258 : return JSON_SUCCESS;
372 :
373 6 : errsave(parse->escontext,
374 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
375 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
376 : errdetail("Item list elements cannot be null."));
377 :
378 3 : break;
379 :
380 0 : default:
381 0 : elog(ERROR,
382 : "array element start of \"%s\" found in unexpected parse state: %d.",
383 : "pg_ndistinct", (int) parse->state);
384 : break;
385 : }
386 :
387 6 : return JSON_SEM_ACTION_FAILED;
388 : }
389 :
390 : /*
391 : * Test for valid subsequent attribute number.
392 : *
393 : * If the previous value is positive, then current value must either be
394 : * greater than the previous value, or negative.
395 : *
396 : * If the previous value is negative, then the value must be less than
397 : * the previous value.
398 : *
399 : * Duplicate values are obviously not allowed, but that is already covered
400 : * by the rules listed above.
401 : */
402 : static bool
403 237 : valid_subsequent_attnum(AttrNumber prev, AttrNumber cur)
404 : {
405 : Assert(prev != 0);
406 :
407 237 : if (prev > 0)
408 228 : return ((cur > prev) || (cur < 0));
409 :
410 9 : return (cur < prev);
411 : }
412 :
413 : /*
414 : * Handle scalar events from the ndistinct input parser.
415 : *
416 : * Override integer parse error messages and replace them with errors
417 : * specific to the context.
418 : */
419 : static JsonParseErrorType
420 597 : ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
421 : {
422 597 : NDistinctParseState *parse = state;
423 : AttrNumber attnum;
424 597 : ErrorSaveContext escontext = {T_ErrorSaveContext};
425 :
426 597 : switch (parse->state)
427 : {
428 450 : case NDIST_EXPECT_ATTNUM:
429 450 : attnum = pg_strtoint16_safe(token, (Node *) &escontext);
430 :
431 450 : if (escontext.error_occurred)
432 : {
433 6 : errsave(parse->escontext,
434 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
435 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
436 : errdetail("Key \"%s\" has an incorrect value.", PG_NDISTINCT_KEY_ATTRIBUTES));
437 3 : return JSON_SEM_ACTION_FAILED;
438 : }
439 :
440 : /*
441 : * The attribute number cannot be zero a negative number beyond
442 : * the number of the possible expressions.
443 : */
444 444 : if (attnum == 0 || attnum < (0 - STATS_MAX_DIMENSIONS))
445 : {
446 9 : errsave(parse->escontext,
447 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
448 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
449 : errdetail("Invalid \"%s\" element has been found: %d.",
450 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum));
451 3 : return JSON_SEM_ACTION_FAILED;
452 : }
453 :
454 435 : if (list_length(parse->attnum_list) > 0)
455 : {
456 237 : const AttrNumber prev = llast_int(parse->attnum_list);
457 :
458 237 : if (!valid_subsequent_attnum(prev, attnum))
459 : {
460 6 : errsave(parse->escontext,
461 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
462 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
463 : errdetail("Invalid \"%s\" element has been found: %d cannot follow %d.",
464 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum, prev));
465 3 : return JSON_SEM_ACTION_FAILED;
466 : }
467 : }
468 :
469 429 : parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
470 429 : return JSON_SUCCESS;
471 :
472 120 : case NDIST_EXPECT_NDISTINCT:
473 :
474 : /*
475 : * While the structure dictates that ndistinct is a double
476 : * precision floating point, it has always been an integer in the
477 : * output generated. Therefore, we parse it as an integer here.
478 : */
479 120 : parse->ndistinct = pg_strtoint32_safe(token, (Node *) &escontext);
480 :
481 120 : if (!escontext.error_occurred)
482 : {
483 108 : parse->state = NDIST_EXPECT_KEY;
484 108 : return JSON_SUCCESS;
485 : }
486 :
487 12 : errsave(parse->escontext,
488 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
489 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
490 : errdetail("Key \"%s\" has an incorrect value.",
491 : PG_NDISTINCT_KEY_NDISTINCT));
492 6 : break;
493 :
494 27 : default:
495 27 : errsave(parse->escontext,
496 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
497 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
498 : errdetail("Unexpected scalar has been found."));
499 15 : break;
500 : }
501 :
502 21 : return JSON_SEM_ACTION_FAILED;
503 : }
504 :
505 : /*
506 : * Compare the attribute arrays of two MVNDistinctItem values,
507 : * looking for duplicate sets. Return true if a duplicate set is found.
508 : *
509 : * The arrays are required to be in canonical order (all positive numbers
510 : * in ascending order first, followed by all negative numbers in descending
511 : * order) so it's safe to compare the attrnums in order, stopping at the
512 : * first difference.
513 : */
514 : static bool
515 72 : item_attributes_eq(const MVNDistinctItem *a, const MVNDistinctItem *b)
516 : {
517 72 : if (a->nattributes != b->nattributes)
518 48 : return false;
519 :
520 45 : for (int i = 0; i < a->nattributes; i++)
521 : {
522 39 : if (a->attributes[i] != b->attributes[i])
523 18 : return false;
524 : }
525 :
526 6 : return true;
527 : }
528 :
529 : /*
530 : * Ensure that an attribute number appears as one of the attribute numbers
531 : * in a MVNDistinctItem.
532 : */
533 : static bool
534 42 : item_has_attnum(const MVNDistinctItem *item, AttrNumber attnum)
535 : {
536 105 : for (int i = 0; i < item->nattributes; i++)
537 : {
538 99 : if (attnum == item->attributes[i])
539 36 : return true;
540 : }
541 6 : return false;
542 : }
543 :
544 : /*
545 : * Ensure that the attributes in MVNDistinctItem A are a subset of the
546 : * reference MVNDistinctItem B.
547 : */
548 : static bool
549 24 : item_is_attnum_subset(const MVNDistinctItem *item,
550 : const MVNDistinctItem *refitem)
551 : {
552 60 : for (int i = 0; i < item->nattributes; i++)
553 : {
554 42 : if (!item_has_attnum(refitem, item->attributes[i]))
555 6 : return false;
556 : }
557 18 : return true;
558 : }
559 :
560 : /*
561 : * Generate a string representing an array of attribute numbers.
562 : *
563 : * Freeing the allocated string is the responsibility of the caller.
564 : */
565 : static char *
566 18 : item_attnum_list(const MVNDistinctItem *item)
567 : {
568 : StringInfoData str;
569 :
570 18 : initStringInfo(&str);
571 :
572 18 : appendStringInfo(&str, "%d", item->attributes[0]);
573 :
574 48 : for (int i = 1; i < item->nattributes; i++)
575 30 : appendStringInfo(&str, ", %d", item->attributes[i]);
576 :
577 18 : return str.data;
578 : }
579 :
580 : /*
581 : * Attempt to build and serialize the MVNDistinct object.
582 : *
583 : * This can only be executed after the completion of the JSON parsing.
584 : *
585 : * In the event of an error, set the error context and return NULL.
586 : */
587 : static bytea *
588 42 : build_mvndistinct(NDistinctParseState *parse, char *str)
589 : {
590 : MVNDistinct *ndistinct;
591 42 : int nitems = list_length(parse->distinct_items);
592 : bytea *bytes;
593 42 : int item_most_attrs = 0;
594 42 : int item_most_attrs_idx = 0;
595 :
596 42 : switch (parse->state)
597 : {
598 42 : case NDIST_EXPECT_COMPLETE:
599 :
600 : /*
601 : * Parsing has ended correctly and we should have a list of items.
602 : * If we don't, something has been done wrong in one of the
603 : * earlier parsing steps.
604 : */
605 42 : if (nitems == 0)
606 0 : elog(ERROR,
607 : "cannot have empty item list after parsing success.");
608 42 : break;
609 :
610 0 : case NDIST_EXPECT_START:
611 : /* blank */
612 0 : errsave(parse->escontext,
613 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
614 : errmsg("malformed pg_ndistinct: \"%s\"", str),
615 : errdetail("Value cannot be empty."));
616 0 : return NULL;
617 :
618 0 : default:
619 : /* Unexpected end-state. */
620 0 : errsave(parse->escontext,
621 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
622 : errmsg("malformed pg_ndistinct: \"%s\"", str),
623 : errdetail("Unexpected end state has been found: %d.", parse->state));
624 0 : return NULL;
625 : }
626 :
627 42 : ndistinct = palloc(offsetof(MVNDistinct, items) +
628 42 : nitems * sizeof(MVNDistinctItem));
629 :
630 42 : ndistinct->magic = STATS_NDISTINCT_MAGIC;
631 42 : ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
632 42 : ndistinct->nitems = nitems;
633 :
634 120 : for (int i = 0; i < nitems; i++)
635 : {
636 84 : MVNDistinctItem *item = list_nth(parse->distinct_items, i);
637 :
638 : /*
639 : * Ensure that this item does not duplicate the attributes of any
640 : * pre-existing item.
641 : */
642 150 : for (int j = 0; j < i; j++)
643 : {
644 72 : if (item_attributes_eq(item, &ndistinct->items[j]))
645 : {
646 6 : char *s = item_attnum_list(item);
647 :
648 6 : errsave(parse->escontext,
649 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
650 : errmsg("malformed pg_ndistinct: \"%s\"", str),
651 : errdetail("Duplicated \"%s\" array has been found: [%s].",
652 : PG_NDISTINCT_KEY_ATTRIBUTES, s));
653 3 : pfree(s);
654 3 : return NULL;
655 : }
656 : }
657 :
658 78 : ndistinct->items[i].ndistinct = item->ndistinct;
659 78 : ndistinct->items[i].nattributes = item->nattributes;
660 :
661 : /*
662 : * This transfers free-ing responsibility from the distinct_items list
663 : * to the ndistinct object.
664 : */
665 78 : ndistinct->items[i].attributes = item->attributes;
666 :
667 : /*
668 : * Keep track of the first longest attribute list. All other attribute
669 : * lists must be a subset of this list.
670 : */
671 78 : if (item->nattributes > item_most_attrs)
672 : {
673 63 : item_most_attrs = item->nattributes;
674 63 : item_most_attrs_idx = i;
675 : }
676 : }
677 :
678 : /*
679 : * Verify that all the sets of attribute numbers are a proper subset of
680 : * the longest set recorded. This acts as an extra sanity check based on
681 : * the input given. Note that this still needs to be cross-checked with
682 : * the extended statistics objects this would be assigned to, but it
683 : * provides one extra layer of protection.
684 : */
685 84 : for (int i = 0; i < nitems; i++)
686 : {
687 54 : if (i == item_most_attrs_idx)
688 30 : continue;
689 :
690 24 : if (!item_is_attnum_subset(&ndistinct->items[i],
691 24 : &ndistinct->items[item_most_attrs_idx]))
692 : {
693 6 : const MVNDistinctItem *item = &ndistinct->items[i];
694 6 : const MVNDistinctItem *refitem = &ndistinct->items[item_most_attrs_idx];
695 6 : char *item_list = item_attnum_list(item);
696 6 : char *refitem_list = item_attnum_list(refitem);
697 :
698 6 : errsave(parse->escontext,
699 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
700 : errmsg("malformed pg_ndistinct: \"%s\"", str),
701 : errdetail("\"%s\" array [%s] must be a subset of array [%s].",
702 : PG_NDISTINCT_KEY_ATTRIBUTES,
703 : item_list, refitem_list));
704 3 : pfree(item_list);
705 3 : pfree(refitem_list);
706 3 : return NULL;
707 : }
708 : }
709 :
710 30 : bytes = statext_ndistinct_serialize(ndistinct);
711 :
712 : /*
713 : * Free the attribute lists, before the ndistinct itself.
714 : */
715 78 : for (int i = 0; i < nitems; i++)
716 48 : pfree(ndistinct->items[i].attributes);
717 30 : pfree(ndistinct);
718 :
719 30 : return bytes;
720 : }
721 :
722 : /*
723 : * pg_ndistinct_in
724 : * input routine for type pg_ndistinct.
725 : */
726 : Datum
727 246 : pg_ndistinct_in(PG_FUNCTION_ARGS)
728 : {
729 246 : char *str = PG_GETARG_CSTRING(0);
730 : NDistinctParseState parse_state;
731 : JsonParseErrorType result;
732 : JsonLexContext *lex;
733 : JsonSemAction sem_action;
734 246 : bytea *bytes = NULL;
735 :
736 : /* initialize semantic state */
737 246 : parse_state.str = str;
738 246 : parse_state.state = NDIST_EXPECT_START;
739 246 : parse_state.distinct_items = NIL;
740 246 : parse_state.escontext = fcinfo->context;
741 246 : parse_state.found_attributes = false;
742 246 : parse_state.found_ndistinct = false;
743 246 : parse_state.attnum_list = NIL;
744 246 : parse_state.ndistinct = 0;
745 :
746 : /* set callbacks */
747 246 : sem_action.semstate = (void *) &parse_state;
748 246 : sem_action.object_start = ndistinct_object_start;
749 246 : sem_action.object_end = ndistinct_object_end;
750 246 : sem_action.array_start = ndistinct_array_start;
751 246 : sem_action.array_end = ndistinct_array_end;
752 246 : sem_action.object_field_start = ndistinct_object_field_start;
753 246 : sem_action.object_field_end = NULL;
754 246 : sem_action.array_element_start = ndistinct_array_element_start;
755 246 : sem_action.array_element_end = NULL;
756 246 : sem_action.scalar = ndistinct_scalar;
757 :
758 246 : lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
759 : PG_UTF8, true);
760 246 : result = pg_parse_json(lex, &sem_action);
761 156 : freeJsonLexContext(lex);
762 :
763 156 : if (result == JSON_SUCCESS)
764 42 : bytes = build_mvndistinct(&parse_state, str);
765 :
766 150 : list_free(parse_state.attnum_list);
767 150 : list_free_deep(parse_state.distinct_items);
768 :
769 150 : if (bytes)
770 30 : PG_RETURN_BYTEA_P(bytes);
771 :
772 : /*
773 : * If escontext already set, just use that. Anything else is a generic
774 : * JSON parse error.
775 : */
776 120 : if (!SOFT_ERROR_OCCURRED(parse_state.escontext))
777 24 : errsave(parse_state.escontext,
778 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
779 : errmsg("malformed pg_ndistinct: \"%s\"", str),
780 : errdetail("Input data must be valid JSON."));
781 :
782 108 : PG_RETURN_NULL();
783 : }
784 :
785 : /*
786 : * pg_ndistinct_out
787 : * output routine for type pg_ndistinct
788 : *
789 : * Produces a human-readable representation of the value.
790 : */
791 : Datum
792 63 : pg_ndistinct_out(PG_FUNCTION_ARGS)
793 : {
794 63 : bytea *data = PG_GETARG_BYTEA_PP(0);
795 63 : MVNDistinct *ndist = statext_ndistinct_deserialize(data);
796 : int i;
797 : StringInfoData str;
798 :
799 63 : initStringInfo(&str);
800 63 : appendStringInfoChar(&str, '[');
801 :
802 180 : for (i = 0; i < ndist->nitems; i++)
803 : {
804 117 : MVNDistinctItem item = ndist->items[i];
805 :
806 117 : if (i > 0)
807 54 : appendStringInfoString(&str, ", ");
808 :
809 117 : if (item.nattributes <= 0)
810 0 : elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
811 :
812 117 : appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
813 117 : item.attributes[0]);
814 :
815 255 : for (int j = 1; j < item.nattributes; j++)
816 138 : appendStringInfo(&str, ", %d", item.attributes[j]);
817 :
818 117 : appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
819 117 : (int) item.ndistinct);
820 : }
821 :
822 63 : appendStringInfoChar(&str, ']');
823 :
824 63 : PG_RETURN_CSTRING(str.data);
825 : }
826 :
827 : /*
828 : * pg_ndistinct_recv
829 : * binary input routine for type pg_ndistinct
830 : */
831 : Datum
832 0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
833 : {
834 0 : ereport(ERROR,
835 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
837 :
838 : PG_RETURN_VOID(); /* keep compiler quiet */
839 : }
840 :
841 : /*
842 : * pg_ndistinct_send
843 : * binary output routine for type pg_ndistinct
844 : *
845 : * n-distinct is serialized into a bytea value, so let's send that.
846 : */
847 : Datum
848 0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
849 : {
850 0 : return byteasend(fcinfo);
851 : }
|