Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_ndistinct.c
4 : * pg_ndistinct data type support.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/pg_ndistinct.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "common/int.h"
18 : #include "common/jsonapi.h"
19 : #include "lib/stringinfo.h"
20 : #include "mb/pg_wchar.h"
21 : #include "nodes/miscnodes.h"
22 : #include "statistics/extended_stats_internal.h"
23 : #include "statistics/statistics_format.h"
24 : #include "utils/builtins.h"
25 : #include "utils/fmgrprotos.h"
26 :
/*
 * Parsing state data
 *
 * Each value names what the JSON semantic callbacks expect to see next
 * while walking a pg_ndistinct input string.  The first enumerator has
 * value 0.
 */
typedef enum NDistinctSemanticState
{
	NDIST_EXPECT_START,			/* nothing seen yet; outer '[' is next */
	NDIST_EXPECT_ITEM,			/* in the outer array; item object is next */
	NDIST_EXPECT_KEY,			/* in an item object; a key is next */
	NDIST_EXPECT_ATTNUM_LIST,	/* after "attributes" key; array is next */
	NDIST_EXPECT_ATTNUM,		/* in the attributes array; number is next */
	NDIST_EXPECT_NDISTINCT,		/* after "ndistinct" key; value is next */
	NDIST_EXPECT_COMPLETE		/* outer array has been closed */
} NDistinctSemanticState;
38 :
39 : typedef struct
40 : {
41 : const char *str;
42 : NDistinctSemanticState state;
43 :
44 : List *distinct_items; /* Accumulated complete MVNDistinctItems */
45 : Node *escontext;
46 :
47 : bool found_attributes; /* Item has "attributes" key */
48 : bool found_ndistinct; /* Item has "ndistinct" key */
49 : List *attnum_list; /* Accumulated attribute numbers */
50 : int32 ndistinct;
51 : } NDistinctParseState;
52 :
53 : /*
54 : * Invoked at the start of each MVNDistinctItem.
55 : *
56 : * The entire JSON document should be one array of MVNDistinctItem objects.
57 : * If we are anywhere else in the document, it is an error.
58 : */
59 : static JsonParseErrorType
60 516 : ndistinct_object_start(void *state)
61 : {
62 516 : NDistinctParseState *parse = state;
63 :
64 516 : switch (parse->state)
65 : {
66 456 : case NDIST_EXPECT_ITEM:
67 : /* Now we expect to see attributes/ndistinct keys */
68 456 : parse->state = NDIST_EXPECT_KEY;
69 456 : return JSON_SUCCESS;
70 :
71 24 : case NDIST_EXPECT_START:
72 : /* pg_ndistinct must begin with a '[' */
73 24 : errsave(parse->escontext,
74 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
75 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
76 : errdetail("Initial element must be an array."));
77 12 : break;
78 :
79 0 : case NDIST_EXPECT_KEY:
80 : /* In an object, expecting key */
81 0 : errsave(parse->escontext,
82 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
83 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
84 : errdetail("A key was expected."));
85 0 : break;
86 :
87 12 : case NDIST_EXPECT_ATTNUM_LIST:
88 : /* Just followed an "attributes" key */
89 12 : errsave(parse->escontext,
90 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
91 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
92 : errdetail("Value of \"%s\" must be an array of attribute numbers.",
93 : PG_NDISTINCT_KEY_ATTRIBUTES));
94 6 : break;
95 :
96 12 : case NDIST_EXPECT_ATTNUM:
97 : /* In an attribute number list, expect only scalar integers */
98 12 : errsave(parse->escontext,
99 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
100 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
101 : errdetail("Attribute lists can only contain attribute numbers."));
102 6 : break;
103 :
104 12 : case NDIST_EXPECT_NDISTINCT:
105 : /* Just followed an "ndistinct" key */
106 12 : errsave(parse->escontext,
107 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
108 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
109 : errdetail("Value of \"%s\" must be an integer.",
110 : PG_NDISTINCT_KEY_NDISTINCT));
111 6 : break;
112 :
113 0 : default:
114 0 : elog(ERROR,
115 : "object start of \"%s\" found in unexpected parse state: %d.",
116 : "pg_ndistinct", (int) parse->state);
117 : break;
118 : }
119 :
120 30 : return JSON_SEM_ACTION_FAILED;
121 : }
122 :
123 : /*
124 : * Invoked at the end of an object.
125 : *
126 : * Check to ensure that it was a complete MVNDistinctItem
127 : */
128 : static JsonParseErrorType
129 156 : ndistinct_object_end(void *state)
130 : {
131 156 : NDistinctParseState *parse = state;
132 :
133 156 : int natts = 0;
134 :
135 : MVNDistinctItem *item;
136 :
137 156 : if (parse->state != NDIST_EXPECT_KEY)
138 0 : elog(ERROR,
139 : "object end of \"%s\" found in unexpected parse state: %d.",
140 : "pg_ndistinct", (int) parse->state);
141 :
142 156 : if (!parse->found_attributes)
143 : {
144 12 : errsave(parse->escontext,
145 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
146 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
147 : errdetail("Item must contain \"%s\" key.",
148 : PG_NDISTINCT_KEY_ATTRIBUTES));
149 6 : return JSON_SEM_ACTION_FAILED;
150 : }
151 :
152 144 : if (!parse->found_ndistinct)
153 : {
154 12 : errsave(parse->escontext,
155 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
156 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
157 : errdetail("Item must contain \"%s\" key.",
158 : PG_NDISTINCT_KEY_NDISTINCT));
159 6 : return JSON_SEM_ACTION_FAILED;
160 : }
161 :
162 : /*
163 : * We need at least two attribute numbers for a ndistinct item, anything
164 : * less is malformed.
165 : */
166 132 : natts = list_length(parse->attnum_list);
167 132 : if ((natts < 2) || (natts > STATS_MAX_DIMENSIONS))
168 : {
169 24 : errsave(parse->escontext,
170 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
171 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
172 : errdetail("The \"%s\" key must contain an array of at least %d and no more than %d attributes.",
173 : PG_NDISTINCT_KEY_ATTRIBUTES, 2, STATS_MAX_DIMENSIONS));
174 12 : return JSON_SEM_ACTION_FAILED;
175 : }
176 :
177 : /* Create the MVNDistinctItem */
178 108 : item = palloc_object(MVNDistinctItem);
179 108 : item->nattributes = natts;
180 108 : item->attributes = palloc0(natts * sizeof(AttrNumber));
181 108 : item->ndistinct = (double) parse->ndistinct;
182 :
183 372 : for (int i = 0; i < natts; i++)
184 264 : item->attributes[i] = (AttrNumber) list_nth_int(parse->attnum_list, i);
185 :
186 108 : parse->distinct_items = lappend(parse->distinct_items, (void *) item);
187 :
188 : /* reset item state vars */
189 108 : list_free(parse->attnum_list);
190 108 : parse->attnum_list = NIL;
191 108 : parse->ndistinct = 0;
192 108 : parse->found_attributes = false;
193 108 : parse->found_ndistinct = false;
194 :
195 : /* Now we are looking for the next MVNDistinctItem */
196 108 : parse->state = NDIST_EXPECT_ITEM;
197 108 : return JSON_SUCCESS;
198 : }
199 :
200 :
201 : /*
202 : * Invoked at the start of an array.
203 : *
204 : * ndistinct input format has two types of arrays, the outer MVNDistinctItem
205 : * array and the attribute number array within each MVNDistinctItem.
206 : */
207 : static JsonParseErrorType
208 816 : ndistinct_array_start(void *state)
209 : {
210 816 : NDistinctParseState *parse = state;
211 :
212 816 : switch (parse->state)
213 : {
214 366 : case NDIST_EXPECT_ATTNUM_LIST:
215 366 : parse->state = NDIST_EXPECT_ATTNUM;
216 366 : break;
217 :
218 414 : case NDIST_EXPECT_START:
219 414 : parse->state = NDIST_EXPECT_ITEM;
220 414 : break;
221 :
222 36 : default:
223 36 : errsave(parse->escontext,
224 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
225 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
226 : errdetail("Array has been found at an unexpected location."));
227 18 : return JSON_SEM_ACTION_FAILED;
228 : }
229 :
230 780 : return JSON_SUCCESS;
231 : }
232 :
233 :
234 : /*
235 : * Invoked at the end of an array.
236 : *
237 : * Arrays can never be empty.
238 : */
239 : static JsonParseErrorType
240 342 : ndistinct_array_end(void *state)
241 : {
242 342 : NDistinctParseState *parse = state;
243 :
244 342 : switch (parse->state)
245 : {
246 288 : case NDIST_EXPECT_ATTNUM:
247 288 : if (list_length(parse->attnum_list) > 0)
248 : {
249 : /*
250 : * The attribute number list is complete, look for more
251 : * MVNDistinctItem keys.
252 : */
253 276 : parse->state = NDIST_EXPECT_KEY;
254 276 : return JSON_SUCCESS;
255 : }
256 :
257 12 : errsave(parse->escontext,
258 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
259 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
260 : errdetail("The \"%s\" key must be a non-empty array.",
261 : PG_NDISTINCT_KEY_ATTRIBUTES));
262 6 : break;
263 :
264 54 : case NDIST_EXPECT_ITEM:
265 54 : if (list_length(parse->distinct_items) > 0)
266 : {
267 : /* Item list is complete, we are done. */
268 42 : parse->state = NDIST_EXPECT_COMPLETE;
269 42 : return JSON_SUCCESS;
270 : }
271 :
272 12 : errsave(parse->escontext,
273 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
274 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
275 : errdetail("Item array cannot be empty."));
276 6 : break;
277 :
278 0 : default:
279 :
280 : /*
281 : * This can only happen if a case was missed in
282 : * ndistinct_array_start().
283 : */
284 0 : elog(ERROR,
285 : "array end of \"%s\" found in unexpected parse state: %d.",
286 : "pg_ndistinct", (int) parse->state);
287 : break;
288 : }
289 :
290 12 : return JSON_SEM_ACTION_FAILED;
291 : }
292 :
293 : /*
294 : * Invoked at the start of a key/value field.
295 : *
296 : * The valid keys for the MVNDistinctItem object are:
297 : * - attributes
298 : * - ndistinct
299 : */
300 : static JsonParseErrorType
301 696 : ndistinct_object_field_start(void *state, char *fname, bool isnull)
302 : {
303 696 : NDistinctParseState *parse = state;
304 :
305 696 : if (strcmp(fname, PG_NDISTINCT_KEY_ATTRIBUTES) == 0)
306 : {
307 432 : if (parse->found_attributes)
308 : {
309 12 : errsave(parse->escontext,
310 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
311 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
312 : errdetail("Multiple \"%s\" keys are not allowed.",
313 : PG_NDISTINCT_KEY_ATTRIBUTES));
314 6 : return JSON_SEM_ACTION_FAILED;
315 : }
316 420 : parse->found_attributes = true;
317 420 : parse->state = NDIST_EXPECT_ATTNUM_LIST;
318 420 : return JSON_SUCCESS;
319 : }
320 :
321 264 : if (strcmp(fname, PG_NDISTINCT_KEY_NDISTINCT) == 0)
322 : {
323 240 : if (parse->found_ndistinct)
324 : {
325 12 : errsave(parse->escontext,
326 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
327 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
328 : errdetail("Multiple \"%s\" keys are not allowed.",
329 : PG_NDISTINCT_KEY_NDISTINCT));
330 6 : return JSON_SEM_ACTION_FAILED;
331 : }
332 228 : parse->found_ndistinct = true;
333 228 : parse->state = NDIST_EXPECT_NDISTINCT;
334 228 : return JSON_SUCCESS;
335 : }
336 :
337 24 : errsave(parse->escontext,
338 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
339 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
340 : errdetail("Only allowed keys are \"%s\" and \"%s\".",
341 : PG_NDISTINCT_KEY_ATTRIBUTES,
342 : PG_NDISTINCT_KEY_NDISTINCT));
343 12 : return JSON_SEM_ACTION_FAILED;
344 : }
345 :
346 : /*
347 : * Invoked at the start of an array element.
348 : *
349 : * The overall structure of the datatype is an array, but there are also
350 : * arrays as the value of every attributes key.
351 : */
352 : static JsonParseErrorType
353 1266 : ndistinct_array_element_start(void *state, bool isnull)
354 : {
355 1266 : const NDistinctParseState *parse = state;
356 :
357 1266 : switch (parse->state)
358 : {
359 798 : case NDIST_EXPECT_ATTNUM:
360 798 : if (!isnull)
361 786 : return JSON_SUCCESS;
362 :
363 12 : errsave(parse->escontext,
364 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
365 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
366 : errdetail("Attribute number array cannot be null."));
367 6 : break;
368 :
369 468 : case NDIST_EXPECT_ITEM:
370 468 : if (!isnull)
371 456 : return JSON_SUCCESS;
372 :
373 12 : errsave(parse->escontext,
374 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
375 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
376 : errdetail("Item list elements cannot be null."));
377 :
378 6 : break;
379 :
380 0 : default:
381 0 : elog(ERROR,
382 : "array element start of \"%s\" found in unexpected parse state: %d.",
383 : "pg_ndistinct", (int) parse->state);
384 : break;
385 : }
386 :
387 12 : return JSON_SEM_ACTION_FAILED;
388 : }
389 :
390 : /*
391 : * Test for valid subsequent attribute number.
392 : *
393 : * If the previous value is positive, then current value must either be
394 : * greater than the previous value, or negative.
395 : *
396 : * If the previous value is negative, then the value must be less than
397 : * the previous value.
398 : *
399 : * Duplicate values are obviously not allowed, but that is already covered
400 : * by the rules listed above.
401 : */
402 : static bool
403 408 : valid_subsequent_attnum(AttrNumber prev, AttrNumber cur)
404 : {
405 : Assert(prev != 0);
406 :
407 408 : if (prev > 0)
408 396 : return ((cur > prev) || (cur < 0));
409 :
410 12 : return (cur < prev);
411 : }
412 :
413 : /*
414 : * Handle scalar events from the ndistinct input parser.
415 : *
416 : * Override integer parse error messages and replace them with errors
417 : * specific to the context.
418 : */
419 : static JsonParseErrorType
420 1008 : ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
421 : {
422 1008 : NDistinctParseState *parse = state;
423 : AttrNumber attnum;
424 1008 : ErrorSaveContext escontext = {T_ErrorSaveContext};
425 :
426 1008 : switch (parse->state)
427 : {
428 774 : case NDIST_EXPECT_ATTNUM:
429 774 : attnum = pg_strtoint16_safe(token, (Node *) &escontext);
430 :
431 774 : if (escontext.error_occurred)
432 : {
433 12 : errsave(parse->escontext,
434 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
435 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
436 : errdetail("Key \"%s\" has an incorrect value.", PG_NDISTINCT_KEY_ATTRIBUTES));
437 6 : return JSON_SEM_ACTION_FAILED;
438 : }
439 :
440 : /*
441 : * The attribute number cannot be zero a negative number beyond
442 : * the number of the possible expressions.
443 : */
444 762 : if (attnum == 0 || attnum < (0 - STATS_MAX_DIMENSIONS))
445 : {
446 18 : errsave(parse->escontext,
447 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
448 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
449 : errdetail("Invalid \"%s\" element has been found: %d.",
450 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum));
451 6 : return JSON_SEM_ACTION_FAILED;
452 : }
453 :
454 744 : if (list_length(parse->attnum_list) > 0)
455 : {
456 408 : const AttrNumber prev = llast_int(parse->attnum_list);
457 :
458 408 : if (!valid_subsequent_attnum(prev, attnum))
459 : {
460 12 : errsave(parse->escontext,
461 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
462 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
463 : errdetail("Invalid \"%s\" element has been found: %d cannot follow %d.",
464 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum, prev));
465 6 : return JSON_SEM_ACTION_FAILED;
466 : }
467 : }
468 :
469 732 : parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
470 732 : return JSON_SUCCESS;
471 :
472 180 : case NDIST_EXPECT_NDISTINCT:
473 :
474 : /*
475 : * While the structure dictates that ndistinct is a double
476 : * precision floating point, it has always been an integer in the
477 : * output generated. Therefore, we parse it as an integer here.
478 : */
479 180 : parse->ndistinct = pg_strtoint32_safe(token, (Node *) &escontext);
480 :
481 180 : if (!escontext.error_occurred)
482 : {
483 156 : parse->state = NDIST_EXPECT_KEY;
484 156 : return JSON_SUCCESS;
485 : }
486 :
487 24 : errsave(parse->escontext,
488 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
489 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
490 : errdetail("Key \"%s\" has an incorrect value.",
491 : PG_NDISTINCT_KEY_NDISTINCT));
492 12 : break;
493 :
494 54 : default:
495 54 : errsave(parse->escontext,
496 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
497 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
498 : errdetail("Unexpected scalar has been found."));
499 30 : break;
500 : }
501 :
502 42 : return JSON_SEM_ACTION_FAILED;
503 : }
504 :
505 : /*
506 : * Compare the attribute arrays of two MVNDistinctItem values,
507 : * looking for duplicate sets. Return true if a duplicate set is found.
508 : *
509 : * The arrays are required to be in canonical order (all positive numbers
510 : * in ascending order first, followed by all negative numbers in descending
511 : * order) so it's safe to compare the attrnums in order, stopping at the
512 : * first difference.
513 : */
514 : static bool
515 108 : item_attributes_eq(const MVNDistinctItem *a, const MVNDistinctItem *b)
516 : {
517 108 : if (a->nattributes != b->nattributes)
518 78 : return false;
519 :
520 66 : for (int i = 0; i < a->nattributes; i++)
521 : {
522 54 : if (a->attributes[i] != b->attributes[i])
523 18 : return false;
524 : }
525 :
526 12 : return true;
527 : }
528 :
529 : /*
530 : * Ensure that an attribute number appears as one of the attribute numbers
531 : * in a MVNDistinctItem.
532 : */
533 : static bool
534 48 : item_has_attnum(const MVNDistinctItem *item, AttrNumber attnum)
535 : {
536 138 : for (int i = 0; i < item->nattributes; i++)
537 : {
538 126 : if (attnum == item->attributes[i])
539 36 : return true;
540 : }
541 12 : return false;
542 : }
543 :
544 : /*
545 : * Ensure that the attributes in MVNDistinctItem A are a subset of the
546 : * reference MVNDistinctItem B.
547 : */
548 : static bool
549 30 : item_is_attnum_subset(const MVNDistinctItem *item,
550 : const MVNDistinctItem *refitem)
551 : {
552 66 : for (int i = 0; i < item->nattributes; i++)
553 : {
554 48 : if (!item_has_attnum(refitem, item->attributes[i]))
555 12 : return false;
556 : }
557 18 : return true;
558 : }
559 :
560 : /*
561 : * Generate a string representing an array of attribute numbers.
562 : *
563 : * Freeing the allocated string is the responsibility of the caller.
564 : */
565 : static char *
566 36 : item_attnum_list(const MVNDistinctItem *item)
567 : {
568 : StringInfoData str;
569 :
570 36 : initStringInfo(&str);
571 :
572 36 : appendStringInfo(&str, "%d", item->attributes[0]);
573 :
574 96 : for (int i = 1; i < item->nattributes; i++)
575 60 : appendStringInfo(&str, ", %d", item->attributes[i]);
576 :
577 36 : return str.data;
578 : }
579 :
580 : /*
581 : * Attempt to build and serialize the MVNDistinct object.
582 : *
583 : * This can only be executed after the completion of the JSON parsing.
584 : *
585 : * In the event of an error, set the error context and return NULL.
586 : */
587 : static bytea *
588 42 : build_mvndistinct(NDistinctParseState *parse, char *str)
589 : {
590 : MVNDistinct *ndistinct;
591 42 : int nitems = list_length(parse->distinct_items);
592 : bytea *bytes;
593 42 : int item_most_attrs = 0;
594 42 : int item_most_attrs_idx = 0;
595 :
596 42 : switch (parse->state)
597 : {
598 42 : case NDIST_EXPECT_COMPLETE:
599 :
600 : /*
601 : * Parsing has ended correctly and we should have a list of items.
602 : * If we don't, something has been done wrong in one of the
603 : * earlier parsing steps.
604 : */
605 42 : if (nitems == 0)
606 0 : elog(ERROR,
607 : "cannot have empty item list after parsing success.");
608 42 : break;
609 :
610 0 : case NDIST_EXPECT_START:
611 : /* blank */
612 0 : errsave(parse->escontext,
613 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
614 : errmsg("malformed pg_ndistinct: \"%s\"", str),
615 : errdetail("Value cannot be empty."));
616 0 : return NULL;
617 :
618 0 : default:
619 : /* Unexpected end-state. */
620 0 : errsave(parse->escontext,
621 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
622 : errmsg("malformed pg_ndistinct: \"%s\"", str),
623 : errdetail("Unexpected end state has been found: %d.", parse->state));
624 0 : return NULL;
625 : }
626 :
627 42 : ndistinct = palloc(offsetof(MVNDistinct, items) +
628 42 : nitems * sizeof(MVNDistinctItem));
629 :
630 42 : ndistinct->magic = STATS_NDISTINCT_MAGIC;
631 42 : ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
632 42 : ndistinct->nitems = nitems;
633 :
634 138 : for (int i = 0; i < nitems; i++)
635 : {
636 108 : MVNDistinctItem *item = list_nth(parse->distinct_items, i);
637 :
638 : /*
639 : * Ensure that this item does not duplicate the attributes of any
640 : * pre-existing item.
641 : */
642 204 : for (int j = 0; j < i; j++)
643 : {
644 108 : if (item_attributes_eq(item, &ndistinct->items[j]))
645 : {
646 12 : char *s = item_attnum_list(item);
647 :
648 12 : errsave(parse->escontext,
649 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
650 : errmsg("malformed pg_ndistinct: \"%s\"", str),
651 : errdetail("Duplicated \"%s\" array has been found: [%s].",
652 : PG_NDISTINCT_KEY_ATTRIBUTES, s));
653 6 : pfree(s);
654 6 : return NULL;
655 : }
656 : }
657 :
658 96 : ndistinct->items[i].ndistinct = item->ndistinct;
659 96 : ndistinct->items[i].nattributes = item->nattributes;
660 :
661 : /*
662 : * This transfers free-ing responsibility from the distinct_items list
663 : * to the ndistinct object.
664 : */
665 96 : ndistinct->items[i].attributes = item->attributes;
666 :
667 : /*
668 : * Keep track of the first longest attribute list. All other attribute
669 : * lists must be a subset of this list.
670 : */
671 96 : if (item->nattributes > item_most_attrs)
672 : {
673 78 : item_most_attrs = item->nattributes;
674 78 : item_most_attrs_idx = i;
675 : }
676 : }
677 :
678 : /*
679 : * Verify that all the sets of attribute numbers are a proper subset of
680 : * the longest set recorded. This acts as an extra sanity check based on
681 : * the input given. Note that this still needs to be cross-checked with
682 : * the extended statistics objects this would be assigned to, but it
683 : * provides one extra layer of protection.
684 : */
685 66 : for (int i = 0; i < nitems; i++)
686 : {
687 48 : if (i == item_most_attrs_idx)
688 18 : continue;
689 :
690 30 : if (!item_is_attnum_subset(&ndistinct->items[i],
691 30 : &ndistinct->items[item_most_attrs_idx]))
692 : {
693 12 : const MVNDistinctItem *item = &ndistinct->items[i];
694 12 : const MVNDistinctItem *refitem = &ndistinct->items[item_most_attrs_idx];
695 12 : char *item_list = item_attnum_list(item);
696 12 : char *refitem_list = item_attnum_list(refitem);
697 :
698 12 : errsave(parse->escontext,
699 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
700 : errmsg("malformed pg_ndistinct: \"%s\"", str),
701 : errdetail("\"%s\" array [%s] must be a subset of array [%s].",
702 : PG_NDISTINCT_KEY_ATTRIBUTES,
703 : item_list, refitem_list));
704 6 : pfree(item_list);
705 6 : pfree(refitem_list);
706 6 : return NULL;
707 : }
708 : }
709 :
710 18 : bytes = statext_ndistinct_serialize(ndistinct);
711 :
712 : /*
713 : * Free the attribute lists, before the ndistinct itself.
714 : */
715 54 : for (int i = 0; i < nitems; i++)
716 36 : pfree(ndistinct->items[i].attributes);
717 18 : pfree(ndistinct);
718 :
719 18 : return bytes;
720 : }
721 :
722 : /*
723 : * pg_ndistinct_in
724 : * input routine for type pg_ndistinct.
725 : */
726 : Datum
727 450 : pg_ndistinct_in(PG_FUNCTION_ARGS)
728 : {
729 450 : char *str = PG_GETARG_CSTRING(0);
730 : NDistinctParseState parse_state;
731 : JsonParseErrorType result;
732 : JsonLexContext *lex;
733 : JsonSemAction sem_action;
734 450 : bytea *bytes = NULL;
735 :
736 : /* initialize semantic state */
737 450 : parse_state.str = str;
738 450 : parse_state.state = NDIST_EXPECT_START;
739 450 : parse_state.distinct_items = NIL;
740 450 : parse_state.escontext = fcinfo->context;
741 450 : parse_state.found_attributes = false;
742 450 : parse_state.found_ndistinct = false;
743 450 : parse_state.attnum_list = NIL;
744 450 : parse_state.ndistinct = 0;
745 :
746 : /* set callbacks */
747 450 : sem_action.semstate = (void *) &parse_state;
748 450 : sem_action.object_start = ndistinct_object_start;
749 450 : sem_action.object_end = ndistinct_object_end;
750 450 : sem_action.array_start = ndistinct_array_start;
751 450 : sem_action.array_end = ndistinct_array_end;
752 450 : sem_action.object_field_start = ndistinct_object_field_start;
753 450 : sem_action.object_field_end = NULL;
754 450 : sem_action.array_element_start = ndistinct_array_element_start;
755 450 : sem_action.array_element_end = NULL;
756 450 : sem_action.scalar = ndistinct_scalar;
757 :
758 450 : lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
759 : PG_UTF8, true);
760 450 : result = pg_parse_json(lex, &sem_action);
761 270 : freeJsonLexContext(lex);
762 :
763 270 : if (result == JSON_SUCCESS)
764 42 : bytes = build_mvndistinct(&parse_state, str);
765 :
766 258 : list_free(parse_state.attnum_list);
767 258 : list_free_deep(parse_state.distinct_items);
768 :
769 258 : if (bytes)
770 18 : PG_RETURN_BYTEA_P(bytes);
771 :
772 : /*
773 : * If escontext already set, just use that. Anything else is a generic
774 : * JSON parse error.
775 : */
776 240 : if (!SOFT_ERROR_OCCURRED(parse_state.escontext))
777 48 : errsave(parse_state.escontext,
778 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
779 : errmsg("malformed pg_ndistinct: \"%s\"", str),
780 : errdetail("Input data must be valid JSON."));
781 :
782 216 : PG_RETURN_NULL();
783 : }
784 :
785 : /*
786 : * pg_ndistinct_out
787 : * output routine for type pg_ndistinct
788 : *
789 : * Produces a human-readable representation of the value.
790 : */
791 : Datum
792 42 : pg_ndistinct_out(PG_FUNCTION_ARGS)
793 : {
794 42 : bytea *data = PG_GETARG_BYTEA_PP(0);
795 42 : MVNDistinct *ndist = statext_ndistinct_deserialize(data);
796 : int i;
797 : StringInfoData str;
798 :
799 42 : initStringInfo(&str);
800 42 : appendStringInfoChar(&str, '[');
801 :
802 174 : for (i = 0; i < ndist->nitems; i++)
803 : {
804 132 : MVNDistinctItem item = ndist->items[i];
805 :
806 132 : if (i > 0)
807 90 : appendStringInfoString(&str, ", ");
808 :
809 132 : if (item.nattributes <= 0)
810 0 : elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
811 :
812 132 : appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
813 132 : item.attributes[0]);
814 :
815 300 : for (int j = 1; j < item.nattributes; j++)
816 168 : appendStringInfo(&str, ", %d", item.attributes[j]);
817 :
818 132 : appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
819 132 : (int) item.ndistinct);
820 : }
821 :
822 42 : appendStringInfoChar(&str, ']');
823 :
824 42 : PG_RETURN_CSTRING(str.data);
825 : }
826 :
827 : /*
828 : * pg_ndistinct_recv
829 : * binary input routine for type pg_ndistinct
830 : */
831 : Datum
832 0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
833 : {
834 0 : ereport(ERROR,
835 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
837 :
838 : PG_RETURN_VOID(); /* keep compiler quiet */
839 : }
840 :
841 : /*
842 : * pg_ndistinct_send
843 : * binary output routine for type pg_ndistinct
844 : *
845 : * n-distinct is serialized into a bytea value, so let's send that.
846 : */
847 : Datum
848 0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
849 : {
850 0 : return byteasend(fcinfo);
851 : }
|