Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_ndistinct.c
4 : * pg_ndistinct data type support.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/pg_ndistinct.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "common/int.h"
18 : #include "common/jsonapi.h"
19 : #include "lib/stringinfo.h"
20 : #include "mb/pg_wchar.h"
21 : #include "nodes/miscnodes.h"
22 : #include "statistics/extended_stats_internal.h"
23 : #include "statistics/statistics_format.h"
24 : #include "utils/builtins.h"
25 : #include "utils/fmgrprotos.h"
26 :
27 : /* Parsing state data */
28 : typedef enum
29 : {
30 : NDIST_EXPECT_START = 0, /* initial state; the outermost '[' moves to NDIST_EXPECT_ITEM */
31 : NDIST_EXPECT_ITEM, /* inside the outer array; an item object or the closing ']' is expected */
32 : NDIST_EXPECT_KEY, /* inside an item object; a key or the closing '}' is expected */
33 : NDIST_EXPECT_ATTNUM_LIST, /* just saw the "attributes" key; its array value is expected */
34 : NDIST_EXPECT_ATTNUM, /* inside the attribute array; attribute numbers are expected */
35 : NDIST_EXPECT_NDISTINCT, /* just saw the "ndistinct" key; its scalar value is expected */
36 : NDIST_EXPECT_COMPLETE, /* the outer array has been closed with at least one item */
37 : } NDistinctSemanticState;
38 :
39 : typedef struct
40 : {
41 : const char *str; /* Input string, quoted in error messages */
42 : NDistinctSemanticState state; /* Current position in the expected structure */
43 :
44 : List *distinct_items; /* Accumulated complete MVNDistinctItems */
45 : Node *escontext; /* Error context handed to errsave() */
46 :
47 : bool found_attributes; /* Item has "attributes" key */
48 : bool found_ndistinct; /* Item has "ndistinct" key */
49 : List *attnum_list; /* Accumulated attribute numbers */
50 : int32 ndistinct; /* ndistinct value parsed for the current item */
51 : } NDistinctParseState;
52 :
53 : /*
54 : * Invoked at the start of each MVNDistinctItem.
55 : *
56 : * The entire JSON document should be one array of MVNDistinctItem objects.
57 : * If we are anywhere else in the document, it is an error.
58 : */
59 : static JsonParseErrorType
60 576 : ndistinct_object_start(void *state)
61 : {
62 576 : NDistinctParseState *parse = state;
63 :
64 576 : switch (parse->state)
65 : {
66 516 : case NDIST_EXPECT_ITEM:
67 : /* Now we expect to see attributes/ndistinct keys */
68 516 : parse->state = NDIST_EXPECT_KEY;
69 516 : return JSON_SUCCESS;
70 :
71 24 : case NDIST_EXPECT_START:
72 : /* pg_ndistinct must begin with a '[' */
73 24 : errsave(parse->escontext,
74 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
75 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
76 : errdetail("Initial element must be an array."));
77 12 : break;
78 :
79 0 : case NDIST_EXPECT_KEY:
80 : /* In an object, expecting key */
81 0 : errsave(parse->escontext,
82 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
83 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
84 : errdetail("A key was expected."));
85 0 : break;
86 :
87 12 : case NDIST_EXPECT_ATTNUM_LIST:
88 : /* Just followed an "attributes" key */
89 12 : errsave(parse->escontext,
90 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
91 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
92 : errdetail("Value of \"%s\" must be an array of attribute numbers.",
93 : PG_NDISTINCT_KEY_ATTRIBUTES));
94 6 : break;
95 :
96 12 : case NDIST_EXPECT_ATTNUM:
97 : /* In an attribute number list, expect only scalar integers */
98 12 : errsave(parse->escontext,
99 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
100 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
101 : errdetail("Attribute lists can only contain attribute numbers."));
102 6 : break;
103 :
104 12 : case NDIST_EXPECT_NDISTINCT:
105 : /* Just followed an "ndistinct" key */
106 12 : errsave(parse->escontext,
107 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
108 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
109 : errdetail("Value of \"%s\" must be an integer.",
110 : PG_NDISTINCT_KEY_NDISTINCT));
111 6 : break;
112 :
113 0 : default:
114 0 : elog(ERROR,
115 : "object start of \"%s\" found in unexpected parse state: %d.",
116 : "pg_ndistinct", (int) parse->state);
117 : break;
118 : }
119 :
120 30 : return JSON_SEM_ACTION_FAILED;
121 : }
122 :
123 : /*
124 : * Invoked at the end of an object.
125 : *
126 : * Check to ensure that it was a complete MVNDistinctItem
127 : */
128 : static JsonParseErrorType
129 216 : ndistinct_object_end(void *state)
130 : {
131 216 : NDistinctParseState *parse = state;
132 :
133 216 : int natts = 0;
134 :
135 : MVNDistinctItem *item;
136 :
137 216 : if (parse->state != NDIST_EXPECT_KEY)
138 0 : elog(ERROR,
139 : "object end of \"%s\" found in unexpected parse state: %d.",
140 : "pg_ndistinct", (int) parse->state);
141 :
142 216 : if (!parse->found_attributes)
143 : {
144 12 : errsave(parse->escontext,
145 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
146 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
147 : errdetail("Item must contain \"%s\" key.",
148 : PG_NDISTINCT_KEY_ATTRIBUTES));
149 6 : return JSON_SEM_ACTION_FAILED;
150 : }
151 :
152 204 : if (!parse->found_ndistinct)
153 : {
154 12 : errsave(parse->escontext,
155 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
156 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
157 : errdetail("Item must contain \"%s\" key.",
158 : PG_NDISTINCT_KEY_NDISTINCT));
159 6 : return JSON_SEM_ACTION_FAILED;
160 : }
161 :
162 : /*
163 : * We need at least two attribute numbers for a ndistinct item, anything
164 : * less is malformed.
165 : */
166 192 : natts = list_length(parse->attnum_list);
167 192 : if ((natts < 2) || (natts > STATS_MAX_DIMENSIONS))
168 : {
169 24 : errsave(parse->escontext,
170 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
171 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
172 : errdetail("The \"%s\" key must contain an array of at least %d and no more than %d attributes.",
173 : PG_NDISTINCT_KEY_ATTRIBUTES, 2, STATS_MAX_DIMENSIONS));
174 12 : return JSON_SEM_ACTION_FAILED;
175 : }
176 :
177 : /* Create the MVNDistinctItem */
178 168 : item = palloc_object(MVNDistinctItem);
179 168 : item->nattributes = natts;
180 168 : item->attributes = palloc0(natts * sizeof(AttrNumber));
181 168 : item->ndistinct = (double) parse->ndistinct;
182 :
183 558 : for (int i = 0; i < natts; i++)
184 390 : item->attributes[i] = (AttrNumber) list_nth_int(parse->attnum_list, i);
185 :
186 168 : parse->distinct_items = lappend(parse->distinct_items, (void *) item);
187 :
188 : /* reset item state vars */
189 168 : list_free(parse->attnum_list);
190 168 : parse->attnum_list = NIL;
191 168 : parse->ndistinct = 0;
192 168 : parse->found_attributes = false;
193 168 : parse->found_ndistinct = false;
194 :
195 : /* Now we are looking for the next MVNDistinctItem */
196 168 : parse->state = NDIST_EXPECT_ITEM;
197 168 : return JSON_SUCCESS;
198 : }
199 :
200 :
201 : /*
202 : * Invoked at the start of an array.
203 : *
204 : * ndistinct input format has two types of arrays, the outer MVNDistinctItem
205 : * array and the attribute number array within each MVNDistinctItem.
206 : */
207 : static JsonParseErrorType
208 918 : ndistinct_array_start(void *state)
209 : {
210 918 : NDistinctParseState *parse = state;
211 :
212 918 : switch (parse->state)
213 : {
214 426 : case NDIST_EXPECT_ATTNUM_LIST:
215 426 : parse->state = NDIST_EXPECT_ATTNUM;
216 426 : break;
217 :
218 456 : case NDIST_EXPECT_START:
219 456 : parse->state = NDIST_EXPECT_ITEM;
220 456 : break;
221 :
222 36 : default:
223 36 : errsave(parse->escontext,
224 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
225 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
226 : errdetail("Array has been found at an unexpected location."));
227 18 : return JSON_SEM_ACTION_FAILED;
228 : }
229 :
230 882 : return JSON_SUCCESS;
231 : }
232 :
233 :
234 : /*
235 : * Invoked at the end of an array.
236 : *
237 : * Arrays can never be empty.
238 : */
239 : static JsonParseErrorType
240 444 : ndistinct_array_end(void *state)
241 : {
242 444 : NDistinctParseState *parse = state;
243 :
244 444 : switch (parse->state)
245 : {
246 348 : case NDIST_EXPECT_ATTNUM:
247 348 : if (list_length(parse->attnum_list) > 0)
248 : {
249 : /*
250 : * The attribute number list is complete, look for more
251 : * MVNDistinctItem keys.
252 : */
253 336 : parse->state = NDIST_EXPECT_KEY;
254 336 : return JSON_SUCCESS;
255 : }
256 :
257 12 : errsave(parse->escontext,
258 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
259 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
260 : errdetail("The \"%s\" key must be a non-empty array.",
261 : PG_NDISTINCT_KEY_ATTRIBUTES));
262 6 : break;
263 :
264 96 : case NDIST_EXPECT_ITEM:
265 96 : if (list_length(parse->distinct_items) > 0)
266 : {
267 : /* Item list is complete, we are done. */
268 84 : parse->state = NDIST_EXPECT_COMPLETE;
269 84 : return JSON_SUCCESS;
270 : }
271 :
272 12 : errsave(parse->escontext,
273 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
274 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
275 : errdetail("Item array cannot be empty."));
276 6 : break;
277 :
278 0 : default:
279 :
280 : /*
281 : * This can only happen if a case was missed in
282 : * ndistinct_array_start().
283 : */
284 0 : elog(ERROR,
285 : "array end of \"%s\" found in unexpected parse state: %d.",
286 : "pg_ndistinct", (int) parse->state);
287 : break;
288 : }
289 :
290 12 : return JSON_SEM_ACTION_FAILED;
291 : }
292 :
293 : /*
294 : * Invoked at the start of a key/value field.
295 : *
296 : * The valid keys for the MVNDistinctItem object are:
297 : * - attributes
298 : * - ndistinct
299 : */
300 : static JsonParseErrorType
301 816 : ndistinct_object_field_start(void *state, char *fname, bool isnull)
302 : {
303 816 : NDistinctParseState *parse = state;
304 :
305 816 : if (strcmp(fname, PG_NDISTINCT_KEY_ATTRIBUTES) == 0)
306 : {
307 492 : if (parse->found_attributes)
308 : {
309 12 : errsave(parse->escontext,
310 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
311 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
312 : errdetail("Multiple \"%s\" keys are not allowed.",
313 : PG_NDISTINCT_KEY_ATTRIBUTES));
314 6 : return JSON_SEM_ACTION_FAILED;
315 : }
316 480 : parse->found_attributes = true;
317 480 : parse->state = NDIST_EXPECT_ATTNUM_LIST;
318 480 : return JSON_SUCCESS;
319 : }
320 :
321 324 : if (strcmp(fname, PG_NDISTINCT_KEY_NDISTINCT) == 0)
322 : {
323 300 : if (parse->found_ndistinct)
324 : {
325 12 : errsave(parse->escontext,
326 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
327 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
328 : errdetail("Multiple \"%s\" keys are not allowed.",
329 : PG_NDISTINCT_KEY_NDISTINCT));
330 6 : return JSON_SEM_ACTION_FAILED;
331 : }
332 288 : parse->found_ndistinct = true;
333 288 : parse->state = NDIST_EXPECT_NDISTINCT;
334 288 : return JSON_SUCCESS;
335 : }
336 :
337 24 : errsave(parse->escontext,
338 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
339 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
340 : errdetail("Only allowed keys are \"%s\" and \"%s\".",
341 : PG_NDISTINCT_KEY_ATTRIBUTES,
342 : PG_NDISTINCT_KEY_NDISTINCT));
343 12 : return JSON_SEM_ACTION_FAILED;
344 : }
345 :
346 : /*
347 : * Invoked at the start of an array element.
348 : *
349 : * The overall structure of the datatype is an array, but there are also
350 : * arrays as the value of every attributes key.
351 : */
352 : static JsonParseErrorType
353 1452 : ndistinct_array_element_start(void *state, bool isnull)
354 : {
355 1452 : const NDistinctParseState *parse = state;
356 :
357 1452 : switch (parse->state)
358 : {
359 924 : case NDIST_EXPECT_ATTNUM:
360 924 : if (!isnull)
361 912 : return JSON_SUCCESS;
362 :
363 12 : errsave(parse->escontext,
364 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
365 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
366 : errdetail("Attribute number array cannot be null."));
367 6 : break;
368 :
369 528 : case NDIST_EXPECT_ITEM:
370 528 : if (!isnull)
371 516 : return JSON_SUCCESS;
372 :
373 12 : errsave(parse->escontext,
374 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
375 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
376 : errdetail("Item list elements cannot be null."));
377 :
378 6 : break;
379 :
380 0 : default:
381 0 : elog(ERROR,
382 : "array element start of \"%s\" found in unexpected parse state: %d.",
383 : "pg_ndistinct", (int) parse->state);
384 : break;
385 : }
386 :
387 12 : return JSON_SEM_ACTION_FAILED;
388 : }
389 :
390 : /*
391 : * Test for valid subsequent attribute number.
392 : *
393 : * If the previous value is positive, then current value must either be
394 : * greater than the previous value, or negative.
395 : *
396 : * If the previous value is negative, then the value must be less than
397 : * the previous value.
398 : *
399 : * Duplicate values are obviously not allowed, but that is already covered
400 : * by the rules listed above.
401 : */
402 : static bool
403 474 : valid_subsequent_attnum(AttrNumber prev, AttrNumber cur)
404 : {
405 : Assert(prev != 0);
406 :
407 474 : if (prev > 0)
408 456 : return ((cur > prev) || (cur < 0));
409 :
410 18 : return (cur < prev);
411 : }
412 :
413 : /*
414 : * Handle scalar events from the ndistinct input parser.
415 : *
416 : * Override integer parse error messages and replace them with errors
417 : * specific to the context.
418 : */
419 : static JsonParseErrorType
420 1194 : ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
421 : {
422 1194 : NDistinctParseState *parse = state;
423 : AttrNumber attnum;
424 1194 : ErrorSaveContext escontext = {T_ErrorSaveContext};
425 :
426 1194 : switch (parse->state)
427 : {
428 900 : case NDIST_EXPECT_ATTNUM:
429 900 : attnum = pg_strtoint16_safe(token, (Node *) &escontext);
430 :
431 900 : if (escontext.error_occurred)
432 : {
433 12 : errsave(parse->escontext,
434 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
435 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
436 : errdetail("Key \"%s\" has an incorrect value.", PG_NDISTINCT_KEY_ATTRIBUTES));
437 6 : return JSON_SEM_ACTION_FAILED;
438 : }
439 :
440 : /*
441 : * The attribute number cannot be zero a negative number beyond
442 : * the number of the possible expressions.
443 : */
444 888 : if (attnum == 0 || attnum < (0 - STATS_MAX_DIMENSIONS))
445 : {
446 18 : errsave(parse->escontext,
447 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
448 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
449 : errdetail("Invalid \"%s\" element has been found: %d.",
450 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum));
451 6 : return JSON_SEM_ACTION_FAILED;
452 : }
453 :
454 870 : if (list_length(parse->attnum_list) > 0)
455 : {
456 474 : const AttrNumber prev = llast_int(parse->attnum_list);
457 :
458 474 : if (!valid_subsequent_attnum(prev, attnum))
459 : {
460 12 : errsave(parse->escontext,
461 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
462 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
463 : errdetail("Invalid \"%s\" element has been found: %d cannot follow %d.",
464 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum, prev));
465 6 : return JSON_SEM_ACTION_FAILED;
466 : }
467 : }
468 :
469 858 : parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
470 858 : return JSON_SUCCESS;
471 :
472 240 : case NDIST_EXPECT_NDISTINCT:
473 :
474 : /*
475 : * While the structure dictates that ndistinct is a double
476 : * precision floating point, it has always been an integer in the
477 : * output generated. Therefore, we parse it as an integer here.
478 : */
479 240 : parse->ndistinct = pg_strtoint32_safe(token, (Node *) &escontext);
480 :
481 240 : if (!escontext.error_occurred)
482 : {
483 216 : parse->state = NDIST_EXPECT_KEY;
484 216 : return JSON_SUCCESS;
485 : }
486 :
487 24 : errsave(parse->escontext,
488 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
489 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
490 : errdetail("Key \"%s\" has an incorrect value.",
491 : PG_NDISTINCT_KEY_NDISTINCT));
492 12 : break;
493 :
494 54 : default:
495 54 : errsave(parse->escontext,
496 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
497 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
498 : errdetail("Unexpected scalar has been found."));
499 30 : break;
500 : }
501 :
502 42 : return JSON_SEM_ACTION_FAILED;
503 : }
504 :
505 : /*
506 : * Compare the attribute arrays of two MVNDistinctItem values,
507 : * looking for duplicate sets. Return true if a duplicate set is found.
508 : *
509 : * The arrays are required to be in canonical order (all positive numbers
510 : * in ascending order first, followed by all negative numbers in descending
511 : * order) so it's safe to compare the attrnums in order, stopping at the
512 : * first difference.
513 : */
514 : static bool
515 144 : item_attributes_eq(const MVNDistinctItem *a, const MVNDistinctItem *b)
516 : {
517 144 : if (a->nattributes != b->nattributes)
518 96 : return false;
519 :
520 90 : for (int i = 0; i < a->nattributes; i++)
521 : {
522 78 : if (a->attributes[i] != b->attributes[i])
523 36 : return false;
524 : }
525 :
526 12 : return true;
527 : }
528 :
529 : /*
530 : * Ensure that an attribute number appears as one of the attribute numbers
531 : * in a MVNDistinctItem.
532 : */
533 : static bool
534 84 : item_has_attnum(const MVNDistinctItem *item, AttrNumber attnum)
535 : {
536 210 : for (int i = 0; i < item->nattributes; i++)
537 : {
538 198 : if (attnum == item->attributes[i])
539 72 : return true;
540 : }
541 12 : return false;
542 : }
543 :
544 : /*
545 : * Ensure that the attributes in MVNDistinctItem A are a subset of the
546 : * reference MVNDistinctItem B.
547 : */
548 : static bool
549 48 : item_is_attnum_subset(const MVNDistinctItem *item,
550 : const MVNDistinctItem *refitem)
551 : {
552 120 : for (int i = 0; i < item->nattributes; i++)
553 : {
554 84 : if (!item_has_attnum(refitem, item->attributes[i]))
555 12 : return false;
556 : }
557 36 : return true;
558 : }
559 :
560 : /*
561 : * Generate a string representing an array of attribute numbers.
562 : *
563 : * Freeing the allocated string is the responsibility of the caller.
564 : */
565 : static char *
566 36 : item_attnum_list(const MVNDistinctItem *item)
567 : {
568 : StringInfoData str;
569 :
570 36 : initStringInfo(&str);
571 :
572 36 : appendStringInfo(&str, "%d", item->attributes[0]);
573 :
574 96 : for (int i = 1; i < item->nattributes; i++)
575 60 : appendStringInfo(&str, ", %d", item->attributes[i]);
576 :
577 36 : return str.data;
578 : }
579 :
580 : /*
581 : * Attempt to build and serialize the MVNDistinct object.
582 : *
583 : * This can only be executed after the completion of the JSON parsing.
584 : *
585 : * In the event of an error, set the error context and return NULL.
586 : */
587 : static bytea *
588 84 : build_mvndistinct(NDistinctParseState *parse, char *str)
589 : {
590 : MVNDistinct *ndistinct;
591 84 : int nitems = list_length(parse->distinct_items);
592 : bytea *bytes;
593 84 : int item_most_attrs = 0;
594 84 : int item_most_attrs_idx = 0;
595 :
596 84 : switch (parse->state)
597 : {
598 84 : case NDIST_EXPECT_COMPLETE:
599 :
600 : /*
601 : * Parsing has ended correctly and we should have a list of items.
602 : * If we don't, something has been done wrong in one of the
603 : * earlier parsing steps.
604 : */
605 84 : if (nitems == 0)
606 0 : elog(ERROR,
607 : "cannot have empty item list after parsing success.");
608 84 : break;
609 :
610 0 : case NDIST_EXPECT_START:
611 : /* blank */
612 0 : errsave(parse->escontext,
613 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
614 : errmsg("malformed pg_ndistinct: \"%s\"", str),
615 : errdetail("Value cannot be empty."));
616 0 : return NULL;
617 :
618 0 : default:
619 : /* Unexpected end-state. */
620 0 : errsave(parse->escontext,
621 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
622 : errmsg("malformed pg_ndistinct: \"%s\"", str),
623 : errdetail("Unexpected end state has been found: %d.", parse->state));
624 0 : return NULL;
625 : }
626 :
627 84 : ndistinct = palloc(offsetof(MVNDistinct, items) +
628 84 : nitems * sizeof(MVNDistinctItem));
629 :
630 84 : ndistinct->magic = STATS_NDISTINCT_MAGIC;
631 84 : ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
632 84 : ndistinct->nitems = nitems;
633 :
634 240 : for (int i = 0; i < nitems; i++)
635 : {
636 168 : MVNDistinctItem *item = list_nth(parse->distinct_items, i);
637 :
638 : /*
639 : * Ensure that this item does not duplicate the attributes of any
640 : * pre-existing item.
641 : */
642 300 : for (int j = 0; j < i; j++)
643 : {
644 144 : if (item_attributes_eq(item, &ndistinct->items[j]))
645 : {
646 12 : char *s = item_attnum_list(item);
647 :
648 12 : errsave(parse->escontext,
649 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
650 : errmsg("malformed pg_ndistinct: \"%s\"", str),
651 : errdetail("Duplicated \"%s\" array has been found: [%s].",
652 : PG_NDISTINCT_KEY_ATTRIBUTES, s));
653 6 : pfree(s);
654 6 : return NULL;
655 : }
656 : }
657 :
658 156 : ndistinct->items[i].ndistinct = item->ndistinct;
659 156 : ndistinct->items[i].nattributes = item->nattributes;
660 :
661 : /*
662 : * This transfers free-ing responsibility from the distinct_items list
663 : * to the ndistinct object.
664 : */
665 156 : ndistinct->items[i].attributes = item->attributes;
666 :
667 : /*
668 : * Keep track of the first longest attribute list. All other attribute
669 : * lists must be a subset of this list.
670 : */
671 156 : if (item->nattributes > item_most_attrs)
672 : {
673 126 : item_most_attrs = item->nattributes;
674 126 : item_most_attrs_idx = i;
675 : }
676 : }
677 :
678 : /*
679 : * Verify that all the sets of attribute numbers are a proper subset of
680 : * the longest set recorded. This acts as an extra sanity check based on
681 : * the input given. Note that this still needs to be cross-checked with
682 : * the extended statistics objects this would be assigned to, but it
683 : * provides one extra layer of protection.
684 : */
685 168 : for (int i = 0; i < nitems; i++)
686 : {
687 108 : if (i == item_most_attrs_idx)
688 60 : continue;
689 :
690 48 : if (!item_is_attnum_subset(&ndistinct->items[i],
691 48 : &ndistinct->items[item_most_attrs_idx]))
692 : {
693 12 : const MVNDistinctItem *item = &ndistinct->items[i];
694 12 : const MVNDistinctItem *refitem = &ndistinct->items[item_most_attrs_idx];
695 12 : char *item_list = item_attnum_list(item);
696 12 : char *refitem_list = item_attnum_list(refitem);
697 :
698 12 : errsave(parse->escontext,
699 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
700 : errmsg("malformed pg_ndistinct: \"%s\"", str),
701 : errdetail("\"%s\" array [%s] must be a subset of array [%s].",
702 : PG_NDISTINCT_KEY_ATTRIBUTES,
703 : item_list, refitem_list));
704 6 : pfree(item_list);
705 6 : pfree(refitem_list);
706 6 : return NULL;
707 : }
708 : }
709 :
710 60 : bytes = statext_ndistinct_serialize(ndistinct);
711 :
712 : /*
713 : * Free the attribute lists, before the ndistinct itself.
714 : */
715 156 : for (int i = 0; i < nitems; i++)
716 96 : pfree(ndistinct->items[i].attributes);
717 60 : pfree(ndistinct);
718 :
719 60 : return bytes;
720 : }
721 :
722 : /*
723 : * pg_ndistinct_in
724 : * input routine for type pg_ndistinct.
725 : */
726 : Datum
727 492 : pg_ndistinct_in(PG_FUNCTION_ARGS)
728 : {
729 492 : char *str = PG_GETARG_CSTRING(0);
730 : NDistinctParseState parse_state;
731 : JsonParseErrorType result;
732 : JsonLexContext *lex;
733 : JsonSemAction sem_action;
734 492 : bytea *bytes = NULL;
735 :
736 : /* initialize semantic state */
737 492 : parse_state.str = str;
738 492 : parse_state.state = NDIST_EXPECT_START;
739 492 : parse_state.distinct_items = NIL;
740 492 : parse_state.escontext = fcinfo->context;
741 492 : parse_state.found_attributes = false;
742 492 : parse_state.found_ndistinct = false;
743 492 : parse_state.attnum_list = NIL;
744 492 : parse_state.ndistinct = 0;
745 :
746 : /* set callbacks */
747 492 : sem_action.semstate = (void *) &parse_state;
748 492 : sem_action.object_start = ndistinct_object_start;
749 492 : sem_action.object_end = ndistinct_object_end;
750 492 : sem_action.array_start = ndistinct_array_start;
751 492 : sem_action.array_end = ndistinct_array_end;
752 492 : sem_action.object_field_start = ndistinct_object_field_start;
753 492 : sem_action.object_field_end = NULL;
754 492 : sem_action.array_element_start = ndistinct_array_element_start;
755 492 : sem_action.array_element_end = NULL;
756 492 : sem_action.scalar = ndistinct_scalar;
757 :
758 492 : lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
759 : PG_UTF8, true);
760 492 : result = pg_parse_json(lex, &sem_action);
761 312 : freeJsonLexContext(lex);
762 :
763 312 : if (result == JSON_SUCCESS)
764 84 : bytes = build_mvndistinct(&parse_state, str);
765 :
766 300 : list_free(parse_state.attnum_list);
767 300 : list_free_deep(parse_state.distinct_items);
768 :
769 300 : if (bytes)
770 60 : PG_RETURN_BYTEA_P(bytes);
771 :
772 : /*
773 : * If escontext already set, just use that. Anything else is a generic
774 : * JSON parse error.
775 : */
776 240 : if (!SOFT_ERROR_OCCURRED(parse_state.escontext))
777 48 : errsave(parse_state.escontext,
778 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
779 : errmsg("malformed pg_ndistinct: \"%s\"", str),
780 : errdetail("Input data must be valid JSON."));
781 :
782 216 : PG_RETURN_NULL();
783 : }
784 :
785 : /*
786 : * pg_ndistinct_out
787 : * output routine for type pg_ndistinct
788 : *
789 : * Produces a human-readable representation of the value.
790 : */
791 : Datum
792 126 : pg_ndistinct_out(PG_FUNCTION_ARGS)
793 : {
794 126 : bytea *data = PG_GETARG_BYTEA_PP(0);
795 126 : MVNDistinct *ndist = statext_ndistinct_deserialize(data);
796 : int i;
797 : StringInfoData str;
798 :
799 126 : initStringInfo(&str);
800 126 : appendStringInfoChar(&str, '[');
801 :
802 360 : for (i = 0; i < ndist->nitems; i++)
803 : {
804 234 : MVNDistinctItem item = ndist->items[i];
805 :
806 234 : if (i > 0)
807 108 : appendStringInfoString(&str, ", ");
808 :
809 234 : if (item.nattributes <= 0)
810 0 : elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
811 :
812 234 : appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
813 234 : item.attributes[0]);
814 :
815 510 : for (int j = 1; j < item.nattributes; j++)
816 276 : appendStringInfo(&str, ", %d", item.attributes[j]);
817 :
818 234 : appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
819 234 : (int) item.ndistinct);
820 : }
821 :
822 126 : appendStringInfoChar(&str, ']');
823 :
824 126 : PG_RETURN_CSTRING(str.data);
825 : }
826 :
827 : /*
828 : * pg_ndistinct_recv
829 : * binary input routine for type pg_ndistinct
830 : */
831 : Datum
832 0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
833 : {
834 0 : ereport(ERROR,
835 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
837 :
838 : PG_RETURN_VOID(); /* keep compiler quiet */
839 : }
840 :
841 : /*
842 : * pg_ndistinct_send
843 : * binary output routine for type pg_ndistinct
844 : *
845 : * n-distinct is serialized into a bytea value, so let's send that.
846 : */
847 : Datum
848 0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
849 : {
850 0 : return byteasend(fcinfo);
851 : }
|