Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_ndistinct.c
4 : * pg_ndistinct data type support.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/pg_ndistinct.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "common/int.h"
18 : #include "common/jsonapi.h"
19 : #include "lib/stringinfo.h"
20 : #include "mb/pg_wchar.h"
21 : #include "nodes/miscnodes.h"
22 : #include "statistics/extended_stats_internal.h"
23 : #include "statistics/statistics_format.h"
24 : #include "utils/builtins.h"
25 : #include "utils/fmgrprotos.h"
26 :
27 : /* Parsing state data: what the pg_ndistinct JSON parser expects to see next */
28 : typedef enum
29 : {
30 : NDIST_EXPECT_START = 0, /* initial state; the outer array must open next */
31 : NDIST_EXPECT_ITEM, /* inside the outer array; an item object or its end */
32 : NDIST_EXPECT_KEY, /* inside an item object; a key or the object's end */
33 : NDIST_EXPECT_ATTNUM_LIST, /* after the attributes key; an array must open */
34 : NDIST_EXPECT_ATTNUM, /* inside the attribute array; a number or its end */
35 : NDIST_EXPECT_NDISTINCT, /* after the ndistinct key; an integer scalar */
36 : NDIST_EXPECT_COMPLETE, /* the outer array has been closed */
37 : } NDistinctSemanticState;
38 :
39 : typedef struct /* Working state handed to every JSON semantic callback */
40 : {
41 : const char *str; /* Input string, quoted in error messages */
42 : NDistinctSemanticState state; /* What the parser expects next */
43 :
44 : List *distinct_items; /* Accumulated complete MVNDistinctItems */
45 : Node *escontext; /* Error context handed to errsave() */
46 :
47 : bool found_attributes; /* Item has "attributes" key */
48 : bool found_ndistinct; /* Item has "ndistinct" key */
49 : List *attnum_list; /* Accumulated attribute numbers */
50 : int32 ndistinct; /* Parsed "ndistinct" value of the current item */
51 : } NDistinctParseState;
52 :
53 : /*
54 : * Invoked at the start of each MVNDistinctItem.
55 : *
56 : * The entire JSON document should be one array of MVNDistinctItem objects.
57 : * If we are anywhere else in the document, it is an error.
58 : */
59 : static JsonParseErrorType
60 516 : ndistinct_object_start(void *state)
61 : {
62 516 : NDistinctParseState *parse = state;
63 :
64 516 : switch (parse->state)
65 : {
66 456 : case NDIST_EXPECT_ITEM:
67 : /* Now we expect to see attributes/ndistinct keys */
68 456 : parse->state = NDIST_EXPECT_KEY;
69 456 : return JSON_SUCCESS;
70 :
71 24 : case NDIST_EXPECT_START:
72 : /* pg_ndistinct must begin with a '[' */
73 24 : errsave(parse->escontext,
74 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
75 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
76 : errdetail("Initial element must be an array."));
77 12 : break;
78 :
79 0 : case NDIST_EXPECT_KEY:
80 : /* In an object, expecting key */
81 0 : errsave(parse->escontext,
82 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
83 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
84 : errdetail("Expected an object key."));
85 0 : break;
86 :
87 12 : case NDIST_EXPECT_ATTNUM_LIST:
88 : /* Just followed an "attributes" key */
89 12 : errsave(parse->escontext,
90 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
91 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
92 : errdetail("Value of \"%s\" must be an array of attribute numbers.",
93 : PG_NDISTINCT_KEY_ATTRIBUTES));
94 6 : break;
95 :
96 12 : case NDIST_EXPECT_ATTNUM:
97 : /* In an attribute number list, expect only scalar integers */
98 12 : errsave(parse->escontext,
99 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
100 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
101 : errdetail("Attribute lists can only contain attribute numbers."));
102 6 : break;
103 :
104 12 : case NDIST_EXPECT_NDISTINCT:
105 : /* Just followed an "ndistinct" key */
106 12 : errsave(parse->escontext,
107 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
108 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
109 : errdetail("Value of \"%s\" must be an integer.",
110 : PG_NDISTINCT_KEY_NDISTINCT));
111 6 : break;
112 :
113 0 : default:
114 0 : errsave(parse->escontext,
115 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
116 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
117 : errdetail("Unexpected parse state: %d", (int) parse->state));
118 0 : break;
119 : }
120 :
121 30 : return JSON_SEM_ACTION_FAILED;
122 : }
123 :
124 : /*
125 : * Invoked at the end of an object.
126 : *
127 : * Check to ensure that it was a complete MVNDistinctItem
128 : */
129 : static JsonParseErrorType
130 156 : ndistinct_object_end(void *state)
131 : {
132 156 : NDistinctParseState *parse = state;
133 :
134 156 : int natts = 0;
135 :
136 : MVNDistinctItem *item;
137 :
138 156 : if (parse->state != NDIST_EXPECT_KEY)
139 : {
140 0 : errsave(parse->escontext,
141 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
142 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
143 : errdetail("Unexpected parse state: %d", (int) parse->state));
144 0 : return JSON_SEM_ACTION_FAILED;
145 : }
146 :
147 156 : if (!parse->found_attributes)
148 : {
149 12 : errsave(parse->escontext,
150 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
151 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
152 : errdetail("Item must contain \"%s\" key.",
153 : PG_NDISTINCT_KEY_ATTRIBUTES));
154 6 : return JSON_SEM_ACTION_FAILED;
155 : }
156 :
157 144 : if (!parse->found_ndistinct)
158 : {
159 12 : errsave(parse->escontext,
160 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
161 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
162 : errdetail("Item must contain \"%s\" key.",
163 : PG_NDISTINCT_KEY_NDISTINCT));
164 6 : return JSON_SEM_ACTION_FAILED;
165 : }
166 :
167 : /*
168 : * We need at least two attribute numbers for a ndistinct item, anything
169 : * less is malformed.
170 : */
171 132 : natts = list_length(parse->attnum_list);
172 132 : if ((natts < 2) || (natts > STATS_MAX_DIMENSIONS))
173 : {
174 24 : errsave(parse->escontext,
175 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
176 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
177 : errdetail("The \"%s\" key must contain an array of at least %d and no more than %d attributes.",
178 : PG_NDISTINCT_KEY_ATTRIBUTES, 2, STATS_MAX_DIMENSIONS));
179 12 : return JSON_SEM_ACTION_FAILED;
180 : }
181 :
182 : /* Create the MVNDistinctItem */
183 108 : item = palloc(sizeof(MVNDistinctItem));
184 108 : item->nattributes = natts;
185 108 : item->attributes = palloc0(natts * sizeof(AttrNumber));
186 108 : item->ndistinct = (double) parse->ndistinct;
187 :
188 372 : for (int i = 0; i < natts; i++)
189 264 : item->attributes[i] = (AttrNumber) list_nth_int(parse->attnum_list, i);
190 :
191 108 : parse->distinct_items = lappend(parse->distinct_items, (void *) item);
192 :
193 : /* reset item state vars */
194 108 : list_free(parse->attnum_list);
195 108 : parse->attnum_list = NIL;
196 108 : parse->ndistinct = 0;
197 108 : parse->found_attributes = false;
198 108 : parse->found_ndistinct = false;
199 :
200 : /* Now we are looking for the next MVNDistinctItem */
201 108 : parse->state = NDIST_EXPECT_ITEM;
202 108 : return JSON_SUCCESS;
203 : }
204 :
205 :
206 : /*
207 : * Invoked at the start of an array.
208 : *
209 : * ndistinct input format has two types of arrays, the outer MVNDistinctItem
210 : * array and the attribute number array within each MVNDistinctItem.
211 : */
212 : static JsonParseErrorType
213 816 : ndistinct_array_start(void *state)
214 : {
215 816 : NDistinctParseState *parse = state;
216 :
217 816 : switch (parse->state)
218 : {
219 366 : case NDIST_EXPECT_ATTNUM_LIST:
220 366 : parse->state = NDIST_EXPECT_ATTNUM;
221 366 : break;
222 :
223 414 : case NDIST_EXPECT_START:
224 414 : parse->state = NDIST_EXPECT_ITEM;
225 414 : break;
226 :
227 36 : default:
228 36 : errsave(parse->escontext,
229 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
230 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
231 : errdetail("Array found in unexpected place."));
232 18 : return JSON_SEM_ACTION_FAILED;
233 : }
234 :
235 780 : return JSON_SUCCESS;
236 : }
237 :
238 :
239 : /*
240 : * Invoked at the end of an array.
241 : *
242 : * Arrays can never be empty.
243 : */
244 : static JsonParseErrorType
245 342 : ndistinct_array_end(void *state)
246 : {
247 342 : NDistinctParseState *parse = state;
248 :
249 342 : switch (parse->state)
250 : {
251 288 : case NDIST_EXPECT_ATTNUM:
252 288 : if (list_length(parse->attnum_list) > 0)
253 : {
254 : /*
255 : * The attribute number list is complete, look for more
256 : * MVNDistinctItem keys.
257 : */
258 276 : parse->state = NDIST_EXPECT_KEY;
259 276 : return JSON_SUCCESS;
260 : }
261 :
262 12 : errsave(parse->escontext,
263 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
264 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
265 : errdetail("The \"%s\" key must be a non-empty array.",
266 : PG_NDISTINCT_KEY_ATTRIBUTES));
267 6 : break;
268 :
269 54 : case NDIST_EXPECT_ITEM:
270 54 : if (list_length(parse->distinct_items) > 0)
271 : {
272 : /* Item list is complete, we are done. */
273 42 : parse->state = NDIST_EXPECT_COMPLETE;
274 42 : return JSON_SUCCESS;
275 : }
276 :
277 12 : errsave(parse->escontext,
278 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
279 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
280 : errdetail("Item array cannot be empty."));
281 6 : break;
282 :
283 0 : default:
284 :
285 : /*
286 : * This can only happen if a case was missed in
287 : * ndistinct_array_start().
288 : */
289 0 : errsave(parse->escontext,
290 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
291 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
292 : errdetail("Array found in unexpected place."));
293 0 : break;
294 : }
295 :
296 12 : return JSON_SEM_ACTION_FAILED;
297 : }
298 :
299 : /*
300 : * Invoked at the start of a key/value field.
301 : *
302 : * The valid keys for the MVNDistinctItem object are:
303 : * - attributes
304 : * - ndistinct
305 : */
306 : static JsonParseErrorType
307 696 : ndistinct_object_field_start(void *state, char *fname, bool isnull)
308 : {
309 696 : NDistinctParseState *parse = state;
310 :
311 696 : if (strcmp(fname, PG_NDISTINCT_KEY_ATTRIBUTES) == 0)
312 : {
313 432 : if (parse->found_attributes)
314 : {
315 12 : errsave(parse->escontext,
316 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
317 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
318 : errdetail("Multiple \"%s\" keys are not allowed.",
319 : PG_NDISTINCT_KEY_ATTRIBUTES));
320 6 : return JSON_SEM_ACTION_FAILED;
321 : }
322 420 : parse->found_attributes = true;
323 420 : parse->state = NDIST_EXPECT_ATTNUM_LIST;
324 420 : return JSON_SUCCESS;
325 : }
326 :
327 264 : if (strcmp(fname, PG_NDISTINCT_KEY_NDISTINCT) == 0)
328 : {
329 240 : if (parse->found_ndistinct)
330 : {
331 12 : errsave(parse->escontext,
332 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
333 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
334 : errdetail("Multiple \"%s\" keys are not allowed.",
335 : PG_NDISTINCT_KEY_NDISTINCT));
336 6 : return JSON_SEM_ACTION_FAILED;
337 : }
338 228 : parse->found_ndistinct = true;
339 228 : parse->state = NDIST_EXPECT_NDISTINCT;
340 228 : return JSON_SUCCESS;
341 : }
342 :
343 24 : errsave(parse->escontext,
344 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
345 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
346 : errdetail("Only allowed keys are \"%s\" and \"%s\".",
347 : PG_NDISTINCT_KEY_ATTRIBUTES,
348 : PG_NDISTINCT_KEY_NDISTINCT));
349 12 : return JSON_SEM_ACTION_FAILED;
350 : }
351 :
352 : /*
353 : * Invoked at the start of an array element.
354 : *
355 : * The overall structure of the datatype is an array, but there are also
356 : * arrays as the value of every attributes key.
357 : */
358 : static JsonParseErrorType
359 1266 : ndistinct_array_element_start(void *state, bool isnull)
360 : {
361 1266 : const NDistinctParseState *parse = state;
362 :
363 1266 : switch (parse->state)
364 : {
365 798 : case NDIST_EXPECT_ATTNUM:
366 798 : if (!isnull)
367 786 : return JSON_SUCCESS;
368 :
369 12 : errsave(parse->escontext,
370 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
371 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
372 : errdetail("Attribute number array cannot be null."));
373 6 : break;
374 :
375 468 : case NDIST_EXPECT_ITEM:
376 468 : if (!isnull)
377 456 : return JSON_SUCCESS;
378 :
379 12 : errsave(parse->escontext,
380 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
381 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
382 : errdetail("Item list elements cannot be null."));
383 :
384 6 : break;
385 :
386 0 : default:
387 0 : errsave(parse->escontext,
388 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
389 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
390 : errdetail("Unexpected array element."));
391 0 : break;
392 : }
393 :
394 12 : return JSON_SEM_ACTION_FAILED;
395 : }
396 :
397 : /*
398 : * Test for valid subsequent attribute number.
399 : *
400 : * If the previous value is positive, then current value must either be
401 : * greater than the previous value, or negative.
402 : *
403 : * If the previous value is negative, then the value must be less than
404 : * the previous value.
405 : *
406 : * Duplicate values are obviously not allowed, but that is already covered
407 : * by the rules listed above.
408 : */
409 : static bool
410 408 : valid_subsequent_attnum(AttrNumber prev, AttrNumber cur)
411 : {
412 : Assert(prev != 0);
413 :
414 408 : if (prev > 0)
415 396 : return ((cur > prev) || (cur < 0));
416 :
417 12 : return (cur < prev);
418 : }
419 :
420 : /*
421 : * Handle scalar events from the ndistinct input parser.
422 : *
423 : * Override integer parse error messages and replace them with errors
424 : * specific to the context.
425 : */
426 : static JsonParseErrorType
427 1008 : ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
428 : {
429 1008 : NDistinctParseState *parse = state;
430 : AttrNumber attnum;
431 1008 : ErrorSaveContext escontext = {T_ErrorSaveContext};
432 :
433 1008 : switch (parse->state)
434 : {
435 774 : case NDIST_EXPECT_ATTNUM:
436 774 : attnum = pg_strtoint16_safe(token, (Node *) &escontext);
437 :
438 774 : if (SOFT_ERROR_OCCURRED(&escontext))
439 : {
440 12 : errsave(parse->escontext,
441 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
442 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
443 : errdetail("Invalid \"%s\" value.", PG_NDISTINCT_KEY_ATTRIBUTES));
444 6 : return JSON_SEM_ACTION_FAILED;
445 : }
446 :
447 : /*
448 : * The attribute number cannot be zero a negative number beyond
449 : * the number of the possible expressions.
450 : */
451 762 : if (attnum == 0 || attnum < (0 - STATS_MAX_DIMENSIONS))
452 : {
453 18 : errsave(parse->escontext,
454 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
455 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
456 : errdetail("Invalid \"%s\" element: %d.",
457 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum));
458 6 : return JSON_SEM_ACTION_FAILED;
459 : }
460 :
461 744 : if (list_length(parse->attnum_list) > 0)
462 : {
463 408 : const AttrNumber prev = llast_int(parse->attnum_list);
464 :
465 408 : if (!valid_subsequent_attnum(prev, attnum))
466 : {
467 12 : errsave(parse->escontext,
468 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
469 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
470 : errdetail("Invalid \"%s\" element: %d cannot follow %d.",
471 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum, prev));
472 6 : return JSON_SEM_ACTION_FAILED;
473 : }
474 : }
475 :
476 732 : parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
477 732 : return JSON_SUCCESS;
478 :
479 180 : case NDIST_EXPECT_NDISTINCT:
480 :
481 : /*
482 : * While the structure dictates that ndistinct is a double
483 : * precision floating point, it has always been an integer in the
484 : * output generated. Therefore, we parse it as an integer here.
485 : */
486 180 : parse->ndistinct = pg_strtoint32_safe(token, (Node *) &escontext);
487 :
488 180 : if (!SOFT_ERROR_OCCURRED(&escontext))
489 : {
490 156 : parse->state = NDIST_EXPECT_KEY;
491 156 : return JSON_SUCCESS;
492 : }
493 :
494 24 : errsave(parse->escontext,
495 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
496 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
497 : errdetail("Invalid \"%s\" value.",
498 : PG_NDISTINCT_KEY_NDISTINCT));
499 12 : break;
500 :
501 54 : default:
502 54 : errsave(parse->escontext,
503 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
504 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
505 : errdetail("Unexpected scalar."));
506 30 : break;
507 : }
508 :
509 42 : return JSON_SEM_ACTION_FAILED;
510 : }
511 :
512 : /*
513 : * Compare the attribute arrays of two MVNDistinctItem values,
514 : * looking for duplicate sets. Return true if a duplicate set is found.
515 : *
516 : * The arrays are required to be in canonical order (all positive numbers
517 : * in ascending order first, followed by all negative numbers in descending
518 : * order) so it's safe to compare the attrnums in order, stopping at the
519 : * first difference.
520 : */
521 : static bool
522 108 : item_attributes_eq(const MVNDistinctItem *a, const MVNDistinctItem *b)
523 : {
524 108 : if (a->nattributes != b->nattributes)
525 78 : return false;
526 :
527 66 : for (int i = 0; i < a->nattributes; i++)
528 : {
529 54 : if (a->attributes[i] != b->attributes[i])
530 18 : return false;
531 : }
532 :
533 12 : return true;
534 : }
535 :
536 : /*
537 : * Ensure that an attribute number appears as one of the attribute numbers
538 : * in a MVNDistinctItem.
539 : */
540 : static bool
541 48 : item_has_attnum(const MVNDistinctItem *item, AttrNumber attnum)
542 : {
543 138 : for (int i = 0; i < item->nattributes; i++)
544 : {
545 126 : if (attnum == item->attributes[i])
546 36 : return true;
547 : }
548 12 : return false;
549 : }
550 :
551 : /*
552 : * Ensure that the attributes in MVNDistinctItem A are a subset of the
553 : * reference MVNDistinctItem B.
554 : */
555 : static bool
556 30 : item_is_attnum_subset(const MVNDistinctItem *item,
557 : const MVNDistinctItem *refitem)
558 : {
559 66 : for (int i = 0; i < item->nattributes; i++)
560 : {
561 48 : if (!item_has_attnum(refitem, item->attributes[i]))
562 12 : return false;
563 : }
564 18 : return true;
565 : }
566 :
567 : /*
568 : * Generate a string representing an array of attribute numbers.
569 : *
570 : * Freeing the allocated string is the responsibility of the caller.
571 : */
572 : static char *
573 36 : item_attnum_list(const MVNDistinctItem *item)
574 : {
575 : StringInfoData str;
576 :
577 36 : initStringInfo(&str);
578 :
579 36 : appendStringInfo(&str, "%d", item->attributes[0]);
580 :
581 96 : for (int i = 1; i < item->nattributes; i++)
582 60 : appendStringInfo(&str, ", %d", item->attributes[i]);
583 :
584 36 : return str.data;
585 : }
586 :
587 : /*
588 : * Attempt to build and serialize the MVNDistinct object.
589 : *
590 : * This can only be executed after the completion of the JSON parsing.
591 : *
592 : * In the event of an error, set the error context and return NULL.
593 : */
594 : static bytea *
595 42 : build_mvndistinct(NDistinctParseState *parse, char *str)
596 : {
597 : MVNDistinct *ndistinct;
598 42 : int nitems = list_length(parse->distinct_items);
599 : bytea *bytes;
600 42 : int item_most_attrs = 0;
601 42 : int item_most_attrs_idx = 0;
602 :
603 42 : switch (parse->state)
604 : {
605 42 : case NDIST_EXPECT_COMPLETE:
606 :
607 : /*
608 : * Parsing has ended correctly and we should have a list of items.
609 : * If we don't, something has been done wrong in one of the
610 : * earlier parsing steps.
611 : */
612 42 : if (nitems == 0)
613 0 : elog(ERROR,
614 : "cannot have empty item list after parsing success.");
615 42 : break;
616 :
617 0 : case NDIST_EXPECT_START:
618 : /* blank */
619 0 : errsave(parse->escontext,
620 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
621 : errmsg("malformed pg_ndistinct: \"%s\"", str),
622 : errdetail("Value cannot be empty."));
623 0 : return NULL;
624 :
625 0 : default:
626 : /* Unexpected end-state. */
627 0 : errsave(parse->escontext,
628 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
629 : errmsg("malformed pg_ndistinct: \"%s\"", str),
630 : errdetail("Unexpected end state %d.", parse->state));
631 0 : return NULL;
632 : }
633 :
634 42 : ndistinct = palloc(offsetof(MVNDistinct, items) +
635 42 : nitems * sizeof(MVNDistinctItem));
636 :
637 42 : ndistinct->magic = STATS_NDISTINCT_MAGIC;
638 42 : ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
639 42 : ndistinct->nitems = nitems;
640 :
641 138 : for (int i = 0; i < nitems; i++)
642 : {
643 108 : MVNDistinctItem *item = list_nth(parse->distinct_items, i);
644 :
645 : /*
646 : * Ensure that this item does not duplicate the attributes of any
647 : * pre-existing item.
648 : */
649 204 : for (int j = 0; j < i; j++)
650 : {
651 108 : if (item_attributes_eq(item, &ndistinct->items[j]))
652 : {
653 12 : char *s = item_attnum_list(item);
654 :
655 12 : errsave(parse->escontext,
656 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
657 : errmsg("malformed pg_ndistinct: \"%s\"", str),
658 : errdetail("Duplicated \"%s\" array found: [%s]",
659 : PG_NDISTINCT_KEY_ATTRIBUTES, s));
660 6 : pfree(s);
661 6 : return NULL;
662 : }
663 : }
664 :
665 96 : ndistinct->items[i].ndistinct = item->ndistinct;
666 96 : ndistinct->items[i].nattributes = item->nattributes;
667 :
668 : /*
669 : * This transfers free-ing responsibility from the distinct_items list
670 : * to the ndistinct object.
671 : */
672 96 : ndistinct->items[i].attributes = item->attributes;
673 :
674 : /*
675 : * Keep track of the first longest attribute list. All other attribute
676 : * lists must be a subset of this list.
677 : */
678 96 : if (item->nattributes > item_most_attrs)
679 : {
680 78 : item_most_attrs = item->nattributes;
681 78 : item_most_attrs_idx = i;
682 : }
683 : }
684 :
685 : /*
686 : * Verify that all the sets of attribute numbers are a proper subset of
687 : * the longest set recorded. This acts as an extra sanity check based on
688 : * the input given. Note that this still needs to be cross-checked with
689 : * the extended statistics objects this would be assigned to, but it
690 : * provides one extra layer of protection.
691 : */
692 66 : for (int i = 0; i < nitems; i++)
693 : {
694 48 : if (i == item_most_attrs_idx)
695 18 : continue;
696 :
697 30 : if (!item_is_attnum_subset(&ndistinct->items[i],
698 30 : &ndistinct->items[item_most_attrs_idx]))
699 : {
700 12 : const MVNDistinctItem *item = &ndistinct->items[i];
701 12 : const MVNDistinctItem *refitem = &ndistinct->items[item_most_attrs_idx];
702 12 : char *item_list = item_attnum_list(item);
703 12 : char *refitem_list = item_attnum_list(refitem);
704 :
705 12 : errsave(parse->escontext,
706 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
707 : errmsg("malformed pg_ndistinct: \"%s\"", str),
708 : errdetail("\"%s\" array: [%s] must be a subset of array: [%s]",
709 : PG_NDISTINCT_KEY_ATTRIBUTES,
710 : item_list, refitem_list));
711 6 : pfree(item_list);
712 6 : pfree(refitem_list);
713 6 : return NULL;
714 : }
715 : }
716 :
717 18 : bytes = statext_ndistinct_serialize(ndistinct);
718 :
719 : /*
720 : * Free the attribute lists, before the ndistinct itself.
721 : */
722 54 : for (int i = 0; i < nitems; i++)
723 36 : pfree(ndistinct->items[i].attributes);
724 18 : pfree(ndistinct);
725 :
726 18 : return bytes;
727 : }
728 :
729 : /*
730 : * pg_ndistinct_in
731 : * input routine for type pg_ndistinct.
732 : */
733 : Datum
734 450 : pg_ndistinct_in(PG_FUNCTION_ARGS)
735 : {
736 450 : char *str = PG_GETARG_CSTRING(0);
737 : NDistinctParseState parse_state;
738 : JsonParseErrorType result;
739 : JsonLexContext *lex;
740 : JsonSemAction sem_action;
741 450 : bytea *bytes = NULL;
742 :
743 : /* initialize semantic state */
744 450 : parse_state.str = str;
745 450 : parse_state.state = NDIST_EXPECT_START;
746 450 : parse_state.distinct_items = NIL;
747 450 : parse_state.escontext = fcinfo->context;
748 450 : parse_state.found_attributes = false;
749 450 : parse_state.found_ndistinct = false;
750 450 : parse_state.attnum_list = NIL;
751 450 : parse_state.ndistinct = 0;
752 :
753 : /* set callbacks */
754 450 : sem_action.semstate = (void *) &parse_state;
755 450 : sem_action.object_start = ndistinct_object_start;
756 450 : sem_action.object_end = ndistinct_object_end;
757 450 : sem_action.array_start = ndistinct_array_start;
758 450 : sem_action.array_end = ndistinct_array_end;
759 450 : sem_action.object_field_start = ndistinct_object_field_start;
760 450 : sem_action.object_field_end = NULL;
761 450 : sem_action.array_element_start = ndistinct_array_element_start;
762 450 : sem_action.array_element_end = NULL;
763 450 : sem_action.scalar = ndistinct_scalar;
764 :
765 450 : lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
766 : PG_UTF8, true);
767 450 : result = pg_parse_json(lex, &sem_action);
768 270 : freeJsonLexContext(lex);
769 :
770 270 : if (result == JSON_SUCCESS)
771 42 : bytes = build_mvndistinct(&parse_state, str);
772 :
773 258 : list_free(parse_state.attnum_list);
774 258 : list_free_deep(parse_state.distinct_items);
775 :
776 258 : if (bytes)
777 18 : PG_RETURN_BYTEA_P(bytes);
778 :
779 : /*
780 : * If escontext already set, just use that. Anything else is a generic
781 : * JSON parse error.
782 : */
783 240 : if (!SOFT_ERROR_OCCURRED(parse_state.escontext))
784 48 : errsave(parse_state.escontext,
785 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
786 : errmsg("malformed pg_ndistinct: \"%s\"", str),
787 : errdetail("Must be valid JSON."));
788 :
789 216 : PG_RETURN_NULL();
790 : }
791 :
792 : /*
793 : * pg_ndistinct_out
794 : * output routine for type pg_ndistinct
795 : *
796 : * Produces a human-readable representation of the value.
797 : */
798 : Datum
799 42 : pg_ndistinct_out(PG_FUNCTION_ARGS)
800 : {
801 42 : bytea *data = PG_GETARG_BYTEA_PP(0);
802 42 : MVNDistinct *ndist = statext_ndistinct_deserialize(data);
803 : int i;
804 : StringInfoData str;
805 :
806 42 : initStringInfo(&str);
807 42 : appendStringInfoChar(&str, '[');
808 :
809 174 : for (i = 0; i < ndist->nitems; i++)
810 : {
811 132 : MVNDistinctItem item = ndist->items[i];
812 :
813 132 : if (i > 0)
814 90 : appendStringInfoString(&str, ", ");
815 :
816 132 : if (item.nattributes <= 0)
817 0 : elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
818 :
819 132 : appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
820 132 : item.attributes[0]);
821 :
822 300 : for (int j = 1; j < item.nattributes; j++)
823 168 : appendStringInfo(&str, ", %d", item.attributes[j]);
824 :
825 132 : appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
826 132 : (int) item.ndistinct);
827 : }
828 :
829 42 : appendStringInfoChar(&str, ']');
830 :
831 42 : PG_RETURN_CSTRING(str.data);
832 : }
833 :
834 : /*
835 : * pg_ndistinct_recv
836 : * binary input routine for type pg_ndistinct
837 : */
838 : Datum
839 0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
840 : {
841 0 : ereport(ERROR,
842 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
843 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
844 :
845 : PG_RETURN_VOID(); /* keep compiler quiet */
846 : }
847 :
848 : /*
849 : * pg_ndistinct_send
850 : * binary output routine for type pg_ndistinct
851 : *
852 : * n-distinct is serialized into a bytea value, so let's send that.
853 : */
854 : Datum
855 0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
856 : {
857 0 : return byteasend(fcinfo);
858 : }
|