Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_ndistinct.c
4 : * pg_ndistinct data type support.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/pg_ndistinct.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "common/int.h"
18 : #include "common/jsonapi.h"
19 : #include "lib/stringinfo.h"
20 : #include "mb/pg_wchar.h"
21 : #include "nodes/miscnodes.h"
22 : #include "statistics/extended_stats_internal.h"
23 : #include "statistics/statistics_format.h"
24 : #include "utils/builtins.h"
25 : #include "utils/fmgrprotos.h"
26 :
27 : /* Parsing state data */
28 : typedef enum
29 : {
30 : NDIST_EXPECT_START = 0,
31 : NDIST_EXPECT_ITEM,
32 : NDIST_EXPECT_KEY,
33 : NDIST_EXPECT_ATTNUM_LIST,
34 : NDIST_EXPECT_ATTNUM,
35 : NDIST_EXPECT_NDISTINCT,
36 : NDIST_EXPECT_COMPLETE,
37 : } NDistinctSemanticState;
38 :
39 : typedef struct
40 : {
41 : const char *str;
42 : NDistinctSemanticState state;
43 :
44 : List *distinct_items; /* Accumulated complete MVNDistinctItems */
45 : Node *escontext;
46 :
47 : bool found_attributes; /* Item has "attributes" key */
48 : bool found_ndistinct; /* Item has "ndistinct" key */
49 : List *attnum_list; /* Accumulated attribute numbers */
50 : int32 ndistinct;
51 : } NDistinctParseState;
52 :
53 : /*
54 : * Invoked at the start of each MVNDistinctItem.
55 : *
56 : * The entire JSON document should be one array of MVNDistinctItem objects.
57 : * If we are anywhere else in the document, it is an error.
58 : */
59 : static JsonParseErrorType
60 384 : ndistinct_object_start(void *state)
61 : {
62 384 : NDistinctParseState *parse = state;
63 :
64 384 : switch (parse->state)
65 : {
66 344 : case NDIST_EXPECT_ITEM:
67 : /* Now we expect to see attributes/ndistinct keys */
68 344 : parse->state = NDIST_EXPECT_KEY;
69 344 : return JSON_SUCCESS;
70 :
71 16 : case NDIST_EXPECT_START:
72 : /* pg_ndistinct must begin with a '[' */
73 16 : errsave(parse->escontext,
74 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
75 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
76 : errdetail("Initial element must be an array."));
77 8 : break;
78 :
79 0 : case NDIST_EXPECT_KEY:
80 : /* In an object, expecting key */
81 0 : errsave(parse->escontext,
82 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
83 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
84 : errdetail("A key was expected."));
85 0 : break;
86 :
87 8 : case NDIST_EXPECT_ATTNUM_LIST:
88 : /* Just followed an "attributes" key */
89 8 : errsave(parse->escontext,
90 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
91 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
92 : errdetail("Value of \"%s\" must be an array of attribute numbers.",
93 : PG_NDISTINCT_KEY_ATTRIBUTES));
94 4 : break;
95 :
96 8 : case NDIST_EXPECT_ATTNUM:
97 : /* In an attribute number list, expect only scalar integers */
98 8 : errsave(parse->escontext,
99 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
100 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
101 : errdetail("Attribute lists can only contain attribute numbers."));
102 4 : break;
103 :
104 8 : case NDIST_EXPECT_NDISTINCT:
105 : /* Just followed an "ndistinct" key */
106 8 : errsave(parse->escontext,
107 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
108 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
109 : errdetail("Value of \"%s\" must be an integer.",
110 : PG_NDISTINCT_KEY_NDISTINCT));
111 4 : break;
112 :
113 0 : default:
114 0 : elog(ERROR,
115 : "object start of \"%s\" found in unexpected parse state: %d.",
116 : "pg_ndistinct", (int) parse->state);
117 : break;
118 : }
119 :
120 20 : return JSON_SEM_ACTION_FAILED;
121 : }
122 :
123 : /*
124 : * Invoked at the end of an object.
125 : *
126 : * Check to ensure that it was a complete MVNDistinctItem
127 : */
128 : static JsonParseErrorType
129 144 : ndistinct_object_end(void *state)
130 : {
131 144 : NDistinctParseState *parse = state;
132 :
133 144 : int natts = 0;
134 :
135 : MVNDistinctItem *item;
136 :
137 144 : if (parse->state != NDIST_EXPECT_KEY)
138 0 : elog(ERROR,
139 : "object end of \"%s\" found in unexpected parse state: %d.",
140 : "pg_ndistinct", (int) parse->state);
141 :
142 144 : if (!parse->found_attributes)
143 : {
144 8 : errsave(parse->escontext,
145 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
146 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
147 : errdetail("Item must contain \"%s\" key.",
148 : PG_NDISTINCT_KEY_ATTRIBUTES));
149 4 : return JSON_SEM_ACTION_FAILED;
150 : }
151 :
152 136 : if (!parse->found_ndistinct)
153 : {
154 8 : errsave(parse->escontext,
155 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
156 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
157 : errdetail("Item must contain \"%s\" key.",
158 : PG_NDISTINCT_KEY_NDISTINCT));
159 4 : return JSON_SEM_ACTION_FAILED;
160 : }
161 :
162 : /*
163 : * We need at least two attribute numbers for a ndistinct item, anything
164 : * less is malformed.
165 : */
166 128 : natts = list_length(parse->attnum_list);
167 128 : if ((natts < 2) || (natts > STATS_MAX_DIMENSIONS))
168 : {
169 16 : errsave(parse->escontext,
170 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
171 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
172 : errdetail("The \"%s\" key must contain an array of at least %d and no more than %d attributes.",
173 : PG_NDISTINCT_KEY_ATTRIBUTES, 2, STATS_MAX_DIMENSIONS));
174 8 : return JSON_SEM_ACTION_FAILED;
175 : }
176 :
177 : /* Create the MVNDistinctItem */
178 112 : item = palloc_object(MVNDistinctItem);
179 112 : item->nattributes = natts;
180 112 : item->attributes = palloc0(natts * sizeof(AttrNumber));
181 112 : item->ndistinct = (double) parse->ndistinct;
182 :
183 372 : for (int i = 0; i < natts; i++)
184 260 : item->attributes[i] = (AttrNumber) list_nth_int(parse->attnum_list, i);
185 :
186 112 : parse->distinct_items = lappend(parse->distinct_items, (void *) item);
187 :
188 : /* reset item state vars */
189 112 : list_free(parse->attnum_list);
190 112 : parse->attnum_list = NIL;
191 112 : parse->ndistinct = 0;
192 112 : parse->found_attributes = false;
193 112 : parse->found_ndistinct = false;
194 :
195 : /* Now we are looking for the next MVNDistinctItem */
196 112 : parse->state = NDIST_EXPECT_ITEM;
197 112 : return JSON_SUCCESS;
198 : }
199 :
200 :
201 : /*
202 : * Invoked at the start of an array.
203 : *
204 : * ndistinct input format has two types of arrays, the outer MVNDistinctItem
205 : * array and the attribute number array within each MVNDistinctItem.
206 : */
207 : static JsonParseErrorType
208 612 : ndistinct_array_start(void *state)
209 : {
210 612 : NDistinctParseState *parse = state;
211 :
212 612 : switch (parse->state)
213 : {
214 284 : case NDIST_EXPECT_ATTNUM_LIST:
215 284 : parse->state = NDIST_EXPECT_ATTNUM;
216 284 : break;
217 :
218 304 : case NDIST_EXPECT_START:
219 304 : parse->state = NDIST_EXPECT_ITEM;
220 304 : break;
221 :
222 24 : default:
223 24 : errsave(parse->escontext,
224 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
225 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
226 : errdetail("Array has been found at an unexpected location."));
227 12 : return JSON_SEM_ACTION_FAILED;
228 : }
229 :
230 588 : return JSON_SUCCESS;
231 : }
232 :
233 :
234 : /*
235 : * Invoked at the end of an array.
236 : *
237 : * Arrays can never be empty.
238 : */
239 : static JsonParseErrorType
240 296 : ndistinct_array_end(void *state)
241 : {
242 296 : NDistinctParseState *parse = state;
243 :
244 296 : switch (parse->state)
245 : {
246 232 : case NDIST_EXPECT_ATTNUM:
247 232 : if (list_length(parse->attnum_list) > 0)
248 : {
249 : /*
250 : * The attribute number list is complete, look for more
251 : * MVNDistinctItem keys.
252 : */
253 224 : parse->state = NDIST_EXPECT_KEY;
254 224 : return JSON_SUCCESS;
255 : }
256 :
257 8 : errsave(parse->escontext,
258 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
259 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
260 : errdetail("The \"%s\" key must be a non-empty array.",
261 : PG_NDISTINCT_KEY_ATTRIBUTES));
262 4 : break;
263 :
264 64 : case NDIST_EXPECT_ITEM:
265 64 : if (list_length(parse->distinct_items) > 0)
266 : {
267 : /* Item list is complete, we are done. */
268 56 : parse->state = NDIST_EXPECT_COMPLETE;
269 56 : return JSON_SUCCESS;
270 : }
271 :
272 8 : errsave(parse->escontext,
273 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
274 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
275 : errdetail("Item array cannot be empty."));
276 4 : break;
277 :
278 0 : default:
279 :
280 : /*
281 : * This can only happen if a case was missed in
282 : * ndistinct_array_start().
283 : */
284 0 : elog(ERROR,
285 : "array end of \"%s\" found in unexpected parse state: %d.",
286 : "pg_ndistinct", (int) parse->state);
287 : break;
288 : }
289 :
290 8 : return JSON_SEM_ACTION_FAILED;
291 : }
292 :
293 : /*
294 : * Invoked at the start of a key/value field.
295 : *
296 : * The valid keys for the MVNDistinctItem object are:
297 : * - attributes
298 : * - ndistinct
299 : */
300 : static JsonParseErrorType
301 544 : ndistinct_object_field_start(void *state, char *fname, bool isnull)
302 : {
303 544 : NDistinctParseState *parse = state;
304 :
305 544 : if (strcmp(fname, PG_NDISTINCT_KEY_ATTRIBUTES) == 0)
306 : {
307 328 : if (parse->found_attributes)
308 : {
309 8 : errsave(parse->escontext,
310 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
311 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
312 : errdetail("Multiple \"%s\" keys are not allowed.",
313 : PG_NDISTINCT_KEY_ATTRIBUTES));
314 4 : return JSON_SEM_ACTION_FAILED;
315 : }
316 320 : parse->found_attributes = true;
317 320 : parse->state = NDIST_EXPECT_ATTNUM_LIST;
318 320 : return JSON_SUCCESS;
319 : }
320 :
321 216 : if (strcmp(fname, PG_NDISTINCT_KEY_NDISTINCT) == 0)
322 : {
323 200 : if (parse->found_ndistinct)
324 : {
325 8 : errsave(parse->escontext,
326 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
327 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
328 : errdetail("Multiple \"%s\" keys are not allowed.",
329 : PG_NDISTINCT_KEY_NDISTINCT));
330 4 : return JSON_SEM_ACTION_FAILED;
331 : }
332 192 : parse->found_ndistinct = true;
333 192 : parse->state = NDIST_EXPECT_NDISTINCT;
334 192 : return JSON_SUCCESS;
335 : }
336 :
337 16 : errsave(parse->escontext,
338 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
339 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
340 : errdetail("Only allowed keys are \"%s\" and \"%s\".",
341 : PG_NDISTINCT_KEY_ATTRIBUTES,
342 : PG_NDISTINCT_KEY_NDISTINCT));
343 8 : return JSON_SEM_ACTION_FAILED;
344 : }
345 :
346 : /*
347 : * Invoked at the start of an array element.
348 : *
349 : * The overall structure of the datatype is an array, but there are also
350 : * arrays as the value of every attributes key.
351 : */
352 : static JsonParseErrorType
353 968 : ndistinct_array_element_start(void *state, bool isnull)
354 : {
355 968 : const NDistinctParseState *parse = state;
356 :
357 968 : switch (parse->state)
358 : {
359 616 : case NDIST_EXPECT_ATTNUM:
360 616 : if (!isnull)
361 608 : return JSON_SUCCESS;
362 :
363 8 : errsave(parse->escontext,
364 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
365 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
366 : errdetail("Attribute number array cannot be null."));
367 4 : break;
368 :
369 352 : case NDIST_EXPECT_ITEM:
370 352 : if (!isnull)
371 344 : return JSON_SUCCESS;
372 :
373 8 : errsave(parse->escontext,
374 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
375 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
376 : errdetail("Item list elements cannot be null."));
377 :
378 4 : break;
379 :
380 0 : default:
381 0 : elog(ERROR,
382 : "array element start of \"%s\" found in unexpected parse state: %d.",
383 : "pg_ndistinct", (int) parse->state);
384 : break;
385 : }
386 :
387 8 : return JSON_SEM_ACTION_FAILED;
388 : }
389 :
390 : /*
391 : * Test for valid subsequent attribute number.
392 : *
393 : * If the previous value is positive, then current value must either be
394 : * greater than the previous value, or negative.
395 : *
396 : * If the previous value is negative, then the value must be less than
397 : * the previous value.
398 : *
399 : * Duplicate values are obviously not allowed, but that is already covered
400 : * by the rules listed above.
401 : */
402 : static bool
403 316 : valid_subsequent_attnum(AttrNumber prev, AttrNumber cur)
404 : {
405 : Assert(prev != 0);
406 :
407 316 : if (prev > 0)
408 304 : return ((cur > prev) || (cur < 0));
409 :
410 12 : return (cur < prev);
411 : }
412 :
413 : /*
414 : * Handle scalar events from the ndistinct input parser.
415 : *
416 : * Override integer parse error messages and replace them with errors
417 : * specific to the context.
418 : */
419 : static JsonParseErrorType
420 796 : ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
421 : {
422 796 : NDistinctParseState *parse = state;
423 : AttrNumber attnum;
424 796 : ErrorSaveContext escontext = {T_ErrorSaveContext};
425 :
426 796 : switch (parse->state)
427 : {
428 600 : case NDIST_EXPECT_ATTNUM:
429 600 : attnum = pg_strtoint16_safe(token, (Node *) &escontext);
430 :
431 600 : if (escontext.error_occurred)
432 : {
433 8 : errsave(parse->escontext,
434 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
435 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
436 : errdetail("Key \"%s\" has an incorrect value.", PG_NDISTINCT_KEY_ATTRIBUTES));
437 4 : return JSON_SEM_ACTION_FAILED;
438 : }
439 :
440 : /*
441 : * The attribute number cannot be zero a negative number beyond
442 : * the number of the possible expressions.
443 : */
444 592 : if (attnum == 0 || attnum < (0 - STATS_MAX_DIMENSIONS))
445 : {
446 12 : errsave(parse->escontext,
447 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
448 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
449 : errdetail("Invalid \"%s\" element has been found: %d.",
450 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum));
451 4 : return JSON_SEM_ACTION_FAILED;
452 : }
453 :
454 580 : if (list_length(parse->attnum_list) > 0)
455 : {
456 316 : const AttrNumber prev = llast_int(parse->attnum_list);
457 :
458 316 : if (!valid_subsequent_attnum(prev, attnum))
459 : {
460 8 : errsave(parse->escontext,
461 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
462 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
463 : errdetail("Invalid \"%s\" element has been found: %d cannot follow %d.",
464 : PG_NDISTINCT_KEY_ATTRIBUTES, attnum, prev));
465 4 : return JSON_SEM_ACTION_FAILED;
466 : }
467 : }
468 :
469 572 : parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
470 572 : return JSON_SUCCESS;
471 :
472 160 : case NDIST_EXPECT_NDISTINCT:
473 :
474 : /*
475 : * While the structure dictates that ndistinct is a double
476 : * precision floating point, it has always been an integer in the
477 : * output generated. Therefore, we parse it as an integer here.
478 : */
479 160 : parse->ndistinct = pg_strtoint32_safe(token, (Node *) &escontext);
480 :
481 160 : if (!escontext.error_occurred)
482 : {
483 144 : parse->state = NDIST_EXPECT_KEY;
484 144 : return JSON_SUCCESS;
485 : }
486 :
487 16 : errsave(parse->escontext,
488 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
489 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
490 : errdetail("Key \"%s\" has an incorrect value.",
491 : PG_NDISTINCT_KEY_NDISTINCT));
492 8 : break;
493 :
494 36 : default:
495 36 : errsave(parse->escontext,
496 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
497 : errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
498 : errdetail("Unexpected scalar has been found."));
499 20 : break;
500 : }
501 :
502 28 : return JSON_SEM_ACTION_FAILED;
503 : }
504 :
505 : /*
506 : * Compare the attribute arrays of two MVNDistinctItem values,
507 : * looking for duplicate sets. Return true if a duplicate set is found.
508 : *
509 : * The arrays are required to be in canonical order (all positive numbers
510 : * in ascending order first, followed by all negative numbers in descending
511 : * order) so it's safe to compare the attrnums in order, stopping at the
512 : * first difference.
513 : */
514 : static bool
515 96 : item_attributes_eq(const MVNDistinctItem *a, const MVNDistinctItem *b)
516 : {
517 96 : if (a->nattributes != b->nattributes)
518 64 : return false;
519 :
520 60 : for (int i = 0; i < a->nattributes; i++)
521 : {
522 52 : if (a->attributes[i] != b->attributes[i])
523 24 : return false;
524 : }
525 :
526 8 : return true;
527 : }
528 :
529 : /*
530 : * Ensure that an attribute number appears as one of the attribute numbers
531 : * in a MVNDistinctItem.
532 : */
533 : static bool
534 56 : item_has_attnum(const MVNDistinctItem *item, AttrNumber attnum)
535 : {
536 140 : for (int i = 0; i < item->nattributes; i++)
537 : {
538 132 : if (attnum == item->attributes[i])
539 48 : return true;
540 : }
541 8 : return false;
542 : }
543 :
544 : /*
545 : * Ensure that the attributes in MVNDistinctItem A are a subset of the
546 : * reference MVNDistinctItem B.
547 : */
548 : static bool
549 32 : item_is_attnum_subset(const MVNDistinctItem *item,
550 : const MVNDistinctItem *refitem)
551 : {
552 80 : for (int i = 0; i < item->nattributes; i++)
553 : {
554 56 : if (!item_has_attnum(refitem, item->attributes[i]))
555 8 : return false;
556 : }
557 24 : return true;
558 : }
559 :
560 : /*
561 : * Generate a string representing an array of attribute numbers.
562 : *
563 : * Freeing the allocated string is the responsibility of the caller.
564 : */
565 : static char *
566 24 : item_attnum_list(const MVNDistinctItem *item)
567 : {
568 : StringInfoData str;
569 :
570 24 : initStringInfo(&str);
571 :
572 24 : appendStringInfo(&str, "%d", item->attributes[0]);
573 :
574 64 : for (int i = 1; i < item->nattributes; i++)
575 40 : appendStringInfo(&str, ", %d", item->attributes[i]);
576 :
577 24 : return str.data;
578 : }
579 :
580 : /*
581 : * Attempt to build and serialize the MVNDistinct object.
582 : *
583 : * This can only be executed after the completion of the JSON parsing.
584 : *
585 : * In the event of an error, set the error context and return NULL.
586 : */
587 : static bytea *
588 56 : build_mvndistinct(NDistinctParseState *parse, char *str)
589 : {
590 : MVNDistinct *ndistinct;
591 56 : int nitems = list_length(parse->distinct_items);
592 : bytea *bytes;
593 56 : int item_most_attrs = 0;
594 56 : int item_most_attrs_idx = 0;
595 :
596 56 : switch (parse->state)
597 : {
598 56 : case NDIST_EXPECT_COMPLETE:
599 :
600 : /*
601 : * Parsing has ended correctly and we should have a list of items.
602 : * If we don't, something has been done wrong in one of the
603 : * earlier parsing steps.
604 : */
605 56 : if (nitems == 0)
606 0 : elog(ERROR,
607 : "cannot have empty item list after parsing success.");
608 56 : break;
609 :
610 0 : case NDIST_EXPECT_START:
611 : /* blank */
612 0 : errsave(parse->escontext,
613 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
614 : errmsg("malformed pg_ndistinct: \"%s\"", str),
615 : errdetail("Value cannot be empty."));
616 0 : return NULL;
617 :
618 0 : default:
619 : /* Unexpected end-state. */
620 0 : errsave(parse->escontext,
621 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
622 : errmsg("malformed pg_ndistinct: \"%s\"", str),
623 : errdetail("Unexpected end state has been found: %d.", parse->state));
624 0 : return NULL;
625 : }
626 :
627 56 : ndistinct = palloc(offsetof(MVNDistinct, items) +
628 56 : nitems * sizeof(MVNDistinctItem));
629 :
630 56 : ndistinct->magic = STATS_NDISTINCT_MAGIC;
631 56 : ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
632 56 : ndistinct->nitems = nitems;
633 :
634 160 : for (int i = 0; i < nitems; i++)
635 : {
636 112 : MVNDistinctItem *item = list_nth(parse->distinct_items, i);
637 :
638 : /*
639 : * Ensure that this item does not duplicate the attributes of any
640 : * pre-existing item.
641 : */
642 200 : for (int j = 0; j < i; j++)
643 : {
644 96 : if (item_attributes_eq(item, &ndistinct->items[j]))
645 : {
646 8 : char *s = item_attnum_list(item);
647 :
648 8 : errsave(parse->escontext,
649 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
650 : errmsg("malformed pg_ndistinct: \"%s\"", str),
651 : errdetail("Duplicated \"%s\" array has been found: [%s].",
652 : PG_NDISTINCT_KEY_ATTRIBUTES, s));
653 4 : pfree(s);
654 4 : return NULL;
655 : }
656 : }
657 :
658 104 : ndistinct->items[i].ndistinct = item->ndistinct;
659 104 : ndistinct->items[i].nattributes = item->nattributes;
660 :
661 : /*
662 : * This transfers free-ing responsibility from the distinct_items list
663 : * to the ndistinct object.
664 : */
665 104 : ndistinct->items[i].attributes = item->attributes;
666 :
667 : /*
668 : * Keep track of the first longest attribute list. All other attribute
669 : * lists must be a subset of this list.
670 : */
671 104 : if (item->nattributes > item_most_attrs)
672 : {
673 84 : item_most_attrs = item->nattributes;
674 84 : item_most_attrs_idx = i;
675 : }
676 : }
677 :
678 : /*
679 : * Verify that all the sets of attribute numbers are a proper subset of
680 : * the longest set recorded. This acts as an extra sanity check based on
681 : * the input given. Note that this still needs to be cross-checked with
682 : * the extended statistics objects this would be assigned to, but it
683 : * provides one extra layer of protection.
684 : */
685 112 : for (int i = 0; i < nitems; i++)
686 : {
687 72 : if (i == item_most_attrs_idx)
688 40 : continue;
689 :
690 32 : if (!item_is_attnum_subset(&ndistinct->items[i],
691 32 : &ndistinct->items[item_most_attrs_idx]))
692 : {
693 8 : const MVNDistinctItem *item = &ndistinct->items[i];
694 8 : const MVNDistinctItem *refitem = &ndistinct->items[item_most_attrs_idx];
695 8 : char *item_list = item_attnum_list(item);
696 8 : char *refitem_list = item_attnum_list(refitem);
697 :
698 8 : errsave(parse->escontext,
699 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
700 : errmsg("malformed pg_ndistinct: \"%s\"", str),
701 : errdetail("\"%s\" array [%s] must be a subset of array [%s].",
702 : PG_NDISTINCT_KEY_ATTRIBUTES,
703 : item_list, refitem_list));
704 4 : pfree(item_list);
705 4 : pfree(refitem_list);
706 4 : return NULL;
707 : }
708 : }
709 :
710 40 : bytes = statext_ndistinct_serialize(ndistinct);
711 :
712 : /*
713 : * Free the attribute lists, before the ndistinct itself.
714 : */
715 104 : for (int i = 0; i < nitems; i++)
716 64 : pfree(ndistinct->items[i].attributes);
717 40 : pfree(ndistinct);
718 :
719 40 : return bytes;
720 : }
721 :
722 : /*
723 : * pg_ndistinct_in
724 : * input routine for type pg_ndistinct.
725 : */
726 : Datum
727 328 : pg_ndistinct_in(PG_FUNCTION_ARGS)
728 : {
729 328 : char *str = PG_GETARG_CSTRING(0);
730 : NDistinctParseState parse_state;
731 : JsonParseErrorType result;
732 : JsonLexContext *lex;
733 : JsonSemAction sem_action;
734 328 : bytea *bytes = NULL;
735 :
736 : /* initialize semantic state */
737 328 : parse_state.str = str;
738 328 : parse_state.state = NDIST_EXPECT_START;
739 328 : parse_state.distinct_items = NIL;
740 328 : parse_state.escontext = fcinfo->context;
741 328 : parse_state.found_attributes = false;
742 328 : parse_state.found_ndistinct = false;
743 328 : parse_state.attnum_list = NIL;
744 328 : parse_state.ndistinct = 0;
745 :
746 : /* set callbacks */
747 328 : sem_action.semstate = (void *) &parse_state;
748 328 : sem_action.object_start = ndistinct_object_start;
749 328 : sem_action.object_end = ndistinct_object_end;
750 328 : sem_action.array_start = ndistinct_array_start;
751 328 : sem_action.array_end = ndistinct_array_end;
752 328 : sem_action.object_field_start = ndistinct_object_field_start;
753 328 : sem_action.object_field_end = NULL;
754 328 : sem_action.array_element_start = ndistinct_array_element_start;
755 328 : sem_action.array_element_end = NULL;
756 328 : sem_action.scalar = ndistinct_scalar;
757 :
758 328 : lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
759 : PG_UTF8, true);
760 328 : result = pg_parse_json(lex, &sem_action);
761 208 : freeJsonLexContext(lex);
762 :
763 208 : if (result == JSON_SUCCESS)
764 56 : bytes = build_mvndistinct(&parse_state, str);
765 :
766 200 : list_free(parse_state.attnum_list);
767 200 : list_free_deep(parse_state.distinct_items);
768 :
769 200 : if (bytes)
770 40 : PG_RETURN_BYTEA_P(bytes);
771 :
772 : /*
773 : * If escontext already set, just use that. Anything else is a generic
774 : * JSON parse error.
775 : */
776 160 : if (!SOFT_ERROR_OCCURRED(parse_state.escontext))
777 32 : errsave(parse_state.escontext,
778 : errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
779 : errmsg("malformed pg_ndistinct: \"%s\"", str),
780 : errdetail("Input data must be valid JSON."));
781 :
782 144 : PG_RETURN_NULL();
783 : }
784 :
785 : /*
786 : * pg_ndistinct_out
787 : * output routine for type pg_ndistinct
788 : *
789 : * Produces a human-readable representation of the value.
790 : */
791 : Datum
792 73 : pg_ndistinct_out(PG_FUNCTION_ARGS)
793 : {
794 73 : bytea *data = PG_GETARG_BYTEA_PP(0);
795 73 : MVNDistinct *ndist = statext_ndistinct_deserialize(data);
796 : int i;
797 : StringInfoData str;
798 :
799 73 : initStringInfo(&str);
800 73 : appendStringInfoChar(&str, '[');
801 :
802 218 : for (i = 0; i < ndist->nitems; i++)
803 : {
804 145 : MVNDistinctItem item = ndist->items[i];
805 :
806 145 : if (i > 0)
807 72 : appendStringInfoString(&str, ", ");
808 :
809 145 : if (item.nattributes <= 0)
810 0 : elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
811 :
812 145 : appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
813 145 : item.attributes[0]);
814 :
815 318 : for (int j = 1; j < item.nattributes; j++)
816 173 : appendStringInfo(&str, ", %d", item.attributes[j]);
817 :
818 145 : appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
819 145 : (int) item.ndistinct);
820 : }
821 :
822 73 : appendStringInfoChar(&str, ']');
823 :
824 73 : PG_RETURN_CSTRING(str.data);
825 : }
826 :
827 : /*
828 : * pg_ndistinct_recv
829 : * binary input routine for type pg_ndistinct
830 : */
831 : Datum
832 0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
833 : {
834 0 : ereport(ERROR,
835 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
837 :
838 : PG_RETURN_VOID(); /* keep compiler quiet */
839 : }
840 :
841 : /*
842 : * pg_ndistinct_send
843 : * binary output routine for type pg_ndistinct
844 : *
845 : * n-distinct is serialized into a bytea value, so let's send that.
846 : */
847 : Datum
848 0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
849 : {
850 0 : return byteasend(fcinfo);
851 : }
|