Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parse_collate.c
4 : * Routines for assigning collation information.
5 : *
6 : * We choose to handle collation analysis in a post-pass over the output
7 : * of expression parse analysis. This is because we need more state to
8 : * perform this processing than is needed in the finished tree. If we
9 : * did it on-the-fly while building the tree, all that state would have
10 : * to be kept in expression node trees permanently. This way, the extra
11 : * storage is just local variables in this recursive routine.
12 : *
13 : * The info that is actually saved in the finished tree is:
14 : * 1. The output collation of each expression node, or InvalidOid if it
15 : * returns a noncollatable data type. This can also be InvalidOid if the
16 : * result type is collatable but the collation is indeterminate.
17 : * 2. The collation to be used in executing each function. InvalidOid means
18 : * that there are no collatable inputs or their collation is indeterminate.
19 : * This value is only stored in node types that might call collation-using
20 : * functions.
21 : *
22 : * You might think we could get away with storing only one collation per
23 : * node, but the two concepts really need to be kept distinct. Otherwise
24 : * it's too confusing when a function produces a collatable output type but
25 : * has no collatable inputs or produces noncollatable output from collatable
26 : * inputs.
27 : *
28 : * Cases with indeterminate collation might result in an error being thrown
29 : * at runtime. If we knew exactly which functions require collation
30 : * information, we could throw those errors at parse time instead.
31 : *
32 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
33 : * Portions Copyright (c) 1994, Regents of the University of California
34 : *
35 : *
36 : * IDENTIFICATION
37 : * src/backend/parser/parse_collate.c
38 : *
39 : *-------------------------------------------------------------------------
40 : */
41 : #include "postgres.h"
42 :
43 : #include "catalog/pg_aggregate.h"
44 : #include "catalog/pg_collation.h"
45 : #include "nodes/makefuncs.h"
46 : #include "nodes/nodeFuncs.h"
47 : #include "parser/parse_collate.h"
48 : #include "utils/lsyscache.h"
49 :
50 :
/*
 * Collation strength, which the SQL standard refers to as "derivation".
 *
 * The members are numbered so that ordinary integer comparison of two
 * strengths works usefully: merge_collation_state() relies on ">" to decide
 * whether a new subexpression overrides what has been seen so far.
 *
 * Note: the standard doesn't seem to distinguish between NONE and CONFLICT.
 */
typedef enum
{
	COLLATE_NONE = 0,			/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT = 1,		/* collation was derived implicitly */
	COLLATE_CONFLICT = 2,		/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT = 3,		/* collation was derived explicitly */
} CollateStrength;
63 :
64 : typedef struct
65 : {
66 : ParseState *pstate; /* parse state (for error reporting) */
67 : Oid collation; /* OID of current collation, if any */
68 : CollateStrength strength; /* strength of current collation choice */
69 : int location; /* location of expr that set collation */
70 : /* Remaining fields are only valid when strength == COLLATE_CONFLICT */
71 : Oid collation2; /* OID of conflicting collation */
72 : int location2; /* location of expr that set collation2 */
73 : } assign_collations_context;
74 :
75 : static bool assign_query_collations_walker(Node *node, ParseState *pstate);
76 : static bool assign_collations_walker(Node *node,
77 : assign_collations_context *context);
78 : static void merge_collation_state(Oid collation,
79 : CollateStrength strength,
80 : int location,
81 : Oid collation2,
82 : int location2,
83 : assign_collations_context *context);
84 : static void assign_aggregate_collations(Aggref *aggref,
85 : assign_collations_context *loccontext);
86 : static void assign_ordered_set_collations(Aggref *aggref,
87 : assign_collations_context *loccontext);
88 : static void assign_hypothetical_collations(Aggref *aggref,
89 : assign_collations_context *loccontext);
90 :
91 :
92 : /*
93 : * assign_query_collations()
94 : * Mark all expressions in the given Query with collation information.
95 : *
96 : * This should be applied to each Query after completion of parse analysis
97 : * for expressions. Note that we do not recurse into sub-Queries, since
98 : * those should have been processed when built.
99 : */
100 : void
101 370490 : assign_query_collations(ParseState *pstate, Query *query)
102 : {
103 : /*
104 : * We just use query_tree_walker() to visit all the contained expressions.
105 : * We can skip the rangetable and CTE subqueries, though, since RTEs and
106 : * subqueries had better have been processed already (else Vars referring
107 : * to them would not get created with the right collation).
108 : */
109 370490 : (void) query_tree_walker(query,
110 : assign_query_collations_walker,
111 : pstate,
112 : QTW_IGNORE_RANGE_TABLE |
113 : QTW_IGNORE_CTE_SUBQUERIES);
114 370462 : }
115 :
116 : /*
117 : * Walker for assign_query_collations
118 : *
119 : * Each expression found by query_tree_walker is processed independently.
120 : * Note that query_tree_walker may pass us a whole List, such as the
121 : * targetlist, in which case each subexpression must be processed
122 : * independently --- we don't want to bleat if two different targetentries
123 : * have different collations.
124 : */
125 : static bool
126 4079166 : assign_query_collations_walker(Node *node, ParseState *pstate)
127 : {
128 : /* Need do nothing for empty subexpressions */
129 4079166 : if (node == NULL)
130 3328369 : return false;
131 :
132 : /*
133 : * We don't want to recurse into a set-operations tree; it's already been
134 : * fully processed in transformSetOperationStmt.
135 : */
136 750797 : if (IsA(node, SetOperationStmt))
137 8375 : return false;
138 :
139 742422 : if (IsA(node, List))
140 363941 : assign_list_collations(pstate, (List *) node);
141 : else
142 378481 : assign_expr_collations(pstate, node);
143 :
144 742394 : return false;
145 : }
146 :
147 : /*
148 : * assign_list_collations()
149 : * Mark all nodes in the list of expressions with collation information.
150 : *
151 : * The list member expressions are processed independently; they do not have
152 : * to share a common collation.
153 : */
154 : void
155 406903 : assign_list_collations(ParseState *pstate, List *exprs)
156 : {
157 : ListCell *lc;
158 :
159 1449713 : foreach(lc, exprs)
160 : {
161 1042830 : Node *node = (Node *) lfirst(lc);
162 :
163 1042830 : assign_expr_collations(pstate, node);
164 : }
165 406883 : }
166 :
167 : /*
168 : * assign_expr_collations()
169 : * Mark all nodes in the given expression tree with collation information.
170 : *
171 : * This is exported for the benefit of various utility commands that process
172 : * expressions without building a complete Query. It should be applied after
173 : * calling transformExpr() plus any expression-modifying operations such as
174 : * coerce_to_boolean().
175 : */
176 : void
177 1515209 : assign_expr_collations(ParseState *pstate, Node *expr)
178 : {
179 : assign_collations_context context;
180 :
181 : /* initialize context for tree walk */
182 1515209 : context.pstate = pstate;
183 1515209 : context.collation = InvalidOid;
184 1515209 : context.strength = COLLATE_NONE;
185 1515209 : context.location = -1;
186 :
187 : /* and away we go */
188 1515209 : (void) assign_collations_walker(expr, &context);
189 1515177 : }
190 :
191 : /*
192 : * select_common_collation()
193 : * Identify a common collation for a list of expressions.
194 : *
195 : * The expressions should all return the same datatype, else this is not
196 : * terribly meaningful.
197 : *
198 : * none_ok means that it is permitted to return InvalidOid, indicating that
199 : * no common collation could be identified, even for collatable datatypes.
200 : * Otherwise, an error is thrown for conflict of implicit collations.
201 : *
202 : * In theory, none_ok = true reflects the rules of SQL standard clause "Result
203 : * of data type combinations", none_ok = false reflects the rules of clause
204 : * "Collation determination" (in some cases invoked via "Grouping
205 : * operations").
206 : */
207 : Oid
208 47657 : select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
209 : {
210 : assign_collations_context context;
211 :
212 : /* initialize context for tree walk */
213 47657 : context.pstate = pstate;
214 47657 : context.collation = InvalidOid;
215 47657 : context.strength = COLLATE_NONE;
216 47657 : context.location = -1;
217 :
218 : /* and away we go */
219 47657 : (void) assign_collations_walker((Node *) exprs, &context);
220 :
221 : /* deal with collation conflict */
222 47657 : if (context.strength == COLLATE_CONFLICT)
223 : {
224 48 : if (none_ok)
225 24 : return InvalidOid;
226 24 : ereport(ERROR,
227 : (errcode(ERRCODE_COLLATION_MISMATCH),
228 : errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
229 : get_collation_name(context.collation),
230 : get_collation_name(context.collation2)),
231 : errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232 : parser_errposition(context.pstate, context.location2)));
233 : }
234 :
235 : /*
236 : * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237 : * that's okay because it must mean none of the expressions returned
238 : * collatable datatypes.
239 : */
240 47609 : return context.collation;
241 : }
242 :
243 : /*
244 : * assign_collations_walker()
245 : * Recursive guts of collation processing.
246 : *
247 : * Nodes with no children (eg, Vars, Consts, Params) must have been marked
248 : * when built. All upper-level nodes are marked here.
249 : *
250 : * Note: if this is invoked directly on a List, it will attempt to infer a
251 : * common collation for all the list members. In particular, it will throw
252 : * error if there are conflicting explicit collations for different members.
253 : */
254 : static bool
255 5716106 : assign_collations_walker(Node *node, assign_collations_context *context)
256 : {
257 : assign_collations_context loccontext;
258 : Oid collation;
259 : CollateStrength strength;
260 : int location;
261 :
262 : /* Need do nothing for empty subexpressions */
263 5716106 : if (node == NULL)
264 315581 : return false;
265 :
266 : /*
267 : * Prepare for recursion. For most node types, though not all, the first
268 : * thing we do is recurse to process all nodes below this one. Each level
269 : * of the tree has its own local context.
270 : */
271 5400525 : loccontext.pstate = context->pstate;
272 5400525 : loccontext.collation = InvalidOid;
273 5400525 : loccontext.strength = COLLATE_NONE;
274 5400525 : loccontext.location = -1;
275 : /* Set these fields just to suppress uninitialized-value warnings: */
276 5400525 : loccontext.collation2 = InvalidOid;
277 5400525 : loccontext.location2 = -1;
278 :
279 : /*
280 : * Recurse if appropriate, then determine the collation for this node.
281 : *
282 : * Note: the general cases are at the bottom of the switch, after various
283 : * special cases.
284 : */
285 5400525 : switch (nodeTag(node))
286 : {
287 6459 : case T_CollateExpr:
288 : {
289 : /*
290 : * COLLATE sets an explicitly derived collation, regardless of
291 : * what the child state is. But we must recurse to set up
292 : * collation info below here.
293 : */
294 6459 : CollateExpr *expr = (CollateExpr *) node;
295 :
296 6459 : (void) expression_tree_walker(node,
297 : assign_collations_walker,
298 : &loccontext);
299 :
300 6459 : collation = expr->collOid;
301 : Assert(OidIsValid(collation));
302 6459 : strength = COLLATE_EXPLICIT;
303 6459 : location = expr->location;
304 : }
305 6459 : break;
306 12297 : case T_FieldSelect:
307 : {
308 : /*
309 : * For FieldSelect, the result has the field's declared
310 : * collation, independently of what happened in the arguments.
311 : * (The immediate argument must be composite and thus not
312 : * collatable, anyhow.) The field's collation was already
313 : * looked up and saved in the node.
314 : */
315 12297 : FieldSelect *expr = (FieldSelect *) node;
316 :
317 : /* ... but first, recurse */
318 12297 : (void) expression_tree_walker(node,
319 : assign_collations_walker,
320 : &loccontext);
321 :
322 12297 : if (OidIsValid(expr->resultcollid))
323 : {
324 : /* Node's result type is collatable. */
325 : /* Pass up field's collation as an implicit choice. */
326 1913 : collation = expr->resultcollid;
327 1913 : strength = COLLATE_IMPLICIT;
328 1913 : location = exprLocation(node);
329 : }
330 : else
331 : {
332 : /* Node's result type isn't collatable. */
333 10384 : collation = InvalidOid;
334 10384 : strength = COLLATE_NONE;
335 10384 : location = -1; /* won't be used */
336 : }
337 : }
338 12297 : break;
339 2724 : case T_RowExpr:
340 : {
341 : /*
342 : * RowExpr is a special case because the subexpressions are
343 : * independent: we don't want to complain if some of them have
344 : * incompatible explicit collations.
345 : */
346 2724 : RowExpr *expr = (RowExpr *) node;
347 :
348 2724 : assign_list_collations(context->pstate, expr->args);
349 :
350 : /*
351 : * Since the result is always composite and therefore never
352 : * has a collation, we can just stop here: this node has no
353 : * impact on the collation of its parent.
354 : */
355 2724 : return false; /* done */
356 : }
357 184 : case T_RowCompareExpr:
358 : {
359 : /*
360 : * For RowCompare, we have to find the common collation of
361 : * each pair of input columns and build a list. If we can't
362 : * find a common collation, we just put InvalidOid into the
363 : * list, which may or may not cause an error at runtime.
364 : */
365 184 : RowCompareExpr *expr = (RowCompareExpr *) node;
366 184 : List *colls = NIL;
367 : ListCell *l;
368 : ListCell *r;
369 :
370 588 : forboth(l, expr->largs, r, expr->rargs)
371 : {
372 404 : Node *le = (Node *) lfirst(l);
373 404 : Node *re = (Node *) lfirst(r);
374 : Oid coll;
375 :
376 404 : coll = select_common_collation(context->pstate,
377 404 : list_make2(le, re),
378 : true);
379 404 : colls = lappend_oid(colls, coll);
380 : }
381 184 : expr->inputcollids = colls;
382 :
383 : /*
384 : * Since the result is always boolean and therefore never has
385 : * a collation, we can just stop here: this node has no impact
386 : * on the collation of its parent.
387 : */
388 184 : return false; /* done */
389 : }
390 50205 : case T_CoerceToDomain:
391 : {
392 : /*
393 : * If the domain declaration included a non-default COLLATE
394 : * spec, then use that collation as the output collation of
395 : * the coercion. Otherwise allow the input collation to
396 : * bubble up. (The input should be of the domain's base type,
397 : * therefore we don't need to worry about it not being
398 : * collatable when the domain is.)
399 : */
400 50205 : CoerceToDomain *expr = (CoerceToDomain *) node;
401 50205 : Oid typcollation = get_typcollation(expr->resulttype);
402 :
403 : /* ... but first, recurse */
404 50205 : (void) expression_tree_walker(node,
405 : assign_collations_walker,
406 : &loccontext);
407 :
408 50205 : if (OidIsValid(typcollation))
409 : {
410 : /* Node's result type is collatable. */
411 41885 : if (typcollation == DEFAULT_COLLATION_OID)
412 : {
413 : /* Collation state bubbles up from child. */
414 416 : collation = loccontext.collation;
415 416 : strength = loccontext.strength;
416 416 : location = loccontext.location;
417 : }
418 : else
419 : {
420 : /* Use domain's collation as an implicit choice. */
421 41469 : collation = typcollation;
422 41469 : strength = COLLATE_IMPLICIT;
423 41469 : location = exprLocation(node);
424 : }
425 : }
426 : else
427 : {
428 : /* Node's result type isn't collatable. */
429 8320 : collation = InvalidOid;
430 8320 : strength = COLLATE_NONE;
431 8320 : location = -1; /* won't be used */
432 : }
433 :
434 : /*
435 : * Save the state into the expression node. We know it
436 : * doesn't care about input collation.
437 : */
438 50205 : if (strength == COLLATE_CONFLICT)
439 0 : exprSetCollation(node, InvalidOid);
440 : else
441 50205 : exprSetCollation(node, collation);
442 : }
443 50205 : break;
444 1012547 : case T_TargetEntry:
445 1012547 : (void) expression_tree_walker(node,
446 : assign_collations_walker,
447 : &loccontext);
448 :
449 : /*
450 : * TargetEntry can have only one child, and should bubble that
451 : * state up to its parent. We can't use the general-case code
452 : * below because exprType and friends don't work on TargetEntry.
453 : */
454 1012535 : collation = loccontext.collation;
455 1012535 : strength = loccontext.strength;
456 1012535 : location = loccontext.location;
457 :
458 : /*
459 : * Throw error if the collation is indeterminate for a TargetEntry
460 : * that is a sort/group target. We prefer to do this now, instead
461 : * of leaving the comparison functions to fail at runtime, because
462 : * we can give a syntax error pointer to help locate the problem.
463 : * There are some cases where there might not be a failure, for
464 : * example if the planner chooses to use hash aggregation instead
465 : * of sorting for grouping; but it seems better to predictably
466 : * throw an error. (Compare transformSetOperationTree, which will
467 : * throw error for indeterminate collation of set-op columns, even
468 : * though the planner might be able to implement the set-op
469 : * without sorting.)
470 : */
471 1012535 : if (strength == COLLATE_CONFLICT &&
472 20 : ((TargetEntry *) node)->ressortgroupref != 0)
473 12 : ereport(ERROR,
474 : (errcode(ERRCODE_COLLATION_MISMATCH),
475 : errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
476 : get_collation_name(loccontext.collation),
477 : get_collation_name(loccontext.collation2)),
478 : errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
479 : parser_errposition(context->pstate,
480 : loccontext.location2)));
481 1012523 : break;
482 759494 : case T_InferenceElem:
483 : case T_RangeTblRef:
484 : case T_JoinExpr:
485 : case T_FromExpr:
486 : case T_OnConflictExpr:
487 : case T_SortGroupClause:
488 : case T_MergeAction:
489 759494 : (void) expression_tree_walker(node,
490 : assign_collations_walker,
491 : &loccontext);
492 :
493 : /*
494 : * When we're invoked on a query's jointree, we don't need to do
495 : * anything with join nodes except recurse through them to process
496 : * WHERE/ON expressions. So just stop here. Likewise, we don't
497 : * need to do anything when invoked on sort/group lists.
498 : */
499 759486 : return false;
500 35418 : case T_Query:
501 : {
502 : /*
503 : * We get here when we're invoked on the Query belonging to a
504 : * SubLink. Act as though the Query returns its first output
505 : * column, which indeed is what it does for EXPR_SUBLINK and
506 : * ARRAY_SUBLINK cases. In the cases where the SubLink
507 : * returns boolean, this info will be ignored. Special case:
508 : * in EXISTS, the Query might return no columns, in which case
509 : * we need do nothing.
510 : *
511 : * We needn't recurse, since the Query is already processed.
512 : */
513 35418 : Query *qtree = (Query *) node;
514 : TargetEntry *tent;
515 :
516 35418 : if (qtree->targetList == NIL)
517 4 : return false;
518 35414 : tent = linitial_node(TargetEntry, qtree->targetList);
519 35414 : if (tent->resjunk)
520 0 : return false;
521 :
522 35414 : collation = exprCollation((Node *) tent->expr);
523 : /* collation doesn't change if it's converted to array */
524 35414 : strength = COLLATE_IMPLICIT;
525 35414 : location = exprLocation((Node *) tent->expr);
526 : }
527 35414 : break;
528 90614 : case T_List:
529 90614 : (void) expression_tree_walker(node,
530 : assign_collations_walker,
531 : &loccontext);
532 :
533 : /*
534 : * When processing a list, collation state just bubbles up from
535 : * the list elements.
536 : */
537 90614 : collation = loccontext.collation;
538 90614 : strength = loccontext.strength;
539 90614 : location = loccontext.location;
540 90614 : break;
541 :
542 2305975 : case T_Var:
543 : case T_Const:
544 : case T_Param:
545 : case T_CoerceToDomainValue:
546 : case T_CaseTestExpr:
547 : case T_SetToDefault:
548 : case T_CurrentOfExpr:
549 : case T_GraphPropertyRef:
550 :
551 : /*
552 : * General case for childless expression nodes. These should
553 : * already have a collation assigned; it is not this function's
554 : * responsibility to look into the catalogs for base-case
555 : * information.
556 : */
557 2305975 : collation = exprCollation(node);
558 :
559 : /*
560 : * Note: in most cases, there will be an assigned collation
561 : * whenever type_is_collatable(exprType(node)); but an exception
562 : * occurs for a Var referencing a subquery output column for which
563 : * a unique collation was not determinable. That may lead to a
564 : * runtime failure if a collation-sensitive function is applied to
565 : * the Var.
566 : */
567 :
568 2305975 : if (OidIsValid(collation))
569 541631 : strength = COLLATE_IMPLICIT;
570 : else
571 1764344 : strength = COLLATE_NONE;
572 2305975 : location = exprLocation(node);
573 2305975 : break;
574 :
575 1124608 : default:
576 : {
577 : /*
578 : * General case for most expression nodes with children. First
579 : * recurse, then figure out what to assign to this node.
580 : */
581 : Oid typcollation;
582 :
583 : /*
584 : * For most node types, we want to treat all the child
585 : * expressions alike; but there are a few exceptions, hence
586 : * this inner switch.
587 : */
588 1124608 : switch (nodeTag(node))
589 : {
590 30735 : case T_Aggref:
591 : {
592 : /*
593 : * Aggref is messy enough that we give it its own
594 : * function, in fact three of them. The FILTER
595 : * clause is independent of the rest of the
596 : * aggregate, however, so it can be processed
597 : * separately.
598 : */
599 30735 : Aggref *aggref = (Aggref *) node;
600 :
601 30735 : switch (aggref->aggkind)
602 : {
603 30535 : case AGGKIND_NORMAL:
604 30535 : assign_aggregate_collations(aggref,
605 : &loccontext);
606 30527 : break;
607 118 : case AGGKIND_ORDERED_SET:
608 118 : assign_ordered_set_collations(aggref,
609 : &loccontext);
610 118 : break;
611 82 : case AGGKIND_HYPOTHETICAL:
612 82 : assign_hypothetical_collations(aggref,
613 : &loccontext);
614 78 : break;
615 0 : default:
616 0 : elog(ERROR, "unrecognized aggkind: %d",
617 : (int) aggref->aggkind);
618 : }
619 :
620 30723 : assign_expr_collations(context->pstate,
621 30723 : (Node *) aggref->aggfilter);
622 : }
623 30723 : break;
624 2586 : case T_WindowFunc:
625 : {
626 : /*
627 : * WindowFunc requires special processing only for
628 : * its aggfilter clause, as for aggregates.
629 : */
630 2586 : WindowFunc *wfunc = (WindowFunc *) node;
631 :
632 2586 : (void) assign_collations_walker((Node *) wfunc->args,
633 : &loccontext);
634 :
635 2586 : assign_expr_collations(context->pstate,
636 2586 : (Node *) wfunc->aggfilter);
637 : }
638 2586 : break;
639 28952 : case T_CaseExpr:
640 : {
641 : /*
642 : * CaseExpr is a special case because we do not
643 : * want to recurse into the test expression (if
644 : * any). It was already marked with collations
645 : * during transformCaseExpr, and furthermore its
646 : * collation is not relevant to the result of the
647 : * CASE --- only the output expressions are.
648 : */
649 28952 : CaseExpr *expr = (CaseExpr *) node;
650 : ListCell *lc;
651 :
652 77945 : foreach(lc, expr->args)
653 : {
654 48993 : CaseWhen *when = lfirst_node(CaseWhen, lc);
655 :
656 : /*
657 : * The condition expressions mustn't affect
658 : * the CASE's result collation either; but
659 : * since they are known to yield boolean, it's
660 : * safe to recurse directly on them --- they
661 : * won't change loccontext.
662 : */
663 48993 : (void) assign_collations_walker((Node *) when->expr,
664 : &loccontext);
665 48993 : (void) assign_collations_walker((Node *) when->result,
666 : &loccontext);
667 : }
668 28952 : (void) assign_collations_walker((Node *) expr->defresult,
669 : &loccontext);
670 : }
671 28952 : break;
672 8591 : case T_SubscriptingRef:
673 : {
674 : /*
675 : * The subscripts are treated as independent
676 : * expressions not contributing to the node's
677 : * collation. Only the container, and the source
678 : * expression if any, contribute. (This models
679 : * the old behavior, in which the subscripts could
680 : * be counted on to be integers and thus not
681 : * contribute anything.)
682 : */
683 8591 : SubscriptingRef *sbsref = (SubscriptingRef *) node;
684 :
685 8591 : assign_expr_collations(context->pstate,
686 8591 : (Node *) sbsref->refupperindexpr);
687 8591 : assign_expr_collations(context->pstate,
688 8591 : (Node *) sbsref->reflowerindexpr);
689 8591 : (void) assign_collations_walker((Node *) sbsref->refexpr,
690 : &loccontext);
691 8591 : (void) assign_collations_walker((Node *) sbsref->refassgnexpr,
692 : &loccontext);
693 : }
694 8591 : break;
695 1053744 : default:
696 :
697 : /*
698 : * Normal case: all child expressions contribute
699 : * equally to loccontext.
700 : */
701 1053744 : (void) expression_tree_walker(node,
702 : assign_collations_walker,
703 : &loccontext);
704 1053736 : break;
705 : }
706 :
707 : /*
708 : * Now figure out what collation to assign to this node.
709 : */
710 1124588 : typcollation = get_typcollation(exprType(node));
711 1124588 : if (OidIsValid(typcollation))
712 : {
713 : /* Node's result is collatable; what about its input? */
714 194774 : if (loccontext.strength > COLLATE_NONE)
715 : {
716 : /* Collation state bubbles up from children. */
717 145176 : collation = loccontext.collation;
718 145176 : strength = loccontext.strength;
719 145176 : location = loccontext.location;
720 : }
721 : else
722 : {
723 : /*
724 : * Collatable output produced without any collatable
725 : * input. Use the type's collation (which is usually
726 : * DEFAULT_COLLATION_OID, but might be different for a
727 : * domain).
728 : */
729 49598 : collation = typcollation;
730 49598 : strength = COLLATE_IMPLICIT;
731 49598 : location = exprLocation(node);
732 : }
733 : }
734 : else
735 : {
736 : /* Node's result type isn't collatable. */
737 929814 : collation = InvalidOid;
738 929814 : strength = COLLATE_NONE;
739 929814 : location = -1; /* won't be used */
740 : }
741 :
742 : /*
743 : * Save the result collation into the expression node. If the
744 : * state is COLLATE_CONFLICT, we'll set the collation to
745 : * InvalidOid, which might result in an error at runtime.
746 : */
747 1124588 : if (strength == COLLATE_CONFLICT)
748 20 : exprSetCollation(node, InvalidOid);
749 : else
750 1124568 : exprSetCollation(node, collation);
751 :
752 : /*
753 : * Likewise save the input collation, which is the one that
754 : * any function called by this node should use.
755 : */
756 1124588 : if (loccontext.strength == COLLATE_CONFLICT)
757 72 : exprSetInputCollation(node, InvalidOid);
758 : else
759 1124516 : exprSetInputCollation(node, loccontext.collation);
760 : }
761 1124588 : break;
762 : }
763 :
764 : /*
765 : * Now, merge my information into my parent's state.
766 : */
767 4638075 : merge_collation_state(collation,
768 : strength,
769 : location,
770 : loccontext.collation2,
771 : loccontext.location2,
772 : context);
773 :
774 4638059 : return false;
775 : }
776 :
777 : /*
778 : * Merge collation state of a subexpression into the context for its parent.
779 : */
780 : static void
781 4638075 : merge_collation_state(Oid collation,
782 : CollateStrength strength,
783 : int location,
784 : Oid collation2,
785 : int location2,
786 : assign_collations_context *context)
787 : {
788 : /*
789 : * If the collation strength for this node is different from what's
790 : * already in *context, then this node either dominates or is dominated by
791 : * earlier siblings.
792 : */
793 4638075 : if (strength > context->strength)
794 : {
795 : /* Override previous parent state */
796 1040055 : context->collation = collation;
797 1040055 : context->strength = strength;
798 1040055 : context->location = location;
799 : /* Bubble up error info if applicable */
800 1040055 : if (strength == COLLATE_CONFLICT)
801 : {
802 76 : context->collation2 = collation2;
803 76 : context->location2 = location2;
804 : }
805 : }
806 3598020 : else if (strength == context->strength)
807 : {
808 : /* Merge, or detect error if there's a collation conflict */
809 3523198 : switch (strength)
810 : {
811 3370558 : case COLLATE_NONE:
812 : /* Nothing + nothing is still nothing */
813 3370558 : break;
814 152568 : case COLLATE_IMPLICIT:
815 152568 : if (collation != context->collation)
816 : {
817 : /*
818 : * Non-default implicit collation always beats default.
819 : */
820 35109 : if (context->collation == DEFAULT_COLLATION_OID)
821 : {
822 : /* Override previous parent state */
823 9543 : context->collation = collation;
824 9543 : context->strength = strength;
825 9543 : context->location = location;
826 : }
827 25566 : else if (collation != DEFAULT_COLLATION_OID)
828 : {
829 : /*
830 : * Oops, we have a conflict. We cannot throw error
831 : * here, since the conflict could be resolved by a
832 : * later sibling CollateExpr, or the parent might not
833 : * care about collation anyway. Return enough info to
834 : * throw the error later, if needed.
835 : */
836 120 : context->strength = COLLATE_CONFLICT;
837 120 : context->collation2 = collation;
838 120 : context->location2 = location;
839 : }
840 : }
841 152568 : break;
842 0 : case COLLATE_CONFLICT:
843 : /* We're still conflicted ... */
844 0 : break;
845 72 : case COLLATE_EXPLICIT:
846 72 : if (collation != context->collation)
847 : {
848 : /*
849 : * Oops, we have a conflict of explicit COLLATE clauses.
850 : * Here we choose to throw error immediately; that is what
851 : * the SQL standard says to do, and there's no good reason
852 : * to be less strict.
853 : */
854 16 : ereport(ERROR,
855 : (errcode(ERRCODE_COLLATION_MISMATCH),
856 : errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
857 : get_collation_name(context->collation),
858 : get_collation_name(collation)),
859 : parser_errposition(context->pstate, location)));
860 : }
861 56 : break;
862 : }
863 : }
864 4638059 : }
865 :
866 : /*
867 : * Aggref is a special case because expressions used only for ordering
868 : * shouldn't be taken to conflict with each other or with regular args,
869 : * indeed shouldn't affect the aggregate's result collation at all.
870 : * We handle this by applying assign_expr_collations() to them rather than
871 : * passing down our loccontext.
872 : *
873 : * Note that we recurse to each TargetEntry, not directly to its contained
874 : * expression, so that the case above for T_TargetEntry will complain if we
875 : * can't resolve a collation for an ORDER BY item (whether or not it is also
876 : * a normal aggregate arg).
877 : *
878 : * We need not recurse into the aggorder or aggdistinct lists, because those
879 : * contain only SortGroupClause nodes which we need not process.
880 : */
881 : static void
882 30535 : assign_aggregate_collations(Aggref *aggref,
883 : assign_collations_context *loccontext)
884 : {
885 : ListCell *lc;
886 :
887 : /* Plain aggregates have no direct args */
888 : Assert(aggref->aggdirectargs == NIL);
889 :
890 : /* Process aggregated args, holding resjunk ones at arm's length */
891 54285 : foreach(lc, aggref->args)
892 : {
893 23758 : TargetEntry *tle = lfirst_node(TargetEntry, lc);
894 :
895 23758 : if (tle->resjunk)
896 1045 : assign_expr_collations(loccontext->pstate, (Node *) tle);
897 : else
898 22713 : (void) assign_collations_walker((Node *) tle, loccontext);
899 : }
900 30527 : }
901 :
902 : /*
903 : * For ordered-set aggregates, it's somewhat unclear how best to proceed.
904 : * The spec-defined inverse distribution functions have only one sort column
905 : * and don't return collatable types, but this is clearly too restrictive in
906 : * the general case. Our solution is to consider that the aggregate's direct
907 : * arguments contribute normally to determination of the aggregate's own
908 : * collation, while aggregated arguments contribute only when the aggregate
909 : * is designed to have exactly one aggregated argument (i.e., it has a single
910 : * aggregated argument and is non-variadic). If it can have more than one
911 : * aggregated argument, we process the aggregated arguments as independent
912 : * sort columns. This avoids throwing error for something like
913 : * agg(...) within group (order by x collate "foo", y collate "bar")
914 : * while also guaranteeing that variadic aggregates don't change in behavior
915 : * depending on how many sort columns a particular call happens to have.
916 : *
917 : * Otherwise this is much like the plain-aggregate case.
918 : */
919 : static void
920 118 : assign_ordered_set_collations(Aggref *aggref,
921 : assign_collations_context *loccontext)
922 : {
923 : bool merge_sort_collations;
924 : ListCell *lc;
925 :
926 : /* Merge sort collations to parent only if there can be only one */
927 236 : merge_sort_collations = (list_length(aggref->args) == 1 &&
928 118 : get_func_variadictype(aggref->aggfnoid) == InvalidOid);
929 :
930 : /* Direct args, if any, are normal children of the Aggref node */
931 118 : (void) assign_collations_walker((Node *) aggref->aggdirectargs,
932 : loccontext);
933 :
934 : /* Process aggregated args appropriately */
935 236 : foreach(lc, aggref->args)
936 : {
937 118 : TargetEntry *tle = lfirst_node(TargetEntry, lc);
938 :
939 118 : if (merge_sort_collations)
940 118 : (void) assign_collations_walker((Node *) tle, loccontext);
941 : else
942 0 : assign_expr_collations(loccontext->pstate, (Node *) tle);
943 : }
944 118 : }
945 :
946 : /*
947 : * Hypothetical-set aggregates are even more special: per spec, we need to
948 : * unify the collations of each pair of hypothetical and aggregated args.
949 : * And we need to force the choice of collation down into the sort column
950 : * to ensure that the sort happens with the chosen collation. Other than
951 : * that, the behavior is like regular ordered-set aggregates. Note that
952 : * hypothetical direct arguments contribute to the aggregate collation
953 : * only when their partner aggregated arguments do.
954 : */
955 : static void
956 82 : assign_hypothetical_collations(Aggref *aggref,
957 : assign_collations_context *loccontext)
958 : {
959 82 : ListCell *h_cell = list_head(aggref->aggdirectargs);
960 82 : ListCell *s_cell = list_head(aggref->args);
961 : bool merge_sort_collations;
962 : int extra_args;
963 :
964 : /* Merge sort collations to parent only if there can be only one */
965 145 : merge_sort_collations = (list_length(aggref->args) == 1 &&
966 63 : get_func_variadictype(aggref->aggfnoid) == InvalidOid);
967 :
968 : /* Process any non-hypothetical direct args */
969 82 : extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
970 : Assert(extra_args >= 0);
971 82 : while (extra_args-- > 0)
972 : {
973 0 : (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
974 0 : h_cell = lnext(aggref->aggdirectargs, h_cell);
975 : }
976 :
977 : /* Scan hypothetical args and aggregated args in parallel */
978 195 : while (h_cell && s_cell)
979 : {
980 117 : Node *h_arg = (Node *) lfirst(h_cell);
981 117 : TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
982 : assign_collations_context paircontext;
983 :
984 : /*
985 : * Assign collations internally in this pair of expressions, then
986 : * choose a common collation for them. This should match
987 : * select_common_collation(), but we can't use that function as-is
988 : * because we need access to the whole collation state so we can
989 : * bubble it up to the aggregate function's level.
990 : */
991 117 : paircontext.pstate = loccontext->pstate;
992 117 : paircontext.collation = InvalidOid;
993 117 : paircontext.strength = COLLATE_NONE;
994 117 : paircontext.location = -1;
995 : /* Set these fields just to suppress uninitialized-value warnings: */
996 117 : paircontext.collation2 = InvalidOid;
997 117 : paircontext.location2 = -1;
998 :
999 117 : (void) assign_collations_walker(h_arg, &paircontext);
1000 117 : (void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
1001 :
1002 : /* deal with collation conflict */
1003 113 : if (paircontext.strength == COLLATE_CONFLICT)
1004 0 : ereport(ERROR,
1005 : (errcode(ERRCODE_COLLATION_MISMATCH),
1006 : errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
1007 : get_collation_name(paircontext.collation),
1008 : get_collation_name(paircontext.collation2)),
1009 : errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
1010 : parser_errposition(paircontext.pstate,
1011 : paircontext.location2)));
1012 :
1013 : /*
1014 : * At this point paircontext.collation can be InvalidOid only if the
1015 : * type is not collatable; no need to do anything in that case. If we
1016 : * do have to change the sort column's collation, do it by inserting a
1017 : * RelabelType node into the sort column TLE.
1018 : *
1019 : * XXX This is pretty grotty for a couple of reasons:
1020 : * assign_collations_walker isn't supposed to be changing the
1021 : * expression structure like this, and a parse-time change of
1022 : * collation ought to be signaled by a CollateExpr not a RelabelType
1023 : * (the use of RelabelType for collation marking is supposed to be a
1024 : * planner/executor thing only). But we have no better alternative.
1025 : * In particular, injecting a CollateExpr could result in the
1026 : * expression being interpreted differently after dump/reload, since
1027 : * we might be effectively promoting an implicit collation to
1028 : * explicit. This kluge is relying on ruleutils.c not printing a
1029 : * COLLATE clause for a RelabelType, and probably on some other
1030 : * fragile behaviors.
1031 : */
1032 132 : if (OidIsValid(paircontext.collation) &&
1033 19 : paircontext.collation != exprCollation((Node *) s_tle->expr))
1034 : {
1035 0 : s_tle->expr = (Expr *)
1036 0 : makeRelabelType(s_tle->expr,
1037 0 : exprType((Node *) s_tle->expr),
1038 0 : exprTypmod((Node *) s_tle->expr),
1039 : paircontext.collation,
1040 : COERCE_IMPLICIT_CAST);
1041 : }
1042 :
1043 : /*
1044 : * If appropriate, merge this column's collation state up to the
1045 : * aggregate function.
1046 : */
1047 113 : if (merge_sort_collations)
1048 0 : merge_collation_state(paircontext.collation,
1049 : paircontext.strength,
1050 : paircontext.location,
1051 : paircontext.collation2,
1052 : paircontext.location2,
1053 : loccontext);
1054 :
1055 113 : h_cell = lnext(aggref->aggdirectargs, h_cell);
1056 113 : s_cell = lnext(aggref->args, s_cell);
1057 : }
1058 : Assert(h_cell == NULL && s_cell == NULL);
1059 78 : }
|