LCOV - code coverage report
Current view: top level - src/backend/parser - parse_collate.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 265 280 94.6 %
Date: 2025-01-18 04:15:08 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * parse_collate.c
       4             :  *      Routines for assigning collation information.
       5             :  *
       6             :  * We choose to handle collation analysis in a post-pass over the output
       7             :  * of expression parse analysis.  This is because we need more state to
       8             :  * perform this processing than is needed in the finished tree.  If we
       9             :  * did it on-the-fly while building the tree, all that state would have
      10             :  * to be kept in expression node trees permanently.  This way, the extra
      11             :  * storage is just local variables in this recursive routine.
      12             :  *
      13             :  * The info that is actually saved in the finished tree is:
      14             :  * 1. The output collation of each expression node, or InvalidOid if it
      15             :  * returns a noncollatable data type.  This can also be InvalidOid if the
      16             :  * result type is collatable but the collation is indeterminate.
      17             :  * 2. The collation to be used in executing each function.  InvalidOid means
      18             :  * that there are no collatable inputs or their collation is indeterminate.
      19             :  * This value is only stored in node types that might call collation-using
      20             :  * functions.
      21             :  *
      22             :  * You might think we could get away with storing only one collation per
      23             :  * node, but the two concepts really need to be kept distinct.  Otherwise
      24             :  * it's too confusing when a function produces a collatable output type but
      25             :  * has no collatable inputs or produces noncollatable output from collatable
      26             :  * inputs.
      27             :  *
      28             :  * Cases with indeterminate collation might result in an error being thrown
      29             :  * at runtime.  If we knew exactly which functions require collation
      30             :  * information, we could throw those errors at parse time instead.
      31             :  *
      32             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      33             :  * Portions Copyright (c) 1994, Regents of the University of California
      34             :  *
      35             :  *
      36             :  * IDENTIFICATION
      37             :  *    src/backend/parser/parse_collate.c
      38             :  *
      39             :  *-------------------------------------------------------------------------
      40             :  */
      41             : #include "postgres.h"
      42             : 
      43             : #include "catalog/pg_aggregate.h"
      44             : #include "catalog/pg_collation.h"
      45             : #include "nodes/makefuncs.h"
      46             : #include "nodes/nodeFuncs.h"
      47             : #include "parser/parse_collate.h"
      48             : #include "utils/lsyscache.h"
      49             : 
      50             : 
      51             : /*
      52             :  * Collation strength (the SQL standard calls this "derivation").  Order is
      53             :  * chosen to allow comparisons to work usefully.  Note: the standard doesn't
      54             :  * seem to distinguish between NONE and CONFLICT.
                     :  *
                     :  * As declared, the members are in increasing numeric order:
                     :  * COLLATE_NONE < COLLATE_IMPLICIT < COLLATE_CONFLICT < COLLATE_EXPLICIT.
                     :  * Keep that ordering intact when adding or moving members, since code
                     :  * that compares strengths presumably relies on it (NOTE(review): the
                     :  * comparing code is not visible from here -- confirm before changing).
      55             :  */
      56             : typedef enum
      57             : {
      58             :     COLLATE_NONE,               /* expression is of a noncollatable datatype */
      59             :     COLLATE_IMPLICIT,           /* collation was derived implicitly */
      60             :     COLLATE_CONFLICT,           /* we had a conflict of implicit collations */
      61             :     COLLATE_EXPLICIT,           /* collation was derived explicitly */
      62             : } CollateStrength;
      63             : /*
                     :  * Working state for the collation-assignment tree walk.
                     :  *
                     :  * assign_expr_collations() and select_common_collation() fill in pstate,
                     :  * collation, strength and location before starting a walk, and
                     :  * assign_collations_walker() sets up a fresh local instance of this
                     :  * struct for each node it processes.  collation2 and location2 are read
                     :  * when reporting a conflict between implicit collations.
                     :  */
      64             : typedef struct
      65             : {
      66             :     ParseState *pstate;         /* parse state (for error reporting) */
      67             :     Oid         collation;      /* OID of current collation, if any */
      68             :     CollateStrength strength;   /* strength of current collation choice */
      69             :     int         location;       /* location of expr that set collation */
      70             :     /* Remaining fields are only valid when strength == COLLATE_CONFLICT */
      71             :     Oid         collation2;     /* OID of conflicting collation */
      72             :     int         location2;      /* location of expr that set collation2 */
      73             : } assign_collations_context;
      74             : /* Prototypes for this file's static (file-local) helper routines. */
      75             : static bool assign_query_collations_walker(Node *node, ParseState *pstate);
      76             : static bool assign_collations_walker(Node *node,
      77             :                                      assign_collations_context *context);
      78             : static void merge_collation_state(Oid collation,
      79             :                                   CollateStrength strength,
      80             :                                   int location,
      81             :                                   Oid collation2,
      82             :                                   int location2,
      83             :                                   assign_collations_context *context);
      84             : static void assign_aggregate_collations(Aggref *aggref,
      85             :                                         assign_collations_context *loccontext);
      86             : static void assign_ordered_set_collations(Aggref *aggref,
      87             :                                           assign_collations_context *loccontext);
      88             : static void assign_hypothetical_collations(Aggref *aggref,
      89             :                                            assign_collations_context *loccontext);
      90             : 
      91             : 
      92             : /*
      93             :  * assign_query_collations()
      94             :  *      Mark all expressions in the given Query with collation information.
      95             :  *
      96             :  * This should be applied to each Query after completion of parse analysis
      97             :  * for expressions.  Note that we do not recurse into sub-Queries, since
      98             :  * those should have been processed when built.
                     :  *
                     :  * pstate: parse state; handed through unchanged to
                     :  *      assign_query_collations_walker() as its context argument.
                     :  * query: the Query whose contained expressions are visited by way of
                     :  *      query_tree_walker().
                     :  *
                     :  * Nothing is returned; the boolean result of query_tree_walker() is
                     :  * deliberately discarded.
      99             :  */
     100             : void
     101      564032 : assign_query_collations(ParseState *pstate, Query *query)
     102             : {
     103             :     /*
     104             :      * We just use query_tree_walker() to visit all the contained expressions.
     105             :      * We can skip the rangetable and CTE subqueries, though, since RTEs and
     106             :      * subqueries had better have been processed already (else Vars referring
     107             :      * to them would not get created with the right collation).
     108             :      */
     109      564032 :     (void) query_tree_walker(query,
     110             :                              assign_query_collations_walker,
     111             :                              pstate,
     112             :                              QTW_IGNORE_RANGE_TABLE |
     113             :                              QTW_IGNORE_CTE_SUBQUERIES);
     114      563990 : }
     115             : 
     116             : /*
     117             :  * Walker for assign_query_collations
     118             :  *
     119             :  * Each expression found by query_tree_walker is processed independently.
     120             :  * Note that query_tree_walker may pass us a whole List, such as the
     121             :  * targetlist, in which case each subexpression must be processed
     122             :  * independently --- we don't want to bleat if two different targetentries
     123             :  * have different collations.
                     :  *
                     :  * node: the item handed to us by query_tree_walker().  NULL and
                     :  *      SetOperationStmt nodes are skipped; a List is given to
                     :  *      assign_list_collations(); anything else is given to
                     :  *      assign_expr_collations().
                     :  * pstate: parse state, passed on to whichever of those routines is called.
                     :  *
                     :  * Always returns false, on every path.
     124             :  */
     125             : static bool
     126     6209316 : assign_query_collations_walker(Node *node, ParseState *pstate)
     127             : {
     128             :     /* Need do nothing for empty subexpressions */
     129     6209316 :     if (node == NULL)
     130     5073626 :         return false;
     131             : 
     132             :     /*
     133             :      * We don't want to recurse into a set-operations tree; it's already been
     134             :      * fully processed in transformSetOperationStmt.
     135             :      */
     136     1135690 :     if (IsA(node, SetOperationStmt))
     137        8902 :         return false;
     138             : 
     139     1126788 :     if (IsA(node, List))
     140      551248 :         assign_list_collations(pstate, (List *) node);
     141             :     else
     142      575540 :         assign_expr_collations(pstate, node);
     143             : 
     144     1126746 :     return false;
     145             : }
     146             : 
     147             : /*
     148             :  * assign_list_collations()
     149             :  *      Mark all nodes in the list of expressions with collation information.
     150             :  *
     151             :  * The list member expressions are processed independently; they do not have
     152             :  * to share a common collation.
                     :  *
                     :  * pstate: parse state, passed on to assign_expr_collations().
                     :  * exprs: list whose members are each treated as a Node and handed, one at
                     :  *      a time, to assign_expr_collations().
     153             :  */
     154             : void
     155      613226 : assign_list_collations(ParseState *pstate, List *exprs)
     156             : {
     157             :     ListCell   *lc;
     158             : 
     159     2098556 :     foreach(lc, exprs)
     160             :     {
     161     1485360 :         Node       *node = (Node *) lfirst(lc);
     162             : 
     163     1485360 :         assign_expr_collations(pstate, node);
     164             :     }
     165      613196 : }
     166             : 
     167             : /*
     168             :  * assign_expr_collations()
     169             :  *      Mark all nodes in the given expression tree with collation information.
     170             :  *
     171             :  * This is exported for the benefit of various utility commands that process
     172             :  * expressions without building a complete Query.  It should be applied after
     173             :  * calling transformExpr() plus any expression-modifying operations such as
     174             :  * coerce_to_boolean().
                     :  *
                     :  * pstate: parse state, stored in the walk context.
                     :  * expr: root of the expression tree handed to assign_collations_walker().
                     :  *
                     :  * Nothing is returned; the walker's boolean result is discarded.
     175             :  */
     176             : void
     177     2243052 : assign_expr_collations(ParseState *pstate, Node *expr)
     178             : {
     179             :     assign_collations_context context;
     180             : 
     181             :     /*
                     :      * initialize context for tree walk
                     :      *
                     :      * collation2 and location2 are left unset here; per the struct
                     :      * definition they are only valid when strength == COLLATE_CONFLICT.
                     :      */
     182     2243052 :     context.pstate = pstate;
     183     2243052 :     context.collation = InvalidOid;
     184     2243052 :     context.strength = COLLATE_NONE;
     185     2243052 :     context.location = -1;
     186             : 
     187             :     /* and away we go */
     188     2243052 :     (void) assign_collations_walker(expr, &context);
     189     2243004 : }
     190             : 
     191             : /*
     192             :  * select_common_collation()
     193             :  *      Identify a common collation for a list of expressions.
     194             :  *
     195             :  * The expressions should all return the same datatype, else this is not
     196             :  * terribly meaningful.
     197             :  *
     198             :  * none_ok means that it is permitted to return InvalidOid, indicating that
     199             :  * no common collation could be identified, even for collatable datatypes.
     200             :  * Otherwise, an error is thrown for conflict of implicit collations.
     201             :  *
     202             :  * In theory, none_ok = true reflects the rules of SQL standard clause "Result
     203             :  * of data type combinations", none_ok = false reflects the rules of clause
     204             :  * "Collation determination" (in some cases invoked via "Grouping
     205             :  * operations").
                     :  *
                     :  * pstate: parse state, used to position the error cursor.
                     :  * exprs: the expressions to reconcile.  The list is handed to
                     :  *      assign_collations_walker() as a single List node, which is the
                     :  *      mode in which that walker tries to infer one common collation for
                     :  *      all members.
                     :  * none_ok: as described above.
                     :  *
                     :  * Returns the collation left in the walk context, or InvalidOid if the
                     :  * walk ended in COLLATE_CONFLICT and none_ok is true.  If it ended in
                     :  * COLLATE_CONFLICT and none_ok is false, ERRCODE_COLLATION_MISMATCH is
                     :  * reported with the cursor at the second (conflicting) location.
     206             :  */
     207             : Oid
     208       61462 : select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
     209             : {
     210             :     assign_collations_context context;
     211             : 
     212             :     /* initialize context for tree walk */
     213       61462 :     context.pstate = pstate;
     214       61462 :     context.collation = InvalidOid;
     215       61462 :     context.strength = COLLATE_NONE;
     216       61462 :     context.location = -1;
     217             : 
     218             :     /* and away we go */
     219       61462 :     (void) assign_collations_walker((Node *) exprs, &context);
     220             : 
     221             :     /* deal with collation conflict */
     222       61462 :     if (context.strength == COLLATE_CONFLICT)
     223             :     {
     224          72 :         if (none_ok)
     225          36 :             return InvalidOid;
     226          36 :         ereport(ERROR,
     227             :                 (errcode(ERRCODE_COLLATION_MISMATCH),
     228             :                  errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
     229             :                         get_collation_name(context.collation),
     230             :                         get_collation_name(context.collation2)),
     231             :                  errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
     232             :                  parser_errposition(context.pstate, context.location2)));
     233             :     }
     234             : 
     235             :     /*
     236             :      * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
     237             :      * that's okay because it must mean none of the expressions returned
     238             :      * collatable datatypes.
     239             :      */
     240       61390 :     return context.collation;
     241             : }
     242             : 
     243             : /*
     244             :  * assign_collations_walker()
     245             :  *      Recursive guts of collation processing.
     246             :  *
     247             :  * Nodes with no children (eg, Vars, Consts, Params) must have been marked
     248             :  * when built.  All upper-level nodes are marked here.
     249             :  *
     250             :  * Note: if this is invoked directly on a List, it will attempt to infer a
     251             :  * common collation for all the list members.  In particular, it will throw
     252             :  * error if there are conflicting explicit collations for different members.
     253             :  */
     254             : static bool
     255     8524444 : assign_collations_walker(Node *node, assign_collations_context *context)
     256             : {
     257             :     assign_collations_context loccontext;
     258             :     Oid         collation;
     259             :     CollateStrength strength;
     260             :     int         location;
     261             : 
     262             :     /* Need do nothing for empty subexpressions */
     263     8524444 :     if (node == NULL)
     264      487968 :         return false;
     265             : 
     266             :     /*
     267             :      * Prepare for recursion.  For most node types, though not all, the first
     268             :      * thing we do is recurse to process all nodes below this one. Each level
     269             :      * of the tree has its own local context.
     270             :      */
     271     8036476 :     loccontext.pstate = context->pstate;
     272     8036476 :     loccontext.collation = InvalidOid;
     273     8036476 :     loccontext.strength = COLLATE_NONE;
     274     8036476 :     loccontext.location = -1;
     275             :     /* Set these fields just to suppress uninitialized-value warnings: */
     276     8036476 :     loccontext.collation2 = InvalidOid;
     277     8036476 :     loccontext.location2 = -1;
     278             : 
     279             :     /*
     280             :      * Recurse if appropriate, then determine the collation for this node.
     281             :      *
     282             :      * Note: the general cases are at the bottom of the switch, after various
     283             :      * special cases.
     284             :      */
     285     8036476 :     switch (nodeTag(node))
     286             :     {
     287        8450 :         case T_CollateExpr:
     288             :             {
     289             :                 /*
     290             :                  * COLLATE sets an explicitly derived collation, regardless of
     291             :                  * what the child state is.  But we must recurse to set up
     292             :                  * collation info below here.
     293             :                  */
     294        8450 :                 CollateExpr *expr = (CollateExpr *) node;
     295             : 
     296        8450 :                 (void) expression_tree_walker(node,
     297             :                                               assign_collations_walker,
     298             :                                               &loccontext);
     299             : 
     300        8450 :                 collation = expr->collOid;
     301             :                 Assert(OidIsValid(collation));
     302        8450 :                 strength = COLLATE_EXPLICIT;
     303        8450 :                 location = expr->location;
     304             :             }
     305        8450 :             break;
     306       16862 :         case T_FieldSelect:
     307             :             {
     308             :                 /*
     309             :                  * For FieldSelect, the result has the field's declared
     310             :                  * collation, independently of what happened in the arguments.
     311             :                  * (The immediate argument must be composite and thus not
     312             :                  * collatable, anyhow.)  The field's collation was already
     313             :                  * looked up and saved in the node.
     314             :                  */
     315       16862 :                 FieldSelect *expr = (FieldSelect *) node;
     316             : 
     317             :                 /* ... but first, recurse */
     318       16862 :                 (void) expression_tree_walker(node,
     319             :                                               assign_collations_walker,
     320             :                                               &loccontext);
     321             : 
     322       16862 :                 if (OidIsValid(expr->resultcollid))
     323             :                 {
     324             :                     /* Node's result type is collatable. */
     325             :                     /* Pass up field's collation as an implicit choice. */
     326        2848 :                     collation = expr->resultcollid;
     327        2848 :                     strength = COLLATE_IMPLICIT;
     328        2848 :                     location = exprLocation(node);
     329             :                 }
     330             :                 else
     331             :                 {
     332             :                     /* Node's result type isn't collatable. */
     333       14014 :                     collation = InvalidOid;
     334       14014 :                     strength = COLLATE_NONE;
     335       14014 :                     location = -1;  /* won't be used */
     336             :                 }
     337             :             }
     338       16862 :             break;
     339        4226 :         case T_RowExpr:
     340             :             {
     341             :                 /*
     342             :                  * RowExpr is a special case because the subexpressions are
     343             :                  * independent: we don't want to complain if some of them have
     344             :                  * incompatible explicit collations.
     345             :                  */
     346        4226 :                 RowExpr    *expr = (RowExpr *) node;
     347             : 
     348        4226 :                 assign_list_collations(context->pstate, expr->args);
     349             : 
     350             :                 /*
     351             :                  * Since the result is always composite and therefore never
     352             :                  * has a collation, we can just stop here: this node has no
     353             :                  * impact on the collation of its parent.
     354             :                  */
     355        4226 :                 return false;   /* done */
     356             :             }
     357         234 :         case T_RowCompareExpr:
     358             :             {
     359             :                 /*
     360             :                  * For RowCompare, we have to find the common collation of
     361             :                  * each pair of input columns and build a list.  If we can't
     362             :                  * find a common collation, we just put InvalidOid into the
     363             :                  * list, which may or may not cause an error at runtime.
     364             :                  */
     365         234 :                 RowCompareExpr *expr = (RowCompareExpr *) node;
     366         234 :                 List       *colls = NIL;
     367             :                 ListCell   *l;
     368             :                 ListCell   *r;
     369             : 
     370         756 :                 forboth(l, expr->largs, r, expr->rargs)
     371             :                 {
     372         522 :                     Node       *le = (Node *) lfirst(l);
     373         522 :                     Node       *re = (Node *) lfirst(r);
     374             :                     Oid         coll;
     375             : 
     376         522 :                     coll = select_common_collation(context->pstate,
     377         522 :                                                    list_make2(le, re),
     378             :                                                    true);
     379         522 :                     colls = lappend_oid(colls, coll);
     380             :                 }
     381         234 :                 expr->inputcollids = colls;
     382             : 
     383             :                 /*
     384             :                  * Since the result is always boolean and therefore never has
     385             :                  * a collation, we can just stop here: this node has no impact
     386             :                  * on the collation of its parent.
     387             :                  */
     388         234 :                 return false;   /* done */
     389             :             }
     390       80946 :         case T_CoerceToDomain:
     391             :             {
     392             :                 /*
     393             :                  * If the domain declaration included a non-default COLLATE
     394             :                  * spec, then use that collation as the output collation of
     395             :                  * the coercion.  Otherwise allow the input collation to
     396             :                  * bubble up.  (The input should be of the domain's base type,
     397             :                  * therefore we don't need to worry about it not being
     398             :                  * collatable when the domain is.)
     399             :                  */
     400       80946 :                 CoerceToDomain *expr = (CoerceToDomain *) node;
     401       80946 :                 Oid         typcollation = get_typcollation(expr->resulttype);
     402             : 
     403             :                 /* ... but first, recurse */
     404       80946 :                 (void) expression_tree_walker(node,
     405             :                                               assign_collations_walker,
     406             :                                               &loccontext);
     407             : 
     408       80946 :                 if (OidIsValid(typcollation))
     409             :                 {
     410             :                     /* Node's result type is collatable. */
     411       67242 :                     if (typcollation == DEFAULT_COLLATION_OID)
     412             :                     {
     413             :                         /* Collation state bubbles up from child. */
     414         606 :                         collation = loccontext.collation;
     415         606 :                         strength = loccontext.strength;
     416         606 :                         location = loccontext.location;
     417             :                     }
     418             :                     else
     419             :                     {
     420             :                         /* Use domain's collation as an implicit choice. */
     421       66636 :                         collation = typcollation;
     422       66636 :                         strength = COLLATE_IMPLICIT;
     423       66636 :                         location = exprLocation(node);
     424             :                     }
     425             :                 }
     426             :                 else
     427             :                 {
     428             :                     /* Node's result type isn't collatable. */
     429       13704 :                     collation = InvalidOid;
     430       13704 :                     strength = COLLATE_NONE;
     431       13704 :                     location = -1;  /* won't be used */
     432             :                 }
     433             : 
     434             :                 /*
     435             :                  * Save the state into the expression node.  We know it
     436             :                  * doesn't care about input collation.
     437             :                  */
     438       80946 :                 if (strength == COLLATE_CONFLICT)
     439           0 :                     exprSetCollation(node, InvalidOid);
     440             :                 else
     441       80946 :                     exprSetCollation(node, collation);
     442             :             }
     443       80946 :             break;
     444     1447042 :         case T_TargetEntry:
     445     1447042 :             (void) expression_tree_walker(node,
     446             :                                           assign_collations_walker,
     447             :                                           &loccontext);
     448             : 
     449             :             /*
     450             :              * TargetEntry can have only one child, and should bubble that
     451             :              * state up to its parent.  We can't use the general-case code
     452             :              * below because exprType and friends don't work on TargetEntry.
     453             :              */
     454     1447024 :             collation = loccontext.collation;
     455     1447024 :             strength = loccontext.strength;
     456     1447024 :             location = loccontext.location;
     457             : 
     458             :             /*
     459             :              * Throw error if the collation is indeterminate for a TargetEntry
     460             :              * that is a sort/group target.  We prefer to do this now, instead
     461             :              * of leaving the comparison functions to fail at runtime, because
     462             :              * we can give a syntax error pointer to help locate the problem.
     463             :              * There are some cases where there might not be a failure, for
     464             :              * example if the planner chooses to use hash aggregation instead
     465             :              * of sorting for grouping; but it seems better to predictably
     466             :              * throw an error.  (Compare transformSetOperationTree, which will
     467             :              * throw error for indeterminate collation of set-op columns, even
     468             :              * though the planner might be able to implement the set-op
     469             :              * without sorting.)
     470             :              */
     471     1447024 :             if (strength == COLLATE_CONFLICT &&
     472          30 :                 ((TargetEntry *) node)->ressortgroupref != 0)
     473          18 :                 ereport(ERROR,
     474             :                         (errcode(ERRCODE_COLLATION_MISMATCH),
     475             :                          errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
     476             :                                 get_collation_name(loccontext.collation),
     477             :                                 get_collation_name(loccontext.collation2)),
     478             :                          errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
     479             :                          parser_errposition(context->pstate,
     480             :                                             loccontext.location2)));
     481     1447006 :             break;
     482     1097876 :         case T_InferenceElem:
     483             :         case T_RangeTblRef:
     484             :         case T_JoinExpr:
     485             :         case T_FromExpr:
     486             :         case T_OnConflictExpr:
     487             :         case T_SortGroupClause:
     488             :         case T_MergeAction:
     489     1097876 :             (void) expression_tree_walker(node,
     490             :                                           assign_collations_walker,
     491             :                                           &loccontext);
     492             : 
     493             :             /*
     494             :              * When we're invoked on a query's jointree, we don't need to do
     495             :              * anything with join nodes except recurse through them to process
     496             :              * WHERE/ON expressions.  So just stop here.  Likewise, we don't
     497             :              * need to do anything when invoked on sort/group lists.
     498             :              */
     499     1097864 :             return false;
     500       43478 :         case T_Query:
     501             :             {
     502             :                 /*
     503             :                  * We get here when we're invoked on the Query belonging to a
     504             :                  * SubLink.  Act as though the Query returns its first output
     505             :                  * column, which indeed is what it does for EXPR_SUBLINK and
     506             :                  * ARRAY_SUBLINK cases.  In the cases where the SubLink
     507             :                  * returns boolean, this info will be ignored.  Special case:
     508             :                  * in EXISTS, the Query might return no columns, in which case
     509             :                  * we need do nothing.
     510             :                  *
     511             :                  * We needn't recurse, since the Query is already processed.
     512             :                  */
     513       43478 :                 Query      *qtree = (Query *) node;
     514             :                 TargetEntry *tent;
     515             : 
     516       43478 :                 if (qtree->targetList == NIL)
     517           6 :                     return false;
     518       43472 :                 tent = linitial_node(TargetEntry, qtree->targetList);
     519       43472 :                 if (tent->resjunk)
     520           0 :                     return false;
     521             : 
     522       43472 :                 collation = exprCollation((Node *) tent->expr);
     523             :                 /* collation doesn't change if it's converted to array */
     524       43472 :                 strength = COLLATE_IMPLICIT;
     525       43472 :                 location = exprLocation((Node *) tent->expr);
     526             :             }
     527       43472 :             break;
     528      118382 :         case T_List:
     529      118382 :             (void) expression_tree_walker(node,
     530             :                                           assign_collations_walker,
     531             :                                           &loccontext);
     532             : 
     533             :             /*
     534             :              * When processing a list, collation state just bubbles up from
     535             :              * the list elements.
     536             :              */
     537      118382 :             collation = loccontext.collation;
     538      118382 :             strength = loccontext.strength;
     539      118382 :             location = loccontext.location;
     540      118382 :             break;
     541             : 
     542     3469572 :         case T_Var:
     543             :         case T_Const:
     544             :         case T_Param:
     545             :         case T_CoerceToDomainValue:
     546             :         case T_CaseTestExpr:
     547             :         case T_SetToDefault:
     548             :         case T_CurrentOfExpr:
     549             : 
     550             :             /*
     551             :              * General case for childless expression nodes.  These should
     552             :              * already have a collation assigned; it is not this function's
     553             :              * responsibility to look into the catalogs for base-case
     554             :              * information.
     555             :              */
     556     3469572 :             collation = exprCollation(node);
     557             : 
     558             :             /*
     559             :              * Note: in most cases, there will be an assigned collation
     560             :              * whenever type_is_collatable(exprType(node)); but an exception
     561             :              * occurs for a Var referencing a subquery output column for which
     562             :              * a unique collation was not determinable.  That may lead to a
     563             :              * runtime failure if a collation-sensitive function is applied to
     564             :              * the Var.
     565             :              */
     566             : 
     567     3469572 :             if (OidIsValid(collation))
     568      778234 :                 strength = COLLATE_IMPLICIT;
     569             :             else
     570     2691338 :                 strength = COLLATE_NONE;
     571     3469572 :             location = exprLocation(node);
     572     3469572 :             break;
     573             : 
     574     1749408 :         default:
     575             :             {
     576             :                 /*
     577             :                  * General case for most expression nodes with children. First
     578             :                  * recurse, then figure out what to assign to this node.
     579             :                  */
     580             :                 Oid         typcollation;
     581             : 
     582             :                 /*
     583             :                  * For most node types, we want to treat all the child
     584             :                  * expressions alike; but there are a few exceptions, hence
     585             :                  * this inner switch.
     586             :                  */
     587     1749408 :                 switch (nodeTag(node))
     588             :                 {
     589       44716 :                     case T_Aggref:
     590             :                         {
     591             :                             /*
     592             :                              * Aggref is messy enough that we give it its own
     593             :                              * function, in fact three of them.  The FILTER
     594             :                              * clause is independent of the rest of the
     595             :                              * aggregate, however, so it can be processed
     596             :                              * separately.
     597             :                              */
     598       44716 :                             Aggref     *aggref = (Aggref *) node;
     599             : 
     600       44716 :                             switch (aggref->aggkind)
     601             :                             {
     602       44410 :                                 case AGGKIND_NORMAL:
     603       44410 :                                     assign_aggregate_collations(aggref,
     604             :                                                                 &loccontext);
     605       44398 :                                     break;
     606         180 :                                 case AGGKIND_ORDERED_SET:
     607         180 :                                     assign_ordered_set_collations(aggref,
     608             :                                                                   &loccontext);
     609         180 :                                     break;
     610         126 :                                 case AGGKIND_HYPOTHETICAL:
     611         126 :                                     assign_hypothetical_collations(aggref,
     612             :                                                                    &loccontext);
     613         120 :                                     break;
     614           0 :                                 default:
     615           0 :                                     elog(ERROR, "unrecognized aggkind: %d",
     616             :                                          (int) aggref->aggkind);
     617             :                             }
     618             : 
     619       44698 :                             assign_expr_collations(context->pstate,
     620       44698 :                                                    (Node *) aggref->aggfilter);
     621             :                         }
     622       44698 :                         break;
     623        3340 :                     case T_WindowFunc:
     624             :                         {
     625             :                             /*
     626             :                              * WindowFunc requires special processing only for
     627             :                              * its aggfilter clause, as for aggregates.
     628             :                              */
     629        3340 :                             WindowFunc *wfunc = (WindowFunc *) node;
     630             : 
     631        3340 :                             (void) assign_collations_walker((Node *) wfunc->args,
     632             :                                                             &loccontext);
     633             : 
     634        3340 :                             assign_expr_collations(context->pstate,
     635        3340 :                                                    (Node *) wfunc->aggfilter);
     636             :                         }
     637        3340 :                         break;
     638       81254 :                     case T_CaseExpr:
     639             :                         {
     640             :                             /*
     641             :                              * CaseExpr is a special case because we do not
     642             :                              * want to recurse into the test expression (if
     643             :                              * any).  It was already marked with collations
     644             :                              * during transformCaseExpr, and furthermore its
     645             :                              * collation is not relevant to the result of the
     646             :                              * CASE --- only the output expressions are.
     647             :                              */
     648       81254 :                             CaseExpr   *expr = (CaseExpr *) node;
     649             :                             ListCell   *lc;
     650             : 
     651      224960 :                             foreach(lc, expr->args)
     652             :                             {
     653      143706 :                                 CaseWhen   *when = lfirst_node(CaseWhen, lc);
     654             : 
     655             :                                 /*
     656             :                                  * The condition expressions mustn't affect
     657             :                                  * the CASE's result collation either; but
     658             :                                  * since they are known to yield boolean, it's
     659             :                                  * safe to recurse directly on them --- they
     660             :                                  * won't change loccontext.
     661             :                                  */
     662      143706 :                                 (void) assign_collations_walker((Node *) when->expr,
     663             :                                                                 &loccontext);
     664      143706 :                                 (void) assign_collations_walker((Node *) when->result,
     665             :                                                                 &loccontext);
     666             :                             }
     667       81254 :                             (void) assign_collations_walker((Node *) expr->defresult,
     668             :                                                             &loccontext);
     669             :                         }
     670       81254 :                         break;
     671       12110 :                     case T_SubscriptingRef:
     672             :                         {
     673             :                             /*
     674             :                              * The subscripts are treated as independent
     675             :                              * expressions not contributing to the node's
     676             :                              * collation.  Only the container, and the source
     677             :                              * expression if any, contribute.  (This models
     678             :                              * the old behavior, in which the subscripts could
     679             :                              * be counted on to be integers and thus not
     680             :                              * contribute anything.)
     681             :                              */
     682       12110 :                             SubscriptingRef *sbsref = (SubscriptingRef *) node;
     683             : 
     684       12110 :                             assign_expr_collations(context->pstate,
     685       12110 :                                                    (Node *) sbsref->refupperindexpr);
     686       12110 :                             assign_expr_collations(context->pstate,
     687       12110 :                                                    (Node *) sbsref->reflowerindexpr);
     688       12110 :                             (void) assign_collations_walker((Node *) sbsref->refexpr,
     689             :                                                             &loccontext);
     690       12110 :                             (void) assign_collations_walker((Node *) sbsref->refassgnexpr,
     691             :                                                             &loccontext);
     692             :                         }
     693       12110 :                         break;
     694     1607988 :                     default:
     695             : 
     696             :                         /*
     697             :                          * Normal case: all child expressions contribute
     698             :                          * equally to loccontext.
     699             :                          */
     700     1607988 :                         (void) expression_tree_walker(node,
     701             :                                                       assign_collations_walker,
     702             :                                                       &loccontext);
     703     1607976 :                         break;
     704             :                 }
     705             : 
     706             :                 /*
     707             :                  * Now figure out what collation to assign to this node.
     708             :                  */
     709     1749378 :                 typcollation = get_typcollation(exprType(node));
     710     1749378 :                 if (OidIsValid(typcollation))
     711             :                 {
     712             :                     /* Node's result is collatable; what about its input? */
     713      273092 :                     if (loccontext.strength > COLLATE_NONE)
     714             :                     {
     715             :                         /* Collation state bubbles up from children. */
     716      205618 :                         collation = loccontext.collation;
     717      205618 :                         strength = loccontext.strength;
     718      205618 :                         location = loccontext.location;
     719             :                     }
     720             :                     else
     721             :                     {
     722             :                         /*
     723             :                          * Collatable output produced without any collatable
     724             :                          * input.  Use the type's collation (which is usually
     725             :                          * DEFAULT_COLLATION_OID, but might be different for a
     726             :                          * domain).
     727             :                          */
     728       67474 :                         collation = typcollation;
     729       67474 :                         strength = COLLATE_IMPLICIT;
     730       67474 :                         location = exprLocation(node);
     731             :                     }
     732             :                 }
     733             :                 else
     734             :                 {
     735             :                     /* Node's result type isn't collatable. */
     736     1476286 :                     collation = InvalidOid;
     737     1476286 :                     strength = COLLATE_NONE;
     738     1476286 :                     location = -1;  /* won't be used */
     739             :                 }
     740             : 
     741             :                 /*
     742             :                  * Save the result collation into the expression node. If the
     743             :                  * state is COLLATE_CONFLICT, we'll set the collation to
     744             :                  * InvalidOid, which might result in an error at runtime.
     745             :                  */
     746     1749378 :                 if (strength == COLLATE_CONFLICT)
     747          30 :                     exprSetCollation(node, InvalidOid);
     748             :                 else
     749     1749348 :                     exprSetCollation(node, collation);
     750             : 
     751             :                 /*
     752             :                  * Likewise save the input collation, which is the one that
     753             :                  * any function called by this node should use.
     754             :                  */
     755     1749378 :                 if (loccontext.strength == COLLATE_CONFLICT)
     756         108 :                     exprSetInputCollation(node, InvalidOid);
     757             :                 else
     758     1749270 :                     exprSetInputCollation(node, loccontext.collation);
     759             :             }
     760     1749378 :             break;
     761             :     }
     762             : 
     763             :     /*
     764             :      * Now, merge my information into my parent's state.
     765             :      */
     766     6934068 :     merge_collation_state(collation,
     767             :                           strength,
     768             :                           location,
     769             :                           loccontext.collation2,
     770             :                           loccontext.location2,
     771             :                           context);
     772             : 
     773     6934044 :     return false;
     774             : }
     775             : 
     776             : /*
     777             :  * Merge collation state of a subexpression into the context for its parent.
                     :  *
                     :  * collation, strength, location: the state computed for the subexpression.
                     :  * collation2, location2: the second collation and its location for a
                     :  *      subexpression whose strength is COLLATE_CONFLICT; they are copied
                     :  *      into *context only when that conflict overrides the parent's
                     :  *      current state.
                     :  * context: the parent's state, which is updated in place.
                     :  *
                     :  * A stronger incoming state replaces the parent's, a weaker one is ignored,
                     :  * and an equal one is merged.  Merging two different non-default implicit
                     :  * collations only records a conflict in *context, whereas two different
                     :  * explicit collations raise ERRCODE_COLLATION_MISMATCH immediately.
     778             :  */
     779             : static void
     780     6934068 : merge_collation_state(Oid collation,
     781             :                       CollateStrength strength,
     782             :                       int location,
     783             :                       Oid collation2,
     784             :                       int location2,
     785             :                       assign_collations_context *context)
     786             : {
     787             :     /*
     788             :      * If the collation strength for this node is different from what's
     789             :      * already in *context, then this node either dominates or is dominated by
     790             :      * earlier siblings.
                     :      *
                     :      * In the dominated case (strength < context->strength) neither branch
                     :      * below is taken, so *context is left untouched.
     791             :      */
     792     6934068 :     if (strength > context->strength)
     793             :     {
     794             :         /* Override previous parent state */
     795     1480054 :         context->collation = collation;
     796     1480054 :         context->strength = strength;
     797     1480054 :         context->location = location;
     798             :         /* Bubble up error info if applicable */
     799     1480054 :         if (strength == COLLATE_CONFLICT)
     800             :         {
     801         114 :             context->collation2 = collation2;
     802         114 :             context->location2 = location2;
     803             :         }
     804             :     }
     805     5454014 :     else if (strength == context->strength)
     806             :     {
     807             :         /* Merge, or detect error if there's a collation conflict */
     808     5354596 :         switch (strength)
     809             :         {
     810     5136168 :             case COLLATE_NONE:
     811             :                 /* Nothing + nothing is still nothing */
     812     5136168 :                 break;
     813      218344 :             case COLLATE_IMPLICIT:
     814      218344 :                 if (collation != context->collation)
     815             :                 {
     816             :                     /*
     817             :                      * Non-default implicit collation always beats default.
                     :                      * If only the incoming collation is the default one,
                     :                      * neither test below fires and the non-default
                     :                      * collation already in *context is kept.
     818             :                      */
     819       48358 :                     if (context->collation == DEFAULT_COLLATION_OID)
     820             :                     {
     821             :                         /* Override previous parent state */
     822       13190 :                         context->collation = collation;
     823       13190 :                         context->strength = strength;
     824       13190 :                         context->location = location;
     825             :                     }
     826       35168 :                     else if (collation != DEFAULT_COLLATION_OID)
     827             :                     {
     828             :                         /*
     829             :                          * Oops, we have a conflict.  We cannot throw error
     830             :                          * here, since the conflict could be resolved by a
     831             :                          * later sibling CollateExpr, or the parent might not
     832             :                          * care about collation anyway.  Return enough info to
     833             :                          * throw the error later, if needed.
     834             :                          */
     835         180 :                         context->strength = COLLATE_CONFLICT;
     836         180 :                         context->collation2 = collation;
     837         180 :                         context->location2 = location;
     838             :                     }
     839             :                 }
     840      218344 :                 break;
     841           0 :             case COLLATE_CONFLICT:
     842             :                 /* We're still conflicted; *context keeps the details it has */
     843           0 :                 break;
     844          84 :             case COLLATE_EXPLICIT:
     845          84 :                 if (collation != context->collation)
     846             :                 {
     847             :                     /*
     848             :                      * Oops, we have a conflict of explicit COLLATE clauses.
     849             :                      * Here we choose to throw error immediately; that is what
     850             :                      * the SQL standard says to do, and there's no good reason
     851             :                      * to be less strict.
     852             :                      */
     853          24 :                     ereport(ERROR,
     854             :                             (errcode(ERRCODE_COLLATION_MISMATCH),
     855             :                              errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
     856             :                                     get_collation_name(context->collation),
     857             :                                     get_collation_name(collation)),
     858             :                              parser_errposition(context->pstate, location)));
     859             :                 }
     860          60 :                 break;
     861             :         }
     862       99418 :     }
     863     6934044 : }
     864             : 
     865             : /*
     866             :  * Aggref is a special case because expressions used only for ordering
     867             :  * shouldn't be taken to conflict with each other or with regular args,
     868             :  * indeed shouldn't affect the aggregate's result collation at all.
     869             :  * We handle this by applying assign_expr_collations() to them rather than
     870             :  * passing down our loccontext.
     871             :  *
     872             :  * Note that we recurse to each TargetEntry, not directly to its contained
     873             :  * expression, so that the case above for T_TargetEntry will complain if we
     874             :  * can't resolve a collation for an ORDER BY item (whether or not it is also
     875             :  * a normal aggregate arg).
     876             :  *
     877             :  * We need not recurse into the aggorder or aggdistinct lists, because those
     878             :  * contain only SortGroupClause nodes which we need not process.
                     :  *
                     :  * aggref: the aggregate node; it must have no direct arguments.
                     :  * loccontext: the caller's collation state for the Aggref.  Non-resjunk
                     :  *      arguments are walked with it and so merge into it; resjunk
                     :  *      arguments use only its pstate and leave it unchanged.
     879             :  */
     880             : static void
     881       44410 : assign_aggregate_collations(Aggref *aggref,
     882             :                             assign_collations_context *loccontext)
     883             : {
     884             :     ListCell   *lc;
     885             : 
     886             :     /* Plain aggregates have no direct args */
     887             :     Assert(aggref->aggdirectargs == NIL);
     888             : 
     889             :     /* Process aggregated args, holding resjunk ones at arm's length */
     890       82436 :     foreach(lc, aggref->args)
     891             :     {
     892       38038 :         TargetEntry *tle = lfirst_node(TargetEntry, lc);
     893             : 
     894       38038 :         if (tle->resjunk)
     895        1088 :             assign_expr_collations(loccontext->pstate, (Node *) tle);
     896             :         else
     897       36950 :             (void) assign_collations_walker((Node *) tle, loccontext);
     898             :     }
     899       44398 : }
     900             : 
     901             : /*
     902             :  * For ordered-set aggregates, it's somewhat unclear how best to proceed.
     903             :  * The spec-defined inverse distribution functions have only one sort column
     904             :  * and don't return collatable types, but this is clearly too restrictive in
     905             :  * the general case.  Our solution is to consider that the aggregate's direct
     906             :  * arguments contribute normally to determination of the aggregate's own
     907             :  * collation, while aggregated arguments contribute only when the aggregate
     908             :  * is designed to have exactly one aggregated argument (i.e., it has a single
     909             :  * aggregated argument and is non-variadic).  If it can have more than one
     910             :  * aggregated argument, we process the aggregated arguments as independent
     911             :  * sort columns.  This avoids throwing error for something like
     912             :  *      agg(...) within group (order by x collate "foo", y collate "bar")
     913             :  * while also guaranteeing that variadic aggregates don't change in behavior
     914             :  * depending on how many sort columns a particular call happens to have.
     915             :  *
     916             :  * Otherwise this is much like the plain-aggregate case.
                     :  *
                     :  * aggref: the ordered-set aggregate node; its aggdirectargs and args lists
                     :  *      are both processed here.
                     :  * loccontext: the caller's collation state for the Aggref.  Direct
                     :  *      arguments always merge into it; aggregated arguments merge into it
                     :  *      only when args has exactly one entry and
                     :  *      get_func_variadictype(aggfnoid) returns InvalidOid, and otherwise
                     :  *      use only its pstate.
     917             :  */
     918             : static void
     919         180 : assign_ordered_set_collations(Aggref *aggref,
     920             :                               assign_collations_context *loccontext)
     921             : {
     922             :     bool        merge_sort_collations;
     923             :     ListCell   *lc;
     924             : 
     925             :     /* Merge sort collations to parent only if there can be only one */
     926         360 :     merge_sort_collations = (list_length(aggref->args) == 1 &&
     927         180 :                              get_func_variadictype(aggref->aggfnoid) == InvalidOid);
     928             : 
     929             :     /* Direct args, if any, are normal children of the Aggref node */
     930         180 :     (void) assign_collations_walker((Node *) aggref->aggdirectargs,
     931             :                                     loccontext);
     932             : 
     933             :     /* Process aggregated args appropriately */
     934         360 :     foreach(lc, aggref->args)
     935             :     {
     936         180 :         TargetEntry *tle = lfirst_node(TargetEntry, lc);
     937             : 
     938         180 :         if (merge_sort_collations)
     939         180 :             (void) assign_collations_walker((Node *) tle, loccontext);
     940             :         else
     941           0 :             assign_expr_collations(loccontext->pstate, (Node *) tle);
     942             :     }
     943         180 : }
     944             : 
     945             : /*
     946             :  * Hypothetical-set aggregates are even more special: per spec, we need to
     947             :  * unify the collations of each pair of hypothetical and aggregated args.
     948             :  * And we need to force the choice of collation down into the sort column
     949             :  * to ensure that the sort happens with the chosen collation.  Other than
     950             :  * that, the behavior is like regular ordered-set aggregates.  Note that
     951             :  * hypothetical direct arguments contribute to the aggregate collation
     952             :  * only when their partner aggregated arguments do.
     953             :  */
     954             : static void
     955         126 : assign_hypothetical_collations(Aggref *aggref,
     956             :                                assign_collations_context *loccontext)
     957             : {
     958         126 :     ListCell   *h_cell = list_head(aggref->aggdirectargs);
     959         126 :     ListCell   *s_cell = list_head(aggref->args);
     960             :     bool        merge_sort_collations;
     961             :     int         extra_args;
     962             : 
     963             :     /* Merge sort collations to parent only if there can be only one: exactly one aggregated arg and get_func_variadictype() reporting InvalidOid */
     964         222 :     merge_sort_collations = (list_length(aggref->args) == 1 &&
     965          96 :                              get_func_variadictype(aggref->aggfnoid) == InvalidOid);
     966             : 
     967             :     /* Process any non-hypothetical direct args: the leading direct args in excess of the aggregated args, walked directly with the caller's context */
     968         126 :     extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
     969             :     Assert(extra_args >= 0);	/* never fewer direct args than aggregated args */
     970         126 :     while (extra_args-- > 0)
     971             :     {
     972           0 :         (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
     973           0 :         h_cell = lnext(aggref->aggdirectargs, h_cell);
     974             :     }
     975             : 
     976             :     /* Scan hypothetical args and aggregated args in parallel; each remaining direct arg is paired with the aggregated-arg TLE at the same position */
     977         300 :     while (h_cell && s_cell)
     978             :     {
     979         180 :         Node       *h_arg = (Node *) lfirst(h_cell);
     980         180 :         TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
     981             :         assign_collations_context paircontext;
     982             : 
     983             :         /*
     984             :          * Assign collations internally in this pair of expressions, then
     985             :          * choose a common collation for them.  This should match
     986             :          * select_common_collation(), but we can't use that function as-is
     987             :          * because we need access to the whole collation state so we can
     988             :          * bubble it up to the aggregate function's level.
     989             :          */
     990         180 :         paircontext.pstate = loccontext->pstate;
     991         180 :         paircontext.collation = InvalidOid;
     992         180 :         paircontext.strength = COLLATE_NONE;
     993         180 :         paircontext.location = -1;
     994             :         /* Set these fields just to suppress uninitialized-value warnings: */
     995         180 :         paircontext.collation2 = InvalidOid;
     996         180 :         paircontext.location2 = -1;
     997             : 
     998         180 :         (void) assign_collations_walker(h_arg, &paircontext);
     999         180 :         (void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
    1000             : 
    1001             :         /* deal with collation conflict: raise an error naming both collations, with the error position taken from location2 */
    1002         174 :         if (paircontext.strength == COLLATE_CONFLICT)
    1003           0 :             ereport(ERROR,
    1004             :                     (errcode(ERRCODE_COLLATION_MISMATCH),
    1005             :                      errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
    1006             :                             get_collation_name(paircontext.collation),
    1007             :                             get_collation_name(paircontext.collation2)),
    1008             :                      errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
    1009             :                      parser_errposition(paircontext.pstate,
    1010             :                                         paircontext.location2)));
    1011             : 
    1012             :         /*
    1013             :          * At this point paircontext.collation can be InvalidOid only if the
    1014             :          * type is not collatable; no need to do anything in that case.  If we
    1015             :          * do have to change the sort column's collation, do it by inserting a
    1016             :          * RelabelType node into the sort column TLE.
    1017             :          *
    1018             :          * XXX This is pretty grotty for a couple of reasons:
    1019             :          * assign_collations_walker isn't supposed to be changing the
    1020             :          * expression structure like this, and a parse-time change of
    1021             :          * collation ought to be signaled by a CollateExpr not a RelabelType
    1022             :          * (the use of RelabelType for collation marking is supposed to be a
    1023             :          * planner/executor thing only).  But we have no better alternative.
    1024             :          * In particular, injecting a CollateExpr could result in the
    1025             :          * expression being interpreted differently after dump/reload, since
    1026             :          * we might be effectively promoting an implicit collation to
    1027             :          * explicit.  This kluge is relying on ruleutils.c not printing a
    1028             :          * COLLATE clause for a RelabelType, and probably on some other
    1029             :          * fragile behaviors.
    1030             :          */
    1031         204 :         if (OidIsValid(paircontext.collation) &&
    1032          30 :             paircontext.collation != exprCollation((Node *) s_tle->expr))
    1033             :         {
    1034           0 :             s_tle->expr = (Expr *)
    1035           0 :                 makeRelabelType(s_tle->expr,
    1036           0 :                                 exprType((Node *) s_tle->expr),
    1037           0 :                                 exprTypmod((Node *) s_tle->expr),
    1038             :                                 paircontext.collation,
    1039             :                                 COERCE_IMPLICIT_CAST);
    1040             :         }
    1041             : 
    1042             :         /*
    1043             :          * If appropriate, merge this column's collation state up to the
    1044             :          * aggregate function.
    1045             :          */
    1046         174 :         if (merge_sort_collations)
    1047           0 :             merge_collation_state(paircontext.collation,
    1048             :                                   paircontext.strength,
    1049             :                                   paircontext.location,
    1050             :                                   paircontext.collation2,
    1051             :                                   paircontext.location2,
    1052             :                                   loccontext);
    1053             : 
    1054         174 :         h_cell = lnext(aggref->aggdirectargs, h_cell);
    1055         174 :         s_cell = lnext(aggref->args, s_cell);
    1056             :     }
    1057             :     Assert(h_cell == NULL && s_cell == NULL);	/* both lists must run out together */
    1058         120 : }

Generated by: LCOV version 1.14