Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parse_node.c
4 : * various routines that make nodes for querytrees
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/parser/parse_node.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/table.h"
19 : #include "catalog/pg_type.h"
20 : #include "mb/pg_wchar.h"
21 : #include "nodes/makefuncs.h"
22 : #include "nodes/miscnodes.h"
23 : #include "nodes/nodeFuncs.h"
24 : #include "nodes/subscripting.h"
25 : #include "parser/parse_node.h"
26 : #include "utils/builtins.h"
27 : #include "utils/lsyscache.h"
28 :
29 : static void pcb_error_callback(void *arg);
30 :
31 :
32 : /*
33 : * make_parsestate
34 : * Allocate and initialize a new ParseState.
35 : *
36 : * Caller should eventually release the ParseState via free_parsestate().
37 : */
38 : ParseState *
39 1395578 : make_parsestate(ParseState *parentParseState)
40 : {
41 : ParseState *pstate;
42 :
43 1395578 : pstate = palloc0(sizeof(ParseState));
44 :
45 1395578 : pstate->parentParseState = parentParseState;
46 :
47 : /* Fill in fields that don't start at null/false/zero */
48 1395578 : pstate->p_next_resno = 1;
49 1395578 : pstate->p_resolve_unknowns = true;
50 :
51 1395578 : if (parentParseState)
52 : {
53 89516 : pstate->p_sourcetext = parentParseState->p_sourcetext;
54 : /* all hooks are copied from parent */
55 89516 : pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
56 89516 : pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
57 89516 : pstate->p_paramref_hook = parentParseState->p_paramref_hook;
58 89516 : pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
59 89516 : pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
60 : /* query environment stays in context for the whole parse analysis */
61 89516 : pstate->p_queryEnv = parentParseState->p_queryEnv;
62 : }
63 :
64 1395578 : return pstate;
65 : }
66 :
67 : /*
68 : * free_parsestate
69 : * Release a ParseState and any subsidiary resources.
70 : */
71 : void
72 1276204 : free_parsestate(ParseState *pstate)
73 : {
74 : /*
75 : * Check that we did not produce too many resnos; at the very least we
76 : * cannot allow more than 2^16, since that would exceed the range of a
77 : * AttrNumber. It seems safest to use MaxTupleAttributeNumber.
78 : */
79 1276204 : if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
80 0 : ereport(ERROR,
81 : (errcode(ERRCODE_TOO_MANY_COLUMNS),
82 : errmsg("target lists can have at most %d entries",
83 : MaxTupleAttributeNumber)));
84 :
85 1276204 : if (pstate->p_target_relation != NULL)
86 90858 : table_close(pstate->p_target_relation, NoLock);
87 :
88 1276204 : pfree(pstate);
89 1276204 : }
90 :
91 :
92 : /*
93 : * parser_errposition
94 : * Report a parse-analysis-time cursor position, if possible.
95 : *
96 : * This is expected to be used within an ereport() call. The return value
97 : * is a dummy (always 0, in fact).
98 : *
99 : * The locations stored in raw parsetrees are byte offsets into the source
100 : * string. We have to convert them to 1-based character indexes for reporting
101 : * to clients. (We do things this way to avoid unnecessary overhead in the
102 : * normal non-error case: computing character indexes would be much more
103 : * expensive than storing token offsets.)
104 : */
105 : int
106 9022 : parser_errposition(ParseState *pstate, int location)
107 : {
108 : int pos;
109 :
110 : /* No-op if location was not provided */
111 9022 : if (location < 0)
112 82 : return 0;
113 : /* Can't do anything if source text is not available */
114 8940 : if (pstate == NULL || pstate->p_sourcetext == NULL)
115 144 : return 0;
116 : /* Convert offset to character number */
117 8796 : pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
118 : /* And pass it to the ereport mechanism */
119 8796 : return errposition(pos);
120 : }
121 :
122 :
123 : /*
124 : * setup_parser_errposition_callback
125 : * Arrange for non-parser errors to report an error position
126 : *
127 : * Sometimes the parser calls functions that aren't part of the parser
128 : * subsystem and can't reasonably be passed a ParseState; yet we would
129 : * like any errors thrown in those functions to be tagged with a parse
130 : * error location. Use this function to set up an error context stack
131 : * entry that will accomplish that. Usage pattern:
132 : *
133 : * declare a local variable "ParseCallbackState pcbstate"
134 : * ...
135 : * setup_parser_errposition_callback(&pcbstate, pstate, location);
136 : * call function that might throw error;
137 : * cancel_parser_errposition_callback(&pcbstate);
138 : */
139 : void
140 1935884 : setup_parser_errposition_callback(ParseCallbackState *pcbstate,
141 : ParseState *pstate, int location)
142 : {
143 : /* Setup error traceback support for ereport() */
144 1935884 : pcbstate->pstate = pstate;
145 1935884 : pcbstate->location = location;
146 1935884 : pcbstate->errcallback.callback = pcb_error_callback;
147 1935884 : pcbstate->errcallback.arg = pcbstate;
148 1935884 : pcbstate->errcallback.previous = error_context_stack;
149 1935884 : error_context_stack = &pcbstate->errcallback;
150 1935884 : }
151 :
152 : /*
153 : * Cancel a previously-set-up errposition callback.
154 : */
155 : void
156 1931018 : cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
157 : {
158 : /* Pop the error context stack */
159 1931018 : error_context_stack = pcbstate->errcallback.previous;
160 1931018 : }
161 :
162 : /*
163 : * Error context callback for inserting parser error location.
164 : *
165 : * Note that this will be called for *any* error occurring while the
166 : * callback is installed. We avoid inserting an irrelevant error location
167 : * if the error is a query cancel --- are there any other important cases?
168 : */
169 : static void
170 4886 : pcb_error_callback(void *arg)
171 : {
172 4886 : ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
173 :
174 4886 : if (geterrcode() != ERRCODE_QUERY_CANCELED)
175 4886 : (void) parser_errposition(pcbstate->pstate, pcbstate->location);
176 4886 : }
177 :
178 :
179 : /*
180 : * transformContainerType()
181 : * Identify the actual container type for a subscripting operation.
182 : *
183 : * containerType/containerTypmod are modified if necessary to identify
184 : * the actual container type and typmod. This mainly involves smashing
185 : * any domain to its base type, but there are some special considerations.
186 : * Note that caller still needs to check if the result type is a container.
187 : */
188 : void
189 12278 : transformContainerType(Oid *containerType, int32 *containerTypmod)
190 : {
191 : /*
192 : * If the input is a domain, smash to base type, and extract the actual
193 : * typmod to be applied to the base type. Subscripting a domain is an
194 : * operation that necessarily works on the base container type, not the
195 : * domain itself. (Note that we provide no method whereby the creator of a
196 : * domain over a container type could hide its ability to be subscripted.)
197 : */
198 12278 : *containerType = getBaseTypeAndTypmod(*containerType, containerTypmod);
199 :
200 : /*
201 : * We treat int2vector and oidvector as though they were domains over
202 : * int2[] and oid[]. This is needed because array slicing could create an
203 : * array that doesn't satisfy the dimensionality constraints of the
204 : * xxxvector type; so we want the result of a slice operation to be
205 : * considered to be of the more general type.
206 : */
207 12278 : if (*containerType == INT2VECTOROID)
208 3412 : *containerType = INT2ARRAYOID;
209 8866 : else if (*containerType == OIDVECTOROID)
210 822 : *containerType = OIDARRAYOID;
211 12278 : }
212 :
213 : /*
214 : * transformContainerSubscripts()
215 : * Transform container (array, etc) subscripting. This is used for both
216 : * container fetch and container assignment.
217 : *
218 : * In a container fetch, we are given a source container value and we produce
219 : * an expression that represents the result of extracting a single container
220 : * element or a container slice.
221 : *
222 : * Container assignments are treated basically the same as container fetches
223 : * here. The caller will modify the result node to insert the source value
224 : * that is to be assigned to the element or slice that a fetch would have
225 : * retrieved. The execution result will be a new container value with
226 : * the source value inserted into the right part of the container.
227 : *
228 : * For both cases, if the source is of a domain-over-container type, the
229 : * result is the same as if it had been of the container type; essentially,
230 : * we must fold a domain to its base type before applying subscripting.
231 : * (Note that int2vector and oidvector are treated as domains here.)
232 : *
233 : * pstate Parse state
234 : * containerBase Already-transformed expression for the container as a whole
235 : * containerType OID of container's datatype (should match type of
236 : * containerBase, or be the base type of containerBase's
237 : * domain type)
238 : * containerTypMod typmod for the container
239 : * indirection Untransformed list of subscripts (must not be NIL)
240 : * isAssignment True if this will become a container assignment.
241 : */
242 : SubscriptingRef *
243 12278 : transformContainerSubscripts(ParseState *pstate,
244 : Node *containerBase,
245 : Oid containerType,
246 : int32 containerTypMod,
247 : List *indirection,
248 : bool isAssignment)
249 : {
250 : SubscriptingRef *sbsref;
251 : const SubscriptRoutines *sbsroutines;
252 : Oid elementType;
253 12278 : bool isSlice = false;
254 : ListCell *idx;
255 :
256 : /*
257 : * Determine the actual container type, smashing any domain. In the
258 : * assignment case the caller already did this, since it also needs to
259 : * know the actual container type.
260 : */
261 12278 : if (!isAssignment)
262 10498 : transformContainerType(&containerType, &containerTypMod);
263 :
264 : /*
265 : * Verify that the container type is subscriptable, and get its support
266 : * functions and typelem.
267 : */
268 12278 : sbsroutines = getSubscriptingRoutines(containerType, &elementType);
269 12278 : if (!sbsroutines)
270 10 : ereport(ERROR,
271 : (errcode(ERRCODE_DATATYPE_MISMATCH),
272 : errmsg("cannot subscript type %s because it does not support subscripting",
273 : format_type_be(containerType)),
274 : parser_errposition(pstate, exprLocation(containerBase))));
275 :
276 : /*
277 : * Detect whether any of the indirection items are slice specifiers.
278 : *
279 : * A list containing only simple subscripts refers to a single container
280 : * element. If any of the items are slice specifiers (lower:upper), then
281 : * the subscript expression means a container slice operation.
282 : */
283 24554 : foreach(idx, indirection)
284 : {
285 12746 : A_Indices *ai = lfirst_node(A_Indices, idx);
286 :
287 12746 : if (ai->is_slice)
288 : {
289 460 : isSlice = true;
290 460 : break;
291 : }
292 : }
293 :
294 : /*
295 : * Ready to build the SubscriptingRef node.
296 : */
297 12268 : sbsref = makeNode(SubscriptingRef);
298 :
299 12268 : sbsref->refcontainertype = containerType;
300 12268 : sbsref->refelemtype = elementType;
301 : /* refrestype is to be set by container-specific logic */
302 12268 : sbsref->reftypmod = containerTypMod;
303 : /* refcollid will be set by parse_collate.c */
304 : /* refupperindexpr, reflowerindexpr are to be set by container logic */
305 12268 : sbsref->refexpr = (Expr *) containerBase;
306 12268 : sbsref->refassgnexpr = NULL; /* caller will fill if it's an assignment */
307 :
308 : /*
309 : * Call the container-type-specific logic to transform the subscripts and
310 : * determine the subscripting result type.
311 : */
312 12268 : sbsroutines->transform(sbsref, indirection, pstate,
313 : isSlice, isAssignment);
314 :
315 : /*
316 : * Verify we got a valid type (this defends, for example, against someone
317 : * using array_subscript_handler as typsubscript without setting typelem).
318 : */
319 12222 : if (!OidIsValid(sbsref->refrestype))
320 0 : ereport(ERROR,
321 : (errcode(ERRCODE_DATATYPE_MISMATCH),
322 : errmsg("cannot subscript type %s because it does not support subscripting",
323 : format_type_be(containerType))));
324 :
325 12222 : return sbsref;
326 : }
327 :
328 : /*
329 : * make_const
330 : *
331 : * Convert an A_Const node (as returned by the grammar) to a Const node
332 : * of the "natural" type for the constant. Note that this routine is
333 : * only used when there is no explicit cast for the constant, so we
334 : * have to guess what type is wanted.
335 : *
336 : * For string literals we produce a constant of type UNKNOWN ---- whose
337 : * representation is the same as cstring, but it indicates to later type
338 : * resolution that we're not sure yet what type it should be considered.
339 : * Explicit "NULL" constants are also typed as UNKNOWN.
340 : *
341 : * For integers and floats we produce int4, int8, or numeric depending
342 : * on the value of the number. XXX We should produce int2 as well,
343 : * but additional cleanup is needed before we can do that; there are
344 : * too many examples that fail if we try.
345 : */
346 : Const *
347 1122696 : make_const(ParseState *pstate, A_Const *aconst)
348 : {
349 : Const *con;
350 : Datum val;
351 : Oid typeid;
352 : int typelen;
353 : bool typebyval;
354 : ParseCallbackState pcbstate;
355 :
356 1122696 : if (aconst->isnull)
357 : {
358 : /* return a null const */
359 69774 : con = makeConst(UNKNOWNOID,
360 : -1,
361 : InvalidOid,
362 : -2,
363 : (Datum) 0,
364 : true,
365 : false);
366 69774 : con->location = aconst->location;
367 69774 : return con;
368 : }
369 :
370 1052922 : switch (nodeTag(&aconst->val))
371 : {
372 381814 : case T_Integer:
373 381814 : val = Int32GetDatum(intVal(&aconst->val));
374 :
375 381814 : typeid = INT4OID;
376 381814 : typelen = sizeof(int32);
377 381814 : typebyval = true;
378 381814 : break;
379 :
380 11692 : case T_Float:
381 : {
382 : /* could be an oversize integer as well as a float ... */
383 :
384 11692 : ErrorSaveContext escontext = {T_ErrorSaveContext};
385 : int64 val64;
386 :
387 11692 : val64 = pg_strtoint64_safe(aconst->val.fval.fval, (Node *) &escontext);
388 11692 : if (!escontext.error_occurred)
389 : {
390 : /*
391 : * It might actually fit in int32. Probably only INT_MIN
392 : * can occur, but we'll code the test generally just to be
393 : * sure.
394 : */
395 1102 : int32 val32 = (int32) val64;
396 :
397 1102 : if (val64 == (int64) val32)
398 : {
399 174 : val = Int32GetDatum(val32);
400 :
401 174 : typeid = INT4OID;
402 174 : typelen = sizeof(int32);
403 174 : typebyval = true;
404 : }
405 : else
406 : {
407 928 : val = Int64GetDatum(val64);
408 :
409 928 : typeid = INT8OID;
410 928 : typelen = sizeof(int64);
411 928 : typebyval = FLOAT8PASSBYVAL; /* int8 and float8 alike */
412 : }
413 : }
414 : else
415 : {
416 : /* arrange to report location if numeric_in() fails */
417 10590 : setup_parser_errposition_callback(&pcbstate, pstate, aconst->location);
418 10590 : val = DirectFunctionCall3(numeric_in,
419 : CStringGetDatum(aconst->val.fval.fval),
420 : ObjectIdGetDatum(InvalidOid),
421 : Int32GetDatum(-1));
422 10590 : cancel_parser_errposition_callback(&pcbstate);
423 :
424 10590 : typeid = NUMERICOID;
425 10590 : typelen = -1; /* variable len */
426 10590 : typebyval = false;
427 : }
428 11692 : break;
429 : }
430 :
431 56712 : case T_Boolean:
432 56712 : val = BoolGetDatum(boolVal(&aconst->val));
433 :
434 56712 : typeid = BOOLOID;
435 56712 : typelen = 1;
436 56712 : typebyval = true;
437 56712 : break;
438 :
439 598648 : case T_String:
440 :
441 : /*
442 : * We assume here that UNKNOWN's internal representation is the
443 : * same as CSTRING
444 : */
445 598648 : val = CStringGetDatum(strVal(&aconst->val));
446 :
447 598648 : typeid = UNKNOWNOID; /* will be coerced later */
448 598648 : typelen = -2; /* cstring-style varwidth type */
449 598648 : typebyval = false;
450 598648 : break;
451 :
452 4056 : case T_BitString:
453 : /* arrange to report location if bit_in() fails */
454 4056 : setup_parser_errposition_callback(&pcbstate, pstate, aconst->location);
455 4056 : val = DirectFunctionCall3(bit_in,
456 : CStringGetDatum(aconst->val.bsval.bsval),
457 : ObjectIdGetDatum(InvalidOid),
458 : Int32GetDatum(-1));
459 4032 : cancel_parser_errposition_callback(&pcbstate);
460 4032 : typeid = BITOID;
461 4032 : typelen = -1;
462 4032 : typebyval = false;
463 4032 : break;
464 :
465 0 : default:
466 0 : elog(ERROR, "unrecognized node type: %d", (int) nodeTag(&aconst->val));
467 : return NULL; /* keep compiler quiet */
468 : }
469 :
470 1052898 : con = makeConst(typeid,
471 : -1, /* typmod -1 is OK for all cases */
472 : InvalidOid, /* all cases are uncollatable types */
473 : typelen,
474 : val,
475 : false,
476 : typebyval);
477 1052898 : con->location = aconst->location;
478 :
479 1052898 : return con;
480 : }
|