Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * createas.c
4 : * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO.
5 : * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors,
6 : * we implement that here, too.
7 : *
8 : * We implement this by diverting the query's normal output to a
9 : * specialized DestReceiver type.
10 : *
11 : * Formerly, CTAS was implemented as a variant of SELECT, which led
12 : * to assorted legacy behaviors that we still try to preserve, notably that
13 : * we must return a tuples-processed count in the QueryCompletion. (We no
14 : * longer do that for CTAS ... WITH NO DATA, however.)
15 : *
16 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
17 : * Portions Copyright (c) 1994, Regents of the University of California
18 : *
19 : *
20 : * IDENTIFICATION
21 : * src/backend/commands/createas.c
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres.h"
26 :
27 : #include "access/heapam.h"
28 : #include "access/reloptions.h"
29 : #include "access/tableam.h"
30 : #include "access/xact.h"
31 : #include "catalog/namespace.h"
32 : #include "catalog/toasting.h"
33 : #include "commands/createas.h"
34 : #include "commands/matview.h"
35 : #include "commands/prepare.h"
36 : #include "commands/tablecmds.h"
37 : #include "commands/view.h"
38 : #include "executor/execdesc.h"
39 : #include "executor/executor.h"
40 : #include "nodes/makefuncs.h"
41 : #include "nodes/nodeFuncs.h"
42 : #include "nodes/queryjumble.h"
43 : #include "parser/analyze.h"
44 : #include "rewrite/rewriteHandler.h"
45 : #include "tcop/tcopprot.h"
46 : #include "utils/builtins.h"
47 : #include "utils/lsyscache.h"
48 : #include "utils/rls.h"
49 : #include "utils/snapmgr.h"
50 :
51 : typedef struct
52 : {
53 : DestReceiver pub; /* publicly-known function pointers */
54 : IntoClause *into; /* target relation specification */
55 : /* These fields are filled by intorel_startup: */
56 : Relation rel; /* relation to write to */
57 : ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */
58 : CommandId output_cid; /* cmin to insert in output tuples */
59 : int ti_options; /* table_tuple_insert performance options */
60 : BulkInsertState bistate; /* bulk insert state */
61 : } DR_intorel;
62 :
63 : /* utility functions for CTAS definition creation */
64 : static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into);
65 : static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into);
66 :
67 : /* DestReceiver routines for collecting data */
68 : static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
69 : static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self);
70 : static void intorel_shutdown(DestReceiver *self);
71 : static void intorel_destroy(DestReceiver *self);
72 :
73 :
74 : /*
75 : * create_ctas_internal
76 : *
77 : * Internal utility used for the creation of the definition of a relation
78 : * created via CREATE TABLE AS or a materialized view. Caller needs to
79 : * provide a list of attributes (ColumnDef nodes).
80 : */
81 : static ObjectAddress
82 1686 : create_ctas_internal(List *attrList, IntoClause *into)
83 : {
84 1686 : CreateStmt *create = makeNode(CreateStmt);
85 : bool is_matview;
86 : char relkind;
87 : Datum toast_options;
88 1686 : const char *const validnsps[] = HEAP_RELOPT_NAMESPACES;
89 : ObjectAddress intoRelationAddr;
90 :
91 : /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
92 1686 : is_matview = (into->viewQuery != NULL);
93 1686 : relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
94 :
95 : /*
96 : * Create the target relation by faking up a CREATE TABLE parsetree and
97 : * passing it to DefineRelation.
98 : */
99 1686 : create->relation = into->rel;
100 1686 : create->tableElts = attrList;
101 1686 : create->inhRelations = NIL;
102 1686 : create->ofTypename = NULL;
103 1686 : create->constraints = NIL;
104 1686 : create->options = into->options;
105 1686 : create->oncommit = into->onCommit;
106 1686 : create->tablespacename = into->tableSpaceName;
107 1686 : create->if_not_exists = false;
108 1686 : create->accessMethod = into->accessMethod;
109 :
110 : /*
111 : * Create the relation. (This will error out if there's an existing view,
112 : * so we don't need more code to complain if "replace" is false.)
113 : */
114 1686 : intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
115 :
116 : /*
117 : * If necessary, create a TOAST table for the target table. Note that
118 : * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
119 : * that the TOAST table will be visible for insertion.
120 : */
121 1668 : CommandCounterIncrement();
122 :
123 : /* parse and validate reloptions for the toast table */
124 1668 : toast_options = transformRelOptions((Datum) 0,
125 : create->options,
126 : "toast",
127 : validnsps,
128 : true, false);
129 :
130 1668 : (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);
131 :
132 1668 : NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);
133 :
134 : /* Create the "view" part of a materialized view. */
135 1668 : if (is_matview)
136 : {
137 : /* StoreViewQuery scribbles on tree, so make a copy */
138 456 : Query *query = copyObject(into->viewQuery);
139 :
140 456 : StoreViewQuery(intoRelationAddr.objectId, query, false);
141 450 : CommandCounterIncrement();
142 : }
143 :
144 1662 : return intoRelationAddr;
145 : }
146 :
147 :
148 : /*
149 : * create_ctas_nodata
150 : *
151 : * Create CTAS or materialized view when WITH NO DATA is used, starting from
152 : * the targetlist of the SELECT or view definition.
153 : */
154 : static ObjectAddress
155 484 : create_ctas_nodata(List *tlist, IntoClause *into)
156 : {
157 : List *attrList;
158 : ListCell *t,
159 : *lc;
160 :
161 : /*
162 : * Build list of ColumnDefs from non-junk elements of the tlist. If a
163 : * column name list was specified in CREATE TABLE AS, override the column
164 : * names in the query. (Too few column names are OK, too many are not.)
165 : */
166 484 : attrList = NIL;
167 484 : lc = list_head(into->colNames);
168 1538 : foreach(t, tlist)
169 : {
170 1054 : TargetEntry *tle = (TargetEntry *) lfirst(t);
171 :
172 1054 : if (!tle->resjunk)
173 : {
174 : ColumnDef *col;
175 : char *colname;
176 :
177 1054 : if (lc)
178 : {
179 124 : colname = strVal(lfirst(lc));
180 124 : lc = lnext(into->colNames, lc);
181 : }
182 : else
183 930 : colname = tle->resname;
184 :
185 1054 : col = makeColumnDef(colname,
186 1054 : exprType((Node *) tle->expr),
187 1054 : exprTypmod((Node *) tle->expr),
188 1054 : exprCollation((Node *) tle->expr));
189 :
190 : /*
191 : * It's possible that the column is of a collatable type but the
192 : * collation could not be resolved, so double-check. (We must
193 : * check this here because DefineRelation would adopt the type's
194 : * default collation rather than complaining.)
195 : */
196 1924 : if (!OidIsValid(col->collOid) &&
197 870 : type_is_collatable(col->typeName->typeOid))
198 0 : ereport(ERROR,
199 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
200 : errmsg("no collation was derived for column \"%s\" with collatable type %s",
201 : col->colname,
202 : format_type_be(col->typeName->typeOid)),
203 : errhint("Use the COLLATE clause to set the collation explicitly.")));
204 :
205 1054 : attrList = lappend(attrList, col);
206 : }
207 : }
208 :
209 484 : if (lc != NULL)
210 18 : ereport(ERROR,
211 : (errcode(ERRCODE_SYNTAX_ERROR),
212 : errmsg("too many column names were specified")));
213 :
214 : /* Create the relation definition using the ColumnDef list */
215 466 : return create_ctas_internal(attrList, into);
216 : }
217 :
218 :
219 : /*
220 : * ExecCreateTableAs -- execute a CREATE TABLE AS command
221 : */
222 : ObjectAddress
223 1760 : ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
224 : ParamListInfo params, QueryEnvironment *queryEnv,
225 : QueryCompletion *qc)
226 : {
227 1760 : Query *query = castNode(Query, stmt->query);
228 1760 : IntoClause *into = stmt->into;
229 1760 : JumbleState *jstate = NULL;
230 1760 : bool is_matview = (into->viewQuery != NULL);
231 1760 : bool do_refresh = false;
232 : DestReceiver *dest;
233 : ObjectAddress address;
234 :
235 : /* Check if the relation exists or not */
236 1760 : if (CreateTableAsRelExists(stmt))
237 46 : return InvalidObjectAddress;
238 :
239 : /*
240 : * Create the tuple receiver object and insert info it will need
241 : */
242 1668 : dest = CreateIntoRelDestReceiver(into);
243 :
244 : /* Query contained by CTAS needs to be jumbled if requested */
245 1668 : if (IsQueryIdEnabled())
246 436 : jstate = JumbleQuery(query);
247 :
248 1668 : if (post_parse_analyze_hook)
249 436 : (*post_parse_analyze_hook) (pstate, query, jstate);
250 :
251 : /*
252 : * The contained Query could be a SELECT, or an EXECUTE utility command.
253 : * If the latter, we just pass it off to ExecuteQuery.
254 : */
255 1668 : if (query->commandType == CMD_UTILITY &&
256 46 : IsA(query->utilityStmt, ExecuteStmt))
257 : {
258 46 : ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt);
259 :
260 : Assert(!is_matview); /* excluded by syntax */
261 46 : ExecuteQuery(pstate, estmt, into, params, dest, qc);
262 :
263 : /* get object address that intorel_startup saved for us */
264 46 : address = ((DR_intorel *) dest)->reladdr;
265 :
266 46 : return address;
267 : }
268 : Assert(query->commandType == CMD_SELECT);
269 :
270 : /*
271 : * For materialized views, always skip data during table creation, and use
272 : * REFRESH instead (see below).
273 : */
274 1622 : if (is_matview)
275 : {
276 456 : do_refresh = !into->skipData;
277 456 : into->skipData = true;
278 : }
279 :
280 1622 : if (into->skipData)
281 : {
282 : /*
283 : * If WITH NO DATA was specified, do not go through the rewriter,
284 : * planner and executor. Just define the relation using a code path
285 : * similar to CREATE VIEW. This avoids dump/restore problems stemming
286 : * from running the planner before all dependencies are set up.
287 : */
288 484 : address = create_ctas_nodata(query->targetList, into);
289 :
290 : /*
291 : * For materialized views, reuse the REFRESH logic, which locks down
292 : * security-restricted operations and restricts the search_path. This
293 : * reduces the chance that a subsequent refresh will fail.
294 : */
295 460 : if (do_refresh)
296 362 : RefreshMatViewByOid(address.objectId, true, false, false,
297 : pstate->p_sourcetext, qc);
298 :
299 : }
300 : else
301 : {
302 : List *rewritten;
303 : PlannedStmt *plan;
304 : QueryDesc *queryDesc;
305 :
306 : Assert(!is_matview);
307 :
308 : /*
309 : * Parse analysis was done already, but we still have to run the rule
310 : * rewriter. We do not do AcquireRewriteLocks: we assume the query
311 : * either came straight from the parser, or suitable locks were
312 : * acquired by plancache.c.
313 : */
314 1138 : rewritten = QueryRewrite(query);
315 :
316 : /* SELECT should never rewrite to more or less than one SELECT query */
317 1138 : if (list_length(rewritten) != 1)
318 0 : elog(ERROR, "unexpected rewrite result for CREATE TABLE AS SELECT");
319 1138 : query = linitial_node(Query, rewritten);
320 : Assert(query->commandType == CMD_SELECT);
321 :
322 : /* plan the query */
323 1138 : plan = pg_plan_query(query, pstate->p_sourcetext,
324 : CURSOR_OPT_PARALLEL_OK, params);
325 :
326 : /*
327 : * Use a snapshot with an updated command ID to ensure this query sees
328 : * results of any previously executed queries. (This could only
329 : * matter if the planner executed an allegedly-stable function that
330 : * changed the database contents, but let's do it anyway to be
331 : * parallel to the EXPLAIN code path.)
332 : */
333 1138 : PushCopiedSnapshot(GetActiveSnapshot());
334 1138 : UpdateActiveSnapshotCommandId();
335 :
336 : /* Create a QueryDesc, redirecting output to our tuple receiver */
337 1138 : queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext,
338 : GetActiveSnapshot(), InvalidSnapshot,
339 : dest, params, queryEnv, 0);
340 :
341 : /* call ExecutorStart to prepare the plan for execution */
342 1138 : if (!ExecutorStart(queryDesc, GetIntoRelEFlags(into)))
343 0 : elog(ERROR, "ExecutorStart() failed unexpectedly");
344 :
345 : /* run the plan to completion */
346 1138 : ExecutorRun(queryDesc, ForwardScanDirection, 0);
347 :
348 : /* save the rowcount if we're given a qc to fill */
349 1102 : if (qc)
350 1094 : SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed);
351 :
352 : /* get object address that intorel_startup saved for us */
353 1102 : address = ((DR_intorel *) dest)->reladdr;
354 :
355 : /* and clean up */
356 1102 : ExecutorFinish(queryDesc);
357 1102 : ExecutorEnd(queryDesc);
358 :
359 1102 : FreeQueryDesc(queryDesc);
360 :
361 1102 : PopActiveSnapshot();
362 : }
363 :
364 1556 : return address;
365 : }
366 :
367 : /*
368 : * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS
369 : *
370 : * This is exported because EXPLAIN and PREPARE need it too. (Note: those
371 : * callers still need to deal explicitly with the skipData flag; since they
372 : * use different methods for suppressing execution, it doesn't seem worth
373 : * trying to encapsulate that part.)
374 : */
375 : int
376 1290 : GetIntoRelEFlags(IntoClause *intoClause)
377 : {
378 1290 : int flags = 0;
379 :
380 1290 : if (intoClause->skipData)
381 42 : flags |= EXEC_FLAG_WITH_NO_DATA;
382 :
383 1290 : return flags;
384 : }
385 :
386 : /*
387 : * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt
388 : *
389 : * Utility wrapper checking if the relation pending for creation in this
390 : * CreateTableAsStmt query already exists or not. Returns true if the
391 : * relation exists, otherwise false.
392 : */
393 : bool
394 1926 : CreateTableAsRelExists(CreateTableAsStmt *ctas)
395 : {
396 : Oid nspid;
397 : Oid oldrelid;
398 : ObjectAddress address;
399 1926 : IntoClause *into = ctas->into;
400 :
401 1926 : nspid = RangeVarGetCreationNamespace(into->rel);
402 :
403 1926 : oldrelid = get_relname_relid(into->rel->relname, nspid);
404 1926 : if (OidIsValid(oldrelid))
405 : {
406 152 : if (!ctas->if_not_exists)
407 72 : ereport(ERROR,
408 : (errcode(ERRCODE_DUPLICATE_TABLE),
409 : errmsg("relation \"%s\" already exists",
410 : into->rel->relname)));
411 :
412 : /*
413 : * The relation exists and IF NOT EXISTS has been specified.
414 : *
415 : * If we are in an extension script, insist that the pre-existing
416 : * object be a member of the extension, to avoid security risks.
417 : */
418 80 : ObjectAddressSet(address, RelationRelationId, oldrelid);
419 80 : checkMembershipInCurrentExtension(&address);
420 :
421 : /* OK to skip */
422 76 : ereport(NOTICE,
423 : (errcode(ERRCODE_DUPLICATE_TABLE),
424 : errmsg("relation \"%s\" already exists, skipping",
425 : into->rel->relname)));
426 76 : return true;
427 : }
428 :
429 : /* Relation does not exist, it can be created */
430 1774 : return false;
431 : }
432 :
433 : /*
434 : * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
435 : *
436 : * intoClause will be NULL if called from CreateDestReceiver(), in which
437 : * case it has to be provided later. However, it is convenient to allow
438 : * self->into to be filled in immediately for other callers.
439 : */
440 : DestReceiver *
441 1774 : CreateIntoRelDestReceiver(IntoClause *intoClause)
442 : {
443 1774 : DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
444 :
445 1774 : self->pub.receiveSlot = intorel_receive;
446 1774 : self->pub.rStartup = intorel_startup;
447 1774 : self->pub.rShutdown = intorel_shutdown;
448 1774 : self->pub.rDestroy = intorel_destroy;
449 1774 : self->pub.mydest = DestIntoRel;
450 1774 : self->into = intoClause;
451 : /* other private fields will be set during intorel_startup */
452 :
453 1774 : return (DestReceiver *) self;
454 : }
455 :
456 : /*
457 : * intorel_startup --- executor startup
458 : */
459 : static void
460 1238 : intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
461 : {
462 1238 : DR_intorel *myState = (DR_intorel *) self;
463 1238 : IntoClause *into = myState->into;
464 : bool is_matview;
465 : List *attrList;
466 : ObjectAddress intoRelationAddr;
467 : Relation intoRelationDesc;
468 : ListCell *lc;
469 : int attnum;
470 :
471 : Assert(into != NULL); /* else somebody forgot to set it */
472 :
473 : /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
474 1238 : is_matview = (into->viewQuery != NULL);
475 :
476 : /*
477 : * Build column definitions using "pre-cooked" type and collation info. If
478 : * a column name list was specified in CREATE TABLE AS, override the
479 : * column names derived from the query. (Too few column names are OK, too
480 : * many are not.)
481 : */
482 1238 : attrList = NIL;
483 1238 : lc = list_head(into->colNames);
484 4970 : for (attnum = 0; attnum < typeinfo->natts; attnum++)
485 : {
486 3744 : Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum);
487 : ColumnDef *col;
488 : char *colname;
489 :
490 3744 : if (lc)
491 : {
492 216 : colname = strVal(lfirst(lc));
493 216 : lc = lnext(into->colNames, lc);
494 : }
495 : else
496 3528 : colname = NameStr(attribute->attname);
497 :
498 3744 : col = makeColumnDef(colname,
499 : attribute->atttypid,
500 : attribute->atttypmod,
501 : attribute->attcollation);
502 :
503 : /*
504 : * It's possible that the column is of a collatable type but the
505 : * collation could not be resolved, so double-check. (We must check
506 : * this here because DefineRelation would adopt the type's default
507 : * collation rather than complaining.)
508 : */
509 6956 : if (!OidIsValid(col->collOid) &&
510 3212 : type_is_collatable(col->typeName->typeOid))
511 12 : ereport(ERROR,
512 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
513 : errmsg("no collation was derived for column \"%s\" with collatable type %s",
514 : col->colname,
515 : format_type_be(col->typeName->typeOid)),
516 : errhint("Use the COLLATE clause to set the collation explicitly.")));
517 :
518 3732 : attrList = lappend(attrList, col);
519 : }
520 :
521 1226 : if (lc != NULL)
522 6 : ereport(ERROR,
523 : (errcode(ERRCODE_SYNTAX_ERROR),
524 : errmsg("too many column names were specified")));
525 :
526 : /*
527 : * Actually create the target table
528 : */
529 1220 : intoRelationAddr = create_ctas_internal(attrList, into);
530 :
531 : /*
532 : * Finally we can open the target table
533 : */
534 1202 : intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock);
535 :
536 : /*
537 : * Make sure the constructed table does not have RLS enabled.
538 : *
539 : * check_enable_rls() will ereport(ERROR) itself if the user has requested
540 : * something invalid, and otherwise will return RLS_ENABLED if RLS should
541 : * be enabled here. We don't actually support that currently, so throw
542 : * our own ereport(ERROR) if that happens.
543 : */
544 1202 : if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
545 0 : ereport(ERROR,
546 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
547 : errmsg("policies not yet implemented for this command")));
548 :
549 : /*
550 : * Tentatively mark the target as populated, if it's a matview and we're
551 : * going to fill it; otherwise, no change needed.
552 : */
553 1202 : if (is_matview && !into->skipData)
554 6 : SetMatViewPopulatedState(intoRelationDesc, true);
555 :
556 : /*
557 : * Fill private fields of myState for use by later routines
558 : */
559 1202 : myState->rel = intoRelationDesc;
560 1202 : myState->reladdr = intoRelationAddr;
561 1202 : myState->output_cid = GetCurrentCommandId(true);
562 1202 : myState->ti_options = TABLE_INSERT_SKIP_FSM;
563 :
564 : /*
565 : * If WITH NO DATA is specified, there is no need to set up the state for
566 : * bulk inserts as there are no tuples to insert.
567 : */
568 1202 : if (!into->skipData)
569 1166 : myState->bistate = GetBulkInsertState();
570 : else
571 36 : myState->bistate = NULL;
572 :
573 : /*
574 : * Valid smgr_targblock implies something already wrote to the relation.
575 : * This may be harmless, but this function hasn't planned for it.
576 : */
577 : Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
578 1202 : }
579 :
580 : /*
581 : * intorel_receive --- receive one tuple
582 : */
583 : static bool
584 2259784 : intorel_receive(TupleTableSlot *slot, DestReceiver *self)
585 : {
586 2259784 : DR_intorel *myState = (DR_intorel *) self;
587 :
588 : /* Nothing to insert if WITH NO DATA is specified. */
589 2259784 : if (!myState->into->skipData)
590 : {
591 : /*
592 : * Note that the input slot might not be of the type of the target
593 : * relation. That's supported by table_tuple_insert(), but slightly
594 : * less efficient than inserting with the right slot - but the
595 : * alternative would be to copy into a slot of the right type, which
596 : * would not be cheap either. This also doesn't allow accessing per-AM
597 : * data (say a tuple's xmin), but since we don't do that here...
598 : */
599 2259784 : table_tuple_insert(myState->rel,
600 : slot,
601 : myState->output_cid,
602 : myState->ti_options,
603 : myState->bistate);
604 : }
605 :
606 : /* We know this is a newly created relation, so there are no indexes */
607 :
608 2259784 : return true;
609 : }
610 :
611 : /*
612 : * intorel_shutdown --- executor end
613 : */
614 : static void
615 1202 : intorel_shutdown(DestReceiver *self)
616 : {
617 1202 : DR_intorel *myState = (DR_intorel *) self;
618 1202 : IntoClause *into = myState->into;
619 :
620 1202 : if (!into->skipData)
621 : {
622 1166 : FreeBulkInsertState(myState->bistate);
623 1166 : table_finish_bulk_insert(myState->rel, myState->ti_options);
624 : }
625 :
626 : /* close rel, but keep lock until commit */
627 1202 : table_close(myState->rel, NoLock);
628 1202 : myState->rel = NULL;
629 1202 : }
630 :
631 : /*
632 : * intorel_destroy --- release DestReceiver object
633 : */
634 : static void
635 106 : intorel_destroy(DestReceiver *self)
636 : {
637 106 : pfree(self);
638 106 : }
|