Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * createas.c
4 : * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO.
5 : * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors,
6 : * we implement that here, too.
7 : *
8 : * We implement this by diverting the query's normal output to a
9 : * specialized DestReceiver type.
10 : *
11 : * Formerly, CTAS was implemented as a variant of SELECT, which led
12 : * to assorted legacy behaviors that we still try to preserve, notably that
13 : * we must return a tuples-processed count in the QueryCompletion. (We no
14 : * longer do that for CTAS ... WITH NO DATA, however.)
15 : *
16 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
17 : * Portions Copyright (c) 1994, Regents of the University of California
18 : *
19 : *
20 : * IDENTIFICATION
21 : * src/backend/commands/createas.c
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres.h"
26 :
27 : #include "access/heapam.h"
28 : #include "access/htup_details.h"
29 : #include "access/reloptions.h"
30 : #include "access/sysattr.h"
31 : #include "access/tableam.h"
32 : #include "access/xact.h"
33 : #include "access/xlog.h"
34 : #include "catalog/namespace.h"
35 : #include "catalog/toasting.h"
36 : #include "commands/createas.h"
37 : #include "commands/matview.h"
38 : #include "commands/prepare.h"
39 : #include "commands/tablecmds.h"
40 : #include "commands/view.h"
41 : #include "miscadmin.h"
42 : #include "nodes/makefuncs.h"
43 : #include "nodes/nodeFuncs.h"
44 : #include "parser/parse_clause.h"
45 : #include "rewrite/rewriteHandler.h"
46 : #include "storage/smgr.h"
47 : #include "tcop/tcopprot.h"
48 : #include "utils/builtins.h"
49 : #include "utils/lsyscache.h"
50 : #include "utils/rel.h"
51 : #include "utils/rls.h"
52 : #include "utils/snapmgr.h"
53 :
54 : typedef struct
55 : {
56 : DestReceiver pub; /* publicly-known function pointers */
57 : IntoClause *into; /* target relation specification */
58 : /* These fields are filled by intorel_startup: */
59 : Relation rel; /* relation to write to */
60 : ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */
61 : CommandId output_cid; /* cmin to insert in output tuples */
62 : int ti_options; /* table_tuple_insert performance options */
63 : BulkInsertState bistate; /* bulk insert state */
64 : } DR_intorel;
65 :
66 : /* utility functions for CTAS definition creation */
67 : static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into);
68 : static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into);
69 :
70 : /* DestReceiver routines for collecting data */
71 : static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
72 : static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self);
73 : static void intorel_shutdown(DestReceiver *self);
74 : static void intorel_destroy(DestReceiver *self);
75 :
76 :
77 : /*
78 : * create_ctas_internal
79 : *
80 : * Internal utility used for the creation of the definition of a relation
81 : * created via CREATE TABLE AS or a materialized view. Caller needs to
82 : * provide a list of attributes (ColumnDef nodes).
83 : */
84 : static ObjectAddress
85 1610 : create_ctas_internal(List *attrList, IntoClause *into)
86 : {
87 1610 : CreateStmt *create = makeNode(CreateStmt);
88 : bool is_matview;
89 : char relkind;
90 : Datum toast_options;
91 : static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
92 : ObjectAddress intoRelationAddr;
93 :
94 : /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
95 1610 : is_matview = (into->viewQuery != NULL);
96 1610 : relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
97 :
98 : /*
99 : * Create the target relation by faking up a CREATE TABLE parsetree and
100 : * passing it to DefineRelation.
101 : */
102 1610 : create->relation = into->rel;
103 1610 : create->tableElts = attrList;
104 1610 : create->inhRelations = NIL;
105 1610 : create->ofTypename = NULL;
106 1610 : create->constraints = NIL;
107 1610 : create->options = into->options;
108 1610 : create->oncommit = into->onCommit;
109 1610 : create->tablespacename = into->tableSpaceName;
110 1610 : create->if_not_exists = false;
111 1610 : create->accessMethod = into->accessMethod;
112 :
113 : /*
114 : * Create the relation. (This will error out if there's an existing view,
115 : * so we don't need more code to complain if "replace" is false.)
116 : */
117 1610 : intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
118 :
119 : /*
120 : * If necessary, create a TOAST table for the target table. Note that
121 : * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
122 : * that the TOAST table will be visible for insertion.
123 : */
124 1592 : CommandCounterIncrement();
125 :
126 : /* parse and validate reloptions for the toast table */
127 1592 : toast_options = transformRelOptions((Datum) 0,
128 : create->options,
129 : "toast",
130 : validnsps,
131 : true, false);
132 :
133 1592 : (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);
134 :
135 1592 : NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);
136 :
137 : /* Create the "view" part of a materialized view. */
138 1592 : if (is_matview)
139 : {
140 : /* StoreViewQuery scribbles on tree, so make a copy */
141 424 : Query *query = (Query *) copyObject(into->viewQuery);
142 :
143 424 : StoreViewQuery(intoRelationAddr.objectId, query, false);
144 424 : CommandCounterIncrement();
145 : }
146 :
147 1592 : return intoRelationAddr;
148 : }
149 :
150 :
151 : /*
152 : * create_ctas_nodata
153 : *
154 : * Create CTAS or materialized view when WITH NO DATA is used, starting from
155 : * the targetlist of the SELECT or view definition.
156 : */
157 : static ObjectAddress
158 110 : create_ctas_nodata(List *tlist, IntoClause *into)
159 : {
160 : List *attrList;
161 : ListCell *t,
162 : *lc;
163 :
164 : /*
165 : * Build list of ColumnDefs from non-junk elements of the tlist. If a
166 : * column name list was specified in CREATE TABLE AS, override the column
167 : * names in the query. (Too few column names are OK, too many are not.)
168 : */
169 110 : attrList = NIL;
170 110 : lc = list_head(into->colNames);
171 352 : foreach(t, tlist)
172 : {
173 242 : TargetEntry *tle = (TargetEntry *) lfirst(t);
174 :
175 242 : if (!tle->resjunk)
176 : {
177 : ColumnDef *col;
178 : char *colname;
179 :
180 242 : if (lc)
181 : {
182 76 : colname = strVal(lfirst(lc));
183 76 : lc = lnext(into->colNames, lc);
184 : }
185 : else
186 166 : colname = tle->resname;
187 :
188 242 : col = makeColumnDef(colname,
189 242 : exprType((Node *) tle->expr),
190 242 : exprTypmod((Node *) tle->expr),
191 242 : exprCollation((Node *) tle->expr));
192 :
193 : /*
194 : * It's possible that the column is of a collatable type but the
195 : * collation could not be resolved, so double-check. (We must
196 : * check this here because DefineRelation would adopt the type's
197 : * default collation rather than complaining.)
198 : */
199 464 : if (!OidIsValid(col->collOid) &&
200 222 : type_is_collatable(col->typeName->typeOid))
201 0 : ereport(ERROR,
202 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
203 : errmsg("no collation was derived for column \"%s\" with collatable type %s",
204 : col->colname,
205 : format_type_be(col->typeName->typeOid)),
206 : errhint("Use the COLLATE clause to set the collation explicitly.")));
207 :
208 242 : attrList = lappend(attrList, col);
209 : }
210 : }
211 :
212 110 : if (lc != NULL)
213 12 : ereport(ERROR,
214 : (errcode(ERRCODE_SYNTAX_ERROR),
215 : errmsg("too many column names were specified")));
216 :
217 : /* Create the relation definition using the ColumnDef list */
218 98 : return create_ctas_internal(attrList, into);
219 : }
220 :
221 :
222 : /*
223 : * ExecCreateTableAs -- execute a CREATE TABLE AS command
224 : */
225 : ObjectAddress
226 1696 : ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
227 : ParamListInfo params, QueryEnvironment *queryEnv,
228 : QueryCompletion *qc)
229 : {
230 1696 : Query *query = castNode(Query, stmt->query);
231 1696 : IntoClause *into = stmt->into;
232 1696 : bool is_matview = (into->viewQuery != NULL);
233 : DestReceiver *dest;
234 1696 : Oid save_userid = InvalidOid;
235 1696 : int save_sec_context = 0;
236 1696 : int save_nestlevel = 0;
237 : ObjectAddress address;
238 : List *rewritten;
239 : PlannedStmt *plan;
240 : QueryDesc *queryDesc;
241 :
242 : /* Check if the relation exists or not */
243 1696 : if (CreateTableAsRelExists(stmt))
244 46 : return InvalidObjectAddress;
245 :
246 : /*
247 : * Create the tuple receiver object and insert info it will need
248 : */
249 1604 : dest = CreateIntoRelDestReceiver(into);
250 :
251 : /*
252 : * The contained Query could be a SELECT, or an EXECUTE utility command.
253 : * If the latter, we just pass it off to ExecuteQuery.
254 : */
255 1604 : if (query->commandType == CMD_UTILITY &&
256 42 : IsA(query->utilityStmt, ExecuteStmt))
257 : {
258 42 : ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt);
259 :
260 : Assert(!is_matview); /* excluded by syntax */
261 42 : ExecuteQuery(pstate, estmt, into, params, dest, qc);
262 :
263 : /* get object address that intorel_startup saved for us */
264 42 : address = ((DR_intorel *) dest)->reladdr;
265 :
266 42 : return address;
267 : }
268 : Assert(query->commandType == CMD_SELECT);
269 :
270 : /*
271 : * For materialized views, lock down security-restricted operations and
272 : * arrange to make GUC variable changes local to this command. This is
273 : * not necessary for security, but this keeps the behavior similar to
274 : * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized
275 : * view not possible to refresh.
276 : */
277 1562 : if (is_matview)
278 : {
279 430 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
280 430 : SetUserIdAndSecContext(save_userid,
281 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
282 430 : save_nestlevel = NewGUCNestLevel();
283 : }
284 :
285 1562 : if (into->skipData)
286 : {
287 : /*
288 : * If WITH NO DATA was specified, do not go through the rewriter,
289 : * planner and executor. Just define the relation using a code path
290 : * similar to CREATE VIEW. This avoids dump/restore problems stemming
291 : * from running the planner before all dependencies are set up.
292 : */
293 110 : address = create_ctas_nodata(query->targetList, into);
294 : }
295 : else
296 : {
297 : /*
298 : * Parse analysis was done already, but we still have to run the rule
299 : * rewriter. We do not do AcquireRewriteLocks: we assume the query
300 : * either came straight from the parser, or suitable locks were
301 : * acquired by plancache.c.
302 : */
303 1452 : rewritten = QueryRewrite(query);
304 :
305 : /* SELECT should never rewrite to more or less than one SELECT query */
306 1452 : if (list_length(rewritten) != 1)
307 0 : elog(ERROR, "unexpected rewrite result for %s",
308 : is_matview ? "CREATE MATERIALIZED VIEW" :
309 : "CREATE TABLE AS SELECT");
310 1452 : query = linitial_node(Query, rewritten);
311 : Assert(query->commandType == CMD_SELECT);
312 :
313 : /* plan the query */
314 1452 : plan = pg_plan_query(query, pstate->p_sourcetext,
315 : CURSOR_OPT_PARALLEL_OK, params);
316 :
317 : /*
318 : * Use a snapshot with an updated command ID to ensure this query sees
319 : * results of any previously executed queries. (This could only
320 : * matter if the planner executed an allegedly-stable function that
321 : * changed the database contents, but let's do it anyway to be
322 : * parallel to the EXPLAIN code path.)
323 : */
324 1446 : PushCopiedSnapshot(GetActiveSnapshot());
325 1446 : UpdateActiveSnapshotCommandId();
326 :
327 : /* Create a QueryDesc, redirecting output to our tuple receiver */
328 1446 : queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
329 : GetActiveSnapshot(), InvalidSnapshot,
330 : dest, params, queryEnv, 0);
331 :
332 : /* call ExecutorStart to prepare the plan for execution */
333 1446 : ExecutorStart(queryDesc, GetIntoRelEFlags(into));
334 :
335 : /* run the plan to completion */
336 1446 : ExecutorRun(queryDesc, ForwardScanDirection, 0, true);
337 :
338 : /* save the rowcount if we're given a qc to fill */
339 1404 : if (qc)
340 1384 : SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed);
341 :
342 : /* get object address that intorel_startup saved for us */
343 1404 : address = ((DR_intorel *) dest)->reladdr;
344 :
345 : /* and clean up */
346 1404 : ExecutorFinish(queryDesc);
347 1404 : ExecutorEnd(queryDesc);
348 :
349 1404 : FreeQueryDesc(queryDesc);
350 :
351 1404 : PopActiveSnapshot();
352 : }
353 :
354 1502 : if (is_matview)
355 : {
356 : /* Roll back any GUC changes */
357 412 : AtEOXact_GUC(false, save_nestlevel);
358 :
359 : /* Restore userid and security context */
360 412 : SetUserIdAndSecContext(save_userid, save_sec_context);
361 : }
362 :
363 1502 : return address;
364 : }
365 :
366 : /*
367 : * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS
368 : *
369 : * This is exported because EXPLAIN and PREPARE need it too. (Note: those
370 : * callers still need to deal explicitly with the skipData flag; since they
371 : * use different methods for suppressing execution, it doesn't seem worth
372 : * trying to encapsulate that part.)
373 : */
374 : int
375 1578 : GetIntoRelEFlags(IntoClause *intoClause)
376 : {
377 1578 : int flags = 0;
378 :
379 1578 : if (intoClause->skipData)
380 42 : flags |= EXEC_FLAG_WITH_NO_DATA;
381 :
382 1578 : return flags;
383 : }
384 :
385 : /*
386 : * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt
387 : *
388 : * Utility wrapper checking if the relation pending for creation in this
389 : * CreateTableAsStmt query already exists or not. Returns true if the
390 : * relation exists, otherwise false.
391 : */
392 : bool
393 1846 : CreateTableAsRelExists(CreateTableAsStmt *ctas)
394 : {
395 : Oid nspid;
396 : Oid oldrelid;
397 : ObjectAddress address;
398 1846 : IntoClause *into = ctas->into;
399 :
400 1846 : nspid = RangeVarGetCreationNamespace(into->rel);
401 :
402 1846 : oldrelid = get_relname_relid(into->rel->relname, nspid);
403 1846 : if (OidIsValid(oldrelid))
404 : {
405 152 : if (!ctas->if_not_exists)
406 72 : ereport(ERROR,
407 : (errcode(ERRCODE_DUPLICATE_TABLE),
408 : errmsg("relation \"%s\" already exists",
409 : into->rel->relname)));
410 :
411 : /*
412 : * The relation exists and IF NOT EXISTS has been specified.
413 : *
414 : * If we are in an extension script, insist that the pre-existing
415 : * object be a member of the extension, to avoid security risks.
416 : */
417 80 : ObjectAddressSet(address, RelationRelationId, oldrelid);
418 80 : checkMembershipInCurrentExtension(&address);
419 :
420 : /* OK to skip */
421 76 : ereport(NOTICE,
422 : (errcode(ERRCODE_DUPLICATE_TABLE),
423 : errmsg("relation \"%s\" already exists, skipping",
424 : into->rel->relname)));
425 76 : return true;
426 : }
427 :
428 : /* Relation does not exist, it can be created */
429 1694 : return false;
430 : }
431 :
432 : /*
433 : * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
434 : *
435 : * intoClause will be NULL if called from CreateDestReceiver(), in which
436 : * case it has to be provided later. However, it is convenient to allow
437 : * self->into to be filled in immediately for other callers.
438 : */
439 : DestReceiver *
440 1694 : CreateIntoRelDestReceiver(IntoClause *intoClause)
441 : {
442 1694 : DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
443 :
444 1694 : self->pub.receiveSlot = intorel_receive;
445 1694 : self->pub.rStartup = intorel_startup;
446 1694 : self->pub.rShutdown = intorel_shutdown;
447 1694 : self->pub.rDestroy = intorel_destroy;
448 1694 : self->pub.mydest = DestIntoRel;
449 1694 : self->into = intoClause;
450 : /* other private fields will be set during intorel_startup */
451 :
452 1694 : return (DestReceiver *) self;
453 : }
454 :
455 : /*
456 : * intorel_startup --- executor startup
457 : */
458 : static void
459 1536 : intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
460 : {
461 1536 : DR_intorel *myState = (DR_intorel *) self;
462 1536 : IntoClause *into = myState->into;
463 : bool is_matview;
464 : List *attrList;
465 : ObjectAddress intoRelationAddr;
466 : Relation intoRelationDesc;
467 : ListCell *lc;
468 : int attnum;
469 :
470 : Assert(into != NULL); /* else somebody forgot to set it */
471 :
472 : /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
473 1536 : is_matview = (into->viewQuery != NULL);
474 :
475 : /*
476 : * Build column definitions using "pre-cooked" type and collation info. If
477 : * a column name list was specified in CREATE TABLE AS, override the
478 : * column names derived from the query. (Too few column names are OK, too
479 : * many are not.)
480 : */
481 1536 : attrList = NIL;
482 1536 : lc = list_head(into->colNames);
483 5950 : for (attnum = 0; attnum < typeinfo->natts; attnum++)
484 : {
485 4426 : Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum);
486 : ColumnDef *col;
487 : char *colname;
488 :
489 4426 : if (lc)
490 : {
491 252 : colname = strVal(lfirst(lc));
492 252 : lc = lnext(into->colNames, lc);
493 : }
494 : else
495 4174 : colname = NameStr(attribute->attname);
496 :
497 4426 : col = makeColumnDef(colname,
498 : attribute->atttypid,
499 : attribute->atttypmod,
500 : attribute->attcollation);
501 :
502 : /*
503 : * It's possible that the column is of a collatable type but the
504 : * collation could not be resolved, so double-check. (We must check
505 : * this here because DefineRelation would adopt the type's default
506 : * collation rather than complaining.)
507 : */
508 8186 : if (!OidIsValid(col->collOid) &&
509 3760 : type_is_collatable(col->typeName->typeOid))
510 12 : ereport(ERROR,
511 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
512 : errmsg("no collation was derived for column \"%s\" with collatable type %s",
513 : col->colname,
514 : format_type_be(col->typeName->typeOid)),
515 : errhint("Use the COLLATE clause to set the collation explicitly.")));
516 :
517 4414 : attrList = lappend(attrList, col);
518 : }
519 :
520 1524 : if (lc != NULL)
521 12 : ereport(ERROR,
522 : (errcode(ERRCODE_SYNTAX_ERROR),
523 : errmsg("too many column names were specified")));
524 :
525 : /*
526 : * Actually create the target table
527 : */
528 1512 : intoRelationAddr = create_ctas_internal(attrList, into);
529 :
530 : /*
531 : * Finally we can open the target table
532 : */
533 1494 : intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock);
534 :
535 : /*
536 : * Make sure the constructed table does not have RLS enabled.
537 : *
538 : * check_enable_rls() will ereport(ERROR) itself if the user has requested
539 : * something invalid, and otherwise will return RLS_ENABLED if RLS should
540 : * be enabled here. We don't actually support that currently, so throw
541 : * our own ereport(ERROR) if that happens.
542 : */
543 1494 : if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
544 0 : ereport(ERROR,
545 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
546 : errmsg("policies not yet implemented for this command")));
547 :
548 : /*
549 : * Tentatively mark the target as populated, if it's a matview and we're
550 : * going to fill it; otherwise, no change needed.
551 : */
552 1494 : if (is_matview && !into->skipData)
553 342 : SetMatViewPopulatedState(intoRelationDesc, true);
554 :
555 : /*
556 : * Fill private fields of myState for use by later routines
557 : */
558 1494 : myState->rel = intoRelationDesc;
559 1494 : myState->reladdr = intoRelationAddr;
560 1494 : myState->output_cid = GetCurrentCommandId(true);
561 1494 : myState->ti_options = TABLE_INSERT_SKIP_FSM;
562 :
563 : /*
564 : * If WITH NO DATA is specified, there is no need to set up the state for
565 : * bulk inserts as there are no tuples to insert.
566 : */
567 1494 : if (!into->skipData)
568 1458 : myState->bistate = GetBulkInsertState();
569 : else
570 36 : myState->bistate = NULL;
571 :
572 : /*
573 : * Valid smgr_targblock implies something already wrote to the relation.
574 : * This may be harmless, but this function hasn't planned for it.
575 : */
576 : Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
577 1494 : }
578 :
579 : /*
580 : * intorel_receive --- receive one tuple
581 : */
582 : static bool
583 2108352 : intorel_receive(TupleTableSlot *slot, DestReceiver *self)
584 : {
585 2108352 : DR_intorel *myState = (DR_intorel *) self;
586 :
587 : /* Nothing to insert if WITH NO DATA is specified. */
588 2108352 : if (!myState->into->skipData)
589 : {
590 : /*
591 : * Note that the input slot might not be of the type of the target
592 : * relation. That's supported by table_tuple_insert(), but slightly
593 : * less efficient than inserting with the right slot - but the
594 : * alternative would be to copy into a slot of the right type, which
595 : * would not be cheap either. This also doesn't allow accessing per-AM
596 : * data (say a tuple's xmin), but since we don't do that here...
597 : */
598 2108352 : table_tuple_insert(myState->rel,
599 : slot,
600 : myState->output_cid,
601 : myState->ti_options,
602 : myState->bistate);
603 : }
604 :
605 : /* We know this is a newly created relation, so there are no indexes */
606 :
607 2108352 : return true;
608 : }
609 :
610 : /*
611 : * intorel_shutdown --- executor end
612 : */
613 : static void
614 1494 : intorel_shutdown(DestReceiver *self)
615 : {
616 1494 : DR_intorel *myState = (DR_intorel *) self;
617 1494 : IntoClause *into = myState->into;
618 :
619 1494 : if (!into->skipData)
620 : {
621 1458 : FreeBulkInsertState(myState->bistate);
622 1458 : table_finish_bulk_insert(myState->rel, myState->ti_options);
623 : }
624 :
625 : /* close rel, but keep lock until commit */
626 1494 : table_close(myState->rel, NoLock);
627 1494 : myState->rel = NULL;
628 1494 : }
629 :
630 : /*
631 : * intorel_destroy --- release DestReceiver object
632 : */
633 : static void
634 0 : intorel_destroy(DestReceiver *self)
635 : {
636 0 : pfree(self);
637 0 : }
|