Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * cluster.c
4 : * REPACK a table; formerly known as CLUSTER. VACUUM FULL also uses
5 : * parts of this code.
6 : *
7 : *
8 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
9 : * Portions Copyright (c) 1994-5, Regents of the University of California
10 : *
11 : *
12 : * IDENTIFICATION
13 : * src/backend/commands/cluster.c
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 : #include "postgres.h"
18 :
19 : #include "access/amapi.h"
20 : #include "access/heapam.h"
21 : #include "access/multixact.h"
22 : #include "access/relscan.h"
23 : #include "access/tableam.h"
24 : #include "access/toast_internals.h"
25 : #include "access/transam.h"
26 : #include "access/xact.h"
27 : #include "catalog/catalog.h"
28 : #include "catalog/dependency.h"
29 : #include "catalog/heap.h"
30 : #include "catalog/index.h"
31 : #include "catalog/namespace.h"
32 : #include "catalog/objectaccess.h"
33 : #include "catalog/pg_am.h"
34 : #include "catalog/pg_inherits.h"
35 : #include "catalog/toasting.h"
36 : #include "commands/cluster.h"
37 : #include "commands/defrem.h"
38 : #include "commands/progress.h"
39 : #include "commands/tablecmds.h"
40 : #include "commands/vacuum.h"
41 : #include "miscadmin.h"
42 : #include "optimizer/optimizer.h"
43 : #include "pgstat.h"
44 : #include "storage/bufmgr.h"
45 : #include "storage/lmgr.h"
46 : #include "storage/predicate.h"
47 : #include "utils/acl.h"
48 : #include "utils/fmgroids.h"
49 : #include "utils/guc.h"
50 : #include "utils/inval.h"
51 : #include "utils/lsyscache.h"
52 : #include "utils/memutils.h"
53 : #include "utils/pg_rusage.h"
54 : #include "utils/relmapper.h"
55 : #include "utils/snapmgr.h"
56 : #include "utils/syscache.h"
57 :
58 : /*
59 : * This struct is used to pass around the information on tables to be
60 : * clustered. We need this so we can make a list of them when invoked without
61 : * a specific table/index pair.
: *
: * ExecRepack() walks a list of these and processes each entry in its own
: * transaction, so the list has to live in a memory context that survives
: * transaction boundaries.
62 : */
63 : typedef struct
64 : {
65 : Oid tableOid; /* table to process; opened with try_table_open() */
66 : Oid indexOid; /* index to order by, handed to cluster_rel(); InvalidOid there means physical order */
67 : } RelToCluster;
68 :
69 : static bool cluster_rel_recheck(RepackCommand cmd, Relation OldHeap,
70 : Oid indexOid, Oid userid, int options);
71 : static void rebuild_relation(Relation OldHeap, Relation index, bool verbose);
72 : static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex,
73 : bool verbose, bool *pSwapToastByContent,
74 : TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
75 : static List *get_tables_to_repack(RepackCommand cmd, bool usingindex,
76 : MemoryContext permcxt);
77 : static List *get_tables_to_repack_partitioned(RepackCommand cmd,
78 : Oid relid, bool rel_is_index,
79 : MemoryContext permcxt);
80 : static bool repack_is_permitted_for_relation(RepackCommand cmd,
81 : Oid relid, Oid userid);
82 : static Relation process_single_relation(RepackStmt *stmt,
83 : ClusterParams *params);
84 : static Oid determine_clustered_index(Relation rel, bool usingindex,
85 : const char *indexname);
86 : static const char *RepackCommandAsString(RepackCommand cmd);
87 :
88 :
89 : /*
90 : * The repack code allows for processing multiple tables at once. Because
91 : * of this, we cannot just run everything on a single transaction, or we
92 : * would be forced to acquire exclusive locks on all the tables being
93 : * clustered, simultaneously --- very likely leading to deadlock.
94 : *
95 : * To solve this we follow a similar strategy to VACUUM code, processing each
96 : * relation in a separate transaction. For this to work, we need to:
97 : *
98 : * - provide a separate memory context so that we can pass information in
99 : * a way that survives across transactions
100 : * - start a new transaction every time a new relation is clustered
101 : * - check for validity of the information on to-be-clustered relations,
102 : * as someone might have deleted a relation behind our back, or
103 : * clustered one on a different index
104 : * - end the transaction
105 : *
106 : * The single-relation case does not have any such overhead.
107 : *
108 : * We also allow a relation to be repacked following an index, but without
109 : * naming a specific one. In that case, the indisclustered bit will be
110 : * looked up, and an ERROR will be thrown if no so-marked index is found.
: *
: * 'pstate' is used only to report the position of an unrecognized option.
: * 'stmt' supplies the option list, the optional target relation, and the
: * USING INDEX information. 'isTopLevel' is passed on to
: * PreventInTransactionBlock() in the multi-table case.
: *
: * In the multi-table case the transaction that was active on entry is
: * committed here, and a freshly started transaction is left open on return.
111 : */
112 : void
113 180 : ExecRepack(ParseState *pstate, RepackStmt *stmt, bool isTopLevel)
114 : {
115 180 : ClusterParams params = {0};
116 180 : Relation rel = NULL;
117 : MemoryContext repack_context;
118 : List *rtcs;
119 :
120 : /* Parse option list */
121 376 : foreach_node(DefElem, opt, stmt->params)
122 : {
123 16 : if (strcmp(opt->defname, "verbose") == 0)
124 8 : params.options |= defGetBoolean(opt) ? CLUOPT_VERBOSE : 0;
125 8 : else if (strcmp(opt->defname, "analyze") == 0 ||
126 0 : strcmp(opt->defname, "analyse") == 0)
127 8 : params.options |= defGetBoolean(opt) ? CLUOPT_ANALYZE : 0;
128 : else
129 0 : ereport(ERROR,
130 : errcode(ERRCODE_SYNTAX_ERROR),
131 : errmsg("unrecognized %s option \"%s\"",
132 : RepackCommandAsString(stmt->command),
133 : opt->defname),
134 : parser_errposition(pstate, opt->location));
135 : }
136 :
137 : /*
138 : * If a single relation is specified, process it and we're done ... unless
139 : * the relation is a partitioned table, in which case we fall through.
140 : */
141 180 : if (stmt->relation != NULL)
142 : {
143 163 : rel = process_single_relation(stmt, &params);
144 147 : if (rel == NULL)
145 115 : return; /* all done */
146 : }
147 :
148 : /*
149 : * Don't allow ANALYZE in the multiple-relation case for now. Maybe we
150 : * can add support for this later.
151 : */
152 49 : if (params.options & CLUOPT_ANALYZE)
153 0 : ereport(ERROR,
154 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
155 : errmsg("cannot execute %s on multiple tables",
156 : "REPACK (ANALYZE)"));
157 :
158 : /*
159 : * By here, we know we are in a multi-table situation. In order to avoid
160 : * holding locks for too long, we want to process each table in its own
161 : * transaction. This forces us to disallow running inside a user
162 : * transaction block.
163 : */
164 49 : PreventInTransactionBlock(isTopLevel, RepackCommandAsString(stmt->command));
165 :
166 : /* Also, we need a memory context to hold our list of relations */
167 49 : repack_context = AllocSetContextCreate(PortalContext,
168 : "Repack",
169 : ALLOCSET_DEFAULT_SIZES);
170 :
171 : /* Every table is revalidated in its own transaction before processing */
171 49 : params.options |= CLUOPT_RECHECK;
172 :
173 : /*
174 : * If we don't have a relation yet, determine a relation list. If we do,
175 : * then it must be a partitioned table, and we want to process its
176 : * partitions.
177 : */
178 49 : if (rel == NULL)
179 : {
180 : Assert(stmt->indexname == NULL);
181 17 : rtcs = get_tables_to_repack(stmt->command, stmt->usingindex,
182 : repack_context);
183 17 : params.options |= CLUOPT_RECHECK_ISCLUSTERED;
184 : }
185 : else
186 : {
187 : Oid relid;
188 : bool rel_is_index;
189 :
190 : Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
191 :
192 : /*
193 : * If USING INDEX was specified, resolve the index name now and pass
194 : * it down.
195 : */
196 32 : if (stmt->usingindex)
197 : {
198 : /*
199 : * If no index name was specified when repacking a partitioned
200 : * table, punt for now. Maybe we can improve this later.
201 : */
202 28 : if (!stmt->indexname)
203 : {
204 8 : if (stmt->command == REPACK_COMMAND_CLUSTER)
205 4 : ereport(ERROR,
206 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
207 : errmsg("there is no previously clustered index for table \"%s\"",
208 : RelationGetRelationName(rel)));
209 : else
210 4 : ereport(ERROR,
211 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
212 : /*- translator: first %s is name of a SQL command, eg. REPACK */
213 : errmsg("cannot execute %s on partitioned table \"%s\" USING INDEX with no index name",
214 : RepackCommandAsString(stmt->command),
215 : RelationGetRelationName(rel)));
216 : }
217 :
218 20 : relid = determine_clustered_index(rel, stmt->usingindex,
219 20 : stmt->indexname);
220 20 : if (!OidIsValid(relid))
221 0 : elog(ERROR, "unable to determine index to cluster on");
222 20 : check_index_is_clusterable(rel, relid, AccessExclusiveLock);
223 :
224 : /* relid now names the partitioned index, not the table */
224 16 : rel_is_index = true;
225 : }
226 : else
227 : {
228 4 : relid = RelationGetRelid(rel);
229 4 : rel_is_index = false;
230 : }
231 :
232 20 : rtcs = get_tables_to_repack_partitioned(stmt->command,
233 : relid, rel_is_index,
234 : repack_context);
235 :
236 : /* close parent relation, releasing lock on it */
237 20 : table_close(rel, AccessExclusiveLock);
238 20 : rel = NULL;
239 : }
240 :
241 : /* Commit to get out of starting transaction */
242 37 : PopActiveSnapshot();
243 37 : CommitTransactionCommand();
244 :
245 : /* Cluster the tables, each in a separate transaction */
246 : Assert(rel == NULL);
247 126 : foreach_ptr(RelToCluster, rtc, rtcs)
248 : {
249 : /* Start a new transaction for each relation. */
250 52 : StartTransactionCommand();
251 :
252 : /*
253 : * Open the target table, coping with the case where it has been
254 : * dropped.
255 : */
256 52 : rel = try_table_open(rtc->tableOid, AccessExclusiveLock);
257 52 : if (rel == NULL)
258 : {
259 0 : CommitTransactionCommand();
260 0 : continue;
261 : }
262 :
263 : /* functions in indexes may want a snapshot set */
264 52 : PushActiveSnapshot(GetTransactionSnapshot());
265 :
266 : /* Process this table */
267 52 : cluster_rel(stmt->command, rel, rtc->indexOid, &params);
268 : /* cluster_rel closes the relation, but keeps lock */
269 :
270 52 : PopActiveSnapshot();
271 52 : CommitTransactionCommand();
272 : }
273 :
274 : /* Start a new transaction for the cleanup work. */
275 37 : StartTransactionCommand();
276 :
277 : /* Clean up working storage */
278 37 : MemoryContextDelete(repack_context);
279 : }
280 :
281 : /*
282 : * cluster_rel
283 : *
284 : * This clusters the table by creating a new, clustered table and
285 : * swapping the relfilenumbers of the new table and the old table, so
286 : * the OID of the original table is preserved. Thus we do not lose
287 : * GRANT, inheritance nor references to this table.
288 : *
289 : * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
290 : * the new table, it's better to create the indexes afterwards than to fill
291 : * them incrementally while we load the table.
292 : *
293 : * If indexOid is InvalidOid, the table will be rewritten in physical order
294 : * instead of index order.
295 : *
296 : * 'cmd' indicates which command is being executed, to be used for error
297 : * messages.
: *
: * OldHeap must be open and locked with AccessExclusiveLock by the caller.
: * It is always closed by the time we return: the lock is kept when the
: * table was rebuilt, and released when the table was silently skipped.
: *
: * params->options is a CLUOPT_* bitmask; CLUOPT_VERBOSE and CLUOPT_RECHECK
: * are examined here, and the whole mask is handed to cluster_rel_recheck().
298 : */
299 : void
300 389 : cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid,
301 : ClusterParams *params)
302 : {
303 389 : Oid tableOid = RelationGetRelid(OldHeap);
304 : Oid save_userid;
305 : int save_sec_context;
306 : int save_nestlevel;
307 389 : bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
308 389 : bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
309 : Relation index;
310 :
311 : Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false));
312 :
313 : /* Check for user-requested abort. */
314 389 : CHECK_FOR_INTERRUPTS();
315 :
316 389 : pgstat_progress_start_command(PROGRESS_COMMAND_REPACK, tableOid);
317 389 : pgstat_progress_update_param(PROGRESS_REPACK_COMMAND, cmd);
318 :
319 : /*
320 : * Switch to the table owner's userid, so that any index functions are run
321 : * as that user. Also lock down security-restricted operations and
322 : * arrange to make GUC variable changes local to this command.
323 : */
324 389 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
325 389 : SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
326 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
327 389 : save_nestlevel = NewGUCNestLevel();
328 389 : RestrictSearchPath();
329 :
330 : /*
331 : * Since we may open a new transaction for each relation, we have to check
332 : * that the relation still is what we think it is.
333 : *
334 : * If this is a single-transaction CLUSTER, we can skip these tests. We
335 : * *must* skip the one on indisclustered since it would reject an attempt
336 : * to cluster a not-previously-clustered index.
: *
: * On failure cluster_rel_recheck has already closed OldHeap, and the
: * index has not been opened yet, so there is nothing more to release.
337 : */
338 389 : if (recheck &&
339 52 : !cluster_rel_recheck(cmd, OldHeap, indexOid, save_userid,
340 52 : params->options))
341 0 : goto out;
342 :
343 : /*
344 : * We allow repacking shared catalogs only when not using an index. It
345 : * would work to use an index in most respects, but the index would only
346 : * get marked as indisclustered in the current database, leading to
347 : * unexpected behavior if CLUSTER were later invoked in another database.
348 : */
349 389 : if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
350 0 : ereport(ERROR,
351 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
352 : /*- translator: first %s is name of a SQL command, eg. REPACK */
353 : errmsg("cannot execute %s on a shared catalog",
354 : RepackCommandAsString(cmd)));
355 :
356 : /*
357 : * Don't process temp tables of other backends ... their local buffer
358 : * manager is not going to cope.
359 : */
360 389 : if (RELATION_IS_OTHER_TEMP(OldHeap))
361 0 : ereport(ERROR,
362 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
363 : /*- translator: first %s is name of a SQL command, eg. REPACK */
364 : errmsg("cannot execute %s on temporary tables of other sessions",
365 : RepackCommandAsString(cmd)));
366 :
367 : /*
368 : * Also check for active uses of the relation in the current transaction,
369 : * including open scans and pending AFTER trigger events.
370 : */
371 389 : CheckTableNotInUse(OldHeap, RepackCommandAsString(cmd));
372 :
373 : /* Check heap and index are valid to cluster on */
374 389 : if (OidIsValid(indexOid))
375 : {
376 : /* verify the index is good and lock it */
377 139 : check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
378 : /* also open it */
379 139 : index = index_open(indexOid, NoLock);
380 : }
381 : else
382 250 : index = NULL;
383 :
384 : /*
385 : * When allow_system_table_mods is turned off, we disallow repacking a
386 : * catalog on a particular index unless that's already the clustered index
387 : * for that catalog.
388 : *
389 : * XXX We don't check for this in CLUSTER, because it's historically been
390 : * allowed.
391 : */
392 389 : if (cmd != REPACK_COMMAND_CLUSTER &&
393 282 : !allowSystemTableMods && OidIsValid(indexOid) &&
394 16 : IsCatalogRelation(OldHeap) && !index->rd_index->indisclustered)
395 0 : ereport(ERROR,
396 : errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
397 : errmsg("permission denied: \"%s\" is a system catalog",
398 : RelationGetRelationName(OldHeap)),
399 : errdetail("System catalogs can only be clustered by the index they're already clustered on, if any, unless \"%s\" is enabled.",
400 : "allow_system_table_mods"));
401 :
402 : /*
403 : * Quietly ignore the request if this is a materialized view which has not
404 : * been populated from its query. No harm is done because there is no data
405 : * to deal with, and we don't want to throw an error if this is part of a
406 : * multi-relation request -- for example, CLUSTER was run on the entire
407 : * database.
: *
: * rebuild_relation, which normally closes the index, is not reached on
: * this path, so the index opened above has to be closed here as well;
: * otherwise its relcache reference would be leaked.
408 : */
409 389 : if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
410 0 : !RelationIsPopulated(OldHeap))
411 : {
: if (index)
: index_close(index, AccessExclusiveLock);
412 0 : relation_close(OldHeap, AccessExclusiveLock);
413 0 : goto out;
414 : }
415 :
416 : Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
417 : OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
418 : OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
419 :
420 : /*
421 : * All predicate locks on the tuples or pages are about to be made
422 : * invalid, because we move tuples around. Promote them to relation
423 : * locks. Predicate locks on indexes will be promoted when they are
424 : * reindexed.
425 : */
426 389 : TransferPredicateLocksToHeapRelation(OldHeap);
427 :
428 : /* rebuild_relation does all the dirty work */
429 389 : rebuild_relation(OldHeap, index, verbose);
430 : /* rebuild_relation closes OldHeap, and index if valid */
431 :
432 385 : out:
433 : /* Roll back any GUC changes executed by index functions */
434 385 : AtEOXact_GUC(false, save_nestlevel);
435 :
436 : /* Restore userid and security context */
437 385 : SetUserIdAndSecContext(save_userid, save_sec_context);
438 :
439 385 : pgstat_progress_end_command();
440 385 : }
441 :
442 : /*
443 : * Check if the table (and its index) still meets the requirements of
444 : * cluster_rel().
: *
: * Returns true if processing may continue. Returns false if the table
: * should be silently skipped; in that case OldHeap has already been closed
: * here and its AccessExclusiveLock released, so the caller must not close it
: * again.
: *
: * 'userid' is the user whose privileges are rechecked. 'options' is a
: * CLUOPT_* bitmask; only CLUOPT_RECHECK_ISCLUSTERED is examined here.
445 : */
446 : static bool
447 52 : cluster_rel_recheck(RepackCommand cmd, Relation OldHeap, Oid indexOid,
448 : Oid userid, int options)
449 : {
450 52 : Oid tableOid = RelationGetRelid(OldHeap);
451 :
452 : /* Check that the user still has privileges for the relation */
453 52 : if (!repack_is_permitted_for_relation(cmd, tableOid, userid))
454 : {
455 0 : relation_close(OldHeap, AccessExclusiveLock);
456 0 : return false;
457 : }
458 :
459 : /*
460 : * Silently skip a temp table for a remote session. Only doing this check
461 : * in the "recheck" case is appropriate (which currently means somebody is
462 : * executing a database-wide CLUSTER or on a partitioned table), because
463 : * there is another check in cluster() which will stop any attempt to
464 : * cluster remote temp tables by name. There is another check in
465 : * cluster_rel which is redundant, but we leave it for extra safety.
466 : */
467 52 : if (RELATION_IS_OTHER_TEMP(OldHeap))
468 : {
469 0 : relation_close(OldHeap, AccessExclusiveLock);
470 0 : return false;
471 : }
472 :
473 : /* The remaining checks only apply when ordering by an index */
473 52 : if (OidIsValid(indexOid))
474 : {
475 : /*
476 : * Check that the index still exists
477 : */
478 32 : if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
479 : {
480 0 : relation_close(OldHeap, AccessExclusiveLock);
481 0 : return false;
482 : }
483 :
484 : /*
485 : * Check that the index is still the one with indisclustered set, if
486 : * needed.
487 : */
488 32 : if ((options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
489 4 : !get_index_isclustered(indexOid))
490 : {
491 0 : relation_close(OldHeap, AccessExclusiveLock);
492 0 : return false;
493 : }
494 : }
495 :
496 52 : return true;
497 : }
498 :
499 : /*
500 : * Verify that the specified heap and index are valid to cluster on
501 : *
502 : * Side effect: obtains lock on the index. The caller may
503 : * in some cases already have AccessExclusiveLock on the table, but
504 : * not in all cases so we can't rely on the table-level lock for
505 : * protection here.
: *
: * An ERROR is raised if indexOid is not an index on OldHeap, if its access
: * method is not clusterable, if it is a partial index, or if it is marked
: * invalid. On success the relcache reference taken here is dropped again,
: * while the lock acquired with 'lockmode' is kept.
506 : */
507 : void
508 305 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
509 : {
510 : Relation OldIndex;
511 :
512 305 : OldIndex = index_open(indexOid, lockmode);
513 :
514 : /*
515 : * Check that index is in fact an index on the given relation
516 : */
517 305 : if (OldIndex->rd_index == NULL ||
518 305 : OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
519 0 : ereport(ERROR,
520 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
521 : errmsg("\"%s\" is not an index for table \"%s\"",
522 : RelationGetRelationName(OldIndex),
523 : RelationGetRelationName(OldHeap))));
524 :
525 : /* Index AM must allow clustering */
526 305 : if (!OldIndex->rd_indam->amclusterable)
527 0 : ereport(ERROR,
528 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
529 : errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
530 : RelationGetRelationName(OldIndex))));
531 :
532 : /*
533 : * Disallow clustering on incomplete indexes (those that might not index
534 : * every row of the relation). We could relax this by making a separate
535 : * seqscan pass over the table to copy the missing rows, but that seems
536 : * expensive and tedious.
: *
: * A non-null indpred attribute is what identifies a partial index here.
537 : */
538 305 : if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
539 0 : ereport(ERROR,
540 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
541 : errmsg("cannot cluster on partial index \"%s\"",
542 : RelationGetRelationName(OldIndex))));
543 :
544 : /*
545 : * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
546 : * it might well not contain entries for every heap row, or might not even
547 : * be internally consistent. (But note that we don't check indcheckxmin;
548 : * the worst consequence of following broken HOT chains would be that we
549 : * might put recently-dead tuples out-of-order in the new table, and there
550 : * is little harm in that.)
551 : */
552 305 : if (!OldIndex->rd_index->indisvalid)
553 4 : ereport(ERROR,
554 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
555 : errmsg("cannot cluster on invalid index \"%s\"",
556 : RelationGetRelationName(OldIndex))));
557 :
558 : /* Drop relcache refcnt on OldIndex, but keep lock */
559 301 : index_close(OldIndex, NoLock);
560 301 : }
561 :
562 : /*
563 : * mark_index_clustered: mark the specified index as the one clustered on
564 : *
565 : * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
: *
: * Nothing is done if indexOid is valid and already marked clustered.
: * Otherwise every index of rel is visited: a set indisclustered flag is
: * cleared, and the flag is set on the index matching indexOid. The
: * post-alter object access hook is invoked for each visited index, whether
: * or not its pg_index row was changed, with 'is_internal' passed through.
566 : */
567 : void
568 186 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
569 : {
570 : HeapTuple indexTuple;
571 : Form_pg_index indexForm;
572 : Relation pg_index;
573 : ListCell *index;
574 :
575 : Assert(rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE);
576 :
577 : /*
578 : * If the index is already marked clustered, no need to do anything.
579 : */
580 186 : if (OidIsValid(indexOid))
581 : {
582 178 : if (get_index_isclustered(indexOid))
583 38 : return;
584 : }
585 :
586 : /*
587 : * Check each index of the relation and set/clear the bit as needed.
588 : */
589 148 : pg_index = table_open(IndexRelationId, RowExclusiveLock);
590 :
591 450 : foreach(index, RelationGetIndexList(rel))
592 : {
593 302 : Oid thisIndexOid = lfirst_oid(index);
594 :
595 : /* work on a modifiable copy of the pg_index row; freed at loop end */
595 302 : indexTuple = SearchSysCacheCopy1(INDEXRELID,
596 : ObjectIdGetDatum(thisIndexOid));
597 302 : if (!HeapTupleIsValid(indexTuple))
598 0 : elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
599 302 : indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
600 :
601 : /*
602 : * Unset the bit if set. We know it's wrong because we checked this
603 : * earlier.
604 : */
605 302 : if (indexForm->indisclustered)
606 : {
607 20 : indexForm->indisclustered = false;
608 20 : CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
609 : }
610 282 : else if (thisIndexOid == indexOid)
611 : {
612 : /* this was checked earlier, but let's be real sure */
613 140 : if (!indexForm->indisvalid)
614 0 : elog(ERROR, "cannot cluster on invalid index %u", indexOid);
615 140 : indexForm->indisclustered = true;
616 140 : CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
617 : }
618 :
619 302 : InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
620 : InvalidOid, is_internal);
621 :
622 302 : heap_freetuple(indexTuple);
623 : }
624 :
625 148 : table_close(pg_index, RowExclusiveLock);
626 : }
627 :
628 : /*
629 : * rebuild_relation: rebuild an existing relation in index or physical order
630 : *
631 : * OldHeap: table to rebuild.
632 : * index: index to cluster by, or NULL to rewrite in physical order.
: * verbose: passed through to copy_table_data.
633 : *
634 : * On entry, heap and index (if one is given) must be open, and
635 : * AccessExclusiveLock held on them.
636 : * On exit, they are closed, but locks on them are not released.
: *
: * The steps are: mark the index as clustered (if one is given), create a
: * transient heap, copy the data into it, close all three relations, and
: * finally let finish_heap_swap exchange the storage of the two heaps.
637 : */
638 : static void
639 389 : rebuild_relation(Relation OldHeap, Relation index, bool verbose)
640 : {
641 389 : Oid tableOid = RelationGetRelid(OldHeap);
642 389 : Oid accessMethod = OldHeap->rd_rel->relam;
643 389 : Oid tableSpace = OldHeap->rd_rel->reltablespace;
644 : Oid OIDNewHeap;
645 : Relation NewHeap;
646 : char relpersistence;
647 : bool is_system_catalog;
648 : bool swap_toast_by_content;
649 : TransactionId frozenXid;
650 : MultiXactId cutoffMulti;
651 :
652 : Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false) &&
653 : (index == NULL || CheckRelationLockedByMe(index, AccessExclusiveLock, false)));
654 :
655 : /* for CLUSTER or REPACK USING INDEX, mark the index as the one to use */
656 389 : if (index != NULL)
657 139 : mark_index_clustered(OldHeap, RelationGetRelid(index), true);
658 :
659 : /* Remember info about rel before closing OldHeap */
660 389 : relpersistence = OldHeap->rd_rel->relpersistence;
661 389 : is_system_catalog = IsSystemRelation(OldHeap);
662 :
663 : /*
664 : * Create the transient table that will receive the re-ordered data.
665 : *
666 : * OldHeap is already locked, so no need to lock it again. make_new_heap
667 : * obtains AccessExclusiveLock on the new heap and its toast table.
668 : */
669 389 : OIDNewHeap = make_new_heap(tableOid, tableSpace,
670 : accessMethod,
671 : relpersistence,
672 : NoLock);
673 : Assert(CheckRelationOidLockedByMe(OIDNewHeap, AccessExclusiveLock, false));
674 389 : NewHeap = table_open(OIDNewHeap, NoLock);
675 :
676 : /* Copy the heap data into the new table in the desired order */
677 389 : copy_table_data(NewHeap, OldHeap, index, verbose,
678 : &swap_toast_by_content, &frozenXid, &cutoffMulti);
679 :
680 :
681 : /* Close relcache entries, but keep lock until transaction commit */
682 389 : table_close(OldHeap, NoLock);
683 389 : if (index)
684 139 : index_close(index, NoLock);
685 :
686 : /*
687 : * Close the new relation so it can be dropped as soon as the storage is
688 : * swapped. The relation is not visible to others, so no need to unlock it
689 : * explicitly.
690 : */
691 389 : table_close(NewHeap, NoLock);
692 :
693 : /*
694 : * Swap the physical files of the target and transient tables, then
695 : * rebuild the target's indexes and throw away the transient table.
696 : */
697 389 : finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
698 : swap_toast_by_content, false, true,
699 : frozenXid, cutoffMulti,
700 : relpersistence);
701 385 : }
702 :
703 :
704 : /*
705 : * Create the transient table that will be filled with new data during
706 : * CLUSTER, ALTER TABLE, and similar operations. The transient table
707 : * duplicates the logical structure of the OldHeap; but will have the
708 : * specified physical storage properties NewTableSpace, NewAccessMethod, and
709 : * relpersistence.
710 : *
711 : * After this, the caller should load the new heap with transferred/modified
712 : * data, then call finish_heap_swap to complete the operation.
: *
: * OIDOldHeap is opened with 'lockmode' (rebuild_relation passes NoLock
: * because it already holds a lock) and closed again with NoLock, so any lock
: * taken here is retained. 'lockmode' is also handed to
: * NewHeapCreateToastTable when the old heap has a TOAST table.
: *
: * The new relation is named "pg_temp_<OIDOldHeap>", is owned by the old
: * heap's owner, and inherits the old heap's reloptions. Returns the OID of
: * the new heap.
713 : */
714 : Oid
715 1561 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
716 : char relpersistence, LOCKMODE lockmode)
717 : {
718 : TupleDesc OldHeapDesc;
719 : char NewHeapName[NAMEDATALEN];
720 : Oid OIDNewHeap;
721 : Oid toastid;
722 : Relation OldHeap;
723 : HeapTuple tuple;
724 : Datum reloptions;
725 : bool isNull;
726 : Oid namespaceid;
727 :
728 1561 : OldHeap = table_open(OIDOldHeap, lockmode);
729 1561 : OldHeapDesc = RelationGetDescr(OldHeap);
730 :
731 : /*
732 : * Note that the NewHeap will not receive any of the defaults or
733 : * constraints associated with the OldHeap; we don't need 'em, and there's
734 : * no reason to spend cycles inserting them into the catalogs only to
735 : * delete them.
736 : */
737 :
738 : /*
739 : * But we do want to use reloptions of the old heap for new heap.
: *
: * The syscache tuple stays pinned until after heap_create_with_catalog,
: * since 'reloptions' is fetched from it.
740 : */
741 1561 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
742 1561 : if (!HeapTupleIsValid(tuple))
743 0 : elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
744 1561 : reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
745 : &isNull);
746 1561 : if (isNull)
747 1469 : reloptions = (Datum) 0;
748 :
749 1561 : if (relpersistence == RELPERSISTENCE_TEMP)
750 98 : namespaceid = LookupCreationNamespace("pg_temp");
751 : else
752 1463 : namespaceid = RelationGetNamespace(OldHeap);
753 :
754 : /*
755 : * Create the new heap, using a temporary name in the same namespace as
756 : * the existing table. NOTE: there is some risk of collision with user
757 : * relnames. Working around this seems more trouble than it's worth; in
758 : * particular, we can't create the new heap in a different namespace from
759 : * the old, or we will have problems with the TEMP status of temp tables.
760 : *
761 : * Note: the new heap is not a shared relation, even if we are rebuilding
762 : * a shared rel. However, we do make the new heap mapped if the source is
763 : * mapped. This simplifies swap_relation_files, and is absolutely
764 : * necessary for rebuilding pg_class, for reasons explained there.
765 : */
766 1561 : snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
767 :
768 1561 : OIDNewHeap = heap_create_with_catalog(NewHeapName,
769 : namespaceid,
770 : NewTableSpace,
771 : InvalidOid,
772 : InvalidOid,
773 : InvalidOid,
774 1561 : OldHeap->rd_rel->relowner,
775 : NewAccessMethod,
776 : OldHeapDesc,
777 : NIL,
778 : RELKIND_RELATION,
779 : relpersistence,
780 : false,
781 1561 : RelationIsMapped(OldHeap),
782 : ONCOMMIT_NOOP,
783 : reloptions,
784 : false,
785 : true,
786 : true,
787 : OIDOldHeap,
788 1561 : NULL);
789 : Assert(OIDNewHeap != InvalidOid);
790 :
791 1561 : ReleaseSysCache(tuple);
792 :
793 : /*
794 : * Advance command counter so that the newly-created relation's catalog
795 : * tuples will be visible to table_open.
796 : */
797 1561 : CommandCounterIncrement();
798 :
799 : /*
800 : * If necessary, create a TOAST table for the new relation.
801 : *
802 : * If the relation doesn't have a TOAST table already, we can't need one
803 : * for the new relation. The other way around is possible though: if some
804 : * wide columns have been dropped, NewHeapCreateToastTable can decide that
805 : * no TOAST table is needed for the new table.
806 : *
807 : * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
808 : * that the TOAST table will be visible for insertion.
809 : */
810 1561 : toastid = OldHeap->rd_rel->reltoastrelid;
811 1561 : if (OidIsValid(toastid))
812 : {
813 : /* keep the existing toast table's reloptions, if any */
814 552 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
815 552 : if (!HeapTupleIsValid(tuple))
816 0 : elog(ERROR, "cache lookup failed for relation %u", toastid);
817 552 : reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
818 : &isNull);
819 552 : if (isNull)
820 552 : reloptions = (Datum) 0;
821 :
822 552 : NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
823 :
824 552 : ReleaseSysCache(tuple);
825 : }
826 :
827 : /* drop only the relcache reference; any lock on the old heap is kept */
827 1561 : table_close(OldHeap, NoLock);
828 :
829 1561 : return OIDNewHeap;
830 : }
831 :
832 : /*
833 : * Do the physical copying of table data.
834 : *
835 : * There are three output parameters:
836 : * *pSwapToastByContent is set true if toast tables must be swapped by content.
837 : * *pFreezeXid receives the TransactionId used as freeze cutoff point.
838 : * *pCutoffMulti receives the MultiXactId used as a cutoff point.
839 : */
840 : static void
841 389 : copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose,
842 : bool *pSwapToastByContent, TransactionId *pFreezeXid,
843 : MultiXactId *pCutoffMulti)
844 : {
845 : Relation relRelation;
846 : HeapTuple reltup;
847 : Form_pg_class relform;
848 : TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
849 : TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY;
850 : VacuumParams params;
851 : struct VacuumCutoffs cutoffs;
852 : bool use_sort;
853 389 : double num_tuples = 0,
854 389 : tups_vacuumed = 0,
855 389 : tups_recently_dead = 0;
856 : BlockNumber num_pages;
857 389 : int elevel = verbose ? INFO : DEBUG2;
858 : PGRUsage ru0;
859 : char *nspname;
860 :
861 389 : pg_rusage_init(&ru0);
862 :
863 : /* Store a copy of the namespace name for logging purposes */
864 389 : nspname = get_namespace_name(RelationGetNamespace(OldHeap));
865 :
866 : /*
867 : * Their tuple descriptors should be exactly alike, but here we only need
868 : * assume that they have the same number of columns.
869 : */
870 389 : oldTupDesc = RelationGetDescr(OldHeap);
871 389 : newTupDesc = RelationGetDescr(NewHeap);
872 : Assert(newTupDesc->natts == oldTupDesc->natts);
873 :
874 : /*
875 : * If the OldHeap has a toast table, get lock on the toast table to keep
876 : * it from being vacuumed. This is needed because autovacuum processes
877 : * toast tables independently of their main tables, with no lock on the
878 : * latter. If an autovacuum were to start on the toast table after we
879 : * compute our OldestXmin below, it would use a later OldestXmin, and then
880 : * possibly remove as DEAD toast tuples belonging to main tuples we think
881 : * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
882 : * tuples.
883 : *
884 : * We don't need to open the toast relation here, just lock it. The lock
885 : * will be held till end of transaction.
886 : */
887 389 : if (OldHeap->rd_rel->reltoastrelid)
888 124 : LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
889 :
890 : /*
891 : * If both tables have TOAST tables, perform toast swap by content. It is
892 : * possible that the old table has a toast table but the new one doesn't,
893 : * if toastable columns have been dropped. In that case we have to do
894 : * swap by links. This is okay because swap by content is only essential
895 : * for system catalogs, and we don't support schema changes for them.
896 : */
897 389 : if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
898 : {
899 124 : *pSwapToastByContent = true;
900 :
901 : /*
902 : * When doing swap by content, any toast pointers written into NewHeap
903 : * must use the old toast table's OID, because that's where the toast
904 : * data will eventually be found. Set this up by setting rd_toastoid.
905 : * This also tells toast_save_datum() to preserve the toast value
906 : * OIDs, which we want so as not to invalidate toast pointers in
907 : * system catalog caches, and to avoid making multiple copies of a
908 : * single toast value.
909 : *
910 : * Note that we must hold NewHeap open until we are done writing data,
911 : * since the relcache will not guarantee to remember this setting once
912 : * the relation is closed. Also, this technique depends on the fact
913 : * that no one will try to read from the NewHeap until after we've
914 : * finished writing it and swapping the rels --- otherwise they could
915 : * follow the toast pointers to the wrong place. (It would actually
916 : * work for values copied over from the old toast table, but not for
917 : * any values that we toast which were previously not toasted.)
918 : */
919 124 : NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
920 : }
921 : else
922 265 : *pSwapToastByContent = false;
923 :
924 : /*
925 : * Compute xids used to freeze and weed out dead tuples and multixacts.
926 : * Since we're going to rewrite the whole table anyway, there's no reason
927 : * not to be aggressive about this.
928 : */
929 389 : memset(¶ms, 0, sizeof(VacuumParams));
930 389 : vacuum_get_cutoffs(OldHeap, ¶ms, &cutoffs);
931 :
932 : /*
933 : * FreezeXid will become the table's new relfrozenxid, and that mustn't go
934 : * backwards, so take the max.
935 : */
936 : {
937 389 : TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
938 :
939 778 : if (TransactionIdIsValid(relfrozenxid) &&
940 389 : TransactionIdPrecedes(cutoffs.FreezeLimit, relfrozenxid))
941 59 : cutoffs.FreezeLimit = relfrozenxid;
942 : }
943 :
944 : /*
945 : * MultiXactCutoff, similarly, shouldn't go backwards either.
946 : */
947 : {
948 389 : MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
949 :
950 778 : if (MultiXactIdIsValid(relminmxid) &&
951 389 : MultiXactIdPrecedes(cutoffs.MultiXactCutoff, relminmxid))
952 0 : cutoffs.MultiXactCutoff = relminmxid;
953 : }
954 :
955 : /*
956 : * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
957 : * the OldHeap. We know how to use a sort to duplicate the ordering of a
958 : * btree index, and will use seqscan-and-sort for that case if the planner
959 : * tells us it's cheaper. Otherwise, always indexscan if an index is
960 : * provided, else plain seqscan.
961 : */
962 389 : if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
963 139 : use_sort = plan_cluster_use_sort(RelationGetRelid(OldHeap),
964 : RelationGetRelid(OldIndex));
965 : else
966 250 : use_sort = false;
967 :
968 : /* Log what we're doing */
969 389 : if (OldIndex != NULL && !use_sort)
970 59 : ereport(elevel,
971 : errmsg("repacking \"%s.%s\" using index scan on \"%s\"",
972 : nspname,
973 : RelationGetRelationName(OldHeap),
974 : RelationGetRelationName(OldIndex)));
975 330 : else if (use_sort)
976 80 : ereport(elevel,
977 : errmsg("repacking \"%s.%s\" using sequential scan and sort",
978 : nspname,
979 : RelationGetRelationName(OldHeap)));
980 : else
981 250 : ereport(elevel,
982 : errmsg("repacking \"%s.%s\" in physical order",
983 : nspname,
984 : RelationGetRelationName(OldHeap)));
985 :
986 : /*
987 : * Hand off the actual copying to AM specific function, the generic code
988 : * cannot know how to deal with visibility across AMs. Note that this
989 : * routine is allowed to set FreezeXid / MultiXactCutoff to different
990 : * values (e.g. because the AM doesn't use freezing).
991 : */
992 389 : table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
993 : cutoffs.OldestXmin, &cutoffs.FreezeLimit,
994 : &cutoffs.MultiXactCutoff,
995 : &num_tuples, &tups_vacuumed,
996 : &tups_recently_dead);
997 :
998 : /* return selected values to caller, get set as relfrozenxid/minmxid */
999 389 : *pFreezeXid = cutoffs.FreezeLimit;
1000 389 : *pCutoffMulti = cutoffs.MultiXactCutoff;
1001 :
1002 : /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
1003 389 : NewHeap->rd_toastoid = InvalidOid;
1004 :
1005 389 : num_pages = RelationGetNumberOfBlocks(NewHeap);
1006 :
1007 : /* Log what we did */
1008 389 : ereport(elevel,
1009 : (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1010 : nspname,
1011 : RelationGetRelationName(OldHeap),
1012 : tups_vacuumed, num_tuples,
1013 : RelationGetNumberOfBlocks(OldHeap)),
1014 : errdetail("%.0f dead row versions cannot be removed yet.\n"
1015 : "%s.",
1016 : tups_recently_dead,
1017 : pg_rusage_show(&ru0))));
1018 :
1019 : /* Update pg_class to reflect the correct values of pages and tuples. */
1020 389 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1021 :
1022 389 : reltup = SearchSysCacheCopy1(RELOID,
1023 : ObjectIdGetDatum(RelationGetRelid(NewHeap)));
1024 389 : if (!HeapTupleIsValid(reltup))
1025 0 : elog(ERROR, "cache lookup failed for relation %u",
1026 : RelationGetRelid(NewHeap));
1027 389 : relform = (Form_pg_class) GETSTRUCT(reltup);
1028 :
1029 389 : relform->relpages = num_pages;
1030 389 : relform->reltuples = num_tuples;
1031 :
1032 : /* Don't update the stats for pg_class. See swap_relation_files. */
1033 389 : if (RelationGetRelid(OldHeap) != RelationRelationId)
1034 366 : CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1035 : else
1036 23 : CacheInvalidateRelcacheByTuple(reltup);
1037 :
1038 : /* Clean up. */
1039 389 : heap_freetuple(reltup);
1040 389 : table_close(relRelation, RowExclusiveLock);
1041 :
1042 : /* Make the update visible */
1043 389 : CommandCounterIncrement();
1044 389 : }
1045 :
1046 : /*
1047 : * Swap the physical files of two given relations.
1048 : *
1049 : * We swap the physical identity (reltablespace, relfilenumber) while keeping
1050 : * the same logical identities of the two relations. relpersistence is also
1051 : * swapped, which is critical since it determines where buffers live for each
1052 : * relation.
1053 : *
1054 : * We can swap associated TOAST data in either of two ways: recursively swap
1055 : * the physical content of the toast tables (and their indexes), or swap the
1056 : * TOAST links in the given relations' pg_class entries. The former is needed
1057 : * to manage rewrites of shared catalogs (where we cannot change the pg_class
1058 : * links) while the latter is the only way to handle cases in which a toast
1059 : * table is added or removed altogether.
1060 : *
1061 : * Additionally, the first relation is marked with relfrozenxid set to
1062 : * frozenXid. It seems a bit ugly to have this here, but the caller would
1063 : * have to do it anyway, so having it here saves a heap_update. Note: in
1064 : * the swap-toast-links case, we assume we don't need to change the toast
1065 : * table's relfrozenxid: the new version of the toast table should already
1066 : * have relfrozenxid set to RecentXmin, which is good enough.
1067 : *
1068 : * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1069 : * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1070 : * having to look the information up again later in finish_heap_swap.
1071 : */
1072 : static void
1073 1676 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
1074 : bool swap_toast_by_content,
1075 : bool is_internal,
1076 : TransactionId frozenXid,
1077 : MultiXactId cutoffMulti,
1078 : Oid *mapped_tables)
1079 : {
1080 : Relation relRelation;
1081 : HeapTuple reltup1,
1082 : reltup2;
1083 : Form_pg_class relform1,
1084 : relform2;
1085 : RelFileNumber relfilenumber1,
1086 : relfilenumber2;
1087 : RelFileNumber swaptemp;
1088 : char swptmpchr;
1089 : Oid relam1,
1090 : relam2;
1091 :
1092 : /* We need writable copies of both pg_class tuples. */
1093 1676 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1094 :
1095 1676 : reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
1096 1676 : if (!HeapTupleIsValid(reltup1))
1097 0 : elog(ERROR, "cache lookup failed for relation %u", r1);
1098 1676 : relform1 = (Form_pg_class) GETSTRUCT(reltup1);
1099 :
1100 1676 : reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
1101 1676 : if (!HeapTupleIsValid(reltup2))
1102 0 : elog(ERROR, "cache lookup failed for relation %u", r2);
1103 1676 : relform2 = (Form_pg_class) GETSTRUCT(reltup2);
1104 :
1105 1676 : relfilenumber1 = relform1->relfilenode;
1106 1676 : relfilenumber2 = relform2->relfilenode;
1107 1676 : relam1 = relform1->relam;
1108 1676 : relam2 = relform2->relam;
1109 :
1110 1676 : if (RelFileNumberIsValid(relfilenumber1) &&
1111 : RelFileNumberIsValid(relfilenumber2))
1112 : {
1113 : /*
1114 : * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
1115 : * relpersistence
1116 : */
1117 : Assert(!target_is_pg_class);
1118 :
1119 1587 : swaptemp = relform1->relfilenode;
1120 1587 : relform1->relfilenode = relform2->relfilenode;
1121 1587 : relform2->relfilenode = swaptemp;
1122 :
1123 1587 : swaptemp = relform1->reltablespace;
1124 1587 : relform1->reltablespace = relform2->reltablespace;
1125 1587 : relform2->reltablespace = swaptemp;
1126 :
1127 1587 : swaptemp = relform1->relam;
1128 1587 : relform1->relam = relform2->relam;
1129 1587 : relform2->relam = swaptemp;
1130 :
1131 1587 : swptmpchr = relform1->relpersistence;
1132 1587 : relform1->relpersistence = relform2->relpersistence;
1133 1587 : relform2->relpersistence = swptmpchr;
1134 :
1135 : /* Also swap toast links, if we're swapping by links */
1136 1587 : if (!swap_toast_by_content)
1137 : {
1138 1275 : swaptemp = relform1->reltoastrelid;
1139 1275 : relform1->reltoastrelid = relform2->reltoastrelid;
1140 1275 : relform2->reltoastrelid = swaptemp;
1141 : }
1142 : }
1143 : else
1144 : {
1145 : /*
1146 : * Mapped-relation case. Here we have to swap the relation mappings
1147 : * instead of modifying the pg_class columns. Both must be mapped.
1148 : */
1149 89 : if (RelFileNumberIsValid(relfilenumber1) ||
1150 : RelFileNumberIsValid(relfilenumber2))
1151 0 : elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1152 : NameStr(relform1->relname));
1153 :
1154 : /*
1155 : * We can't change the tablespace nor persistence of a mapped rel, and
1156 : * we can't handle toast link swapping for one either, because we must
1157 : * not apply any critical changes to its pg_class row. These cases
1158 : * should be prevented by upstream permissions tests, so these checks
1159 : * are non-user-facing emergency backstop.
1160 : */
1161 89 : if (relform1->reltablespace != relform2->reltablespace)
1162 0 : elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1163 : NameStr(relform1->relname));
1164 89 : if (relform1->relpersistence != relform2->relpersistence)
1165 0 : elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1166 : NameStr(relform1->relname));
1167 89 : if (relform1->relam != relform2->relam)
1168 0 : elog(ERROR, "cannot change access method of mapped relation \"%s\"",
1169 : NameStr(relform1->relname));
1170 89 : if (!swap_toast_by_content &&
1171 29 : (relform1->reltoastrelid || relform2->reltoastrelid))
1172 0 : elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1173 : NameStr(relform1->relname));
1174 :
1175 : /*
1176 : * Fetch the mappings --- shouldn't fail, but be paranoid
1177 : */
1178 89 : relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
1179 89 : if (!RelFileNumberIsValid(relfilenumber1))
1180 0 : elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1181 : NameStr(relform1->relname), r1);
1182 89 : relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
1183 89 : if (!RelFileNumberIsValid(relfilenumber2))
1184 0 : elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1185 : NameStr(relform2->relname), r2);
1186 :
1187 : /*
1188 : * Send replacement mappings to relmapper. Note these won't actually
1189 : * take effect until CommandCounterIncrement.
1190 : */
1191 89 : RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
1192 89 : RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
1193 :
1194 : /* Pass OIDs of mapped r2 tables back to caller */
1195 89 : *mapped_tables++ = r2;
1196 : }
1197 :
1198 : /*
1199 : * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
1200 : * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1201 : * new.
1202 : */
1203 : {
1204 : Relation rel1,
1205 : rel2;
1206 :
1207 1676 : rel1 = relation_open(r1, NoLock);
1208 1676 : rel2 = relation_open(r2, NoLock);
1209 1676 : rel2->rd_createSubid = rel1->rd_createSubid;
1210 1676 : rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
1211 1676 : rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
1212 1676 : RelationAssumeNewRelfilelocator(rel1);
1213 1676 : relation_close(rel1, NoLock);
1214 1676 : relation_close(rel2, NoLock);
1215 : }
1216 :
1217 : /*
1218 : * In the case of a shared catalog, these next few steps will only affect
1219 : * our own database's pg_class row; but that's okay, because they are all
1220 : * noncritical updates. That's also an important fact for the case of a
1221 : * mapped catalog, because it's possible that we'll commit the map change
1222 : * and then fail to commit the pg_class update.
1223 : */
1224 :
1225 : /* set rel1's frozen Xid and minimum MultiXid */
1226 1676 : if (relform1->relkind != RELKIND_INDEX)
1227 : {
1228 : Assert(!TransactionIdIsValid(frozenXid) ||
1229 : TransactionIdIsNormal(frozenXid));
1230 1552 : relform1->relfrozenxid = frozenXid;
1231 1552 : relform1->relminmxid = cutoffMulti;
1232 : }
1233 :
1234 : /* swap size statistics too, since new rel has freshly-updated stats */
1235 : {
1236 : int32 swap_pages;
1237 : float4 swap_tuples;
1238 : int32 swap_allvisible;
1239 : int32 swap_allfrozen;
1240 :
1241 1676 : swap_pages = relform1->relpages;
1242 1676 : relform1->relpages = relform2->relpages;
1243 1676 : relform2->relpages = swap_pages;
1244 :
1245 1676 : swap_tuples = relform1->reltuples;
1246 1676 : relform1->reltuples = relform2->reltuples;
1247 1676 : relform2->reltuples = swap_tuples;
1248 :
1249 1676 : swap_allvisible = relform1->relallvisible;
1250 1676 : relform1->relallvisible = relform2->relallvisible;
1251 1676 : relform2->relallvisible = swap_allvisible;
1252 :
1253 1676 : swap_allfrozen = relform1->relallfrozen;
1254 1676 : relform1->relallfrozen = relform2->relallfrozen;
1255 1676 : relform2->relallfrozen = swap_allfrozen;
1256 : }
1257 :
1258 : /*
1259 : * Update the tuples in pg_class --- unless the target relation of the
1260 : * swap is pg_class itself. In that case, there is zero point in making
1261 : * changes because we'd be updating the old data that we're about to throw
1262 : * away. Because the real work being done here for a mapped relation is
1263 : * just to change the relation map settings, it's all right to not update
1264 : * the pg_class rows in this case. The most important changes will instead
1265 : * performed later, in finish_heap_swap() itself.
1266 : */
1267 1676 : if (!target_is_pg_class)
1268 : {
1269 : CatalogIndexState indstate;
1270 :
1271 1653 : indstate = CatalogOpenIndexes(relRelation);
1272 1653 : CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
1273 : indstate);
1274 1653 : CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
1275 : indstate);
1276 1653 : CatalogCloseIndexes(indstate);
1277 : }
1278 : else
1279 : {
1280 : /* no update ... but we do still need relcache inval */
1281 23 : CacheInvalidateRelcacheByTuple(reltup1);
1282 23 : CacheInvalidateRelcacheByTuple(reltup2);
1283 : }
1284 :
1285 : /*
1286 : * Now that pg_class has been updated with its relevant information for
1287 : * the swap, update the dependency of the relations to point to their new
1288 : * table AM, if it has changed.
1289 : */
1290 1676 : if (relam1 != relam2)
1291 : {
1292 24 : if (changeDependencyFor(RelationRelationId,
1293 : r1,
1294 : AccessMethodRelationId,
1295 : relam1,
1296 : relam2) != 1)
1297 0 : elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1298 : get_namespace_name(get_rel_namespace(r1)),
1299 : get_rel_name(r1));
1300 24 : if (changeDependencyFor(RelationRelationId,
1301 : r2,
1302 : AccessMethodRelationId,
1303 : relam2,
1304 : relam1) != 1)
1305 0 : elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1306 : get_namespace_name(get_rel_namespace(r2)),
1307 : get_rel_name(r2));
1308 : }
1309 :
1310 : /*
1311 : * Post alter hook for modified relations. The change to r2 is always
1312 : * internal, but r1 depends on the invocation context.
1313 : */
1314 1676 : InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
1315 : InvalidOid, is_internal);
1316 1676 : InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
1317 : InvalidOid, true);
1318 :
1319 : /*
1320 : * If we have toast tables associated with the relations being swapped,
1321 : * deal with them too.
1322 : */
1323 1676 : if (relform1->reltoastrelid || relform2->reltoastrelid)
1324 : {
1325 523 : if (swap_toast_by_content)
1326 : {
1327 124 : if (relform1->reltoastrelid && relform2->reltoastrelid)
1328 : {
1329 : /* Recursively swap the contents of the toast tables */
1330 124 : swap_relation_files(relform1->reltoastrelid,
1331 : relform2->reltoastrelid,
1332 : target_is_pg_class,
1333 : swap_toast_by_content,
1334 : is_internal,
1335 : frozenXid,
1336 : cutoffMulti,
1337 : mapped_tables);
1338 : }
1339 : else
1340 : {
1341 : /* caller messed up */
1342 0 : elog(ERROR, "cannot swap toast files by content when there's only one");
1343 : }
1344 : }
1345 : else
1346 : {
1347 : /*
1348 : * We swapped the ownership links, so we need to change dependency
1349 : * data to match.
1350 : *
1351 : * NOTE: it is possible that only one table has a toast table.
1352 : *
1353 : * NOTE: at present, a TOAST table's only dependency is the one on
1354 : * its owning table. If more are ever created, we'd need to use
1355 : * something more selective than deleteDependencyRecordsFor() to
1356 : * get rid of just the link we want.
1357 : */
1358 : ObjectAddress baseobject,
1359 : toastobject;
1360 : long count;
1361 :
1362 : /*
1363 : * We disallow this case for system catalogs, to avoid the
1364 : * possibility that the catalog we're rebuilding is one of the
1365 : * ones the dependency changes would change. It's too late to be
1366 : * making any data changes to the target catalog.
1367 : */
1368 399 : if (IsSystemClass(r1, relform1))
1369 0 : elog(ERROR, "cannot swap toast files by links for system catalogs");
1370 :
1371 : /* Delete old dependencies */
1372 399 : if (relform1->reltoastrelid)
1373 : {
1374 378 : count = deleteDependencyRecordsFor(RelationRelationId,
1375 : relform1->reltoastrelid,
1376 : false);
1377 378 : if (count != 1)
1378 0 : elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1379 : count);
1380 : }
1381 399 : if (relform2->reltoastrelid)
1382 : {
1383 399 : count = deleteDependencyRecordsFor(RelationRelationId,
1384 : relform2->reltoastrelid,
1385 : false);
1386 399 : if (count != 1)
1387 0 : elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1388 : count);
1389 : }
1390 :
1391 : /* Register new dependencies */
1392 399 : baseobject.classId = RelationRelationId;
1393 399 : baseobject.objectSubId = 0;
1394 399 : toastobject.classId = RelationRelationId;
1395 399 : toastobject.objectSubId = 0;
1396 :
1397 399 : if (relform1->reltoastrelid)
1398 : {
1399 378 : baseobject.objectId = r1;
1400 378 : toastobject.objectId = relform1->reltoastrelid;
1401 378 : recordDependencyOn(&toastobject, &baseobject,
1402 : DEPENDENCY_INTERNAL);
1403 : }
1404 :
1405 399 : if (relform2->reltoastrelid)
1406 : {
1407 399 : baseobject.objectId = r2;
1408 399 : toastobject.objectId = relform2->reltoastrelid;
1409 399 : recordDependencyOn(&toastobject, &baseobject,
1410 : DEPENDENCY_INTERNAL);
1411 : }
1412 : }
1413 : }
1414 :
1415 : /*
1416 : * If we're swapping two toast tables by content, do the same for their
1417 : * valid index. The swap can actually be safely done only if the relations
1418 : * have indexes.
1419 : */
1420 1676 : if (swap_toast_by_content &&
1421 372 : relform1->relkind == RELKIND_TOASTVALUE &&
1422 124 : relform2->relkind == RELKIND_TOASTVALUE)
1423 : {
1424 : Oid toastIndex1,
1425 : toastIndex2;
1426 :
1427 : /* Get valid index for each relation */
1428 124 : toastIndex1 = toast_get_valid_index(r1,
1429 : AccessExclusiveLock);
1430 124 : toastIndex2 = toast_get_valid_index(r2,
1431 : AccessExclusiveLock);
1432 :
1433 124 : swap_relation_files(toastIndex1,
1434 : toastIndex2,
1435 : target_is_pg_class,
1436 : swap_toast_by_content,
1437 : is_internal,
1438 : InvalidTransactionId,
1439 : InvalidMultiXactId,
1440 : mapped_tables);
1441 : }
1442 :
1443 : /* Clean up. */
1444 1676 : heap_freetuple(reltup1);
1445 1676 : heap_freetuple(reltup2);
1446 :
1447 1676 : table_close(relRelation, RowExclusiveLock);
1448 1676 : }
1449 :
1450 : /*
1451 : * Remove the transient table that was built by make_new_heap, and finish
1452 : * cleaning up (including rebuilding all indexes on the old heap).
1453 : */
1454 : void
1455 1428 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1456 : bool is_system_catalog,
1457 : bool swap_toast_by_content,
1458 : bool check_constraints,
1459 : bool is_internal,
1460 : TransactionId frozenXid,
1461 : MultiXactId cutoffMulti,
1462 : char newrelpersistence)
1463 : {
1464 : ObjectAddress object;
1465 : Oid mapped_tables[4];
1466 : int reindex_flags;
1467 1428 : ReindexParams reindex_params = {0};
1468 : int i;
1469 :
1470 : /* Report that we are now swapping relation files */
1471 1428 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
1472 : PROGRESS_REPACK_PHASE_SWAP_REL_FILES);
1473 :
1474 : /* Zero out possible results from swapped_relation_files */
1475 1428 : memset(mapped_tables, 0, sizeof(mapped_tables));
1476 :
1477 : /*
1478 : * Swap the contents of the heap relations (including any toast tables).
1479 : * Also set old heap's relfrozenxid to frozenXid.
1480 : */
1481 1428 : swap_relation_files(OIDOldHeap, OIDNewHeap,
1482 : (OIDOldHeap == RelationRelationId),
1483 : swap_toast_by_content, is_internal,
1484 : frozenXid, cutoffMulti, mapped_tables);
1485 :
1486 : /*
1487 : * If it's a system catalog, queue a sinval message to flush all catcaches
1488 : * on the catalog when we reach CommandCounterIncrement.
1489 : */
1490 1428 : if (is_system_catalog)
1491 121 : CacheInvalidateCatalog(OIDOldHeap);
1492 :
1493 : /*
1494 : * Rebuild each index on the relation (but not the toast table, which is
1495 : * all-new at this point). It is important to do this before the DROP
1496 : * step because if we are processing a system catalog that will be used
1497 : * during DROP, we want to have its indexes available. There is no
1498 : * advantage to the other order anyway because this is all transactional,
1499 : * so no chance to reclaim disk space before commit. We do not need a
1500 : * final CommandCounterIncrement() because reindex_relation does it.
1501 : *
1502 : * Note: because index_build is called via reindex_relation, it will never
1503 : * set indcheckxmin true for the indexes. This is OK even though in some
1504 : * sense we are building new indexes rather than rebuilding existing ones,
1505 : * because the new heap won't contain any HOT chains at all, let alone
1506 : * broken ones, so it can't be necessary to set indcheckxmin.
1507 : */
1508 1428 : reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1509 1428 : if (check_constraints)
1510 1039 : reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1511 :
1512 : /*
1513 : * Ensure that the indexes have the same persistence as the parent
1514 : * relation.
1515 : */
1516 1428 : if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1517 25 : reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
1518 1403 : else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1519 1350 : reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1520 :
1521 : /* Report that we are now reindexing relations */
1522 1428 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
1523 : PROGRESS_REPACK_PHASE_REBUILD_INDEX);
1524 :
1525 1428 : reindex_relation(NULL, OIDOldHeap, reindex_flags, &reindex_params);
1526 :
1527 : /* Report that we are now doing clean up */
1528 1416 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
1529 : PROGRESS_REPACK_PHASE_FINAL_CLEANUP);
1530 :
1531 : /*
1532 : * If the relation being rebuilt is pg_class, swap_relation_files()
1533 : * couldn't update pg_class's own pg_class entry (check comments in
1534 : * swap_relation_files()), thus relfrozenxid was not updated. That's
1535 : * annoying because a potential reason for doing a VACUUM FULL is a
1536 : * imminent or actual anti-wraparound shutdown. So, now that we can
1537 : * access the new relation using its indices, update relfrozenxid.
1538 : * pg_class doesn't have a toast relation, so we don't need to update the
1539 : * corresponding toast relation. Not that there's little point moving all
1540 : * relfrozenxid updates here since swap_relation_files() needs to write to
1541 : * pg_class for non-mapped relations anyway.
1542 : */
1543 1416 : if (OIDOldHeap == RelationRelationId)
1544 : {
1545 : Relation relRelation;
1546 : HeapTuple reltup;
1547 : Form_pg_class relform;
1548 :
1549 23 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1550 :
1551 23 : reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1552 23 : if (!HeapTupleIsValid(reltup))
1553 0 : elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1554 23 : relform = (Form_pg_class) GETSTRUCT(reltup);
1555 :
1556 23 : relform->relfrozenxid = frozenXid;
1557 23 : relform->relminmxid = cutoffMulti;
1558 :
1559 23 : CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1560 :
1561 23 : table_close(relRelation, RowExclusiveLock);
1562 : }
1563 :
1564 : /* Destroy new heap with old filenumber */
1565 1416 : object.classId = RelationRelationId;
1566 1416 : object.objectId = OIDNewHeap;
1567 1416 : object.objectSubId = 0;
1568 :
1569 : /*
1570 : * The new relation is local to our transaction and we know nothing
1571 : * depends on it, so DROP_RESTRICT should be OK.
1572 : */
1573 1416 : performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
1574 :
1575 : /* performDeletion does CommandCounterIncrement at end */
1576 :
1577 : /*
1578 : * Now we must remove any relation mapping entries that we set up for the
1579 : * transient table, as well as its toast table and toast index if any. If
1580 : * we fail to do this before commit, the relmapper will complain about new
1581 : * permanent map entries being added post-bootstrap.
1582 : */
1583 1505 : for (i = 0; OidIsValid(mapped_tables[i]); i++)
1584 89 : RelationMapRemoveMapping(mapped_tables[i]);
1585 :
1586 : /*
1587 : * At this point, everything is kosher except that, if we did toast swap
1588 : * by links, the toast table's name corresponds to the transient table.
1589 : * The name is irrelevant to the backend because it's referenced by OID,
1590 : * but users looking at the catalogs could be confused. Rename it to
1591 : * prevent this problem.
1592 : *
1593 : * Note no lock required on the relation, because we already hold an
1594 : * exclusive lock on it.
1595 : */
1596 1416 : if (!swap_toast_by_content)
1597 : {
1598 : Relation newrel;
1599 :
1600 1292 : newrel = table_open(OIDOldHeap, NoLock);
1601 1292 : if (OidIsValid(newrel->rd_rel->reltoastrelid))
1602 : {
1603 : Oid toastidx;
1604 : char NewToastName[NAMEDATALEN];
1605 :
1606 : /* Get the associated valid index to be renamed */
1607 378 : toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1608 : NoLock);
1609 :
1610 : /* rename the toast table ... */
1611 378 : snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1612 : OIDOldHeap);
1613 378 : RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1614 : NewToastName, true, false);
1615 :
1616 : /* ... and its valid index too. */
1617 378 : snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1618 : OIDOldHeap);
1619 :
1620 378 : RenameRelationInternal(toastidx,
1621 : NewToastName, true, true);
1622 :
1623 : /*
1624 : * Reset the relrewrite for the toast. The command-counter
1625 : * increment is required here as we are about to update the tuple
1626 : * that is updated as part of RenameRelationInternal.
1627 : */
1628 378 : CommandCounterIncrement();
1629 378 : ResetRelRewrite(newrel->rd_rel->reltoastrelid);
1630 : }
1631 1292 : relation_close(newrel, NoLock);
1632 : }
1633 :
1634 : /* if it's not a catalog table, clear any missing attribute settings */
1635 1416 : if (!is_system_catalog)
1636 : {
1637 : Relation newrel;
1638 :
1639 1295 : newrel = table_open(OIDOldHeap, NoLock);
1640 1295 : RelationClearMissing(newrel);
1641 1295 : relation_close(newrel, NoLock);
1642 : }
1643 1416 : }
1644 :
1645 : /*
1646 : * Determine which relations to process, when REPACK/CLUSTER is called
1647 : * without specifying a table name. The exact process depends on whether
1648 : * USING INDEX was given or not, and in any case we only return tables and
1649 : * materialized views that the current user has privileges to repack/cluster.
1650 : *
1651 : * If USING INDEX was given, we scan pg_index to find those that have
1652 : * indisclustered set; if it was not given, scan pg_class and return all
1653 : * tables.
1654 : *
1655 : * Return it as a list of RelToCluster in the given memory context.
1656 : */
1657 : static List *
1658 17 : get_tables_to_repack(RepackCommand cmd, bool usingindex, MemoryContext permcxt)
1659 : {
1660 : Relation catalog;
1661 : TableScanDesc scan;
1662 : HeapTuple tuple;
1663 17 : List *rtcs = NIL;
1664 :
1665 17 : if (usingindex)
1666 : {
1667 : ScanKeyData entry;
1668 :
1669 : /*
1670 : * For USING INDEX, scan pg_index to find those with indisclustered.
1671 : */
1672 13 : catalog = table_open(IndexRelationId, AccessShareLock);
1673 13 : ScanKeyInit(&entry,
1674 : Anum_pg_index_indisclustered,
1675 : BTEqualStrategyNumber, F_BOOLEQ,
1676 : BoolGetDatum(true));
1677 13 : scan = table_beginscan_catalog(catalog, 1, &entry);
1678 25 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1679 : {
1680 : RelToCluster *rtc;
1681 : Form_pg_index index;
1682 : MemoryContext oldcxt;
1683 :
1684 12 : index = (Form_pg_index) GETSTRUCT(tuple);
1685 :
1686 : /*
1687 : * Try to obtain a light lock on the index's table, to ensure it
1688 : * doesn't go away while we collect the list. If we cannot, just
1689 : * disregard it. Be sure to release this if we ultimately decide
1690 : * not to process the table!
1691 : */
1692 12 : if (!ConditionalLockRelationOid(index->indrelid, AccessShareLock))
1693 0 : continue;
1694 :
1695 : /* Verify that the table still exists; skip if not */
1696 12 : if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(index->indrelid)))
1697 : {
1698 0 : UnlockRelationOid(index->indrelid, AccessShareLock);
1699 0 : continue;
1700 : }
1701 :
1702 : /* noisily skip rels which the user can't process */
1703 12 : if (!repack_is_permitted_for_relation(cmd, index->indrelid,
1704 : GetUserId()))
1705 : {
1706 8 : UnlockRelationOid(index->indrelid, AccessShareLock);
1707 8 : continue;
1708 : }
1709 :
1710 : /* Use a permanent memory context for the result list */
1711 4 : oldcxt = MemoryContextSwitchTo(permcxt);
1712 4 : rtc = palloc_object(RelToCluster);
1713 4 : rtc->tableOid = index->indrelid;
1714 4 : rtc->indexOid = index->indexrelid;
1715 4 : rtcs = lappend(rtcs, rtc);
1716 4 : MemoryContextSwitchTo(oldcxt);
1717 : }
1718 : }
1719 : else
1720 : {
1721 4 : catalog = table_open(RelationRelationId, AccessShareLock);
1722 4 : scan = table_beginscan_catalog(catalog, 0, NULL);
1723 :
1724 6306 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1725 : {
1726 : RelToCluster *rtc;
1727 : Form_pg_class class;
1728 : MemoryContext oldcxt;
1729 :
1730 6302 : class = (Form_pg_class) GETSTRUCT(tuple);
1731 :
1732 : /*
1733 : * Try to obtain a light lock on the table, to ensure it doesn't
1734 : * go away while we collect the list. If we cannot, just
1735 : * disregard the table. Be sure to release this if we ultimately
1736 : * decide not to process the table!
1737 : */
1738 6302 : if (!ConditionalLockRelationOid(class->oid, AccessShareLock))
1739 0 : continue;
1740 :
1741 : /* Verify that the table still exists */
1742 6302 : if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(class->oid)))
1743 : {
1744 6 : UnlockRelationOid(class->oid, AccessShareLock);
1745 6 : continue;
1746 : }
1747 :
1748 : /* Can only process plain tables and matviews */
1749 6296 : if (class->relkind != RELKIND_RELATION &&
1750 4208 : class->relkind != RELKIND_MATVIEW)
1751 : {
1752 4180 : UnlockRelationOid(class->oid, AccessShareLock);
1753 4180 : continue;
1754 : }
1755 :
1756 : /* noisily skip rels which the user can't process */
1757 2116 : if (!repack_is_permitted_for_relation(cmd, class->oid,
1758 : GetUserId()))
1759 : {
1760 2108 : UnlockRelationOid(class->oid, AccessShareLock);
1761 2108 : continue;
1762 : }
1763 :
1764 : /* Use a permanent memory context for the result list */
1765 8 : oldcxt = MemoryContextSwitchTo(permcxt);
1766 8 : rtc = palloc_object(RelToCluster);
1767 8 : rtc->tableOid = class->oid;
1768 8 : rtc->indexOid = InvalidOid;
1769 8 : rtcs = lappend(rtcs, rtc);
1770 8 : MemoryContextSwitchTo(oldcxt);
1771 : }
1772 : }
1773 :
1774 17 : table_endscan(scan);
1775 17 : relation_close(catalog, AccessShareLock);
1776 :
1777 17 : return rtcs;
1778 : }
1779 :
1780 : /*
1781 : * Given a partitioned table or its index, return a list of RelToCluster for
1782 : * all the leaf child tables/indexes.
1783 : *
1784 : * 'rel_is_index' tells whether 'relid' is that of an index (true) or of the
1785 : * owning relation.
1786 : */
1787 : static List *
1788 20 : get_tables_to_repack_partitioned(RepackCommand cmd, Oid relid,
1789 : bool rel_is_index, MemoryContext permcxt)
1790 : {
1791 : List *inhoids;
1792 20 : List *rtcs = NIL;
1793 :
1794 : /*
1795 : * Do not lock the children until they're processed. Note that we do hold
1796 : * a lock on the parent partitioned table.
1797 : */
1798 20 : inhoids = find_all_inheritors(relid, NoLock, NULL);
1799 148 : foreach_oid(child_oid, inhoids)
1800 : {
1801 : Oid table_oid,
1802 : index_oid;
1803 : RelToCluster *rtc;
1804 : MemoryContext oldcxt;
1805 :
1806 108 : if (rel_is_index)
1807 : {
1808 : /* consider only leaf indexes */
1809 80 : if (get_rel_relkind(child_oid) != RELKIND_INDEX)
1810 40 : continue;
1811 :
1812 40 : table_oid = IndexGetRelation(child_oid, false);
1813 40 : index_oid = child_oid;
1814 : }
1815 : else
1816 : {
1817 : /* consider only leaf relations */
1818 28 : if (get_rel_relkind(child_oid) != RELKIND_RELATION)
1819 16 : continue;
1820 :
1821 12 : table_oid = child_oid;
1822 12 : index_oid = InvalidOid;
1823 : }
1824 :
1825 : /*
1826 : * It's possible that the user does not have privileges to CLUSTER the
1827 : * leaf partition despite having them on the partitioned table. Skip
1828 : * if so.
1829 : */
1830 52 : if (!repack_is_permitted_for_relation(cmd, table_oid, GetUserId()))
1831 12 : continue;
1832 :
1833 : /* Use a permanent memory context for the result list */
1834 40 : oldcxt = MemoryContextSwitchTo(permcxt);
1835 40 : rtc = palloc_object(RelToCluster);
1836 40 : rtc->tableOid = table_oid;
1837 40 : rtc->indexOid = index_oid;
1838 40 : rtcs = lappend(rtcs, rtc);
1839 40 : MemoryContextSwitchTo(oldcxt);
1840 : }
1841 :
1842 20 : return rtcs;
1843 : }
1844 :
1845 :
1846 : /*
1847 : * Return whether userid has privileges to REPACK relid. If not, this
1848 : * function emits a WARNING.
1849 : */
1850 : static bool
1851 2232 : repack_is_permitted_for_relation(RepackCommand cmd, Oid relid, Oid userid)
1852 : {
1853 : Assert(cmd == REPACK_COMMAND_CLUSTER || cmd == REPACK_COMMAND_REPACK);
1854 :
1855 2232 : if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
1856 104 : return true;
1857 :
1858 2128 : ereport(WARNING,
1859 : errmsg("permission denied to execute %s on \"%s\", skipping it",
1860 : RepackCommandAsString(cmd),
1861 : get_rel_name(relid)));
1862 :
1863 2128 : return false;
1864 : }
1865 :
1866 :
1867 : /*
1868 : * Given a RepackStmt with an indicated relation name, resolve the relation
1869 : * name, obtain lock on it, then determine what to do based on the relation
1870 : * type: if it's table and not partitioned, repack it as indicated (using an
1871 : * existing clustered index, or following the given one), and return NULL.
1872 : *
1873 : * On the other hand, if the table is partitioned, do nothing further and
1874 : * instead return the opened and locked relcache entry, so that caller can
1875 : * process the partitions using the multiple-table handling code. In this
1876 : * case, if an index name is given, it's up to the caller to resolve it.
1877 : */
1878 : static Relation
1879 163 : process_single_relation(RepackStmt *stmt, ClusterParams *params)
1880 : {
1881 : Relation rel;
1882 : Oid tableOid;
1883 :
1884 : Assert(stmt->relation != NULL);
1885 : Assert(stmt->command == REPACK_COMMAND_CLUSTER ||
1886 : stmt->command == REPACK_COMMAND_REPACK);
1887 :
1888 : /*
1889 : * Make sure ANALYZE is specified if a column list is present.
1890 : */
1891 163 : if ((params->options & CLUOPT_ANALYZE) == 0 && stmt->relation->va_cols != NIL)
1892 4 : ereport(ERROR,
1893 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1894 : errmsg("ANALYZE option must be specified when a column list is provided"));
1895 :
1896 : /*
1897 : * Find, lock, and check permissions on the table. We obtain
1898 : * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
1899 : * single-transaction case.
1900 : */
1901 159 : tableOid = RangeVarGetRelidExtended(stmt->relation->relation,
1902 : AccessExclusiveLock,
1903 : 0,
1904 : RangeVarCallbackMaintainsTable,
1905 : NULL);
1906 151 : rel = table_open(tableOid, NoLock);
1907 :
1908 : /*
1909 : * Reject clustering a remote temp table ... their local buffer manager is
1910 : * not going to cope.
1911 : */
1912 151 : if (RELATION_IS_OTHER_TEMP(rel))
1913 0 : ereport(ERROR,
1914 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1915 : /*- translator: first %s is name of a SQL command, eg. REPACK */
1916 : errmsg("cannot execute %s on temporary tables of other sessions",
1917 : RepackCommandAsString(stmt->command)));
1918 :
1919 : /*
1920 : * For partitioned tables, let caller handle this. Otherwise, process it
1921 : * here and we're done.
1922 : */
1923 151 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1924 32 : return rel;
1925 : else
1926 : {
1927 : Oid indexOid;
1928 :
1929 119 : indexOid = determine_clustered_index(rel, stmt->usingindex,
1930 119 : stmt->indexname);
1931 115 : if (OidIsValid(indexOid))
1932 107 : check_index_is_clusterable(rel, indexOid, AccessExclusiveLock);
1933 115 : cluster_rel(stmt->command, rel, indexOid, params);
1934 :
1935 : /*
1936 : * Do an analyze, if requested. We close the transaction and start a
1937 : * new one, so that we don't hold the stronger lock for longer than
1938 : * needed.
1939 : */
1940 115 : if (params->options & CLUOPT_ANALYZE)
1941 : {
1942 8 : VacuumParams vac_params = {0};
1943 :
1944 8 : PopActiveSnapshot();
1945 8 : CommitTransactionCommand();
1946 :
1947 8 : StartTransactionCommand();
1948 8 : PushActiveSnapshot(GetTransactionSnapshot());
1949 :
1950 8 : vac_params.options |= VACOPT_ANALYZE;
1951 8 : if (params->options & CLUOPT_VERBOSE)
1952 0 : vac_params.options |= VACOPT_VERBOSE;
1953 8 : analyze_rel(tableOid, NULL, &vac_params,
1954 8 : stmt->relation->va_cols, true, NULL);
1955 8 : PopActiveSnapshot();
1956 8 : CommandCounterIncrement();
1957 : }
1958 :
1959 115 : return NULL;
1960 : }
1961 : }
1962 :
1963 : /*
1964 : * Given a relation and the usingindex/indexname options in a
1965 : * REPACK USING INDEX or CLUSTER command, return the OID of the
1966 : * index to use for clustering the table.
1967 : *
1968 : * Caller must hold lock on the relation so that the set of indexes
1969 : * doesn't change, and must call check_index_is_clusterable.
1970 : */
1971 : static Oid
1972 139 : determine_clustered_index(Relation rel, bool usingindex, const char *indexname)
1973 : {
1974 : Oid indexOid;
1975 :
1976 139 : if (indexname == NULL && usingindex)
1977 : {
1978 : /*
1979 : * If USING INDEX with no name is given, find a clustered index, or
1980 : * error out if none.
1981 : */
1982 19 : indexOid = InvalidOid;
1983 42 : foreach_oid(idxoid, RelationGetIndexList(rel))
1984 : {
1985 19 : if (get_index_isclustered(idxoid))
1986 : {
1987 15 : indexOid = idxoid;
1988 15 : break;
1989 : }
1990 : }
1991 :
1992 19 : if (!OidIsValid(indexOid))
1993 4 : ereport(ERROR,
1994 : errcode(ERRCODE_UNDEFINED_OBJECT),
1995 : errmsg("there is no previously clustered index for table \"%s\"",
1996 : RelationGetRelationName(rel)));
1997 : }
1998 120 : else if (indexname != NULL)
1999 : {
2000 : /* An index was specified; obtain its OID. */
2001 112 : indexOid = get_relname_relid(indexname, rel->rd_rel->relnamespace);
2002 112 : if (!OidIsValid(indexOid))
2003 0 : ereport(ERROR,
2004 : errcode(ERRCODE_UNDEFINED_OBJECT),
2005 : errmsg("index \"%s\" for table \"%s\" does not exist",
2006 : indexname, RelationGetRelationName(rel)));
2007 : }
2008 : else
2009 8 : indexOid = InvalidOid;
2010 :
2011 135 : return indexOid;
2012 : }
2013 :
2014 : static const char *
2015 2570 : RepackCommandAsString(RepackCommand cmd)
2016 : {
2017 2570 : switch (cmd)
2018 : {
2019 2172 : case REPACK_COMMAND_REPACK:
2020 2172 : return "REPACK";
2021 222 : case REPACK_COMMAND_VACUUMFULL:
2022 222 : return "VACUUM";
2023 176 : case REPACK_COMMAND_CLUSTER:
2024 176 : return "CLUSTER";
2025 : }
2026 0 : return "???"; /* keep compiler quiet */
2027 : }
|