LCOV - code coverage report
Current view: top level - src/backend/commands - cluster.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 90.6 % 457 414
Test Date: 2026-03-02 10:14:48 Functions: 100.0 % 13 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * cluster.c
       4              :  *    CLUSTER a table on an index.  This is now also used for VACUUM FULL.
       5              :  *
       6              :  * There is hardly anything left of Paul Brown's original implementation...
       7              :  *
       8              :  *
       9              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      10              :  * Portions Copyright (c) 1994-5, Regents of the University of California
      11              :  *
      12              :  *
      13              :  * IDENTIFICATION
      14              :  *    src/backend/commands/cluster.c
      15              :  *
      16              :  *-------------------------------------------------------------------------
      17              :  */
      18              : #include "postgres.h"
      19              : 
      20              : #include "access/amapi.h"
      21              : #include "access/heapam.h"
      22              : #include "access/multixact.h"
      23              : #include "access/relscan.h"
      24              : #include "access/tableam.h"
      25              : #include "access/toast_internals.h"
      26              : #include "access/transam.h"
      27              : #include "access/xact.h"
      28              : #include "catalog/catalog.h"
      29              : #include "catalog/dependency.h"
      30              : #include "catalog/heap.h"
      31              : #include "catalog/index.h"
      32              : #include "catalog/namespace.h"
      33              : #include "catalog/objectaccess.h"
      34              : #include "catalog/pg_am.h"
      35              : #include "catalog/pg_inherits.h"
      36              : #include "catalog/toasting.h"
      37              : #include "commands/cluster.h"
      38              : #include "commands/defrem.h"
      39              : #include "commands/progress.h"
      40              : #include "commands/tablecmds.h"
      41              : #include "commands/vacuum.h"
      42              : #include "miscadmin.h"
      43              : #include "optimizer/optimizer.h"
      44              : #include "pgstat.h"
      45              : #include "storage/bufmgr.h"
      46              : #include "storage/lmgr.h"
      47              : #include "storage/predicate.h"
      48              : #include "utils/acl.h"
      49              : #include "utils/fmgroids.h"
      50              : #include "utils/guc.h"
      51              : #include "utils/inval.h"
      52              : #include "utils/lsyscache.h"
      53              : #include "utils/memutils.h"
      54              : #include "utils/pg_rusage.h"
      55              : #include "utils/relmapper.h"
      56              : #include "utils/snapmgr.h"
      57              : #include "utils/syscache.h"
      58              : 
      59              : /*
      60              :  * RelToCluster names one table to be clustered and the index to cluster it
      61              :  * on.  We need this so we can make a list of them when cluster() must process
      62              :  * several tables (no table named, or a partitioned table was named).
      63              :  */
      64              : typedef struct
      65              : {
      66              :     Oid         tableOid;       /* table to be clustered */
      67              :     Oid         indexOid;       /* index to cluster that table on */
      68              : } RelToCluster;
      69              : 
      70              : 
      71              : static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
      72              : static void rebuild_relation(Relation OldHeap, Relation index, bool verbose);
      73              : static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex,
      74              :                             bool verbose, bool *pSwapToastByContent,
      75              :                             TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
      76              : static List *get_tables_to_cluster(MemoryContext cluster_context);
      77              : static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
      78              :                                                Oid indexOid);
      79              : static bool cluster_is_permitted_for_relation(Oid relid, Oid userid);
      80              : 
      81              : 
      82              : /*---------------------------------------------------------------------------
      83              :  * This cluster code allows for clustering multiple tables at once. Because
      84              :  * of this, we cannot just run everything on a single transaction, or we
      85              :  * would be forced to acquire exclusive locks on all the tables being
      86              :  * clustered, simultaneously --- very likely leading to deadlock.
      87              :  *
      88              :  * To solve this we follow a strategy similar to the VACUUM code,
      89              :  * clustering each relation in a separate transaction. For this to work,
      90              :  * we need to:
      91              :  *  - provide a separate memory context so that we can pass information in
      92              :  *    a way that survives across transactions
      93              :  *  - start a new transaction every time a new relation is clustered
      94              :  *  - check for validity of the information on to-be-clustered relations,
      95              :  *    as someone might have deleted a relation behind our back, or
      96              :  *    clustered one on a different index
      97              :  *  - end the transaction
      98              :  *
      99              :  * The single-relation case does not have any such overhead.
     100              :  *
     101              :  * We also allow a relation to be specified without index.  In that case,
     102              :  * the indisclustered bit will be looked up, and an ERROR will be thrown
     103              :  * if there is no index with the bit set.
     104              :  *---------------------------------------------------------------------------
     105              :  */
     106              : void
     107          116 : cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
     108              : {
     109              :     ListCell   *lc;
     110          116 :     ClusterParams params = {0};
     111          116 :     bool        verbose = false;
     112          116 :     Relation    rel = NULL;
     113          116 :     Oid         indexOid = InvalidOid;
     114              :     MemoryContext cluster_context;
     115              :     List       *rtcs;
     116              : 
     117              :     /* Parse option list; "verbose" is the only option recognized here */
     118          118 :     foreach(lc, stmt->params)
     119              :     {
     120            2 :         DefElem    *opt = (DefElem *) lfirst(lc);
     121              : 
     122            2 :         if (strcmp(opt->defname, "verbose") == 0)
     123            2 :             verbose = defGetBoolean(opt);
     124              :         else
     125            0 :             ereport(ERROR,
     126              :                     (errcode(ERRCODE_SYNTAX_ERROR),
     127              :                      errmsg("unrecognized %s option \"%s\"",
     128              :                             "CLUSTER", opt->defname),
     129              :                      parser_errposition(pstate, opt->location)));
     130              :     }
     131              : 
     132          116 :     params.options = (verbose ? CLUOPT_VERBOSE : 0);
     133              : 
     134          116 :     if (stmt->relation != NULL)
     135              :     {
     136              :         /* A table was named: this is the single-relation case. */
     137              :         Oid         tableOid;
     138              : 
     139              :         /*
     140              :          * Find, lock, and check permissions on the table.  We obtain
     141              :          * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
     142              :          * single-transaction case.
     143              :          */
     144          106 :         tableOid = RangeVarGetRelidExtended(stmt->relation,
     145              :                                             AccessExclusiveLock,
     146              :                                             0,
     147              :                                             RangeVarCallbackMaintainsTable,
     148              :                                             NULL);
     149          100 :         rel = table_open(tableOid, NoLock);
     150              : 
     151              :         /*
     152              :          * Reject clustering a remote temp table ... their local buffer
     153              :          * manager is not going to cope.
     154              :          */
     155          100 :         if (RELATION_IS_OTHER_TEMP(rel))
     156            0 :             ereport(ERROR,
     157              :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     158              :                      errmsg("cannot cluster temporary tables of other sessions")));
     159              : 
     160          100 :         if (stmt->indexname == NULL)
     161              :         {
     162              :             ListCell   *index;
     163              : 
     164              :             /* No index named: find the one that has indisclustered set. */
     165           24 :             foreach(index, RelationGetIndexList(rel))
     166              :             {
     167           18 :                 indexOid = lfirst_oid(index);
     168           18 :                 if (get_index_isclustered(indexOid))
     169           12 :                     break;
     170            6 :                 indexOid = InvalidOid;  /* not this one; reset and keep looking */
     171              :             }
     172              : 
     173           18 :             if (!OidIsValid(indexOid))
     174            6 :                 ereport(ERROR,
     175              :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     176              :                          errmsg("there is no previously clustered index for table \"%s\"",
     177              :                                 stmt->relation->relname)));
     178              :         }
     179              :         else
     180              :         {
     181              :             /*
     182              :              * The index is expected to be in the same namespace as the
     183              :              * relation, so look the name up there.
     184              :              */
     185           82 :             indexOid = get_relname_relid(stmt->indexname,
     186           82 :                                          rel->rd_rel->relnamespace);
     187           82 :             if (!OidIsValid(indexOid))
     188            0 :                 ereport(ERROR,
     189              :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     190              :                          errmsg("index \"%s\" for table \"%s\" does not exist",
     191              :                                 stmt->indexname, stmt->relation->relname)));
     192              :         }
     193              : 
     194              :         /* For non-partitioned tables, do what we came here to do. */
     195           94 :         if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
     196              :         {
     197           81 :             cluster_rel(rel, indexOid, &params);
     198              :             /* cluster_rel closes the relation, but keeps lock */
     199              : 
     200           81 :             return;
     201              :         }
     202              :     }
     203              : 
     204              :     /*
     205              :      * By here, we know we are in a multi-table situation.  In order to avoid
     206              :      * holding locks for too long, we want to process each table in its own
     207              :      * transaction.  This forces us to disallow running inside a user
     208              :      * transaction block.
     209              :      */
     210           23 :     PreventInTransactionBlock(isTopLevel, "CLUSTER");
     211              : 
     212              :     /* Also, we need a memory context to hold our list of relations */
     213           23 :     cluster_context = AllocSetContextCreate(PortalContext,
     214              :                                             "Cluster",
     215              :                                             ALLOCSET_DEFAULT_SIZES);
     216              : 
     217              :     /*
     218              :      * Either we're processing a partitioned table, or we were not given any
     219              :      * table name at all.  In either case, obtain a list of relations to
     220              :      * process.
     221              :      *
     222              :      * In the former case, an index name must have been given, so we don't
     223              :      * need to recheck its "indisclustered" bit, but we have to check that it
     224              :      * is an index that we can cluster on.  In the latter case, we set the
     225              :      * option bit to have indisclustered verified.
     226              :      *
     227              :      * Rechecking the relation itself is necessary here in all cases.
     228              :      */
     229           23 :     params.options |= CLUOPT_RECHECK;
     230           23 :     if (rel != NULL)
     231              :     {
     232              :         Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
     233           13 :         check_index_is_clusterable(rel, indexOid, AccessShareLock);
     234           10 :         rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
     235              : 
     236              :         /* close relation, releasing lock on parent table */
     237           10 :         table_close(rel, AccessExclusiveLock);
     238              :     }
     239              :     else
     240              :     {
     241           10 :         rtcs = get_tables_to_cluster(cluster_context);
     242           10 :         params.options |= CLUOPT_RECHECK_ISCLUSTERED;
     243              :     }
     244              : 
     245              :     /* Do the job; note this returns with no transaction in progress. */
     246           20 :     cluster_multiple_rels(rtcs, &params);
     247              : 
     248              :     /* Start a new transaction for the cleanup work. */
     249           20 :     StartTransactionCommand();
     250              : 
     251              :     /* Clean up working storage */
     252           20 :     MemoryContextDelete(cluster_context);
     253              : }
     254              : 
     255              : /*
     256              :  * Given a list of relations to cluster (RelToCluster entries), process each
     257              :  * of them in a separate transaction, passing "params" on to cluster_rel.
     258              :  *
     259              :  * We expect to be in a transaction at start, but there isn't one when we
     260              :  * return; the caller must start a new one if it has more work to do.
     261              :  */
     262              : static void
     263           20 : cluster_multiple_rels(List *rtcs, ClusterParams *params)
     264              : {
     265              :     ListCell   *lc;
     266              : 
     267              :     /* Commit to get out of starting transaction */
     268           20 :     PopActiveSnapshot();
     269           20 :     CommitTransactionCommand();
     270              : 
     271              :     /* Cluster the tables, each in a separate transaction */
     272           35 :     foreach(lc, rtcs)
     273              :     {
     274           15 :         RelToCluster *rtc = (RelToCluster *) lfirst(lc);
     275              :         Relation    rel;
     276              : 
     277              :         /* Start a new transaction for each relation. */
     278           15 :         StartTransactionCommand();
     279              : 
     280              :         /* functions in indexes may want a snapshot set */
     281           15 :         PushActiveSnapshot(GetTransactionSnapshot());
     282              : 
     283           15 :         rel = table_open(rtc->tableOid, AccessExclusiveLock);
     284              : 
     285              :         /* Process this table; cluster_rel rechecks it per params->options */
     286           15 :         cluster_rel(rel, rtc->indexOid, params);
     287              :         /* cluster_rel closes the relation, but keeps lock */
     288              : 
     289           15 :         PopActiveSnapshot();
     290           15 :         CommitTransactionCommand();
     291              :     }
     292           20 : }
     293              : 
     294              : /*
     295              :  * cluster_rel
     296              :  *
     297              :  * This clusters the table by creating a new, clustered table and
     298              :  * swapping the relfilenumbers of the new table and the old table, so
     299              :  * the OID of the original table is preserved.  Thus we do not lose
     300              :  * GRANT, inheritance nor references to this table.
     301              :  *
     302              :  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
     303              :  * the new table, it's better to create the indexes afterwards than to fill
     304              :  * them incrementally while we load the table.
     305              :  *
     306              :  * If indexOid is InvalidOid, the table will be rewritten in physical order
     307              :  * instead of index order.  This is the new implementation of VACUUM FULL,
     308              :  * and error messages should refer to the operation as VACUUM not CLUSTER.
                      :  *
                      :  * The caller must already hold AccessExclusiveLock on OldHeap.  OldHeap is
                      :  * always closed before returning: on the normal path rebuild_relation()
                      :  * does that (along with the index, if any), while the "quietly skip" paths
                      :  * close it here and release the lock as well.
     309              :  */
     310              : void
     311          286 : cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params)
     312              : {
     313          286 :     Oid         tableOid = RelationGetRelid(OldHeap);
     314              :     Oid         save_userid;
     315              :     int         save_sec_context;
     316              :     int         save_nestlevel;
     317          286 :     bool        verbose = ((params->options & CLUOPT_VERBOSE) != 0);
     318          286 :     bool        recheck = ((params->options & CLUOPT_RECHECK) != 0);
     319              :     Relation    index;
     320              : 
     321              :     Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false));
     322              : 
     323              :     /* Check for user-requested abort. */
     324          286 :     CHECK_FOR_INTERRUPTS();
     325              : 
     326          286 :     pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
     327          286 :     if (OidIsValid(indexOid))
     328           96 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     329              :                                      PROGRESS_CLUSTER_COMMAND_CLUSTER);
     330              :     else
     331          190 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     332              :                                      PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
     333              : 
     334              :     /*
     335              :      * Switch to the table owner's userid, so that any index functions are run
     336              :      * as that user.  Also lock down security-restricted operations and
     337              :      * arrange to make GUC variable changes local to this command.
     338              :      */
     339          286 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
     340          286 :     SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
     341              :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
     342          286 :     save_nestlevel = NewGUCNestLevel();
     343          286 :     RestrictSearchPath();
     344              : 
     345              :     /*
     346              :      * Since we may open a new transaction for each relation, we have to check
     347              :      * that the relation still is what we think it is.
     348              :      *
     349              :      * If this is a single-transaction CLUSTER, we can skip these tests. We
     350              :      * *must* skip the one on indisclustered since it would reject an attempt
     351              :      * to cluster a not-previously-clustered index.
     352              :      */
     353          286 :     if (recheck)
     354              :     {
     355              :         /* Check that the user still has privileges for the relation */
     356           15 :         if (!cluster_is_permitted_for_relation(tableOid, save_userid))
     357              :         {
     358            0 :             relation_close(OldHeap, AccessExclusiveLock);
     359            0 :             goto out;
     360              :         }
     361              : 
     362              :         /*
     363              :          * Silently skip a temp table for a remote session.  Only doing this
     364              :          * check in the "recheck" case is appropriate (which currently means
     365              :          * somebody is executing a database-wide CLUSTER or on a partitioned
     366              :          * table), because there is another check in cluster() which will stop
     367              :          * any attempt to cluster remote temp tables by name.  There is
     368              :          * another check further down in this function which is redundant,
     369              :          * but we leave it for extra safety.
     370              :          */
     371           15 :         if (RELATION_IS_OTHER_TEMP(OldHeap))
     372              :         {
     373            0 :             relation_close(OldHeap, AccessExclusiveLock);
     374            0 :             goto out;
     375              :         }
     376              : 
     377           15 :         if (OidIsValid(indexOid))
     378              :         {
     379              :             /*
     380              :              * Check that the index still exists
     381              :              */
     382           15 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
     383              :             {
     384            0 :                 relation_close(OldHeap, AccessExclusiveLock);
     385            0 :                 goto out;
     386              :             }
     387              : 
     388              :             /*
     389              :              * Check that the index is still the one with indisclustered set,
     390              :              * if needed.
     391              :              */
     392           15 :             if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
     393            3 :                 !get_index_isclustered(indexOid))
     394              :             {
     395            0 :                 relation_close(OldHeap, AccessExclusiveLock);
     396            0 :                 goto out;
     397              :             }
     398              :         }
     399              :     }
     400              : 
     401              :     /*
     402              :      * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
     403              :      * would work in most respects, but the index would only get marked as
     404              :      * indisclustered in the current database, leading to unexpected behavior
     405              :      * if CLUSTER were later invoked in another database.
     406              :      */
     407          286 :     if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
     408            0 :         ereport(ERROR,
     409              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     410              :                  errmsg("cannot cluster a shared catalog")));
     411              : 
     412              :     /*
     413              :      * Don't process temp tables of other backends ... their local buffer
     414              :      * manager is not going to cope.
     415              :      */
     416          286 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     417              :     {
     418            0 :         if (OidIsValid(indexOid))
     419            0 :             ereport(ERROR,
     420              :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     421              :                      errmsg("cannot cluster temporary tables of other sessions")));
     422              :         else
     423            0 :             ereport(ERROR,
     424              :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     425              :                      errmsg("cannot vacuum temporary tables of other sessions")));
     426              :     }
     427              : 
     428              :     /*
     429              :      * Also check for active uses of the relation in the current transaction,
     430              :      * including open scans and pending AFTER trigger events.
     431              :      */
     432          286 :     CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
     433              : 
     434              :     /* Check heap and index are valid to cluster on */
     435          286 :     if (OidIsValid(indexOid))
     436              :     {
     437              :         /* verify the index is good and lock it */
     438           96 :         check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
     439              :         /* also open it */
     440           96 :         index = index_open(indexOid, NoLock);
     441              :     }
     442              :     else
     443          190 :         index = NULL;
     444              : 
     445              :     /*
     446              :      * Quietly ignore the request if this is a materialized view which has not
     447              :      * been populated from its query. No harm is done because there is no data
     448              :      * to deal with, and we don't want to throw an error if this is part of a
     449              :      * multi-relation request -- for example, CLUSTER was run on the entire
     450              :      * database.
     451              :      */
     452          286 :     if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
     453            0 :         !RelationIsPopulated(OldHeap))
     454              :     {
                      :         /*
                      :          * rebuild_relation() would normally close the index for us.  Since
                      :          * we are skipping it, drop our reference (and the lock taken above)
                      :          * here, so that we don't leak a relcache reference.
                      :          */
                      :         if (index != NULL)
                      :             index_close(index, AccessExclusiveLock);
     455            0 :         relation_close(OldHeap, AccessExclusiveLock);
     456            0 :         goto out;
     457              :     }
     458              : 
     459              :     Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
     460              :            OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
     461              :            OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
     462              : 
     463              :     /*
     464              :      * All predicate locks on the tuples or pages are about to be made
     465              :      * invalid, because we move tuples around.  Promote them to relation
     466              :      * locks.  Predicate locks on indexes will be promoted when they are
     467              :      * reindexed.
     468              :      */
     469          286 :     TransferPredicateLocksToHeapRelation(OldHeap);
     470              : 
     471              :     /* rebuild_relation does all the dirty work */
     472          286 :     rebuild_relation(OldHeap, index, verbose);
     473              :     /* rebuild_relation closes OldHeap, and index if valid */
     474              : 
     475          283 : out:
     476              :     /* Roll back any GUC changes executed by index functions */
     477          283 :     AtEOXact_GUC(false, save_nestlevel);
     478              : 
     479              :     /* Restore userid and security context */
     480          283 :     SetUserIdAndSecContext(save_userid, save_sec_context);
     481              : 
     482          283 :     pgstat_progress_end_command();
     483          283 : }
     484              : 
     485              : /*
     486              :  * Verify that the specified heap and index are valid to cluster on; ERROR out
     487              :  * if it is not an index on that table, not clusterable, partial, or invalid.
     488              :  *
     489              :  * Side effect: obtains "lockmode" lock on the index, and keeps it.  The caller
     490              :  * may already hold AccessExclusiveLock on the table, but not in all cases, so
     491              :  * we can't rely on the table-level lock for protection here.
     492              :  */
     493              : void
     494          141 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
     495              : {
     496              :     Relation    OldIndex;
     497              : 
     498          141 :     OldIndex = index_open(indexOid, lockmode);
     499              : 
     500              :     /*
     501              :      * Check that index is in fact an index on the given relation
     502              :      */
     503          141 :     if (OldIndex->rd_index == NULL ||
     504          141 :         OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
     505            0 :         ereport(ERROR,
     506              :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     507              :                  errmsg("\"%s\" is not an index for table \"%s\"",
     508              :                         RelationGetRelationName(OldIndex),
     509              :                         RelationGetRelationName(OldHeap))));
     510              : 
     511              :     /* Index AM must allow clustering */
     512          141 :     if (!OldIndex->rd_indam->amclusterable)
     513            0 :         ereport(ERROR,
     514              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     515              :                  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
     516              :                         RelationGetRelationName(OldIndex))));
     517              : 
     518              :     /*
     519              :      * Disallow clustering on incomplete indexes (those that might not index
     520              :      * every row of the relation).  We could relax this by making a separate
     521              :      * seqscan pass over the table to copy the missing rows, but that seems
     522              :      * expensive and tedious.  A non-null indpred marks a partial index.
     523              :      */
     524          141 :     if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
     525            0 :         ereport(ERROR,
     526              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     527              :                  errmsg("cannot cluster on partial index \"%s\"",
     528              :                         RelationGetRelationName(OldIndex))));
     529              : 
     530              :     /*
     531              :      * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
     532              :      * it might well not contain entries for every heap row, or might not even
     533              :      * be internally consistent.  (But note that we don't check indcheckxmin;
     534              :      * the worst consequence of following broken HOT chains would be that we
     535              :      * might put recently-dead tuples out-of-order in the new table, and there
     536              :      * is little harm in that.)
     537              :      */
     538          141 :     if (!OldIndex->rd_index->indisvalid)
     539            3 :         ereport(ERROR,
     540              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     541              :                  errmsg("cannot cluster on invalid index \"%s\"",
     542              :                         RelationGetRelationName(OldIndex))));
     543              : 
     544              :     /* Drop relcache refcnt on OldIndex, but keep lock */
     545          138 :     index_close(OldIndex, NoLock);
     546          138 : }
     547              : 
     548              : /*
     549              :  * mark_index_clustered: mark the specified index as the one clustered on
     550              :  *
     551              :  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
                      :  *
                      :  * rel: table whose indexes are examined; an error is raised if it is a
                      :  *      partitioned table.
                      :  * indexOid: index to mark as clustered, or InvalidOid (see above).
                      :  * is_internal: passed through unchanged to the object post-alter hook.
                      :  *
                      :  * If indexOid is valid and already marked clustered, we return without
                      :  * touching pg_index.  Otherwise every index of rel is visited: the
                      :  * indisclustered bit is cleared where it is set, and set on indexOid.
                      :  * The post-alter hook is invoked for each index visited, whether or not
                      :  * its pg_index row was actually updated.
     552              :  */
     553              : void
     554          137 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
     555              : {
     556              :     HeapTuple   indexTuple;
     557              :     Form_pg_index indexForm;
     558              :     Relation    pg_index;
     559              :     ListCell   *index;
     560              : 
     561              :     /* Disallow applying to a partitioned table */
     562          137 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     563            6 :         ereport(ERROR,
     564              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     565              :                  errmsg("cannot mark index clustered in partitioned table")));
     566              : 
     567              :     /*
     568              :      * If the index is already marked clustered, no need to do anything.
     569              :      */
     570          131 :     if (OidIsValid(indexOid))
     571              :     {
     572          125 :         if (get_index_isclustered(indexOid))
     573           21 :             return;
     574              :     }
     575              : 
     576              :     /*
     577              :      * Check each index of the relation and set/clear the bit as needed.
     578              :      */
     579          110 :     pg_index = table_open(IndexRelationId, RowExclusiveLock);
     580              : 
     581          323 :     foreach(index, RelationGetIndexList(rel))
     582              :     {
     583          213 :         Oid         thisIndexOid = lfirst_oid(index);
     584              : 
     585          213 :         indexTuple = SearchSysCacheCopy1(INDEXRELID,
     586              :                                          ObjectIdGetDatum(thisIndexOid));
     587          213 :         if (!HeapTupleIsValid(indexTuple))
     588            0 :             elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
     589          213 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     590              : 
     591              :         /*
     592              :          * Unset the bit if set.  We know it's wrong because we checked this
     593              :          * earlier: either indexOid is InvalidOid, so no index should be
                      :          * marked, or the early-exit test above found that indexOid is not
                      :          * the index currently marked as clustered.
     594              :          */
     595          213 :         if (indexForm->indisclustered)
     596              :         {
     597           15 :             indexForm->indisclustered = false;
     598           15 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     599              :         }
     600          198 :         else if (thisIndexOid == indexOid)
     601              :         {
     602              :             /* this was checked earlier, but let's be real sure */
     603          104 :             if (!indexForm->indisvalid)
     604            0 :                 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
     605          104 :             indexForm->indisclustered = true;
     606          104 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     607              :         }
     608              : 
     609          213 :         InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
     610              :                                      InvalidOid, is_internal);
     611              : 
     612          213 :         heap_freetuple(indexTuple);
     613              :     }
     614              : 
     615          110 :     table_close(pg_index, RowExclusiveLock);
     616              : }
     617              : 
     618              : /*
     619              :  * rebuild_relation: rebuild an existing relation in index or physical order
     620              :  *
     621              :  * OldHeap: table to rebuild.
     622              :  * index: index to cluster by, or NULL to rewrite in physical order.
                      :  * verbose: passed on to copy_table_data, which uses it to pick the log
                      :  *      level of its progress messages.
     623              :  *
     624              :  * On entry, heap and index (if one is given) must be open, and
     625              :  * AccessExclusiveLock held on them.
     626              :  * On exit, they are closed, but locks on them are not released.
                      :  *
                      :  * The work proceeds in steps: if an index is given it is marked as the
                      :  * clustered index (with is_internal = true); make_new_heap creates a
                      :  * transient table; copy_table_data fills it; and finish_heap_swap swaps
                      :  * it with the original.
     627              :  */
     628              : static void
     629          286 : rebuild_relation(Relation OldHeap, Relation index, bool verbose)
     630              : {
     631          286 :     Oid         tableOid = RelationGetRelid(OldHeap);
     632          286 :     Oid         accessMethod = OldHeap->rd_rel->relam;
     633          286 :     Oid         tableSpace = OldHeap->rd_rel->reltablespace;
     634              :     Oid         OIDNewHeap;
     635              :     Relation    NewHeap;
     636              :     char        relpersistence;
     637              :     bool        is_system_catalog;
     638              :     bool        swap_toast_by_content;
     639              :     TransactionId frozenXid;
     640              :     MultiXactId cutoffMulti;
     641              : 
     642              :     Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false) &&
     643              :            (index == NULL || CheckRelationLockedByMe(index, AccessExclusiveLock, false)));
     644              : 
     645          286 :     if (index)
     646              :         /* Mark the correct index as clustered */
     647           96 :         mark_index_clustered(OldHeap, RelationGetRelid(index), true);
     648              : 
     649              :     /* Remember info about rel before closing OldHeap */
     650          286 :     relpersistence = OldHeap->rd_rel->relpersistence;
     651          286 :     is_system_catalog = IsSystemRelation(OldHeap);
     652              : 
     653              :     /*
     654              :      * Create the transient table that will receive the re-ordered data.
     655              :      *
     656              :      * OldHeap is already locked, so no need to lock it again.  make_new_heap
     657              :      * obtains AccessExclusiveLock on the new heap and its toast table.
     658              :      */
     659          286 :     OIDNewHeap = make_new_heap(tableOid, tableSpace,
     660              :                                accessMethod,
     661              :                                relpersistence,
     662              :                                NoLock);
     663              :     Assert(CheckRelationOidLockedByMe(OIDNewHeap, AccessExclusiveLock, false));
     664          286 :     NewHeap = table_open(OIDNewHeap, NoLock);
     665              : 
     666              :     /* Copy the heap data into the new table in the desired order */
     667          286 :     copy_table_data(NewHeap, OldHeap, index, verbose,
     668              :                     &swap_toast_by_content, &frozenXid, &cutoffMulti);
     669              : 
     670              : 
     671              :     /* Close relcache entries, but keep lock until transaction commit */
     672          286 :     table_close(OldHeap, NoLock);
     673          286 :     if (index)
     674           96 :         index_close(index, NoLock);
     675              : 
     676              :     /*
     677              :      * Close the new relation so it can be dropped as soon as the storage is
     678              :      * swapped. The relation is not visible to others, so no need to unlock it
     679              :      * explicitly.
     680              :      */
     681          286 :     table_close(NewHeap, NoLock);
     682              : 
     683              :     /*
     684              :      * Swap the physical files of the target and transient tables, then
     685              :      * rebuild the target's indexes and throw away the transient table.
     686              :      */
     687          286 :     finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
     688              :                      swap_toast_by_content, false, true,
     689              :                      frozenXid, cutoffMulti,
     690              :                      relpersistence);
     691          283 : }
     692              : 
     693              : 
     694              : /*
     695              :  * Create the transient table that will be filled with new data during
     696              :  * CLUSTER, ALTER TABLE, and similar operations.  The transient table
     697              :  * duplicates the logical structure of the OldHeap; but will have the
     698              :  * specified physical storage properties NewTableSpace, NewAccessMethod, and
     699              :  * relpersistence.
     700              :  *
                      :  * OIDOldHeap: OID of the existing table; it is opened here using lockmode.
                      :  * lockmode: lock mode used to open the old heap; it is also handed to
                      :  *      NewHeapCreateToastTable when a TOAST table is created.
                      :  *
                      :  * Returns the OID of the transient table, which is created under the name
                      :  * "pg_temp_<OIDOldHeap>" and inherits the old heap's owner and reloptions.
                      :  * The old heap is closed again before returning, but with NoLock, so any
                      :  * lock taken on it here is kept.
                      :  *
     701              :  * After this, the caller should load the new heap with transferred/modified
     702              :  * data, then call finish_heap_swap to complete the operation.
     703              :  */
     704              : Oid
     705         1141 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
     706              :               char relpersistence, LOCKMODE lockmode)
     707              : {
     708              :     TupleDesc   OldHeapDesc;
     709              :     char        NewHeapName[NAMEDATALEN];
     710              :     Oid         OIDNewHeap;
     711              :     Oid         toastid;
     712              :     Relation    OldHeap;
     713              :     HeapTuple   tuple;
     714              :     Datum       reloptions;
     715              :     bool        isNull;
     716              :     Oid         namespaceid;
     717              : 
     718         1141 :     OldHeap = table_open(OIDOldHeap, lockmode);
     719         1141 :     OldHeapDesc = RelationGetDescr(OldHeap);
     720              : 
     721              :     /*
     722              :      * Note that the NewHeap will not receive any of the defaults or
     723              :      * constraints associated with the OldHeap; we don't need 'em, and there's
     724              :      * no reason to spend cycles inserting them into the catalogs only to
     725              :      * delete them.
     726              :      */
     727              : 
     728              :     /*
     729              :      * But we do want to use reloptions of the old heap for new heap.
     730              :      */
     731         1141 :     tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
     732         1141 :     if (!HeapTupleIsValid(tuple))
     733            0 :         elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
     734         1141 :     reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     735              :                                  &isNull);
     736         1141 :     if (isNull)
     737         1071 :         reloptions = (Datum) 0;
     738              : 
     739         1141 :     if (relpersistence == RELPERSISTENCE_TEMP)
     740           76 :         namespaceid = LookupCreationNamespace("pg_temp");
     741              :     else
     742         1065 :         namespaceid = RelationGetNamespace(OldHeap);
     743              : 
     744              :     /*
     745              :      * Create the new heap, using a temporary name in the same namespace as
     746              :      * the existing table.  NOTE: there is some risk of collision with user
     747              :      * relnames.  Working around this seems more trouble than it's worth; in
     748              :      * particular, we can't create the new heap in a different namespace from
     749              :      * the old, or we will have problems with the TEMP status of temp tables.
                      :      * (When the requested relpersistence is RELPERSISTENCE_TEMP, the
                      :      * namespace was looked up as "pg_temp" above rather than taken from
                      :      * the old heap.)
     750              :      *
     751              :      * Note: the new heap is not a shared relation, even if we are rebuilding
     752              :      * a shared rel.  However, we do make the new heap mapped if the source is
     753              :      * mapped.  This simplifies swap_relation_files, and is absolutely
     754              :      * necessary for rebuilding pg_class, for reasons explained there.
     755              :      */
     756         1141 :     snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
     757              : 
     758         1141 :     OIDNewHeap = heap_create_with_catalog(NewHeapName,
     759              :                                           namespaceid,
     760              :                                           NewTableSpace,
     761              :                                           InvalidOid,
     762              :                                           InvalidOid,
     763              :                                           InvalidOid,
     764         1141 :                                           OldHeap->rd_rel->relowner,
     765              :                                           NewAccessMethod,
     766              :                                           OldHeapDesc,
     767              :                                           NIL,
     768              :                                           RELKIND_RELATION,
     769              :                                           relpersistence,
     770              :                                           false,
     771         1141 :                                           RelationIsMapped(OldHeap),
     772              :                                           ONCOMMIT_NOOP,
     773              :                                           reloptions,
     774              :                                           false,
     775              :                                           true,
     776              :                                           true,
     777              :                                           OIDOldHeap,
     778         1141 :                                           NULL);
     779              :     Assert(OIDNewHeap != InvalidOid);
     780              : 
     781         1141 :     ReleaseSysCache(tuple);
     782              : 
     783              :     /*
     784              :      * Advance command counter so that the newly-created relation's catalog
     785              :      * tuples will be visible to table_open.
     786              :      */
     787         1141 :     CommandCounterIncrement();
     788              : 
     789              :     /*
     790              :      * If necessary, create a TOAST table for the new relation.
     791              :      *
     792              :      * If the relation doesn't have a TOAST table already, we can't need one
     793              :      * for the new relation.  The other way around is possible though: if some
     794              :      * wide columns have been dropped, NewHeapCreateToastTable can decide that
     795              :      * no TOAST table is needed for the new table.
     796              :      *
     797              :      * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
     798              :      * that the TOAST table will be visible for insertion.
     799              :      */
     800         1141 :     toastid = OldHeap->rd_rel->reltoastrelid;
     801         1141 :     if (OidIsValid(toastid))
     802              :     {
     803              :         /* keep the existing toast table's reloptions, if any */
     804          433 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
     805          433 :         if (!HeapTupleIsValid(tuple))
     806            0 :             elog(ERROR, "cache lookup failed for relation %u", toastid);
     807          433 :         reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     808              :                                      &isNull);
     809          433 :         if (isNull)
     810          433 :             reloptions = (Datum) 0;
     811              : 
     812          433 :         NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
     813              : 
     814          433 :         ReleaseSysCache(tuple);
     815              :     }
     816              : 
     817         1141 :     table_close(OldHeap, NoLock);
     818              : 
     819         1141 :     return OIDNewHeap;
     820              : }
     821              : 
     822              : /*
     823              :  * Do the physical copying of table data.
     824              :  *
                      :  * NewHeap: the transient table that receives the data.
                      :  * OldHeap: the table being copied from.
                      :  * OldIndex: index whose ordering is to be reproduced, or NULL to copy
                      :  *      using a plain sequential scan.
                      :  * verbose: if true, progress messages are reported at INFO level,
                      :  *      otherwise at DEBUG2.
                      :  *
     825              :  * There are three output parameters:
     826              :  * *pSwapToastByContent is set true if toast tables must be swapped by content.
     827              :  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
     828              :  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
                      :  *
                      :  * Besides copying, this takes AccessExclusiveLock on OldHeap's toast
                      :  * table (if it has one) and stores the resulting page and tuple counts
                      :  * in NewHeap's pg_class row.  When OldHeap is pg_class itself the row is
                      :  * not updated; CacheInvalidateRelcacheByTuple is called instead.  The
                      :  * function ends with CommandCounterIncrement.
     829              :  */
     830              : static void
     831          286 : copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose,
     832              :                 bool *pSwapToastByContent, TransactionId *pFreezeXid,
     833              :                 MultiXactId *pCutoffMulti)
     834              : {
     835              :     Relation    relRelation;
     836              :     HeapTuple   reltup;
     837              :     Form_pg_class relform;
     838              :     TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
     839              :     TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
     840              :     VacuumParams params;
     841              :     struct VacuumCutoffs cutoffs;
     842              :     bool        use_sort;
     843          286 :     double      num_tuples = 0,
     844          286 :                 tups_vacuumed = 0,
     845          286 :                 tups_recently_dead = 0;
     846              :     BlockNumber num_pages;
     847          286 :     int         elevel = verbose ? INFO : DEBUG2;
     848              :     PGRUsage    ru0;
     849              :     char       *nspname;
     850              : 
     851          286 :     pg_rusage_init(&ru0);
     852              : 
     853              :     /* Store a copy of the namespace name for logging purposes */
     854          286 :     nspname = get_namespace_name(RelationGetNamespace(OldHeap));
     855              : 
     856              :     /*
     857              :      * Their tuple descriptors should be exactly alike, but here we only need
     858              :      * assume that they have the same number of columns.
     859              :      */
     860          286 :     oldTupDesc = RelationGetDescr(OldHeap);
     861          286 :     newTupDesc = RelationGetDescr(NewHeap);
     862              :     Assert(newTupDesc->natts == oldTupDesc->natts);
     863              : 
     864              :     /*
     865              :      * If the OldHeap has a toast table, get lock on the toast table to keep
     866              :      * it from being vacuumed.  This is needed because autovacuum processes
     867              :      * toast tables independently of their main tables, with no lock on the
     868              :      * latter.  If an autovacuum were to start on the toast table after we
     869              :      * compute our OldestXmin below, it would use a later OldestXmin, and then
     870              :      * possibly remove as DEAD toast tuples belonging to main tuples we think
     871              :      * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
     872              :      * tuples.
     873              :      *
     874              :      * We don't need to open the toast relation here, just lock it.  The lock
     875              :      * will be held till end of transaction.
     876              :      */
     877          286 :     if (OldHeap->rd_rel->reltoastrelid)
     878           97 :         LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
     879              : 
     880              :     /*
     881              :      * If both tables have TOAST tables, perform toast swap by content.  It is
     882              :      * possible that the old table has a toast table but the new one doesn't,
     883              :      * if toastable columns have been dropped.  In that case we have to do
     884              :      * swap by links.  This is okay because swap by content is only essential
     885              :      * for system catalogs, and we don't support schema changes for them.
     886              :      */
     887          286 :     if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
     888              :     {
     889           97 :         *pSwapToastByContent = true;
     890              : 
     891              :         /*
     892              :          * When doing swap by content, any toast pointers written into NewHeap
     893              :          * must use the old toast table's OID, because that's where the toast
     894              :          * data will eventually be found.  Set this up by setting rd_toastoid.
     895              :          * This also tells toast_save_datum() to preserve the toast value
     896              :          * OIDs, which we want so as not to invalidate toast pointers in
     897              :          * system catalog caches, and to avoid making multiple copies of a
     898              :          * single toast value.
     899              :          *
     900              :          * Note that we must hold NewHeap open until we are done writing data,
     901              :          * since the relcache will not guarantee to remember this setting once
     902              :          * the relation is closed.  Also, this technique depends on the fact
     903              :          * that no one will try to read from the NewHeap until after we've
     904              :          * finished writing it and swapping the rels --- otherwise they could
     905              :          * follow the toast pointers to the wrong place.  (It would actually
     906              :          * work for values copied over from the old toast table, but not for
     907              :          * any values that we toast which were previously not toasted.)
     908              :          */
     909           97 :         NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
     910              :     }
     911              :     else
     912          189 :         *pSwapToastByContent = false;
     913              : 
     914              :     /*
     915              :      * Compute xids used to freeze and weed out dead tuples and multixacts.
     916              :      * Since we're going to rewrite the whole table anyway, there's no reason
     917              :      * not to be aggressive about this.
     918              :      */
     919          286 :     memset(&params, 0, sizeof(VacuumParams));
     920          286 :     vacuum_get_cutoffs(OldHeap, params, &cutoffs);
     921              : 
     922              :     /*
     923              :      * FreezeXid will become the table's new relfrozenxid, and that mustn't go
     924              :      * backwards, so take the max.
     925              :      */
     926              :     {
     927          286 :         TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
     928              : 
     929          572 :         if (TransactionIdIsValid(relfrozenxid) &&
     930          286 :             TransactionIdPrecedes(cutoffs.FreezeLimit, relfrozenxid))
     931           43 :             cutoffs.FreezeLimit = relfrozenxid;
     932              :     }
     933              : 
     934              :     /*
     935              :      * MultiXactCutoff, similarly, shouldn't go backwards either.
     936              :      */
     937              :     {
     938          286 :         MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
     939              : 
     940          572 :         if (MultiXactIdIsValid(relminmxid) &&
     941          286 :             MultiXactIdPrecedes(cutoffs.MultiXactCutoff, relminmxid))
     942            0 :             cutoffs.MultiXactCutoff = relminmxid;
     943              :     }
     944              : 
     945              :     /*
     946              :      * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     947              :      * the OldHeap.  We know how to use a sort to duplicate the ordering of a
     948              :      * btree index, and will use seqscan-and-sort for that case if the planner
     949              :      * tells us it's cheaper.  Otherwise, always indexscan if an index is
     950              :      * provided, else plain seqscan.
     951              :      */
     952          286 :     if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
     953           96 :         use_sort = plan_cluster_use_sort(RelationGetRelid(OldHeap),
     954              :                                          RelationGetRelid(OldIndex));
     955              :     else
     956          190 :         use_sort = false;
     957              : 
     958              :     /* Log what we're doing */
     959          286 :     if (OldIndex != NULL && !use_sort)
     960           40 :         ereport(elevel,
     961              :                 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
     962              :                         nspname,
     963              :                         RelationGetRelationName(OldHeap),
     964              :                         RelationGetRelationName(OldIndex))));
     965          246 :     else if (use_sort)
     966           56 :         ereport(elevel,
     967              :                 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
     968              :                         nspname,
     969              :                         RelationGetRelationName(OldHeap))));
     970              :     else
     971          190 :         ereport(elevel,
     972              :                 (errmsg("vacuuming \"%s.%s\"",
     973              :                         nspname,
     974              :                         RelationGetRelationName(OldHeap))));
     975              : 
     976              :     /*
     977              :      * Hand off the actual copying to AM specific function, the generic code
     978              :      * cannot know how to deal with visibility across AMs. Note that this
     979              :      * routine is allowed to set FreezeXid / MultiXactCutoff to different
     980              :      * values (e.g. because the AM doesn't use freezing).
     981              :      */
     982          286 :     table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
     983              :                                     cutoffs.OldestXmin, &cutoffs.FreezeLimit,
     984              :                                     &cutoffs.MultiXactCutoff,
     985              :                                     &num_tuples, &tups_vacuumed,
     986              :                                     &tups_recently_dead);
     987              : 
     988              :     /* return selected values to caller, get set as relfrozenxid/minmxid */
     989          286 :     *pFreezeXid = cutoffs.FreezeLimit;
     990          286 :     *pCutoffMulti = cutoffs.MultiXactCutoff;
     991              : 
     992              :     /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
     993          286 :     NewHeap->rd_toastoid = InvalidOid;
     994              : 
     995          286 :     num_pages = RelationGetNumberOfBlocks(NewHeap);
     996              : 
     997              :     /* Log what we did (the page count shown is OldHeap's, not num_pages) */
     998          286 :     ereport(elevel,
     999              :             (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
    1000              :                     nspname,
    1001              :                     RelationGetRelationName(OldHeap),
    1002              :                     tups_vacuumed, num_tuples,
    1003              :                     RelationGetNumberOfBlocks(OldHeap)),
    1004              :              errdetail("%.0f dead row versions cannot be removed yet.\n"
    1005              :                        "%s.",
    1006              :                        tups_recently_dead,
    1007              :                        pg_rusage_show(&ru0))));
    1008              : 
    1009              :     /* Update pg_class to reflect the correct values of pages and tuples. */
    1010          286 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1011              : 
    1012          286 :     reltup = SearchSysCacheCopy1(RELOID,
    1013              :                                  ObjectIdGetDatum(RelationGetRelid(NewHeap)));
    1014          286 :     if (!HeapTupleIsValid(reltup))
    1015            0 :         elog(ERROR, "cache lookup failed for relation %u",
    1016              :              RelationGetRelid(NewHeap));
    1017          286 :     relform = (Form_pg_class) GETSTRUCT(reltup);
    1018              : 
    1019          286 :     relform->relpages = num_pages;
    1020          286 :     relform->reltuples = num_tuples;
    1021              : 
    1022              :     /* Don't update the stats for pg_class.  See swap_relation_files. */
    1023          286 :     if (RelationGetRelid(OldHeap) != RelationRelationId)
    1024          264 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1025              :     else
    1026           22 :         CacheInvalidateRelcacheByTuple(reltup);
    1027              : 
    1028              :     /* Clean up. */
    1029          286 :     heap_freetuple(reltup);
    1030          286 :     table_close(relRelation, RowExclusiveLock);
    1031              : 
    1032              :     /* Make the update visible */
    1033          286 :     CommandCounterIncrement();
    1034          286 : }
    1035              : 
    1036              : /*
    1037              :  * Swap the physical files of two given relations.
    1038              :  *
    1039              :  * We swap the physical identity (reltablespace, relfilenumber) while keeping
    1040              :  * the same logical identities of the two relations.  relpersistence is also
    1041              :  * swapped, which is critical since it determines where buffers live for each
    1042              :  * relation.
    1043              :  *
    1044              :  * We can swap associated TOAST data in either of two ways: recursively swap
    1045              :  * the physical content of the toast tables (and their indexes), or swap the
    1046              :  * TOAST links in the given relations' pg_class entries.  The former is needed
    1047              :  * to manage rewrites of shared catalogs (where we cannot change the pg_class
    1048              :  * links) while the latter is the only way to handle cases in which a toast
    1049              :  * table is added or removed altogether.
    1050              :  *
    1051              :  * Additionally, the first relation is marked with relfrozenxid set to
    1052              :  * frozenXid.  It seems a bit ugly to have this here, but the caller would
    1053              :  * have to do it anyway, so having it here saves a heap_update.  Note: in
    1054              :  * the swap-toast-links case, we assume we don't need to change the toast
    1055              :  * table's relfrozenxid: the new version of the toast table should already
    1056              :  * have relfrozenxid set to RecentXmin, which is good enough.
    1057              :  *
    1058              :  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
    1059              :  * their OIDs are emitted into mapped_tables[].  This is hacky but beats
    1060              :  * having to look the information up again later in finish_heap_swap.
    1061              :  */
    1062              : static void
    1063         1257 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
    1064              :                     bool swap_toast_by_content,
    1065              :                     bool is_internal,
    1066              :                     TransactionId frozenXid,
    1067              :                     MultiXactId cutoffMulti,
    1068              :                     Oid *mapped_tables)
    1069              : {
    1070              :     Relation    relRelation;
    1071              :     HeapTuple   reltup1,
    1072              :                 reltup2;
    1073              :     Form_pg_class relform1,
    1074              :                 relform2;
    1075              :     RelFileNumber relfilenumber1,
    1076              :                 relfilenumber2;
    1077              :     RelFileNumber swaptemp;
    1078              :     char        swptmpchr;
    1079              :     Oid         relam1,
    1080              :                 relam2;
    1081              : 
    1082              :     /* We need writable copies of both pg_class tuples. */
    1083         1257 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1084              : 
    1085         1257 :     reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    1086         1257 :     if (!HeapTupleIsValid(reltup1))
    1087            0 :         elog(ERROR, "cache lookup failed for relation %u", r1);
    1088         1257 :     relform1 = (Form_pg_class) GETSTRUCT(reltup1);
    1089              : 
    1090         1257 :     reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    1091         1257 :     if (!HeapTupleIsValid(reltup2))
    1092            0 :         elog(ERROR, "cache lookup failed for relation %u", r2);
    1093         1257 :     relform2 = (Form_pg_class) GETSTRUCT(reltup2);
    1094              : 
    1095         1257 :     relfilenumber1 = relform1->relfilenode;
    1096         1257 :     relfilenumber2 = relform2->relfilenode;
    1097         1257 :     relam1 = relform1->relam;
    1098         1257 :     relam2 = relform2->relam;
    1099              : 
    1100         1257 :     if (RelFileNumberIsValid(relfilenumber1) &&
    1101              :         RelFileNumberIsValid(relfilenumber2))
    1102              :     {
    1103              :         /*
    1104              :          * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
    1105              :          * relpersistence
    1106              :          */
    1107              :         Assert(!target_is_pg_class);
    1108              : 
    1109         1172 :         swaptemp = relform1->relfilenode;
    1110         1172 :         relform1->relfilenode = relform2->relfilenode;
    1111         1172 :         relform2->relfilenode = swaptemp;
    1112              : 
    1113         1172 :         swaptemp = relform1->reltablespace;
    1114         1172 :         relform1->reltablespace = relform2->reltablespace;
    1115         1172 :         relform2->reltablespace = swaptemp;
    1116              : 
    1117         1172 :         swaptemp = relform1->relam;
    1118         1172 :         relform1->relam = relform2->relam;
    1119         1172 :         relform2->relam = swaptemp;
    1120              : 
    1121         1172 :         swptmpchr = relform1->relpersistence;
    1122         1172 :         relform1->relpersistence = relform2->relpersistence;
    1123         1172 :         relform2->relpersistence = swptmpchr;
    1124              : 
    1125              :         /* Also swap toast links, if we're swapping by links */
    1126         1172 :         if (!swap_toast_by_content)
    1127              :         {
    1128          938 :             swaptemp = relform1->reltoastrelid;
    1129          938 :             relform1->reltoastrelid = relform2->reltoastrelid;
    1130          938 :             relform2->reltoastrelid = swaptemp;
    1131              :         }
    1132              :     }
    1133              :     else
    1134              :     {
    1135              :         /*
    1136              :          * Mapped-relation case.  Here we have to swap the relation mappings
    1137              :          * instead of modifying the pg_class columns.  Both must be mapped.
    1138              :          */
    1139           85 :         if (RelFileNumberIsValid(relfilenumber1) ||
    1140              :             RelFileNumberIsValid(relfilenumber2))
    1141            0 :             elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
    1142              :                  NameStr(relform1->relname));
    1143              : 
    1144              :         /*
    1145              :          * We can't change the tablespace nor persistence of a mapped rel, and
    1146              :          * we can't handle toast link swapping for one either, because we must
    1147              :          * not apply any critical changes to its pg_class row.  These cases
    1148              :          * should be prevented by upstream permissions tests, so these checks
    1149              :          * are a non-user-facing emergency backstop.
    1150              :          */
    1151           85 :         if (relform1->reltablespace != relform2->reltablespace)
    1152            0 :             elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
    1153              :                  NameStr(relform1->relname));
    1154           85 :         if (relform1->relpersistence != relform2->relpersistence)
    1155            0 :             elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
    1156              :                  NameStr(relform1->relname));
    1157           85 :         if (relform1->relam != relform2->relam)
    1158            0 :             elog(ERROR, "cannot change access method of mapped relation \"%s\"",
    1159              :                  NameStr(relform1->relname));
    1160           85 :         if (!swap_toast_by_content &&
    1161           28 :             (relform1->reltoastrelid || relform2->reltoastrelid))
    1162            0 :             elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
    1163              :                  NameStr(relform1->relname));
    1164              : 
    1165              :         /*
    1166              :          * Fetch the mappings --- shouldn't fail, but be paranoid
    1167              :          */
    1168           85 :         relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
    1169           85 :         if (!RelFileNumberIsValid(relfilenumber1))
    1170            0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1171              :                  NameStr(relform1->relname), r1);
    1172           85 :         relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
    1173           85 :         if (!RelFileNumberIsValid(relfilenumber2))
    1174            0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1175              :                  NameStr(relform2->relname), r2);
    1176              : 
    1177              :         /*
    1178              :          * Send replacement mappings to relmapper.  Note these won't actually
    1179              :          * take effect until CommandCounterIncrement.
    1180              :          */
    1181           85 :         RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
    1182           85 :         RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
    1183              : 
    1184              :         /* Pass OIDs of mapped r2 tables back to caller */
    1185           85 :         *mapped_tables++ = r2;
    1186              :     }
    1187              : 
    1188              :     /*
    1189              :      * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
    1190              :      * subtransaction. The rel2 storage (swapped from rel1) may or may not be
    1191              :      * new.
    1192              :      */
    1193              :     {
    1194              :         Relation    rel1,
    1195              :                     rel2;
    1196              : 
    1197         1257 :         rel1 = relation_open(r1, NoLock);
    1198         1257 :         rel2 = relation_open(r2, NoLock);
    1199         1257 :         rel2->rd_createSubid = rel1->rd_createSubid;
    1200         1257 :         rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
    1201         1257 :         rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
    1202         1257 :         RelationAssumeNewRelfilelocator(rel1);
    1203         1257 :         relation_close(rel1, NoLock);
    1204         1257 :         relation_close(rel2, NoLock);
    1205              :     }
    1206              : 
    1207              :     /*
    1208              :      * In the case of a shared catalog, these next few steps will only affect
    1209              :      * our own database's pg_class row; but that's okay, because they are all
    1210              :      * noncritical updates.  That's also an important fact for the case of a
    1211              :      * mapped catalog, because it's possible that we'll commit the map change
    1212              :      * and then fail to commit the pg_class update.
    1213              :      */
    1214              : 
    1215              :     /* set rel1's frozen Xid and minimum MultiXid */
    1216         1257 :     if (relform1->relkind != RELKIND_INDEX)
    1217              :     {
    1218              :         Assert(!TransactionIdIsValid(frozenXid) ||
    1219              :                TransactionIdIsNormal(frozenXid));
    1220         1160 :         relform1->relfrozenxid = frozenXid;
    1221         1160 :         relform1->relminmxid = cutoffMulti;
    1222              :     }
    1223              : 
    1224              :     /* swap size statistics too, since new rel has freshly-updated stats */
    1225              :     {
    1226              :         int32       swap_pages;
    1227              :         float4      swap_tuples;
    1228              :         int32       swap_allvisible;
    1229              :         int32       swap_allfrozen;
    1230              : 
    1231         1257 :         swap_pages = relform1->relpages;
    1232         1257 :         relform1->relpages = relform2->relpages;
    1233         1257 :         relform2->relpages = swap_pages;
    1234              : 
    1235         1257 :         swap_tuples = relform1->reltuples;
    1236         1257 :         relform1->reltuples = relform2->reltuples;
    1237         1257 :         relform2->reltuples = swap_tuples;
    1238              : 
    1239         1257 :         swap_allvisible = relform1->relallvisible;
    1240         1257 :         relform1->relallvisible = relform2->relallvisible;
    1241         1257 :         relform2->relallvisible = swap_allvisible;
    1242              : 
    1243         1257 :         swap_allfrozen = relform1->relallfrozen;
    1244         1257 :         relform1->relallfrozen = relform2->relallfrozen;
    1245         1257 :         relform2->relallfrozen = swap_allfrozen;
    1246              :     }
    1247              : 
    1248              :     /*
    1249              :      * Update the tuples in pg_class --- unless the target relation of the
    1250              :      * swap is pg_class itself.  In that case, there is zero point in making
    1251              :      * changes because we'd be updating the old data that we're about to throw
    1252              :      * away.  Because the real work being done here for a mapped relation is
    1253              :      * just to change the relation map settings, it's all right to not update
    1254              :      * the pg_class rows in this case. The most important changes will instead
    1255              :      * be performed later, in finish_heap_swap() itself.
    1256              :      */
    1257         1257 :     if (!target_is_pg_class)
    1258              :     {
    1259              :         CatalogIndexState indstate;
    1260              : 
    1261         1235 :         indstate = CatalogOpenIndexes(relRelation);
    1262         1235 :         CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
    1263              :                                    indstate);
    1264         1235 :         CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
    1265              :                                    indstate);
    1266         1235 :         CatalogCloseIndexes(indstate);
    1267              :     }
    1268              :     else
    1269              :     {
    1270              :         /* no update ... but we do still need relcache inval */
    1271           22 :         CacheInvalidateRelcacheByTuple(reltup1);
    1272           22 :         CacheInvalidateRelcacheByTuple(reltup2);
    1273              :     }
    1274              : 
    1275              :     /*
    1276              :      * Now that pg_class has been updated with its relevant information for
    1277              :      * the swap, update the dependency of the relations to point to their new
    1278              :      * table AM, if it has changed.
    1279              :      */
    1280         1257 :     if (relam1 != relam2)
    1281              :     {
    1282           18 :         if (changeDependencyFor(RelationRelationId,
    1283              :                                 r1,
    1284              :                                 AccessMethodRelationId,
    1285              :                                 relam1,
    1286              :                                 relam2) != 1)
    1287            0 :             elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
    1288              :                  get_namespace_name(get_rel_namespace(r1)),
    1289              :                  get_rel_name(r1));
    1290           18 :         if (changeDependencyFor(RelationRelationId,
    1291              :                                 r2,
    1292              :                                 AccessMethodRelationId,
    1293              :                                 relam2,
    1294              :                                 relam1) != 1)
    1295            0 :             elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
    1296              :                  get_namespace_name(get_rel_namespace(r2)),
    1297              :                  get_rel_name(r2));
    1298              :     }
    1299              : 
    1300              :     /*
    1301              :      * Post alter hook for modified relations. The change to r2 is always
    1302              :      * internal, but r1 depends on the invocation context.
    1303              :      */
    1304         1257 :     InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
    1305              :                                  InvalidOid, is_internal);
    1306         1257 :     InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
    1307              :                                  InvalidOid, true);
    1308              : 
    1309              :     /*
    1310              :      * If we have toast tables associated with the relations being swapped,
    1311              :      * deal with them too.
    1312              :      */
    1313         1257 :     if (relform1->reltoastrelid || relform2->reltoastrelid)
    1314              :     {
    1315          412 :         if (swap_toast_by_content)
    1316              :         {
    1317           97 :             if (relform1->reltoastrelid && relform2->reltoastrelid)
    1318              :             {
    1319              :                 /* Recursively swap the contents of the toast tables */
    1320           97 :                 swap_relation_files(relform1->reltoastrelid,
    1321              :                                     relform2->reltoastrelid,
    1322              :                                     target_is_pg_class,
    1323              :                                     swap_toast_by_content,
    1324              :                                     is_internal,
    1325              :                                     frozenXid,
    1326              :                                     cutoffMulti,
    1327              :                                     mapped_tables);
    1328              :             }
    1329              :             else
    1330              :             {
    1331              :                 /* caller messed up */
    1332            0 :                 elog(ERROR, "cannot swap toast files by content when there's only one");
    1333              :             }
    1334              :         }
    1335              :         else
    1336              :         {
    1337              :             /*
    1338              :              * We swapped the ownership links, so we need to change dependency
    1339              :              * data to match.
    1340              :              *
    1341              :              * NOTE: it is possible that only one table has a toast table.
    1342              :              *
    1343              :              * NOTE: at present, a TOAST table's only dependency is the one on
    1344              :              * its owning table.  If more are ever created, we'd need to use
    1345              :              * something more selective than deleteDependencyRecordsFor() to
    1346              :              * get rid of just the link we want.
    1347              :              */
    1348              :             ObjectAddress baseobject,
    1349              :                         toastobject;
    1350              :             long        count;
    1351              : 
    1352              :             /*
    1353              :              * We disallow this case for system catalogs, to avoid the
    1354              :              * possibility that the catalog we're rebuilding is one of the
    1355              :              * ones the dependency changes would change.  It's too late to be
    1356              :              * making any data changes to the target catalog.
    1357              :              */
    1358          315 :             if (IsSystemClass(r1, relform1))
    1359            0 :                 elog(ERROR, "cannot swap toast files by links for system catalogs");
    1360              : 
    1361              :             /* Delete old dependencies */
    1362          315 :             if (relform1->reltoastrelid)
    1363              :             {
    1364          299 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1365              :                                                    relform1->reltoastrelid,
    1366              :                                                    false);
    1367          299 :                 if (count != 1)
    1368            0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1369              :                          count);
    1370              :             }
    1371          315 :             if (relform2->reltoastrelid)
    1372              :             {
    1373          315 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1374              :                                                    relform2->reltoastrelid,
    1375              :                                                    false);
    1376          315 :                 if (count != 1)
    1377            0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1378              :                          count);
    1379              :             }
    1380              : 
    1381              :             /* Register new dependencies */
    1382          315 :             baseobject.classId = RelationRelationId;
    1383          315 :             baseobject.objectSubId = 0;
    1384          315 :             toastobject.classId = RelationRelationId;
    1385          315 :             toastobject.objectSubId = 0;
    1386              : 
    1387          315 :             if (relform1->reltoastrelid)
    1388              :             {
    1389          299 :                 baseobject.objectId = r1;
    1390          299 :                 toastobject.objectId = relform1->reltoastrelid;
    1391          299 :                 recordDependencyOn(&toastobject, &baseobject,
    1392              :                                    DEPENDENCY_INTERNAL);
    1393              :             }
    1394              : 
    1395          315 :             if (relform2->reltoastrelid)
    1396              :             {
    1397          315 :                 baseobject.objectId = r2;
    1398          315 :                 toastobject.objectId = relform2->reltoastrelid;
    1399          315 :                 recordDependencyOn(&toastobject, &baseobject,
    1400              :                                    DEPENDENCY_INTERNAL);
    1401              :             }
    1402              :         }
    1403              :     }
    1404              : 
    1405              :     /*
    1406              :      * If we're swapping two toast tables by content, do the same for their
    1407              :      * valid index. The swap can actually be safely done only if the relations
    1408              :      * have indexes.
    1409              :      */
    1410         1257 :     if (swap_toast_by_content &&
    1411          291 :         relform1->relkind == RELKIND_TOASTVALUE &&
    1412           97 :         relform2->relkind == RELKIND_TOASTVALUE)
    1413              :     {
    1414              :         Oid         toastIndex1,
    1415              :                     toastIndex2;
    1416              : 
    1417              :         /* Get valid index for each relation */
    1418           97 :         toastIndex1 = toast_get_valid_index(r1,
    1419              :                                             AccessExclusiveLock);
    1420           97 :         toastIndex2 = toast_get_valid_index(r2,
    1421              :                                             AccessExclusiveLock);
    1422              : 
    1423           97 :         swap_relation_files(toastIndex1,
    1424              :                             toastIndex2,
    1425              :                             target_is_pg_class,
    1426              :                             swap_toast_by_content,
    1427              :                             is_internal,
    1428              :                             InvalidTransactionId,
    1429              :                             InvalidMultiXactId,
    1430              :                             mapped_tables);
    1431              :     }
    1432              : 
    1433              :     /* Clean up. */
    1434         1257 :     heap_freetuple(reltup1);
    1435         1257 :     heap_freetuple(reltup2);
    1436              : 
    1437         1257 :     table_close(relRelation, RowExclusiveLock);
    1438         1257 : }
    1439              : 
    1440              : /*
    1441              :  * Remove the transient table that was built by make_new_heap, and finish
    1442              :  * cleaning up (including rebuilding all indexes on the old heap).
    1443              :  */
    1444              : void
    1445         1063 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
    1446              :                  bool is_system_catalog,
    1447              :                  bool swap_toast_by_content,
    1448              :                  bool check_constraints,
    1449              :                  bool is_internal,
    1450              :                  TransactionId frozenXid,
    1451              :                  MultiXactId cutoffMulti,
    1452              :                  char newrelpersistence)
    1453              : {
    1454              :     ObjectAddress object;
    1455              :     Oid         mapped_tables[4];
    1456              :     int         reindex_flags;
    1457         1063 :     ReindexParams reindex_params = {0};
    1458              :     int         i;
    1459              : 
    1460              :     /* Report that we are now swapping relation files */
    1461         1063 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1462              :                                  PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
    1463              : 
    1464              :     /* Zero out possible results from swap_relation_files */
    1465         1063 :     memset(mapped_tables, 0, sizeof(mapped_tables));
    1466              : 
    1467              :     /*
    1468              :      * Swap the contents of the heap relations (including any toast tables).
    1469              :      * Also set old heap's relfrozenxid to frozenXid.
    1470              :      */
    1471         1063 :     swap_relation_files(OIDOldHeap, OIDNewHeap,
    1472              :                         (OIDOldHeap == RelationRelationId),
    1473              :                         swap_toast_by_content, is_internal,
    1474              :                         frozenXid, cutoffMulti, mapped_tables);
    1475              : 
    1476              :     /*
    1477              :      * If it's a system catalog, queue a sinval message to flush all catcaches
    1478              :      * on the catalog when we reach CommandCounterIncrement.
    1479              :      */
    1480         1063 :     if (is_system_catalog)
    1481          111 :         CacheInvalidateCatalog(OIDOldHeap);
    1482              : 
    1483              :     /*
    1484              :      * Rebuild each index on the relation (but not the toast table, which is
    1485              :      * all-new at this point).  It is important to do this before the DROP
    1486              :      * step because if we are processing a system catalog that will be used
    1487              :      * during DROP, we want to have its indexes available.  There is no
    1488              :      * advantage to the other order anyway because this is all transactional,
    1489              :      * so no chance to reclaim disk space before commit.  We do not need a
    1490              :      * final CommandCounterIncrement() because reindex_relation does it.
    1491              :      *
    1492              :      * Note: because index_build is called via reindex_relation, it will never
    1493              :      * set indcheckxmin true for the indexes.  This is OK even though in some
    1494              :      * sense we are building new indexes rather than rebuilding existing ones,
    1495              :      * because the new heap won't contain any HOT chains at all, let alone
    1496              :      * broken ones, so it can't be necessary to set indcheckxmin.
    1497              :      */
    1498         1063 :     reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    1499         1063 :     if (check_constraints)
    1500          777 :         reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
    1501              : 
    1502              :     /*
    1503              :      * Ensure that the indexes have the same persistence as the parent
    1504              :      * relation.
    1505              :      */
    1506         1063 :     if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
    1507           19 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    1508         1044 :     else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
    1509         1004 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
    1510              : 
    1511              :     /* Report that we are now reindexing relations */
    1512         1063 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1513              :                                  PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
    1514              : 
    1515         1063 :     reindex_relation(NULL, OIDOldHeap, reindex_flags, &reindex_params);
    1516              : 
    1517              :     /* Report that we are now doing clean up */
    1518         1054 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1519              :                                  PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
    1520              : 
    1521              :     /*
    1522              :      * If the relation being rebuilt is pg_class, swap_relation_files()
    1523              :      * couldn't update pg_class's own pg_class entry (check comments in
    1524              :      * swap_relation_files()), thus relfrozenxid was not updated. That's
    1525              :      * annoying because a potential reason for doing a VACUUM FULL is an
    1526              :      * imminent or actual anti-wraparound shutdown.  So, now that we can
    1527              :      * access the new relation using its indices, update relfrozenxid.
    1528              :      * pg_class doesn't have a toast relation, so we don't need to update the
    1529              :      * corresponding toast relation. Note that there's little point moving all
    1530              :      * relfrozenxid updates here since swap_relation_files() needs to write to
    1531              :      * pg_class for non-mapped relations anyway.
    1532              :      */
    1533         1054 :     if (OIDOldHeap == RelationRelationId)
    1534              :     {
    1535              :         Relation    relRelation;
    1536              :         HeapTuple   reltup;
    1537              :         Form_pg_class relform;
    1538              : 
    1539           22 :         relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1540              : 
    1541           22 :         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    1542           22 :         if (!HeapTupleIsValid(reltup))
    1543            0 :             elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    1544           22 :         relform = (Form_pg_class) GETSTRUCT(reltup);
    1545              : 
    1546           22 :         relform->relfrozenxid = frozenXid;
    1547           22 :         relform->relminmxid = cutoffMulti;
    1548              : 
    1549           22 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1550              : 
    1551           22 :         table_close(relRelation, RowExclusiveLock);
    1552              :     }
    1553              : 
    1554              :     /* Destroy new heap with old filenumber */
    1555         1054 :     object.classId = RelationRelationId;
    1556         1054 :     object.objectId = OIDNewHeap;
    1557         1054 :     object.objectSubId = 0;
    1558              : 
    1559              :     /*
    1560              :      * The new relation is local to our transaction and we know nothing
    1561              :      * depends on it, so DROP_RESTRICT should be OK.
    1562              :      */
    1563         1054 :     performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
    1564              : 
    1565              :     /* performDeletion does CommandCounterIncrement at end */
    1566              : 
    1567              :     /*
    1568              :      * Now we must remove any relation mapping entries that we set up for the
    1569              :      * transient table, as well as its toast table and toast index if any. If
    1570              :      * we fail to do this before commit, the relmapper will complain about new
    1571              :      * permanent map entries being added post-bootstrap.
    1572              :      */
    1573         1139 :     for (i = 0; OidIsValid(mapped_tables[i]); i++)
    1574           85 :         RelationMapRemoveMapping(mapped_tables[i]);
    1575              : 
    1576              :     /*
    1577              :      * At this point, everything is kosher except that, if we did toast swap
    1578              :      * by links, the toast table's name corresponds to the transient table.
    1579              :      * The name is irrelevant to the backend because it's referenced by OID,
    1580              :      * but users looking at the catalogs could be confused.  Rename it to
    1581              :      * prevent this problem.
    1582              :      *
    1583              :      * Note no lock required on the relation, because we already hold an
    1584              :      * exclusive lock on it.
    1585              :      */
    1586         1054 :     if (!swap_toast_by_content)
    1587              :     {
    1588              :         Relation    newrel;
    1589              : 
    1590          957 :         newrel = table_open(OIDOldHeap, NoLock);
    1591          957 :         if (OidIsValid(newrel->rd_rel->reltoastrelid))
    1592              :         {
    1593              :             Oid         toastidx;
    1594              :             char        NewToastName[NAMEDATALEN];
    1595              : 
    1596              :             /* Get the associated valid index to be renamed */
    1597          299 :             toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
    1598              :                                              NoLock);
    1599              : 
    1600              :             /* rename the toast table ... */
    1601          299 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
    1602              :                      OIDOldHeap);
    1603          299 :             RenameRelationInternal(newrel->rd_rel->reltoastrelid,
    1604              :                                    NewToastName, true, false);
    1605              : 
    1606              :             /* ... and its valid index too. */
    1607          299 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
    1608              :                      OIDOldHeap);
    1609              : 
    1610          299 :             RenameRelationInternal(toastidx,
    1611              :                                    NewToastName, true, true);
    1612              : 
    1613              :             /*
    1614              :              * Reset the relrewrite for the toast. The command-counter
    1615              :              * increment is required here as we are about to update the tuple
    1616              :              * that is updated as part of RenameRelationInternal.
    1617              :              */
    1618          299 :             CommandCounterIncrement();
    1619          299 :             ResetRelRewrite(newrel->rd_rel->reltoastrelid);
    1620              :         }
    1621          957 :         relation_close(newrel, NoLock);
    1622              :     }
    1623              : 
    1624              :     /* if it's not a catalog table, clear any missing attribute settings */
    1625         1054 :     if (!is_system_catalog)
    1626              :     {
    1627              :         Relation    newrel;
    1628              : 
    1629          943 :         newrel = table_open(OIDOldHeap, NoLock);
    1630          943 :         RelationClearMissing(newrel);
    1631          943 :         relation_close(newrel, NoLock);
    1632              :     }
    1633         1054 : }
    1634              : 
    1635              : 
    1636              : /*
    1637              :  * Scan pg_index for indexes with indisclustered set and collect, as a List
    1638              :  * of RelToCluster (tableOid plus the indexOid it is clustered on), those
    1639              :  * whose table passes cluster_is_permitted_for_relation() for the current
    1640              :  * user.  Entries are allocated in cluster_context; NIL if none qualify.
    1641              :  */
    1642              : static List *
    1643           10 : get_tables_to_cluster(MemoryContext cluster_context)
    1644              : {
    1645              :     Relation    indRelation;
    1646              :     TableScanDesc scan;
    1647              :     ScanKeyData entry;
    1648              :     HeapTuple   indexTuple;
    1649              :     Form_pg_index index;
    1650              :     MemoryContext old_context;
    1651           10 :     List       *rtcs = NIL;
    1652              : 
    1653              :     /*
    1654              :      * The scan key restricts the scan to pg_index rows with indisclustered
    1655              :      * set to true; privileges are checked per tuple inside the loop.
    1656              :      */
    1657           10 :     indRelation = table_open(IndexRelationId, AccessShareLock);
    1658           10 :     ScanKeyInit(&entry,
    1659              :                 Anum_pg_index_indisclustered,
    1660              :                 BTEqualStrategyNumber, F_BOOLEQ,
    1661              :                 BoolGetDatum(true));
    1662           10 :     scan = table_beginscan_catalog(indRelation, 1, &entry);
    1663           19 :     while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1664              :     {
    1665              :         RelToCluster *rtc;
    1666              : 
    1667            9 :         index = (Form_pg_index) GETSTRUCT(indexTuple);
    1668              :         /* skip tables the user may not cluster; the check itself warns */
    1669            9 :         if (!cluster_is_permitted_for_relation(index->indrelid, GetUserId()))
    1670            6 :             continue;
    1671              : 
    1672              :         /* Allocate the entry and the list cell in cluster_context */
    1673            3 :         old_context = MemoryContextSwitchTo(cluster_context);
    1674              : 
    1675            3 :         rtc = palloc_object(RelToCluster);
    1676            3 :         rtc->tableOid = index->indrelid;
    1677            3 :         rtc->indexOid = index->indexrelid;
    1678            3 :         rtcs = lappend(rtcs, rtc);
    1679              : 
    1680            3 :         MemoryContextSwitchTo(old_context);
    1681              :     }
    1682           10 :     table_endscan(scan);
    1683              : 
    1684           10 :     relation_close(indRelation, AccessShareLock);
    1685              : 
    1686           10 :     return rtcs;
    1687              : }
    1688              : 
    1689              : /*
    1690              :  * Given an index on a partitioned table, return a list of RelToCluster for
    1691              :  * every leaf index beneath it, paired with the table that index belongs to.
    1692              :  * Entries are allocated in cluster_context; unpermitted tables are skipped.
    1693              :  * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
    1694              :  * on the table containing the index.
    1695              :  */
    1696              : static List *
    1697           10 : get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
    1698              : {
    1699              :     List       *inhoids;
    1700              :     ListCell   *lc;
    1701           10 :     List       *rtcs = NIL;
    1702              :     MemoryContext old_context;
    1703              : 
    1704              :     /* NoLock: do not lock the children until they're processed */
    1705           10 :     inhoids = find_all_inheritors(indexOid, NoLock, NULL);
    1706              : 
    1707           52 :     foreach(lc, inhoids)
    1708              :     {
    1709           42 :         Oid         indexrelid = lfirst_oid(lc);
    1710           42 :         Oid         relid = IndexGetRelation(indexrelid, false);
    1711              :         RelToCluster *rtc;
    1712              : 
    1713              :         /* consider only leaf indexes, i.e. those of relkind RELKIND_INDEX */
    1714           42 :         if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
    1715           19 :             continue;
    1716              : 
    1717              :         /*
    1718              :          * It's possible that the user does not have privileges to CLUSTER the
    1719              :          * leaf partition despite having such privileges on the partitioned
    1720              :          * table.  We skip any partitions which the user is not permitted to
    1721              :          * CLUSTER; the check itself reports each skip with a WARNING.
    1722              :          */
    1723           23 :         if (!cluster_is_permitted_for_relation(relid, GetUserId()))
    1724           11 :             continue;
    1725              : 
    1726              :         /* Allocate the entry and the list cell in cluster_context */
    1727           12 :         old_context = MemoryContextSwitchTo(cluster_context);
    1728              : 
    1729           12 :         rtc = palloc_object(RelToCluster);
    1730           12 :         rtc->tableOid = relid;
    1731           12 :         rtc->indexOid = indexrelid;
    1732           12 :         rtcs = lappend(rtcs, rtc);
    1733              : 
    1734           12 :         MemoryContextSwitchTo(old_context);
    1735              :     }
    1736              : 
    1737           10 :     return rtcs;
    1738              : }
    1739              : 
    1740              : /*
    1741              :  * Return true if userid holds ACL_MAINTAIN on relid per pg_class_aclcheck().
    1742              :  * Otherwise emit a WARNING naming the relation and return false.
    1743              :  */
    1744              : static bool
    1745           47 : cluster_is_permitted_for_relation(Oid relid, Oid userid)
    1746              : {
    1747           47 :     if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
    1748           30 :         return true;
    1749              :     /* NOTE(review): get_rel_name() is passed to %s unchecked -- confirm */
    1750           17 :     ereport(WARNING,
    1751              :             (errmsg("permission denied to cluster \"%s\", skipping it",
    1752              :                     get_rel_name(relid))));
    1753           17 :     return false;
    1754              : }
        

Generated by: LCOV version 2.0-1