LCOV - code coverage report
Current view: top level - src/backend/commands - cluster.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 413 458 90.2 %
Date: 2024-10-07 00:11:18 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * cluster.c
       4             :  *    CLUSTER a table on an index.  This is now also used for VACUUM FULL.
       5             :  *
       6             :  * There is hardly anything left of Paul Brown's original implementation...
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994-5, Regents of the University of California
      11             :  *
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *    src/backend/commands/cluster.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include "access/amapi.h"
      21             : #include "access/heapam.h"
      22             : #include "access/multixact.h"
      23             : #include "access/relscan.h"
      24             : #include "access/tableam.h"
      25             : #include "access/toast_internals.h"
      26             : #include "access/transam.h"
      27             : #include "access/xact.h"
      28             : #include "catalog/catalog.h"
      29             : #include "catalog/dependency.h"
      30             : #include "catalog/heap.h"
      31             : #include "catalog/index.h"
      32             : #include "catalog/namespace.h"
      33             : #include "catalog/objectaccess.h"
      34             : #include "catalog/pg_am.h"
      35             : #include "catalog/pg_database.h"
      36             : #include "catalog/pg_inherits.h"
      37             : #include "catalog/toasting.h"
      38             : #include "commands/cluster.h"
      39             : #include "commands/defrem.h"
      40             : #include "commands/progress.h"
      41             : #include "commands/tablecmds.h"
      42             : #include "commands/vacuum.h"
      43             : #include "miscadmin.h"
      44             : #include "optimizer/optimizer.h"
      45             : #include "pgstat.h"
      46             : #include "storage/bufmgr.h"
      47             : #include "storage/lmgr.h"
      48             : #include "storage/predicate.h"
      49             : #include "utils/acl.h"
      50             : #include "utils/fmgroids.h"
      51             : #include "utils/guc.h"
      52             : #include "utils/inval.h"
      53             : #include "utils/lsyscache.h"
      54             : #include "utils/memutils.h"
      55             : #include "utils/pg_rusage.h"
      56             : #include "utils/relmapper.h"
      57             : #include "utils/snapmgr.h"
      58             : #include "utils/syscache.h"
      59             : 
      60             : /*
      61             :  * One (table, index) pair queued for clustering.  cluster() obtains a list
      62             :  * of these when it is given no table name or a partitioned table, and
      63             :  * cluster_multiple_rels() hands each entry to cluster_rel().
      64             :  */
      65             : typedef struct
      66             : {
      67             :     Oid         tableOid;       /* table to be clustered */
      68             :     Oid         indexOid;       /* index to cluster it on */
      69             : } RelToCluster;
      70             : 
      71             : 
      72             : static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
      73             : static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
      74             : static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
      75             :                             bool verbose, bool *pSwapToastByContent,
      76             :                             TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
      77             : static List *get_tables_to_cluster(MemoryContext cluster_context);
      78             : static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
      79             :                                                Oid indexOid);
      80             : static bool cluster_is_permitted_for_relation(Oid relid, Oid userid);
      81             : 
      82             : 
      83             : /*---------------------------------------------------------------------------
      84             :  * cluster: execute the CLUSTER command described by stmt.
      85             :  *
      86             :  * This code allows for clustering multiple tables at once.  Because of
      87             :  * this, we cannot just run everything in a single transaction, or we would
      88             :  * be forced to acquire exclusive locks on all the tables being clustered,
      89             :  * simultaneously --- very likely leading to deadlock.
      90             :  *
      91             :  * To solve this we follow a strategy similar to the VACUUM code, clustering
      92             :  * each relation in a separate transaction.  For this to work, we need to:
      93             :  *  - provide a separate memory context so that we can pass information in
      94             :  *    a way that survives across transactions
      95             :  *  - start a new transaction every time a new relation is clustered
      96             :  *  - check for validity of the information on to-be-clustered relations,
      97             :  *    as someone might have deleted a relation behind our back, or
      98             :  *    clustered one on a different index
      99             :  *  - end the transaction
     100             :  *
     101             :  * The single-relation case does not have any such overhead.
     102             :  *
     103             :  * We also allow a relation to be specified without index.  In that case,
     104             :  * the index with indisclustered set is used; ERROR if there is none.
     105             :  *---------------------------------------------------------------------------
     106             :  */
     107             : void
     108         236 : cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
     109             : {
     110             :     ListCell   *lc;
     111         236 :     ClusterParams params = {0};
     112         236 :     bool        verbose = false;
     113         236 :     Relation    rel = NULL;
     114         236 :     Oid         indexOid = InvalidOid;
     115             :     MemoryContext cluster_context;
     116             :     List       *rtcs;
     117             : 
     118             :     /* Parse option list */
     119         248 :     foreach(lc, stmt->params)
     120             :     {
     121          12 :         DefElem    *opt = (DefElem *) lfirst(lc);
     122             : 
     123          12 :         if (strcmp(opt->defname, "verbose") == 0)
     124          12 :             verbose = defGetBoolean(opt);
     125             :         else
     126           0 :             ereport(ERROR,
     127             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     128             :                      errmsg("unrecognized CLUSTER option \"%s\"",
     129             :                             opt->defname),
     130             :                      parser_errposition(pstate, opt->location)));
     131             :     }
     132             : 
     133         236 :     params.options = (verbose ? CLUOPT_VERBOSE : 0);
     134             : 
     135         236 :     if (stmt->relation != NULL)
     136             :     {
     137             :         /* This is the single-relation case. */
     138             :         Oid         tableOid;
     139             : 
     140             :         /*
     141             :          * Find, lock, and check permissions on the table.  We obtain
     142             :          * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
     143             :          * single-transaction case.
     144             :          */
     145         208 :         tableOid = RangeVarGetRelidExtended(stmt->relation,
     146             :                                             AccessExclusiveLock,
     147             :                                             0,
     148             :                                             RangeVarCallbackMaintainsTable,
     149             :                                             NULL);
     150         196 :         rel = table_open(tableOid, NoLock);
     151             : 
     152             :         /*
     153             :          * Reject clustering a remote temp table ... their local buffer
     154             :          * manager is not going to cope.
     155             :          */
     156         196 :         if (RELATION_IS_OTHER_TEMP(rel))
     157           0 :             ereport(ERROR,
     158             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     159             :                      errmsg("cannot cluster temporary tables of other sessions")));
     160             : 
     161         196 :         if (stmt->indexname == NULL)
     162             :         {
     163             :             ListCell   *index;
     164             : 
     165             :             /* We need to find the index that has indisclustered set. */
     166          46 :             foreach(index, RelationGetIndexList(rel))
     167             :             {
     168          34 :                 indexOid = lfirst_oid(index);
     169          34 :                 if (get_index_isclustered(indexOid))
     170          22 :                     break;
     171          12 :                 indexOid = InvalidOid;  /* not this one; stays invalid if none match */
     172             :             }
     173             : 
     174          34 :             if (!OidIsValid(indexOid))
     175          12 :                 ereport(ERROR,
     176             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     177             :                          errmsg("there is no previously clustered index for table \"%s\"",
     178             :                                 stmt->relation->relname)));
     179             :         }
     180             :         else
     181             :         {
     182             :             /*
     183             :              * The index is expected to be in the same namespace as the
     184             :              * relation.
     185             :              */
     186         162 :             indexOid = get_relname_relid(stmt->indexname,
     187         162 :                                          rel->rd_rel->relnamespace);
     188         162 :             if (!OidIsValid(indexOid))
     189           0 :                 ereport(ERROR,
     190             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     191             :                          errmsg("index \"%s\" for table \"%s\" does not exist",
     192             :                                 stmt->indexname, stmt->relation->relname)));
     193             :         }
     194             : 
     195         184 :         if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
     196             :         {
     197             :             /* close relation, keep lock till commit */
     198         158 :             table_close(rel, NoLock);
     199             : 
     200             :             /* Do the job. */
     201         158 :             cluster_rel(tableOid, indexOid, &params);
     202             : 
     203         158 :             return;
     204             :         }
     205             :     }
     206             : 
     207             :     /*
     208             :      * By here, we know we are in a multi-table situation.  In order to avoid
     209             :      * holding locks for too long, we want to process each table in its own
     210             :      * transaction.  This forces us to disallow running inside a user
     211             :      * transaction block.
     212             :      */
     213          54 :     PreventInTransactionBlock(isTopLevel, "CLUSTER");
     214             : 
     215             :     /* Also, we need a memory context to hold our list of relations */
     216          54 :     cluster_context = AllocSetContextCreate(PortalContext,
     217             :                                             "Cluster",
     218             :                                             ALLOCSET_DEFAULT_SIZES);
     219             : 
     220             :     /*
     221             :      * Either we're processing a partitioned table, or we were not given any
     222             :      * table name at all.  In either case, obtain a list of relations to
     223             :      * process.
     224             :      *
     225             :      * In the former case, an index name must have been given, so we don't
     226             :      * need to recheck its "indisclustered" bit, but we have to check that it
     227             :      * is an index that we can cluster on.  In the latter case, we set the
     228             :      * option bit to have indisclustered verified.
     229             :      *
     230             :      * Rechecking the relation itself is necessary here in all cases.
     231             :      */
     232          54 :     params.options |= CLUOPT_RECHECK;
     233          54 :     if (rel != NULL)
     234             :     {
     235             :         Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
     236          26 :         check_index_is_clusterable(rel, indexOid, AccessShareLock);
     237          20 :         rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
     238             : 
     239             :         /* close relation, releasing lock on parent table */
     240          20 :         table_close(rel, AccessExclusiveLock);
     241             :     }
     242             :     else
     243             :     {
     244          28 :         rtcs = get_tables_to_cluster(cluster_context);
     245          28 :         params.options |= CLUOPT_RECHECK_ISCLUSTERED;
     246             :     }
     247             : 
     248             :     /* Do the job. */
     249          48 :     cluster_multiple_rels(rtcs, &params);
     250             : 
     251             :     /* cluster_multiple_rels() left no transaction open; start one for cleanup. */
     252          48 :     StartTransactionCommand();
     253             : 
     254             :     /* Clean up working storage */
     255          48 :     MemoryContextDelete(cluster_context);
     256             : }
     257             : 
     258             : /*
     259             :  * Given a list of RelToCluster entries, run cluster_rel() on each of them
     260             :  * in a separate transaction, passing "params" through unchanged.
     261             :  *
     262             :  * We expect to be in a transaction (with an active snapshot) at start, but
     263             :  * there isn't one when we return.
     264             :  */
     265             : static void
     266          48 : cluster_multiple_rels(List *rtcs, ClusterParams *params)
     267             : {
     268             :     ListCell   *lc;
     269             : 
     270             :     /* Commit to get out of starting transaction */
     271          48 :     PopActiveSnapshot();
     272          48 :     CommitTransactionCommand();
     273             : 
     274             :     /* Cluster the tables, each in a separate transaction */
     275          78 :     foreach(lc, rtcs)
     276             :     {
     277          30 :         RelToCluster *rtc = (RelToCluster *) lfirst(lc);
     278             : 
     279             :         /* Start a new transaction for each relation. */
     280          30 :         StartTransactionCommand();
     281             : 
     282             :         /* functions in indexes may want a snapshot set */
     283          30 :         PushActiveSnapshot(GetTransactionSnapshot());
     284             : 
     285             :         /* Do the job. */
     286          30 :         cluster_rel(rtc->tableOid, rtc->indexOid, params);
     287             : 
     288          30 :         PopActiveSnapshot();
     289          30 :         CommitTransactionCommand();
     290             :     }
     291          48 : }
     292             : 
     293             : /*
     294             :  * cluster_rel
     295             :  *
     296             :  * This clusters the table by creating a new, clustered table and swapping
     297             :  * the relfilenumbers of the new table and the old table, so the OID of the
     298             :  * original table is preserved.  Thus we lose neither GRANTs, inheritance,
     299             :  * nor references to this table (this was a bug in releases through 7.3).
     300             :  *
     301             :  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
     302             :  * the new table, it's better to create the indexes afterwards than to fill
     303             :  * them incrementally while we load the table.
     304             :  *
     305             :  * If indexOid is InvalidOid, the table will be rewritten in physical order
     306             :  * instead of index order.  This is the new implementation of VACUUM FULL,
     307             :  * and error messages should refer to the operation as VACUUM not CLUSTER.
     308             :  * If CLUOPT_RECHECK is set, a relation that fails the rechecks is skipped.
     309             :  */
     310             : void
     311         532 : cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
     312             : {
     313             :     Relation    OldHeap;
     314             :     Oid         save_userid;
     315             :     int         save_sec_context;
     316             :     int         save_nestlevel;
     317         532 :     bool        verbose = ((params->options & CLUOPT_VERBOSE) != 0);
     318         532 :     bool        recheck = ((params->options & CLUOPT_RECHECK) != 0);
     319             : 
     320             :     /* Check for user-requested abort. */
     321         532 :     CHECK_FOR_INTERRUPTS();
     322             : 
     323         532 :     pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
     324         532 :     if (OidIsValid(indexOid))
     325         188 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     326             :                                      PROGRESS_CLUSTER_COMMAND_CLUSTER);
     327             :     else
     328         344 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     329             :                                      PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
     330             : 
     331             :     /*
     332             :      * We grab exclusive access to the target rel and index for the duration
     333             :      * of the transaction.  (This is redundant for the single-transaction
     334             :      * case, since cluster() already did it.)  The index lock is taken inside
     335             :      * check_index_is_clusterable.
     336             :      */
     337         532 :     OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
     338             : 
     339             :     /* If the table has gone away, we can skip processing it */
     340         532 :     if (!OldHeap)
     341             :     {
     342           0 :         pgstat_progress_end_command();
     343           0 :         return;
     344             :     }
     345             : 
     346             :     /*
     347             :      * Switch to the table owner's userid, so that any index functions are run
     348             :      * as that user.  Also lock down security-restricted operations and
     349             :      * arrange to make GUC variable changes local to this command.
     350             :      */
     351         532 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
     352         532 :     SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
     353             :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
     354         532 :     save_nestlevel = NewGUCNestLevel();
     355         532 :     RestrictSearchPath();
     356             : 
     357             :     /*
     358             :      * Since we may open a new transaction for each relation, we have to check
     359             :      * that the relation still is what we think it is.
     360             :      *
     361             :      * If this is a single-transaction CLUSTER, we can skip these tests. We
     362             :      * *must* skip the one on indisclustered since it would reject an attempt
     363             :      * to cluster a not-previously-clustered index.
     364             :      */
     365         532 :     if (recheck)
     366             :     {
     367             :         /* Check that the user still has privileges for the relation */
     368          30 :         if (!cluster_is_permitted_for_relation(tableOid, save_userid))
     369             :         {
     370           0 :             relation_close(OldHeap, AccessExclusiveLock);
     371           0 :             goto out;
     372             :         }
     373             : 
     374             :         /*
     375             :          * Silently skip a temp table for a remote session.  Only doing this
     376             :          * check in the "recheck" case is appropriate (which currently means
     377             :          * somebody is executing a database-wide CLUSTER or on a partitioned
     378             :          * table), because there is another check in cluster() which will stop
     379             :          * any attempt to cluster remote temp tables by name.  The check
     380             :          * further down in this function, which raises an ERROR instead, is
     381             :          * redundant, but we leave it for extra safety.
     382             :          */
     383          30 :         if (RELATION_IS_OTHER_TEMP(OldHeap))
     384             :         {
     385           0 :             relation_close(OldHeap, AccessExclusiveLock);
     386           0 :             goto out;
     387             :         }
     388             : 
     389          30 :         if (OidIsValid(indexOid))
     390             :         {
     391             :             /*
     392             :              * Check that the index still exists
     393             :              */
     394          30 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
     395             :             {
     396           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     397           0 :                 goto out;
     398             :             }
     399             : 
     400             :             /*
     401             :              * Check that the index is still the one with indisclustered set,
     402             :              * if needed.
     403             :              */
     404          30 :             if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
     405           6 :                 !get_index_isclustered(indexOid))
     406             :             {
     407           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     408           0 :                 goto out;
     409             :             }
     410             :         }
     411             :     }
     412             : 
     413             :     /*
     414             :      * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
     415             :      * would work in most respects, but the index would only get marked as
     416             :      * indisclustered in the current database, leading to unexpected behavior
     417             :      * if CLUSTER were later invoked in another database.
     418             :      */
     419         532 :     if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
     420           0 :         ereport(ERROR,
     421             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     422             :                  errmsg("cannot cluster a shared catalog")));
     423             : 
     424             :     /*
     425             :      * Don't process temp tables of other backends ... their local buffer
     426             :      * manager is not going to cope.
     427             :      */
     428         532 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     429             :     {
     430           0 :         if (OidIsValid(indexOid))
     431           0 :             ereport(ERROR,
     432             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     433             :                      errmsg("cannot cluster temporary tables of other sessions")));
     434             :         else
     435           0 :             ereport(ERROR,
     436             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     437             :                      errmsg("cannot vacuum temporary tables of other sessions")));
     438             :     }
     439             : 
     440             :     /*
     441             :      * Also check for active uses of the relation in the current transaction,
     442             :      * including open scans and pending AFTER trigger events.
     443             :      */
     444         532 :     CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
     445             : 
     446             :     /* Check heap and index are valid to cluster on */
     447         532 :     if (OidIsValid(indexOid))
     448         188 :         check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
     449             : 
     450             :     /*
     451             :      * Quietly ignore the request if this is a materialized view which has not
     452             :      * been populated from its query. No harm is done because there is no data
     453             :      * to deal with, and we don't want to throw an error if this is part of a
     454             :      * multi-relation request -- for example, CLUSTER was run on the entire
     455             :      * database.
     456             :      */
     457         532 :     if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
     458           0 :         !RelationIsPopulated(OldHeap))
     459             :     {
     460           0 :         relation_close(OldHeap, AccessExclusiveLock);
     461           0 :         goto out;
     462             :     }
     463             : 
     464             :     Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
     465             :            OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
     466             :            OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
     467             : 
     468             :     /*
     469             :      * All predicate locks on the tuples or pages are about to be made
     470             :      * invalid, because we move tuples around.  Promote them to relation
     471             :      * locks.  Predicate locks on indexes will be promoted when they are
     472             :      * reindexed.
     473             :      */
     474         532 :     TransferPredicateLocksToHeapRelation(OldHeap);
     475             : 
     476             :     /* rebuild_relation does all the dirty work */
     477         532 :     rebuild_relation(OldHeap, indexOid, verbose);
     478             : 
     479             :     /* NB: rebuild_relation does table_close() on OldHeap */
     480             : 
     481         526 : out:
     482             :     /* Roll back any GUC changes executed by index functions */
     483         526 :     AtEOXact_GUC(false, save_nestlevel);
     484             : 
     485             :     /* Restore userid and security context */
     486         526 :     SetUserIdAndSecContext(save_userid, save_sec_context);
     487             : 
     488         526 :     pgstat_progress_end_command();
     489             : }
     490             : 
     491             : /*
     492             :  * Verify that the specified heap and index are valid to cluster on; raises
     493             :  * an ERROR if not.
     494             :  *
     495             :  * Side effect: obtains "lockmode" lock on the index and keeps it on return.
     496             :  * The caller may in some cases already have AccessExclusiveLock on the table,
     497             :  * but not in all cases, so we can't rely on the table-level lock here.
     498             :  */
     499             : void
     500         278 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
     501             : {
     502             :     Relation    OldIndex;
     503             : 
     504         278 :     OldIndex = index_open(indexOid, lockmode);
     505             : 
     506             :     /*
     507             :      * Check that index is in fact an index on the given relation
     508             :      */
     509         278 :     if (OldIndex->rd_index == NULL ||
     510         278 :         OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
     511           0 :         ereport(ERROR,
     512             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     513             :                  errmsg("\"%s\" is not an index for table \"%s\"",
     514             :                         RelationGetRelationName(OldIndex),
     515             :                         RelationGetRelationName(OldHeap))));
     516             : 
     517             :     /* Index AM must allow clustering */
     518         278 :     if (!OldIndex->rd_indam->amclusterable)
     519           0 :         ereport(ERROR,
     520             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     521             :                  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
     522             :                         RelationGetRelationName(OldIndex))));
     523             : 
     524             :     /*
     525             :      * Disallow clustering on incomplete indexes (those that might not index
     526             :      * every row of the relation).  We could relax this by making a separate
     527             :      * seqscan pass over the table to copy the missing rows, but that seems
     528             :      * expensive and tedious.
     529             :      */
     530         278 :     if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
     531           0 :         ereport(ERROR,
     532             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     533             :                  errmsg("cannot cluster on partial index \"%s\"",
     534             :                         RelationGetRelationName(OldIndex))));
     535             : 
     536             :     /*
     537             :      * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
     538             :      * it might well not contain entries for every heap row, or might not even
     539             :      * be internally consistent.  (But note that we don't check indcheckxmin;
     540             :      * the worst consequence of following broken HOT chains would be that we
     541             :      * might put recently-dead tuples out-of-order in the new table, and there
     542             :      * is little harm in that.)
     543             :      */
     544         278 :     if (!OldIndex->rd_index->indisvalid)
     545           6 :         ereport(ERROR,
     546             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     547             :                  errmsg("cannot cluster on invalid index \"%s\"",
     548             :                         RelationGetRelationName(OldIndex))));
     549             : 
     550             :     /* Drop relcache refcnt on OldIndex, but keep lock */
     551         272 :     index_close(OldIndex, NoLock);
     552         272 : }
     553             : 
     554             : /*
     555             :  * mark_index_clustered: mark the specified index as the one clustered on
     556             :  *
     557             :  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
                     :  *
                     :  * Raises an error if rel is a partitioned table.  If indexOid is valid and
                     :  * that index is already marked clustered, returns without touching the
                     :  * catalog.  Otherwise every index of rel is visited: a set indisclustered
                     :  * flag is cleared, the flag is set on the index matching indexOid, and the
                     :  * object-post-alter hook is invoked for each index visited (whether or not
                     :  * its tuple was changed), with is_internal passed through to the hook.
     558             :  */
     559             : void
     560         270 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
     561             : {
     562             :     HeapTuple   indexTuple;
     563             :     Form_pg_index indexForm;
     564             :     Relation    pg_index;
     565             :     ListCell   *index;
     566             : 
     567             :     /* Disallow applying to a partitioned table */
     568         270 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     569          12 :         ereport(ERROR,
     570             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     571             :                  errmsg("cannot mark index clustered in partitioned table")));
     572             : 
     573             :     /*
     574             :      * If the index is already marked clustered, no need to do anything.
     575             :      */
     576         258 :     if (OidIsValid(indexOid))
     577             :     {
     578         246 :         if (get_index_isclustered(indexOid))
     579          40 :             return;
     580             :     }
     581             : 
     582             :     /*
     583             :      * Check each index of the relation and set/clear the bit as needed.
                     :      * Each pg_index tuple is fetched as a modifiable copy and freed at the
                     :      * end of its iteration.
     584             :      */
     585         218 :     pg_index = table_open(IndexRelationId, RowExclusiveLock);
     586             : 
     587         642 :     foreach(index, RelationGetIndexList(rel))
     588             :     {
     589         424 :         Oid         thisIndexOid = lfirst_oid(index);
     590             : 
     591         424 :         indexTuple = SearchSysCacheCopy1(INDEXRELID,
     592             :                                          ObjectIdGetDatum(thisIndexOid));
     593         424 :         if (!HeapTupleIsValid(indexTuple))
     594           0 :             elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
     595         424 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     596             : 
     597             :         /*
     598             :          * Unset the bit if set.  We know it's wrong because we checked this
     599             :          * earlier: if indexOid is valid and already marked clustered we
                     :          * returned above, so any index still carrying the bit here is not
                     :          * the requested one.
     600             :          */
     601         424 :         if (indexForm->indisclustered)
     602             :         {
     603          30 :             indexForm->indisclustered = false;
     604          30 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     605             :         }
     606         394 :         else if (thisIndexOid == indexOid)
     607             :         {
     608             :             /* this was checked earlier, but let's be real sure */
     609         206 :             if (!indexForm->indisvalid)
     610           0 :                 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
     611         206 :             indexForm->indisclustered = true;
     612         206 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     613             :         }
     614             : 
     615         424 :         InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
     616             :                                      InvalidOid, is_internal);
     617             : 
     618         424 :         heap_freetuple(indexTuple);
     619             :     }
     620             : 
     621         218 :     table_close(pg_index, RowExclusiveLock);
     622             : }
     623             : 
     624             : /*
     625             :  * rebuild_relation: rebuild an existing relation in index or physical order
     626             :  *
     627             :  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
     628             :  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
                     :  * verbose: passed through unchanged to copy_table_data.
     629             :  *
     630             :  * NB: this routine closes OldHeap at the right time; caller should not.
                     :  *
                     :  * Sequence: mark indexOid as the clustered index (only if it is valid),
                     :  * remember the table's persistence and system-catalog status, close the
                     :  * relcache entry, create a transient heap with the old table's tablespace,
                     :  * access method and persistence, copy the data into it, and finally call
                     :  * finish_heap_swap.
     631             :  */
     632             : static void
     633         532 : rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
     634             : {
     635         532 :     Oid         tableOid = RelationGetRelid(OldHeap);
     636         532 :     Oid         accessMethod = OldHeap->rd_rel->relam;
     637         532 :     Oid         tableSpace = OldHeap->rd_rel->reltablespace;
     638             :     Oid         OIDNewHeap;
     639             :     char        relpersistence;
     640             :     bool        is_system_catalog;
     641             :     bool        swap_toast_by_content;
     642             :     TransactionId frozenXid;
     643             :     MultiXactId cutoffMulti;
     644             : 
     645         532 :     if (OidIsValid(indexOid))
     646             :         /* Mark the correct index as clustered */
     647         188 :         mark_index_clustered(OldHeap, indexOid, true);
     648             : 
     649             :     /* Remember info about rel before closing OldHeap */
     650         532 :     relpersistence = OldHeap->rd_rel->relpersistence;
     651         532 :     is_system_catalog = IsSystemRelation(OldHeap);
     652             : 
     653             :     /* Close relcache entry, but keep lock until transaction commit */
     654         532 :     table_close(OldHeap, NoLock);
     655             : 
     656             :     /* Create the transient table that will receive the re-ordered data */
     657         532 :     OIDNewHeap = make_new_heap(tableOid, tableSpace,
     658             :                                accessMethod,
     659             :                                relpersistence,
     660             :                                AccessExclusiveLock);
     661             : 
     662             :     /* Copy the heap data into the new table in the desired order */
     663         532 :     copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
     664             :                     &swap_toast_by_content, &frozenXid, &cutoffMulti);
     665             : 
     666             :     /*
     667             :      * Swap the physical files of the target and transient tables, then
     668             :      * rebuild the target's indexes and throw away the transient table.
                     :      *
                     :      * swap_toast_by_content, frozenXid and cutoffMulti are the values
                     :      * copy_table_data just returned.  NOTE(review): the literal false/true
                     :      * arguments are positional flags of finish_heap_swap; confirm their
                     :      * meaning against that function's declaration.
     669             :      */
     670         532 :     finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
     671             :                      swap_toast_by_content, false, true,
     672             :                      frozenXid, cutoffMulti,
     673             :                      relpersistence);
     674         526 : }
     675             : 
     676             : 
     677             : /*
     678             :  * Create the transient table that will be filled with new data during
     679             :  * CLUSTER, ALTER TABLE, and similar operations.  The transient table
     680             :  * duplicates the logical structure of the OldHeap, but will have the
     681             :  * specified physical storage properties NewTableSpace, NewAccessMethod, and
     682             :  * relpersistence.
     683             :  *
     684             :  * After this, the caller should load the new heap with transferred/modified
     685             :  * data, then call finish_heap_swap to complete the operation.
                     :  *
                     :  * The old heap is opened with lockmode, and the same lockmode is passed on
                     :  * to NewHeapCreateToastTable; the old heap's relcache entry is closed with
                     :  * NoLock before returning.  The return value is the OID of the new heap.
     686             :  */
     687             : Oid
     688        2006 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
     689             :               char relpersistence, LOCKMODE lockmode)
     690             : {
     691             :     TupleDesc   OldHeapDesc;
     692             :     char        NewHeapName[NAMEDATALEN];
     693             :     Oid         OIDNewHeap;
     694             :     Oid         toastid;
     695             :     Relation    OldHeap;
     696             :     HeapTuple   tuple;
     697             :     Datum       reloptions;
     698             :     bool        isNull;
     699             :     Oid         namespaceid;
     700             : 
     701        2006 :     OldHeap = table_open(OIDOldHeap, lockmode);
     702        2006 :     OldHeapDesc = RelationGetDescr(OldHeap);
     703             : 
     704             :     /*
     705             :      * Note that the NewHeap will not receive any of the defaults or
     706             :      * constraints associated with the OldHeap; we don't need 'em, and there's
     707             :      * no reason to spend cycles inserting them into the catalogs only to
     708             :      * delete them.
     709             :      */
     710             : 
     711             :     /*
     712             :      * But we do want to use reloptions of the old heap for new heap.
                     :      *
                     :      * The syscache tuple looked up here is released only after
                     :      * heap_create_with_catalog has been called with the reloptions value
                     :      * fetched from it.  A null reloptions attribute is passed on as a zero
                     :      * Datum.
     713             :      */
     714        2006 :     tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
     715        2006 :     if (!HeapTupleIsValid(tuple))
     716           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
     717        2006 :     reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     718             :                                  &isNull);
     719        2006 :     if (isNull)
     720        1968 :         reloptions = (Datum) 0;
     721             : 
     722        2006 :     if (relpersistence == RELPERSISTENCE_TEMP)
     723         146 :         namespaceid = LookupCreationNamespace("pg_temp");
     724             :     else
     725        1860 :         namespaceid = RelationGetNamespace(OldHeap);
     726             : 
     727             :     /*
     728             :      * Create the new heap, using a temporary name in the same namespace as
     729             :      * the existing table.  NOTE: there is some risk of collision with user
     730             :      * relnames.  Working around this seems more trouble than it's worth; in
     731             :      * particular, we can't create the new heap in a different namespace from
     732             :      * the old, or we will have problems with the TEMP status of temp tables.
     733             :      *
     734             :      * Note: the new heap is not a shared relation, even if we are rebuilding
     735             :      * a shared rel.  However, we do make the new heap mapped if the source is
     736             :      * mapped.  This simplifies swap_relation_files, and is absolutely
     737             :      * necessary for rebuilding pg_class, for reasons explained there.
                     :      *
                     :      * The temporary name is pg_temp_ followed by the old heap's OID.  When
                     :      * relpersistence is RELPERSISTENCE_TEMP, the namespace selected above is
                     :      * the one LookupCreationNamespace returns for pg_temp rather than the
                     :      * namespace read from the old heap.
     738             :      */
     739        2006 :     snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
     740             : 
     741        2006 :     OIDNewHeap = heap_create_with_catalog(NewHeapName,
     742             :                                           namespaceid,
     743             :                                           NewTableSpace,
     744             :                                           InvalidOid,
     745             :                                           InvalidOid,
     746             :                                           InvalidOid,
     747        2006 :                                           OldHeap->rd_rel->relowner,
     748             :                                           NewAccessMethod,
     749             :                                           OldHeapDesc,
     750             :                                           NIL,
     751             :                                           RELKIND_RELATION,
     752             :                                           relpersistence,
     753             :                                           false,
     754        2006 :                                           RelationIsMapped(OldHeap),
     755             :                                           ONCOMMIT_NOOP,
     756             :                                           reloptions,
     757             :                                           false,
     758             :                                           true,
     759             :                                           true,
     760             :                                           OIDOldHeap,
     761             :                                           NULL);
     762             :     Assert(OIDNewHeap != InvalidOid);
     763             : 
     764        2006 :     ReleaseSysCache(tuple);
     765             : 
     766             :     /*
     767             :      * Advance command counter so that the newly-created relation's catalog
     768             :      * tuples will be visible to table_open.
     769             :      */
     770        2006 :     CommandCounterIncrement();
     771             : 
     772             :     /*
     773             :      * If necessary, create a TOAST table for the new relation.
     774             :      *
     775             :      * If the relation doesn't have a TOAST table already, we can't need one
     776             :      * for the new relation.  The other way around is possible though: if some
     777             :      * wide columns have been dropped, NewHeapCreateToastTable can decide that
     778             :      * no TOAST table is needed for the new table.
     779             :      *
     780             :      * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
     781             :      * that the TOAST table will be visible for insertion.
     782             :      */
     783        2006 :     toastid = OldHeap->rd_rel->reltoastrelid;
     784        2006 :     if (OidIsValid(toastid))
     785             :     {
     786             :         /* keep the existing toast table's reloptions, if any */
     787         828 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
     788         828 :         if (!HeapTupleIsValid(tuple))
     789           0 :             elog(ERROR, "cache lookup failed for relation %u", toastid);
     790         828 :         reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     791             :                                      &isNull);
     792         828 :         if (isNull)
     793         828 :             reloptions = (Datum) 0;
     794             : 
     795         828 :         NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
     796             : 
     797         828 :         ReleaseSysCache(tuple);
     798             :     }
     799             : 
     800        2006 :     table_close(OldHeap, NoLock);
     801             : 
     802        2006 :     return OIDNewHeap;
     803             : }
     804             : 
     805             : /*
     806             :  * Do the physical copying of table data.
     807             :  *
                     :  * OIDNewHeap and OIDOldHeap identify the destination and source tables.
                     :  * OIDOldIndex is the index to order by, or InvalidOid for none.  verbose
                     :  * selects INFO rather than DEBUG2 as the level of the progress messages.
                     :  *
     808             :  * There are three output parameters:
     809             :  * *pSwapToastByContent is set true if toast tables must be swapped by content.
     810             :  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
     811             :  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
                     :  *
                     :  * Both heaps and the index (if any) are opened with AccessExclusiveLock
                     :  * and closed with NoLock.  Afterwards relpages/reltuples in the new heap's
                     :  * pg_class row are updated from the copy, unless the old heap is pg_class
                     :  * itself, in which case only a relcache invalidation is issued.
     812             :  */
     813             : static void
     814         532 : copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
     815             :                 bool *pSwapToastByContent, TransactionId *pFreezeXid,
     816             :                 MultiXactId *pCutoffMulti)
     817             : {
     818             :     Relation    NewHeap,
     819             :                 OldHeap,
     820             :                 OldIndex;
     821             :     Relation    relRelation;
     822             :     HeapTuple   reltup;
     823             :     Form_pg_class relform;
     824             :     TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
     825             :     TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
     826             :     VacuumParams params;
     827             :     struct VacuumCutoffs cutoffs;
     828             :     bool        use_sort;
     829         532 :     double      num_tuples = 0,
     830         532 :                 tups_vacuumed = 0,
     831         532 :                 tups_recently_dead = 0;
     832             :     BlockNumber num_pages;
     833         532 :     int         elevel = verbose ? INFO : DEBUG2;
     834             :     PGRUsage    ru0;
     835             :     char       *nspname;
     836             : 
     837         532 :     pg_rusage_init(&ru0);
     838             : 
     839             :     /*
     840             :      * Open the relations we need.
     841             :      */
     842         532 :     NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
     843         532 :     OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
     844         532 :     if (OidIsValid(OIDOldIndex))
     845         188 :         OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
     846             :     else
     847         344 :         OldIndex = NULL;
     848             : 
     849             :     /* Store a copy of the namespace name for logging purposes */
     850         532 :     nspname = get_namespace_name(RelationGetNamespace(OldHeap));
     851             : 
     852             :     /*
     853             :      * Their tuple descriptors should be exactly alike, but here we only need
     854             :      * assume that they have the same number of columns.
     855             :      */
     856         532 :     oldTupDesc = RelationGetDescr(OldHeap);
     857         532 :     newTupDesc = RelationGetDescr(NewHeap);
     858             :     Assert(newTupDesc->natts == oldTupDesc->natts);
     859             : 
     860             :     /*
     861             :      * If the OldHeap has a toast table, get lock on the toast table to keep
     862             :      * it from being vacuumed.  This is needed because autovacuum processes
     863             :      * toast tables independently of their main tables, with no lock on the
     864             :      * latter.  If an autovacuum were to start on the toast table after we
     865             :      * compute our OldestXmin below, it would use a later OldestXmin, and then
     866             :      * possibly remove as DEAD toast tuples belonging to main tuples we think
     867             :      * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
     868             :      * tuples.
     869             :      *
     870             :      * We don't need to open the toast relation here, just lock it.  The lock
     871             :      * will be held till end of transaction.
     872             :      */
     873         532 :     if (OldHeap->rd_rel->reltoastrelid)
     874         184 :         LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
     875             : 
     876             :     /*
     877             :      * If both tables have TOAST tables, perform toast swap by content.  It is
     878             :      * possible that the old table has a toast table but the new one doesn't,
     879             :      * if toastable columns have been dropped.  In that case we have to do
     880             :      * swap by links.  This is okay because swap by content is only essential
     881             :      * for system catalogs, and we don't support schema changes for them.
     882             :      */
     883         532 :     if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
     884             :     {
     885         184 :         *pSwapToastByContent = true;
     886             : 
     887             :         /*
     888             :          * When doing swap by content, any toast pointers written into NewHeap
     889             :          * must use the old toast table's OID, because that's where the toast
     890             :          * data will eventually be found.  Set this up by setting rd_toastoid.
     891             :          * This also tells toast_save_datum() to preserve the toast value
     892             :          * OIDs, which we want so as not to invalidate toast pointers in
     893             :          * system catalog caches, and to avoid making multiple copies of a
     894             :          * single toast value.
     895             :          *
     896             :          * Note that we must hold NewHeap open until we are done writing data,
     897             :          * since the relcache will not guarantee to remember this setting once
     898             :          * the relation is closed.  Also, this technique depends on the fact
     899             :          * that no one will try to read from the NewHeap until after we've
     900             :          * finished writing it and swapping the rels --- otherwise they could
     901             :          * follow the toast pointers to the wrong place.  (It would actually
     902             :          * work for values copied over from the old toast table, but not for
     903             :          * any values that we toast which were previously not toasted.)
     904             :          */
     905         184 :         NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
     906             :     }
     907             :     else
     908         348 :         *pSwapToastByContent = false;
     909             : 
     910             :     /*
     911             :      * Compute xids used to freeze and weed out dead tuples and multixacts.
     912             :      * Since we're going to rewrite the whole table anyway, there's no reason
     913             :      * not to be aggressive about this.
                     :      *
                     :      * params is zero-filled before being handed to vacuum_get_cutoffs.
     914             :      */
     915         532 :     memset(&params, 0, sizeof(VacuumParams));
     916         532 :     vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
     917             : 
     918             :     /*
     919             :      * FreezeXid will become the table's new relfrozenxid, and that mustn't go
     920             :      * backwards, so take the max.
     921             :      */
     922             :     {
     923         532 :         TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
     924             : 
     925        1064 :         if (TransactionIdIsValid(relfrozenxid) &&
     926         532 :             TransactionIdPrecedes(cutoffs.FreezeLimit, relfrozenxid))
     927         104 :             cutoffs.FreezeLimit = relfrozenxid;
     928             :     }
     929             : 
     930             :     /*
     931             :      * MultiXactCutoff, similarly, shouldn't go backwards either.
     932             :      */
     933             :     {
     934         532 :         MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
     935             : 
     936        1064 :         if (MultiXactIdIsValid(relminmxid) &&
     937         532 :             MultiXactIdPrecedes(cutoffs.MultiXactCutoff, relminmxid))
     938           0 :             cutoffs.MultiXactCutoff = relminmxid;
     939             :     }
     940             : 
     941             :     /*
     942             :      * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     943             :      * the OldHeap.  We know how to use a sort to duplicate the ordering of a
     944             :      * btree index, and will use seqscan-and-sort for that case if the planner
     945             :      * tells us it's cheaper.  Otherwise, always indexscan if an index is
     946             :      * provided, else plain seqscan.
     947             :      */
     948         532 :     if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
     949         188 :         use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
     950             :     else
     951         344 :         use_sort = false;
     952             : 
     953             :     /* Log what we're doing */
     954         532 :     if (OldIndex != NULL && !use_sort)
     955          78 :         ereport(elevel,
     956             :                 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
     957             :                         nspname,
     958             :                         RelationGetRelationName(OldHeap),
     959             :                         RelationGetRelationName(OldIndex))));
     960         454 :     else if (use_sort)
     961         110 :         ereport(elevel,
     962             :                 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
     963             :                         nspname,
     964             :                         RelationGetRelationName(OldHeap))));
     965             :     else
     966         344 :         ereport(elevel,
     967             :                 (errmsg("vacuuming \"%s.%s\"",
     968             :                         nspname,
     969             :                         RelationGetRelationName(OldHeap))));
     970             : 
     971             :     /*
     972             :      * Hand off the actual copying to the AM-specific function; the generic
     973             :      * code cannot know how to deal with visibility across AMs.  Note that
     974             :      * this routine is allowed to set FreezeXid / MultiXactCutoff to different
     975             :      * values (e.g. because the AM doesn't use freezing).
     976             :      */
     977         532 :     table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
     978             :                                     cutoffs.OldestXmin, &cutoffs.FreezeLimit,
     979             :                                     &cutoffs.MultiXactCutoff,
     980             :                                     &num_tuples, &tups_vacuumed,
     981             :                                     &tups_recently_dead);
     982             : 
     983             :     /* return selected values to caller, get set as relfrozenxid/minmxid */
     984         532 :     *pFreezeXid = cutoffs.FreezeLimit;
     985         532 :     *pCutoffMulti = cutoffs.MultiXactCutoff;
     986             : 
     987             :     /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
     988         532 :     NewHeap->rd_toastoid = InvalidOid;
     989             : 
     990         532 :     num_pages = RelationGetNumberOfBlocks(NewHeap);
     991             : 
     992             :     /*
                     :      * Log what we did.  The page count reported in the message is the old
                     :      * heap's; num_pages, stored into pg_class below, is the new heap's.
                     :      */
     993         532 :     ereport(elevel,
     994             :             (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
     995             :                     nspname,
     996             :                     RelationGetRelationName(OldHeap),
     997             :                     tups_vacuumed, num_tuples,
     998             :                     RelationGetNumberOfBlocks(OldHeap)),
     999             :              errdetail("%.0f dead row versions cannot be removed yet.\n"
    1000             :                        "%s.",
    1001             :                        tups_recently_dead,
    1002             :                        pg_rusage_show(&ru0))));
    1003             : 
    1004         532 :     if (OldIndex != NULL)
    1005         188 :         index_close(OldIndex, NoLock);
    1006         532 :     table_close(OldHeap, NoLock);
    1007         532 :     table_close(NewHeap, NoLock);
    1008             : 
    1009             :     /* Update pg_class to reflect the correct values of pages and tuples. */
    1010         532 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1011             : 
    1012         532 :     reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
    1013         532 :     if (!HeapTupleIsValid(reltup))
    1014           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
    1015         532 :     relform = (Form_pg_class) GETSTRUCT(reltup);
    1016             : 
    1017         532 :     relform->relpages = num_pages;
    1018         532 :     relform->reltuples = num_tuples;
    1019             : 
    1020             :     /* Don't update the stats for pg_class.  See swap_relation_files. */
    1021         532 :     if (OIDOldHeap != RelationRelationId)
    1022         508 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1023             :     else
    1024          24 :         CacheInvalidateRelcacheByTuple(reltup);
    1025             : 
    1026             :     /* Clean up. */
    1027         532 :     heap_freetuple(reltup);
    1028         532 :     table_close(relRelation, RowExclusiveLock);
    1029             : 
    1030             :     /* Make the update visible */
    1031         532 :     CommandCounterIncrement();
    1032         532 : }
    1033             : 
    1034             : /*
    1035             :  * Swap the physical files of two given relations.
    1036             :  *
    1037             :  * We swap the physical identity (reltablespace, relfilenumber) while keeping
    1038             :  * the same logical identities of the two relations.  relpersistence is also
    1039             :  * swapped, which is critical since it determines where buffers live for each
    1040             :  * relation.
    1041             :  *
    1042             :  * We can swap associated TOAST data in either of two ways: recursively swap
    1043             :  * the physical content of the toast tables (and their indexes), or swap the
    1044             :  * TOAST links in the given relations' pg_class entries.  The former is needed
    1045             :  * to manage rewrites of shared catalogs (where we cannot change the pg_class
    1046             :  * links) while the latter is the only way to handle cases in which a toast
    1047             :  * table is added or removed altogether.
    1048             :  *
    1049             :  * Additionally, the first relation is marked with relfrozenxid set to
    1050             :  * frozenXid.  It seems a bit ugly to have this here, but the caller would
    1051             :  * have to do it anyway, so having it here saves a heap_update.  Note: in
    1052             :  * the swap-toast-links case, we assume we don't need to change the toast
    1053             :  * table's relfrozenxid: the new version of the toast table should already
    1054             :  * have relfrozenxid set to RecentXmin, which is good enough.
    1055             :  *
    1056             :  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
    1057             :  * their OIDs are emitted into mapped_tables[].  This is hacky but beats
    1058             :  * having to look the information up again later in finish_heap_swap.
    1059             :  */
    1060             : static void
    1061        2236 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
    1062             :                     bool swap_toast_by_content,
    1063             :                     bool is_internal,
    1064             :                     TransactionId frozenXid,
    1065             :                     MultiXactId cutoffMulti,
    1066             :                     Oid *mapped_tables)
    1067             : {
    1068             :     Relation    relRelation;
    1069             :     HeapTuple   reltup1,
    1070             :                 reltup2;
    1071             :     Form_pg_class relform1,
    1072             :                 relform2;
    1073             :     RelFileNumber relfilenumber1,
    1074             :                 relfilenumber2;
    1075             :     RelFileNumber swaptemp;
    1076             :     char        swptmpchr;
    1077             :     Oid         relam1,
    1078             :                 relam2;
    1079             : 
    1080             :     /* We need writable copies of both pg_class tuples. */
    1081        2236 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1082             : 
    1083        2236 :     reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    1084        2236 :     if (!HeapTupleIsValid(reltup1))
    1085           0 :         elog(ERROR, "cache lookup failed for relation %u", r1);
    1086        2236 :     relform1 = (Form_pg_class) GETSTRUCT(reltup1);
    1087             : 
    1088        2236 :     reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    1089        2236 :     if (!HeapTupleIsValid(reltup2))
    1090           0 :         elog(ERROR, "cache lookup failed for relation %u", r2);
    1091        2236 :     relform2 = (Form_pg_class) GETSTRUCT(reltup2);
    1092             : 
    1093        2236 :     relfilenumber1 = relform1->relfilenode;
    1094        2236 :     relfilenumber2 = relform2->relfilenode;
    1095        2236 :     relam1 = relform1->relam;
    1096        2236 :     relam2 = relform2->relam;
    1097             : 
    1098        2236 :     if (RelFileNumberIsValid(relfilenumber1) &&
    1099             :         RelFileNumberIsValid(relfilenumber2))
    1100             :     {
    1101             :         /*
    1102             :          * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
    1103             :          * relpersistence
    1104             :          */
    1105             :         Assert(!target_is_pg_class);
    1106             : 
    1107        2082 :         swaptemp = relform1->relfilenode;
    1108        2082 :         relform1->relfilenode = relform2->relfilenode;
    1109        2082 :         relform2->relfilenode = swaptemp;
    1110             : 
    1111        2082 :         swaptemp = relform1->reltablespace;
    1112        2082 :         relform1->reltablespace = relform2->reltablespace;
    1113        2082 :         relform2->reltablespace = swaptemp;
    1114             : 
    1115        2082 :         swaptemp = relform1->relam;
    1116        2082 :         relform1->relam = relform2->relam;
    1117        2082 :         relform2->relam = swaptemp;
    1118             : 
    1119        2082 :         swptmpchr = relform1->relpersistence;
    1120        2082 :         relform1->relpersistence = relform2->relpersistence;
    1121        2082 :         relform2->relpersistence = swptmpchr;
    1122             : 
    1123             :         /* Also swap toast links, if we're swapping by links */
    1124        2082 :         if (!swap_toast_by_content)
    1125             :         {
    1126        1650 :             swaptemp = relform1->reltoastrelid;
    1127        1650 :             relform1->reltoastrelid = relform2->reltoastrelid;
    1128        1650 :             relform2->reltoastrelid = swaptemp;
    1129             :         }
    1130             :     }
    1131             :     else
    1132             :     {
    1133             :         /*
    1134             :          * Mapped-relation case.  Here we have to swap the relation mappings
    1135             :          * instead of modifying the pg_class columns.  Both must be mapped.
    1136             :          */
    1137         154 :         if (RelFileNumberIsValid(relfilenumber1) ||
    1138             :             RelFileNumberIsValid(relfilenumber2))
    1139           0 :             elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
    1140             :                  NameStr(relform1->relname));
    1141             : 
    1142             :         /*
    1143             :          * We can't change the tablespace nor persistence of a mapped rel, and
    1144             :          * we can't handle toast link swapping for one either, because we must
    1145             :          * not apply any critical changes to its pg_class row.  These cases
    1146             :          * should be prevented by upstream permissions tests, so these checks
    1147             :          * are a non-user-facing emergency backstop.
    1148             :          */
    1149         154 :         if (relform1->reltablespace != relform2->reltablespace)
    1150           0 :             elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
    1151             :                  NameStr(relform1->relname));
    1152         154 :         if (relform1->relpersistence != relform2->relpersistence)
    1153           0 :             elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
    1154             :                  NameStr(relform1->relname));
    1155         154 :         if (relform1->relam != relform2->relam)
    1156           0 :             elog(ERROR, "cannot change access method of mapped relation \"%s\"",
    1157             :                  NameStr(relform1->relname));
    1158         154 :         if (!swap_toast_by_content &&
    1159          34 :             (relform1->reltoastrelid || relform2->reltoastrelid))
    1160           0 :             elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
    1161             :                  NameStr(relform1->relname));
    1162             : 
    1163             :         /*
    1164             :          * Fetch the mappings --- shouldn't fail, but be paranoid
    1165             :          */
    1166         154 :         relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
    1167         154 :         if (!RelFileNumberIsValid(relfilenumber1))
    1168           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1169             :                  NameStr(relform1->relname), r1);
    1170         154 :         relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
    1171         154 :         if (!RelFileNumberIsValid(relfilenumber2))
    1172           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1173             :                  NameStr(relform2->relname), r2);
    1174             : 
    1175             :         /*
    1176             :          * Send replacement mappings to relmapper.  Note these won't actually
    1177             :          * take effect until CommandCounterIncrement.
    1178             :          */
    1179         154 :         RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
    1180         154 :         RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
    1181             : 
    1182             :         /* Pass OIDs of mapped r2 tables back to caller */
    1183         154 :         *mapped_tables++ = r2;
    1184             :     }
    1185             : 
    1186             :     /*
    1187             :      * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
    1188             :      * subtransaction. The rel2 storage (swapped from rel1) may or may not be
    1189             :      * new.
    1190             :      */
    1191             :     {
    1192             :         Relation    rel1,
    1193             :                     rel2;
    1194             : 
    1195        2236 :         rel1 = relation_open(r1, NoLock);
    1196        2236 :         rel2 = relation_open(r2, NoLock);
    1197        2236 :         rel2->rd_createSubid = rel1->rd_createSubid;
    1198        2236 :         rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
    1199        2236 :         rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
    1200        2236 :         RelationAssumeNewRelfilelocator(rel1);
    1201        2236 :         relation_close(rel1, NoLock);
    1202        2236 :         relation_close(rel2, NoLock);
    1203             :     }
    1204             : 
    1205             :     /*
    1206             :      * In the case of a shared catalog, these next few steps will only affect
    1207             :      * our own database's pg_class row; but that's okay, because they are all
    1208             :      * noncritical updates.  That's also an important fact for the case of a
    1209             :      * mapped catalog, because it's possible that we'll commit the map change
    1210             :      * and then fail to commit the pg_class update.
    1211             :      */
    1212             : 
    1213             :     /* set rel1's frozen Xid and minimum MultiXid */
    1214        2236 :     if (relform1->relkind != RELKIND_INDEX)
    1215             :     {
    1216             :         Assert(!TransactionIdIsValid(frozenXid) ||
    1217             :                TransactionIdIsNormal(frozenXid));
    1218        2052 :         relform1->relfrozenxid = frozenXid;
    1219        2052 :         relform1->relminmxid = cutoffMulti;
    1220             :     }
    1221             : 
    1222             :     /* swap size statistics too, since new rel has freshly-updated stats */
    1223             :     {
    1224             :         int32       swap_pages;
    1225             :         float4      swap_tuples;
    1226             :         int32       swap_allvisible;
    1227             : 
    1228        2236 :         swap_pages = relform1->relpages;
    1229        2236 :         relform1->relpages = relform2->relpages;
    1230        2236 :         relform2->relpages = swap_pages;
    1231             : 
    1232        2236 :         swap_tuples = relform1->reltuples;
    1233        2236 :         relform1->reltuples = relform2->reltuples;
    1234        2236 :         relform2->reltuples = swap_tuples;
    1235             : 
    1236        2236 :         swap_allvisible = relform1->relallvisible;
    1237        2236 :         relform1->relallvisible = relform2->relallvisible;
    1238        2236 :         relform2->relallvisible = swap_allvisible;
    1239             :     }
    1240             : 
    1241             :     /*
    1242             :      * Update the tuples in pg_class --- unless the target relation of the
    1243             :      * swap is pg_class itself.  In that case, there is zero point in making
    1244             :      * changes because we'd be updating the old data that we're about to throw
    1245             :      * away.  Because the real work being done here for a mapped relation is
    1246             :      * just to change the relation map settings, it's all right to not update
    1247             :      * the pg_class rows in this case. The most important changes will instead
    1248             :      * be performed later, in finish_heap_swap() itself.
    1249             :      */
    1250        2236 :     if (!target_is_pg_class)
    1251             :     {
    1252             :         CatalogIndexState indstate;
    1253             : 
    1254        2212 :         indstate = CatalogOpenIndexes(relRelation);
    1255        2212 :         CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
    1256             :                                    indstate);
    1257        2212 :         CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
    1258             :                                    indstate);
    1259        2212 :         CatalogCloseIndexes(indstate);
    1260             :     }
    1261             :     else
    1262             :     {
    1263             :         /* no update ... but we do still need relcache inval */
    1264          24 :         CacheInvalidateRelcacheByTuple(reltup1);
    1265          24 :         CacheInvalidateRelcacheByTuple(reltup2);
    1266             :     }
    1267             : 
    1268             :     /*
    1269             :      * Now that pg_class has been updated with its relevant information for
    1270             :      * the swap, update the dependency of the relations to point to their new
    1271             :      * table AM, if it has changed.
    1272             :      */
    1273        2236 :     if (relam1 != relam2)
    1274             :     {
    1275          36 :         if (changeDependencyFor(RelationRelationId,
    1276             :                                 r1,
    1277             :                                 AccessMethodRelationId,
    1278             :                                 relam1,
    1279             :                                 relam2) != 1)
    1280           0 :             elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
    1281             :                  get_namespace_name(get_rel_namespace(r1)),
    1282             :                  get_rel_name(r1));
    1283          36 :         if (changeDependencyFor(RelationRelationId,
    1284             :                                 r2,
    1285             :                                 AccessMethodRelationId,
    1286             :                                 relam2,
    1287             :                                 relam1) != 1)
    1288           0 :             elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
    1289             :                  get_namespace_name(get_rel_namespace(r2)),
    1290             :                  get_rel_name(r2));
    1291             :     }
    1292             : 
    1293             :     /*
    1294             :      * Post alter hook for modified relations. The change to r2 is always
    1295             :      * internal, but r1 depends on the invocation context.
    1296             :      */
    1297        2236 :     InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
    1298             :                                  InvalidOid, is_internal);
    1299        2236 :     InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
    1300             :                                  InvalidOid, true);
    1301             : 
    1302             :     /*
    1303             :      * If we have toast tables associated with the relations being swapped,
    1304             :      * deal with them too.
    1305             :      */
    1306        2236 :     if (relform1->reltoastrelid || relform2->reltoastrelid)
    1307             :     {
    1308         786 :         if (swap_toast_by_content)
    1309             :         {
    1310         184 :             if (relform1->reltoastrelid && relform2->reltoastrelid)
    1311             :             {
    1312             :                 /* Recursively swap the contents of the toast tables */
    1313         184 :                 swap_relation_files(relform1->reltoastrelid,
    1314             :                                     relform2->reltoastrelid,
    1315             :                                     target_is_pg_class,
    1316             :                                     swap_toast_by_content,
    1317             :                                     is_internal,
    1318             :                                     frozenXid,
    1319             :                                     cutoffMulti,
    1320             :                                     mapped_tables);
    1321             :             }
    1322             :             else
    1323             :             {
    1324             :                 /* caller messed up */
    1325           0 :                 elog(ERROR, "cannot swap toast files by content when there's only one");
    1326             :             }
    1327             :         }
    1328             :         else
    1329             :         {
    1330             :             /*
    1331             :              * We swapped the ownership links, so we need to change dependency
    1332             :              * data to match.
    1333             :              *
    1334             :              * NOTE: it is possible that only one table has a toast table.
    1335             :              *
    1336             :              * NOTE: at present, a TOAST table's only dependency is the one on
    1337             :              * its owning table.  If more are ever created, we'd need to use
    1338             :              * something more selective than deleteDependencyRecordsFor() to
    1339             :              * get rid of just the link we want.
    1340             :              */
    1341             :             ObjectAddress baseobject,
    1342             :                         toastobject;
    1343             :             long        count;
    1344             : 
    1345             :             /*
    1346             :              * We disallow this case for system catalogs, to avoid the
    1347             :              * possibility that the catalog we're rebuilding is one of the
    1348             :              * ones the dependency changes would change.  It's too late to be
    1349             :              * making any data changes to the target catalog.
    1350             :              */
    1351         602 :             if (IsSystemClass(r1, relform1))
    1352           0 :                 elog(ERROR, "cannot swap toast files by links for system catalogs");
    1353             : 
    1354             :             /* Delete old dependencies */
    1355         602 :             if (relform1->reltoastrelid)
    1356             :             {
    1357         570 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1358             :                                                    relform1->reltoastrelid,
    1359             :                                                    false);
    1360         570 :                 if (count != 1)
    1361           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1362             :                          count);
    1363             :             }
    1364         602 :             if (relform2->reltoastrelid)
    1365             :             {
    1366         602 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1367             :                                                    relform2->reltoastrelid,
    1368             :                                                    false);
    1369         602 :                 if (count != 1)
    1370           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1371             :                          count);
    1372             :             }
    1373             : 
    1374             :             /* Register new dependencies */
    1375         602 :             baseobject.classId = RelationRelationId;
    1376         602 :             baseobject.objectSubId = 0;
    1377         602 :             toastobject.classId = RelationRelationId;
    1378         602 :             toastobject.objectSubId = 0;
    1379             : 
    1380         602 :             if (relform1->reltoastrelid)
    1381             :             {
    1382         570 :                 baseobject.objectId = r1;
    1383         570 :                 toastobject.objectId = relform1->reltoastrelid;
    1384         570 :                 recordDependencyOn(&toastobject, &baseobject,
    1385             :                                    DEPENDENCY_INTERNAL);
    1386             :             }
    1387             : 
    1388         602 :             if (relform2->reltoastrelid)
    1389             :             {
    1390         602 :                 baseobject.objectId = r2;
    1391         602 :                 toastobject.objectId = relform2->reltoastrelid;
    1392         602 :                 recordDependencyOn(&toastobject, &baseobject,
    1393             :                                    DEPENDENCY_INTERNAL);
    1394             :             }
    1395             :         }
    1396             :     }
    1397             : 
    1398             :     /*
    1399             :      * If we're swapping two toast tables by content, do the same for their
    1400             :      * valid index. The swap can actually be safely done only if the relations
    1401             :      * have indexes.
    1402             :      */
    1403        2236 :     if (swap_toast_by_content &&
    1404         552 :         relform1->relkind == RELKIND_TOASTVALUE &&
    1405         184 :         relform2->relkind == RELKIND_TOASTVALUE)
    1406             :     {
    1407             :         Oid         toastIndex1,
    1408             :                     toastIndex2;
    1409             : 
    1410             :         /* Get valid index for each relation */
    1411         184 :         toastIndex1 = toast_get_valid_index(r1,
    1412             :                                             AccessExclusiveLock);
    1413         184 :         toastIndex2 = toast_get_valid_index(r2,
    1414             :                                             AccessExclusiveLock);
    1415             : 
    1416         184 :         swap_relation_files(toastIndex1,
    1417             :                             toastIndex2,
    1418             :                             target_is_pg_class,
    1419             :                             swap_toast_by_content,
    1420             :                             is_internal,
    1421             :                             InvalidTransactionId,
    1422             :                             InvalidMultiXactId,
    1423             :                             mapped_tables);
    1424             :     }
    1425             : 
    1426             :     /* Clean up. */
    1427        2236 :     heap_freetuple(reltup1);
    1428        2236 :     heap_freetuple(reltup2);
    1429             : 
    1430        2236 :     table_close(relRelation, RowExclusiveLock);
    1431        2236 : }
    1432             : 
    1433             : /*
    1434             :  * Remove the transient table that was built by make_new_heap, and finish
    1435             :  * cleaning up (including rebuilding all indexes on the old heap).
    1436             :  */
    1437             : void
    1438        1868 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
    1439             :                  bool is_system_catalog,
    1440             :                  bool swap_toast_by_content,
    1441             :                  bool check_constraints,
    1442             :                  bool is_internal,
    1443             :                  TransactionId frozenXid,
    1444             :                  MultiXactId cutoffMulti,
    1445             :                  char newrelpersistence)
    1446             : {
    1447             :     ObjectAddress object;
    1448             :     Oid         mapped_tables[4];
    1449             :     int         reindex_flags;
    1450        1868 :     ReindexParams reindex_params = {0};
    1451             :     int         i;
    1452             : 
    1453             :     /* Report that we are now swapping relation files */
    1454        1868 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1455             :                                  PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
    1456             : 
    1457             :     /* Zero out possible results from swap_relation_files */
    1458        1868 :     memset(mapped_tables, 0, sizeof(mapped_tables));
    1459             : 
    1460             :     /*
    1461             :      * Swap the contents of the heap relations (including any toast tables).
    1462             :      * Also set old heap's relfrozenxid to frozenXid.
    1463             :      */
    1464        1868 :     swap_relation_files(OIDOldHeap, OIDNewHeap,
    1465             :                         (OIDOldHeap == RelationRelationId),
    1466             :                         swap_toast_by_content, is_internal,
    1467             :                         frozenXid, cutoffMulti, mapped_tables);
    1468             : 
    1469             :     /*
    1470             :      * If it's a system catalog, queue a sinval message to flush all catcaches
    1471             :      * on the catalog when we reach CommandCounterIncrement.
    1472             :      */
    1473        1868 :     if (is_system_catalog)
    1474         202 :         CacheInvalidateCatalog(OIDOldHeap);
    1475             : 
    1476             :     /*
    1477             :      * Rebuild each index on the relation (but not the toast table, which is
    1478             :      * all-new at this point).  It is important to do this before the DROP
    1479             :      * step because if we are processing a system catalog that will be used
    1480             :      * during DROP, we want to have its indexes available.  There is no
    1481             :      * advantage to the other order anyway because this is all transactional,
    1482             :      * so no chance to reclaim disk space before commit.  We do not need a
    1483             :      * final CommandCounterIncrement() because reindex_relation does it.
    1484             :      *
    1485             :      * Note: because index_build is called via reindex_relation, it will never
    1486             :      * set indcheckxmin true for the indexes.  This is OK even though in some
    1487             :      * sense we are building new indexes rather than rebuilding existing ones,
    1488             :      * because the new heap won't contain any HOT chains at all, let alone
    1489             :      * broken ones, so it can't be necessary to set indcheckxmin.
    1490             :      */
    1491        1868 :     reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    1492        1868 :     if (check_constraints)
    1493        1336 :         reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
    1494             : 
    1495             :     /*
    1496             :      * Ensure that the indexes have the same persistence as the parent
    1497             :      * relation.
    1498             :      */
    1499        1868 :     if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
    1500          38 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    1501        1830 :     else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
    1502        1756 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
    1503             : 
    1504             :     /* Report that we are now reindexing relations */
    1505        1868 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1506             :                                  PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
    1507             : 
    1508        1868 :     reindex_relation(NULL, OIDOldHeap, reindex_flags, &reindex_params);
    1509             : 
    1510             :     /* Report that we are now doing clean up */
    1511        1850 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1512             :                                  PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
    1513             : 
    1514             :     /*
    1515             :      * If the relation being rebuilt is pg_class, swap_relation_files()
    1516             :      * couldn't update pg_class's own pg_class entry (check comments in
    1517             :      * swap_relation_files()), thus relfrozenxid was not updated. That's
    1518             :      * annoying because a potential reason for doing a VACUUM FULL is an
    1519             :      * imminent or actual anti-wraparound shutdown.  So, now that we can
    1520             :      * access the new relation using its indices, update relfrozenxid.
    1521             :      * pg_class doesn't have a toast relation, so we don't need to update the
    1522             :      * corresponding toast relation. Note that there's little point moving all
    1523             :      * relfrozenxid updates here since swap_relation_files() needs to write to
    1524             :      * pg_class for non-mapped relations anyway.
    1525             :      */
    1526        1850 :     if (OIDOldHeap == RelationRelationId)
    1527             :     {
    1528             :         Relation    relRelation;
    1529             :         HeapTuple   reltup;
    1530             :         Form_pg_class relform;
    1531             : 
    1532          24 :         relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1533             : 
    1534          24 :         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    1535          24 :         if (!HeapTupleIsValid(reltup))
    1536           0 :             elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    1537          24 :         relform = (Form_pg_class) GETSTRUCT(reltup);
    1538             : 
    1539          24 :         relform->relfrozenxid = frozenXid;
    1540          24 :         relform->relminmxid = cutoffMulti;
    1541             : 
    1542          24 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1543             : 
    1544          24 :         table_close(relRelation, RowExclusiveLock);
    1545             :     }
    1546             : 
    1547             :     /* Destroy new heap with old filenumber */
    1548        1850 :     object.classId = RelationRelationId;
    1549        1850 :     object.objectId = OIDNewHeap;
    1550        1850 :     object.objectSubId = 0;
    1551             : 
    1552             :     /*
    1553             :      * The new relation is local to our transaction and we know nothing
    1554             :      * depends on it, so DROP_RESTRICT should be OK.
    1555             :      */
    1556        1850 :     performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
    1557             : 
    1558             :     /* performDeletion does CommandCounterIncrement at end */
    1559             : 
    1560             :     /*
    1561             :      * Now we must remove any relation mapping entries that we set up for the
    1562             :      * transient table, as well as its toast table and toast index if any. If
    1563             :      * we fail to do this before commit, the relmapper will complain about new
    1564             :      * permanent map entries being added post-bootstrap.
    1565             :      */
    1566        2004 :     for (i = 0; OidIsValid(mapped_tables[i]); i++)
    1567         154 :         RelationMapRemoveMapping(mapped_tables[i]);
    1568             : 
    1569             :     /*
    1570             :      * At this point, everything is kosher except that, if we did toast swap
    1571             :      * by links, the toast table's name corresponds to the transient table.
    1572             :      * The name is irrelevant to the backend because it's referenced by OID,
    1573             :      * but users looking at the catalogs could be confused.  Rename it to
    1574             :      * prevent this problem.
    1575             :      *
    1576             :      * Note no lock required on the relation, because we already hold an
    1577             :      * exclusive lock on it.
    1578             :      */
    1579        1850 :     if (!swap_toast_by_content)
    1580             :     {
    1581             :         Relation    newrel;
    1582             : 
    1583        1666 :         newrel = table_open(OIDOldHeap, NoLock);
    1584        1666 :         if (OidIsValid(newrel->rd_rel->reltoastrelid))
    1585             :         {
    1586             :             Oid         toastidx;
    1587             :             char        NewToastName[NAMEDATALEN];
    1588             : 
    1589             :             /* Get the associated valid index to be renamed */
    1590         570 :             toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
    1591             :                                              NoLock);
    1592             : 
    1593             :             /* rename the toast table ... */
    1594         570 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
    1595             :                      OIDOldHeap);
    1596         570 :             RenameRelationInternal(newrel->rd_rel->reltoastrelid,
    1597             :                                    NewToastName, true, false);
    1598             : 
    1599             :             /* ... and its valid index too. */
    1600         570 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
    1601             :                      OIDOldHeap);
    1602             : 
    1603         570 :             RenameRelationInternal(toastidx,
    1604             :                                    NewToastName, true, true);
    1605             : 
    1606             :             /*
    1607             :              * Reset the relrewrite for the toast. The command-counter
    1608             :              * increment is required here as we are about to update the tuple
    1609             :              * that is updated as part of RenameRelationInternal.
    1610             :              */
    1611         570 :             CommandCounterIncrement();
    1612         570 :             ResetRelRewrite(newrel->rd_rel->reltoastrelid);
    1613             :         }
    1614        1666 :         relation_close(newrel, NoLock);
    1615             :     }
    1616             : 
    1617             :     /* if it's not a catalog table, clear any missing attribute settings */
    1618        1850 :     if (!is_system_catalog)
    1619             :     {
    1620             :         Relation    newrel;
    1621             : 
    1622        1648 :         newrel = table_open(OIDOldHeap, NoLock);
    1623        1648 :         RelationClearMissing(newrel);
    1624        1648 :         relation_close(newrel, NoLock);
    1625             :     }
    1626        1850 : }
    1627             : 
    1628             : 
    1629             : /*
    1630             :  * Scan pg_index for every index with indisclustered set and return a List
    1631             :  * of RelToCluster (tableOid = indrelid, indexOid = indexrelid) for those
    1632             :  * whose table passes cluster_is_permitted_for_relation() for GetUserId().
    1633             :  * Both the entries and the list cells are allocated in cluster_context.
    1634             :  */
    1635             : static List *
    1636          28 : get_tables_to_cluster(MemoryContext cluster_context)
    1637             : {
    1638             :     Relation    indRelation;
    1639             :     TableScanDesc scan;
    1640             :     ScanKeyData entry;
    1641             :     HeapTuple   indexTuple;
    1642             :     Form_pg_index index;
    1643             :     MemoryContext old_context;
    1644          28 :     List       *rtcs = NIL;
    1645             : 
    1646             :     /*
    1647             :      * Scan pg_index with a key on indisclustered = true.  The privilege
    1648             :      * check is applied per tuple inside the loop.
    1649             :      */
    1650          28 :     indRelation = table_open(IndexRelationId, AccessShareLock);
    1651          28 :     ScanKeyInit(&entry,
    1652             :                 Anum_pg_index_indisclustered,
    1653             :                 BTEqualStrategyNumber, F_BOOLEQ,
    1654             :                 BoolGetDatum(true));
    1655          28 :     scan = table_beginscan_catalog(indRelation, 1, &entry);
    1656          46 :     while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1657             :     {
    1658             :         RelToCluster *rtc;
    1659             : 
    1660          18 :         index = (Form_pg_index) GETSTRUCT(indexTuple);
    1661             : 
    1662          18 :         if (!cluster_is_permitted_for_relation(index->indrelid, GetUserId()))
    1663          12 :             continue;
    1664             : 
    1665             :         /* Allocate the entry and list cell in the caller-supplied context */
    1666           6 :         old_context = MemoryContextSwitchTo(cluster_context);
    1667             : 
    1668           6 :         rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1669           6 :         rtc->tableOid = index->indrelid;
    1670           6 :         rtc->indexOid = index->indexrelid;
    1671           6 :         rtcs = lappend(rtcs, rtc);
    1672             : 
    1673           6 :         MemoryContextSwitchTo(old_context);
    1674             :     }
    1675          28 :     table_endscan(scan);
    1676             : 
    1677          28 :     relation_close(indRelation, AccessShareLock);
    1678             : 
    1679          28 :     return rtcs;
    1680             : }
    1681             : 
    1682             : /*
    1683             :  * Given an index on a partitioned table, return a List of RelToCluster for
    1684             :  * each leaf index (relkind RELKIND_INDEX) found by find_all_inheritors(),
    1685             :  * paired with its table; tables the user may not CLUSTER are skipped.
    1686             :  * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
    1687             :  * on the table containing the index.
    1688             :  */
    1689             : static List *
    1690          20 : get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
    1691             : {
    1692             :     List       *inhoids;
    1693             :     ListCell   *lc;
    1694          20 :     List       *rtcs = NIL;
    1695             :     MemoryContext old_context;
    1696             : 
    1697             :     /* Pass NoLock: children are not locked until they're processed */
    1698          20 :     inhoids = find_all_inheritors(indexOid, NoLock, NULL);
    1699             : 
    1700         104 :     foreach(lc, inhoids)
    1701             :     {
    1702          84 :         Oid         indexrelid = lfirst_oid(lc);
    1703          84 :         Oid         relid = IndexGetRelation(indexrelid, false);
    1704             :         RelToCluster *rtc;
    1705             : 
    1706             :         /* consider only leaf indexes; skip any other relkind */
    1707          84 :         if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
    1708          38 :             continue;
    1709             : 
    1710             :         /*
    1711             :          * It's possible that the user does not have privileges to CLUSTER the
    1712             :          * leaf partition despite having such privileges on the partitioned
    1713             :          * table.  We skip any partitions which the user is not permitted to
    1714             :          * CLUSTER.
    1715             :          */
    1716          46 :         if (!cluster_is_permitted_for_relation(relid, GetUserId()))
    1717          22 :             continue;
    1718             : 
    1719             :         /* Allocate the entry and list cell in the caller-supplied context */
    1720          24 :         old_context = MemoryContextSwitchTo(cluster_context);
    1721             : 
    1722          24 :         rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1723          24 :         rtc->tableOid = relid;
    1724          24 :         rtc->indexOid = indexrelid;
    1725          24 :         rtcs = lappend(rtcs, rtc);
    1726             : 
    1727          24 :         MemoryContextSwitchTo(old_context);
    1728             :     }
    1729             : 
    1730          20 :     return rtcs;
    1731             : }
    1732             : 
    1733             : /*
    1734             :  * Return true if userid holds ACL_MAINTAIN on relid (per pg_class_aclcheck).
    1735             :  * Otherwise emit a WARNING naming the relation and return false.
    1736             :  */
    1737             : static bool
    1738          94 : cluster_is_permitted_for_relation(Oid relid, Oid userid)
    1739             : {
    1740          94 :     if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
    1741          60 :         return true;
    1742             : 
    1743          34 :     ereport(WARNING,
    1744             :             (errmsg("permission denied to cluster \"%s\", skipping it",
    1745             :                     get_rel_name(relid))));
    1746          34 :     return false;
    1747             : }

Generated by: LCOV version 1.14