LCOV - code coverage report
Current view: top level - src/backend/commands - cluster.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 408 451 90.5 %
Date: 2023-05-30 16:15:03 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * cluster.c
       4             :  *    CLUSTER a table on an index.  This is now also used for VACUUM FULL.
       5             :  *
       6             :  * There is hardly anything left of Paul Brown's original implementation...
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994-5, Regents of the University of California
      11             :  *
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *    src/backend/commands/cluster.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include "access/amapi.h"
      21             : #include "access/heapam.h"
      22             : #include "access/multixact.h"
      23             : #include "access/relscan.h"
      24             : #include "access/tableam.h"
      25             : #include "access/toast_internals.h"
      26             : #include "access/transam.h"
      27             : #include "access/xact.h"
      28             : #include "access/xlog.h"
      29             : #include "catalog/catalog.h"
      30             : #include "catalog/dependency.h"
      31             : #include "catalog/heap.h"
      32             : #include "catalog/index.h"
      33             : #include "catalog/namespace.h"
      34             : #include "catalog/objectaccess.h"
      35             : #include "catalog/partition.h"
      36             : #include "catalog/pg_am.h"
      37             : #include "catalog/pg_database.h"
      38             : #include "catalog/pg_inherits.h"
      39             : #include "catalog/toasting.h"
      40             : #include "commands/cluster.h"
      41             : #include "commands/defrem.h"
      42             : #include "commands/progress.h"
      43             : #include "commands/tablecmds.h"
      44             : #include "commands/vacuum.h"
      45             : #include "miscadmin.h"
      46             : #include "optimizer/optimizer.h"
      47             : #include "pgstat.h"
      48             : #include "storage/bufmgr.h"
      49             : #include "storage/lmgr.h"
      50             : #include "storage/predicate.h"
      51             : #include "utils/acl.h"
      52             : #include "utils/fmgroids.h"
      53             : #include "utils/guc.h"
      54             : #include "utils/inval.h"
      55             : #include "utils/lsyscache.h"
      56             : #include "utils/memutils.h"
      57             : #include "utils/pg_rusage.h"
      58             : #include "utils/relmapper.h"
      59             : #include "utils/snapmgr.h"
      60             : #include "utils/syscache.h"
      61             : #include "utils/tuplesort.h"
      62             : 
      63             : /*
      64             :  * This struct is used to pass around the information on tables to be
      65             :  * clustered. We need this so we can make a list of them when invoked without
      66             :  * a specific table/index pair.
      67             :  */
      68             : typedef struct
      69             : {
      70             :     Oid         tableOid;
      71             :     Oid         indexOid;
      72             : } RelToCluster;
      73             : 
      74             : 
      75             : static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
      76             : static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
      77             : static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
      78             :                             bool verbose, bool *pSwapToastByContent,
      79             :                             TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
      80             : static List *get_tables_to_cluster(MemoryContext cluster_context);
      81             : static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
      82             :                                                Oid indexOid);
      83             : static bool cluster_is_permitted_for_relation(Oid relid, Oid userid);
      84             : 
      85             : 
      86             : /*---------------------------------------------------------------------------
      87             :  * This cluster code allows for clustering multiple tables at once. Because
      88             :  * of this, we cannot just run everything in a single transaction, or we
      89             :  * would be forced to acquire exclusive locks on all the tables being
      90             :  * clustered, simultaneously --- very likely leading to deadlock.
      91             :  *
      92             :  * To solve this we follow a similar strategy to VACUUM code,
      93             :  * clustering each relation in a separate transaction. For this to work,
      94             :  * we need to:
      95             :  *  - provide a separate memory context so that we can pass information in
      96             :  *    a way that survives across transactions
      97             :  *  - start a new transaction every time a new relation is clustered
      98             :  *  - check for validity of the information on to-be-clustered relations,
      99             :  *    as someone might have deleted a relation behind our back, or
     100             :  *    clustered one on a different index
     101             :  *  - end the transaction
     102             :  *
     103             :  * The single-relation case does not have any such overhead.
     104             :  *
     105             :  * We also allow a relation to be specified without index.  In that case,
     106             :  * the indisclustered bit will be looked up, and an ERROR will be thrown
     107             :  * if there is no index with the bit set.
     108             :  *---------------------------------------------------------------------------
     109             :  */
     110             : void
     111         218 : cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
     112             : {
     113             :     ListCell   *lc;
     114         218 :     ClusterParams params = {0};
     115         218 :     bool        verbose = false;
     116         218 :     Relation    rel = NULL;
     117         218 :     Oid         indexOid = InvalidOid;
     118             :     MemoryContext cluster_context;
     119             :     List       *rtcs;
     120             : 
     121             :     /* Parse option list */
     122         234 :     foreach(lc, stmt->params)
     123             :     {
     124          16 :         DefElem    *opt = (DefElem *) lfirst(lc);
     125             : 
     126          16 :         if (strcmp(opt->defname, "verbose") == 0)
     127          16 :             verbose = defGetBoolean(opt);
     128             :         else
     129           0 :             ereport(ERROR,
     130             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     131             :                      errmsg("unrecognized CLUSTER option \"%s\"",
     132             :                             opt->defname),
     133             :                      parser_errposition(pstate, opt->location)));
     134             :     }
     135             : 
     136         218 :     params.options = (verbose ? CLUOPT_VERBOSE : 0);
     137             : 
     138         218 :     if (stmt->relation != NULL)
     139             :     {
     140             :         /* This is the single-relation case. */
     141             :         Oid         tableOid;
     142             : 
     143             :         /*
     144             :          * Find, lock, and check permissions on the table.  We obtain
     145             :          * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
     146             :          * single-transaction case.
     147             :          */
     148         190 :         tableOid = RangeVarGetRelidExtended(stmt->relation,
     149             :                                             AccessExclusiveLock,
     150             :                                             0,
     151             :                                             RangeVarCallbackMaintainsTable,
     152             :                                             NULL);
     153         184 :         rel = table_open(tableOid, NoLock);
     154             : 
     155             :         /*
     156             :          * Reject clustering a remote temp table ... their local buffer
     157             :          * manager is not going to cope.
     158             :          */
     159         184 :         if (RELATION_IS_OTHER_TEMP(rel))
     160           0 :             ereport(ERROR,
     161             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     162             :                      errmsg("cannot cluster temporary tables of other sessions")));
     163             : 
     164         184 :         if (stmt->indexname == NULL)
     165             :         {
     166             :             ListCell   *index;
     167             : 
     168             :             /* We need to find the index that has indisclustered set. */
     169          44 :             foreach(index, RelationGetIndexList(rel))
     170             :             {
     171          32 :                 indexOid = lfirst_oid(index);
     172          32 :                 if (get_index_isclustered(indexOid))
     173          20 :                     break;
     174          12 :                 indexOid = InvalidOid;
     175             :             }
     176             : 
     177          32 :             if (!OidIsValid(indexOid))
     178          12 :                 ereport(ERROR,
     179             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     180             :                          errmsg("there is no previously clustered index for table \"%s\"",
     181             :                                 stmt->relation->relname)));
     182             :         }
     183             :         else
     184             :         {
     185             :             /*
     186             :              * The index is expected to be in the same namespace as the
     187             :              * relation.
     188             :              */
     189         152 :             indexOid = get_relname_relid(stmt->indexname,
     190         152 :                                          rel->rd_rel->relnamespace);
     191         152 :             if (!OidIsValid(indexOid))
     192           0 :                 ereport(ERROR,
     193             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     194             :                          errmsg("index \"%s\" for table \"%s\" does not exist",
     195             :                                 stmt->indexname, stmt->relation->relname)));
     196             :         }
     197             : 
     198         172 :         if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
     199             :         {
     200             :             /* close relation, keep lock till commit */
     201         146 :             table_close(rel, NoLock);
     202             : 
     203             :             /* Do the job. */
     204         146 :             cluster_rel(tableOid, indexOid, &params);
     205             : 
     206         146 :             return;
     207             :         }
     208             :     }
     209             : 
     210             :     /*
     211             :      * By here, we know we are in a multi-table situation.  In order to avoid
     212             :      * holding locks for too long, we want to process each table in its own
     213             :      * transaction.  This forces us to disallow running inside a user
     214             :      * transaction block.
     215             :      */
     216          54 :     PreventInTransactionBlock(isTopLevel, "CLUSTER");
     217             : 
     218             :     /* Also, we need a memory context to hold our list of relations */
     219          54 :     cluster_context = AllocSetContextCreate(PortalContext,
     220             :                                             "Cluster",
     221             :                                             ALLOCSET_DEFAULT_SIZES);
     222             : 
     223             :     /*
     224             :      * Either we're processing a partitioned table, or we were not given any
     225             :      * table name at all.  In either case, obtain a list of relations to
     226             :      * process.
     227             :      *
     228             :      * In the former case, an index name must have been given, so we don't
     229             :      * need to recheck its "indisclustered" bit, but we have to check that it
     230             :      * is an index that we can cluster on.  In the latter case, we set the
     231             :      * option bit to have indisclustered verified.
     232             :      *
     233             :      * Rechecking the relation itself is necessary here in all cases.
     234             :      */
     235          54 :     params.options |= CLUOPT_RECHECK;
     236          54 :     if (rel != NULL)
     237             :     {
     238             :         Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
     239          26 :         check_index_is_clusterable(rel, indexOid, AccessShareLock);
     240          20 :         rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
     241             : 
     242             :         /* close relation, releasing lock on parent table */
     243          20 :         table_close(rel, AccessExclusiveLock);
     244             :     }
     245             :     else
     246             :     {
     247          28 :         rtcs = get_tables_to_cluster(cluster_context);
     248          28 :         params.options |= CLUOPT_RECHECK_ISCLUSTERED;
     249             :     }
     250             : 
     251             :     /* Do the job. */
     252          48 :     cluster_multiple_rels(rtcs, &params);
     253             : 
     254             :     /* Start a new transaction for the cleanup work. */
     255          48 :     StartTransactionCommand();
     256             : 
     257             :     /* Clean up working storage */
     258          48 :     MemoryContextDelete(cluster_context);
     259             : }
     260             : 
     261             : /*
     262             :  * Given a list of relations to cluster, process each of them in a separate
     263             :  * transaction.
     264             :  *
     265             :  * We expect to be in a transaction at start, but there isn't one when we
     266             :  * return.
     267             :  */
     268             : static void
     269          48 : cluster_multiple_rels(List *rtcs, ClusterParams *params)
     270             : {
     271             :     ListCell   *lc;
     272             : 
     273             :     /* Commit to get out of starting transaction */
     274          48 :     PopActiveSnapshot();
     275          48 :     CommitTransactionCommand();
     276             : 
     277             :     /* Cluster the tables, each in a separate transaction */
     278         100 :     foreach(lc, rtcs)
     279             :     {
     280          52 :         RelToCluster *rtc = (RelToCluster *) lfirst(lc);
     281             : 
     282             :         /* Start a new transaction for each relation. */
     283          52 :         StartTransactionCommand();
     284             : 
     285             :         /* functions in indexes may want a snapshot set */
     286          52 :         PushActiveSnapshot(GetTransactionSnapshot());
     287             : 
     288             :         /* Do the job. */
     289          52 :         cluster_rel(rtc->tableOid, rtc->indexOid, params);
     290             : 
     291          52 :         PopActiveSnapshot();
     292          52 :         CommitTransactionCommand();
     293             :     }
     294          48 : }
     295             : 
     296             : /*
     297             :  * cluster_rel
     298             :  *
     299             :  * This clusters the table by creating a new, clustered table and
     300             :  * swapping the relfilenumbers of the new table and the old table, so
     301             :  * the OID of the original table is preserved.  Thus we do not lose
     302             :  * GRANT, inheritance nor references to this table (this was a bug
     303             :  * in releases through 7.3).
     304             :  *
     305             :  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
     306             :  * the new table, it's better to create the indexes afterwards than to fill
     307             :  * them incrementally while we load the table.
     308             :  *
     309             :  * If indexOid is InvalidOid, the table will be rewritten in physical order
     310             :  * instead of index order.  This is the new implementation of VACUUM FULL,
     311             :  * and error messages should refer to the operation as VACUUM not CLUSTER.
     312             :  */
     313             : void
     314         526 : cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
     315             : {
     316             :     Relation    OldHeap;
     317             :     Oid         save_userid;
     318             :     int         save_sec_context;
     319             :     int         save_nestlevel;
     320         526 :     bool        verbose = ((params->options & CLUOPT_VERBOSE) != 0);
     321         526 :     bool        recheck = ((params->options & CLUOPT_RECHECK) != 0);
     322             : 
     323             :     /* Check for user-requested abort. */
     324         526 :     CHECK_FOR_INTERRUPTS();
     325             : 
     326         526 :     pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
     327         526 :     if (OidIsValid(indexOid))
     328         198 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     329             :                                      PROGRESS_CLUSTER_COMMAND_CLUSTER);
     330             :     else
     331         328 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     332             :                                      PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
     333             : 
     334             :     /*
     335             :      * We grab exclusive access to the target rel and index for the duration
     336             :      * of the transaction.  (This is redundant for the single-transaction
     337             :      * case, since cluster() already did it.)  The index lock is taken inside
     338             :      * check_index_is_clusterable.
     339             :      */
     340         526 :     OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
     341             : 
     342             :     /* If the table has gone away, we can skip processing it */
     343         526 :     if (!OldHeap)
     344             :     {
     345           0 :         pgstat_progress_end_command();
     346           0 :         return;
     347             :     }
     348             : 
     349             :     /*
     350             :      * Switch to the table owner's userid, so that any index functions are run
     351             :      * as that user.  Also lock down security-restricted operations and
     352             :      * arrange to make GUC variable changes local to this command.
     353             :      */
     354         526 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
     355         526 :     SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
     356             :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
     357         526 :     save_nestlevel = NewGUCNestLevel();
     358             : 
     359             :     /*
     360             :      * Since we may open a new transaction for each relation, we have to check
     361             :      * that the relation still is what we think it is.
     362             :      *
     363             :      * If this is a single-transaction CLUSTER, we can skip these tests. We
     364             :      * *must* skip the one on indisclustered since it would reject an attempt
     365             :      * to cluster a not-previously-clustered index.
     366             :      */
     367         526 :     if (recheck)
     368             :     {
     369             :         /* Check that the user still has privileges for the relation */
     370          52 :         if (!cluster_is_permitted_for_relation(tableOid, save_userid))
     371             :         {
     372           0 :             relation_close(OldHeap, AccessExclusiveLock);
     373           0 :             goto out;
     374             :         }
     375             : 
     376             :         /*
     377             :          * Silently skip a temp table for a remote session.  Only doing this
     378             :          * check in the "recheck" case is appropriate (which currently means
     379             :          * somebody is executing a database-wide CLUSTER or on a partitioned
     380             :          * table), because there is another check in cluster() which will stop
     381             :          * any attempt to cluster remote temp tables by name.  There is
     382             :          * another check later in cluster_rel which is redundant, but we leave it
     383             :          * for extra safety.
     384             :          */
     385          52 :         if (RELATION_IS_OTHER_TEMP(OldHeap))
     386             :         {
     387           0 :             relation_close(OldHeap, AccessExclusiveLock);
     388           0 :             goto out;
     389             :         }
     390             : 
     391          52 :         if (OidIsValid(indexOid))
     392             :         {
     393             :             /*
     394             :              * Check that the index still exists
     395             :              */
     396          52 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
     397             :             {
     398           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     399           0 :                 goto out;
     400             :             }
     401             : 
     402             :             /*
     403             :              * Check that the index is still the one with indisclustered set,
     404             :              * if needed.
     405             :              */
     406          52 :             if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
     407           6 :                 !get_index_isclustered(indexOid))
     408             :             {
     409           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     410           0 :                 goto out;
     411             :             }
     412             :         }
     413             :     }
     414             : 
     415             :     /*
     416             :      * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
     417             :      * would work in most respects, but the index would only get marked as
     418             :      * indisclustered in the current database, leading to unexpected behavior
     419             :      * if CLUSTER were later invoked in another database.
     420             :      */
     421         526 :     if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
     422           0 :         ereport(ERROR,
     423             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     424             :                  errmsg("cannot cluster a shared catalog")));
     425             : 
     426             :     /*
     427             :      * Don't process temp tables of other backends ... their local buffer
     428             :      * manager is not going to cope.
     429             :      */
     430         526 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     431             :     {
     432           0 :         if (OidIsValid(indexOid))
     433           0 :             ereport(ERROR,
     434             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     435             :                      errmsg("cannot cluster temporary tables of other sessions")));
     436             :         else
     437           0 :             ereport(ERROR,
     438             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     439             :                      errmsg("cannot vacuum temporary tables of other sessions")));
     440             :     }
     441             : 
     442             :     /*
     443             :      * Also check for active uses of the relation in the current transaction,
     444             :      * including open scans and pending AFTER trigger events.
     445             :      */
     446         526 :     CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
     447             : 
     448             :     /* Check heap and index are valid to cluster on */
     449         526 :     if (OidIsValid(indexOid))
     450         198 :         check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
     451             : 
     452             :     /*
     453             :      * Quietly ignore the request if this is a materialized view which has not
     454             :      * been populated from its query. No harm is done because there is no data
     455             :      * to deal with, and we don't want to throw an error if this is part of a
     456             :      * multi-relation request -- for example, CLUSTER was run on the entire
     457             :      * database.
     458             :      */
     459         526 :     if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
     460           0 :         !RelationIsPopulated(OldHeap))
     461             :     {
     462           0 :         relation_close(OldHeap, AccessExclusiveLock);
     463           0 :         goto out;
     464             :     }
     465             : 
     466             :     Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
     467             :            OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
     468             :            OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
     469             : 
     470             :     /*
     471             :      * All predicate locks on the tuples or pages are about to be made
     472             :      * invalid, because we move tuples around.  Promote them to relation
     473             :      * locks.  Predicate locks on indexes will be promoted when they are
     474             :      * reindexed.
     475             :      */
     476         526 :     TransferPredicateLocksToHeapRelation(OldHeap);
     477             : 
     478             :     /* rebuild_relation does all the dirty work */
     479         526 :     rebuild_relation(OldHeap, indexOid, verbose);
     480             : 
     481             :     /* NB: rebuild_relation does table_close() on OldHeap */
     482             : 
     483         520 : out:
     484             :     /* Roll back any GUC changes executed by index functions */
     485         520 :     AtEOXact_GUC(false, save_nestlevel);
     486             : 
     487             :     /* Restore userid and security context */
     488         520 :     SetUserIdAndSecContext(save_userid, save_sec_context);
     489             : 
     490         520 :     pgstat_progress_end_command();
     491             : }
     492             : 
     493             : /*
     494             :  * Verify that the specified heap and index are valid to cluster on
     495             :  *
     496             :  * Side effect: obtains the requested lock on the index.  The caller may
     497             :  * in some cases already have AccessExclusiveLock on the table, but
     498             :  * not in all cases so we can't rely on the table-level lock for
     499             :  * protection here.
     500             :  */
     501             : void
     502         288 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
     503             : {
     504             :     Relation    OldIndex;
     505             : 
     506         288 :     OldIndex = index_open(indexOid, lockmode);
     507             : 
     508             :     /*
     509             :      * Check that index is in fact an index on the given relation
     510             :      */
     511         288 :     if (OldIndex->rd_index == NULL ||
     512         288 :         OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
     513           0 :         ereport(ERROR,
     514             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     515             :                  errmsg("\"%s\" is not an index for table \"%s\"",
     516             :                         RelationGetRelationName(OldIndex),
     517             :                         RelationGetRelationName(OldHeap))));
     518             : 
     519             :     /* Index AM must allow clustering */
     520         288 :     if (!OldIndex->rd_indam->amclusterable)
     521           0 :         ereport(ERROR,
     522             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     523             :                  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
     524             :                         RelationGetRelationName(OldIndex))));
     525             : 
     526             :     /*
     527             :      * Disallow clustering on incomplete indexes (those that might not index
     528             :      * every row of the relation).  We could relax this by making a separate
     529             :      * seqscan pass over the table to copy the missing rows, but that seems
     530             :      * expensive and tedious.
     531             :      */
     532         288 :     if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
     533           0 :         ereport(ERROR,
     534             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     535             :                  errmsg("cannot cluster on partial index \"%s\"",
     536             :                         RelationGetRelationName(OldIndex))));
     537             : 
     538             :     /*
     539             :      * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
     540             :      * it might well not contain entries for every heap row, or might not even
     541             :      * be internally consistent.  (But note that we don't check indcheckxmin;
     542             :      * the worst consequence of following broken HOT chains would be that we
     543             :      * might put recently-dead tuples out-of-order in the new table, and there
     544             :      * is little harm in that.)
     545             :      */
     546         288 :     if (!OldIndex->rd_index->indisvalid)
     547           6 :         ereport(ERROR,
     548             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     549             :                  errmsg("cannot cluster on invalid index \"%s\"",
     550             :                         RelationGetRelationName(OldIndex))));
     551             : 
     552             :     /* Drop relcache refcnt on OldIndex, but keep lock */
     553         282 :     index_close(OldIndex, NoLock);
     554         282 : }
     555             : 
     556             : /*
     557             :  * mark_index_clustered: mark the specified index as the one clustered on
     558             :  * (sets indisclustered for indexOid and clears it on rel's other indexes).
     559             :  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
     560             :  */
     561             : void
     562         280 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
     563             : {
     564             :     HeapTuple   indexTuple;
     565             :     Form_pg_index indexForm;
     566             :     Relation    pg_index;
     567             :     ListCell   *index;
     568             : 
     569             :     /* Disallow applying to a partitioned table (reported as an ERROR) */
     570         280 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     571          12 :         ereport(ERROR,
     572             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     573             :                  errmsg("cannot mark index clustered in partitioned table")));
     574             : 
     575             :     /*
     576             :      * If the index is already marked clustered, no need to do anything.  (With InvalidOid we always go on to the loop below.)
     577             :      */
     578         268 :     if (OidIsValid(indexOid))
     579             :     {
     580         256 :         if (get_index_isclustered(indexOid))
     581          38 :             return;
     582             :     }
     583             : 
     584             :     /*
     585             :      * Check each index of the relation and set/clear the bit as needed.  pg_index is opened with RowExclusiveLock since rows may be updated.
     586             :      */
     587         230 :     pg_index = table_open(IndexRelationId, RowExclusiveLock);
     588             : 
     589         672 :     foreach(index, RelationGetIndexList(rel))
     590             :     {
     591         442 :         Oid         thisIndexOid = lfirst_oid(index);
     592             : 
     593         442 :         indexTuple = SearchSysCacheCopy1(INDEXRELID,
     594             :                                          ObjectIdGetDatum(thisIndexOid)); /* modifiable copy; freed at the bottom of the loop */
     595         442 :         if (!HeapTupleIsValid(indexTuple))
     596           0 :             elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
     597         442 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     598             : 
     599             :         /*
     600             :          * Unset the bit if set.  We know it's wrong because the early return
     601             :          * above already handled the case where indexOid itself is marked clustered.
     602             :          */
     603         442 :         if (indexForm->indisclustered)
     604             :         {
     605          30 :             indexForm->indisclustered = false;
     606          30 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     607             :         }
     608         412 :         else if (thisIndexOid == indexOid)
     609             :         {
     610             :             /* this was checked earlier, but let's be real sure */
     611         218 :             if (!indexForm->indisvalid)
     612           0 :                 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
     613         218 :             indexForm->indisclustered = true;
     614         218 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     615             :         }
     616             : 
     617         442 :         InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
     618             :                                      InvalidOid, is_internal); /* invoked for every index in the list, whether or not its row changed */
     619             : 
     620         442 :         heap_freetuple(indexTuple);
     621             :     }
     622             : 
     623         230 :     table_close(pg_index, RowExclusiveLock);
     624             : }
     625             : 
     626             : /*
     627             :  * rebuild_relation: rebuild an existing relation in index or physical order
     628             :  *
     629             :  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
     630             :  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
     631             :  * verbose: passed on unchanged to copy_table_data.
     632             :  * NB: this routine closes OldHeap at the right time; caller should not.
     633             :  */
     634             : static void
     635         526 : rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
     636             : {
     637         526 :     Oid         tableOid = RelationGetRelid(OldHeap);
     638         526 :     Oid         accessMethod = OldHeap->rd_rel->relam;
     639         526 :     Oid         tableSpace = OldHeap->rd_rel->reltablespace;
     640             :     Oid         OIDNewHeap;
     641             :     char        relpersistence;
     642             :     bool        is_system_catalog;
     643             :     bool        swap_toast_by_content;
     644             :     TransactionId frozenXid;
     645             :     MultiXactId cutoffMulti;
     646             : 
     647         526 :     if (OidIsValid(indexOid))
     648             :         /* Mark the correct index as clustered (skipped when rewriting in physical order) */
     649         198 :         mark_index_clustered(OldHeap, indexOid, true);
     650             : 
     651             :     /* Remember info about rel before closing OldHeap; it is needed after the relcache entry is released */
     652         526 :     relpersistence = OldHeap->rd_rel->relpersistence;
     653         526 :     is_system_catalog = IsSystemRelation(OldHeap);
     654             : 
     655             :     /* Close relcache entry, but keep lock until transaction commit */
     656         526 :     table_close(OldHeap, NoLock);
     657             : 
     658             :     /* Create the transient table that will receive the re-ordered data; it keeps the old table's tablespace, access method and persistence */
     659         526 :     OIDNewHeap = make_new_heap(tableOid, tableSpace,
     660             :                                accessMethod,
     661             :                                relpersistence,
     662             :                                AccessExclusiveLock);
     663             : 
     664             :     /* Copy the heap data into the new table in the desired order */
     665         526 :     copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
     666             :                     &swap_toast_by_content, &frozenXid, &cutoffMulti); /* these three are output parameters, consumed below */
     667             : 
     668             :     /*
     669             :      * Swap the physical files of the target and transient tables, then
     670             :      * rebuild the target's indexes and throw away the transient table.
     671             :      */
     672         526 :     finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
     673             :                      swap_toast_by_content, false, true,
     674             :                      frozenXid, cutoffMulti,
     675             :                      relpersistence);
     676         520 : }
     677             : 
     678             : 
     679             : /*
     680             :  * Create the transient table that will be filled with new data during
     681             :  * CLUSTER, ALTER TABLE, and similar operations.  The transient table
     682             :  * duplicates the logical structure of the OldHeap; but will have the
     683             :  * specified physical storage properties NewTableSpace, NewAccessMethod, and
     684             :  * relpersistence.  OIDOldHeap is opened with lockmode, which is also handed
     685             :  * to NewHeapCreateToastTable.  Returns the OID of the new heap.
     686             :  * After this, the caller should load the new heap with transferred/modified
     687             :  * data, then call finish_heap_swap to complete the operation.
     688             :  */
     689             : Oid
     690        1494 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
     691             :               char relpersistence, LOCKMODE lockmode)
     692             : {
     693             :     TupleDesc   OldHeapDesc;
     694             :     char        NewHeapName[NAMEDATALEN];
     695             :     Oid         OIDNewHeap;
     696             :     Oid         toastid;
     697             :     Relation    OldHeap;
     698             :     HeapTuple   tuple;
     699             :     Datum       reloptions;
     700             :     bool        isNull;
     701             :     Oid         namespaceid;
     702             : 
     703        1494 :     OldHeap = table_open(OIDOldHeap, lockmode);
     704        1494 :     OldHeapDesc = RelationGetDescr(OldHeap);
     705             : 
     706             :     /*
     707             :      * Note that the NewHeap will not receive any of the defaults or
     708             :      * constraints associated with the OldHeap; we don't need 'em, and there's
     709             :      * no reason to spend cycles inserting them into the catalogs only to
     710             :      * delete them.
     711             :      */
     712             : 
     713             :     /*
     714             :      * But we do want to use reloptions of the old heap for new heap.  The pg_class tuple stays pinned until the new heap has been created.
     715             :      */
     716        1494 :     tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
     717        1494 :     if (!HeapTupleIsValid(tuple))
     718           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
     719        1494 :     reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     720             :                                  &isNull);
     721        1494 :     if (isNull)
     722        1456 :         reloptions = (Datum) 0; /* old heap has no reloptions */
     723             : 
     724        1494 :     if (relpersistence == RELPERSISTENCE_TEMP)
     725         140 :         namespaceid = LookupCreationNamespace("pg_temp"); /* temp rel: use the namespace "pg_temp" resolves to */
     726             :     else
     727        1354 :         namespaceid = RelationGetNamespace(OldHeap); /* otherwise: same namespace as the old heap */
     728             : 
     729             :     /*
     730             :      * Create the new heap, using a temporary name in the same namespace as
     731             :      * the existing table.  NOTE: there is some risk of collision with user
     732             :      * relnames.  Working around this seems more trouble than it's worth; in
     733             :      * particular, we can't create the new heap in a different namespace from
     734             :      * the old, or we will have problems with the TEMP status of temp tables.
     735             :      *
     736             :      * Note: the new heap is not a shared relation, even if we are rebuilding
     737             :      * a shared rel.  However, we do make the new heap mapped if the source is
     738             :      * mapped.  This simplifies swap_relation_files, and is absolutely
     739             :      * necessary for rebuilding pg_class, for reasons explained there.
     740             :      */
     741        1494 :     snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap); /* transient name embeds the old heap's OID */
     742             : 
     743        1494 :     OIDNewHeap = heap_create_with_catalog(NewHeapName,
     744             :                                           namespaceid,
     745             :                                           NewTableSpace,
     746             :                                           InvalidOid,
     747             :                                           InvalidOid,
     748             :                                           InvalidOid,
     749        1494 :                                           OldHeap->rd_rel->relowner,
     750             :                                           NewAccessMethod,
     751             :                                           OldHeapDesc,
     752             :                                           NIL,
     753             :                                           RELKIND_RELATION,
     754             :                                           relpersistence,
     755             :                                           false, /* not a shared relation (see note above) */
     756        1494 :                                           RelationIsMapped(OldHeap),
     757             :                                           ONCOMMIT_NOOP,
     758             :                                           reloptions,
     759             :                                           false,
     760             :                                           true,
     761             :                                           true,
     762             :                                           OIDOldHeap,
     763             :                                           NULL);
     764             :     Assert(OIDNewHeap != InvalidOid);
     765             : 
     766        1494 :     ReleaseSysCache(tuple); /* done with the reloptions fetched from this tuple */
     767             : 
     768             :     /*
     769             :      * Advance command counter so that the newly-created relation's catalog
     770             :      * tuples will be visible to table_open.
     771             :      */
     772        1494 :     CommandCounterIncrement();
     773             : 
     774             :     /*
     775             :      * If necessary, create a TOAST table for the new relation.
     776             :      *
     777             :      * If the relation doesn't have a TOAST table already, we can't need one
     778             :      * for the new relation.  The other way around is possible though: if some
     779             :      * wide columns have been dropped, NewHeapCreateToastTable can decide that
     780             :      * no TOAST table is needed for the new table.
     781             :      *
     782             :      * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
     783             :      * that the TOAST table will be visible for insertion.
     784             :      */
     785        1494 :     toastid = OldHeap->rd_rel->reltoastrelid;
     786        1494 :     if (OidIsValid(toastid))
     787             :     {
     788             :         /* keep the existing toast table's reloptions, if any (tuple and reloptions are reused for the toast rel) */
     789         588 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
     790         588 :         if (!HeapTupleIsValid(tuple))
     791           0 :             elog(ERROR, "cache lookup failed for relation %u", toastid);
     792         588 :         reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     793             :                                      &isNull);
     794         588 :         if (isNull)
     795         588 :             reloptions = (Datum) 0;
     796             : 
     797         588 :         NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
     798             : 
     799         588 :         ReleaseSysCache(tuple);
     800             :     }
     801             : 
     802        1494 :     table_close(OldHeap, NoLock); /* drop the relcache reference, but keep the lock */
     803             : 
     804        1494 :     return OIDNewHeap;
     805             : }
     806             : 
     807             : /*
     808             :  * Do the physical copying of table data from OIDOldHeap to OIDNewHeap, using
     809             :  * OIDOldIndex (if valid) to order it.  Messages go to INFO if verbose, else DEBUG2.
     810             :  * There are three output parameters:
     811             :  * *pSwapToastByContent is set true if toast tables must be swapped by content.
     812             :  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
     813             :  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
     814             :  */
     815             : static void
     816         526 : copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
     817             :                 bool *pSwapToastByContent, TransactionId *pFreezeXid,
     818             :                 MultiXactId *pCutoffMulti)
     819             : {
     820             :     Relation    NewHeap,
     821             :                 OldHeap,
     822             :                 OldIndex;
     823             :     Relation    relRelation;
     824             :     HeapTuple   reltup;
     825             :     Form_pg_class relform;
     826             :     TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
     827             :     TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
     828             :     VacuumParams params;
     829             :     struct VacuumCutoffs cutoffs;
     830             :     bool        use_sort;
     831         526 :     double      num_tuples = 0,
     832         526 :                 tups_vacuumed = 0,
     833         526 :                 tups_recently_dead = 0;
     834             :     BlockNumber num_pages;
     835         526 :     int         elevel = verbose ? INFO : DEBUG2;
     836             :     PGRUsage    ru0;
     837             :     char       *nspname;
     838             : 
     839         526 :     pg_rusage_init(&ru0); /* starting point for the resource usage shown in the final log message */
     840             : 
     841             :     /*
     842             :      * Open the relations we need.  OldIndex stays NULL when no valid index OID was given.
     843             :      */
     844         526 :     NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
     845         526 :     OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
     846         526 :     if (OidIsValid(OIDOldIndex))
     847         198 :         OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
     848             :     else
     849         328 :         OldIndex = NULL;
     850             : 
     851             :     /* Store a copy of the namespace name for logging purposes */
     852         526 :     nspname = get_namespace_name(RelationGetNamespace(OldHeap));
     853             : 
     854             :     /*
     855             :      * Their tuple descriptors should be exactly alike, but here we only need
     856             :      * assume that they have the same number of columns.
     857             :      */
     858         526 :     oldTupDesc = RelationGetDescr(OldHeap);
     859         526 :     newTupDesc = RelationGetDescr(NewHeap);
     860             :     Assert(newTupDesc->natts == oldTupDesc->natts);
     861             : 
     862             :     /*
     863             :      * If the OldHeap has a toast table, get lock on the toast table to keep
     864             :      * it from being vacuumed.  This is needed because autovacuum processes
     865             :      * toast tables independently of their main tables, with no lock on the
     866             :      * latter.  If an autovacuum were to start on the toast table after we
     867             :      * compute our OldestXmin below, it would use a later OldestXmin, and then
     868             :      * possibly remove as DEAD toast tuples belonging to main tuples we think
     869             :      * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
     870             :      * tuples.
     871             :      *
     872             :      * We don't need to open the toast relation here, just lock it.  The lock
     873             :      * will be held till end of transaction.
     874             :      */
     875         526 :     if (OldHeap->rd_rel->reltoastrelid)
     876         174 :         LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
     877             : 
     878             :     /*
     879             :      * If both tables have TOAST tables, perform toast swap by content.  It is
     880             :      * possible that the old table has a toast table but the new one doesn't,
     881             :      * if toastable columns have been dropped.  In that case we have to do
     882             :      * swap by links.  This is okay because swap by content is only essential
     883             :      * for system catalogs, and we don't support schema changes for them.
     884             :      */
     885         526 :     if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
     886             :     {
     887         174 :         *pSwapToastByContent = true;
     888             : 
     889             :         /*
     890             :          * When doing swap by content, any toast pointers written into NewHeap
     891             :          * must use the old toast table's OID, because that's where the toast
     892             :          * data will eventually be found.  Set this up by setting rd_toastoid.
     893             :          * This also tells toast_save_datum() to preserve the toast value
     894             :          * OIDs, which we want so as not to invalidate toast pointers in
     895             :          * system catalog caches, and to avoid making multiple copies of a
     896             :          * single toast value.
     897             :          *
     898             :          * Note that we must hold NewHeap open until we are done writing data,
     899             :          * since the relcache will not guarantee to remember this setting once
     900             :          * the relation is closed.  Also, this technique depends on the fact
     901             :          * that no one will try to read from the NewHeap until after we've
     902             :          * finished writing it and swapping the rels --- otherwise they could
     903             :          * follow the toast pointers to the wrong place.  (It would actually
     904             :          * work for values copied over from the old toast table, but not for
     905             :          * any values that we toast which were previously not toasted.)
     906             :          */
     907         174 :         NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
     908             :     }
     909             :     else
     910         352 :         *pSwapToastByContent = false;
     911             : 
     912             :     /*
     913             :      * Compute xids used to freeze and weed out dead tuples and multixacts.
     914             :      * Since we're going to rewrite the whole table anyway, there's no reason
     915             :      * not to be aggressive about this.  (params is passed in fully zeroed.)
     916             :      */
     917         526 :     memset(&params, 0, sizeof(VacuumParams));
     918         526 :     vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
     919             : 
     920             :     /*
     921             :      * FreezeXid will become the table's new relfrozenxid, and that mustn't go
     922             :      * backwards, so take the max.
     923             :      */
     924        1052 :     if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
     925         526 :         TransactionIdPrecedes(cutoffs.FreezeLimit,
     926         526 :                               OldHeap->rd_rel->relfrozenxid))
     927         106 :         cutoffs.FreezeLimit = OldHeap->rd_rel->relfrozenxid;
     928             : 
     929             :     /*
     930             :      * MultiXactCutoff, similarly, shouldn't go backwards either.
     931             :      */
     932        1052 :     if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
     933         526 :         MultiXactIdPrecedes(cutoffs.MultiXactCutoff,
     934         526 :                             OldHeap->rd_rel->relminmxid))
     935           0 :         cutoffs.MultiXactCutoff = OldHeap->rd_rel->relminmxid;
     936             : 
     937             :     /*
     938             :      * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     939             :      * the OldHeap.  We know how to use a sort to duplicate the ordering of a
     940             :      * btree index, and will use seqscan-and-sort for that case if the planner
     941             :      * tells us it's cheaper.  Otherwise, always indexscan if an index is
     942             :      * provided, else plain seqscan.
     943             :      */
     944         526 :     if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
     945         198 :         use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
     946             :     else
     947         328 :         use_sort = false;
     948             : 
     949             :     /* Log what we're doing: index scan, seqscan-and-sort, or (with no index) a plain rewrite */
     950         526 :     if (OldIndex != NULL && !use_sort)
     951          96 :         ereport(elevel,
     952             :                 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
     953             :                         nspname,
     954             :                         RelationGetRelationName(OldHeap),
     955             :                         RelationGetRelationName(OldIndex))));
     956         430 :     else if (use_sort)
     957         102 :         ereport(elevel,
     958             :                 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
     959             :                         nspname,
     960             :                         RelationGetRelationName(OldHeap))));
     961             :     else
     962         328 :         ereport(elevel,
     963             :                 (errmsg("vacuuming \"%s.%s\"",
     964             :                         nspname,
     965             :                         RelationGetRelationName(OldHeap))));
     966             : 
     967             :     /*
     968             :      * Hand off the actual copying to the AM-specific function; the generic code
     969             :      * cannot know how to deal with visibility across AMs. Note that this
     970             :      * routine is allowed to set FreezeXid / MultiXactCutoff to different
     971             :      * values (e.g. because the AM doesn't use freezing).
     972             :      */
     973         526 :     table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
     974             :                                     cutoffs.OldestXmin, &cutoffs.FreezeLimit,
     975             :                                     &cutoffs.MultiXactCutoff,
     976             :                                     &num_tuples, &tups_vacuumed,
     977             :                                     &tups_recently_dead);
     978             : 
     979             :     /* Return the (possibly AM-adjusted) cutoffs to the caller; they get set as relfrozenxid/relminmxid */
     980         526 :     *pFreezeXid = cutoffs.FreezeLimit;
     981         526 :     *pCutoffMulti = cutoffs.MultiXactCutoff;
     982             : 
     983             :     /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
     984         526 :     NewHeap->rd_toastoid = InvalidOid;
     985             : 
     986         526 :     num_pages = RelationGetNumberOfBlocks(NewHeap); /* size of the new heap; stored as relpages below */
     987             : 
     988             :     /* Log what we did */
     989         526 :     ereport(elevel,
     990             :             (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
     991             :                     nspname,
     992             :                     RelationGetRelationName(OldHeap),
     993             :                     tups_vacuumed, num_tuples,
     994             :                     RelationGetNumberOfBlocks(OldHeap)), /* note: page count of the old heap, not num_pages */
     995             :              errdetail("%.0f dead row versions cannot be removed yet.\n"
     996             :                        "%s.",
     997             :                        tups_recently_dead,
     998             :                        pg_rusage_show(&ru0))));
     999             : 
    1000         526 :     if (OldIndex != NULL)
    1001         198 :         index_close(OldIndex, NoLock);
    1002         526 :     table_close(OldHeap, NoLock);
    1003         526 :     table_close(NewHeap, NoLock); /* all three are closed with NoLock, so the locks are kept */
    1004             : 
    1005             :     /* Update pg_class to reflect the correct values of pages and tuples (for the new heap's row). */
    1006         526 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1007             : 
    1008         526 :     reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
    1009         526 :     if (!HeapTupleIsValid(reltup))
    1010           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
    1011         526 :     relform = (Form_pg_class) GETSTRUCT(reltup);
    1012             : 
    1013         526 :     relform->relpages = num_pages;
    1014         526 :     relform->reltuples = num_tuples;
    1015             : 
    1016             :     /* Don't update the stats for pg_class.  See swap_relation_files. */
    1017         526 :     if (OIDOldHeap != RelationRelationId)
    1018         502 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1019             :     else
    1020          24 :         CacheInvalidateRelcacheByTuple(reltup); /* pg_class itself: no catalog update, only this invalidation call */
    1021             : 
    1022             :     /* Clean up. */
    1023         526 :     heap_freetuple(reltup);
    1024         526 :     table_close(relRelation, RowExclusiveLock);
    1025             : 
    1026             :     /* Make the update visible */
    1027         526 :     CommandCounterIncrement();
    1028         526 : }
    1029             : 
    1030             : /*
    1031             :  * Swap the physical files of two given relations.
    1032             :  *
    1033             :  * We swap the physical identity (reltablespace, relfilenumber) while keeping
    1034             :  * the same logical identities of the two relations.  relpersistence is also
    1035             :  * swapped, which is critical since it determines where buffers live for each
    1036             :  * relation.
    1037             :  *
    1038             :  * We can swap associated TOAST data in either of two ways: recursively swap
    1039             :  * the physical content of the toast tables (and their indexes), or swap the
    1040             :  * TOAST links in the given relations' pg_class entries.  The former is needed
    1041             :  * to manage rewrites of shared catalogs (where we cannot change the pg_class
    1042             :  * links) while the latter is the only way to handle cases in which a toast
    1043             :  * table is added or removed altogether.
    1044             :  *
    1045             :  * Additionally, the first relation is marked with relfrozenxid set to
    1046             :  * frozenXid.  It seems a bit ugly to have this here, but the caller would
    1047             :  * have to do it anyway, so having it here saves a heap_update.  Note: in
    1048             :  * the swap-toast-links case, we assume we don't need to change the toast
    1049             :  * table's relfrozenxid: the new version of the toast table should already
    1050             :  * have relfrozenxid set to RecentXmin, which is good enough.
    1051             :  *
    1052             :  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
    1053             :  * their OIDs are emitted into mapped_tables[].  This is hacky but beats
    1054             :  * having to look the information up again later in finish_heap_swap.
    1055             :  */
    1056             : static void
    1057        1722 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
    1058             :                     bool swap_toast_by_content,
    1059             :                     bool is_internal,
    1060             :                     TransactionId frozenXid,
    1061             :                     MultiXactId cutoffMulti,
    1062             :                     Oid *mapped_tables)
    1063             : {
    1064             :     Relation    relRelation;
    1065             :     HeapTuple   reltup1,
    1066             :                 reltup2;
    1067             :     Form_pg_class relform1,
    1068             :                 relform2;
    1069             :     RelFileNumber relfilenumber1,
    1070             :                 relfilenumber2;
    1071             :     RelFileNumber swaptemp;
    1072             :     char        swptmpchr;
    1073             : 
    1074             :     /* We need writable copies of both pg_class tuples. */
    1075        1722 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1076             : 
    1077        1722 :     reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    1078        1722 :     if (!HeapTupleIsValid(reltup1))
    1079           0 :         elog(ERROR, "cache lookup failed for relation %u", r1);
    1080        1722 :     relform1 = (Form_pg_class) GETSTRUCT(reltup1);
    1081             : 
    1082        1722 :     reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    1083        1722 :     if (!HeapTupleIsValid(reltup2))
    1084           0 :         elog(ERROR, "cache lookup failed for relation %u", r2);
    1085        1722 :     relform2 = (Form_pg_class) GETSTRUCT(reltup2);
    1086             : 
    1087        1722 :     relfilenumber1 = relform1->relfilenode;
    1088        1722 :     relfilenumber2 = relform2->relfilenode;
    1089             : 
    1090        1722 :     if (RelFileNumberIsValid(relfilenumber1) &&
    1091             :         RelFileNumberIsValid(relfilenumber2))
    1092             :     {
    1093             :         /*
    1094             :          * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
    1095             :          * relpersistence
    1096             :          */
    1097             :         Assert(!target_is_pg_class);
    1098             : 
    1099        1566 :         swaptemp = relform1->relfilenode;
    1100        1566 :         relform1->relfilenode = relform2->relfilenode;
    1101        1566 :         relform2->relfilenode = swaptemp;
    1102             : 
    1103        1566 :         swaptemp = relform1->reltablespace;
    1104        1566 :         relform1->reltablespace = relform2->reltablespace;
    1105        1566 :         relform2->reltablespace = swaptemp;
    1106             : 
    1107        1566 :         swaptemp = relform1->relam;
    1108        1566 :         relform1->relam = relform2->relam;
    1109        1566 :         relform2->relam = swaptemp;
    1110             : 
    1111        1566 :         swptmpchr = relform1->relpersistence;
    1112        1566 :         relform1->relpersistence = relform2->relpersistence;
    1113        1566 :         relform2->relpersistence = swptmpchr;
    1114             : 
    1115             :         /* Also swap toast links, if we're swapping by links */
    1116        1566 :         if (!swap_toast_by_content)
    1117             :         {
    1118        1170 :             swaptemp = relform1->reltoastrelid;
    1119        1170 :             relform1->reltoastrelid = relform2->reltoastrelid;
    1120        1170 :             relform2->reltoastrelid = swaptemp;
    1121             :         }
    1122             :     }
    1123             :     else
    1124             :     {
    1125             :         /*
    1126             :          * Mapped-relation case.  Here we have to swap the relation mappings
    1127             :          * instead of modifying the pg_class columns.  Both must be mapped.
    1128             :          */
    1129         156 :         if (RelFileNumberIsValid(relfilenumber1) ||
    1130             :             RelFileNumberIsValid(relfilenumber2))
    1131           0 :             elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
    1132             :                  NameStr(relform1->relname));
    1133             : 
    1134             :         /*
    1135             :          * We can't change the tablespace nor persistence of a mapped rel, and
    1136             :          * we can't handle toast link swapping for one either, because we must
    1137             :          * not apply any critical changes to its pg_class row.  These cases
    1138             :          * should be prevented by upstream permissions tests, so these checks
    1139             :          * are a non-user-facing emergency backstop.
    1140             :          */
    1141         156 :         if (relform1->reltablespace != relform2->reltablespace)
    1142           0 :             elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
    1143             :                  NameStr(relform1->relname));
    1144         156 :         if (relform1->relpersistence != relform2->relpersistence)
    1145           0 :             elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
    1146             :                  NameStr(relform1->relname));
    1147         156 :         if (relform1->relam != relform2->relam)
    1148           0 :             elog(ERROR, "cannot change access method of mapped relation \"%s\"",
    1149             :                  NameStr(relform1->relname));
    1150         156 :         if (!swap_toast_by_content &&
    1151          30 :             (relform1->reltoastrelid || relform2->reltoastrelid))
    1152           0 :             elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
    1153             :                  NameStr(relform1->relname));
    1154             : 
    1155             :         /*
    1156             :          * Fetch the mappings --- shouldn't fail, but be paranoid
    1157             :          */
    1158         156 :         relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
    1159         156 :         if (!RelFileNumberIsValid(relfilenumber1))
    1160           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1161             :                  NameStr(relform1->relname), r1);
    1162         156 :         relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
    1163         156 :         if (!RelFileNumberIsValid(relfilenumber2))
    1164           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1165             :                  NameStr(relform2->relname), r2);
    1166             : 
    1167             :         /*
    1168             :          * Send replacement mappings to relmapper.  Note these won't actually
    1169             :          * take effect until CommandCounterIncrement.
    1170             :          */
    1171         156 :         RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
    1172         156 :         RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
    1173             : 
    1174             :         /* Pass OIDs of mapped r2 tables back to caller */
    1175         156 :         *mapped_tables++ = r2;
    1176             :     }
    1177             : 
    1178             :     /*
    1179             :      * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
    1180             :      * subtransaction. The rel2 storage (swapped from rel1) may or may not be
    1181             :      * new.
    1182             :      */
    1183             :     {
    1184             :         Relation    rel1,
    1185             :                     rel2;
    1186             : 
    1187        1722 :         rel1 = relation_open(r1, NoLock);
    1188        1722 :         rel2 = relation_open(r2, NoLock);
    1189        1722 :         rel2->rd_createSubid = rel1->rd_createSubid;
    1190        1722 :         rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
    1191        1722 :         rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
    1192        1722 :         RelationAssumeNewRelfilelocator(rel1);
    1193        1722 :         relation_close(rel1, NoLock);
    1194        1722 :         relation_close(rel2, NoLock);
    1195             :     }
    1196             : 
    1197             :     /*
    1198             :      * In the case of a shared catalog, these next few steps will only affect
    1199             :      * our own database's pg_class row; but that's okay, because they are all
    1200             :      * noncritical updates.  That's also an important fact for the case of a
    1201             :      * mapped catalog, because it's possible that we'll commit the map change
    1202             :      * and then fail to commit the pg_class update.
    1203             :      */
    1204             : 
    1205             :     /* set rel1's frozen Xid and minimum MultiXid */
    1206        1722 :     if (relform1->relkind != RELKIND_INDEX)
    1207             :     {
    1208             :         Assert(!TransactionIdIsValid(frozenXid) ||
    1209             :                TransactionIdIsNormal(frozenXid));
    1210        1548 :         relform1->relfrozenxid = frozenXid;
    1211        1548 :         relform1->relminmxid = cutoffMulti;
    1212             :     }
    1213             : 
    1214             :     /* swap size statistics too, since new rel has freshly-updated stats */
    1215             :     {
    1216             :         int32       swap_pages;
    1217             :         float4      swap_tuples;
    1218             :         int32       swap_allvisible;
    1219             : 
    1220        1722 :         swap_pages = relform1->relpages;
    1221        1722 :         relform1->relpages = relform2->relpages;
    1222        1722 :         relform2->relpages = swap_pages;
    1223             : 
    1224        1722 :         swap_tuples = relform1->reltuples;
    1225        1722 :         relform1->reltuples = relform2->reltuples;
    1226        1722 :         relform2->reltuples = swap_tuples;
    1227             : 
    1228        1722 :         swap_allvisible = relform1->relallvisible;
    1229        1722 :         relform1->relallvisible = relform2->relallvisible;
    1230        1722 :         relform2->relallvisible = swap_allvisible;
    1231             :     }
    1232             : 
    1233             :     /*
    1234             :      * Update the tuples in pg_class --- unless the target relation of the
    1235             :      * swap is pg_class itself.  In that case, there is zero point in making
    1236             :      * changes because we'd be updating the old data that we're about to throw
    1237             :      * away.  Because the real work being done here for a mapped relation is
    1238             :      * just to change the relation map settings, it's all right to not update
    1239             :      * the pg_class rows in this case. The most important changes will instead
    1240             :      * be performed later, in finish_heap_swap() itself.
    1241             :      */
    1242        1722 :     if (!target_is_pg_class)
    1243             :     {
    1244             :         CatalogIndexState indstate;
    1245             : 
    1246        1698 :         indstate = CatalogOpenIndexes(relRelation);
    1247        1698 :         CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
    1248             :                                    indstate);
    1249        1698 :         CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
    1250             :                                    indstate);
    1251        1698 :         CatalogCloseIndexes(indstate);
    1252             :     }
    1253             :     else
    1254             :     {
    1255             :         /* no update ... but we do still need relcache inval */
    1256          24 :         CacheInvalidateRelcacheByTuple(reltup1);
    1257          24 :         CacheInvalidateRelcacheByTuple(reltup2);
    1258             :     }
    1259             : 
    1260             :     /*
    1261             :      * Post alter hook for modified relations. The change to r2 is always
    1262             :      * internal, but r1 depends on the invocation context.
    1263             :      */
    1264        1722 :     InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
    1265             :                                  InvalidOid, is_internal);
    1266        1722 :     InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
    1267             :                                  InvalidOid, true);
    1268             : 
    1269             :     /*
    1270             :      * If we have toast tables associated with the relations being swapped,
    1271             :      * deal with them too.
    1272             :      */
    1273        1722 :     if (relform1->reltoastrelid || relform2->reltoastrelid)
    1274             :     {
    1275         546 :         if (swap_toast_by_content)
    1276             :         {
    1277         174 :             if (relform1->reltoastrelid && relform2->reltoastrelid)
    1278             :             {
    1279             :                 /* Recursively swap the contents of the toast tables */
    1280         174 :                 swap_relation_files(relform1->reltoastrelid,
    1281             :                                     relform2->reltoastrelid,
    1282             :                                     target_is_pg_class,
    1283             :                                     swap_toast_by_content,
    1284             :                                     is_internal,
    1285             :                                     frozenXid,
    1286             :                                     cutoffMulti,
    1287             :                                     mapped_tables);
    1288             :             }
    1289             :             else
    1290             :             {
    1291             :                 /* caller messed up */
    1292           0 :                 elog(ERROR, "cannot swap toast files by content when there's only one");
    1293             :             }
    1294             :         }
    1295             :         else
    1296             :         {
    1297             :             /*
    1298             :              * We swapped the ownership links, so we need to change dependency
    1299             :              * data to match.
    1300             :              *
    1301             :              * NOTE: it is possible that only one table has a toast table.
    1302             :              *
    1303             :              * NOTE: at present, a TOAST table's only dependency is the one on
    1304             :              * its owning table.  If more are ever created, we'd need to use
    1305             :              * something more selective than deleteDependencyRecordsFor() to
    1306             :              * get rid of just the link we want.
    1307             :              */
    1308             :             ObjectAddress baseobject,
    1309             :                         toastobject;
    1310             :             long        count;
    1311             : 
    1312             :             /*
    1313             :              * We disallow this case for system catalogs, to avoid the
    1314             :              * possibility that the catalog we're rebuilding is one of the
    1315             :              * ones the dependency changes would change.  It's too late to be
    1316             :              * making any data changes to the target catalog.
    1317             :              */
    1318         372 :             if (IsSystemClass(r1, relform1))
    1319           0 :                 elog(ERROR, "cannot swap toast files by links for system catalogs");
    1320             : 
    1321             :             /* Delete old dependencies */
    1322         372 :             if (relform1->reltoastrelid)
    1323             :             {
    1324         340 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1325             :                                                    relform1->reltoastrelid,
    1326             :                                                    false);
    1327         340 :                 if (count != 1)
    1328           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1329             :                          count);
    1330             :             }
    1331         372 :             if (relform2->reltoastrelid)
    1332             :             {
    1333         372 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1334             :                                                    relform2->reltoastrelid,
    1335             :                                                    false);
    1336         372 :                 if (count != 1)
    1337           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1338             :                          count);
    1339             :             }
    1340             : 
    1341             :             /* Register new dependencies */
    1342         372 :             baseobject.classId = RelationRelationId;
    1343         372 :             baseobject.objectSubId = 0;
    1344         372 :             toastobject.classId = RelationRelationId;
    1345         372 :             toastobject.objectSubId = 0;
    1346             : 
    1347         372 :             if (relform1->reltoastrelid)
    1348             :             {
    1349         340 :                 baseobject.objectId = r1;
    1350         340 :                 toastobject.objectId = relform1->reltoastrelid;
    1351         340 :                 recordDependencyOn(&toastobject, &baseobject,
    1352             :                                    DEPENDENCY_INTERNAL);
    1353             :             }
    1354             : 
    1355         372 :             if (relform2->reltoastrelid)
    1356             :             {
    1357         372 :                 baseobject.objectId = r2;
    1358         372 :                 toastobject.objectId = relform2->reltoastrelid;
    1359         372 :                 recordDependencyOn(&toastobject, &baseobject,
    1360             :                                    DEPENDENCY_INTERNAL);
    1361             :             }
    1362             :         }
    1363             :     }
    1364             : 
    1365             :     /*
    1366             :      * If we're swapping two toast tables by content, do the same for their
    1367             :      * valid index. The swap can actually be safely done only if the relations
    1368             :      * have indexes.
    1369             :      */
    1370        1722 :     if (swap_toast_by_content &&
    1371         522 :         relform1->relkind == RELKIND_TOASTVALUE &&
    1372         174 :         relform2->relkind == RELKIND_TOASTVALUE)
    1373             :     {
    1374             :         Oid         toastIndex1,
    1375             :                     toastIndex2;
    1376             : 
    1377             :         /* Get valid index for each relation */
    1378         174 :         toastIndex1 = toast_get_valid_index(r1,
    1379             :                                             AccessExclusiveLock);
    1380         174 :         toastIndex2 = toast_get_valid_index(r2,
    1381             :                                             AccessExclusiveLock);
    1382             : 
    1383         174 :         swap_relation_files(toastIndex1,
    1384             :                             toastIndex2,
    1385             :                             target_is_pg_class,
    1386             :                             swap_toast_by_content,
    1387             :                             is_internal,
    1388             :                             InvalidTransactionId,
    1389             :                             InvalidMultiXactId,
    1390             :                             mapped_tables);
    1391             :     }
    1392             : 
    1393             :     /* Clean up. */
    1394        1722 :     heap_freetuple(reltup1);
    1395        1722 :     heap_freetuple(reltup2);
    1396             : 
    1397        1722 :     table_close(relRelation, RowExclusiveLock);
    1398             : 
    1399             :     /*
    1400             :      * Close both relcache entries' smgr links.  We need this kluge because
    1401             :      * both links will be invalidated during upcoming CommandCounterIncrement.
    1402             :      * Whichever of the rels is the second to be cleared will have a dangling
    1403             :      * reference to the other's smgr entry.  Rather than trying to avoid this
    1404             :      * by ordering operations just so, it's easiest to close the links first.
    1405             :      * (Fortunately, since one of the entries is local in our transaction,
    1406             :      * it's sufficient to clear out our own relcache this way; the problem
    1407             :      * cannot arise for other backends when they see our update on the
    1408             :      * non-transient relation.)
    1409             :      *
    1410             :      * Caution: the placement of this step interacts with the decision to
    1411             :      * handle toast rels by recursion.  When we are trying to rebuild pg_class
    1412             :      * itself, the smgr close on pg_class must happen after all accesses in
    1413             :      * this function.
    1414             :      */
    1415        1722 :     RelationCloseSmgrByOid(r1);
    1416        1722 :     RelationCloseSmgrByOid(r2);
    1417        1722 : }
    1418             : 
    1419             : /*
    1420             :  * Remove the transient table that was built by make_new_heap, and finish
    1421             :  * cleaning up (including rebuilding all indexes on the old heap).
    1422             :  */
    1423             : void
    1424        1374 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
    1425             :                  bool is_system_catalog,
    1426             :                  bool swap_toast_by_content,
    1427             :                  bool check_constraints,
    1428             :                  bool is_internal,
    1429             :                  TransactionId frozenXid,
    1430             :                  MultiXactId cutoffMulti,
    1431             :                  char newrelpersistence)
    1432             : {
    1433             :     ObjectAddress object;
    1434             :     Oid         mapped_tables[4];
    1435             :     int         reindex_flags;
    1436        1374 :     ReindexParams reindex_params = {0};
    1437             :     int         i;
    1438             : 
    1439             :     /* Report that we are now swapping relation files */
    1440        1374 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1441             :                                  PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
    1442             : 
    1443             :     /* Zero out possible results from swap_relation_files */
    1444        1374 :     memset(mapped_tables, 0, sizeof(mapped_tables));
    1445             : 
    1446             :     /*
    1447             :      * Swap the contents of the heap relations (including any toast tables).
    1448             :      * Also set old heap's relfrozenxid to frozenXid.
    1449             :      */
    1450        1374 :     swap_relation_files(OIDOldHeap, OIDNewHeap,
    1451             :                         (OIDOldHeap == RelationRelationId),
    1452             :                         swap_toast_by_content, is_internal,
    1453             :                         frozenXid, cutoffMulti, mapped_tables);
    1454             : 
    1455             :     /*
    1456             :      * If it's a system catalog, queue a sinval message to flush all catcaches
    1457             :      * on the catalog when we reach CommandCounterIncrement.
    1458             :      */
    1459        1374 :     if (is_system_catalog)
    1460         200 :         CacheInvalidateCatalog(OIDOldHeap);
    1461             : 
    1462             :     /*
    1463             :      * Rebuild each index on the relation (but not the toast table, which is
    1464             :      * all-new at this point).  It is important to do this before the DROP
    1465             :      * step because if we are processing a system catalog that will be used
    1466             :      * during DROP, we want to have its indexes available.  There is no
    1467             :      * advantage to the other order anyway because this is all transactional,
    1468             :      * so no chance to reclaim disk space before commit.  We do not need a
    1469             :      * final CommandCounterIncrement() because reindex_relation does it.
    1470             :      *
    1471             :      * Note: because index_build is called via reindex_relation, it will never
    1472             :      * set indcheckxmin true for the indexes.  This is OK even though in some
    1473             :      * sense we are building new indexes rather than rebuilding existing ones,
    1474             :      * because the new heap won't contain any HOT chains at all, let alone
    1475             :      * broken ones, so it can't be necessary to set indcheckxmin.
    1476             :      */
    1477        1374 :     reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    1478        1374 :     if (check_constraints)
    1479         848 :         reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
    1480             : 
    1481             :     /*
    1482             :      * Ensure that the indexes have the same persistence as the parent
    1483             :      * relation.
    1484             :      */
    1485        1374 :     if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
    1486          20 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    1487        1354 :     else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
    1488        1280 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
    1489             : 
    1490             :     /* Report that we are now reindexing relations */
    1491        1374 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1492             :                                  PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
    1493             : 
    1494        1374 :     reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);
    1495             : 
    1496             :     /* Report that we are now doing clean up */
    1497        1356 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1498             :                                  PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
    1499             : 
    1500             :     /*
    1501             :      * If the relation being rebuilt is pg_class, swap_relation_files()
    1502             :      * couldn't update pg_class's own pg_class entry (check comments in
    1503             :      * swap_relation_files()), thus relfrozenxid was not updated. That's
    1504             :      * annoying because a potential reason for doing a VACUUM FULL is an
    1505             :      * imminent or actual anti-wraparound shutdown.  So, now that we can
    1506             :      * access the new relation using its indices, update relfrozenxid.
    1507             :      * pg_class doesn't have a toast relation, so we don't need to update the
    1508             :      * corresponding toast relation. Note that there's little point moving all
    1509             :      * relfrozenxid updates here since swap_relation_files() needs to write to
    1510             :      * pg_class for non-mapped relations anyway.
    1511             :      */
    1512        1356 :     if (OIDOldHeap == RelationRelationId)
    1513             :     {
    1514             :         Relation    relRelation;
    1515             :         HeapTuple   reltup;
    1516             :         Form_pg_class relform;
    1517             : 
    1518          24 :         relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1519             : 
    1520          24 :         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    1521          24 :         if (!HeapTupleIsValid(reltup))
    1522           0 :             elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    1523          24 :         relform = (Form_pg_class) GETSTRUCT(reltup);
    1524             : 
    1525          24 :         relform->relfrozenxid = frozenXid;
    1526          24 :         relform->relminmxid = cutoffMulti;
    1527             : 
    1528          24 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1529             : 
    1530          24 :         table_close(relRelation, RowExclusiveLock);
    1531             :     }
    1532             : 
    1533             :     /* Destroy new heap with old filenumber */
    1534        1356 :     object.classId = RelationRelationId;
    1535        1356 :     object.objectId = OIDNewHeap;
    1536        1356 :     object.objectSubId = 0;
    1537             : 
    1538             :     /*
    1539             :      * The new relation is local to our transaction and we know nothing
    1540             :      * depends on it, so DROP_RESTRICT should be OK.
    1541             :      */
    1542        1356 :     performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
    1543             : 
    1544             :     /* performDeletion does CommandCounterIncrement at end */
    1545             : 
    1546             :     /*
    1547             :      * Now we must remove any relation mapping entries that we set up for the
    1548             :      * transient table, as well as its toast table and toast index if any. If
    1549             :      * we fail to do this before commit, the relmapper will complain about new
    1550             :      * permanent map entries being added post-bootstrap.
    1551             :      */
    1552        1512 :     for (i = 0; OidIsValid(mapped_tables[i]); i++)
    1553         156 :         RelationMapRemoveMapping(mapped_tables[i]);
    1554             : 
    1555             :     /*
    1556             :      * At this point, everything is kosher except that, if we did toast swap
    1557             :      * by links, the toast table's name corresponds to the transient table.
    1558             :      * The name is irrelevant to the backend because it's referenced by OID,
    1559             :      * but users looking at the catalogs could be confused.  Rename it to
    1560             :      * prevent this problem.
    1561             :      *
    1562             :      * Note no lock required on the relation, because we already hold an
    1563             :      * exclusive lock on it.
    1564             :      */
    1565        1356 :     if (!swap_toast_by_content)
    1566             :     {
    1567             :         Relation    newrel;
    1568             : 
    1569        1182 :         newrel = table_open(OIDOldHeap, NoLock);
    1570        1182 :         if (OidIsValid(newrel->rd_rel->reltoastrelid))
    1571             :         {
    1572             :             Oid         toastidx;
    1573             :             char        NewToastName[NAMEDATALEN];
    1574             : 
    1575             :             /* Get the associated valid index to be renamed */
    1576         340 :             toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
    1577             :                                              NoLock);
    1578             : 
    1579             :             /* rename the toast table ... */
    1580         340 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
    1581             :                      OIDOldHeap);
    1582         340 :             RenameRelationInternal(newrel->rd_rel->reltoastrelid,
    1583             :                                    NewToastName, true, false);
    1584             : 
    1585             :             /* ... and its valid index too. */
    1586         340 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
    1587             :                      OIDOldHeap);
    1588             : 
    1589         340 :             RenameRelationInternal(toastidx,
    1590             :                                    NewToastName, true, true);
    1591             : 
    1592             :             /*
    1593             :              * Reset the relrewrite for the toast. The command-counter
    1594             :              * increment is required here as we are about to update the tuple
    1595             :              * that is updated as part of RenameRelationInternal.
    1596             :              */
    1597         340 :             CommandCounterIncrement();
    1598         340 :             ResetRelRewrite(newrel->rd_rel->reltoastrelid);
    1599             :         }
    1600        1182 :         relation_close(newrel, NoLock);
    1601             :     }
    1602             : 
    1603             :     /* if it's not a catalog table, clear any missing attribute settings */
    1604        1356 :     if (!is_system_catalog)
    1605             :     {
    1606             :         Relation    newrel;
    1607             : 
    1608        1156 :         newrel = table_open(OIDOldHeap, NoLock);
    1609        1156 :         RelationClearMissing(newrel);
    1610        1156 :         relation_close(newrel, NoLock);
    1611             :     }
    1612        1356 : }
    1613             : 
    1614             : 
    1615             : /*
    1616             :  * Get a list of tables that the current user has privileges on and
    1617             :  * have indisclustered set.  Return the list in a List * of RelToCluster
    1618             :  * (stored in the specified memory context), each one giving the tableOid
    1619             :  * and the indexOid on which the table is already clustered.
                     :  *
                     :  * The candidates are found by scanning pg_index with a single scan key
                     :  * requiring indisclustered = true.  For each matching index, the table
                     :  * it is defined on (indrelid) is checked with
                     :  * cluster_is_permitted_for_relation() for the current user; indexes
                     :  * whose table fails that check are skipped, not treated as errors.
                     :  *
                     :  * Both the RelToCluster entries and the list cells are allocated while
                     :  * cluster_context is the current memory context; the previous context
                     :  * is restored after each entry is appended.
                     :  *
                     :  * Returns NIL if no index qualifies.
    1620             :  */
    1621             : static List *
    1622          28 : get_tables_to_cluster(MemoryContext cluster_context)
    1623             : {
    1624             :     Relation    indRelation;
    1625             :     TableScanDesc scan;
    1626             :     ScanKeyData entry;
    1627             :     HeapTuple   indexTuple;
    1628             :     Form_pg_index index;
    1629             :     MemoryContext old_context;
    1630          28 :     List       *rtcs = NIL;
    1631             : 
    1632             :     /*
    1633             :      * Get all indexes that have indisclustered set and that the current user
    1634             :      * has the appropriate privileges for.
    1635             :      */
    1636          28 :     indRelation = table_open(IndexRelationId, AccessShareLock);
    1637          28 :     ScanKeyInit(&entry,
    1638             :                 Anum_pg_index_indisclustered,
    1639             :                 BTEqualStrategyNumber, F_BOOLEQ,
    1640             :                 BoolGetDatum(true));
    1641          28 :     scan = table_beginscan_catalog(indRelation, 1, &entry);
    1642          46 :     while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1643             :     {
    1644             :         RelToCluster *rtc;
    1645             : 
    1646          18 :         index = (Form_pg_index) GETSTRUCT(indexTuple);
    1647             : 
    1648          18 :         if (!cluster_is_permitted_for_relation(index->indrelid, GetUserId()))
    1649          12 :             continue;           /* the check has already emitted a WARNING */
    1650             : 
    1651             :         /* Use a permanent memory context for the result list */
    1652           6 :         old_context = MemoryContextSwitchTo(cluster_context);
    1653             : 
    1654           6 :         rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1655           6 :         rtc->tableOid = index->indrelid;
    1656           6 :         rtc->indexOid = index->indexrelid;
    1657           6 :         rtcs = lappend(rtcs, rtc);
    1658             : 
    1659           6 :         MemoryContextSwitchTo(old_context);
    1660             :     }
    1661          28 :     table_endscan(scan);
    1662             : 
    1663          28 :     relation_close(indRelation, AccessShareLock);   /* same lock mode as the open above */
    1664             : 
    1665          28 :     return rtcs;
    1666             : }
    1667             : 
    1668             : /*
    1669             :  * Given an index on a partitioned table, return a list of RelToCluster for
    1670             :  * all of its leaf child indexes and the tables they are defined on.
    1671             :  *
    1672             :  * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
    1673             :  * on the table containing the index.
                     :  *
                     :  * The set of indexes to consider is obtained from find_all_inheritors()
                     :  * on indexOid with NoLock, so no child is locked here.  Only entries
                     :  * whose relkind is RELKIND_INDEX are kept; everything else in the
                     :  * inheritance list is skipped.  No per-child privilege check is made
                     :  * (see the comment inside the loop).
                     :  *
                     :  * The RelToCluster entries and the list cells are allocated while
                     :  * cluster_context is the current memory context.  Returns NIL if no
                     :  * leaf index is found.
                     :  *
                     :  * NOTE(review): IndexGetRelation() is called for every inheritor,
                     :  * including the ones that the relkind test then skips; its second
                     :  * argument is presumably missing_ok = false -- confirm.
    1674             :  */
    1675             : static List *
    1676          20 : get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
    1677             : {
    1678             :     List       *inhoids;
    1679             :     ListCell   *lc;
    1680          20 :     List       *rtcs = NIL;
    1681             :     MemoryContext old_context;
    1682             : 
    1683             :     /* Do not lock the children until they're processed */
    1684          20 :     inhoids = find_all_inheritors(indexOid, NoLock, NULL);
    1685             : 
    1686         104 :     foreach(lc, inhoids)
    1687             :     {
    1688          84 :         Oid         indexrelid = lfirst_oid(lc);
    1689          84 :         Oid         relid = IndexGetRelation(indexrelid, false);
    1690             :         RelToCluster *rtc;
    1691             : 
    1692             :         /* consider only leaf indexes */
    1693          84 :         if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
    1694          38 :             continue;
    1695             : 
    1696             :         /*
    1697             :          * We already checked that the user has privileges to CLUSTER the
    1698             :          * partitioned table when we locked it earlier, so there's no need to
    1699             :          * check the privileges again here.
    1700             :          */
    1701             : 
    1702             :         /* Use a permanent memory context for the result list */
    1703          46 :         old_context = MemoryContextSwitchTo(cluster_context);
    1704             : 
    1705          46 :         rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1706          46 :         rtc->tableOid = relid;
    1707          46 :         rtc->indexOid = indexrelid;
    1708          46 :         rtcs = lappend(rtcs, rtc);
    1709             : 
    1710          46 :         MemoryContextSwitchTo(old_context);
    1711             :     }
    1712             : 
    1713          20 :     return rtcs;
    1714             : }
    1715             : 
    1716             : /*
    1717             :  * Return whether userid has privileges to CLUSTER relid.  If not, this
    1718             :  * function emits a WARNING.
                     :  *
                     :  * The check passes when pg_class_aclcheck() reports ACLCHECK_OK for
                     :  * ACL_MAINTAIN on relid.  Only if that fails is
                     :  * has_partition_ancestor_privs() consulted, with the same relid, userid
                     :  * and ACL_MAINTAIN.
                     :  *
                     :  * When both checks fail, a WARNING naming the relation (via
                     :  * get_rel_name()) is reported and false is returned; no ERROR is
                     :  * raised, so the caller decides how to proceed.
    1719             :  */
    1720             : static bool
    1721          70 : cluster_is_permitted_for_relation(Oid relid, Oid userid)
    1722             : {
    1723         104 :     if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK ||
    1724          34 :         has_partition_ancestor_privs(relid, userid, ACL_MAINTAIN))
    1725          58 :         return true;
    1726             : 
    1727          12 :     ereport(WARNING,
    1728             :             (errmsg("permission denied to cluster \"%s\", skipping it",
    1729             :                     get_rel_name(relid))));
    1730          12 :     return false;
    1731             : }

Generated by: LCOV version 1.14