LCOV - code coverage report
Current view: top level - src/backend/commands - cluster.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 90.1 % 544 490
Test Date: 2026-04-05 23:16:10 Functions: 100.0 % 16 16
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * cluster.c
       4              :  *    REPACK a table; formerly known as CLUSTER.  VACUUM FULL also uses
       5              :  *    parts of this code.
       6              :  *
       7              :  *
       8              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       9              :  * Portions Copyright (c) 1994-5, Regents of the University of California
      10              :  *
      11              :  *
      12              :  * IDENTIFICATION
      13              :  *    src/backend/commands/cluster.c
      14              :  *
      15              :  *-------------------------------------------------------------------------
      16              :  */
      17              : #include "postgres.h"
      18              : 
      19              : #include "access/amapi.h"
      20              : #include "access/heapam.h"
      21              : #include "access/multixact.h"
      22              : #include "access/relscan.h"
      23              : #include "access/tableam.h"
      24              : #include "access/toast_internals.h"
      25              : #include "access/transam.h"
      26              : #include "access/xact.h"
      27              : #include "catalog/catalog.h"
      28              : #include "catalog/dependency.h"
      29              : #include "catalog/heap.h"
      30              : #include "catalog/index.h"
      31              : #include "catalog/namespace.h"
      32              : #include "catalog/objectaccess.h"
      33              : #include "catalog/pg_am.h"
      34              : #include "catalog/pg_inherits.h"
      35              : #include "catalog/toasting.h"
      36              : #include "commands/cluster.h"
      37              : #include "commands/defrem.h"
      38              : #include "commands/progress.h"
      39              : #include "commands/tablecmds.h"
      40              : #include "commands/vacuum.h"
      41              : #include "miscadmin.h"
      42              : #include "optimizer/optimizer.h"
      43              : #include "pgstat.h"
      44              : #include "storage/bufmgr.h"
      45              : #include "storage/lmgr.h"
      46              : #include "storage/predicate.h"
      47              : #include "utils/acl.h"
      48              : #include "utils/fmgroids.h"
      49              : #include "utils/guc.h"
      50              : #include "utils/inval.h"
      51              : #include "utils/lsyscache.h"
      52              : #include "utils/memutils.h"
      53              : #include "utils/pg_rusage.h"
      54              : #include "utils/relmapper.h"
      55              : #include "utils/snapmgr.h"
      56              : #include "utils/syscache.h"
      57              : 
      58              : /*
      59              :  * This struct carries the (table, index) pair for one relation to be
      60              :  * repacked.  ExecRepack works from a list of these whenever it has to
      61              :  * process several relations, each in its own transaction.
      62              :  */
      63              : typedef struct
      64              : {
      65              :     Oid         tableOid;       /* table to be repacked */
      66              :     Oid         indexOid;       /* index to order by, or InvalidOid */
      67              : } RelToCluster;
      68              : /* Forward declarations of file-local (static) helper functions */
      69              : static bool cluster_rel_recheck(RepackCommand cmd, Relation OldHeap,
      70              :                                 Oid indexOid, Oid userid, int options);
      71              : static void rebuild_relation(Relation OldHeap, Relation index, bool verbose);
      72              : static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex,
      73              :                             bool verbose, bool *pSwapToastByContent,
      74              :                             TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
      75              : static List *get_tables_to_repack(RepackCommand cmd, bool usingindex,
      76              :                                   MemoryContext permcxt);
      77              : static List *get_tables_to_repack_partitioned(RepackCommand cmd,
      78              :                                               Oid relid, bool rel_is_index,
      79              :                                               MemoryContext permcxt);
      80              : static bool repack_is_permitted_for_relation(RepackCommand cmd,
      81              :                                              Oid relid, Oid userid);
      82              : static Relation process_single_relation(RepackStmt *stmt,
      83              :                                         ClusterParams *params);
      84              : static Oid  determine_clustered_index(Relation rel, bool usingindex,
      85              :                                       const char *indexname);
      86              : static const char *RepackCommandAsString(RepackCommand cmd);
      87              : 
      88              : 
      89              : /*
      90              :  * ExecRepack
      91              :  *    Execute a REPACK-family command (see RepackCommand) described by stmt.
      92              :  *
      93              :  * The repack code allows for processing multiple tables at once.  Because of
      94              :  * this, we cannot just run everything in a single transaction, or we would be
      95              :  * forced to acquire exclusive locks on all the tables being repacked
      96              :  * simultaneously --- very likely leading to deadlock.
      97              :  *
      98              :  * To solve this we follow a strategy similar to VACUUM's, processing each
      99              :  * relation in a separate transaction.  For this to work, we need to:
     100              :  *
     101              :  *  - provide a separate memory context so that the list of relations
     102              :  *    survives across transactions
     103              :  *  - start a new transaction every time a new relation is processed
     104              :  *  - recheck the to-be-processed relation, as someone might have dropped it
     105              :  *    behind our back, or clustered it on a different index
     106              :  *  - end the transaction
     107              :  *
     108              :  * The single-relation case does not have any such overhead.  A relation can
     109              :  * also be repacked following an index without naming one: the indisclustered
     110              :  * bit is looked up, and an ERROR is thrown if no so-marked index is found.
     111              :  */
     112              : void
     113          180 : ExecRepack(ParseState *pstate, RepackStmt *stmt, bool isTopLevel)
     114              : {
     115          180 :     ClusterParams params = {0};
     116          180 :     Relation    rel = NULL;
     117              :     MemoryContext repack_context;
     118              :     List       *rtcs;
     119              : 
     120              :     /* Parse option list; a true value sets its flag, a false one sets nothing */
     121          376 :     foreach_node(DefElem, opt, stmt->params)
     122              :     {
     123           16 :         if (strcmp(opt->defname, "verbose") == 0)
     124            8 :             params.options |= defGetBoolean(opt) ? CLUOPT_VERBOSE : 0;
     125            8 :         else if (strcmp(opt->defname, "analyze") == 0 ||
     126            0 :                  strcmp(opt->defname, "analyse") == 0)
     127            8 :             params.options |= defGetBoolean(opt) ? CLUOPT_ANALYZE : 0;
     128              :         else
     129            0 :             ereport(ERROR,
     130              :                     errcode(ERRCODE_SYNTAX_ERROR),
     131              :                     errmsg("unrecognized %s option \"%s\"",
     132              :                            RepackCommandAsString(stmt->command),
     133              :                            opt->defname),
     134              :                     parser_errposition(pstate, opt->location));
     135              :     }
     136              : 
     137              :     /*
     138              :      * If a single relation is specified, process it and we're done ... unless
     139              :      * the relation is a partitioned table, in which case we fall through.
     140              :      */
     141          180 :     if (stmt->relation != NULL)
     142              :     {
     143          163 :         rel = process_single_relation(stmt, &params);
     144          147 :         if (rel == NULL)
     145          115 :             return;             /* all done */
     146              :     }
     147              : 
     148              :     /*
     149              :      * Don't allow ANALYZE in the multiple-relation case for now.  Maybe we
     150              :      * can add support for this later.
     151              :      */
     152           49 :     if (params.options & CLUOPT_ANALYZE)
     153            0 :         ereport(ERROR,
     154              :                 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     155              :                 errmsg("cannot execute %s on multiple tables",
     156              :                        "REPACK (ANALYZE)"));
     157              : 
     158              :     /*
     159              :      * By here, we know we are in a multi-table situation.  In order to avoid
     160              :      * holding locks for too long, we want to process each table in its own
     161              :      * transaction.  This forces us to disallow running inside a user
     162              :      * transaction block.
     163              :      */
     164           49 :     PreventInTransactionBlock(isTopLevel, RepackCommandAsString(stmt->command));
     165              : 
     166              :     /* Also, we need a memory context (under PortalContext) for our relation list */
     167           49 :     repack_context = AllocSetContextCreate(PortalContext,
     168              :                                            "Repack",
     169              :                                            ALLOCSET_DEFAULT_SIZES);
     170              : 
     171           49 :     params.options |= CLUOPT_RECHECK;
     172              : 
     173              :     /*
     174              :      * If we don't have a relation yet, determine a relation list.  If we do,
     175              :      * then it must be a partitioned table, and we want to process its
     176              :      * partitions.
     177              :      */
     178           49 :     if (rel == NULL)
     179              :     {
     180              :         Assert(stmt->indexname == NULL);
     181           17 :         rtcs = get_tables_to_repack(stmt->command, stmt->usingindex,
     182              :                                     repack_context);
     183           17 :         params.options |= CLUOPT_RECHECK_ISCLUSTERED;
     184              :     }
     185              :     else
     186              :     {
     187              :         Oid         relid;
     188              :         bool        rel_is_index;
     189              : 
     190              :         Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
     191              : 
     192              :         /*
     193              :          * If USING INDEX was specified, resolve the index name now and pass
     194              :          * it down.
     195              :          */
     196           32 :         if (stmt->usingindex)
     197              :         {
     198              :             /*
     199              :              * If no index name was specified when repacking a partitioned
     200              :              * table, punt for now.  Maybe we can improve this later.
     201              :              */
     202           28 :             if (!stmt->indexname)
     203              :             {
     204            8 :                 if (stmt->command == REPACK_COMMAND_CLUSTER)
     205            4 :                     ereport(ERROR,
     206              :                             errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     207              :                             errmsg("there is no previously clustered index for table \"%s\"",
     208              :                                    RelationGetRelationName(rel)));
     209              :                 else
     210            4 :                     ereport(ERROR,
     211              :                             errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     212              :                     /*- translator: first %s is name of a SQL command, eg. REPACK */
     213              :                             errmsg("cannot execute %s on partitioned table \"%s\" USING INDEX with no index name",
     214              :                                    RepackCommandAsString(stmt->command),
     215              :                                    RelationGetRelationName(rel)));
     216              :             }
     217              : 
     218           20 :             relid = determine_clustered_index(rel, stmt->usingindex,
     219           20 :                                               stmt->indexname);
     220           20 :             if (!OidIsValid(relid))
     221            0 :                 elog(ERROR, "unable to determine index to cluster on");
     222           20 :             check_index_is_clusterable(rel, relid, AccessExclusiveLock);
     223              : 
     224           16 :             rel_is_index = true;
     225              :         }
     226              :         else
     227              :         {
     228            4 :             relid = RelationGetRelid(rel);
     229            4 :             rel_is_index = false;
     230              :         }
     231              : 
     232           20 :         rtcs = get_tables_to_repack_partitioned(stmt->command,
     233              :                                                 relid, rel_is_index,
     234              :                                                 repack_context);
     235              : 
     236              :         /* close parent relation, releasing lock on it */
     237           20 :         table_close(rel, AccessExclusiveLock);
     238           20 :         rel = NULL;
     239              :     }
     240              : 
     241              :     /* Commit to get out of starting transaction */
     242           37 :     PopActiveSnapshot();
     243           37 :     CommitTransactionCommand();
     244              : 
     245              :     /* Process the listed tables, each in a separate transaction */
     246              :     Assert(rel == NULL);
     247          126 :     foreach_ptr(RelToCluster, rtc, rtcs)
     248              :     {
     249              :         /* Start a new transaction for each relation. */
     250           52 :         StartTransactionCommand();
     251              : 
     252              :         /*
     253              :          * Open the target table, coping with the case where it has been
     254              :          * dropped.
     255              :          */
     256           52 :         rel = try_table_open(rtc->tableOid, AccessExclusiveLock);
     257           52 :         if (rel == NULL)
     258              :         {
     259            0 :             CommitTransactionCommand();
     260            0 :             continue;
     261              :         }
     262              : 
     263              :         /* functions in indexes may want a snapshot set */
     264           52 :         PushActiveSnapshot(GetTransactionSnapshot());
     265              : 
     266              :         /* Process this table */
     267           52 :         cluster_rel(stmt->command, rel, rtc->indexOid, &params);
     268              :         /* cluster_rel closes the relation, but keeps lock */
     269              : 
     270           52 :         PopActiveSnapshot();
     271           52 :         CommitTransactionCommand();
     272              :     }
     273              : 
     274              :     /* Start a new transaction for the cleanup work. */
     275           37 :     StartTransactionCommand();
     276              : 
     277              :     /* Clean up working storage */
     278           37 :     MemoryContextDelete(repack_context);
     279              : }
     280              : 
     281              : /*
     282              :  * cluster_rel
     283              :  *
     284              :  * This clusters the table by creating a new, clustered table and
     285              :  * swapping the relfilenumbers of the new table and the old table, so
     286              :  * the OID of the original table is preserved.  Thus we do not lose
     287              :  * GRANT, inheritance nor references to this table.
     288              :  *
     289              :  * Indexes are rebuilt too, via REINDEX.  Since we are effectively bulk-loading
     290              :  * the new table, it's better to create the indexes afterwards than to fill
     291              :  * them incrementally while we load the table.
     292              :  *
     293              :  * OldHeap must already be open and locked with AccessExclusiveLock; it is
     294              :  * closed before returning.  If indexOid is InvalidOid, the table is rewritten
     295              :  * in physical order instead of index order.  'cmd' identifies the command being
     296              :  * executed, for error messages and progress reporting; params->options carries
     297              :  * the CLUOPT_* flags (VERBOSE, and RECHECK for multi-transaction callers).
     298              :  */
     299              : void
     300          389 : cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid,
     301              :             ClusterParams *params)
     302              : {
     303          389 :     Oid         tableOid = RelationGetRelid(OldHeap);
     304              :     Oid         save_userid;
     305              :     int         save_sec_context;
     306              :     int         save_nestlevel;
     307          389 :     bool        verbose = ((params->options & CLUOPT_VERBOSE) != 0);
     308          389 :     bool        recheck = ((params->options & CLUOPT_RECHECK) != 0);
     309              :     Relation    index;
     310              : 
     311              :     Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false));
     312              : 
     313              :     /* Check for user-requested abort. */
     314          389 :     CHECK_FOR_INTERRUPTS();
     315              : 
     316          389 :     pgstat_progress_start_command(PROGRESS_COMMAND_REPACK, tableOid);
     317          389 :     pgstat_progress_update_param(PROGRESS_REPACK_COMMAND, cmd);
     318              : 
     319              :     /*
     320              :      * Switch to the table owner's userid, so that any index functions are run
     321              :      * as that user.  Also lock down security-restricted operations and
     322              :      * arrange to make GUC variable changes local to this command.
     323              :      */
     324          389 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
     325          389 :     SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
     326              :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
     327          389 :     save_nestlevel = NewGUCNestLevel();
     328          389 :     RestrictSearchPath();
     329              : 
     330              :     /*
     331              :      * Since we may open a new transaction for each relation, we have to check
     332              :      * that the relation still is what we think it is.
     333              :      *
     334              :      * If this is a single-transaction CLUSTER, we can skip these tests. We
     335              :      * *must* skip the one on indisclustered since it would reject an attempt
     336              :      * to cluster a not-previously-clustered index.
     337              :      */
     338          389 :     if (recheck &&
     339           52 :         !cluster_rel_recheck(cmd, OldHeap, indexOid, save_userid,
     340           52 :                              params->options))
     341            0 :         goto out;
     342              : 
     343              :     /*
     344              :      * We allow repacking shared catalogs only when not using an index. It
     345              :      * would work to use an index in most respects, but the index would only
     346              :      * get marked as indisclustered in the current database, leading to
     347              :      * unexpected behavior if CLUSTER were later invoked in another database.
     348              :      */
     349          389 :     if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
     350            0 :         ereport(ERROR,
     351              :                 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     352              :         /*- translator: first %s is name of a SQL command, eg. REPACK */
     353              :                 errmsg("cannot execute %s on a shared catalog",
     354              :                        RepackCommandAsString(cmd)));
     355              : 
     356              :     /*
     357              :      * Don't process temp tables of other backends ... their local buffer
     358              :      * manager is not going to cope.
     359              :      */
     360          389 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     361            0 :         ereport(ERROR,
     362              :                 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     363              :         /*- translator: first %s is name of a SQL command, eg. REPACK */
     364              :                 errmsg("cannot execute %s on temporary tables of other sessions",
     365              :                        RepackCommandAsString(cmd)));
     366              : 
     367              :     /*
     368              :      * Also check for active uses of the relation in the current transaction,
     369              :      * including open scans and pending AFTER trigger events.
     370              :      */
     371          389 :     CheckTableNotInUse(OldHeap, RepackCommandAsString(cmd));
     372              : 
     373              :     /* Check heap and index are valid to cluster on */
     374          389 :     if (OidIsValid(indexOid))
     375              :     {
     376              :         /* verify the index is good and lock it */
     377          139 :         check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
     378              :         /* also open it */
     379          139 :         index = index_open(indexOid, NoLock);
     380              :     }
     381              :     else
     382          250 :         index = NULL;
     383              : 
     384              :     /*
     385              :      * When allow_system_table_mods is turned off, we disallow repacking a
     386              :      * catalog on a particular index unless that's already the clustered index
     387              :      * for that catalog.
     388              :      *
     389              :      * XXX We don't check for this in CLUSTER, because it's historically been
     390              :      * allowed.
     391              :      */
     392          389 :     if (cmd != REPACK_COMMAND_CLUSTER &&
     393          282 :         !allowSystemTableMods && OidIsValid(indexOid) &&
     394           16 :         IsCatalogRelation(OldHeap) && !index->rd_index->indisclustered)
     395            0 :         ereport(ERROR,
     396              :                 errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     397              :                 errmsg("permission denied: \"%s\" is a system catalog",
     398              :                        RelationGetRelationName(OldHeap)),
     399              :                 errdetail("System catalogs can only be clustered by the index they're already clustered on, if any, unless \"%s\" is enabled.",
     400              :                           "allow_system_table_mods"));
     401              : 
     402              :     /*
     403              :      * Quietly ignore the request if this is a materialized view which has not
     404              :      * been populated from its query: there is no data to deal with, and we don't
     405              :      * want to throw an error in a multi-relation request (e.g. database-wide).
     406              :      * NOTE(review): if "index" was opened above it is not closed on this path,
     407              :      * unlike OldHeap; looks like a missing index_close() -- confirm and fix.
     408              :      */
     409          389 :     if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
     410            0 :         !RelationIsPopulated(OldHeap))
     411              :     {
     412            0 :         relation_close(OldHeap, AccessExclusiveLock);
     413            0 :         goto out;
     414              :     }
     415              : 
     416              :     Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
     417              :            OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
     418              :            OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
     419              : 
     420              :     /*
     421              :      * All predicate locks on the tuples or pages are about to be made
     422              :      * invalid, because we move tuples around.  Promote them to relation
     423              :      * locks.  Predicate locks on indexes will be promoted when they are
     424              :      * reindexed.
     425              :      */
     426          389 :     TransferPredicateLocksToHeapRelation(OldHeap);
     427              : 
     428              :     /* rebuild_relation does all the dirty work */
     429          389 :     rebuild_relation(OldHeap, index, verbose);
     430              :     /* rebuild_relation closes OldHeap, and index if valid */
     431              : 
     432          385 : out:
     433              :     /* Roll back any GUC changes executed by index functions */
     434          385 :     AtEOXact_GUC(false, save_nestlevel);
     435              : 
     436              :     /* Restore userid and security context */
     437          385 :     SetUserIdAndSecContext(save_userid, save_sec_context);
     438              : 
     439          385 :     pgstat_progress_end_command();
     440          385 : }
     441              : 
     442              : /*
     443              :  * Recheck that the table (and its index) still meets the requirements of
     444              :  * cluster_rel().  On failure, closes OldHeap (releasing lock) and returns false.
     445              :  */
     446              : static bool
     447           52 : cluster_rel_recheck(RepackCommand cmd, Relation OldHeap, Oid indexOid,
     448              :                     Oid userid, int options)
     449              : {
     450           52 :     Oid         tableOid = RelationGetRelid(OldHeap);
     451              : 
     452              :     /* Check that 'userid' (the invoking user, per cluster_rel) still has privileges */
     453           52 :     if (!repack_is_permitted_for_relation(cmd, tableOid, userid))
     454              :     {
     455            0 :         relation_close(OldHeap, AccessExclusiveLock);
     456            0 :         return false;
     457              :     }
     458              : 
     459              :     /*
     460              :      * Silently skip a temp table of another session.  Only doing this check
     461              :      * in the "recheck" case is appropriate (which currently means somebody is
     462              :      * executing a database-wide command or one on a partitioned table), because
     463              :      * an attempt to process a remote temp table by name is stopped elsewhere.
     464              :      * NOTE(review): this used to cite cluster(); confirm where that check lives now.
     465              :      * cluster_rel has a further check, redundant but kept for extra safety.
     466              :      */
     467           52 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     468              :     {
     469            0 :         relation_close(OldHeap, AccessExclusiveLock);
     470            0 :         return false;
     471              :     }
     472              : 
     473           52 :     if (OidIsValid(indexOid))
     474              :     {
     475              :         /*
     476              :          * Check that the index still exists
     477              :          */
     478           32 :         if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
     479              :         {
     480            0 :             relation_close(OldHeap, AccessExclusiveLock);
     481            0 :             return false;
     482              :         }
     483              : 
     484              :         /*
     485              :          * Check that the index is still the one with indisclustered set, if
     486              :          * needed.
     487              :          */
     488           32 :         if ((options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
     489            4 :             !get_index_isclustered(indexOid))
     490              :         {
     491            0 :             relation_close(OldHeap, AccessExclusiveLock);
     492            0 :             return false;
     493              :         }
     494              :     }
     495              : 
     496           52 :     return true;
     497              : }
     498              : 
     499              : /*
     500              :  * Verify that the specified heap and index are valid to cluster on
     501              :  *
     502              :  * Side effect: obtains lock on the index.  The caller may
     503              :  * in some cases already have AccessExclusiveLock on the table, but
     504              :  * not in all cases so we can't rely on the table-level lock for
     505              :  * protection here.
     506              :  */
     507              : void
     508          305 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
     509              : {
     510              :     Relation    OldIndex;
     511              : 
     512          305 :     OldIndex = index_open(indexOid, lockmode);
     513              : 
     514              :     /*
     515              :      * Check that index is in fact an index on the given relation
     516              :      */
     517          305 :     if (OldIndex->rd_index == NULL ||
     518          305 :         OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
     519            0 :         ereport(ERROR,
     520              :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     521              :                  errmsg("\"%s\" is not an index for table \"%s\"",
     522              :                         RelationGetRelationName(OldIndex),
     523              :                         RelationGetRelationName(OldHeap))));
     524              : 
     525              :     /* Index AM must allow clustering */
     526          305 :     if (!OldIndex->rd_indam->amclusterable)
     527            0 :         ereport(ERROR,
     528              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     529              :                  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
     530              :                         RelationGetRelationName(OldIndex))));
     531              : 
     532              :     /*
     533              :      * Disallow clustering on incomplete indexes (those that might not index
     534              :      * every row of the relation).  We could relax this by making a separate
     535              :      * seqscan pass over the table to copy the missing rows, but that seems
     536              :      * expensive and tedious.
     537              :      */
     538          305 :     if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
     539            0 :         ereport(ERROR,
     540              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     541              :                  errmsg("cannot cluster on partial index \"%s\"",
     542              :                         RelationGetRelationName(OldIndex))));
     543              : 
     544              :     /*
     545              :      * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
     546              :      * it might well not contain entries for every heap row, or might not even
     547              :      * be internally consistent.  (But note that we don't check indcheckxmin;
     548              :      * the worst consequence of following broken HOT chains would be that we
     549              :      * might put recently-dead tuples out-of-order in the new table, and there
     550              :      * is little harm in that.)
     551              :      */
     552          305 :     if (!OldIndex->rd_index->indisvalid)
     553            4 :         ereport(ERROR,
     554              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     555              :                  errmsg("cannot cluster on invalid index \"%s\"",
     556              :                         RelationGetRelationName(OldIndex))));
     557              : 
     558              :     /* Drop relcache refcnt on OldIndex, but keep lock */
     559          301 :     index_close(OldIndex, NoLock);
     560          301 : }
     561              : 
     562              : /*
     563              :  * mark_index_clustered: mark the specified index as the one clustered on
     564              :  *
     565              :  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
     566              :  */
     567              : void
     568          186 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
     569              : {
     570              :     HeapTuple   indexTuple;
     571              :     Form_pg_index indexForm;
     572              :     Relation    pg_index;
     573              :     ListCell   *index;
     574              : 
     575              :     Assert(rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE);
     576              : 
     577              :     /*
     578              :      * If the index is already marked clustered, no need to do anything.
     579              :      */
     580          186 :     if (OidIsValid(indexOid))
     581              :     {
     582          178 :         if (get_index_isclustered(indexOid))
     583           38 :             return;
     584              :     }
     585              : 
     586              :     /*
     587              :      * Check each index of the relation and set/clear the bit as needed.
     588              :      */
     589          148 :     pg_index = table_open(IndexRelationId, RowExclusiveLock);
     590              : 
     591          450 :     foreach(index, RelationGetIndexList(rel))
     592              :     {
     593          302 :         Oid         thisIndexOid = lfirst_oid(index);
     594              : 
     595          302 :         indexTuple = SearchSysCacheCopy1(INDEXRELID,
     596              :                                          ObjectIdGetDatum(thisIndexOid));
     597          302 :         if (!HeapTupleIsValid(indexTuple))
     598            0 :             elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
     599          302 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     600              : 
     601              :         /*
     602              :          * Unset the bit if set.  We know it's wrong because we checked this
     603              :          * earlier.
     604              :          */
     605          302 :         if (indexForm->indisclustered)
     606              :         {
     607           20 :             indexForm->indisclustered = false;
     608           20 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     609              :         }
     610          282 :         else if (thisIndexOid == indexOid)
     611              :         {
     612              :             /* this was checked earlier, but let's be real sure */
     613          140 :             if (!indexForm->indisvalid)
     614            0 :                 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
     615          140 :             indexForm->indisclustered = true;
     616          140 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     617              :         }
     618              : 
     619          302 :         InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
     620              :                                      InvalidOid, is_internal);
     621              : 
     622          302 :         heap_freetuple(indexTuple);
     623              :     }
     624              : 
     625          148 :     table_close(pg_index, RowExclusiveLock);
     626              : }
     627              : 
     628              : /*
     629              :  * rebuild_relation: rebuild an existing relation in index or physical order
     630              :  *
     631              :  * OldHeap: table to rebuild.
     632              :  * index: index to cluster by, or NULL to rewrite in physical order.
     633              :  *
     634              :  * On entry, heap and index (if one is given) must be open, and
     635              :  * AccessExclusiveLock held on them.
     636              :  * On exit, they are closed, but locks on them are not released.
     637              :  */
     638              : static void
     639          389 : rebuild_relation(Relation OldHeap, Relation index, bool verbose)
     640              : {
     641          389 :     Oid         tableOid = RelationGetRelid(OldHeap);
     642          389 :     Oid         accessMethod = OldHeap->rd_rel->relam;
     643          389 :     Oid         tableSpace = OldHeap->rd_rel->reltablespace;
     644              :     Oid         OIDNewHeap;
     645              :     Relation    NewHeap;
     646              :     char        relpersistence;
     647              :     bool        is_system_catalog;
     648              :     bool        swap_toast_by_content;
     649              :     TransactionId frozenXid;
     650              :     MultiXactId cutoffMulti;
     651              : 
     652              :     Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false) &&
     653              :            (index == NULL || CheckRelationLockedByMe(index, AccessExclusiveLock, false)));
     654              : 
     655              :     /* for CLUSTER or REPACK USING INDEX, mark the index as the one to use */
     656          389 :     if (index != NULL)
     657          139 :         mark_index_clustered(OldHeap, RelationGetRelid(index), true);
     658              : 
     659              :     /* Remember info about rel before closing OldHeap */
     660          389 :     relpersistence = OldHeap->rd_rel->relpersistence;
     661          389 :     is_system_catalog = IsSystemRelation(OldHeap);
     662              : 
     663              :     /*
     664              :      * Create the transient table that will receive the re-ordered data.
     665              :      *
     666              :      * OldHeap is already locked, so no need to lock it again.  make_new_heap
     667              :      * obtains AccessExclusiveLock on the new heap and its toast table.
     668              :      */
     669          389 :     OIDNewHeap = make_new_heap(tableOid, tableSpace,
     670              :                                accessMethod,
     671              :                                relpersistence,
     672              :                                NoLock);
     673              :     Assert(CheckRelationOidLockedByMe(OIDNewHeap, AccessExclusiveLock, false));
     674          389 :     NewHeap = table_open(OIDNewHeap, NoLock);
     675              : 
     676              :     /* Copy the heap data into the new table in the desired order */
     677          389 :     copy_table_data(NewHeap, OldHeap, index, verbose,
     678              :                     &swap_toast_by_content, &frozenXid, &cutoffMulti);
     679              : 
     680              : 
     681              :     /* Close relcache entries, but keep lock until transaction commit */
     682          389 :     table_close(OldHeap, NoLock);
     683          389 :     if (index)
     684          139 :         index_close(index, NoLock);
     685              : 
     686              :     /*
     687              :      * Close the new relation so it can be dropped as soon as the storage is
     688              :      * swapped. The relation is not visible to others, so no need to unlock it
     689              :      * explicitly.
     690              :      */
     691          389 :     table_close(NewHeap, NoLock);
     692              : 
     693              :     /*
     694              :      * Swap the physical files of the target and transient tables, then
     695              :      * rebuild the target's indexes and throw away the transient table.
     696              :      */
     697          389 :     finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
     698              :                      swap_toast_by_content, false, true,
     699              :                      frozenXid, cutoffMulti,
     700              :                      relpersistence);
     701          385 : }
     702              : 
     703              : 
     704              : /*
     705              :  * Create the transient table that will be filled with new data during
     706              :  * CLUSTER, ALTER TABLE, and similar operations.  The transient table
     707              :  * duplicates the logical structure of the OldHeap; but will have the
     708              :  * specified physical storage properties NewTableSpace, NewAccessMethod, and
     709              :  * relpersistence.
     710              :  *
     711              :  * After this, the caller should load the new heap with transferred/modified
     712              :  * data, then call finish_heap_swap to complete the operation.
     713              :  */
     714              : Oid
     715         1561 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
     716              :               char relpersistence, LOCKMODE lockmode)
     717              : {
     718              :     TupleDesc   OldHeapDesc;
     719              :     char        NewHeapName[NAMEDATALEN];
     720              :     Oid         OIDNewHeap;
     721              :     Oid         toastid;
     722              :     Relation    OldHeap;
     723              :     HeapTuple   tuple;
     724              :     Datum       reloptions;
     725              :     bool        isNull;
     726              :     Oid         namespaceid;
     727              : 
     728         1561 :     OldHeap = table_open(OIDOldHeap, lockmode);
     729         1561 :     OldHeapDesc = RelationGetDescr(OldHeap);
     730              : 
     731              :     /*
     732              :      * Note that the NewHeap will not receive any of the defaults or
     733              :      * constraints associated with the OldHeap; we don't need 'em, and there's
     734              :      * no reason to spend cycles inserting them into the catalogs only to
     735              :      * delete them.
     736              :      */
     737              : 
     738              :     /*
     739              :      * But we do want to use reloptions of the old heap for new heap.
     740              :      */
     741         1561 :     tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
     742         1561 :     if (!HeapTupleIsValid(tuple))
     743            0 :         elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
     744         1561 :     reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     745              :                                  &isNull);
     746         1561 :     if (isNull)
     747         1469 :         reloptions = (Datum) 0;
     748              : 
     749         1561 :     if (relpersistence == RELPERSISTENCE_TEMP)
     750           98 :         namespaceid = LookupCreationNamespace("pg_temp");
     751              :     else
     752         1463 :         namespaceid = RelationGetNamespace(OldHeap);
     753              : 
     754              :     /*
     755              :      * Create the new heap, using a temporary name in the same namespace as
     756              :      * the existing table.  NOTE: there is some risk of collision with user
     757              :      * relnames.  Working around this seems more trouble than it's worth; in
     758              :      * particular, we can't create the new heap in a different namespace from
     759              :      * the old, or we will have problems with the TEMP status of temp tables.
     760              :      *
     761              :      * Note: the new heap is not a shared relation, even if we are rebuilding
     762              :      * a shared rel.  However, we do make the new heap mapped if the source is
     763              :      * mapped.  This simplifies swap_relation_files, and is absolutely
     764              :      * necessary for rebuilding pg_class, for reasons explained there.
     765              :      */
     766         1561 :     snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
     767              : 
     768         1561 :     OIDNewHeap = heap_create_with_catalog(NewHeapName,
     769              :                                           namespaceid,
     770              :                                           NewTableSpace,
     771              :                                           InvalidOid,
     772              :                                           InvalidOid,
     773              :                                           InvalidOid,
     774         1561 :                                           OldHeap->rd_rel->relowner,
     775              :                                           NewAccessMethod,
     776              :                                           OldHeapDesc,
     777              :                                           NIL,
     778              :                                           RELKIND_RELATION,
     779              :                                           relpersistence,
     780              :                                           false,
     781         1561 :                                           RelationIsMapped(OldHeap),
     782              :                                           ONCOMMIT_NOOP,
     783              :                                           reloptions,
     784              :                                           false,
     785              :                                           true,
     786              :                                           true,
     787              :                                           OIDOldHeap,
     788         1561 :                                           NULL);
     789              :     Assert(OIDNewHeap != InvalidOid);
     790              : 
     791         1561 :     ReleaseSysCache(tuple);
     792              : 
     793              :     /*
     794              :      * Advance command counter so that the newly-created relation's catalog
     795              :      * tuples will be visible to table_open.
     796              :      */
     797         1561 :     CommandCounterIncrement();
     798              : 
     799              :     /*
     800              :      * If necessary, create a TOAST table for the new relation.
     801              :      *
     802              :      * If the relation doesn't have a TOAST table already, we can't need one
     803              :      * for the new relation.  The other way around is possible though: if some
     804              :      * wide columns have been dropped, NewHeapCreateToastTable can decide that
     805              :      * no TOAST table is needed for the new table.
     806              :      *
     807              :      * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
     808              :      * that the TOAST table will be visible for insertion.
     809              :      */
     810         1561 :     toastid = OldHeap->rd_rel->reltoastrelid;
     811         1561 :     if (OidIsValid(toastid))
     812              :     {
     813              :         /* keep the existing toast table's reloptions, if any */
     814          552 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
     815          552 :         if (!HeapTupleIsValid(tuple))
     816            0 :             elog(ERROR, "cache lookup failed for relation %u", toastid);
     817          552 :         reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     818              :                                      &isNull);
     819          552 :         if (isNull)
     820          552 :             reloptions = (Datum) 0;
     821              : 
     822          552 :         NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
     823              : 
     824          552 :         ReleaseSysCache(tuple);
     825              :     }
     826              : 
     827         1561 :     table_close(OldHeap, NoLock);
     828              : 
     829         1561 :     return OIDNewHeap;
     830              : }
     831              : 
     832              : /*
     833              :  * Do the physical copying of table data.
     834              :  *
     835              :  * There are three output parameters:
     836              :  * *pSwapToastByContent is set true if toast tables must be swapped by content.
     837              :  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
     838              :  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
     839              :  */
     840              : static void
     841          389 : copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose,
     842              :                 bool *pSwapToastByContent, TransactionId *pFreezeXid,
     843              :                 MultiXactId *pCutoffMulti)
     844              : {
     845              :     Relation    relRelation;
     846              :     HeapTuple   reltup;
     847              :     Form_pg_class relform;
     848              :     TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
     849              :     TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
     850              :     VacuumParams params;
     851              :     struct VacuumCutoffs cutoffs;
     852              :     bool        use_sort;
     853          389 :     double      num_tuples = 0,
     854          389 :                 tups_vacuumed = 0,
     855          389 :                 tups_recently_dead = 0;
     856              :     BlockNumber num_pages;
     857          389 :     int         elevel = verbose ? INFO : DEBUG2;
     858              :     PGRUsage    ru0;
     859              :     char       *nspname;
     860              : 
     861          389 :     pg_rusage_init(&ru0);
     862              : 
     863              :     /* Store a copy of the namespace name for logging purposes */
     864          389 :     nspname = get_namespace_name(RelationGetNamespace(OldHeap));
     865              : 
     866              :     /*
     867              :      * Their tuple descriptors should be exactly alike, but here we only need
     868              :      * assume that they have the same number of columns.
     869              :      */
     870          389 :     oldTupDesc = RelationGetDescr(OldHeap);
     871          389 :     newTupDesc = RelationGetDescr(NewHeap);
     872              :     Assert(newTupDesc->natts == oldTupDesc->natts);
     873              : 
     874              :     /*
     875              :      * If the OldHeap has a toast table, get lock on the toast table to keep
     876              :      * it from being vacuumed.  This is needed because autovacuum processes
     877              :      * toast tables independently of their main tables, with no lock on the
     878              :      * latter.  If an autovacuum were to start on the toast table after we
     879              :      * compute our OldestXmin below, it would use a later OldestXmin, and then
     880              :      * possibly remove as DEAD toast tuples belonging to main tuples we think
     881              :      * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
     882              :      * tuples.
     883              :      *
     884              :      * We don't need to open the toast relation here, just lock it.  The lock
     885              :      * will be held till end of transaction.
     886              :      */
     887          389 :     if (OldHeap->rd_rel->reltoastrelid)
     888          124 :         LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
     889              : 
     890              :     /*
     891              :      * If both tables have TOAST tables, perform toast swap by content.  It is
     892              :      * possible that the old table has a toast table but the new one doesn't,
     893              :      * if toastable columns have been dropped.  In that case we have to do
     894              :      * swap by links.  This is okay because swap by content is only essential
     895              :      * for system catalogs, and we don't support schema changes for them.
     896              :      */
     897          389 :     if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
     898              :     {
     899          124 :         *pSwapToastByContent = true;
     900              : 
     901              :         /*
     902              :          * When doing swap by content, any toast pointers written into NewHeap
     903              :          * must use the old toast table's OID, because that's where the toast
     904              :          * data will eventually be found.  Set this up by setting rd_toastoid.
     905              :          * This also tells toast_save_datum() to preserve the toast value
     906              :          * OIDs, which we want so as not to invalidate toast pointers in
     907              :          * system catalog caches, and to avoid making multiple copies of a
     908              :          * single toast value.
     909              :          *
     910              :          * Note that we must hold NewHeap open until we are done writing data,
     911              :          * since the relcache will not guarantee to remember this setting once
     912              :          * the relation is closed.  Also, this technique depends on the fact
     913              :          * that no one will try to read from the NewHeap until after we've
     914              :          * finished writing it and swapping the rels --- otherwise they could
     915              :          * follow the toast pointers to the wrong place.  (It would actually
     916              :          * work for values copied over from the old toast table, but not for
     917              :          * any values that we toast which were previously not toasted.)
     918              :          */
     919          124 :         NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
     920              :     }
     921              :     else
     922          265 :         *pSwapToastByContent = false;
     923              : 
     924              :     /*
     925              :      * Compute xids used to freeze and weed out dead tuples and multixacts.
     926              :      * Since we're going to rewrite the whole table anyway, there's no reason
     927              :      * not to be aggressive about this.
     928              :      */
     929          389 :     memset(&params, 0, sizeof(VacuumParams));
     930          389 :     vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
     931              : 
     932              :     /*
     933              :      * FreezeXid will become the table's new relfrozenxid, and that mustn't go
     934              :      * backwards, so take the max.
     935              :      */
     936              :     {
     937          389 :         TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
     938              : 
     939          778 :         if (TransactionIdIsValid(relfrozenxid) &&
     940          389 :             TransactionIdPrecedes(cutoffs.FreezeLimit, relfrozenxid))
     941           59 :             cutoffs.FreezeLimit = relfrozenxid;
     942              :     }
     943              : 
     944              :     /*
     945              :      * MultiXactCutoff, similarly, shouldn't go backwards either.
     946              :      */
     947              :     {
     948          389 :         MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
     949              : 
     950          778 :         if (MultiXactIdIsValid(relminmxid) &&
     951          389 :             MultiXactIdPrecedes(cutoffs.MultiXactCutoff, relminmxid))
     952            0 :             cutoffs.MultiXactCutoff = relminmxid;
     953              :     }
     954              : 
     955              :     /*
     956              :      * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     957              :      * the OldHeap.  We know how to use a sort to duplicate the ordering of a
     958              :      * btree index, and will use seqscan-and-sort for that case if the planner
     959              :      * tells us it's cheaper.  Otherwise, always indexscan if an index is
     960              :      * provided, else plain seqscan.
     961              :      */
     962          389 :     if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
     963          139 :         use_sort = plan_cluster_use_sort(RelationGetRelid(OldHeap),
     964              :                                          RelationGetRelid(OldIndex));
     965              :     else
     966          250 :         use_sort = false;
     967              : 
     968              :     /* Log what we're doing */
     969          389 :     if (OldIndex != NULL && !use_sort)
     970           59 :         ereport(elevel,
     971              :                 errmsg("repacking \"%s.%s\" using index scan on \"%s\"",
     972              :                        nspname,
     973              :                        RelationGetRelationName(OldHeap),
     974              :                        RelationGetRelationName(OldIndex)));
     975          330 :     else if (use_sort)
     976           80 :         ereport(elevel,
     977              :                 errmsg("repacking \"%s.%s\" using sequential scan and sort",
     978              :                        nspname,
     979              :                        RelationGetRelationName(OldHeap)));
     980              :     else
     981          250 :         ereport(elevel,
     982              :                 errmsg("repacking \"%s.%s\" in physical order",
     983              :                        nspname,
     984              :                        RelationGetRelationName(OldHeap)));
     985              : 
     986              :     /*
     987              :      * Hand off the actual copying to AM specific function, the generic code
     988              :      * cannot know how to deal with visibility across AMs. Note that this
     989              :      * routine is allowed to set FreezeXid / MultiXactCutoff to different
     990              :      * values (e.g. because the AM doesn't use freezing).
     991              :      */
     992          389 :     table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
     993              :                                     cutoffs.OldestXmin, &cutoffs.FreezeLimit,
     994              :                                     &cutoffs.MultiXactCutoff,
     995              :                                     &num_tuples, &tups_vacuumed,
     996              :                                     &tups_recently_dead);
     997              : 
     998              :     /* return selected values to caller, get set as relfrozenxid/minmxid */
     999          389 :     *pFreezeXid = cutoffs.FreezeLimit;
    1000          389 :     *pCutoffMulti = cutoffs.MultiXactCutoff;
    1001              : 
    1002              :     /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
    1003          389 :     NewHeap->rd_toastoid = InvalidOid;
    1004              : 
    1005          389 :     num_pages = RelationGetNumberOfBlocks(NewHeap);
    1006              : 
    1007              :     /* Log what we did */
    1008          389 :     ereport(elevel,
    1009              :             (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
    1010              :                     nspname,
    1011              :                     RelationGetRelationName(OldHeap),
    1012              :                     tups_vacuumed, num_tuples,
    1013              :                     RelationGetNumberOfBlocks(OldHeap)),
    1014              :              errdetail("%.0f dead row versions cannot be removed yet.\n"
    1015              :                        "%s.",
    1016              :                        tups_recently_dead,
    1017              :                        pg_rusage_show(&ru0))));
    1018              : 
    1019              :     /* Update pg_class to reflect the correct values of pages and tuples. */
    1020          389 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1021              : 
    1022          389 :     reltup = SearchSysCacheCopy1(RELOID,
    1023              :                                  ObjectIdGetDatum(RelationGetRelid(NewHeap)));
    1024          389 :     if (!HeapTupleIsValid(reltup))
    1025            0 :         elog(ERROR, "cache lookup failed for relation %u",
    1026              :              RelationGetRelid(NewHeap));
    1027          389 :     relform = (Form_pg_class) GETSTRUCT(reltup);
    1028              : 
    1029          389 :     relform->relpages = num_pages;
    1030          389 :     relform->reltuples = num_tuples;
    1031              : 
    1032              :     /* Don't update the stats for pg_class.  See swap_relation_files. */
    1033          389 :     if (RelationGetRelid(OldHeap) != RelationRelationId)
    1034          366 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1035              :     else
    1036           23 :         CacheInvalidateRelcacheByTuple(reltup);
    1037              : 
    1038              :     /* Clean up. */
    1039          389 :     heap_freetuple(reltup);
    1040          389 :     table_close(relRelation, RowExclusiveLock);
    1041              : 
    1042              :     /* Make the update visible */
    1043          389 :     CommandCounterIncrement();
    1044          389 : }
    1045              : 
    1046              : /*
    1047              :  * Swap the physical files of two given relations.
    1048              :  *
    1049              :  * We swap the physical identity (reltablespace, relfilenumber) while keeping
    1050              :  * the same logical identities of the two relations.  relpersistence is also
    1051              :  * swapped, which is critical since it determines where buffers live for each
    1052              :  * relation.
    1053              :  *
    1054              :  * We can swap associated TOAST data in either of two ways: recursively swap
    1055              :  * the physical content of the toast tables (and their indexes), or swap the
    1056              :  * TOAST links in the given relations' pg_class entries.  The former is needed
    1057              :  * to manage rewrites of shared catalogs (where we cannot change the pg_class
    1058              :  * links) while the latter is the only way to handle cases in which a toast
    1059              :  * table is added or removed altogether.
    1060              :  *
    1061              :  * Additionally, the first relation is marked with relfrozenxid set to
    1062              :  * frozenXid.  It seems a bit ugly to have this here, but the caller would
    1063              :  * have to do it anyway, so having it here saves a heap_update.  Note: in
    1064              :  * the swap-toast-links case, we assume we don't need to change the toast
    1065              :  * table's relfrozenxid: the new version of the toast table should already
    1066              :  * have relfrozenxid set to RecentXmin, which is good enough.
    1067              :  *
    1068              :  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
    1069              :  * their OIDs are emitted into mapped_tables[].  This is hacky but beats
    1070              :  * having to look the information up again later in finish_heap_swap.
    1071              :  */
    1072              : static void
    1073         1676 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
    1074              :                     bool swap_toast_by_content,
    1075              :                     bool is_internal,
    1076              :                     TransactionId frozenXid,
    1077              :                     MultiXactId cutoffMulti,
    1078              :                     Oid *mapped_tables)
    1079              : {
    1080              :     Relation    relRelation;
    1081              :     HeapTuple   reltup1,
    1082              :                 reltup2;
    1083              :     Form_pg_class relform1,
    1084              :                 relform2;
    1085              :     RelFileNumber relfilenumber1,
    1086              :                 relfilenumber2;
    1087              :     RelFileNumber swaptemp;
    1088              :     char        swptmpchr;
    1089              :     Oid         relam1,
    1090              :                 relam2;
    1091              : 
    1092              :     /* We need writable copies of both pg_class tuples. */
    1093         1676 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1094              : 
    1095         1676 :     reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    1096         1676 :     if (!HeapTupleIsValid(reltup1))
    1097            0 :         elog(ERROR, "cache lookup failed for relation %u", r1);
    1098         1676 :     relform1 = (Form_pg_class) GETSTRUCT(reltup1);
    1099              : 
    1100         1676 :     reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    1101         1676 :     if (!HeapTupleIsValid(reltup2))
    1102            0 :         elog(ERROR, "cache lookup failed for relation %u", r2);
    1103         1676 :     relform2 = (Form_pg_class) GETSTRUCT(reltup2);
    1104              : 
    1105         1676 :     relfilenumber1 = relform1->relfilenode;
    1106         1676 :     relfilenumber2 = relform2->relfilenode;
    1107         1676 :     relam1 = relform1->relam;
    1108         1676 :     relam2 = relform2->relam;
    1109              : 
    1110         1676 :     if (RelFileNumberIsValid(relfilenumber1) &&
    1111              :         RelFileNumberIsValid(relfilenumber2))
    1112              :     {
    1113              :         /*
    1114              :          * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
    1115              :          * relpersistence
    1116              :          */
    1117              :         Assert(!target_is_pg_class);
    1118              : 
    1119         1587 :         swaptemp = relform1->relfilenode;
    1120         1587 :         relform1->relfilenode = relform2->relfilenode;
    1121         1587 :         relform2->relfilenode = swaptemp;
    1122              : 
    1123         1587 :         swaptemp = relform1->reltablespace;
    1124         1587 :         relform1->reltablespace = relform2->reltablespace;
    1125         1587 :         relform2->reltablespace = swaptemp;
    1126              : 
    1127         1587 :         swaptemp = relform1->relam;
    1128         1587 :         relform1->relam = relform2->relam;
    1129         1587 :         relform2->relam = swaptemp;
    1130              : 
    1131         1587 :         swptmpchr = relform1->relpersistence;
    1132         1587 :         relform1->relpersistence = relform2->relpersistence;
    1133         1587 :         relform2->relpersistence = swptmpchr;
    1134              : 
    1135              :         /* Also swap toast links, if we're swapping by links */
    1136         1587 :         if (!swap_toast_by_content)
    1137              :         {
    1138         1275 :             swaptemp = relform1->reltoastrelid;
    1139         1275 :             relform1->reltoastrelid = relform2->reltoastrelid;
    1140         1275 :             relform2->reltoastrelid = swaptemp;
    1141              :         }
    1142              :     }
    1143              :     else
    1144              :     {
    1145              :         /*
    1146              :          * Mapped-relation case.  Here we have to swap the relation mappings
    1147              :          * instead of modifying the pg_class columns.  Both must be mapped.
    1148              :          */
    1149           89 :         if (RelFileNumberIsValid(relfilenumber1) ||
    1150              :             RelFileNumberIsValid(relfilenumber2))
    1151            0 :             elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
    1152              :                  NameStr(relform1->relname));
    1153              : 
    1154              :         /*
    1155              :          * We can't change the tablespace nor persistence of a mapped rel, and
    1156              :          * we can't handle toast link swapping for one either, because we must
    1157              :          * not apply any critical changes to its pg_class row.  These cases
    1158              :          * should be prevented by upstream permissions tests, so these checks
    1159              :          * are a non-user-facing emergency backstop.
    1160              :          */
    1161           89 :         if (relform1->reltablespace != relform2->reltablespace)
    1162            0 :             elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
    1163              :                  NameStr(relform1->relname));
    1164           89 :         if (relform1->relpersistence != relform2->relpersistence)
    1165            0 :             elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
    1166              :                  NameStr(relform1->relname));
    1167           89 :         if (relform1->relam != relform2->relam)
    1168            0 :             elog(ERROR, "cannot change access method of mapped relation \"%s\"",
    1169              :                  NameStr(relform1->relname));
    1170           89 :         if (!swap_toast_by_content &&
    1171           29 :             (relform1->reltoastrelid || relform2->reltoastrelid))
    1172            0 :             elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
    1173              :                  NameStr(relform1->relname));
    1174              : 
    1175              :         /*
    1176              :          * Fetch the mappings --- shouldn't fail, but be paranoid
    1177              :          */
    1178           89 :         relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
    1179           89 :         if (!RelFileNumberIsValid(relfilenumber1))
    1180            0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1181              :                  NameStr(relform1->relname), r1);
    1182           89 :         relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
    1183           89 :         if (!RelFileNumberIsValid(relfilenumber2))
    1184            0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1185              :                  NameStr(relform2->relname), r2);
    1186              : 
    1187              :         /*
    1188              :          * Send replacement mappings to relmapper.  Note these won't actually
    1189              :          * take effect until CommandCounterIncrement.
    1190              :          */
    1191           89 :         RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
    1192           89 :         RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
    1193              : 
    1194              :         /* Pass OIDs of mapped r2 tables back to caller */
    1195           89 :         *mapped_tables++ = r2;
    1196              :     }
    1197              : 
    1198              :     /*
    1199              :      * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
    1200              :      * subtransaction. The rel2 storage (swapped from rel1) may or may not be
    1201              :      * new.
    1202              :      */
    1203              :     {
    1204              :         Relation    rel1,
    1205              :                     rel2;
    1206              : 
    1207         1676 :         rel1 = relation_open(r1, NoLock);
    1208         1676 :         rel2 = relation_open(r2, NoLock);
    1209         1676 :         rel2->rd_createSubid = rel1->rd_createSubid;
    1210         1676 :         rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
    1211         1676 :         rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
    1212         1676 :         RelationAssumeNewRelfilelocator(rel1);
    1213         1676 :         relation_close(rel1, NoLock);
    1214         1676 :         relation_close(rel2, NoLock);
    1215              :     }
    1216              : 
    1217              :     /*
    1218              :      * In the case of a shared catalog, these next few steps will only affect
    1219              :      * our own database's pg_class row; but that's okay, because they are all
    1220              :      * noncritical updates.  That's also an important fact for the case of a
    1221              :      * mapped catalog, because it's possible that we'll commit the map change
    1222              :      * and then fail to commit the pg_class update.
    1223              :      */
    1224              : 
    1225              :     /* set rel1's frozen Xid and minimum MultiXid */
    1226         1676 :     if (relform1->relkind != RELKIND_INDEX)
    1227              :     {
    1228              :         Assert(!TransactionIdIsValid(frozenXid) ||
    1229              :                TransactionIdIsNormal(frozenXid));
    1230         1552 :         relform1->relfrozenxid = frozenXid;
    1231         1552 :         relform1->relminmxid = cutoffMulti;
    1232              :     }
    1233              : 
    1234              :     /* swap size statistics too, since new rel has freshly-updated stats */
    1235              :     {
    1236              :         int32       swap_pages;
    1237              :         float4      swap_tuples;
    1238              :         int32       swap_allvisible;
    1239              :         int32       swap_allfrozen;
    1240              : 
    1241         1676 :         swap_pages = relform1->relpages;
    1242         1676 :         relform1->relpages = relform2->relpages;
    1243         1676 :         relform2->relpages = swap_pages;
    1244              : 
    1245         1676 :         swap_tuples = relform1->reltuples;
    1246         1676 :         relform1->reltuples = relform2->reltuples;
    1247         1676 :         relform2->reltuples = swap_tuples;
    1248              : 
    1249         1676 :         swap_allvisible = relform1->relallvisible;
    1250         1676 :         relform1->relallvisible = relform2->relallvisible;
    1251         1676 :         relform2->relallvisible = swap_allvisible;
    1252              : 
    1253         1676 :         swap_allfrozen = relform1->relallfrozen;
    1254         1676 :         relform1->relallfrozen = relform2->relallfrozen;
    1255         1676 :         relform2->relallfrozen = swap_allfrozen;
    1256              :     }
    1257              : 
    1258              :     /*
    1259              :      * Update the tuples in pg_class --- unless the target relation of the
    1260              :      * swap is pg_class itself.  In that case, there is zero point in making
    1261              :      * changes because we'd be updating the old data that we're about to throw
    1262              :      * away.  Because the real work being done here for a mapped relation is
    1263              :      * just to change the relation map settings, it's all right to not update
    1264              :      * the pg_class rows in this case. The most important changes will instead
    1265              :      * be performed later, in finish_heap_swap() itself.
    1266              :      */
    1267         1676 :     if (!target_is_pg_class)
    1268              :     {
    1269              :         CatalogIndexState indstate;
    1270              : 
    1271         1653 :         indstate = CatalogOpenIndexes(relRelation);
    1272         1653 :         CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
    1273              :                                    indstate);
    1274         1653 :         CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
    1275              :                                    indstate);
    1276         1653 :         CatalogCloseIndexes(indstate);
    1277              :     }
    1278              :     else
    1279              :     {
    1280              :         /* no update ... but we do still need relcache inval */
    1281           23 :         CacheInvalidateRelcacheByTuple(reltup1);
    1282           23 :         CacheInvalidateRelcacheByTuple(reltup2);
    1283              :     }
    1284              : 
    1285              :     /*
    1286              :      * Now that pg_class has been updated with its relevant information for
    1287              :      * the swap, update the dependency of the relations to point to their new
    1288              :      * table AM, if it has changed.
    1289              :      */
    1290         1676 :     if (relam1 != relam2)
    1291              :     {
    1292           24 :         if (changeDependencyFor(RelationRelationId,
    1293              :                                 r1,
    1294              :                                 AccessMethodRelationId,
    1295              :                                 relam1,
    1296              :                                 relam2) != 1)
    1297            0 :             elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
    1298              :                  get_namespace_name(get_rel_namespace(r1)),
    1299              :                  get_rel_name(r1));
    1300           24 :         if (changeDependencyFor(RelationRelationId,
    1301              :                                 r2,
    1302              :                                 AccessMethodRelationId,
    1303              :                                 relam2,
    1304              :                                 relam1) != 1)
    1305            0 :             elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
    1306              :                  get_namespace_name(get_rel_namespace(r2)),
    1307              :                  get_rel_name(r2));
    1308              :     }
    1309              : 
    1310              :     /*
    1311              :      * Post alter hook for modified relations. The change to r2 is always
    1312              :      * internal, but r1 depends on the invocation context.
    1313              :      */
    1314         1676 :     InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
    1315              :                                  InvalidOid, is_internal);
    1316         1676 :     InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
    1317              :                                  InvalidOid, true);
    1318              : 
    1319              :     /*
    1320              :      * If we have toast tables associated with the relations being swapped,
    1321              :      * deal with them too.
    1322              :      */
    1323         1676 :     if (relform1->reltoastrelid || relform2->reltoastrelid)
    1324              :     {
    1325          523 :         if (swap_toast_by_content)
    1326              :         {
    1327          124 :             if (relform1->reltoastrelid && relform2->reltoastrelid)
    1328              :             {
    1329              :                 /* Recursively swap the contents of the toast tables */
    1330          124 :                 swap_relation_files(relform1->reltoastrelid,
    1331              :                                     relform2->reltoastrelid,
    1332              :                                     target_is_pg_class,
    1333              :                                     swap_toast_by_content,
    1334              :                                     is_internal,
    1335              :                                     frozenXid,
    1336              :                                     cutoffMulti,
    1337              :                                     mapped_tables);
    1338              :             }
    1339              :             else
    1340              :             {
    1341              :                 /* caller messed up */
    1342            0 :                 elog(ERROR, "cannot swap toast files by content when there's only one");
    1343              :             }
    1344              :         }
    1345              :         else
    1346              :         {
    1347              :             /*
    1348              :              * We swapped the ownership links, so we need to change dependency
    1349              :              * data to match.
    1350              :              *
    1351              :              * NOTE: it is possible that only one table has a toast table.
    1352              :              *
    1353              :              * NOTE: at present, a TOAST table's only dependency is the one on
    1354              :              * its owning table.  If more are ever created, we'd need to use
    1355              :              * something more selective than deleteDependencyRecordsFor() to
    1356              :              * get rid of just the link we want.
    1357              :              */
    1358              :             ObjectAddress baseobject,
    1359              :                         toastobject;
    1360              :             long        count;
    1361              : 
    1362              :             /*
    1363              :              * We disallow this case for system catalogs, to avoid the
    1364              :              * possibility that the catalog we're rebuilding is one of the
    1365              :              * ones the dependency changes would change.  It's too late to be
    1366              :              * making any data changes to the target catalog.
    1367              :              */
    1368          399 :             if (IsSystemClass(r1, relform1))
    1369            0 :                 elog(ERROR, "cannot swap toast files by links for system catalogs");
    1370              : 
    1371              :             /* Delete old dependencies */
    1372          399 :             if (relform1->reltoastrelid)
    1373              :             {
    1374          378 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1375              :                                                    relform1->reltoastrelid,
    1376              :                                                    false);
    1377          378 :                 if (count != 1)
    1378            0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1379              :                          count);
    1380              :             }
    1381          399 :             if (relform2->reltoastrelid)
    1382              :             {
    1383          399 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1384              :                                                    relform2->reltoastrelid,
    1385              :                                                    false);
    1386          399 :                 if (count != 1)
    1387            0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1388              :                          count);
    1389              :             }
    1390              : 
    1391              :             /* Register new dependencies */
    1392          399 :             baseobject.classId = RelationRelationId;
    1393          399 :             baseobject.objectSubId = 0;
    1394          399 :             toastobject.classId = RelationRelationId;
    1395          399 :             toastobject.objectSubId = 0;
    1396              : 
    1397          399 :             if (relform1->reltoastrelid)
    1398              :             {
    1399          378 :                 baseobject.objectId = r1;
    1400          378 :                 toastobject.objectId = relform1->reltoastrelid;
    1401          378 :                 recordDependencyOn(&toastobject, &baseobject,
    1402              :                                    DEPENDENCY_INTERNAL);
    1403              :             }
    1404              : 
    1405          399 :             if (relform2->reltoastrelid)
    1406              :             {
    1407          399 :                 baseobject.objectId = r2;
    1408          399 :                 toastobject.objectId = relform2->reltoastrelid;
    1409          399 :                 recordDependencyOn(&toastobject, &baseobject,
    1410              :                                    DEPENDENCY_INTERNAL);
    1411              :             }
    1412              :         }
    1413              :     }
    1414              : 
    1415              :     /*
    1416              :      * If we're swapping two toast tables by content, do the same for their
    1417              :      * valid index. The swap can actually be safely done only if the relations
    1418              :      * have indexes.
    1419              :      */
    1420         1676 :     if (swap_toast_by_content &&
    1421          372 :         relform1->relkind == RELKIND_TOASTVALUE &&
    1422          124 :         relform2->relkind == RELKIND_TOASTVALUE)
    1423              :     {
    1424              :         Oid         toastIndex1,
    1425              :                     toastIndex2;
    1426              : 
    1427              :         /* Get valid index for each relation */
    1428          124 :         toastIndex1 = toast_get_valid_index(r1,
    1429              :                                             AccessExclusiveLock);
    1430          124 :         toastIndex2 = toast_get_valid_index(r2,
    1431              :                                             AccessExclusiveLock);
    1432              : 
    1433          124 :         swap_relation_files(toastIndex1,
    1434              :                             toastIndex2,
    1435              :                             target_is_pg_class,
    1436              :                             swap_toast_by_content,
    1437              :                             is_internal,
    1438              :                             InvalidTransactionId,
    1439              :                             InvalidMultiXactId,
    1440              :                             mapped_tables);
    1441              :     }
    1442              : 
    1443              :     /* Clean up. */
    1444         1676 :     heap_freetuple(reltup1);
    1445         1676 :     heap_freetuple(reltup2);
    1446              : 
    1447         1676 :     table_close(relRelation, RowExclusiveLock);
    1448         1676 : }
    1449              : 
    1450              : /*
    1451              :  * Remove the transient table that was built by make_new_heap, and finish
    1452              :  * cleaning up (including rebuilding all indexes on the old heap).
    1453              :  */
    1454              : void
    1455         1428 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
    1456              :                  bool is_system_catalog,
    1457              :                  bool swap_toast_by_content,
    1458              :                  bool check_constraints,
    1459              :                  bool is_internal,
    1460              :                  TransactionId frozenXid,
    1461              :                  MultiXactId cutoffMulti,
    1462              :                  char newrelpersistence)
    1463              : {
    1464              :     ObjectAddress object;
    1465              :     Oid         mapped_tables[4];
    1466              :     int         reindex_flags;
    1467         1428 :     ReindexParams reindex_params = {0};
    1468              :     int         i;
    1469              : 
    1470              :     /* Report that we are now swapping relation files */
    1471         1428 :     pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
    1472              :                                  PROGRESS_REPACK_PHASE_SWAP_REL_FILES);
    1473              : 
    1474              :     /* Zero out possible results from swap_relation_files */
    1475         1428 :     memset(mapped_tables, 0, sizeof(mapped_tables));
    1476              : 
    1477              :     /*
    1478              :      * Swap the contents of the heap relations (including any toast tables).
    1479              :      * Also set old heap's relfrozenxid to frozenXid.
    1480              :      */
    1481         1428 :     swap_relation_files(OIDOldHeap, OIDNewHeap,
    1482              :                         (OIDOldHeap == RelationRelationId),
    1483              :                         swap_toast_by_content, is_internal,
    1484              :                         frozenXid, cutoffMulti, mapped_tables);
    1485              : 
    1486              :     /*
    1487              :      * If it's a system catalog, queue a sinval message to flush all catcaches
    1488              :      * on the catalog when we reach CommandCounterIncrement.
    1489              :      */
    1490         1428 :     if (is_system_catalog)
    1491          121 :         CacheInvalidateCatalog(OIDOldHeap);
    1492              : 
    1493              :     /*
    1494              :      * Rebuild each index on the relation (but not the toast table, which is
    1495              :      * all-new at this point).  It is important to do this before the DROP
    1496              :      * step because if we are processing a system catalog that will be used
    1497              :      * during DROP, we want to have its indexes available.  There is no
    1498              :      * advantage to the other order anyway because this is all transactional,
    1499              :      * so no chance to reclaim disk space before commit.  We do not need a
    1500              :      * final CommandCounterIncrement() because reindex_relation does it.
    1501              :      *
    1502              :      * Note: because index_build is called via reindex_relation, it will never
    1503              :      * set indcheckxmin true for the indexes.  This is OK even though in some
    1504              :      * sense we are building new indexes rather than rebuilding existing ones,
    1505              :      * because the new heap won't contain any HOT chains at all, let alone
    1506              :      * broken ones, so it can't be necessary to set indcheckxmin.
    1507              :      */
    1508         1428 :     reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    1509         1428 :     if (check_constraints)
    1510         1039 :         reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
    1511              : 
    1512              :     /*
    1513              :      * Ensure that the indexes have the same persistence as the parent
    1514              :      * relation.
    1515              :      */
    1516         1428 :     if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
    1517           25 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    1518         1403 :     else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
    1519         1350 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
    1520              : 
    1521              :     /* Report that we are now reindexing relations */
    1522         1428 :     pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
    1523              :                                  PROGRESS_REPACK_PHASE_REBUILD_INDEX);
    1524              : 
    1525         1428 :     reindex_relation(NULL, OIDOldHeap, reindex_flags, &reindex_params);
    1526              : 
    1527              :     /* Report that we are now doing clean up */
    1528         1416 :     pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
    1529              :                                  PROGRESS_REPACK_PHASE_FINAL_CLEANUP);
    1530              : 
    1531              :     /*
    1532              :      * If the relation being rebuilt is pg_class, swap_relation_files()
    1533              :      * couldn't update pg_class's own pg_class entry (check comments in
    1534              :      * swap_relation_files()), thus relfrozenxid was not updated. That's
    1535              :      * annoying because a potential reason for doing a VACUUM FULL is an
    1536              :      * imminent or actual anti-wraparound shutdown.  So, now that we can
    1537              :      * access the new relation using its indices, update relfrozenxid.
    1538              :      * pg_class doesn't have a toast relation, so we don't need to update the
    1539              :      * corresponding toast relation. Note that there's little point moving all
    1540              :      * relfrozenxid updates here since swap_relation_files() needs to write to
    1541              :      * pg_class for non-mapped relations anyway.
    1542              :      */
    1543         1416 :     if (OIDOldHeap == RelationRelationId)
    1544              :     {
    1545              :         Relation    relRelation;
    1546              :         HeapTuple   reltup;
    1547              :         Form_pg_class relform;
    1548              : 
    1549           23 :         relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1550              : 
    1551           23 :         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    1552           23 :         if (!HeapTupleIsValid(reltup))
    1553            0 :             elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    1554           23 :         relform = (Form_pg_class) GETSTRUCT(reltup);
    1555              : 
    1556           23 :         relform->relfrozenxid = frozenXid;
    1557           23 :         relform->relminmxid = cutoffMulti;
    1558              : 
    1559           23 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1560              : 
    1561           23 :         table_close(relRelation, RowExclusiveLock);
    1562              :     }
    1563              : 
    1564              :     /* Destroy new heap with old filenumber */
    1565         1416 :     object.classId = RelationRelationId;
    1566         1416 :     object.objectId = OIDNewHeap;
    1567         1416 :     object.objectSubId = 0;
    1568              : 
    1569              :     /*
    1570              :      * The new relation is local to our transaction and we know nothing
    1571              :      * depends on it, so DROP_RESTRICT should be OK.
    1572              :      */
    1573         1416 :     performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
    1574              : 
    1575              :     /* performDeletion does CommandCounterIncrement at end */
    1576              : 
    1577              :     /*
    1578              :      * Now we must remove any relation mapping entries that we set up for the
    1579              :      * transient table, as well as its toast table and toast index if any. If
    1580              :      * we fail to do this before commit, the relmapper will complain about new
    1581              :      * permanent map entries being added post-bootstrap.
    1582              :      */
    1583         1505 :     for (i = 0; OidIsValid(mapped_tables[i]); i++)
    1584           89 :         RelationMapRemoveMapping(mapped_tables[i]);
    1585              : 
    1586              :     /*
    1587              :      * At this point, everything is kosher except that, if we did toast swap
    1588              :      * by links, the toast table's name corresponds to the transient table.
    1589              :      * The name is irrelevant to the backend because it's referenced by OID,
    1590              :      * but users looking at the catalogs could be confused.  Rename it to
    1591              :      * prevent this problem.
    1592              :      *
    1593              :      * Note no lock required on the relation, because we already hold an
    1594              :      * exclusive lock on it.
    1595              :      */
    1596         1416 :     if (!swap_toast_by_content)
    1597              :     {
    1598              :         Relation    newrel;
    1599              : 
    1600         1292 :         newrel = table_open(OIDOldHeap, NoLock);
    1601         1292 :         if (OidIsValid(newrel->rd_rel->reltoastrelid))
    1602              :         {
    1603              :             Oid         toastidx;
    1604              :             char        NewToastName[NAMEDATALEN];
    1605              : 
    1606              :             /* Get the associated valid index to be renamed */
    1607          378 :             toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
    1608              :                                              NoLock);
    1609              : 
    1610              :             /* rename the toast table ... */
    1611          378 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
    1612              :                      OIDOldHeap);
    1613          378 :             RenameRelationInternal(newrel->rd_rel->reltoastrelid,
    1614              :                                    NewToastName, true, false);
    1615              : 
    1616              :             /* ... and its valid index too. */
    1617          378 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
    1618              :                      OIDOldHeap);
    1619              : 
    1620          378 :             RenameRelationInternal(toastidx,
    1621              :                                    NewToastName, true, true);
    1622              : 
    1623              :             /*
    1624              :              * Reset the relrewrite for the toast. The command-counter
    1625              :              * increment is required here as we are about to update the tuple
    1626              :              * that is updated as part of RenameRelationInternal.
    1627              :              */
    1628          378 :             CommandCounterIncrement();
    1629          378 :             ResetRelRewrite(newrel->rd_rel->reltoastrelid);
    1630              :         }
    1631         1292 :         relation_close(newrel, NoLock);
    1632              :     }
    1633              : 
    1634              :     /* if it's not a catalog table, clear any missing attribute settings */
    1635         1416 :     if (!is_system_catalog)
    1636              :     {
    1637              :         Relation    newrel;
    1638              : 
    1639         1295 :         newrel = table_open(OIDOldHeap, NoLock);
    1640         1295 :         RelationClearMissing(newrel);
    1641         1295 :         relation_close(newrel, NoLock);
    1642              :     }
    1643         1416 : }
    1644              : 
    1645              : /*
    1646              :  * Determine which relations to process, when REPACK/CLUSTER is called
    1647              :  * without specifying a table name.  The exact process depends on whether
    1648              :  * USING INDEX was given or not, and in any case we only return tables and
    1649              :  * materialized views that the current user has privileges to repack/cluster.
    1650              :  *
    1651              :  * If USING INDEX was given, we scan pg_index to find those that have
    1652              :  * indisclustered set; if it was not given, scan pg_class and return all
    1653              :  * tables.
    1654              :  *
    1655              :  * Return it as a list of RelToCluster in the given memory context.
    1656              :  */
    1657              : static List *
    1658           17 : get_tables_to_repack(RepackCommand cmd, bool usingindex, MemoryContext permcxt)
    1659              : {
    1660              :     Relation    catalog;
    1661              :     TableScanDesc scan;
    1662              :     HeapTuple   tuple;
    1663           17 :     List       *rtcs = NIL;
    1664              : 
    1665           17 :     if (usingindex)
    1666              :     {
    1667              :         ScanKeyData entry;
    1668              : 
    1669              :         /*
    1670              :          * For USING INDEX, scan pg_index to find those with indisclustered.
    1671              :          */
    1672           13 :         catalog = table_open(IndexRelationId, AccessShareLock);
    1673           13 :         ScanKeyInit(&entry,
    1674              :                     Anum_pg_index_indisclustered,
    1675              :                     BTEqualStrategyNumber, F_BOOLEQ,
    1676              :                     BoolGetDatum(true));
    1677           13 :         scan = table_beginscan_catalog(catalog, 1, &entry);
    1678           25 :         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1679              :         {
    1680              :             RelToCluster *rtc;
    1681              :             Form_pg_index index;
    1682              :             MemoryContext oldcxt;
    1683              : 
    1684           12 :             index = (Form_pg_index) GETSTRUCT(tuple);
    1685              : 
    1686              :             /*
    1687              :              * Try to obtain a light lock on the index's table, to ensure it
    1688              :              * doesn't go away while we collect the list.  If we cannot, just
    1689              :              * disregard it.  Be sure to release this if we ultimately decide
    1690              :              * not to process the table!
    1691              :              */
    1692           12 :             if (!ConditionalLockRelationOid(index->indrelid, AccessShareLock))
    1693            0 :                 continue;
    1694              : 
    1695              :             /* Verify that the table still exists; skip if not */
    1696           12 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(index->indrelid)))
    1697              :             {
    1698            0 :                 UnlockRelationOid(index->indrelid, AccessShareLock);
    1699            0 :                 continue;
    1700              :             }
    1701              : 
    1702              :             /* noisily skip rels which the user can't process */
    1703           12 :             if (!repack_is_permitted_for_relation(cmd, index->indrelid,
    1704              :                                                   GetUserId()))
    1705              :             {
    1706            8 :                 UnlockRelationOid(index->indrelid, AccessShareLock);
    1707            8 :                 continue;
    1708              :             }
    1709              : 
    1710              :             /* Use a permanent memory context for the result list */
    1711            4 :             oldcxt = MemoryContextSwitchTo(permcxt);
    1712            4 :             rtc = palloc_object(RelToCluster);
    1713            4 :             rtc->tableOid = index->indrelid;
    1714            4 :             rtc->indexOid = index->indexrelid;
    1715            4 :             rtcs = lappend(rtcs, rtc);
    1716            4 :             MemoryContextSwitchTo(oldcxt);
    1717              :         }
    1718              :     }
    1719              :     else
    1720              :     {
    1721            4 :         catalog = table_open(RelationRelationId, AccessShareLock);
    1722            4 :         scan = table_beginscan_catalog(catalog, 0, NULL);
    1723              : 
    1724         6306 :         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1725              :         {
    1726              :             RelToCluster *rtc;
    1727              :             Form_pg_class class;
    1728              :             MemoryContext oldcxt;
    1729              : 
    1730         6302 :             class = (Form_pg_class) GETSTRUCT(tuple);
    1731              : 
    1732              :             /*
    1733              :              * Try to obtain a light lock on the table, to ensure it doesn't
    1734              :              * go away while we collect the list.  If we cannot, just
    1735              :              * disregard the table.  Be sure to release this if we ultimately
    1736              :              * decide not to process the table!
    1737              :              */
    1738         6302 :             if (!ConditionalLockRelationOid(class->oid, AccessShareLock))
    1739            0 :                 continue;
    1740              : 
    1741              :             /* Verify that the table still exists */
    1742         6302 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(class->oid)))
    1743              :             {
    1744            6 :                 UnlockRelationOid(class->oid, AccessShareLock);
    1745            6 :                 continue;
    1746              :             }
    1747              : 
    1748              :             /* Can only process plain tables and matviews */
    1749         6296 :             if (class->relkind != RELKIND_RELATION &&
    1750         4208 :                 class->relkind != RELKIND_MATVIEW)
    1751              :             {
    1752         4180 :                 UnlockRelationOid(class->oid, AccessShareLock);
    1753         4180 :                 continue;
    1754              :             }
    1755              : 
    1756              :             /* noisily skip rels which the user can't process */
    1757         2116 :             if (!repack_is_permitted_for_relation(cmd, class->oid,
    1758              :                                                   GetUserId()))
    1759              :             {
    1760         2108 :                 UnlockRelationOid(class->oid, AccessShareLock);
    1761         2108 :                 continue;
    1762              :             }
    1763              : 
    1764              :             /* Use a permanent memory context for the result list */
    1765            8 :             oldcxt = MemoryContextSwitchTo(permcxt);
    1766            8 :             rtc = palloc_object(RelToCluster);
    1767            8 :             rtc->tableOid = class->oid;
    1768            8 :             rtc->indexOid = InvalidOid;
    1769            8 :             rtcs = lappend(rtcs, rtc);
    1770            8 :             MemoryContextSwitchTo(oldcxt);
    1771              :         }
    1772              :     }
    1773              : 
    1774           17 :     table_endscan(scan);
    1775           17 :     relation_close(catalog, AccessShareLock);
    1776              : 
    1777           17 :     return rtcs;
    1778              : }
    1779              : 
    1780              : /*
    1781              :  * Given a partitioned table or its index, return a list of RelToCluster for
    1782              :  * all the leaf child tables/indexes.
    1783              :  *
    1784              :  * 'rel_is_index' tells whether 'relid' is that of an index (true) or of the
    1785              :  * owning relation.
    1786              :  */
    1787              : static List *
    1788           20 : get_tables_to_repack_partitioned(RepackCommand cmd, Oid relid,
    1789              :                                  bool rel_is_index, MemoryContext permcxt)
    1790              : {
    1791              :     List       *inhoids;
    1792           20 :     List       *rtcs = NIL;
    1793              : 
    1794              :     /*
    1795              :      * Do not lock the children until they're processed.  Note that we do hold
    1796              :      * a lock on the parent partitioned table.
    1797              :      */
    1798           20 :     inhoids = find_all_inheritors(relid, NoLock, NULL);
    1799          148 :     foreach_oid(child_oid, inhoids)
    1800              :     {
    1801              :         Oid         table_oid,
    1802              :                     index_oid;
    1803              :         RelToCluster *rtc;
    1804              :         MemoryContext oldcxt;
    1805              : 
    1806          108 :         if (rel_is_index)
    1807              :         {
    1808              :             /* consider only leaf indexes */
    1809           80 :             if (get_rel_relkind(child_oid) != RELKIND_INDEX)
    1810           40 :                 continue;
    1811              : 
    1812           40 :             table_oid = IndexGetRelation(child_oid, false);
    1813           40 :             index_oid = child_oid;
    1814              :         }
    1815              :         else
    1816              :         {
    1817              :             /* consider only leaf relations */
    1818           28 :             if (get_rel_relkind(child_oid) != RELKIND_RELATION)
    1819           16 :                 continue;
    1820              : 
    1821           12 :             table_oid = child_oid;
    1822           12 :             index_oid = InvalidOid;
    1823              :         }
    1824              : 
    1825              :         /*
    1826              :          * It's possible that the user does not have privileges to REPACK/CLUSTER the
    1827              :          * leaf partition despite having them on the partitioned table.  Skip
    1828              :          * if so.
    1829              :          */
    1830           52 :         if (!repack_is_permitted_for_relation(cmd, table_oid, GetUserId()))
    1831           12 :             continue;
    1832              : 
    1833              :         /* Use a permanent memory context for the result list */
    1834           40 :         oldcxt = MemoryContextSwitchTo(permcxt);
    1835           40 :         rtc = palloc_object(RelToCluster);
    1836           40 :         rtc->tableOid = table_oid;
    1837           40 :         rtc->indexOid = index_oid;
    1838           40 :         rtcs = lappend(rtcs, rtc);
    1839           40 :         MemoryContextSwitchTo(oldcxt);
    1840              :     }
    1841              : 
    1842           20 :     return rtcs;
    1843              : }
    1844              : 
    1845              : 
    1846              : /*
    1847              :  * Return whether userid has privileges to REPACK relid.  If not, this
    1848              :  * function emits a WARNING.
    1849              :  */
    1850              : static bool
    1851         2232 : repack_is_permitted_for_relation(RepackCommand cmd, Oid relid, Oid userid)
    1852              : {
    1853              :     Assert(cmd == REPACK_COMMAND_CLUSTER || cmd == REPACK_COMMAND_REPACK);
    1854              : 
    1855         2232 :     if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
    1856          104 :         return true;
    1857              : 
    1858         2128 :     ereport(WARNING,
    1859              :             errmsg("permission denied to execute %s on \"%s\", skipping it",
    1860              :                    RepackCommandAsString(cmd),
    1861              :                    get_rel_name(relid)));
    1862              : 
    1863         2128 :     return false;
    1864              : }
    1865              : 
    1866              : 
    1867              : /*
    1868              :  * Given a RepackStmt with an indicated relation name, resolve the relation
    1869              :  * name, obtain lock on it, then determine what to do based on the relation
    1870              :  * type: if it's a table and not partitioned, repack it as indicated (using an
    1871              :  * existing clustered index, or following the given one), and return NULL.
    1872              :  *
    1873              :  * On the other hand, if the table is partitioned, do nothing further and
    1874              :  * instead return the opened and locked relcache entry, so that caller can
    1875              :  * process the partitions using the multiple-table handling code.  In this
    1876              :  * case, if an index name is given, it's up to the caller to resolve it.
    1877              :  */
    1878              : static Relation
    1879          163 : process_single_relation(RepackStmt *stmt, ClusterParams *params)
    1880              : {
    1881              :     Relation    rel;
    1882              :     Oid         tableOid;
    1883              : 
    1884              :     Assert(stmt->relation != NULL);
    1885              :     Assert(stmt->command == REPACK_COMMAND_CLUSTER ||
    1886              :            stmt->command == REPACK_COMMAND_REPACK);
    1887              : 
    1888              :     /*
    1889              :      * Make sure ANALYZE is specified if a column list is present.
    1890              :      */
    1891          163 :     if ((params->options & CLUOPT_ANALYZE) == 0 && stmt->relation->va_cols != NIL)
    1892            4 :         ereport(ERROR,
    1893              :                 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1894              :                 errmsg("ANALYZE option must be specified when a column list is provided"));
    1895              : 
    1896              :     /*
    1897              :      * Find, lock, and check permissions on the table.  We obtain
    1898              :      * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
    1899              :      * single-transaction case.
    1900              :      */
    1901          159 :     tableOid = RangeVarGetRelidExtended(stmt->relation->relation,
    1902              :                                         AccessExclusiveLock,
    1903              :                                         0,
    1904              :                                         RangeVarCallbackMaintainsTable,
    1905              :                                         NULL);
    1906          151 :     rel = table_open(tableOid, NoLock);
    1907              : 
    1908              :     /*
    1909              :      * Reject clustering a remote temp table ... their local buffer manager is
    1910              :      * not going to cope.
    1911              :      */
    1912          151 :     if (RELATION_IS_OTHER_TEMP(rel))
    1913            0 :         ereport(ERROR,
    1914              :                 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1915              :         /*- translator: first %s is name of a SQL command, eg. REPACK */
    1916              :                 errmsg("cannot execute %s on temporary tables of other sessions",
    1917              :                        RepackCommandAsString(stmt->command)));
    1918              : 
    1919              :     /*
    1920              :      * For partitioned tables, let caller handle this.  Otherwise, process it
    1921              :      * here and we're done.
    1922              :      */
    1923          151 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    1924           32 :         return rel;
    1925              :     else
    1926              :     {
    1927              :         Oid         indexOid;
    1928              : 
    1929          119 :         indexOid = determine_clustered_index(rel, stmt->usingindex,
    1930          119 :                                              stmt->indexname);
    1931          115 :         if (OidIsValid(indexOid))
    1932          107 :             check_index_is_clusterable(rel, indexOid, AccessExclusiveLock);
    1933          115 :         cluster_rel(stmt->command, rel, indexOid, params);
    1934              : 
    1935              :         /*
    1936              :          * Do an analyze, if requested.  We close the transaction and start a
    1937              :          * new one, so that we don't hold the stronger lock for longer than
    1938              :          * needed.
    1939              :          */
    1940          115 :         if (params->options & CLUOPT_ANALYZE)
    1941              :         {
    1942            8 :             VacuumParams vac_params = {0};
    1943              : 
    1944            8 :             PopActiveSnapshot();
    1945            8 :             CommitTransactionCommand();
    1946              : 
    1947            8 :             StartTransactionCommand();
    1948            8 :             PushActiveSnapshot(GetTransactionSnapshot());
    1949              : 
    1950            8 :             vac_params.options |= VACOPT_ANALYZE;
    1951            8 :             if (params->options & CLUOPT_VERBOSE)
    1952            0 :                 vac_params.options |= VACOPT_VERBOSE;
    1953            8 :             analyze_rel(tableOid, NULL, &vac_params,
    1954            8 :                         stmt->relation->va_cols, true, NULL);
    1955            8 :             PopActiveSnapshot();
    1956            8 :             CommandCounterIncrement();
    1957              :         }
    1958              : 
    1959          115 :         return NULL;
    1960              :     }
    1961              : }
    1962              : 
    1963              : /*
    1964              :  * Given a relation and the usingindex/indexname options in a
    1965              :  * REPACK USING INDEX or CLUSTER command, return the OID of the
    1966              :  * index to use for clustering the table.
    1967              :  *
    1968              :  * Caller must hold lock on the relation so that the set of indexes
    1969              :  * doesn't change, and must call check_index_is_clusterable.
    1970              :  */
    1971              : static Oid
    1972          139 : determine_clustered_index(Relation rel, bool usingindex, const char *indexname)
    1973              : {
    1974              :     Oid         indexOid;
    1975              : 
    1976          139 :     if (indexname == NULL && usingindex)
    1977              :     {
    1978              :         /*
    1979              :          * If USING INDEX with no name is given, find a clustered index, or
    1980              :          * error out if none.
    1981              :          */
    1982           19 :         indexOid = InvalidOid;
    1983           42 :         foreach_oid(idxoid, RelationGetIndexList(rel))
    1984              :         {
    1985           19 :             if (get_index_isclustered(idxoid))
    1986              :             {
    1987           15 :                 indexOid = idxoid;
    1988           15 :                 break;
    1989              :             }
    1990              :         }
    1991              : 
    1992           19 :         if (!OidIsValid(indexOid))
    1993            4 :             ereport(ERROR,
    1994              :                     errcode(ERRCODE_UNDEFINED_OBJECT),
    1995              :                     errmsg("there is no previously clustered index for table \"%s\"",
    1996              :                            RelationGetRelationName(rel)));
    1997              :     }
    1998          120 :     else if (indexname != NULL)
    1999              :     {
    2000              :         /* An index was specified; obtain its OID. */
    2001          112 :         indexOid = get_relname_relid(indexname, rel->rd_rel->relnamespace);
    2002          112 :         if (!OidIsValid(indexOid))
    2003            0 :             ereport(ERROR,
    2004              :                     errcode(ERRCODE_UNDEFINED_OBJECT),
    2005              :                     errmsg("index \"%s\" for table \"%s\" does not exist",
    2006              :                            indexname, RelationGetRelationName(rel)));
    2007              :     }
    2008              :     else
    2009            8 :         indexOid = InvalidOid;
    2010              : 
    2011          135 :     return indexOid;
    2012              : }
    2013              : 
    2014              : static const char *
    2015         2570 : RepackCommandAsString(RepackCommand cmd)
    2016              : {
    2017         2570 :     switch (cmd)
    2018              :     {
    2019         2172 :         case REPACK_COMMAND_REPACK:
    2020         2172 :             return "REPACK";
    2021          222 :         case REPACK_COMMAND_VACUUMFULL:
    2022          222 :             return "VACUUM";
    2023          176 :         case REPACK_COMMAND_CLUSTER:
    2024          176 :             return "CLUSTER";
    2025              :     }
    2026            0 :     return "???";             /* keep compiler quiet */
    2027              : }
        

Generated by: LCOV version 2.0-1