LCOV - code coverage report
Current view: top level - contrib/pg_surgery - heap_surgery.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 112 121 92.6 %
Date: 2025-04-01 16:15:31 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heap_surgery.c
       4             :  *    Functions to perform surgery on the damaged heap table.
       5             :  *
       6             :  * Copyright (c) 2020-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    contrib/pg_surgery/heap_surgery.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : #include "postgres.h"
      14             : 
      15             : #include "access/htup_details.h"
      16             : #include "access/relation.h"
      17             : #include "access/visibilitymap.h"
      18             : #include "access/xloginsert.h"
      19             : #include "catalog/pg_am_d.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "utils/acl.h"
      23             : #include "utils/array.h"
      24             : #include "utils/rel.h"
      25             : 
      26           2 : PG_MODULE_MAGIC_EXT(
      27             :                     .name = "pg_surgery",
      28             :                     .version = PG_VERSION
      29             : );
      30             : 
      31             : /* Options to forcefully change the state of a heap tuple. */
      32             : typedef enum HeapTupleForceOption
      33             : {
      34             :     HEAP_FORCE_KILL,
      35             :     HEAP_FORCE_FREEZE,
      36             : } HeapTupleForceOption;
      37             : 
      38           4 : PG_FUNCTION_INFO_V1(heap_force_kill);
      39           4 : PG_FUNCTION_INFO_V1(heap_force_freeze);
      40             : 
      41             : static int32 tidcmp(const void *a, const void *b);
      42             : static Datum heap_force_common(FunctionCallInfo fcinfo,
      43             :                                HeapTupleForceOption heap_force_opt);
      44             : static void sanity_check_tid_array(ArrayType *ta, int *ntids);
      45             : static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
      46             :                                       OffsetNumber *next_start_ptr);
      47             : 
      48             : /*-------------------------------------------------------------------------
      49             :  * heap_force_kill()
      50             :  *
      51             :  * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
      52             :  * given TID array.
      53             :  *
      54             :  * Usage: SELECT heap_force_kill(regclass, tid[]);
      55             :  *-------------------------------------------------------------------------
      56             :  */
      57             : Datum
      58          18 : heap_force_kill(PG_FUNCTION_ARGS)
      59             : {
      60          18 :     PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL));
      61             : }
      62             : 
      63             : /*-------------------------------------------------------------------------
      64             :  * heap_force_freeze()
      65             :  *
      66             :  * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
      67             :  * given TID array.
      68             :  *
      69             :  * Usage: SELECT heap_force_freeze(regclass, tid[]);
      70             :  *-------------------------------------------------------------------------
      71             :  */
      72             : Datum
      73          14 : heap_force_freeze(PG_FUNCTION_ARGS)
      74             : {
      75          14 :     PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE));
      76             : }
      77             : 
      78             : /*-------------------------------------------------------------------------
      79             :  * heap_force_common()
      80             :  *
      81             :  * Common code for heap_force_kill and heap_force_freeze
      82             :  *-------------------------------------------------------------------------
      83             :  */
      84             : static Datum
      85          32 : heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
      86             : {
      87          32 :     Oid         relid = PG_GETARG_OID(0);
      88          32 :     ArrayType  *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
      89             :     ItemPointer tids;
      90             :     int         ntids,
      91             :                 nblocks;
      92             :     Relation    rel;
      93             :     OffsetNumber curr_start_ptr,
      94             :                 next_start_ptr;
      95             :     bool        include_this_tid[MaxHeapTuplesPerPage];
      96             : 
      97          32 :     if (RecoveryInProgress())
      98           0 :         ereport(ERROR,
      99             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     100             :                  errmsg("recovery is in progress"),
     101             :                  errhint("Heap surgery functions cannot be executed during recovery.")));
     102             : 
     103             :     /* Check inputs. */
     104          32 :     sanity_check_tid_array(ta, &ntids);
     105             : 
     106          28 :     rel = relation_open(relid, RowExclusiveLock);
     107             : 
     108             :     /*
     109             :      * Check target relation.
     110             :      */
     111          28 :     if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
     112           4 :         ereport(ERROR,
     113             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     114             :                  errmsg("cannot operate on relation \"%s\"",
     115             :                         RelationGetRelationName(rel)),
     116             :                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     117             : 
     118          24 :     if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
     119           0 :         ereport(ERROR,
     120             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     121             :                  errmsg("only heap AM is supported")));
     122             : 
     123             :     /* Must be owner of the table or superuser. */
     124          24 :     if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
     125           0 :         aclcheck_error(ACLCHECK_NOT_OWNER,
     126           0 :                        get_relkind_objtype(rel->rd_rel->relkind),
     127           0 :                        RelationGetRelationName(rel));
     128             : 
     129          24 :     tids = ((ItemPointer) ARR_DATA_PTR(ta));
     130             : 
     131             :     /*
     132             :      * If there is more than one TID in the array, sort them so that we can
     133             :      * easily fetch all the TIDs belonging to one particular page from the
     134             :      * array.
     135             :      */
     136          24 :     if (ntids > 1)
     137           4 :         qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
     138             : 
     139          24 :     curr_start_ptr = next_start_ptr = 0;
     140          24 :     nblocks = RelationGetNumberOfBlocks(rel);
     141             : 
     142             :     /*
     143             :      * Loop, performing the necessary actions for each block.
     144             :      */
     145          48 :     while (next_start_ptr != ntids)
     146             :     {
     147             :         Buffer      buf;
     148          24 :         Buffer      vmbuf = InvalidBuffer;
     149             :         Page        page;
     150             :         BlockNumber blkno;
     151             :         OffsetNumber curoff;
     152             :         OffsetNumber maxoffset;
     153             :         int         i;
     154          24 :         bool        did_modify_page = false;
     155          24 :         bool        did_modify_vm = false;
     156             : 
     157          24 :         CHECK_FOR_INTERRUPTS();
     158             : 
     159             :         /*
     160             :          * Find all the TIDs belonging to one particular page starting from
     161             :          * next_start_ptr and process them one by one.
     162             :          */
     163          24 :         blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
     164             : 
     165             :         /* Check whether the block number is valid. */
     166          24 :         if (blkno >= nblocks)
     167             :         {
     168             :             /* Update curr_start_ptr before moving to the next page. */
     169           2 :             curr_start_ptr = next_start_ptr;
     170             : 
     171           2 :             ereport(NOTICE,
     172             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     173             :                      errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
     174             :                             blkno, RelationGetRelationName(rel))));
     175           2 :             continue;
     176             :         }
     177             : 
     178          22 :         buf = ReadBuffer(rel, blkno);
     179          22 :         LockBufferForCleanup(buf);
     180             : 
     181          22 :         page = BufferGetPage(buf);
     182             : 
     183          22 :         maxoffset = PageGetMaxOffsetNumber(page);
     184             : 
     185             :         /*
     186             :          * Figure out which TIDs we are going to process and which ones we are
     187             :          * going to skip.
     188             :          */
     189          22 :         memset(include_this_tid, 0, sizeof(include_this_tid));
     190          48 :         for (i = curr_start_ptr; i < next_start_ptr; i++)
     191             :         {
     192          26 :             OffsetNumber offno = ItemPointerGetOffsetNumberNoCheck(&tids[i]);
     193             :             ItemId      itemid;
     194             : 
     195             :             /* Check whether the offset number is valid. */
     196          26 :             if (offno == InvalidOffsetNumber || offno > maxoffset)
     197             :             {
     198           4 :                 ereport(NOTICE,
     199             :                         errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
     200             :                                blkno, offno, RelationGetRelationName(rel)));
     201           4 :                 continue;
     202             :             }
     203             : 
     204          22 :             itemid = PageGetItemId(page, offno);
     205             : 
     206             :             /* Only accept a normal item ID; skip redirected, dead and unused ones. */
     207          22 :             if (ItemIdIsRedirected(itemid))
     208             :             {
     209           2 :                 ereport(NOTICE,
     210             :                         errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
     211             :                                blkno, offno, RelationGetRelationName(rel),
     212             :                                ItemIdGetRedirect(itemid)));
     213           2 :                 continue;
     214             :             }
     215          20 :             else if (ItemIdIsDead(itemid))
     216             :             {
     217           4 :                 ereport(NOTICE,
     218             :                         (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
     219             :                                 blkno, offno, RelationGetRelationName(rel))));
     220           4 :                 continue;
     221             :             }
     222          16 :             else if (!ItemIdIsUsed(itemid))
     223             :             {
     224           2 :                 ereport(NOTICE,
     225             :                         (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
     226             :                                 blkno, offno, RelationGetRelationName(rel))));
     227           2 :                 continue;
     228             :             }
     229             : 
     230             :             /* Mark it for processing. */
     231             :             Assert(offno < MaxHeapTuplesPerPage);
     232          14 :             include_this_tid[offno] = true;
     233             :         }
     234             : 
     235             :         /*
     236             :          * Before entering the critical section, pin the visibility map page
     237             :          * if it appears to be necessary.
     238             :          */
     239          22 :         if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
     240           6 :             visibilitymap_pin(rel, blkno, &vmbuf);
     241             : 
     242             :         /* No ereport(ERROR) from here until all the changes are logged. */
     243          22 :         START_CRIT_SECTION();
     244             : 
     245         110 :         for (curoff = FirstOffsetNumber; curoff <= maxoffset;
     246          88 :              curoff = OffsetNumberNext(curoff))
     247             :         {
     248             :             ItemId      itemid;
     249             : 
     250          88 :             if (!include_this_tid[curoff])
     251          74 :                 continue;
     252             : 
     253          14 :             itemid = PageGetItemId(page, curoff);
     254             :             Assert(ItemIdIsNormal(itemid));
     255             : 
     256          14 :             did_modify_page = true;
     257             : 
     258          14 :             if (heap_force_opt == HEAP_FORCE_KILL)
     259             :             {
     260           6 :                 ItemIdSetDead(itemid);
     261             : 
     262             :                 /*
     263             :                  * If the page is marked all-visible, we must clear
     264             :                  * PD_ALL_VISIBLE flag on the page header and an all-visible
     265             :                  * bit on the visibility map corresponding to the page.
     266             :                  */
     267           6 :                 if (PageIsAllVisible(page))
     268             :                 {
     269           2 :                     PageClearAllVisible(page);
     270           2 :                     visibilitymap_clear(rel, blkno, vmbuf,
     271             :                                         VISIBILITYMAP_VALID_BITS);
     272           2 :                     did_modify_vm = true;
     273             :                 }
     274             :             }
     275             :             else
     276             :             {
     277             :                 HeapTupleHeader htup;
     278             : 
     279             :                 Assert(heap_force_opt == HEAP_FORCE_FREEZE);
     280             : 
     281           8 :                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
     282             : 
     283             :                 /*
     284             :                  * Reset all visibility-related fields of the tuple. This
     285             :                  * logic should mimic heap_execute_freeze_tuple(), but we
     286             :                  * choose to reset xmin and ctid just to be sure that no
     287             :                  * potentially-garbled data is left behind.
     288             :                  */
     289           8 :                 ItemPointerSet(&htup->t_ctid, blkno, curoff);
     290           8 :                 HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
     291           8 :                 HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
     292           8 :                 if (htup->t_infomask & HEAP_MOVED)
     293             :                 {
     294           0 :                     if (htup->t_infomask & HEAP_MOVED_OFF)
     295           0 :                         HeapTupleHeaderSetXvac(htup, InvalidTransactionId);
     296             :                     else
     297           0 :                         HeapTupleHeaderSetXvac(htup, FrozenTransactionId);
     298             :                 }
     299             : 
     300             :                 /*
     301             :                  * Clear all the visibility-related bits of this tuple and
     302             :                  * mark it as frozen. Also, get rid of HOT_UPDATED and
     303             :                  * KEYS_UPDATED bits.
     304             :                  */
     305           8 :                 htup->t_infomask &= ~HEAP_XACT_MASK;
     306           8 :                 htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID);
     307           8 :                 htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
     308           8 :                 htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     309             :             }
     310             :         }
     311             : 
     312             :         /*
     313             :          * Mark the buffer dirty and do the WAL logging only if the page was
     314             :          * actually modified.
     315             :          */
     316          22 :         if (did_modify_page)
     317             :         {
     318             :             /* Mark buffer dirty before we write WAL. */
     319          12 :             MarkBufferDirty(buf);
     320             : 
     321             :             /* XLOG stuff */
     322          12 :             if (RelationNeedsWAL(rel))
     323           4 :                 log_newpage_buffer(buf, true);
     324             :         }
     325             : 
     326             :         /* WAL log the VM page if it was modified. */
     327          22 :         if (did_modify_vm && RelationNeedsWAL(rel))
     328           0 :             log_newpage_buffer(vmbuf, false);
     329             : 
     330          22 :         END_CRIT_SECTION();
     331             : 
     332          22 :         UnlockReleaseBuffer(buf);
     333             : 
     334          22 :         if (vmbuf != InvalidBuffer)
     335           6 :             ReleaseBuffer(vmbuf);
     336             : 
     337             :         /* Update curr_start_ptr before moving to the next page. */
     338          22 :         curr_start_ptr = next_start_ptr;
     339             :     }
     340             : 
     341          24 :     relation_close(rel, RowExclusiveLock);
     342             : 
     343          24 :     pfree(ta);
     344             : 
     345          24 :     PG_RETURN_VOID();
     346             : }
     347             : 
     348             : /*-------------------------------------------------------------------------
     349             :  * tidcmp()
     350             :  *
     351             :  * Compare two item pointers, return -1, 0, or +1.
     352             :  *
     353             :  * See ItemPointerCompare for details.
     354             :  * ------------------------------------------------------------------------
     355             :  */
     356             : static int32
     357           6 : tidcmp(const void *a, const void *b)
     358             : {
     359           6 :     ItemPointer iptr1 = ((const ItemPointer) a);
     360           6 :     ItemPointer iptr2 = ((const ItemPointer) b);
     361             : 
     362           6 :     return ItemPointerCompare(iptr1, iptr2);
     363             : }
     364             : 
     365             : /*-------------------------------------------------------------------------
     366             :  * sanity_check_tid_array()
     367             :  *
     368             :  * Perform sanity checks on the given tid array, and set *ntids to the
     369             :  * number of items in the array.
     370             :  * ------------------------------------------------------------------------
     371             :  */
     372             : static void
     373          32 : sanity_check_tid_array(ArrayType *ta, int *ntids)
     374             : {
     375          32 :     if (ARR_HASNULL(ta) && array_contains_nulls(ta))
     376           2 :         ereport(ERROR,
     377             :                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     378             :                  errmsg("array must not contain nulls")));
     379             : 
     380          30 :     if (ARR_NDIM(ta) > 1)
     381           2 :         ereport(ERROR,
     382             :                 (errcode(ERRCODE_DATA_EXCEPTION),
     383             :                  errmsg("argument must be empty or one-dimensional array")));
     384             : 
     385          28 :     *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
     386          28 : }
     387             : 
     388             : /*-------------------------------------------------------------------------
     389             :  * find_tids_one_page()
     390             :  *
     391             :  * Find all the tids residing in the same page as tids[*next_start_ptr], and
     392             :  * update *next_start_ptr so that it indexes the first tid of the next page.
     393             :  *
     394             :  * NOTE: The input tids[] array must be sorted.
     395             :  * ------------------------------------------------------------------------
     396             :  */
     397             : static BlockNumber
     398          24 : find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
     399             : {
     400             :     int         i;
     401             :     BlockNumber prev_blkno,
     402             :                 blkno;
     403             : 
     404          24 :     prev_blkno = blkno = InvalidBlockNumber;
     405             : 
     406          52 :     for (i = *next_start_ptr; i < ntids; i++)
     407             :     {
     408          30 :         ItemPointerData tid = tids[i];
     409             : 
     410          30 :         blkno = ItemPointerGetBlockNumberNoCheck(&tid);
     411             : 
     412          30 :         if (i == *next_start_ptr)
     413          24 :             prev_blkno = blkno;
     414             : 
     415          30 :         if (prev_blkno != blkno)
     416           2 :             break;
     417             :     }
     418             : 
     419          24 :     *next_start_ptr = i;
     420          24 :     return prev_blkno;
     421             : }

Generated by: LCOV version 1.14