LCOV - code coverage report
Current view: top level - contrib/pg_surgery - heap_surgery.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 112 121 92.6 %
Date: 2025-01-18 05:15:39 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heap_surgery.c
       4             :  *    Functions to perform surgery on the damaged heap table.
       5             :  *
       6             :  * Copyright (c) 2020-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    contrib/pg_surgery/heap_surgery.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : #include "postgres.h"
      14             : 
      15             : #include "access/htup_details.h"
      16             : #include "access/relation.h"
      17             : #include "access/visibilitymap.h"
      18             : #include "access/xloginsert.h"
      19             : #include "catalog/pg_am_d.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "utils/acl.h"
      23             : #include "utils/array.h"
      24             : #include "utils/rel.h"
      25             : 
      26           2 : PG_MODULE_MAGIC;
      27             : 
      28             : /* Options to forcefully change the state of a heap tuple. */
      29             : typedef enum HeapTupleForceOption
      30             : {
      31             :     HEAP_FORCE_KILL,
      32             :     HEAP_FORCE_FREEZE,
      33             : } HeapTupleForceOption;
      34             : 
      35           4 : PG_FUNCTION_INFO_V1(heap_force_kill);
      36           4 : PG_FUNCTION_INFO_V1(heap_force_freeze);
      37             : 
      38             : static int32 tidcmp(const void *a, const void *b);
      39             : static Datum heap_force_common(FunctionCallInfo fcinfo,
      40             :                                HeapTupleForceOption heap_force_opt);
      41             : static void sanity_check_tid_array(ArrayType *ta, int *ntids);
      42             : static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
      43             :                                       OffsetNumber *next_start_ptr);
      44             : 
      45             : /*-------------------------------------------------------------------------
      46             :  * heap_force_kill()
      47             :  *
      48             :  * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
      49             :  * given TID array.
      50             :  *
      51             :  * Usage: SELECT heap_force_kill(regclass, tid[]);
      52             :  *-------------------------------------------------------------------------
      53             :  */
      54             : Datum
      55          18 : heap_force_kill(PG_FUNCTION_ARGS)
      56             : {
      57          18 :     PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL));
      58             : }
      59             : 
      60             : /*-------------------------------------------------------------------------
      61             :  * heap_force_freeze()
      62             :  *
      63             :  * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
      64             :  * given TID array.
      65             :  *
      66             :  * Usage: SELECT heap_force_freeze(regclass, tid[]);
      67             :  *-------------------------------------------------------------------------
      68             :  */
      69             : Datum
      70          14 : heap_force_freeze(PG_FUNCTION_ARGS)
      71             : {
      72          14 :     PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE));
      73             : }
      74             : 
      75             : /*-------------------------------------------------------------------------
      76             :  * heap_force_common()
      77             :  *
      78             :  * Common code for heap_force_kill and heap_force_freeze
      79             :  *-------------------------------------------------------------------------
      80             :  */
      81             : static Datum
      82          32 : heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
      83             : {
      84          32 :     Oid         relid = PG_GETARG_OID(0);
      85          32 :     ArrayType  *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
      86             :     ItemPointer tids;
      87             :     int         ntids,
      88             :                 nblocks;
      89             :     Relation    rel;
      90             :     OffsetNumber curr_start_ptr,
      91             :                 next_start_ptr;
      92             :     bool        include_this_tid[MaxHeapTuplesPerPage];
      93             : 
      94          32 :     if (RecoveryInProgress())
      95           0 :         ereport(ERROR,
      96             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
      97             :                  errmsg("recovery is in progress"),
      98             :                  errhint("Heap surgery functions cannot be executed during recovery.")));
      99             : 
     100             :     /* Check inputs. */
     101          32 :     sanity_check_tid_array(ta, &ntids);
     102             : 
     103          28 :     rel = relation_open(relid, RowExclusiveLock);
     104             : 
     105             :     /*
     106             :      * Check target relation.
     107             :      */
     108          28 :     if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
     109           4 :         ereport(ERROR,
     110             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     111             :                  errmsg("cannot operate on relation \"%s\"",
     112             :                         RelationGetRelationName(rel)),
     113             :                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     114             : 
     115          24 :     if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
     116           0 :         ereport(ERROR,
     117             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     118             :                  errmsg("only heap AM is supported")));
     119             : 
     120             :     /* Must be owner of the table or superuser. */
     121          24 :     if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
     122           0 :         aclcheck_error(ACLCHECK_NOT_OWNER,
     123           0 :                        get_relkind_objtype(rel->rd_rel->relkind),
     124           0 :                        RelationGetRelationName(rel));
     125             : 
     126          24 :     tids = ((ItemPointer) ARR_DATA_PTR(ta));
     127             : 
     128             :     /*
     129             :      * If there is more than one TID in the array, sort them so that we can
     130             :      * easily fetch all the TIDs belonging to one particular page from the
     131             :      * array.
     132             :      */
     133          24 :     if (ntids > 1)
     134           4 :         qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
     135             : 
     136          24 :     curr_start_ptr = next_start_ptr = 0;
     137          24 :     nblocks = RelationGetNumberOfBlocks(rel);
     138             : 
     139             :     /*
     140             :      * Loop, performing the necessary actions for each block.
     141             :      */
     142          48 :     while (next_start_ptr != ntids)
     143             :     {
     144             :         Buffer      buf;
     145          24 :         Buffer      vmbuf = InvalidBuffer;
     146             :         Page        page;
     147             :         BlockNumber blkno;
     148             :         OffsetNumber curoff;
     149             :         OffsetNumber maxoffset;
     150             :         int         i;
     151          24 :         bool        did_modify_page = false;
     152          24 :         bool        did_modify_vm = false;
     153             : 
     154          24 :         CHECK_FOR_INTERRUPTS();
     155             : 
     156             :         /*
     157             :          * Find all the TIDs belonging to one particular page starting from
     158             :          * next_start_ptr and process them one by one.
     159             :          */
     160          24 :         blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
     161             : 
     162             :         /* Check whether the block number is valid. */
     163          24 :         if (blkno >= nblocks)
     164             :         {
     165             :             /* Update the current_start_ptr before moving to the next page. */
     166           2 :             curr_start_ptr = next_start_ptr;
     167             : 
     168           2 :             ereport(NOTICE,
     169             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     170             :                      errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
     171             :                             blkno, RelationGetRelationName(rel))));
     172           2 :             continue;
     173             :         }
     174             : 
     175          22 :         buf = ReadBuffer(rel, blkno);
     176          22 :         LockBufferForCleanup(buf);
     177             : 
     178          22 :         page = BufferGetPage(buf);
     179             : 
     180          22 :         maxoffset = PageGetMaxOffsetNumber(page);
     181             : 
     182             :         /*
     183             :          * Figure out which TIDs we are going to process and which ones we are
     184             :          * going to skip.
     185             :          */
     186          22 :         memset(include_this_tid, 0, sizeof(include_this_tid));
     187          48 :         for (i = curr_start_ptr; i < next_start_ptr; i++)
     188             :         {
     189          26 :             OffsetNumber offno = ItemPointerGetOffsetNumberNoCheck(&tids[i]);
     190             :             ItemId      itemid;
     191             : 
     192             :             /* Check whether the offset number is valid. */
     193          26 :             if (offno == InvalidOffsetNumber || offno > maxoffset)
     194             :             {
     195           4 :                 ereport(NOTICE,
     196             :                         errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
     197             :                                blkno, offno, RelationGetRelationName(rel)));
     198           4 :                 continue;
     199             :             }
     200             : 
     201          22 :             itemid = PageGetItemId(page, offno);
     202             : 
     203             :             /* Only accept an item ID that is used. */
     204          22 :             if (ItemIdIsRedirected(itemid))
     205             :             {
     206           2 :                 ereport(NOTICE,
     207             :                         errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
     208             :                                blkno, offno, RelationGetRelationName(rel),
     209             :                                ItemIdGetRedirect(itemid)));
     210           2 :                 continue;
     211             :             }
     212          20 :             else if (ItemIdIsDead(itemid))
     213             :             {
     214           4 :                 ereport(NOTICE,
     215             :                         (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
     216             :                                 blkno, offno, RelationGetRelationName(rel))));
     217           4 :                 continue;
     218             :             }
     219          16 :             else if (!ItemIdIsUsed(itemid))
     220             :             {
     221           2 :                 ereport(NOTICE,
     222             :                         (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
     223             :                                 blkno, offno, RelationGetRelationName(rel))));
     224           2 :                 continue;
     225             :             }
     226             : 
     227             :             /* Mark it for processing. */
     228             :             Assert(offno < MaxHeapTuplesPerPage);
     229          14 :             include_this_tid[offno] = true;
     230             :         }
     231             : 
     232             :         /*
     233             :          * Before entering the critical section, pin the visibility map page
     234             :          * if it appears to be necessary.
     235             :          */
     236          22 :         if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
     237           6 :             visibilitymap_pin(rel, blkno, &vmbuf);
     238             : 
     239             :         /* No ereport(ERROR) from here until all the changes are logged. */
     240          22 :         START_CRIT_SECTION();
     241             : 
     242         110 :         for (curoff = FirstOffsetNumber; curoff <= maxoffset;
     243          88 :              curoff = OffsetNumberNext(curoff))
     244             :         {
     245             :             ItemId      itemid;
     246             : 
     247          88 :             if (!include_this_tid[curoff])
     248          74 :                 continue;
     249             : 
     250          14 :             itemid = PageGetItemId(page, curoff);
     251             :             Assert(ItemIdIsNormal(itemid));
     252             : 
     253          14 :             did_modify_page = true;
     254             : 
     255          14 :             if (heap_force_opt == HEAP_FORCE_KILL)
     256             :             {
     257           6 :                 ItemIdSetDead(itemid);
     258             : 
     259             :                 /*
     260             :                  * If the page is marked all-visible, we must clear
     261             :                  * PD_ALL_VISIBLE flag on the page header and an all-visible
     262             :                  * bit on the visibility map corresponding to the page.
     263             :                  */
     264           6 :                 if (PageIsAllVisible(page))
     265             :                 {
     266           2 :                     PageClearAllVisible(page);
     267           2 :                     visibilitymap_clear(rel, blkno, vmbuf,
     268             :                                         VISIBILITYMAP_VALID_BITS);
     269           2 :                     did_modify_vm = true;
     270             :                 }
     271             :             }
     272             :             else
     273             :             {
     274             :                 HeapTupleHeader htup;
     275             : 
     276             :                 Assert(heap_force_opt == HEAP_FORCE_FREEZE);
     277             : 
     278           8 :                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
     279             : 
     280             :                 /*
     281             :                  * Reset all visibility-related fields of the tuple. This
     282             :                  * logic should mimic heap_execute_freeze_tuple(), but we
     283             :                  * choose to reset xmin and ctid just to be sure that no
     284             :                  * potentially-garbled data is left behind.
     285             :                  */
     286           8 :                 ItemPointerSet(&htup->t_ctid, blkno, curoff);
     287           8 :                 HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
     288           8 :                 HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
     289           8 :                 if (htup->t_infomask & HEAP_MOVED)
     290             :                 {
     291           0 :                     if (htup->t_infomask & HEAP_MOVED_OFF)
     292           0 :                         HeapTupleHeaderSetXvac(htup, InvalidTransactionId);
     293             :                     else
     294           0 :                         HeapTupleHeaderSetXvac(htup, FrozenTransactionId);
     295             :                 }
     296             : 
     297             :                 /*
     298             :                  * Clear all the visibility-related bits of this tuple and
     299             :                  * mark it as frozen. Also, get rid of HOT_UPDATED and
     300             :                  * KEYS_UPDATES bits.
     301             :                  */
     302           8 :                 htup->t_infomask &= ~HEAP_XACT_MASK;
     303           8 :                 htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID);
     304           8 :                 htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
     305           8 :                 htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     306             :             }
     307             :         }
     308             : 
     309             :         /*
     310             :          * If the page was modified, only then, we mark the buffer dirty or do
     311             :          * the WAL logging.
     312             :          */
     313          22 :         if (did_modify_page)
     314             :         {
     315             :             /* Mark buffer dirty before we write WAL. */
     316          12 :             MarkBufferDirty(buf);
     317             : 
     318             :             /* XLOG stuff */
     319          12 :             if (RelationNeedsWAL(rel))
     320           4 :                 log_newpage_buffer(buf, true);
     321             :         }
     322             : 
     323             :         /* WAL log the VM page if it was modified. */
     324          22 :         if (did_modify_vm && RelationNeedsWAL(rel))
     325           0 :             log_newpage_buffer(vmbuf, false);
     326             : 
     327          22 :         END_CRIT_SECTION();
     328             : 
     329          22 :         UnlockReleaseBuffer(buf);
     330             : 
     331          22 :         if (vmbuf != InvalidBuffer)
     332           6 :             ReleaseBuffer(vmbuf);
     333             : 
     334             :         /* Update the current_start_ptr before moving to the next page. */
     335          22 :         curr_start_ptr = next_start_ptr;
     336             :     }
     337             : 
     338          24 :     relation_close(rel, RowExclusiveLock);
     339             : 
     340          24 :     pfree(ta);
     341             : 
     342          24 :     PG_RETURN_VOID();
     343             : }
     344             : 
     345             : /*-------------------------------------------------------------------------
     346             :  * tidcmp()
     347             :  *
     348             :  * Compare two item pointers, return -1, 0, or +1.
     349             :  *
     350             :  * See ItemPointerCompare for details.
     351             :  * ------------------------------------------------------------------------
     352             :  */
     353             : static int32
     354           6 : tidcmp(const void *a, const void *b)
     355             : {
     356           6 :     ItemPointer iptr1 = ((const ItemPointer) a);
     357           6 :     ItemPointer iptr2 = ((const ItemPointer) b);
     358             : 
     359           6 :     return ItemPointerCompare(iptr1, iptr2);
     360             : }
     361             : 
     362             : /*-------------------------------------------------------------------------
     363             :  * sanity_check_tid_array()
     364             :  *
     365             :  * Perform sanity checks on the given tid array, and set *ntids to the
     366             :  * number of items in the array.
     367             :  * ------------------------------------------------------------------------
     368             :  */
     369             : static void
     370          32 : sanity_check_tid_array(ArrayType *ta, int *ntids)
     371             : {
     372          32 :     if (ARR_HASNULL(ta) && array_contains_nulls(ta))
     373           2 :         ereport(ERROR,
     374             :                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     375             :                  errmsg("array must not contain nulls")));
     376             : 
     377          30 :     if (ARR_NDIM(ta) > 1)
     378           2 :         ereport(ERROR,
     379             :                 (errcode(ERRCODE_DATA_EXCEPTION),
     380             :                  errmsg("argument must be empty or one-dimensional array")));
     381             : 
     382          28 :     *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
     383          28 : }
     384             : 
     385             : /*-------------------------------------------------------------------------
     386             :  * find_tids_one_page()
     387             :  *
     388             :  * Find all the tids residing in the same page as tids[next_start_ptr], and
     389             :  * update next_start_ptr so that it points to the first tid in the next page.
     390             :  *
     391             :  * NOTE: The input tids[] array must be sorted.
     392             :  * ------------------------------------------------------------------------
     393             :  */
     394             : static BlockNumber
     395          24 : find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
     396             : {
     397             :     int         i;
     398             :     BlockNumber prev_blkno,
     399             :                 blkno;
     400             : 
     401          24 :     prev_blkno = blkno = InvalidBlockNumber;
     402             : 
     403          52 :     for (i = *next_start_ptr; i < ntids; i++)
     404             :     {
     405          30 :         ItemPointerData tid = tids[i];
     406             : 
     407          30 :         blkno = ItemPointerGetBlockNumberNoCheck(&tid);
     408             : 
     409          30 :         if (i == *next_start_ptr)
     410          24 :             prev_blkno = blkno;
     411             : 
     412          30 :         if (prev_blkno != blkno)
     413           2 :             break;
     414             :     }
     415             : 
     416          24 :     *next_start_ptr = i;
     417          24 :     return prev_blkno;
     418             : }

Generated by: LCOV version 1.14