LCOV - code coverage report
Current view: top level - contrib/pg_surgery - heap_surgery.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 92.6 % 121 112
Test Date: 2026-03-07 13:15:07 Functions: 100.0 % 9 9
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * heap_surgery.c
       4              :  *    Functions to perform surgery on the damaged heap table.
       5              :  *
       6              :  * Copyright (c) 2020-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  * IDENTIFICATION
       9              :  *    contrib/pg_surgery/heap_surgery.c
      10              :  *
      11              :  *-------------------------------------------------------------------------
      12              :  */
      13              : #include "postgres.h"
      14              : 
      15              : #include "access/htup_details.h"
      16              : #include "access/relation.h"
      17              : #include "access/visibilitymap.h"
      18              : #include "access/xloginsert.h"
      19              : #include "catalog/pg_am_d.h"
      20              : #include "miscadmin.h"
      21              : #include "storage/bufmgr.h"
      22              : #include "utils/acl.h"
      23              : #include "utils/array.h"
      24              : #include "utils/rel.h"
      25              : 
      26            1 : PG_MODULE_MAGIC_EXT(        /* module magic block; sets the name and version fields below */
      27              :                     .name = "pg_surgery",
      28              :                     .version = PG_VERSION
      29              : );
      30              : 
      31              : /*
                      :  * Options to forcefully change the state of a heap tuple.  The chosen value
                      :  * is passed to heap_force_common() to select which operation it performs.
                      :  */
      32              : typedef enum HeapTupleForceOption
      33              : {
      34              :     HEAP_FORCE_KILL,            /* mark the tuple's item ID dead */
      35              :     HEAP_FORCE_FREEZE,          /* reset visibility info and mark the tuple frozen */
      36              : } HeapTupleForceOption;
      37              : 
      38            2 : PG_FUNCTION_INFO_V1(heap_force_kill);
      39            2 : PG_FUNCTION_INFO_V1(heap_force_freeze);
      40              : 
      41              : static int32 tidcmp(const void *a, const void *b);
      42              : static Datum heap_force_common(FunctionCallInfo fcinfo,
      43              :                                HeapTupleForceOption heap_force_opt);
      44              : static void sanity_check_tid_array(ArrayType *ta, int *ntids);
      45              : static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
      46              :                                       int *next_start_ptr);
      47              : 
      48              : /*-------------------------------------------------------------------------
      49              :  * heap_force_kill()
      50              :  *
      51              :  * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
      52              :  * given TID array.
      53              :  *
      54              :  * Usage: SELECT heap_force_kill(regclass, tid[]);
                      :  *
                      :  * Thin wrapper: all of the work is done by heap_force_common() with
                      :  * HEAP_FORCE_KILL, whose result is returned unchanged.
      55              :  *-------------------------------------------------------------------------
      56              :  */
      57              : Datum
      58            9 : heap_force_kill(PG_FUNCTION_ARGS)
      59              : {
      60            9 :     PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL));
      61              : }
      62              : 
      63              : /*-------------------------------------------------------------------------
      64              :  * heap_force_freeze()
      65              :  *
      66              :  * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
      67              :  * given TID array.
      68              :  *
      69              :  * Usage: SELECT heap_force_freeze(regclass, tid[]);
                      :  *
                      :  * Thin wrapper: all of the work is done by heap_force_common() with
                      :  * HEAP_FORCE_FREEZE, whose result is returned unchanged.
      70              :  *-------------------------------------------------------------------------
      71              :  */
      72              : Datum
      73            7 : heap_force_freeze(PG_FUNCTION_ARGS)
      74              : {
      75            7 :     PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE));
      76              : }
      77              : 
      78              : /*-------------------------------------------------------------------------
      79              :  * heap_force_common()
      80              :  *
      81              :  * Common code for heap_force_kill and heap_force_freeze
                      :  *
                      :  * Argument 0 is the OID of the target relation and argument 1 is an array
                      :  * of TIDs (a private copy is taken, sorted in place, and freed at the end);
                      :  * heap_force_opt selects the operation.  The TIDs are handled one heap page
                      :  * at a time.  A block that is out of range, and an item whose number is out
                      :  * of range or whose item ID is redirected, dead or unused, is skipped with
                      :  * a NOTICE rather than an error.
                      :  *
                      :  * Raises an ERROR when recovery is in progress, when the TID array fails
                      :  * sanity_check_tid_array(), when the relation kind has no table AM, when
                      :  * the relation does not use the heap AM, or when the ownership check fails.
                      :  *
                      :  * Returns void.
      82              :  *-------------------------------------------------------------------------
      83              :  */
      84              : static Datum
      85           16 : heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
      86              : {
      87           16 :     Oid         relid = PG_GETARG_OID(0);
      88           16 :     ArrayType  *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
      89              :     ItemPointer tids;
      90              :     int         ntids;
      91              :     BlockNumber nblocks;
      92              :     Relation    rel;
      93              :     int         curr_start_ptr, /* indexes into tids[]; these must be int, since */
      94              :                 next_start_ptr; /* a 16-bit OffsetNumber would wrap past 65535 TIDs */
      95              :     bool        include_this_tid[MaxHeapTuplesPerPage + 1]; /* indexed by offset number, 1..MaxHeapTuplesPerPage */
      96              : 
      97           16 :     if (RecoveryInProgress())
      98            0 :         ereport(ERROR,
      99              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     100              :                  errmsg("recovery is in progress"),
     101              :                  errhint("Heap surgery functions cannot be executed during recovery.")));
     102              : 
     103              :     /* Check inputs. */
     104           16 :     sanity_check_tid_array(ta, &ntids);
     105              : 
     106           14 :     rel = relation_open(relid, RowExclusiveLock);
     107              : 
     108              :     /*
     109              :      * Check target relation.
     110              :      */
     111           14 :     if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
     112            2 :         ereport(ERROR,
     113              :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     114              :                  errmsg("cannot operate on relation \"%s\"",
     115              :                         RelationGetRelationName(rel)),
     116              :                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     117              : 
     118           12 :     if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
     119            0 :         ereport(ERROR,
     120              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     121              :                  errmsg("only heap AM is supported")));
     122              : 
     123              :     /* Must be owner of the table or superuser. */
     124           12 :     if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
     125            0 :         aclcheck_error(ACLCHECK_NOT_OWNER,
     126            0 :                        get_relkind_objtype(rel->rd_rel->relkind),
     127            0 :                        RelationGetRelationName(rel));
     128              : 
     129           12 :     tids = ((ItemPointer) ARR_DATA_PTR(ta));
     130              : 
     131              :     /*
     132              :      * If there is more than one TID in the array, sort them so that we can
     133              :      * easily fetch all the TIDs belonging to one particular page from the
     134              :      * array.
     135              :      */
     136           12 :     if (ntids > 1)
     137            2 :         qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
     138              : 
     139           12 :     curr_start_ptr = next_start_ptr = 0;
     140           12 :     nblocks = RelationGetNumberOfBlocks(rel);
     141              : 
     142              :     /*
     143              :      * Loop, performing the necessary actions for each block.
     144              :      */
     145           24 :     while (next_start_ptr != ntids)
     146              :     {
     147              :         Buffer      buf;
     148           12 :         Buffer      vmbuf = InvalidBuffer;
     149              :         Page        page;
     150              :         BlockNumber blkno;
     151              :         OffsetNumber curoff;
     152              :         OffsetNumber maxoffset;
     153              :         int         i;
     154           12 :         bool        did_modify_page = false;
     155           12 :         bool        did_modify_vm = false;
     156              : 
     157           12 :         CHECK_FOR_INTERRUPTS();
     158              : 
     159              :         /*
     160              :          * Find all the TIDs belonging to one particular page starting from
     161              :          * next_start_ptr and process them one by one.
     162              :          */
     163           12 :         blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
     164              : 
     165              :         /* Check whether the block number is valid. */
     166           12 :         if (blkno >= nblocks)
     167              :         {
     168              :             /* Update curr_start_ptr before moving to the next page. */
     169            1 :             curr_start_ptr = next_start_ptr;
     170              : 
     171            1 :             ereport(NOTICE,
     172              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     173              :                      errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
     174              :                             blkno, RelationGetRelationName(rel))));
     175            1 :             continue;
     176              :         }
     177              : 
     178           11 :         buf = ReadBuffer(rel, blkno);
     179           11 :         LockBufferForCleanup(buf);
     180              : 
     181           11 :         page = BufferGetPage(buf);
     182              : 
     183           11 :         maxoffset = Min(PageGetMaxOffsetNumber(page), MaxHeapTuplesPerPage);
     184              : 
     185              :         /*
     186              :          * Figure out which TIDs we are going to process and which ones we are
     187              :          * going to skip.
                      :          *
                      :          * maxoffset was clamped to MaxHeapTuplesPerPage above, so that a page
                      :          * whose header claims more line pointers than a heap page can hold
                      :          * cannot make us index past the end of include_this_tid[].  Items
                      :          * beyond that limit are reported as out of range below.
     188              :          */
     189           11 :         memset(include_this_tid, 0, sizeof(include_this_tid));
     190           24 :         for (i = curr_start_ptr; i < next_start_ptr; i++)
     191              :         {
     192           13 :             OffsetNumber offno = ItemPointerGetOffsetNumberNoCheck(&tids[i]);
     193              :             ItemId      itemid;
     194              : 
     195              :             /* Check whether the offset number is valid. */
     196           13 :             if (offno == InvalidOffsetNumber || offno > maxoffset)
     197              :             {
     198            2 :                 ereport(NOTICE,
     199              :                         errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
     200              :                                blkno, offno, RelationGetRelationName(rel)));
     201            2 :                 continue;
     202              :             }
     203              : 
     204           11 :             itemid = PageGetItemId(page, offno);
     205              : 
     206              :             /* Only accept an item ID that is used. */
     207           11 :             if (ItemIdIsRedirected(itemid))
     208              :             {
     209            1 :                 ereport(NOTICE,
     210              :                         errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
     211              :                                blkno, offno, RelationGetRelationName(rel),
     212              :                                ItemIdGetRedirect(itemid)));
     213            1 :                 continue;
     214              :             }
     215           10 :             else if (ItemIdIsDead(itemid))
     216              :             {
     217            2 :                 ereport(NOTICE,
     218              :                         (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
     219              :                                 blkno, offno, RelationGetRelationName(rel))));
     220            2 :                 continue;
     221              :             }
     222            8 :             else if (!ItemIdIsUsed(itemid))
     223              :             {
     224            1 :                 ereport(NOTICE,
     225              :                         (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
     226              :                                 blkno, offno, RelationGetRelationName(rel))));
     227            1 :                 continue;
     228              :             }
     229              : 
     230              :             /* Mark it for processing; the array has a slot for every offset up to the maximum. */
     231              :             Assert(offno <= MaxHeapTuplesPerPage);
     232            7 :             include_this_tid[offno] = true;
     233              :         }
     234              : 
     235              :         /*
     236              :          * Before entering the critical section, pin the visibility map page
     237              :          * if it appears to be necessary.
     238              :          */
     239           11 :         if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
     240            3 :             visibilitymap_pin(rel, blkno, &vmbuf);
     241              : 
     242              :         /* No ereport(ERROR) from here until all the changes are logged. */
     243           11 :         START_CRIT_SECTION();
     244              : 
     245           55 :         for (curoff = FirstOffsetNumber; curoff <= maxoffset;
     246           44 :              curoff = OffsetNumberNext(curoff))
     247              :         {
     248              :             ItemId      itemid;
     249              : 
     250           44 :             if (!include_this_tid[curoff])
     251           37 :                 continue;
     252              : 
     253            7 :             itemid = PageGetItemId(page, curoff);
     254              :             Assert(ItemIdIsNormal(itemid));
     255              : 
     256            7 :             did_modify_page = true;
     257              : 
     258            7 :             if (heap_force_opt == HEAP_FORCE_KILL)
     259              :             {
     260            3 :                 ItemIdSetDead(itemid);
     261              : 
     262              :                 /*
     263              :                  * If the page is marked all-visible, we must clear
     264              :                  * PD_ALL_VISIBLE flag on the page header and an all-visible
     265              :                  * bit on the visibility map corresponding to the page.
     266              :                  */
     267            3 :                 if (PageIsAllVisible(page))
     268              :                 {
     269            1 :                     PageClearAllVisible(page);
     270            1 :                     visibilitymap_clear(rel, blkno, vmbuf,
     271              :                                         VISIBILITYMAP_VALID_BITS);
     272            1 :                     did_modify_vm = true;
     273              :                 }
     274              :             }
     275              :             else
     276              :             {
     277              :                 HeapTupleHeader htup;
     278              : 
     279              :                 Assert(heap_force_opt == HEAP_FORCE_FREEZE);
     280              : 
     281            4 :                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
     282              : 
     283              :                 /*
     284              :                  * Reset all visibility-related fields of the tuple. This
     285              :                  * logic should mimic heap_execute_freeze_tuple(), but we
     286              :                  * choose to reset xmin and ctid just to be sure that no
     287              :                  * potentially-garbled data is left behind.
     288              :                  */
     289            4 :                 ItemPointerSet(&htup->t_ctid, blkno, curoff);
     290            4 :                 HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
     291            4 :                 HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
     292            4 :                 if (htup->t_infomask & HEAP_MOVED)
     293              :                 {
     294            0 :                     if (htup->t_infomask & HEAP_MOVED_OFF)
     295            0 :                         HeapTupleHeaderSetXvac(htup, InvalidTransactionId);
     296              :                     else
     297            0 :                         HeapTupleHeaderSetXvac(htup, FrozenTransactionId);
     298              :                 }
     299              : 
     300              :                 /*
     301              :                  * Clear all the visibility-related bits of this tuple and
     302              :                  * mark it as frozen. Also, get rid of HOT_UPDATED and
     303              :                  * KEYS_UPDATED bits.
     304              :                  */
     305            4 :                 htup->t_infomask &= ~HEAP_XACT_MASK;
     306            4 :                 htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID);
     307            4 :                 htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
     308            4 :                 htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     309              :             }
     310              :         }
     311              : 
     312              :         /*
     313              :          * If the page was modified, only then, we mark the buffer dirty or do
     314              :          * the WAL logging.
     315              :          */
     316           11 :         if (did_modify_page)
     317              :         {
     318              :             /* Mark buffer dirty before we write WAL. */
     319            6 :             MarkBufferDirty(buf);
     320              : 
     321              :             /* XLOG stuff */
     322            6 :             if (RelationNeedsWAL(rel))
     323            2 :                 log_newpage_buffer(buf, true);
     324              :         }
     325              : 
     326              :         /* WAL log the VM page if it was modified. */
     327           11 :         if (did_modify_vm && RelationNeedsWAL(rel))
     328            0 :             log_newpage_buffer(vmbuf, false);
     329              : 
     330           11 :         END_CRIT_SECTION();
     331              : 
     332           11 :         UnlockReleaseBuffer(buf);
     333              : 
     334           11 :         if (vmbuf != InvalidBuffer)
     335            3 :             ReleaseBuffer(vmbuf);
     336              : 
     337              :         /* Update curr_start_ptr before moving to the next page. */
     338           11 :         curr_start_ptr = next_start_ptr;
     339              :     }
     340              : 
     341           12 :     relation_close(rel, RowExclusiveLock);
     342              : 
     343           12 :     pfree(ta);
     344              : 
     345           12 :     PG_RETURN_VOID();
     346              : }
     347              : 
     348              : /*-------------------------------------------------------------------------
     349              :  * tidcmp()
     350              :  *
     351              :  * Compare two item pointers, return -1, 0, or +1.
     352              :  *
     353              :  * See ItemPointerCompare for details.
                      :  *
                      :  * The void-pointer signature is what qsort() requires; both arguments
                      :  * must point to ItemPointerData.
     354              :  * ------------------------------------------------------------------------
     355              :  */
     356              : static int32
     357            3 : tidcmp(const void *a, const void *b)
     358              : {
     359            3 :     const ItemPointerData *iptr1 = a;
     360            3 :     const ItemPointerData *iptr2 = b;
     361              : 
     362            3 :     return ItemPointerCompare(iptr1, iptr2);
     363              : }
     364              : 
     365              : /*-------------------------------------------------------------------------
     366              :  * sanity_check_tid_array()
     367              :  *
     368              :  * Perform sanity checks on the given tid array, and set *ntids to the
     369              :  * number of items in the array.
                      :  *
                      :  * Raises an ERROR if the array contains a null element or has more than
                      :  * one dimension; *ntids is only set when both checks pass.
     370              :  * ------------------------------------------------------------------------
     371              :  */
     372              : static void
     373           16 : sanity_check_tid_array(ArrayType *ta, int *ntids)
     374              : {
     375           16 :     if (ARR_HASNULL(ta) && array_contains_nulls(ta))
     376            1 :         ereport(ERROR,
     377              :                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     378              :                  errmsg("array must not contain nulls")));
     379              : 
     380           15 :     if (ARR_NDIM(ta) > 1)
     381            1 :         ereport(ERROR,
     382              :                 (errcode(ERRCODE_DATA_EXCEPTION),
     383              :                  errmsg("argument must be empty or one-dimensional array")));
     384              : 
     385           14 :     *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
     386           14 : }
     387              : 
     388              : /*-------------------------------------------------------------------------
     389              :  * find_tids_one_page()
     390              :  *
     391              :  * Find all the tids residing in the same page as tids[next_start_ptr], and
     392              :  * update next_start_ptr so that it points to the first tid in the next page.
     393              :  *
                      :  * Returns the block number of tids[*next_start_ptr] as it was on entry, or
                      :  * InvalidBlockNumber if *next_start_ptr is not less than ntids.  On return
                      :  * *next_start_ptr is ntids when no tid for a later page remains.
                      :  *
                      :  * *next_start_ptr is an index into tids[], hence an int: the array may hold
                      :  * more entries than a 16-bit OffsetNumber can count.
                      :  *
     394              :  * NOTE: The input tids[] array must be sorted.
     395              :  * ------------------------------------------------------------------------
     396              :  */
     397              : static BlockNumber
     398           12 : find_tids_one_page(ItemPointer tids, int ntids, int *next_start_ptr)
     399              : {
     400              :     int         i;
     401              :     BlockNumber prev_blkno,
     402              :                 blkno;
     403              : 
     404           12 :     prev_blkno = blkno = InvalidBlockNumber;
     405              : 
     406           26 :     for (i = *next_start_ptr; i < ntids; i++)
     407              :     {
     408           15 :         ItemPointerData tid = tids[i];
     409              : 
     410           15 :         blkno = ItemPointerGetBlockNumberNoCheck(&tid);
     411              : 
     412           15 :         if (i == *next_start_ptr)
     413           12 :             prev_blkno = blkno;
     414              : 
     415           15 :         if (prev_blkno != blkno)
     416            1 :             break;
     417              :     }
     418              : 
     419           12 :     *next_start_ptr = i;
     420           12 :     return prev_blkno;
     421              : }
        

Generated by: LCOV version 2.0-1