LCOV - code coverage report
Current view: top level - contrib/pg_visibility - pg_visibility.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 241 293 82.3 %
Date: 2024-05-09 12:10:51 Functions: 22 24 91.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_visibility.c
       4             :  *    display visibility map information and page-level visibility bits
       5             :  *
       6             :  * Copyright (c) 2016-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  *    contrib/pg_visibility/pg_visibility.c
       9             :  *-------------------------------------------------------------------------
      10             :  */
      11             : #include "postgres.h"
      12             : 
      13             : #include "access/heapam.h"
      14             : #include "access/htup_details.h"
      15             : #include "access/visibilitymap.h"
      16             : #include "access/xloginsert.h"
      17             : #include "catalog/pg_type.h"
      18             : #include "catalog/storage_xlog.h"
      19             : #include "funcapi.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "storage/proc.h"
      23             : #include "storage/procarray.h"
      24             : #include "storage/smgr.h"
      25             : #include "utils/rel.h"
      26             : #include "utils/snapmgr.h"
      27             : 
      28           6 : PG_MODULE_MAGIC;
      29             : 
      30             : typedef struct vbits
      31             : {
      32             :     BlockNumber next;
      33             :     BlockNumber count;
      34             :     uint8       bits[FLEXIBLE_ARRAY_MEMBER];
      35             : } vbits;
      36             : 
      37             : typedef struct corrupt_items
      38             : {
      39             :     BlockNumber next;
      40             :     BlockNumber count;
      41             :     ItemPointer tids;
      42             : } corrupt_items;
      43             : 
      44           4 : PG_FUNCTION_INFO_V1(pg_visibility_map);
      45           6 : PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
      46           6 : PG_FUNCTION_INFO_V1(pg_visibility);
      47           6 : PG_FUNCTION_INFO_V1(pg_visibility_rel);
      48           6 : PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
      49           6 : PG_FUNCTION_INFO_V1(pg_check_frozen);
      50           6 : PG_FUNCTION_INFO_V1(pg_check_visible);
      51           6 : PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
      52             : 
      53             : static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
      54             : static vbits *collect_visibility_data(Oid relid, bool include_pd);
      55             : static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
      56             :                                             bool all_frozen);
      57             : static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
      58             : static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
      59             :                               Buffer buffer);
      60             : static void check_relation_relkind(Relation rel);
      61             : 
      62             : /*
      63             :  * Visibility map information for a single block of a relation.
      64             :  *
      65             :  * Note: the VM code will silently return zeroes for pages past the end
      66             :  * of the map, so we allow probes up to MaxBlockNumber regardless of the
      67             :  * actual relation size.
      68             :  */
      69             : Datum
      70           0 : pg_visibility_map(PG_FUNCTION_ARGS)
      71             : {
      72           0 :     Oid         relid = PG_GETARG_OID(0);
      73           0 :     int64       blkno = PG_GETARG_INT64(1);
      74             :     int32       mapbits;
      75             :     Relation    rel;
      76           0 :     Buffer      vmbuffer = InvalidBuffer;
      77             :     TupleDesc   tupdesc;
      78             :     Datum       values[2];
      79           0 :     bool        nulls[2] = {0};
      80             : 
      81           0 :     rel = relation_open(relid, AccessShareLock);
      82             : 
      83             :     /* Only some relkinds have a visibility map */
      84           0 :     check_relation_relkind(rel);
      85             : 
      86           0 :     if (blkno < 0 || blkno > MaxBlockNumber)
      87           0 :         ereport(ERROR,
      88             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      89             :                  errmsg("invalid block number")));
      90             : 
      91           0 :     tupdesc = pg_visibility_tupdesc(false, false);
      92             : 
      93           0 :     mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
      94           0 :     if (vmbuffer != InvalidBuffer)
      95           0 :         ReleaseBuffer(vmbuffer);
      96           0 :     values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
      97           0 :     values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
      98             : 
      99           0 :     relation_close(rel, AccessShareLock);
     100             : 
     101           0 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     102             : }
     103             : 
     104             : /*
     105             :  * Visibility map information for a single block of a relation, plus the
     106             :  * page-level information for the same block.
     107             :  */
     108             : Datum
     109          12 : pg_visibility(PG_FUNCTION_ARGS)
     110             : {
     111          12 :     Oid         relid = PG_GETARG_OID(0);
     112          12 :     int64       blkno = PG_GETARG_INT64(1);
     113             :     int32       mapbits;
     114             :     Relation    rel;
     115          12 :     Buffer      vmbuffer = InvalidBuffer;
     116             :     Buffer      buffer;
     117             :     Page        page;
     118             :     TupleDesc   tupdesc;
     119             :     Datum       values[3];
     120          12 :     bool        nulls[3] = {0};
     121             : 
     122          12 :     rel = relation_open(relid, AccessShareLock);
     123             : 
     124             :     /* Only some relkinds have a visibility map */
     125          12 :     check_relation_relkind(rel);
     126             : 
     127           2 :     if (blkno < 0 || blkno > MaxBlockNumber)
     128           0 :         ereport(ERROR,
     129             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     130             :                  errmsg("invalid block number")));
     131             : 
     132           2 :     tupdesc = pg_visibility_tupdesc(false, true);
     133             : 
     134           2 :     mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     135           2 :     if (vmbuffer != InvalidBuffer)
     136           2 :         ReleaseBuffer(vmbuffer);
     137           2 :     values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
     138           2 :     values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
     139             : 
     140             :     /* Here we have to explicitly check rel size ... */
     141           2 :     if (blkno < RelationGetNumberOfBlocks(rel))
     142             :     {
     143           2 :         buffer = ReadBuffer(rel, blkno);
     144           2 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     145             : 
     146           2 :         page = BufferGetPage(buffer);
     147           2 :         values[2] = BoolGetDatum(PageIsAllVisible(page));
     148             : 
     149           2 :         UnlockReleaseBuffer(buffer);
     150             :     }
     151             :     else
     152             :     {
     153             :         /* As with the vismap, silently return 0 for pages past EOF */
     154           0 :         values[2] = BoolGetDatum(false);
     155             :     }
     156             : 
     157           2 :     relation_close(rel, AccessShareLock);
     158             : 
     159           2 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     160             : }
     161             : 
     162             : /*
     163             :  * Visibility map information for every block in a relation.
     164             :  */
     165             : Datum
     166          40 : pg_visibility_map_rel(PG_FUNCTION_ARGS)
     167             : {
     168             :     FuncCallContext *funcctx;
     169             :     vbits      *info;
     170             : 
     171          40 :     if (SRF_IS_FIRSTCALL())
     172             :     {
     173          22 :         Oid         relid = PG_GETARG_OID(0);
     174             :         MemoryContext oldcontext;
     175             : 
     176          22 :         funcctx = SRF_FIRSTCALL_INIT();
     177          22 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     178          22 :         funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
     179             :         /* collect_visibility_data will verify the relkind */
     180          22 :         funcctx->user_fctx = collect_visibility_data(relid, false);
     181           8 :         MemoryContextSwitchTo(oldcontext);
     182             :     }
     183             : 
     184          26 :     funcctx = SRF_PERCALL_SETUP();
     185          26 :     info = (vbits *) funcctx->user_fctx;
     186             : 
     187          26 :     if (info->next < info->count)
     188             :     {
     189             :         Datum       values[3];
     190          18 :         bool        nulls[3] = {0};
     191             :         HeapTuple   tuple;
     192             : 
     193          18 :         values[0] = Int64GetDatum(info->next);
     194          18 :         values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
     195          18 :         values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
     196          18 :         info->next++;
     197             : 
     198          18 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     199          18 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     200             :     }
     201             : 
     202           8 :     SRF_RETURN_DONE(funcctx);
     203             : }
     204             : 
     205             : /*
     206             :  * Visibility map information for every block in a relation, plus the page
     207             :  * level information for each block.
     208             :  */
     209             : Datum
     210          18 : pg_visibility_rel(PG_FUNCTION_ARGS)
     211             : {
     212             :     FuncCallContext *funcctx;
     213             :     vbits      *info;
     214             : 
     215          18 :     if (SRF_IS_FIRSTCALL())
     216             :     {
     217          12 :         Oid         relid = PG_GETARG_OID(0);
     218             :         MemoryContext oldcontext;
     219             : 
     220          12 :         funcctx = SRF_FIRSTCALL_INIT();
     221          12 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     222          12 :         funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
     223             :         /* collect_visibility_data will verify the relkind */
     224          12 :         funcctx->user_fctx = collect_visibility_data(relid, true);
     225          12 :         MemoryContextSwitchTo(oldcontext);
     226             :     }
     227             : 
     228          18 :     funcctx = SRF_PERCALL_SETUP();
     229          18 :     info = (vbits *) funcctx->user_fctx;
     230             : 
     231          18 :     if (info->next < info->count)
     232             :     {
     233             :         Datum       values[4];
     234           6 :         bool        nulls[4] = {0};
     235             :         HeapTuple   tuple;
     236             : 
     237           6 :         values[0] = Int64GetDatum(info->next);
     238           6 :         values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
     239           6 :         values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
     240           6 :         values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
     241           6 :         info->next++;
     242             : 
     243           6 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     244           6 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     245             :     }
     246             : 
     247          12 :     SRF_RETURN_DONE(funcctx);
     248             : }
     249             : 
     250             : /*
     251             :  * Count the number of all-visible and all-frozen pages in the visibility
     252             :  * map for a particular relation.
     253             :  */
     254             : Datum
     255          12 : pg_visibility_map_summary(PG_FUNCTION_ARGS)
     256             : {
     257          12 :     Oid         relid = PG_GETARG_OID(0);
     258             :     Relation    rel;
     259             :     BlockNumber nblocks;
     260             :     BlockNumber blkno;
     261          12 :     Buffer      vmbuffer = InvalidBuffer;
     262          12 :     int64       all_visible = 0;
     263          12 :     int64       all_frozen = 0;
     264             :     TupleDesc   tupdesc;
     265             :     Datum       values[2];
     266          12 :     bool        nulls[2] = {0};
     267             : 
     268          12 :     rel = relation_open(relid, AccessShareLock);
     269             : 
     270             :     /* Only some relkinds have a visibility map */
     271          12 :     check_relation_relkind(rel);
     272             : 
     273           2 :     nblocks = RelationGetNumberOfBlocks(rel);
     274             : 
     275           4 :     for (blkno = 0; blkno < nblocks; ++blkno)
     276             :     {
     277             :         int32       mapbits;
     278             : 
     279             :         /* Make sure we are interruptible. */
     280           2 :         CHECK_FOR_INTERRUPTS();
     281             : 
     282             :         /* Get map info. */
     283           2 :         mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     284           2 :         if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
     285           2 :             ++all_visible;
     286           2 :         if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
     287           0 :             ++all_frozen;
     288             :     }
     289             : 
     290             :     /* Clean up. */
     291           2 :     if (vmbuffer != InvalidBuffer)
     292           2 :         ReleaseBuffer(vmbuffer);
     293           2 :     relation_close(rel, AccessShareLock);
     294             : 
     295           2 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
     296           0 :         elog(ERROR, "return type must be a row type");
     297             : 
     298           2 :     values[0] = Int64GetDatum(all_visible);
     299           2 :     values[1] = Int64GetDatum(all_frozen);
     300             : 
     301           2 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     302             : }
     303             : 
     304             : /*
     305             :  * Return the TIDs of non-frozen tuples present in pages marked all-frozen
     306             :  * in the visibility map.  We hope no one will ever find any, but there could
     307             :  * be bugs, database corruption, etc.
     308             :  */
     309             : Datum
     310          18 : pg_check_frozen(PG_FUNCTION_ARGS)
     311             : {
     312             :     FuncCallContext *funcctx;
     313             :     corrupt_items *items;
     314             : 
     315          18 :     if (SRF_IS_FIRSTCALL())
     316             :     {
     317          18 :         Oid         relid = PG_GETARG_OID(0);
     318             :         MemoryContext oldcontext;
     319             : 
     320          18 :         funcctx = SRF_FIRSTCALL_INIT();
     321          18 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     322             :         /* collect_corrupt_items will verify the relkind */
     323          18 :         funcctx->user_fctx = collect_corrupt_items(relid, false, true);
     324           8 :         MemoryContextSwitchTo(oldcontext);
     325             :     }
     326             : 
     327           8 :     funcctx = SRF_PERCALL_SETUP();
     328           8 :     items = (corrupt_items *) funcctx->user_fctx;
     329             : 
     330           8 :     if (items->next < items->count)
     331           0 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
     332             : 
     333           8 :     SRF_RETURN_DONE(funcctx);
     334             : }
     335             : 
     336             : /*
     337             :  * Return the TIDs of not-all-visible tuples in pages marked all-visible
     338             :  * in the visibility map.  We hope no one will ever find any, but there could
     339             :  * be bugs, database corruption, etc.
     340             :  */
     341             : Datum
     342           2 : pg_check_visible(PG_FUNCTION_ARGS)
     343             : {
     344             :     FuncCallContext *funcctx;
     345             :     corrupt_items *items;
     346             : 
     347           2 :     if (SRF_IS_FIRSTCALL())
     348             :     {
     349           2 :         Oid         relid = PG_GETARG_OID(0);
     350             :         MemoryContext oldcontext;
     351             : 
     352           2 :         funcctx = SRF_FIRSTCALL_INIT();
     353           2 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     354             :         /* collect_corrupt_items will verify the relkind */
     355           2 :         funcctx->user_fctx = collect_corrupt_items(relid, true, false);
     356           2 :         MemoryContextSwitchTo(oldcontext);
     357             :     }
     358             : 
     359           2 :     funcctx = SRF_PERCALL_SETUP();
     360           2 :     items = (corrupt_items *) funcctx->user_fctx;
     361             : 
     362           2 :     if (items->next < items->count)
     363           0 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
     364             : 
     365           2 :     SRF_RETURN_DONE(funcctx);
     366             : }
     367             : 
     368             : /*
     369             :  * Remove the visibility map fork for a relation.  If there turn out to be
     370             :  * any bugs in the visibility map code that require rebuilding the VM, this
     371             :  * provides users with a way to do it that is cleaner than shutting down the
     372             :  * server and removing files by hand.
     373             :  *
     374             :  * This is a cut-down version of RelationTruncate.
     375             :  */
     376             : Datum
     377          12 : pg_truncate_visibility_map(PG_FUNCTION_ARGS)
     378             : {
     379          12 :     Oid         relid = PG_GETARG_OID(0);
     380             :     Relation    rel;
     381             :     ForkNumber  fork;
     382             :     BlockNumber block;
     383             : 
     384          12 :     rel = relation_open(relid, AccessExclusiveLock);
     385             : 
     386             :     /* Only some relkinds have a visibility map */
     387          12 :     check_relation_relkind(rel);
     388             : 
     389             :     /* Forcibly reset cached file size */
     390           2 :     RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
     391             : 
     392           2 :     block = visibilitymap_prepare_truncate(rel, 0);
     393           2 :     if (BlockNumberIsValid(block))
     394             :     {
     395           2 :         fork = VISIBILITYMAP_FORKNUM;
     396           2 :         smgrtruncate(RelationGetSmgr(rel), &fork, 1, &block);
     397             :     }
     398             : 
     399           2 :     if (RelationNeedsWAL(rel))
     400             :     {
     401             :         xl_smgr_truncate xlrec;
     402             : 
     403           2 :         xlrec.blkno = 0;
     404           2 :         xlrec.rlocator = rel->rd_locator;
     405           2 :         xlrec.flags = SMGR_TRUNCATE_VM;
     406             : 
     407           2 :         XLogBeginInsert();
     408           2 :         XLogRegisterData((char *) &xlrec, sizeof(xlrec));
     409             : 
     410           2 :         XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
     411             :     }
     412             : 
     413             :     /*
     414             :      * Release the lock right away, not at commit time.
     415             :      *
     416             :      * It would be a problem to release the lock prior to commit if this
     417             :      * truncate operation sends any transactional invalidation messages. Other
     418             :      * backends would potentially be able to lock the relation without
     419             :      * processing them in the window of time between when we release the lock
      420             :      * here and when we send the messages at our eventual commit.  However,
     421             :      * we're currently only sending a non-transactional smgr invalidation,
     422             :      * which will have been posted to shared memory immediately from within
      423             :      * smgrtruncate.  Therefore, there should be no race here.
     424             :      *
     425             :      * The reason why it's desirable to release the lock early here is because
     426             :      * of the possibility that someone will need to use this to blow away many
     427             :      * visibility map forks at once.  If we can't release the lock until
     428             :      * commit time, the transaction doing this will accumulate
     429             :      * AccessExclusiveLocks on all of those relations at the same time, which
     430             :      * is undesirable. However, if this turns out to be unsafe we may have no
     431             :      * choice...
     432             :      */
     433           2 :     relation_close(rel, AccessExclusiveLock);
     434             : 
     435             :     /* Nothing to return. */
     436           2 :     PG_RETURN_VOID();
     437             : }
     438             : 
     439             : /*
     440             :  * Helper function to construct whichever TupleDesc we need for a particular
     441             :  * call.
     442             :  */
     443             : static TupleDesc
     444          36 : pg_visibility_tupdesc(bool include_blkno, bool include_pd)
     445             : {
     446             :     TupleDesc   tupdesc;
     447          36 :     AttrNumber  maxattr = 2;
     448          36 :     AttrNumber  a = 0;
     449             : 
     450          36 :     if (include_blkno)
     451          34 :         ++maxattr;
     452          36 :     if (include_pd)
     453          14 :         ++maxattr;
     454          36 :     tupdesc = CreateTemplateTupleDesc(maxattr);
     455          36 :     if (include_blkno)
     456          34 :         TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
     457          36 :     TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
     458          36 :     TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
     459          36 :     if (include_pd)
     460          14 :         TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
     461             :     Assert(a == maxattr);
     462             : 
     463          36 :     return BlessTupleDesc(tupdesc);
     464             : }
     465             : 
     466             : /*
     467             :  * Collect visibility data about a relation.
     468             :  *
     469             :  * Checks relkind of relid and will throw an error if the relation does not
     470             :  * have a VM.
     471             :  */
     472             : static vbits *
     473          34 : collect_visibility_data(Oid relid, bool include_pd)
     474             : {
     475             :     Relation    rel;
     476             :     BlockNumber nblocks;
     477             :     vbits      *info;
     478             :     BlockNumber blkno;
     479          34 :     Buffer      vmbuffer = InvalidBuffer;
     480          34 :     BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
     481             : 
     482          34 :     rel = relation_open(relid, AccessShareLock);
     483             : 
     484             :     /* Only some relkinds have a visibility map */
     485          30 :     check_relation_relkind(rel);
     486             : 
     487          20 :     nblocks = RelationGetNumberOfBlocks(rel);
     488          20 :     info = palloc0(offsetof(vbits, bits) + nblocks);
     489          20 :     info->next = 0;
     490          20 :     info->count = nblocks;
     491             : 
     492          44 :     for (blkno = 0; blkno < nblocks; ++blkno)
     493             :     {
     494             :         int32       mapbits;
     495             : 
     496             :         /* Make sure we are interruptible. */
     497          24 :         CHECK_FOR_INTERRUPTS();
     498             : 
     499             :         /* Get map info. */
     500          24 :         mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     501          24 :         if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
     502          16 :             info->bits[blkno] |= (1 << 0);
     503          24 :         if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
     504          10 :             info->bits[blkno] |= (1 << 1);
     505             : 
     506             :         /*
     507             :          * Page-level data requires reading every block, so only get it if the
     508             :          * caller needs it.  Use a buffer access strategy, too, to prevent
     509             :          * cache-trashing.
     510             :          */
     511          24 :         if (include_pd)
     512             :         {
     513             :             Buffer      buffer;
     514             :             Page        page;
     515             : 
     516           6 :             buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     517             :                                         bstrategy);
     518           6 :             LockBuffer(buffer, BUFFER_LOCK_SHARE);
     519             : 
     520           6 :             page = BufferGetPage(buffer);
     521           6 :             if (PageIsAllVisible(page))
     522           4 :                 info->bits[blkno] |= (1 << 2);
     523             : 
     524           6 :             UnlockReleaseBuffer(buffer);
     525             :         }
     526             :     }
     527             : 
     528             :     /* Clean up. */
     529          20 :     if (vmbuffer != InvalidBuffer)
     530          14 :         ReleaseBuffer(vmbuffer);
     531          20 :     relation_close(rel, AccessShareLock);
     532             : 
     533          20 :     return info;
     534             : }
     535             : 
     536             : /*
      537             :  * The "strict" version of GetOldestNonRemovableTransactionId().  The
      538             :  * pg_visibility check can tolerate missed errors (failing to report some
      539             :  * real errors), but can't tolerate spurious ones (reporting false errors).
      540             :  * Normally, horizons move forward, but there are cases when they could move
      541             :  * backward (see comment for ComputeXidHorizons()).
     542             :  *
     543             :  * This is why we have to implement our own function for xid horizon, which
     544             :  * would be guaranteed to be newer or equal to any xid horizon computed before.
     545             :  * We have to do the following to achieve this.
     546             :  *
      547             :  * 1. Ignore processes' xmins, because they take into account connections to
      548             :  *    other databases that were ignored before.
     549             :  * 2. Ignore KnownAssignedXids, because they are not database-aware. At the
     550             :  *    same time, the primary could compute its horizons database-aware.
     551             :  * 3. Ignore walsender xmin, because it could go backward if some replication
     552             :  *    connections don't use replication slots.
     553             :  *
     554             :  * As a result, we're using only currently running xids to compute the horizon.
      555             :  * This surely sacrifices a significant amount of accuracy, but we have to do
      556             :  * so to avoid reporting false errors.
     557             :  */
     558             : static TransactionId
     559           2 : GetStrictOldestNonRemovableTransactionId(Relation rel)
     560             : {
     561             :     RunningTransactions runningTransactions;
     562             : 
     563           2 :     if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
     564             :     {
      565             :         /* No relation, shared relation, or recovery: take into account all running xids */
     566           0 :         runningTransactions = GetRunningTransactionData();
     567           0 :         LWLockRelease(ProcArrayLock);
     568           0 :         LWLockRelease(XidGenLock);
     569           0 :         return runningTransactions->oldestRunningXid;
     570             :     }
     571           2 :     else if (!RELATION_IS_LOCAL(rel))
     572             :     {
     573             :         /*
     574             :          * Normal relation: take into account xids running within the current
     575             :          * database
     576             :          */
     577           2 :         runningTransactions = GetRunningTransactionData();
     578           2 :         LWLockRelease(ProcArrayLock);
     579           2 :         LWLockRelease(XidGenLock);
     580           2 :         return runningTransactions->oldestDatabaseRunningXid;
     581             :     }
     582             :     else
     583             :     {
     584             :         /*
     585             :          * For temporary relations, ComputeXidHorizons() uses only
     586             :          * TransamVariables->latestCompletedXid and MyProc->xid.  These two
     587             :          * shouldn't go backwards.  So we're fine with this horizon.
     588             :          */
     589           0 :         return GetOldestNonRemovableTransactionId(rel);
     590             :     }
     591             : }
     592             : 
     593             : /*
     594             :  * Returns a list of items whose visibility map information does not match
     595             :  * the status of the tuples on the page.
     596             :  *
     597             :  * If all_visible is passed as true, this will include all items which are
     598             :  * on pages marked as all-visible in the visibility map but which do not
     599             :  * seem to in fact be all-visible.
     600             :  *
     601             :  * If all_frozen is passed as true, this will include all items which are
     602             :  * on pages marked as all-frozen but which do not seem to in fact be frozen.
     603             :  *
     604             :  * Checks relkind of relid and will throw an error if the relation does not
     605             :  * have a VM.
     606             :  */
     607             : static corrupt_items *
     608          20 : collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
     609             : {
     610             :     Relation    rel;
     611             :     BlockNumber nblocks;
     612             :     corrupt_items *items;
     613             :     BlockNumber blkno;
     614          20 :     Buffer      vmbuffer = InvalidBuffer;
     615          20 :     BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
     616          20 :     TransactionId OldestXmin = InvalidTransactionId; /* set only if all_visible */
     617             : 
     618          20 :     rel = relation_open(relid, AccessShareLock);
     619             : 
     620             :     /* Only some relkinds have a visibility map */
     621          20 :     check_relation_relkind(rel);
     622             : 
     623          10 :     if (all_visible)
     624           2 :         OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
     625             : 
     626          10 :     nblocks = RelationGetNumberOfBlocks(rel);
     627             : 
     628             :     /*
     629             :      * Guess an initial array size. We don't expect many corrupted tuples, so
     630             :      * start with a small array.  This function uses the "next" field to track
     631             :      * the next offset where we can store an item (which is the same thing as
     632             :      * the number of items found so far) and the "count" field to track the
     633             :      * number of entries allocated.  We'll repurpose these fields before
     634             :      * returning.
     635             :      */
     636          10 :     items = palloc0(sizeof(corrupt_items));
     637          10 :     items->next = 0;
     638          10 :     items->count = 64;
     639          10 :     items->tids = palloc(items->count * sizeof(ItemPointerData));
     640             : 
     641             :     /* Loop over every block in the relation. */
     642          30 :     for (blkno = 0; blkno < nblocks; ++blkno)
     643             :     {
     644          20 :         bool        check_frozen = false;
     645          20 :         bool        check_visible = false;
     646             :         Buffer      buffer;
     647             :         Page        page;
     648             :         OffsetNumber offnum,
     649             :                     maxoff;
     650             : 
     651             :         /* Make sure we are interruptible. */
     652          20 :         CHECK_FOR_INTERRUPTS();
     653             : 
     654             :         /* Use the visibility map to decide whether to check this page. */
     655          20 :         if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
     656          10 :             check_frozen = true;
     657          20 :         if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
     658           2 :             check_visible = true;
     659          20 :         if (!check_visible && !check_frozen)
     660           8 :             continue;
     661             : 
     662             :         /* Read and lock the page. */
     663          12 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     664             :                                     bstrategy);
     665          12 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     666             : 
     667          12 :         page = BufferGetPage(buffer);
     668          12 :         maxoff = PageGetMaxOffsetNumber(page);
     669             : 
     670             :         /*
     671             :          * The visibility map bits might have changed while we were acquiring
     672             :          * the page lock.  Recheck to avoid returning spurious results.
     673             :          */
     674          12 :         if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
     675           0 :             check_frozen = false;
     676          12 :         if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
     677           0 :             check_visible = false;
     678          12 :         if (!check_visible && !check_frozen)
     679             :         {
     680           0 :             UnlockReleaseBuffer(buffer);
     681           0 :             continue;
     682             :         }
     683             : 
     684             :         /* Iterate over each tuple on the page. */
     685          44 :         for (offnum = FirstOffsetNumber;
     686             :              offnum <= maxoff;
     687          32 :              offnum = OffsetNumberNext(offnum))
     688             :         {
     689             :             HeapTupleData tuple;
     690             :             ItemId      itemid;
     691             : 
     692          32 :             itemid = PageGetItemId(page, offnum);
     693             : 
     694             :             /* Unused or redirect line pointers are of no interest. */
     695          32 :             if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
     696           0 :                 continue;
     697             : 
     698             :             /* Dead line pointers are neither all-visible nor frozen. */
     699          32 :             if (ItemIdIsDead(itemid))
     700             :             {
     701           0 :                 ItemPointerSet(&(tuple.t_self), blkno, offnum);
     702           0 :                 record_corrupt_item(items, &tuple.t_self);
     703           0 :                 continue;
     704             :             }
     705             : 
     706             :             /* Initialize a HeapTupleData structure for checks below. */
     707          32 :             ItemPointerSet(&(tuple.t_self), blkno, offnum);
     708          32 :             tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
     709          32 :             tuple.t_len = ItemIdGetLength(itemid);
     710          32 :             tuple.t_tableOid = relid;
     711             : 
     712             :             /*
     713             :              * If we're checking whether the page is all-visible, we expect
     714             :              * the tuple to be all-visible.
     715             :              */
     716          32 :             if (check_visible &&
     717           2 :                 !tuple_all_visible(&tuple, OldestXmin, buffer))
     718             :             {
     719             :                 TransactionId RecomputedOldestXmin;
     720             : 
     721             :                 /*
     722             :                  * Time has passed since we computed OldestXmin, so it's
     723             :                  * possible that this tuple is all-visible in reality even
     724             :                  * though it doesn't appear so based on our
     725             :                  * previously-computed value.  Let's compute a new value so we
     726             :                  * can be certain whether there is a problem.
     727             :                  *
     728             :                  * From a concurrency point of view, it sort of sucks to
     729             :                  * retake ProcArrayLock here while we're holding the buffer
     730             :                  * share-locked, but it should be safe against
     731             :                  * deadlocks, because surely
     732             :                  * GetStrictOldestNonRemovableTransactionId() should never
     733             :                  * take a buffer lock. And this shouldn't happen often, so
     734             :                  * it's worth being careful so as to avoid false positives.
     735             :                  */
     736           0 :                 RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
     737             : 
     738           0 :                 if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
     739           0 :                     record_corrupt_item(items, &tuple.t_self);
     740             :                 else
     741             :                 {
     742           0 :                     OldestXmin = RecomputedOldestXmin;
     743           0 :                     if (!tuple_all_visible(&tuple, OldestXmin, buffer))
     744           0 :                         record_corrupt_item(items, &tuple.t_self);
     745             :                 }
     746             :             }
     747             : 
     748             :             /*
     749             :              * If we're checking whether the page is all-frozen, we expect the
     750             :              * tuple to be in a state where it will never need freezing.
     751             :              */
     752          32 :             if (check_frozen)
     753             :             {
     754          30 :                 if (heap_tuple_needs_eventual_freeze(tuple.t_data))
     755           0 :                     record_corrupt_item(items, &tuple.t_self); /* NOTE(review): may repeat a TID already recorded above */
     756             :             }
     757             :         }
     758             : 
     759          12 :         UnlockReleaseBuffer(buffer);
     760             :     }
     761             : 
     762             :     /* Clean up. */
     763          10 :     if (vmbuffer != InvalidBuffer)
     764          10 :         ReleaseBuffer(vmbuffer);
     765          10 :     relation_close(rel, AccessShareLock);
     766             : 
     767             :     /*
     768             :      * Before returning, repurpose the fields to match caller's expectations.
     769             :      * next is now the next item that should be read (rather than written) and
     770             :      * count is now the number of items we wrote (rather than the number we
     771             :      * allocated).
     772             :      */
     773          10 :     items->count = items->next;
     774          10 :     items->next = 0;
     775             : 
     776          10 :     return items;
     777             : }
     778             : 
     779             : /*
     780             :  * Append one TID to the corrupt-items array, doubling its capacity if full.
     781             :  */
     782             : static void
     783           0 : record_corrupt_item(corrupt_items *items, ItemPointer tid)
     784             : {
     785             :     /* enlarge output array if needed. */
     786           0 :     if (items->next >= items->count)
     787             :     {
     788           0 :         items->count *= 2;
     789           0 :         items->tids = repalloc(items->tids,
     790           0 :                                items->count * sizeof(ItemPointerData));
     791             :     }
     792             :     /* and add the new item */
     793           0 :     items->tids[items->next++] = *tid;
     794           0 : }
     795             : 
     796             : /*
     797             :  * Return true if the tuple is live and its xmin precedes OldestXmin, i.e. it
     798             :  * is all-visible.  The buffer should contain the tuple, locked and pinned.
     799             :  */
     800             : static bool
     801           2 : tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
     802             : {
     803             :     HTSV_Result state;
     804             :     TransactionId xmin;
     805             : 
     806           2 :     state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
     807           2 :     if (state != HEAPTUPLE_LIVE)
     808           0 :         return false;           /* all-visible implies live */
     809             : 
     810             :     /*
     811             :      * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
     812             :      * all-visible unless every tuple is hinted committed. However, those hint
     813             :      * bits could be lost after a crash, so we can't be certain that they'll
     814             :      * be set here.  So just check the xmin.
     815             :      */
     816             : 
     817           2 :     xmin = HeapTupleHeaderGetXmin(tup->t_data);
     818           2 :     if (!TransactionIdPrecedes(xmin, OldestXmin))
     819           0 :         return false;           /* xmin not old enough for all to see */
     820             : 
     821           2 :     return true;
     822             : }
     823             : 
     824             : /*
     825             :  * check_relation_relkind - convenience routine that raises an ERROR unless
     826             :  * RELKIND_HAS_TABLE_AM() holds for the relation's relkind
     827             :  */
     828             : static void
     829          86 : check_relation_relkind(Relation rel)
     830             : {
     831          86 :     if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
     832          50 :         ereport(ERROR,
     833             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     834             :                  errmsg("relation \"%s\" is of wrong relation kind",
     835             :                         RelationGetRelationName(rel)),
     836             :                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     837          36 : }

Generated by: LCOV version 1.14