LCOV - code coverage report
Current view: top level - contrib/pg_visibility - pg_visibility.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 281 322 87.3 %
Date: 2024-11-21 09:14:53 Functions: 24 25 96.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_visibility.c
       4             :  *    display visibility map information and page-level visibility bits
       5             :  *
       6             :  * Copyright (c) 2016-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  *    contrib/pg_visibility/pg_visibility.c
       9             :  *-------------------------------------------------------------------------
      10             :  */
      11             : #include "postgres.h"
      12             : 
      13             : #include "access/heapam.h"
      14             : #include "access/htup_details.h"
      15             : #include "access/visibilitymap.h"
      16             : #include "access/xloginsert.h"
      17             : #include "catalog/pg_type.h"
      18             : #include "catalog/storage_xlog.h"
      19             : #include "funcapi.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "storage/procarray.h"
      23             : #include "storage/read_stream.h"
      24             : #include "storage/smgr.h"
      25             : #include "utils/rel.h"
      26             : 
      27          14 : PG_MODULE_MAGIC;    /* magic block PostgreSQL checks when it loads this module */
      28             : 
      29             : typedef struct vbits    /* per-block flag bytes built by collect_visibility_data() */
      30             : {
      31             :     BlockNumber next;   /* index of the next block to emit; advanced by the set-returning functions */
      32             :     BlockNumber count;  /* number of entries in bits[]; set to the relation's block count */
      33             :     uint8       bits[FLEXIBLE_ARRAY_MEMBER];    /* bit 0: VM all-visible, bit 1: VM all-frozen, bit 2: page-level all-visible */
      34             : } vbits;
      35             : 
      36             : typedef struct corrupt_items    /* TID list handed out by pg_check_frozen() and pg_check_visible() */
      37             : {
      38             :     BlockNumber next;   /* index of the next TID to return */
      39             :     BlockNumber count;  /* iteration stops once next reaches this; presumably the number of TIDs recorded -- confirm in record_corrupt_item() */
      40             :     ItemPointer tids;   /* array of TIDs, indexed by next */
      41             : } corrupt_items;
      42             : 
      43             : /* for collect_corrupt_items_read_stream_next_block; NOTE(review): the field notes below are inferred from names only -- confirm against that callback */
      44             : struct collect_corrupt_items_read_stream_private
      45             : {
      46             :     bool        all_frozen;     /* presumably: consider pages marked all-frozen in the VM */
      47             :     bool        all_visible;    /* presumably: consider pages marked all-visible in the VM */
      48             :     BlockNumber current_blocknum;   /* presumably: next block the callback will look at */
      49             :     BlockNumber last_exclusive;     /* presumably: one past the last block to scan */
      50             :     Relation    rel;            /* presumably: relation being scanned */
      51             :     Buffer      vmbuffer;       /* presumably: visibility map buffer kept between lookups */
      52             : };
      53             : 
      54           6 : PG_FUNCTION_INFO_V1(pg_visibility_map);     /* one entry per SQL-callable function taking PG_FUNCTION_ARGS in this file */
      55           8 : PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
      56           8 : PG_FUNCTION_INFO_V1(pg_visibility);
      57           8 : PG_FUNCTION_INFO_V1(pg_visibility_rel);
      58           8 : PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
      59          10 : PG_FUNCTION_INFO_V1(pg_check_frozen);
      60          12 : PG_FUNCTION_INFO_V1(pg_check_visible);
      61           8 : PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
      62             : 
      63             : static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);    /* builds the result row descriptor */
      64             : static vbits *collect_visibility_data(Oid relid, bool include_pd);  /* gathers the per-block flag bytes */
      65             : static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
      66             :                                             bool all_frozen);   /* builds the TID list for pg_check_frozen/pg_check_visible */
      67             : static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
      68             : static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
      69             :                               Buffer buffer);
      70             : static void check_relation_relkind(Relation rel);   /* called right after relation_open() by every entry point that opens the relation itself */
      71             : 
      72             : /*
      73             :  * Visibility map information for a single block of a relation.  Takes the
      74             :  * relation OID and an int8 block number; returns one (all_visible, all_frozen) row.
      75             :  * Note: the VM code will silently return zeroes for pages past the end
      76             :  * of the map, so we allow probes up to MaxBlockNumber regardless of the
      77             :  * actual relation size.
      78             :  */
      79             : Datum
      80           0 : pg_visibility_map(PG_FUNCTION_ARGS)
      81             : {
      82           0 :     Oid         relid = PG_GETARG_OID(0);
      83           0 :     int64       blkno = PG_GETARG_INT64(1);
      84             :     int32       mapbits;
      85             :     Relation    rel;
      86           0 :     Buffer      vmbuffer = InvalidBuffer;   /* passed by reference to visibilitymap_get_status(); released below if valid */
      87             :     TupleDesc   tupdesc;
      88             :     Datum       values[2];
      89           0 :     bool        nulls[2] = {0};     /* no column is NULL */
      90             : 
      91           0 :     rel = relation_open(relid, AccessShareLock);
      92             : 
      93             :     /* Only some relkinds have a visibility map */
      94           0 :     check_relation_relkind(rel);
      95             : 
      96           0 :     if (blkno < 0 || blkno > MaxBlockNumber)    /* blkno arrives as int64, so range-check it before use */
      97           0 :         ereport(ERROR,
      98             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      99             :                  errmsg("invalid block number")));
     100             : 
     101           0 :     tupdesc = pg_visibility_tupdesc(false, false);  /* no blkno column, no pd_all_visible column */
     102             : 
     103           0 :     mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     104           0 :     if (vmbuffer != InvalidBuffer)
     105           0 :         ReleaseBuffer(vmbuffer);
     106           0 :     values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
     107           0 :     values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
     108             : 
     109           0 :     relation_close(rel, AccessShareLock);
     110             : 
     111           0 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     112             : }
     113             : 
     114             : /*
     115             :  * Visibility map bits for a single block of a relation, plus the page-level
     116             :  * all-visible flag (PageIsAllVisible) of that block; returns one (all_visible, all_frozen, pd_all_visible) row.
     117             :  */
     118             : Datum
     119          12 : pg_visibility(PG_FUNCTION_ARGS)
     120             : {
     121          12 :     Oid         relid = PG_GETARG_OID(0);
     122          12 :     int64       blkno = PG_GETARG_INT64(1);
     123             :     int32       mapbits;
     124             :     Relation    rel;
     125          12 :     Buffer      vmbuffer = InvalidBuffer;   /* passed by reference to visibilitymap_get_status(); released below if valid */
     126             :     Buffer      buffer;
     127             :     Page        page;
     128             :     TupleDesc   tupdesc;
     129             :     Datum       values[3];
     130          12 :     bool        nulls[3] = {0};     /* no column is NULL */
     131             : 
     132          12 :     rel = relation_open(relid, AccessShareLock);
     133             : 
     134             :     /* Only some relkinds have a visibility map */
     135          12 :     check_relation_relkind(rel);
     136             : 
     137           2 :     if (blkno < 0 || blkno > MaxBlockNumber)    /* blkno arrives as int64, so range-check it before use */
     138           0 :         ereport(ERROR,
     139             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     140             :                  errmsg("invalid block number")));
     141             : 
     142           2 :     tupdesc = pg_visibility_tupdesc(false, true);   /* no blkno column, with pd_all_visible column */
     143             : 
     144           2 :     mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     145           2 :     if (vmbuffer != InvalidBuffer)
     146           2 :         ReleaseBuffer(vmbuffer);
     147           2 :     values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
     148           2 :     values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
     149             : 
     150             :     /* Here we have to explicitly check rel size: the page is read only if blkno is below the current block count */
     151           2 :     if (blkno < RelationGetNumberOfBlocks(rel))
     152             :     {
     153           2 :         buffer = ReadBuffer(rel, blkno);
     154           2 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);  /* share-lock while the page is inspected */
     155             : 
     156           2 :         page = BufferGetPage(buffer);
     157           2 :         values[2] = BoolGetDatum(PageIsAllVisible(page));
     158             : 
     159           2 :         UnlockReleaseBuffer(buffer);
     160             :     }
     161             :     else
     162             :     {
     163             :         /* As with the vismap, silently return 0 for pages past EOF */
     164           0 :         values[2] = BoolGetDatum(false);
     165             :     }
     166             : 
     167           2 :     relation_close(rel, AccessShareLock);
     168             : 
     169           2 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     170             : }
     171             : 
     172             : /*
     173             :  * Set-returning function: visibility map information for every block in a relation, one (blkno, all_visible, all_frozen) row per block.
     174             :  */
     175             : Datum
     176          40 : pg_visibility_map_rel(PG_FUNCTION_ARGS)
     177             : {
     178             :     FuncCallContext *funcctx;
     179             :     vbits      *info;
     180             : 
     181          40 :     if (SRF_IS_FIRSTCALL())     /* first call: collect the data for all blocks up front */
     182             :     {
     183          22 :         Oid         relid = PG_GETARG_OID(0);
     184             :         MemoryContext oldcontext;
     185             : 
     186          22 :         funcctx = SRF_FIRSTCALL_INIT();
     187          22 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate in the multi-call context; later calls read these back from funcctx */
     188          22 :         funcctx->tuple_desc = pg_visibility_tupdesc(true, false);   /* with blkno column, no pd_all_visible column */
     189             :         /* collect_visibility_data will verify the relkind */
     190          22 :         funcctx->user_fctx = collect_visibility_data(relid, false);
     191           8 :         MemoryContextSwitchTo(oldcontext);
     192             :     }
     193             : 
     194          26 :     funcctx = SRF_PERCALL_SETUP();
     195          26 :     info = (vbits *) funcctx->user_fctx;
     196             : 
     197          26 :     if (info->next < info->count)   /* blocks remain: emit one row and advance */
     198             :     {
     199             :         Datum       values[3];
     200          18 :         bool        nulls[3] = {0};
     201             :         HeapTuple   tuple;
     202             : 
     203          18 :         values[0] = Int64GetDatum(info->next);
     204          18 :         values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);    /* bit 0: VM all-visible */
     205          18 :         values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);    /* bit 1: VM all-frozen */
     206          18 :         info->next++;
     207             : 
     208          18 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     209          18 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     210             :     }
     211             : 
     212           8 :     SRF_RETURN_DONE(funcctx);
     213             : }
     214             : 
     215             : /*
     216             :  * Set-returning function: visibility map information for every block in a relation, plus the page
     217             :  * level information for each block; one (blkno, all_visible, all_frozen, pd_all_visible) row per block.
     218             :  */
     219             : Datum
     220          18 : pg_visibility_rel(PG_FUNCTION_ARGS)
     221             : {
     222             :     FuncCallContext *funcctx;
     223             :     vbits      *info;
     224             : 
     225          18 :     if (SRF_IS_FIRSTCALL())     /* first call: collect the data for all blocks up front */
     226             :     {
     227          12 :         Oid         relid = PG_GETARG_OID(0);
     228             :         MemoryContext oldcontext;
     229             : 
     230          12 :         funcctx = SRF_FIRSTCALL_INIT();
     231          12 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate in the multi-call context; later calls read these back from funcctx */
     232          12 :         funcctx->tuple_desc = pg_visibility_tupdesc(true, true);    /* with blkno column and pd_all_visible column */
     233             :         /* collect_visibility_data will verify the relkind */
     234          12 :         funcctx->user_fctx = collect_visibility_data(relid, true);
     235          12 :         MemoryContextSwitchTo(oldcontext);
     236             :     }
     237             : 
     238          18 :     funcctx = SRF_PERCALL_SETUP();
     239          18 :     info = (vbits *) funcctx->user_fctx;
     240             : 
     241          18 :     if (info->next < info->count)   /* blocks remain: emit one row and advance */
     242             :     {
     243             :         Datum       values[4];
     244           6 :         bool        nulls[4] = {0};
     245             :         HeapTuple   tuple;
     246             : 
     247           6 :         values[0] = Int64GetDatum(info->next);
     248           6 :         values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);    /* bit 0: VM all-visible */
     249           6 :         values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);    /* bit 1: VM all-frozen */
     250           6 :         values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);    /* bit 2: page-level all-visible */
     251           6 :         info->next++;
     252             : 
     253           6 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     254           6 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     255             :     }
     256             : 
     257          12 :     SRF_RETURN_DONE(funcctx);
     258             : }
     259             : 
     260             : /*
     261             :  * Count the number of all-visible and all-frozen pages in the visibility
     262             :  * map for a particular relation.  Returns one row of two int8 counts: all-visible pages, then all-frozen pages.
     263             :  */
     264             : Datum
     265          12 : pg_visibility_map_summary(PG_FUNCTION_ARGS)
     266             : {
     267          12 :     Oid         relid = PG_GETARG_OID(0);
     268             :     Relation    rel;
     269             :     BlockNumber nblocks;
     270             :     BlockNumber blkno;
     271          12 :     Buffer      vmbuffer = InvalidBuffer;   /* same buffer variable is passed on every iteration and released once after the loop */
     272          12 :     int64       all_visible = 0;
     273          12 :     int64       all_frozen = 0;
     274             :     TupleDesc   tupdesc;
     275             :     Datum       values[2];
     276          12 :     bool        nulls[2] = {0};     /* no column is NULL */
     277             : 
     278          12 :     rel = relation_open(relid, AccessShareLock);
     279             : 
     280             :     /* Only some relkinds have a visibility map */
     281          12 :     check_relation_relkind(rel);
     282             : 
     283           2 :     nblocks = RelationGetNumberOfBlocks(rel);   /* block count is sampled once, before the scan */
     284             : 
     285           4 :     for (blkno = 0; blkno < nblocks; ++blkno)
     286             :     {
     287             :         int32       mapbits;
     288             : 
     289             :         /* Make sure we are interruptible. */
     290           2 :         CHECK_FOR_INTERRUPTS();
     291             : 
     292             :         /* Get map info. */
     293           2 :         mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     294           2 :         if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
     295           2 :             ++all_visible;
     296           2 :         if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
     297           0 :             ++all_frozen;
     298             :     }
     299             : 
     300             :     /* Clean up. */
     301           2 :     if (vmbuffer != InvalidBuffer)
     302           2 :         ReleaseBuffer(vmbuffer);
     303           2 :     relation_close(rel, AccessShareLock);
     304             : 
     305           2 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)    /* row descriptor is looked up from the call info rather than built locally */
     306           0 :         elog(ERROR, "return type must be a row type");
     307             : 
     308           2 :     values[0] = Int64GetDatum(all_visible);
     309           2 :     values[1] = Int64GetDatum(all_frozen);
     310             : 
     311           2 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     312             : }
     313             : 
     314             : /*
     315             :  * Return the TIDs of non-frozen tuples present in pages marked all-frozen
     316             :  * in the visibility map.  We hope no one will ever find any, but there could
     317             :  * be bugs, database corruption, etc.  Set-returning: one TID per call, taken from a list built on the first call.
     318             :  */
     319             : Datum
     320          30 : pg_check_frozen(PG_FUNCTION_ARGS)
     321             : {
     322             :     FuncCallContext *funcctx;
     323             :     corrupt_items *items;
     324             : 
     325          30 :     if (SRF_IS_FIRSTCALL())     /* first call: build the complete TID list */
     326             :     {
     327          20 :         Oid         relid = PG_GETARG_OID(0);
     328             :         MemoryContext oldcontext;
     329             : 
     330          20 :         funcctx = SRF_FIRSTCALL_INIT();
     331          20 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate in the multi-call context; later calls read the list back from funcctx */
     332             :         /* collect_corrupt_items will verify the relkind */
     333          20 :         funcctx->user_fctx = collect_corrupt_items(relid, false, true);    /* all_visible = false, all_frozen = true */
     334          10 :         MemoryContextSwitchTo(oldcontext);
     335             :     }
     336             : 
     337          20 :     funcctx = SRF_PERCALL_SETUP();
     338          20 :     items = (corrupt_items *) funcctx->user_fctx;
     339             : 
     340          20 :     if (items->next < items->count)     /* TIDs remain: return the next one and advance */
     341          10 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
     342             : 
     343          10 :     SRF_RETURN_DONE(funcctx);
     344             : }
     345             : 
     346             : /*
     347             :  * Return the TIDs of not-all-visible tuples in pages marked all-visible
     348             :  * in the visibility map.  We hope no one will ever find any, but there could
     349             :  * be bugs, database corruption, etc.  Set-returning: one TID per call, taken from a list built on the first call.
     350             :  */
     351             : Datum
     352          16 : pg_check_visible(PG_FUNCTION_ARGS)
     353             : {
     354             :     FuncCallContext *funcctx;
     355             :     corrupt_items *items;
     356             : 
     357          16 :     if (SRF_IS_FIRSTCALL())     /* first call: build the complete TID list */
     358             :     {
     359           6 :         Oid         relid = PG_GETARG_OID(0);
     360             :         MemoryContext oldcontext;
     361             : 
     362           6 :         funcctx = SRF_FIRSTCALL_INIT();
     363           6 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate in the multi-call context; later calls read the list back from funcctx */
     364             :         /* collect_corrupt_items will verify the relkind */
     365           6 :         funcctx->user_fctx = collect_corrupt_items(relid, true, false);    /* all_visible = true, all_frozen = false */
     366           6 :         MemoryContextSwitchTo(oldcontext);
     367             :     }
     368             : 
     369          16 :     funcctx = SRF_PERCALL_SETUP();
     370          16 :     items = (corrupt_items *) funcctx->user_fctx;
     371             : 
     372          16 :     if (items->next < items->count)     /* TIDs remain: return the next one and advance */
     373          10 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
     374             : 
     375           6 :     SRF_RETURN_DONE(funcctx);
     376             : }
     377             : 
     378             : /*
     379             :  * Remove the visibility map fork for a relation.  If there turn out to be
     380             :  * any bugs in the visibility map code that require rebuilding the VM, this
     381             :  * provides users with a way to do it that is cleaner than shutting down the
     382             :  * server and removing files by hand.  Takes the relation OID and returns void.
     383             :  *
     384             :  * This is a cut-down version of RelationTruncate.
     385             :  */
     386             : Datum
     387          12 : pg_truncate_visibility_map(PG_FUNCTION_ARGS)
     388             : {
     389          12 :     Oid         relid = PG_GETARG_OID(0);
     390             :     Relation    rel;
     391             :     ForkNumber  fork;
     392             :     BlockNumber block;
     393             : 
     394          12 :     rel = relation_open(relid, AccessExclusiveLock);    /* exclusive lock; released explicitly at the end of this function */
     395             : 
     396             :     /* Only some relkinds have a visibility map */
     397          12 :     check_relation_relkind(rel);
     398             : 
     399             :     /* Forcibly reset cached file size */
     400           2 :     RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
     401             : 
     402           2 :     block = visibilitymap_prepare_truncate(rel, 0);    /* truncation target of 0, i.e. keep nothing of the map */
     403           2 :     if (BlockNumberIsValid(block))  /* an invalid block number means there is nothing to truncate */
     404             :     {
     405           2 :         fork = VISIBILITYMAP_FORKNUM;
     406           2 :         smgrtruncate(RelationGetSmgr(rel), &fork, 1, &block);
     407             :     }
     408             : 
     409           2 :     if (RelationNeedsWAL(rel))  /* log the change as an smgr truncate record restricted to the VM fork */
     410             :     {
     411             :         xl_smgr_truncate xlrec;
     412             : 
     413           2 :         xlrec.blkno = 0;
     414           2 :         xlrec.rlocator = rel->rd_locator;
     415           2 :         xlrec.flags = SMGR_TRUNCATE_VM;
     416             : 
     417           2 :         XLogBeginInsert();
     418           2 :         XLogRegisterData((char *) &xlrec, sizeof(xlrec));
     419             : 
     420           2 :         XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
     421             :     }
     422             : 
     423             :     /*
     424             :      * Release the lock right away, not at commit time.
     425             :      *
     426             :      * It would be a problem to release the lock prior to commit if this
     427             :      * truncate operation sends any transactional invalidation messages. Other
     428             :      * backends would potentially be able to lock the relation without
     429             :      * processing them in the window of time between when we release the lock
     430             :      * here and when we send the messages at our eventual commit.  However,
     431             :      * we're currently only sending a non-transactional smgr invalidation,
     432             :      * which will have been posted to shared memory immediately from within
     433             :      * smgrtruncate.  Therefore, there should be no race here.
     434             :      *
     435             :      * The reason why it's desirable to release the lock early here is because
     436             :      * of the possibility that someone will need to use this to blow away many
     437             :      * visibility map forks at once.  If we can't release the lock until
     438             :      * commit time, the transaction doing this will accumulate
     439             :      * AccessExclusiveLocks on all of those relations at the same time, which
     440             :      * is undesirable. However, if this turns out to be unsafe we may have no
     441             :      * choice...
     442             :      */
     443           2 :     relation_close(rel, AccessExclusiveLock);
     444             : 
     445             :     /* Nothing to return. */
     446           2 :     PG_RETURN_VOID();
     447             : }
     448             : 
     449             : /*
     450             :  * Helper function to construct whichever TupleDesc we need for a particular
     451             :  * call.  Column order: [blkno int8,] all_visible bool, all_frozen bool [, pd_all_visible bool]; the result is passed through BlessTupleDesc.
     452             :  */
     453             : static TupleDesc
     454          36 : pg_visibility_tupdesc(bool include_blkno, bool include_pd)
     455             : {
     456             :     TupleDesc   tupdesc;
     457          36 :     AttrNumber  maxattr = 2;    /* all_visible and all_frozen are always present */
     458          36 :     AttrNumber  a = 0;          /* last attribute number assigned; pre-incremented for each column */
     459             : 
     460          36 :     if (include_blkno)
     461          34 :         ++maxattr;
     462          36 :     if (include_pd)
     463          14 :         ++maxattr;
     464          36 :     tupdesc = CreateTemplateTupleDesc(maxattr);
     465          36 :     if (include_blkno)
     466          34 :         TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
     467          36 :     TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
     468          36 :     TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
     469          36 :     if (include_pd)
     470          14 :         TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
     471             :     Assert(a == maxattr);   /* every attribute slot that was allocated has been initialized */
     472             : 
     473          36 :     return BlessTupleDesc(tupdesc);
     474             : }
     475             : 
     476             : /*
     477             :  * Collect visibility data about a relation.  Returns a palloc0'd vbits with one
     478             :  * byte per block: bit 0 = VM all-visible, bit 1 = VM all-frozen, bit 2 = page-level all-visible (set only when include_pd).
     479             :  * Checks relkind of relid and will throw an error if the relation does not
     480             :  * have a VM.
     481             :  */
     482             : static vbits *
     483          34 : collect_visibility_data(Oid relid, bool include_pd)
     484             : {
     485             :     Relation    rel;
     486             :     BlockNumber nblocks;
     487             :     vbits      *info;
     488             :     BlockNumber blkno;
     489          34 :     Buffer      vmbuffer = InvalidBuffer;   /* same buffer variable is passed on every iteration and released once after the loop */
     490          34 :     BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);  /* only handed to the main-fork read stream below */
     491             :     BlockRangeReadStreamPrivate p;
     492          34 :     ReadStream *stream = NULL;  /* stays NULL unless include_pd */
     493             : 
     494          34 :     rel = relation_open(relid, AccessShareLock);
     495             : 
     496             :     /* Only some relkinds have a visibility map */
     497          30 :     check_relation_relkind(rel);
     498             : 
     499          20 :     nblocks = RelationGetNumberOfBlocks(rel);   /* block count is sampled once, before the scan */
     500          20 :     info = palloc0(offsetof(vbits, bits) + nblocks);    /* header plus one zeroed byte per block */
     501          20 :     info->next = 0;
     502          20 :     info->count = nblocks;
     503             : 
     504             :     /* Create a stream if reading main fork. */
     505          20 :     if (include_pd)
     506             :     {
     507          12 :         p.current_blocknum = 0;     /* stream range starts at block 0 ... */
     508          12 :         p.last_exclusive = nblocks; /* ... and ends just before nblocks */
     509          12 :         stream = read_stream_begin_relation(READ_STREAM_FULL,
     510             :                                             bstrategy,
     511             :                                             rel,
     512             :                                             MAIN_FORKNUM,
     513             :                                             block_range_read_stream_cb,
     514             :                                             &p,
     515             :                                             0);
     516             :     }
     517             : 
     518          44 :     for (blkno = 0; blkno < nblocks; ++blkno)
     519             :     {
     520             :         int32       mapbits;
     521             : 
     522             :         /* Make sure we are interruptible. */
     523          24 :         CHECK_FOR_INTERRUPTS();
     524             : 
     525             :         /* Get map info. */
     526          24 :         mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     527          24 :         if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
     528          16 :             info->bits[blkno] |= (1 << 0);
     529          24 :         if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
     530          10 :             info->bits[blkno] |= (1 << 1);
     531             : 
     532             :         /*
     533             :          * Page-level data requires reading every block, so only get it if the
     534             :          * caller needs it.  Use a buffer access strategy, too, to prevent
     535             :          * cache-thrashing.
     536             :          */
     537          24 :         if (include_pd)
     538             :         {
     539             :             Buffer      buffer;
     540             :             Page        page;
     541             : 
     542           6 :             buffer = read_stream_next_buffer(stream, NULL);    /* next main-fork buffer; presumably block blkno, since the stream covers the same range -- confirm against block_range_read_stream_cb */
     543           6 :             LockBuffer(buffer, BUFFER_LOCK_SHARE);
     544             : 
     545           6 :             page = BufferGetPage(buffer);
     546           6 :             if (PageIsAllVisible(page))
     547           4 :                 info->bits[blkno] |= (1 << 2);
     548             : 
     549           6 :             UnlockReleaseBuffer(buffer);
     550             :         }
     551             :     }
     552             : 
     553          20 :     if (include_pd)
     554             :     {
     555             :         Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);    /* the stream must be exhausted after nblocks reads */
     556          12 :         read_stream_end(stream);
     557             :     }
     558             : 
     559             :     /* Clean up. */
     560          20 :     if (vmbuffer != InvalidBuffer)
     561          14 :         ReleaseBuffer(vmbuffer);
     562          20 :     relation_close(rel, AccessShareLock);
     563             : 
     564          20 :     return info;
     565             : }
     566             : 
     567             : /*
     568             :  * The "strict" version of GetOldestNonRemovableTransactionId().  The
     569             :  * pg_visibility check can tolerate missing some real errors (under-reporting),
     570             :  * but can't tolerate reporting errors that are not real. Normally, horizons
     571             :  * move forward, but there are cases when they could move backward
     572             :  * (see comment for ComputeXidHorizons()).
     573             :  *
     574             :  * This is why we have to implement our own function for xid horizon, which
     575             :  * would be guaranteed to be newer or equal to any xid horizon computed before.
     576             :  * We have to do the following to achieve this.
     577             :  *
     578             :  * 1. Ignore processes' xmins, because they take into account connections to
     579             :  *    other databases that were ignored before.
     580             :  * 2. Ignore KnownAssignedXids, as they are not database-aware. Although we
     581             :  *    now perform minimal checking on a standby by always using nextXid, this
     582             :  *    approach is better than nothing and will at least catch extremely broken
     583             :  *    cases where a xid is in the future.
     584             :  * 3. Ignore walsender xmin, because it could go backward if some replication
     585             :  *    connections don't use replication slots.
     586             :  *
     587             :  * It might seem like we could use KnownAssignedXids for shared catalogs,
     588             :  * since shared catalogs rely on a global horizon rather than a
     589             :  * database-specific one, but there are potential edge cases.  For example, a
     590             :  * transaction may crash on the primary without writing a commit/abort record.
     591             :  * This would lead to a situation where it appears to still be running on the
     592             :  * standby, even though it has already ended on the primary.  For this reason,
     593             :  * it's safer to ignore KnownAssignedXids, even for shared catalogs.
     594             :  *
     595             :  * As a result, we're using only currently running xids to compute the horizon.
     596             :  * Surely this significantly sacrifices accuracy, but we have to do so to
     597             :  * avoid reporting false errors.
     598             :  */
     599             : static TransactionId
     600          16 : GetStrictOldestNonRemovableTransactionId(Relation rel)
     601             : {
     602             :     RunningTransactions runningTransactions;
     603             : 
     604          16 :     if (RecoveryInProgress())
     605             :     {
     606             :         TransactionId result;
     607             : 
     608             :         /* As we ignore KnownAssignedXids on standby, just pick nextXid */
     609           2 :         LWLockAcquire(XidGenLock, LW_SHARED);
     610           2 :         result = XidFromFullTransactionId(TransamVariables->nextXid);
     611           2 :         LWLockRelease(XidGenLock);
     612           2 :         return result;
     613             :     }
     614          14 :     else if (rel == NULL || rel->rd_rel->relisshared)
     615             :     {
     616             :         /* Shared relation: take into account all running xids */
     617           0 :         runningTransactions = GetRunningTransactionData();
     618           0 :         LWLockRelease(ProcArrayLock);
     619           0 :         LWLockRelease(XidGenLock);
     620           0 :         return runningTransactions->oldestRunningXid;
     621             :     }
     622          14 :     else if (!RELATION_IS_LOCAL(rel))
     623             :     {
     624             :         /*
     625             :          * Normal relation: take into account xids running within the current
     626             :          * database
     627             :          */
     628          14 :         runningTransactions = GetRunningTransactionData();
     629          14 :         LWLockRelease(ProcArrayLock);
     630          14 :         LWLockRelease(XidGenLock);
     631          14 :         return runningTransactions->oldestDatabaseRunningXid;
     632             :     }
     633             :     else
     634             :     {
     635             :         /*
     636             :          * For temporary relations, ComputeXidHorizons() uses only
     637             :          * TransamVariables->latestCompletedXid and MyProc->xid.  These two
     638             :          * shouldn't go backwards.  So we're fine with this horizon.
     639             :          */
     640           0 :         return GetOldestNonRemovableTransactionId(rel);
     641             :     }
     642             : }
     643             : 
     644             : /*
     645             :  * Callback function to get next block for read stream object used in
     646             :  * collect_corrupt_items() function.
     647             :  */
     648             : static BlockNumber
     649         206 : collect_corrupt_items_read_stream_next_block(ReadStream *stream,
     650             :                                              void *callback_private_data,
     651             :                                              void *per_buffer_data)
     652             : {
     653         206 :     struct collect_corrupt_items_read_stream_private *p = callback_private_data;
     654             : 
     655         218 :     for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++)
     656             :     {
     657         202 :         bool        check_frozen = false;
     658         202 :         bool        check_visible = false;
     659             : 
     660             :         /* Make sure we are interruptible. */
     661         202 :         CHECK_FOR_INTERRUPTS();
     662             : 
     663         202 :         if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, &p->vmbuffer))
     664          98 :             check_frozen = true;
     665         202 :         if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, &p->vmbuffer))
     666          92 :             check_visible = true;
     667         202 :         if (!check_visible && !check_frozen)
     668          12 :             continue;
     669             : 
     670         190 :         return p->current_blocknum++;
     671             :     }
     672             : 
     673          16 :     return InvalidBlockNumber;
     674             : }
     675             : 
     676             : /*
     677             :  * Returns a list of items whose visibility map information does not match
     678             :  * the status of the tuples on the page.
     679             :  *
     680             :  * If all_visible is passed as true, this will include all items which are
     681             :  * on pages marked as all-visible in the visibility map but which do not
     682             :  * seem to in fact be all-visible.
     683             :  *
     684             :  * If all_frozen is passed as true, this will include all items which are
     685             :  * on pages marked as all-frozen but which do not seem to in fact be frozen.
     686             :  *
     687             :  * Checks relkind of relid and will throw an error if the relation does not
     688             :  * have a VM.
     689             :  */
     690             : static corrupt_items *
     691          26 : collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
     692             : {
     693             :     Relation    rel;
     694             :     corrupt_items *items;
     695          26 :     Buffer      vmbuffer = InvalidBuffer;
     696          26 :     BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
     697          26 :     TransactionId OldestXmin = InvalidTransactionId;
     698             :     struct collect_corrupt_items_read_stream_private p;
     699             :     ReadStream *stream;
     700             :     Buffer      buffer;
     701             : 
     702          26 :     rel = relation_open(relid, AccessShareLock);
     703             : 
     704             :     /* Only some relkinds have a visibility map */
     705          26 :     check_relation_relkind(rel);
     706             : 
     707          16 :     if (all_visible)
     708           6 :         OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
     709             : 
     710             :     /*
     711             :      * Guess an initial array size. We don't expect many corrupted tuples, so
     712             :      * start with a small array.  This function uses the "next" field to track
     713             :      * the next offset where we can store an item (which is the same thing as
     714             :      * the number of items found so far) and the "count" field to track the
     715             :      * number of entries allocated.  We'll repurpose these fields before
     716             :      * returning.
     717             :      */
     718          16 :     items = palloc0(sizeof(corrupt_items));
     719          16 :     items->next = 0;
     720          16 :     items->count = 64;
     721          16 :     items->tids = palloc(items->count * sizeof(ItemPointerData));
     722             : 
     723          16 :     p.current_blocknum = 0;
     724          16 :     p.last_exclusive = RelationGetNumberOfBlocks(rel);
     725          16 :     p.rel = rel;
     726          16 :     p.vmbuffer = InvalidBuffer;
     727          16 :     p.all_frozen = all_frozen;
     728          16 :     p.all_visible = all_visible;
     729          16 :     stream = read_stream_begin_relation(READ_STREAM_FULL,
     730             :                                         bstrategy,
     731             :                                         rel,
     732             :                                         MAIN_FORKNUM,
     733             :                                         collect_corrupt_items_read_stream_next_block,
     734             :                                         &p,
     735             :                                         0);
     736             : 
     737             :     /* Loop over every block returned by the read stream. */
     738         206 :     while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
     739             :     {
     740         190 :         bool        check_frozen = all_frozen;
     741         190 :         bool        check_visible = all_visible;
     742             :         Page        page;
     743             :         OffsetNumber offnum,
     744             :                     maxoff;
     745             :         BlockNumber blkno;
     746             : 
     747             :         /* Make sure we are interruptible. */
     748         190 :         CHECK_FOR_INTERRUPTS();
     749             : 
     750         190 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     751             : 
     752         190 :         page = BufferGetPage(buffer);
     753         190 :         maxoff = PageGetMaxOffsetNumber(page);
     754         190 :         blkno = BufferGetBlockNumber(buffer);
     755             : 
     756             :         /*
     757             :          * The visibility map bits might have changed while we were acquiring
     758             :          * the page lock.  Recheck to avoid returning spurious results.
     759             :          */
     760         190 :         if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
     761           0 :             check_frozen = false;
     762         190 :         if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
     763           0 :             check_visible = false;
     764         190 :         if (!check_visible && !check_frozen)
     765             :         {
     766           0 :             UnlockReleaseBuffer(buffer);
     767           0 :             continue;
     768             :         }
     769             : 
     770             :         /* Iterate over each tuple on the page. */
     771       32252 :         for (offnum = FirstOffsetNumber;
     772             :              offnum <= maxoff;
     773       32062 :              offnum = OffsetNumberNext(offnum))
     774             :         {
     775             :             HeapTupleData tuple;
     776             :             ItemId      itemid;
     777             : 
     778       32062 :             itemid = PageGetItemId(page, offnum);
     779             : 
     780             :             /* Unused or redirect line pointers are of no interest. */
     781       32062 :             if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
     782           0 :                 continue;
     783             : 
     784             :             /* Dead line pointers are neither all-visible nor frozen. */
     785       32062 :             if (ItemIdIsDead(itemid))
     786             :             {
     787           0 :                 ItemPointerSet(&(tuple.t_self), blkno, offnum);
     788           0 :                 record_corrupt_item(items, &tuple.t_self);
     789           0 :                 continue;
     790             :             }
     791             : 
     792             :             /* Initialize a HeapTupleData structure for checks below. */
     793       32062 :             ItemPointerSet(&(tuple.t_self), blkno, offnum);
     794       32062 :             tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
     795       32062 :             tuple.t_len = ItemIdGetLength(itemid);
     796       32062 :             tuple.t_tableOid = relid;
     797             : 
     798             :             /*
     799             :              * If we're checking whether the page is all-visible, we expect
     800             :              * the tuple to be all-visible.
     801             :              */
     802       32062 :             if (check_visible &&
     803       16018 :                 !tuple_all_visible(&tuple, OldestXmin, buffer))
     804             :             {
     805             :                 TransactionId RecomputedOldestXmin;
     806             : 
     807             :                 /*
     808             :                  * Time has passed since we computed OldestXmin, so it's
     809             :                  * possible that this tuple is all-visible in reality even
     810             :                  * though it doesn't appear so based on our
     811             :                  * previously-computed value.  Let's compute a new value so we
     812             :                  * can be certain whether there is a problem.
     813             :                  *
     814             :                  * From a concurrency point of view, it sort of sucks to
     815             :                  * retake ProcArrayLock here while we're holding a share lock
     816             :                  * on the buffer, but it should be safe against deadlocks,
     817             :                  * because surely
     818             :                  * GetStrictOldestNonRemovableTransactionId() should never
     819             :                  * take a buffer lock. And this shouldn't happen often, so
     820             :                  * it's worth being careful so as to avoid false positives.
     821             :                  */
     822          10 :                 RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
     823             : 
     824          10 :                 if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
     825          10 :                     record_corrupt_item(items, &tuple.t_self);
     826             :                 else
     827             :                 {
     828           0 :                     OldestXmin = RecomputedOldestXmin;
     829           0 :                     if (!tuple_all_visible(&tuple, OldestXmin, buffer))
     830           0 :                         record_corrupt_item(items, &tuple.t_self);
     831             :                 }
     832             :             }
     833             : 
     834             :             /*
     835             :              * If we're checking whether the page is all-frozen, we expect the
     836             :              * tuple to be in a state where it will never need freezing.
     837             :              */
     838       32062 :             if (check_frozen)
     839             :             {
     840       16044 :                 if (heap_tuple_needs_eventual_freeze(tuple.t_data))
     841          10 :                     record_corrupt_item(items, &tuple.t_self);
     842             :             }
     843             :         }
     844             : 
     845         190 :         UnlockReleaseBuffer(buffer);
     846             :     }
     847          16 :     read_stream_end(stream);
     848             : 
     849             :     /* Clean up. */
     850          16 :     if (vmbuffer != InvalidBuffer)
     851          14 :         ReleaseBuffer(vmbuffer);
     852          16 :     if (p.vmbuffer != InvalidBuffer)
     853          16 :         ReleaseBuffer(p.vmbuffer);
     854          16 :     relation_close(rel, AccessShareLock);
     855             : 
     856             :     /*
     857             :      * Before returning, repurpose the fields to match caller's expectations.
     858             :      * next is now the next item that should be read (rather than written) and
     859             :      * count is now the number of items we wrote (rather than the number we
     860             :      * allocated).
     861             :      */
     862          16 :     items->count = items->next;
     863          16 :     items->next = 0;
     864             : 
     865          16 :     return items;
     866             : }
     867             : 
     868             : /*
     869             :  * Remember one corrupt item.
     870             :  */
     871             : static void
     872          20 : record_corrupt_item(corrupt_items *items, ItemPointer tid)
     873             : {
     874             :     /* enlarge output array if needed. */
     875          20 :     if (items->next >= items->count)
     876             :     {
     877           0 :         items->count *= 2;
     878           0 :         items->tids = repalloc(items->tids,
     879           0 :                                items->count * sizeof(ItemPointerData));
     880             :     }
     881             :     /* and add the new item */
     882          20 :     items->tids[items->next++] = *tid;
     883          20 : }
     884             : 
     885             : /*
     886             :  * Check whether a tuple is all-visible relative to a given OldestXmin value.
     887             :  * The buffer should contain the tuple and should be locked and pinned.
     888             :  */
     889             : static bool
     890       16018 : tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
     891             : {
     892             :     HTSV_Result state;
     893             :     TransactionId xmin;
     894             : 
     895       16018 :     state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
     896       16018 :     if (state != HEAPTUPLE_LIVE)
     897          10 :         return false;           /* all-visible implies live */
     898             : 
     899             :     /*
     900             :      * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
     901             :      * all-visible unless every tuple is hinted committed. However, those hint
     902             :      * bits could be lost after a crash, so we can't be certain that they'll
     903             :      * be set here.  So just check the xmin.
     904             :      */
     905             : 
     906       16008 :     xmin = HeapTupleHeaderGetXmin(tup->t_data);
     907       16008 :     if (!TransactionIdPrecedes(xmin, OldestXmin))
     908           0 :         return false;           /* xmin not old enough for all to see */
     909             : 
     910       16008 :     return true;
     911             : }
     912             : 
     913             : /*
     914             :  * check_relation_relkind - convenience routine to check that relation
     915             :  * is of the relkind supported by the callers
     916             :  */
     917             : static void
     918          92 : check_relation_relkind(Relation rel)
     919             : {
     920          92 :     if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
     921          50 :         ereport(ERROR,
     922             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     923             :                  errmsg("relation \"%s\" is of wrong relation kind",
     924             :                         RelationGetRelationName(rel)),
     925             :                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     926          42 : }

Generated by: LCOV version 1.14