LCOV - code coverage report
Current view: top level - contrib/amcheck - verify_heapam.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 427 603 70.8 %
Date: 2021-11-29 04:09:17 Functions: 19 19 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * verify_heapam.c
       4             :  *    Functions to check postgresql heap relations for corruption
       5             :  *
       6             :  * Copyright (c) 2016-2021, PostgreSQL Global Development Group
       7             :  *
       8             :  *    contrib/amcheck/verify_heapam.c
       9             :  *-------------------------------------------------------------------------
      10             :  */
      11             : #include "postgres.h"
      12             : 
      13             : #include "access/detoast.h"
      14             : #include "access/genam.h"
      15             : #include "access/heapam.h"
      16             : #include "access/heaptoast.h"
      17             : #include "access/multixact.h"
      18             : #include "access/toast_internals.h"
      19             : #include "access/visibilitymap.h"
      20             : #include "catalog/pg_am.h"
      21             : #include "funcapi.h"
      22             : #include "miscadmin.h"
      23             : #include "storage/bufmgr.h"
      24             : #include "storage/procarray.h"
      25             : #include "utils/builtins.h"
      26             : #include "utils/fmgroids.h"
      27             : 
      28         582 : PG_FUNCTION_INFO_V1(verify_heapam);  /* declares the SQL-callable entry point
                     :                                       * verify_heapam(), defined below with
                     :                                       * PG_FUNCTION_ARGS */
      29             : 
      30             : /*
                     :  * The number of columns in tuples returned by verify_heapam.
                     :  */
      31             : #define HEAPCHECK_RELATION_COLS 4
      32             : 
      33             : /*
                     :  * The largest valid toast va_rawsize.  0x3FFFFFFF is 2^30 - 1.
                     :  */
      34             : #define VARLENA_SIZE_LIMIT 0x3FFFFFFF
      35             : 
      36             : /*
      37             :  * Despite the name, we use this for reporting problems with both XIDs and
      38             :  * MXIDs.
                     :  *
                     :  * This is the return type of check_mxid_in_range(),
                     :  * check_mxid_valid_in_rel() and get_xid_status(), all declared below.
                     :  *
                     :  * NOTE(review): the per-value notes are inferred from the enumerator names;
                     :  * the code that produces these values is not in this part of the file, so
                     :  * confirm them against the checking functions.
      39             :  */
      40             : typedef enum XidBoundsViolation
      41             : {
      42             :     XID_INVALID,                /* presumably: the ID is the invalid ID */
      43             :     XID_IN_FUTURE,              /* presumably: the ID is at or beyond the
                     :                                  * cached "next" ID */
      44             :     XID_PRECEDES_CLUSTERMIN,    /* presumably: older than the cluster-wide
                     :                                  * oldest ID */
      45             :     XID_PRECEDES_RELMIN,        /* presumably: older than the relation's
                     :                                  * relfrozenxid / relminmxid */
      46             :     XID_BOUNDS_OK               /* presumably: no bounds violation found */
      47             : } XidBoundsViolation;
      48             : 
      49             : typedef enum XidCommitStatus    /* Commit state of a transaction ID.  This is
                     :                                  * the type of HeapCheckContext.cached_status
                     :                                  * and of get_xid_status()'s output argument.
                     :                                  * NOTE(review): the per-value notes below
                     :                                  * are inferred from the names only. */
      50             : {
      51             :     XID_COMMITTED,              /* presumably: transaction committed */
      52             :     XID_IS_CURRENT_XID,         /* presumably: belongs to our own transaction */
      53             :     XID_IN_PROGRESS,            /* presumably: still running elsewhere */
      54             :     XID_ABORTED                 /* presumably: aborted or crashed */
      55             : } XidCommitStatus;
      56             : 
      57             : typedef enum SkipPages          /* Parsed form of verify_heapam's "skip"
                     :                                  * argument; selects which heap pages the
                     :                                  * scan passes over based on their
                     :                                  * visibility map bits. */
      58             : {
      59             :     SKIP_PAGES_ALL_FROZEN,      /* skip = "all-frozen": skip pages whose map
                     :                                  * bits include VISIBILITYMAP_ALL_FROZEN */
      60             :     SKIP_PAGES_ALL_VISIBLE,     /* skip = "all-visible": skip pages whose map
                     :                                  * bits include VISIBILITYMAP_ALL_VISIBLE */
      61             :     SKIP_PAGES_NONE             /* skip = "none": check every page; the
                     :                                  * visibility map is not consulted */
      62             : } SkipPages;
      63             : 
      64             : /*
      65             :  * Struct holding information about a toasted attribute sufficient to both
      66             :  * check the toasted attribute and, if found to be corrupt, to report where it
      67             :  * was encountered in the main table.
                     :  *
                     :  * verify_heapam() keeps a List of these in
                     :  * HeapCheckContext.toasted_attributes.  After it has released the buffer
                     :  * lock on a heap page, it passes each entry to check_toasted_attribute()
                     :  * and then frees the whole list with list_free_deep().
      68             :  */
      69             : typedef struct ToastedAttribute
      70             : {
      71             :     struct varatt_external toast_pointer;   /* the attribute's external toast
                     :                                              * pointer; NOTE(review): filled
                     :                                              * in outside this excerpt */
      72             :     BlockNumber blkno;          /* block in main table */
      73             :     OffsetNumber offnum;        /* offset in main table */
      74             :     AttrNumber  attnum;         /* attribute in main table */
      75             : } ToastedAttribute;
      76             : 
      77             : /*
      78             :  * Struct holding the running context information during
      79             :  * a lifetime of a verify_heapam execution.
                     :  *
                     :  * verify_heapam() zeroes the whole struct with memset() before filling it
                     :  * in, so any field it does not set explicitly starts out as 0/NULL/false.
      80             :  */
      81             : typedef struct HeapCheckContext
      82             : {
      83             :     /*
      84             :      * Cached copies of values from ShmemVariableCache and computed values
      85             :      * from them.
      86             :      */
      87             :     FullTransactionId next_fxid;    /* ShmemVariableCache->nextXid */
      88             :     TransactionId next_xid;     /* 32-bit version of next_fxid */
      89             :     TransactionId oldest_xid;   /* ShmemVariableCache->oldestXid; note that
                     :                                  * verify_heapam() overwrites this with
                     :                                  * relfrozenxid when that is a normal XID */
      90             :     FullTransactionId oldest_fxid;  /* 64-bit version of oldest_xid, computed
      91             :                                      * relative to next_fxid */
      92             :     TransactionId safe_xmin;    /* this XID and newer ones can't become
      93             :                                  * all-visible while we're running; set from
                     :                                  * GetTransactionSnapshot()->xmin */
      94             : 
      95             :     /*
      96             :      * Cached copy of value from MultiXactState
      97             :      */
      98             :     MultiXactId next_mxact;     /* MultiXactState->nextMXact */
      99             :     MultiXactId oldest_mxact;   /* MultiXactState->oldestMultiXactId */
     100             : 
     101             :     /*
     102             :      * Cached copies of the most recently checked xid and its status.
                     :      * cached_xid is initialized to InvalidTransactionId by verify_heapam().
     103             :      */
     104             :     TransactionId cached_xid;
     105             :     XidCommitStatus cached_status;
     106             : 
     107             :     /* Values concerning the heap relation being checked */
     108             :     Relation    rel;            /* opened by relation_open() with
                     :                                  * AccessShareLock */
     109             :     TransactionId relfrozenxid; /* copy of rel->rd_rel->relfrozenxid */
     110             :     FullTransactionId relfrozenfxid;    /* relfrozenxid converted by
                     :                                          * FullTransactionIdFromXidAndCtx() */
     111             :     TransactionId relminmxid;   /* copy of rel->rd_rel->relminmxid.
                     :                                  * NOTE(review): the name suggests a
                     :                                  * multixact ID, yet the field is declared
                     :                                  * TransactionId -- confirm whether
                     :                                  * MultiXactId was intended */
     112             :     Relation    toast_rel;      /* NULL unless the relation has a toast table
                     :                                  * and check_toast was requested */
     113             :     Relation   *toast_indexes;  /* filled in by toast_open_indexes(); NULL
                     :                                  * when toast_rel is NULL */
     114             :     Relation    valid_toast_index;  /* the toast_indexes[] entry selected by
                     :                                      * toast_open_indexes()'s return value */
     115             :     int         num_toast_indexes;  /* number of entries in toast_indexes */
     116             : 
     117             :     /* Values for iterating over pages in the relation */
     118             :     BlockNumber blkno;          /* block currently being checked */
     119             :     BufferAccessStrategy bstrategy; /* BAS_BULKREAD strategy for page reads */
     120             :     Buffer      buffer;         /* buffer holding the current page */
     121             :     Page        page;           /* BufferGetPage(buffer) */
     122             : 
     123             :     /* Values for iterating over tuples within a page */
     124             :     OffsetNumber offnum;        /* line pointer currently being checked */
     125             :     ItemId      itemid;         /* PageGetItemId(page, offnum) */
     126             :     uint16      lp_len;         /* ItemIdGetLength(itemid) */
     127             :     uint16      lp_off;         /* ItemIdGetOffset(itemid) */
     128             :     HeapTupleHeader tuphdr;     /* PageGetItem(page, itemid) */
     129             :     int         natts;          /* HeapTupleHeaderGetNatts(tuphdr) */
     130             : 
     131             :     /* Values for iterating over attributes within the tuple */
     132             :     uint32      offset;         /* offset in tuple data */
     133             :     AttrNumber  attnum;         /* -1 while no individual attribute is being
                     :                                  * examined, so that reports show it as NULL */
     134             : 
     135             :     /* True if tuple's xmax makes it eligible for pruning */
     136             :     bool        tuple_could_be_pruned;
     137             : 
     138             :     /*
     139             :      * List of ToastedAttribute structs for toasted attributes which are not
     140             :      * eligible for pruning and should be checked
                     :      * (checked and then emptied by verify_heapam() after each page)
     141             :      */
     142             :     List       *toasted_attributes;
     143             : 
     144             :     /*
                     :      * Whether verify_heapam has yet encountered any corrupt tuples.  Tested
                     :      * after each page to implement on_error_stop.
                     :      */
     145             :     bool        is_corrupt;
     146             : 
     147             :     /*
                     :      * The descriptor and tuplestore for verify_heapam's result tuples.  Both
                     :      * are created in the per-query memory context and handed to the caller
                     :      * through rsinfo->setDesc and rsinfo->setResult.
                     :      */
     148             :     TupleDesc   tupdesc;
     149             :     Tuplestorestate *tupstore;
     150             : } HeapCheckContext;
     151             : 
     152             : /*
                     :  * Internal implementation.
                     :  *
                     :  * Forward declarations of the file-local helpers.  Of these, verify_heapam()
                     :  * itself calls check_tuple() once per tuple that passes the line pointer
                     :  * checks, check_toasted_attribute() for each queued ToastedAttribute after
                     :  * the page lock has been released, report_corruption() for line pointer
                     :  * problems, verify_heapam_tupdesc() to build the result descriptor, and
                     :  * update_cached_xid_range(), update_cached_mxid_range() and
                     :  * FullTransactionIdFromXidAndCtx() while setting up the context.  The
                     :  * remaining helpers are not called from verify_heapam() directly.
                     :  */
     153             : static void check_tuple(HeapCheckContext *ctx);
     154             : static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
     155             :                               ToastedAttribute *ta, int32 *expected_chunk_seq,
     156             :                               uint32 extsize);
     157             : 
     158             : static bool check_tuple_attribute(HeapCheckContext *ctx);
     159             : static void check_toasted_attribute(HeapCheckContext *ctx,
     160             :                                     ToastedAttribute *ta);
     161             : 
     162             : static bool check_tuple_header(HeapCheckContext *ctx);
     163             : static bool check_tuple_visibility(HeapCheckContext *ctx);
     164             : 
     165             : static void report_corruption(HeapCheckContext *ctx, char *msg);
     166             : static void report_toast_corruption(HeapCheckContext *ctx,
     167             :                                     ToastedAttribute *ta, char *msg);
     168             : static TupleDesc verify_heapam_tupdesc(void);
     169             : static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid,
     170             :                                                         const HeapCheckContext *ctx);
     171             : static void update_cached_xid_range(HeapCheckContext *ctx);
     172             : static void update_cached_mxid_range(HeapCheckContext *ctx);
     173             : static XidBoundsViolation check_mxid_in_range(MultiXactId mxid,
     174             :                                               HeapCheckContext *ctx);
     175             : static XidBoundsViolation check_mxid_valid_in_rel(MultiXactId mxid,
     176             :                                                   HeapCheckContext *ctx);
     177             : static XidBoundsViolation get_xid_status(TransactionId xid,
     178             :                                          HeapCheckContext *ctx,
     179             :                                          XidCommitStatus *status);
     180             : 
     181             : /*
     182             :  * Scan and report corruption in heap pages, optionally reconciling toasted
     183             :  * attributes with entries in the associated toast table.  Intended to be
     184             :  * called from SQL with the following parameters:
     185             :  *
     186             :  *   relation:
     187             :  *     The Oid of the heap relation to be checked.
     188             :  *
     189             :  *   on_error_stop:
     190             :  *     Whether to stop at the end of the first page for which errors are
     191             :  *     detected.  Note that multiple rows may be returned.
     192             :  *
     193             :  *   check_toast:
     194             :  *     Whether to check each toasted attribute against the toast table to
     195             :  *     verify that it can be found there.
     196             :  *
     197             :  *   skip:
     198             :  *     What kinds of pages in the heap relation should be skipped.  Valid
     199             :  *     options are "all-visible", "all-frozen", and "none".
                     :  *     The comparison is case-insensitive.
                     :  *
                     :  *   starting block number (fifth argument):
                     :  *     The first block to check.  NULL means block 0; otherwise the value
                     :  *     must lie between 0 and the relation's last block, or an error is
                     :  *     raised.
                     :  *
                     :  *   ending block number (sixth argument):
                     :  *     The last block to check, inclusive.  NULL means the relation's last
                     :  *     block; otherwise the same range restriction applies.
                     :  *
                     :  * The first four arguments must not be NULL; a NULL in any of them raises
                     :  * an ERRCODE_INVALID_PARAMETER_VALUE error.
     200             :  *
     201             :  * Returns to the SQL caller a set of tuples, each containing the location
     202             :  * and a description of a corruption found in the heap.
                     :  * The rows are delivered through a materialized tuplestore installed in
                     :  * rsinfo->setResult; the function's own Datum result is always NULL.  An
                     :  * empty relation, or an unlogged relation during recovery, produces no
                     :  * rows.
     203             :  *
     204             :  * This code goes to some trouble to avoid crashing the server even if the
     205             :  * table pages are badly corrupted, but it's probably not perfect. If
     206             :  * check_toast is true, we'll use regular index lookups to try to fetch TOAST
     207             :  * tuples, which can certainly cause crashes if the right kind of corruption
     208             :  * exists in the toast table or index. No matter what parameters you pass,
     209             :  * we can't protect against crashes that might occur trying to look up the
     210             :  * commit status of transaction IDs (though we avoid trying to do such lookups
     211             :  * for transaction IDs that can't legally appear in the table).
     212             :  */
     213             : Datum
     214        5342 : verify_heapam(PG_FUNCTION_ARGS)
     215             : {
     216        5342 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     217             :     MemoryContext old_context;
     218             :     bool        random_access;
     219             :     HeapCheckContext ctx;
     220        5342 :     Buffer      vmbuffer = InvalidBuffer;   /* visibility map buffer, kept
                     :                                              * across pages and released after
                     :                                              * the scan */
     221             :     Oid         relid;
     222             :     bool        on_error_stop;
     223             :     bool        check_toast;
     224        5342 :     SkipPages   skip_option = SKIP_PAGES_NONE;
     225             :     BlockNumber first_block;
     226             :     BlockNumber last_block;
     227             :     BlockNumber nblocks;
     228             :     const char *skip;
     229             : 
     230             :     /*
                     :      * Check to see if caller supports us returning a tuplestore.
                     :      *
                     :      * NOTE(review): the second check below reports ERRCODE_SYNTAX_ERROR even
                     :      * though the condition is not a syntax problem, while the first check
                     :      * uses ERRCODE_FEATURE_NOT_SUPPORTED.  Confirm whether that SQLSTATE is
                     :      * intended.
                     :      */
     231        5342 :     if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
     232           0 :         ereport(ERROR,
     233             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     234             :                  errmsg("set-valued function called in context that cannot accept a set")));
     235        5342 :     if (!(rsinfo->allowedModes & SFRM_Materialize))
     236           0 :         ereport(ERROR,
     237             :                 (errcode(ERRCODE_SYNTAX_ERROR),
     238             :                  errmsg("materialize mode required, but it is not allowed in this context")));
     239             : 
     240             :     /*
                     :      * Check supplied arguments.  The first four must be non-NULL; the skip
                     :      * string is matched case-insensitively against the three valid options.
                     :      */
     241        5342 :     if (PG_ARGISNULL(0))
     242           0 :         ereport(ERROR,
     243             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     244             :                  errmsg("relation cannot be null")));
     245        5342 :     relid = PG_GETARG_OID(0);
     246             : 
     247        5342 :     if (PG_ARGISNULL(1))
     248           0 :         ereport(ERROR,
     249             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     250             :                  errmsg("on_error_stop cannot be null")));
     251        5342 :     on_error_stop = PG_GETARG_BOOL(1);
     252             : 
     253        5342 :     if (PG_ARGISNULL(2))
     254           0 :         ereport(ERROR,
     255             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     256             :                  errmsg("check_toast cannot be null")));
     257        5342 :     check_toast = PG_GETARG_BOOL(2);
     258             : 
     259        5342 :     if (PG_ARGISNULL(3))
     260           0 :         ereport(ERROR,
     261             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     262             :                  errmsg("skip cannot be null")));
     263        5342 :     skip = text_to_cstring(PG_GETARG_TEXT_PP(3));
     264        5342 :     if (pg_strcasecmp(skip, "all-visible") == 0)
     265         170 :         skip_option = SKIP_PAGES_ALL_VISIBLE;
     266        5172 :     else if (pg_strcasecmp(skip, "all-frozen") == 0)
     267         174 :         skip_option = SKIP_PAGES_ALL_FROZEN;
     268        4998 :     else if (pg_strcasecmp(skip, "none") == 0)
     269        4996 :         skip_option = SKIP_PAGES_NONE;
     270             :     else
     271           2 :         ereport(ERROR,
     272             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     273             :                  errmsg("invalid skip option"),
     274             :                  errhint("Valid skip options are \"all-visible\", \"all-frozen\", and \"none\".")));
     275             : 
     276        5340 :     memset(&ctx, 0, sizeof(HeapCheckContext));  /* start from an all-zero
                     :                                                  * context */
     277        5340 :     ctx.cached_xid = InvalidTransactionId;
     278        5340 :     ctx.toasted_attributes = NIL;
     279             : 
     280             :     /*
     281             :      * Any xmin newer than the xmin of our snapshot can't become all-visible
     282             :      * while we're running.
     283             :      */
     284        5340 :     ctx.safe_xmin = GetTransactionSnapshot()->xmin;
     285             : 
     286             :     /*
     287             :      * If we report corruption when not examining some individual attribute,
     288             :      * we need attnum to be reported as NULL.  Set that up before any
     289             :      * corruption reporting might happen.
     290             :      */
     291        5340 :     ctx.attnum = -1;
     292             : 
     293             :     /*
                     :      * The tupdesc and tuplestore must be created in ecxt_per_query_memory.
                     :      * They are installed in rsinfo right away, so every later return path
                     :      * hands the caller a valid (possibly empty) result set.
                     :      */
     294        5340 :     old_context = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
     295        5340 :     random_access = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
     296        5340 :     ctx.tupdesc = verify_heapam_tupdesc();
     297        5340 :     ctx.tupstore = tuplestore_begin_heap(random_access, false, work_mem);
     298        5340 :     rsinfo->returnMode = SFRM_Materialize;
     299        5340 :     rsinfo->setResult = ctx.tupstore;
     300        5340 :     rsinfo->setDesc = ctx.tupdesc;
     301        5340 :     MemoryContextSwitchTo(old_context);
     302             : 
     303             :     /* Open relation, check relkind and access method */
     304        5340 :     ctx.rel = relation_open(relid, AccessShareLock);
     305             : 
     306             :     /*
     307             :      * Check that a relation's relkind and access method are both supported.
                     :      * The accepted relkinds are RELKIND_RELATION, RELKIND_MATVIEW,
                     :      * RELKIND_TOASTVALUE and RELKIND_SEQUENCE.
     308             :      */
     309        5340 :     if (ctx.rel->rd_rel->relkind != RELKIND_RELATION &&
     310        2316 :         ctx.rel->rd_rel->relkind != RELKIND_MATVIEW &&
     311        2116 :         ctx.rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
     312         392 :         ctx.rel->rd_rel->relkind != RELKIND_SEQUENCE)
     313           8 :         ereport(ERROR,
     314             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     315             :                  errmsg("cannot check relation \"%s\"",
     316             :                         RelationGetRelationName(ctx.rel)),
     317             :                  errdetail_relkind_not_supported(ctx.rel->rd_rel->relkind)));
     318             : 
     319             :     /*
     320             :      * Sequences always use heap AM, but they don't show that in the catalogs.
     321             :      * Other relkinds might be using a different AM, so check.
     322             :      */
     323        5332 :     if (ctx.rel->rd_rel->relkind != RELKIND_SEQUENCE &&
     324        4948 :         ctx.rel->rd_rel->relam != HEAP_TABLE_AM_OID)
     325           0 :         ereport(ERROR,
     326             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     327             :                  errmsg("only heap AM is supported")));
     328             : 
     329             :     /*
     330             :      * Early exit for unlogged relations during recovery.  These will have no
     331             :      * relation fork, so there won't be anything to check.  We behave as if
     332             :      * the relation is empty.
                     :      * The message is only emitted at DEBUG1, so this is not an error.
     333             :      */
     334        5332 :     if (ctx.rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
     335           0 :         RecoveryInProgress())
     336             :     {
     337           0 :         ereport(DEBUG1,
     338             :                 (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
     339             :                  errmsg("cannot verify unlogged relation \"%s\" during recovery, skipping",
     340             :                         RelationGetRelationName(ctx.rel))));
     341           0 :         relation_close(ctx.rel, AccessShareLock);
     342           0 :         PG_RETURN_NULL();
     343             :     }
     344             : 
     345             :     /*
                     :      * Early exit if the relation is empty.  Nothing but the main relation
                     :      * has been opened at this point, so that is all there is to close; the
                     :      * caller receives the still-empty tuplestore.
                     :      */
     346        5332 :     nblocks = RelationGetNumberOfBlocks(ctx.rel);
     347        5298 :     if (!nblocks)
     348             :     {
     349        2894 :         relation_close(ctx.rel, AccessShareLock);
     350        2894 :         PG_RETURN_NULL();
     351             :     }
     352             : 
     353        2404 :     ctx.bstrategy = GetAccessStrategy(BAS_BULKREAD);
     354        2404 :     ctx.buffer = InvalidBuffer;
     355        2404 :     ctx.page = NULL;
     356             : 
     357             :     /*
                     :      * Validate block numbers, or handle nulls.  A NULL start means block 0
                     :      * and a NULL end means the last block.  The arguments arrive as int64
                     :      * and are range-checked before being narrowed to BlockNumber.
                     :      *
                     :      * Nothing here requires first_block <= last_block; if the caller passes
                     :      * a start beyond the end, the page loop below simply runs zero times.
                     :      */
     358        2404 :     if (PG_ARGISNULL(4))
     359        2156 :         first_block = 0;
     360             :     else
     361             :     {
     362         248 :         int64       fb = PG_GETARG_INT64(4);
     363             : 
     364         248 :         if (fb < 0 || fb >= nblocks)
     365           2 :             ereport(ERROR,
     366             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     367             :                      errmsg("starting block number must be between 0 and %u",
     368             :                             nblocks - 1)));
     369         246 :         first_block = (BlockNumber) fb;
     370             :     }
     371        2402 :     if (PG_ARGISNULL(5))
     372        2156 :         last_block = nblocks - 1;
     373             :     else
     374             :     {
     375         246 :         int64       lb = PG_GETARG_INT64(5);
     376             : 
     377         246 :         if (lb < 0 || lb >= nblocks)
     378           2 :             ereport(ERROR,
     379             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     380             :                      errmsg("ending block number must be between 0 and %u",
     381             :                             nblocks - 1)));
     382         244 :         last_block = (BlockNumber) lb;
     383             :     }
     384             : 
     385             :     /*
                     :      * Optionally open the toast relation, if any.  This happens only when
                     :      * the relation has a toast table and the caller asked for check_toast.
                     :      */
     386        2400 :     if (ctx.rel->rd_rel->reltoastrelid && check_toast)
     387        1116 :     {
     388             :         int         offset;
     389             : 
     390             :         /* Main relation has associated toast relation */
     391        1116 :         ctx.toast_rel = table_open(ctx.rel->rd_rel->reltoastrelid,
     392             :                                    AccessShareLock);
     393        1116 :         offset = toast_open_indexes(ctx.toast_rel,
     394             :                                     AccessShareLock,
     395             :                                     &(ctx.toast_indexes),
     396             :                                     &(ctx.num_toast_indexes));
     397        1116 :         ctx.valid_toast_index = ctx.toast_indexes[offset];
     398             :     }
     399             :     else
     400             :     {
     401             :         /*
     402             :          * Main relation has no associated toast relation, or we're
     403             :          * intentionally skipping it.
     404             :          */
     405        1284 :         ctx.toast_rel = NULL;
     406        1284 :         ctx.toast_indexes = NULL;
     407        1284 :         ctx.num_toast_indexes = 0;
     408             :     }
     409             : 
     410        2400 :     update_cached_xid_range(&ctx);  /* these two helpers are defined later in
                     :                                      * the file */
     411        2400 :     update_cached_mxid_range(&ctx);
     412        2400 :     ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid;
     413        2400 :     ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx);
     414        2400 :     ctx.relminmxid = ctx.rel->rd_rel->relminmxid;
     415             : 
     416        2400 :     if (TransactionIdIsNormal(ctx.relfrozenxid))    /* a normal relfrozenxid
                     :                                                      * replaces the cached
                     :                                                      * oldest_xid */
     417        2016 :         ctx.oldest_xid = ctx.relfrozenxid;
     418             : 
     419       19546 :     for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
     420             :     {
     421             :         OffsetNumber maxoff;
     422             : 
     423       17152 :         CHECK_FOR_INTERRUPTS();
     424             : 
     425             :         /*
                     :          * Optionally skip over all-frozen or all-visible blocks.  The
                     :          * visibility map buffer is kept in vmbuffer from one iteration to
                     :          * the next and is released only after the loop.
                     :          */
     426       17152 :         if (skip_option != SKIP_PAGES_NONE)
     427             :         {
     428             :             int32       mapbits;
     429             : 
     430        1472 :             mapbits = (int32) visibilitymap_get_status(ctx.rel, ctx.blkno,
     431             :                                                        &vmbuffer);
     432        1472 :             if (skip_option == SKIP_PAGES_ALL_FROZEN)
     433             :             {
     434         768 :                 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
     435          64 :                     continue;
     436             :             }
     437             : 
     438        1408 :             if (skip_option == SKIP_PAGES_ALL_VISIBLE)
     439             :             {
     440         704 :                 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
     441           2 :                     continue;
     442             :             }
     443             :         }
     444             : 
     445             :         /*
                     :          * Read and lock the next page.  The share lock is held while the
                     :          * page's line pointers and tuples are checked and is dropped at
                     :          * "clean up" below.
                     :          */
     446       17086 :         ctx.buffer = ReadBufferExtended(ctx.rel, MAIN_FORKNUM, ctx.blkno,
     447             :                                         RBM_NORMAL, ctx.bstrategy);
     448       17086 :         LockBuffer(ctx.buffer, BUFFER_LOCK_SHARE);
     449       17086 :         ctx.page = BufferGetPage(ctx.buffer);
     450             : 
     451             :         /* Perform tuple checks */
     452       17086 :         maxoff = PageGetMaxOffsetNumber(ctx.page);
     453      758516 :         for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
     454      741430 :              ctx.offnum = OffsetNumberNext(ctx.offnum))
     455             :         {
     456      741430 :             ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
     457             : 
     458             :             /* Skip over unused/dead line pointers */
     459      741430 :             if (!ItemIdIsUsed(ctx.itemid) || ItemIdIsDead(ctx.itemid))
     460        5264 :                 continue;
     461             : 
     462             :             /*
     463             :              * If this line pointer has been redirected, check that it
     464             :              * redirects to a valid offset within the line pointer array
                     :              * and that the target line pointer is in use.  Nothing further
                     :              * is checked for a redirect, whether or not it was reported.
     465             :              */
     466      736166 :             if (ItemIdIsRedirected(ctx.itemid))
     467             :             {
     468        8210 :                 OffsetNumber rdoffnum = ItemIdGetRedirect(ctx.itemid);
     469             :                 ItemId      rditem;
     470             : 
     471        8210 :                 if (rdoffnum < FirstOffsetNumber)
     472             :                 {
     473          12 :                     report_corruption(&ctx,
     474             :                                       psprintf("line pointer redirection to item at offset %u precedes minimum offset %u",
     475             :                                                (unsigned) rdoffnum,
     476             :                                                (unsigned) FirstOffsetNumber));
     477          12 :                     continue;
     478             :                 }
     479        8198 :                 if (rdoffnum > maxoff)
     480             :                 {
     481          28 :                     report_corruption(&ctx,
     482             :                                       psprintf("line pointer redirection to item at offset %u exceeds maximum offset %u",
     483             :                                                (unsigned) rdoffnum,
     484             :                                                (unsigned) maxoff));
     485          28 :                     continue;
     486             :                 }
     487        8170 :                 rditem = PageGetItemId(ctx.page, rdoffnum);
     488        8170 :                 if (!ItemIdIsUsed(rditem))
     489           0 :                     report_corruption(&ctx,
     490             :                                       psprintf("line pointer redirection to unused item at offset %u",
     491             :                                                (unsigned) rdoffnum));
     492        8170 :                 continue;
     493             :             }
     494             : 
     495             :             /*
                     :              * Sanity-check the line pointer's offset and length values.
                     :              * Each failed check is reported and the tuple is then skipped,
                     :              * so the tuple header is only read once the line pointer is
                     :              * known to be aligned, long enough, and within the page.
                     :              */
     496      727956 :             ctx.lp_len = ItemIdGetLength(ctx.itemid);
     497      727956 :             ctx.lp_off = ItemIdGetOffset(ctx.itemid);
     498             : 
     499      727956 :             if (ctx.lp_off != MAXALIGN(ctx.lp_off))
     500             :             {
     501          12 :                 report_corruption(&ctx,
     502             :                                   psprintf("line pointer to page offset %u is not maximally aligned",
     503          12 :                                            ctx.lp_off));
     504          12 :                 continue;
     505             :             }
     506      727944 :             if (ctx.lp_len < MAXALIGN(SizeofHeapTupleHeader))
     507             :             {
     508          24 :                 report_corruption(&ctx,
     509             :                                   psprintf("line pointer length %u is less than the minimum tuple header size %u",
     510          24 :                                            ctx.lp_len,
     511             :                                            (unsigned) MAXALIGN(SizeofHeapTupleHeader)));
     512          24 :                 continue;
     513             :             }
     514      727920 :             if (ctx.lp_off + ctx.lp_len > BLCKSZ)
     515             :             {
     516          28 :                 report_corruption(&ctx,
     517             :                                   psprintf("line pointer to page offset %u with length %u ends beyond maximum page offset %u",
     518          28 :                                            ctx.lp_off,
     519          28 :                                            ctx.lp_len,
     520             :                                            (unsigned) BLCKSZ));
     521          28 :                 continue;
     522             :             }
     523             : 
     524             :             /* It should be safe to examine the tuple's header, at least */
     525      727892 :             ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
     526      727892 :             ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
     527             : 
     528             :             /* Ok, ready to check this next tuple */
     529      727892 :             check_tuple(&ctx);
     530             :         }
     531             : 
     532             :         /* clean up: drop the lock and release the page's buffer */
     533       17086 :         UnlockReleaseBuffer(ctx.buffer);
     534             : 
     535             :         /*
     536             :          * Check any toast pointers from the page whose lock we just released
                     :          * and then free the list, so it starts out empty for the next page.
     537             :          */
     538       17086 :         if (ctx.toasted_attributes != NIL)
     539             :         {
     540             :             ListCell   *cell;
     541             : 
     542       24092 :             foreach(cell, ctx.toasted_attributes)
     543       22612 :                 check_toasted_attribute(&ctx, lfirst(cell));
     544        1480 :             list_free_deep(ctx.toasted_attributes);
     545        1480 :             ctx.toasted_attributes = NIL;
     546             :         }
     547             : 
     548       17080 :         if (on_error_stop && ctx.is_corrupt)    /* stop after the first page
                     :                                                  * with any corruption */
     549           0 :             break;
     550             :     }
     551             : 
     552        2394 :     if (vmbuffer != InvalidBuffer)
     553           6 :         ReleaseBuffer(vmbuffer);
     554             : 
     555             :     /* Close the associated toast table and indexes, if any. */
     556        2394 :     if (ctx.toast_indexes)
     557        1110 :         toast_close_indexes(ctx.toast_indexes, ctx.num_toast_indexes,
     558             :                             AccessShareLock);
     559        2394 :     if (ctx.toast_rel)
     560        1110 :         table_close(ctx.toast_rel, AccessShareLock);
     561             : 
     562             :     /* Close the main relation */
     563        2394 :     relation_close(ctx.rel, AccessShareLock);
     564             : 
     565        2394 :     PG_RETURN_NULL();           /* the rows themselves travel through
                     :                                  * rsinfo->setResult */
     566             : }
     567             : 
     568             : /*
     569             :  * Shared internal implementation for report_corruption and
     570             :  * report_toast_corruption.
     571             :  */
     572             : static void
     573         140 : report_corruption_internal(Tuplestorestate *tupstore, TupleDesc tupdesc,
     574             :                            BlockNumber blkno, OffsetNumber offnum,
     575             :                            AttrNumber attnum, char *msg)
     576             : {
     577             :     Datum       values[HEAPCHECK_RELATION_COLS];
     578             :     bool        nulls[HEAPCHECK_RELATION_COLS];
     579             :     HeapTuple   tuple;
     580             : 
     581         700 :     MemSet(values, 0, sizeof(values));
     582         140 :     MemSet(nulls, 0, sizeof(nulls));
     583         140 :     values[0] = Int64GetDatum(blkno);
     584         140 :     values[1] = Int32GetDatum(offnum);
     585         140 :     values[2] = Int32GetDatum(attnum);
     586         140 :     nulls[2] = (attnum < 0);
     587         140 :     values[3] = CStringGetTextDatum(msg);
     588             : 
     589             :     /*
     590             :      * In principle, there is nothing to prevent a scan over a large, highly
     591             :      * corrupted table from using work_mem worth of memory building up the
     592             :      * tuplestore.  That's ok, but if we also leak the msg argument memory
     593             :      * until the end of the query, we could exceed work_mem by more than a
     594             :      * trivial amount.  Therefore, free the msg argument each time we are
     595             :      * called rather than waiting for our current memory context to be freed.
     596             :      */
     597         140 :     pfree(msg);
     598             : 
     599         140 :     tuple = heap_form_tuple(tupdesc, values, nulls);
     600         140 :     tuplestore_puttuple(tupstore, tuple);
     601         140 : }
     602             : 
     603             : /*
     604             :  * Record a single corruption found in the main table.  The values in ctx should
     605             :  * indicate the location of the corruption, and the msg argument should contain
     606             :  * a human-readable description of the corruption.
     607             :  *
     608             :  * The msg argument is pfree'd by this function.
     609             :  */
     610             : static void
     611         138 : report_corruption(HeapCheckContext *ctx, char *msg)
     612             : {
     613         138 :     report_corruption_internal(ctx->tupstore, ctx->tupdesc, ctx->blkno,
     614         138 :                                ctx->offnum, ctx->attnum, msg);
     615         138 :     ctx->is_corrupt = true;
     616         138 : }
     617             : 
     618             : /*
     619             :  * Record corruption found in the toast table.  The values in ta should
     620             :  * indicate the location in the main table where the toast pointer was
     621             :  * encountered, and the msg argument should contain a human-readable
     622             :  * description of the toast table corruption.
     623             :  *
     624             :  * As above, the msg argument is pfree'd by this function.
     625             :  */
     626             : static void
     627           2 : report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta,
     628             :                         char *msg)
     629             : {
     630           2 :     report_corruption_internal(ctx->tupstore, ctx->tupdesc, ta->blkno,
     631           2 :                                ta->offnum, ta->attnum, msg);
     632           2 :     ctx->is_corrupt = true;
     633           2 : }
     634             : 
     635             : /*
     636             :  * Construct the TupleDesc used to report messages about corruptions found
     637             :  * while scanning the heap.
     638             :  */
     639             : static TupleDesc
     640        5340 : verify_heapam_tupdesc(void)
     641             : {
     642             :     TupleDesc   tupdesc;
     643        5340 :     AttrNumber  a = 0;
     644             : 
     645        5340 :     tupdesc = CreateTemplateTupleDesc(HEAPCHECK_RELATION_COLS);
     646        5340 :     TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
     647        5340 :     TupleDescInitEntry(tupdesc, ++a, "offnum", INT4OID, -1, 0);
     648        5340 :     TupleDescInitEntry(tupdesc, ++a, "attnum", INT4OID, -1, 0);
     649        5340 :     TupleDescInitEntry(tupdesc, ++a, "msg", TEXTOID, -1, 0);
     650             :     Assert(a == HEAPCHECK_RELATION_COLS);
     651             : 
     652        5340 :     return BlessTupleDesc(tupdesc);
     653             : }
     654             : 
     655             : /*
     656             :  * Check for tuple header corruption.
     657             :  *
     658             :  * Some kinds of corruption make it unsafe to check the tuple attributes, for
     659             :  * example when the line pointer refers to a range of bytes outside the page.
     660             :  * In such cases, we return false (not checkable) after recording appropriate
     661             :  * corruption messages.
     662             :  *
     663             :  * Some other kinds of tuple header corruption confuse the question of where
     664             :  * the tuple attributes begin, or how long the nulls bitmap is, etc., making it
     665             :  * unreasonable to attempt to check attributes, even if all candidate answers
     666             :  * to those questions would not result in reading past the end of the line
     667             :  * pointer or page.  In such cases, like above, we record corruption messages
     668             :  * about the header and then return false.
     669             :  *
     670             :  * Other kinds of tuple header corruption do not bear on the question of
     671             :  * whether the tuple attributes can be checked, so we record corruption
     672             :  * messages for them but we do not return false merely because we detected
     673             :  * them.
     674             :  *
     675             :  * Returns whether the tuple is sufficiently sensible to undergo visibility and
     676             :  * attribute checks.
     677             :  */
     678             : static bool
     679      727892 : check_tuple_header(HeapCheckContext *ctx)
     680             : {
     681      727892 :     HeapTupleHeader tuphdr = ctx->tuphdr;
     682      727892 :     uint16      infomask = tuphdr->t_infomask;
     683      727892 :     bool        result = true;
     684             :     unsigned    expected_hoff;
     685             : 
     686      727892 :     if (ctx->tuphdr->t_hoff > ctx->lp_len)
     687             :     {
     688           2 :         report_corruption(ctx,
     689             :                           psprintf("data begins at offset %u beyond the tuple length %u",
     690           2 :                                    ctx->tuphdr->t_hoff, ctx->lp_len));
     691           2 :         result = false;
     692             :     }
     693             : 
     694      727892 :     if ((ctx->tuphdr->t_infomask & HEAP_XMAX_COMMITTED) &&
     695         144 :         (ctx->tuphdr->t_infomask & HEAP_XMAX_IS_MULTI))
     696             :     {
     697           4 :         report_corruption(ctx,
     698             :                           pstrdup("multixact should not be marked committed"));
     699             : 
     700             :         /*
     701             :          * This condition is clearly wrong, but it's not enough to justify
     702             :          * skipping further checks, because we don't rely on this to determine
     703             :          * whether the tuple is visible or to interpret other relevant header
     704             :          * fields.
     705             :          */
     706             :     }
     707             : 
     708      727892 :     if (infomask & HEAP_HASNULL)
     709      307636 :         expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
     710             :     else
     711      420256 :         expected_hoff = MAXALIGN(SizeofHeapTupleHeader);
     712      727892 :     if (ctx->tuphdr->t_hoff != expected_hoff)
     713             :     {
     714          10 :         if ((infomask & HEAP_HASNULL) && ctx->natts == 1)
     715           0 :             report_corruption(ctx,
     716             :                               psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, has nulls)",
     717           0 :                                        expected_hoff, ctx->tuphdr->t_hoff));
     718          10 :         else if ((infomask & HEAP_HASNULL))
     719           2 :             report_corruption(ctx,
     720             :                               psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, has nulls)",
     721           2 :                                        expected_hoff, ctx->tuphdr->t_hoff, ctx->natts));
     722           8 :         else if (ctx->natts == 1)
     723           0 :             report_corruption(ctx,
     724             :                               psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, no nulls)",
     725           0 :                                        expected_hoff, ctx->tuphdr->t_hoff));
     726             :         else
     727           8 :             report_corruption(ctx,
     728             :                               psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, no nulls)",
     729           8 :                                        expected_hoff, ctx->tuphdr->t_hoff, ctx->natts));
     730          10 :         result = false;
     731             :     }
     732             : 
     733      727892 :     return result;
     734             : }
     735             : 
     736             : /*
     737             :  * Checks tuple visibility so we know which further checks are safe to
     738             :  * perform.
     739             :  *
     740             :  * If a tuple could have been inserted by a transaction that also added a
     741             :  * column to the table, but which ultimately did not commit, or which has not
     742             :  * yet committed, then the table's current TupleDesc might differ from the one
     743             :  * used to construct this tuple, so we must not check it.
     744             :  *
     745             :  * As a special case, if our own transaction inserted the tuple, even if we
     746             :  * added a column to the table, our TupleDesc should match.  We could check the
     747             :  * tuple, but choose not to do so.
     748             :  *
     749             :  * If a tuple has been updated or deleted, we can still read the old tuple for
     750             :  * corruption checking purposes, as long as we are careful about concurrent
     751             :  * vacuums.  The main table tuple itself cannot be vacuumed away because we
     752             :  * hold a buffer lock on the page, but if the deleting transaction is older
     753             :  * than our transaction snapshot's xmin, then vacuum could remove the toast at
     754             :  * any time, so we must not try to follow TOAST pointers.
     755             :  *
     756             :  * If xmin or xmax values are older than can be checked against clog, or appear
     757             :  * to be in the future (possibly due to wrap-around), then we cannot make a
     758             :  * determination about the visibility of the tuple, so we skip further checks.
     759             :  *
     760             :  * Returns true if the tuple itself should be checked, false otherwise.  Sets
     761             :  * ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
     762             :  * TOAST tuples -- is eligible for pruning.
     763             :  */
     764             : static bool
     765      727882 : check_tuple_visibility(HeapCheckContext *ctx)
     766             : {
     767             :     TransactionId xmin;
     768             :     TransactionId xvac;
     769             :     TransactionId xmax;
     770             :     XidCommitStatus xmin_status;
     771             :     XidCommitStatus xvac_status;
     772             :     XidCommitStatus xmax_status;
     773      727882 :     HeapTupleHeader tuphdr = ctx->tuphdr;
     774             : 
     775      727882 :     ctx->tuple_could_be_pruned = true;   /* have not yet proven otherwise */
     776             : 
     777             :     /* If xmin is normal, it should be within valid range */
     778      727882 :     xmin = HeapTupleHeaderGetXmin(tuphdr);
     779      727882 :     switch (get_xid_status(xmin, ctx, &xmin_status))
     780             :     {
     781      727876 :         case XID_INVALID:
     782             :         case XID_BOUNDS_OK:
     783      727876 :             break;
     784           2 :         case XID_IN_FUTURE:
     785           2 :             report_corruption(ctx,
     786             :                               psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u",
     787             :                                        xmin,
     788           2 :                                        EpochFromFullTransactionId(ctx->next_fxid),
     789           2 :                                        XidFromFullTransactionId(ctx->next_fxid)));
     790           2 :             return false;
     791           2 :         case XID_PRECEDES_CLUSTERMIN:
     792           2 :             report_corruption(ctx,
     793             :                               psprintf("xmin %u precedes oldest valid transaction ID %u:%u",
     794             :                                        xmin,
     795           2 :                                        EpochFromFullTransactionId(ctx->oldest_fxid),
     796           2 :                                        XidFromFullTransactionId(ctx->oldest_fxid)));
     797           2 :             return false;
     798           2 :         case XID_PRECEDES_RELMIN:
     799           2 :             report_corruption(ctx,
     800             :                               psprintf("xmin %u precedes relation freeze threshold %u:%u",
     801             :                                        xmin,
     802           2 :                                        EpochFromFullTransactionId(ctx->relfrozenfxid),
     803           2 :                                        XidFromFullTransactionId(ctx->relfrozenfxid)));
     804           2 :             return false;
     805             :     }
     806             : 
     807             :     /*
     808             :      * Has inserting transaction committed?
     809             :      */
     810      727876 :     if (!HeapTupleHeaderXminCommitted(tuphdr))
     811             :     {
     812       28524 :         if (HeapTupleHeaderXminInvalid(tuphdr))
     813           0 :             return false;       /* inserter aborted, don't check */
     814             :         /* Used by pre-9.0 binary upgrades */
     815       28524 :         else if (tuphdr->t_infomask & HEAP_MOVED_OFF)
     816             :         {
     817           0 :             xvac = HeapTupleHeaderGetXvac(tuphdr);
     818             : 
     819           0 :             switch (get_xid_status(xvac, ctx, &xvac_status))
     820             :             {
     821           0 :                 case XID_INVALID:
     822           0 :                     report_corruption(ctx,
     823             :                                       pstrdup("old-style VACUUM FULL transaction ID for moved off tuple is invalid"));
     824           0 :                     return false;
     825           0 :                 case XID_IN_FUTURE:
     826           0 :                     report_corruption(ctx,
     827             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple equals or exceeds next valid transaction ID %u:%u",
     828             :                                                xvac,
     829           0 :                                                EpochFromFullTransactionId(ctx->next_fxid),
     830           0 :                                                XidFromFullTransactionId(ctx->next_fxid)));
     831           0 :                     return false;
     832           0 :                 case XID_PRECEDES_RELMIN:
     833           0 :                     report_corruption(ctx,
     834             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes relation freeze threshold %u:%u",
     835             :                                                xvac,
     836           0 :                                                EpochFromFullTransactionId(ctx->relfrozenfxid),
     837           0 :                                                XidFromFullTransactionId(ctx->relfrozenfxid)));
     838           0 :                     return false;
     839           0 :                 case XID_PRECEDES_CLUSTERMIN:
     840           0 :                     report_corruption(ctx,
     841             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes oldest valid transaction ID %u:%u",
     842             :                                                xvac,
     843           0 :                                                EpochFromFullTransactionId(ctx->oldest_fxid),
     844           0 :                                                XidFromFullTransactionId(ctx->oldest_fxid)));
     845           0 :                     return false;
     846           0 :                 case XID_BOUNDS_OK:
     847           0 :                     break;
     848             :             }
     849             : 
     850           0 :             switch (xvac_status)
     851             :             {
     852           0 :                 case XID_IS_CURRENT_XID:
     853           0 :                     report_corruption(ctx,
     854             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple matches our current transaction ID",
     855             :                                                xvac));
     856           0 :                     return false;
     857           0 :                 case XID_IN_PROGRESS:
     858           0 :                     report_corruption(ctx,
     859             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple appears to be in progress",
     860             :                                                xvac));
     861           0 :                     return false;
     862             : 
     863           0 :                 case XID_COMMITTED:
     864             : 
     865             :                     /*
     866             :                      * The tuple is dead, because the xvac transaction moved
     867             :                      * it off and committed. It's checkable, but also
     868             :                      * prunable.
     869             :                      */
     870           0 :                     return true;
     871             : 
     872           0 :                 case XID_ABORTED:
     873             : 
     874             :                     /*
     875             :                      * The original xmin must have committed, because the xvac
     876             :                      * transaction tried to move it later. Since xvac is
     877             :                      * aborted, whether it's still alive now depends on the
     878             :                      * status of xmax.
     879             :                      */
     880           0 :                     break;
     881             :             }
     882           0 :         }
     883             :         /* Used by pre-9.0 binary upgrades */
     884       28524 :         else if (tuphdr->t_infomask & HEAP_MOVED_IN)
     885             :         {
     886           0 :             xvac = HeapTupleHeaderGetXvac(tuphdr);
     887             : 
     888           0 :             switch (get_xid_status(xvac, ctx, &xvac_status))
     889             :             {
     890           0 :                 case XID_INVALID:
     891           0 :                     report_corruption(ctx,
     892             :                                       pstrdup("old-style VACUUM FULL transaction ID for moved in tuple is invalid"));
     893           0 :                     return false;
     894           0 :                 case XID_IN_FUTURE:
     895           0 :                     report_corruption(ctx,
     896             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple equals or exceeds next valid transaction ID %u:%u",
     897             :                                                xvac,
     898           0 :                                                EpochFromFullTransactionId(ctx->next_fxid),
     899           0 :                                                XidFromFullTransactionId(ctx->next_fxid)));
     900           0 :                     return false;
     901           0 :                 case XID_PRECEDES_RELMIN:
     902           0 :                     report_corruption(ctx,
     903             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes relation freeze threshold %u:%u",
     904             :                                                xvac,
     905           0 :                                                EpochFromFullTransactionId(ctx->relfrozenfxid),
     906           0 :                                                XidFromFullTransactionId(ctx->relfrozenfxid)));
     907           0 :                     return false;
     908           0 :                 case XID_PRECEDES_CLUSTERMIN:
     909           0 :                     report_corruption(ctx,
     910             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes oldest valid transaction ID %u:%u",
     911             :                                                xvac,
     912           0 :                                                EpochFromFullTransactionId(ctx->oldest_fxid),
     913           0 :                                                XidFromFullTransactionId(ctx->oldest_fxid)));
     914           0 :                     return false;
     915           0 :                 case XID_BOUNDS_OK:
     916           0 :                     break;
     917             :             }
     918             : 
     919           0 :             switch (xvac_status)
     920             :             {
     921           0 :                 case XID_IS_CURRENT_XID:
     922           0 :                     report_corruption(ctx,
     923             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple matches our current transaction ID",
     924             :                                                xvac));
     925           0 :                     return false;
     926           0 :                 case XID_IN_PROGRESS:
     927           0 :                     report_corruption(ctx,
     928             :                                       psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple appears to be in progress",
     929             :                                                xvac));
     930           0 :                     return false;
     931             : 
     932           0 :                 case XID_COMMITTED:
     933             : 
     934             :                     /*
     935             :                      * The original xmin must have committed, because the xvac
     936             :                      * transaction moved it later. Whether it's still alive
     937             :                      * now depends on the status of xmax.
     938             :                      */
     939           0 :                     break;
     940             : 
     941           0 :                 case XID_ABORTED:
     942             : 
     943             :                     /*
     944             :                      * The tuple is dead, because the xvac transaction that
     945             :                      * tried to move it in aborted. It's checkable, but also
     946             :                      * prunable.
     947             :                      */
     948           0 :                     return true;
     949             :             }
     950           0 :         }
     951       28524 :         else if (xmin_status != XID_COMMITTED)
     952             :         {
     953             :             /*
     954             :              * Inserting transaction is not known to have committed, so it
     955             :              * might have changed the TupleDesc in ways we don't know about.
     956             :              * Thus, don't try to check the tuple structure.
     957             :              *
     958             :              * If xmin_status happens to be XID_IS_CURRENT_XID, then in theory
     959             :              * any such DDL changes ought to be visible to us, so perhaps we
     960             :              * could check anyway in that case. But, for now, let's be
     961             :              * conservative and treat this like any other uncommitted insert.
     962             :              */
     963           0 :             return false;
     964             :         }
     965             :     }
     966             : 
     967             :     /*
     968             :      * Okay, the inserter committed, so it was good at some point.  Now what
     969             :      * about the deleting transaction?
     970             :      */
     971             : 
     972      727876 :     if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)
     973             :     {
     974             :         /*
     975             :          * xmax is a multixact, so sanity-check the MXID. Note that we do this
     976             :          * prior to checking for HEAP_XMAX_INVALID or
     977             :          * HEAP_XMAX_IS_LOCKED_ONLY. This might therefore complain about
     978             :          * things that wouldn't actually be a problem during a normal scan,
     979             :          * but eventually we're going to have to freeze, and that process will
     980             :          * ignore hint bits.
     981             :          *
     982             :          * Even if the MXID is out of range, we still know that the original
     983             :          * insert committed, so we can check the tuple itself. However, we
     984             :          * can't rule out the possibility that this tuple is dead, so don't
     985             :          * clear ctx->tuple_could_be_pruned. Possibly we should go ahead and
     986             :          * clear that flag anyway if HEAP_XMAX_INVALID is set or if
     987             :          * HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of
     988             :          * avoiding possibly-bogus complaints about missing TOAST entries.
     989             :          */
     990         116 :         xmax = HeapTupleHeaderGetRawXmax(tuphdr);
     991         116 :         switch (check_mxid_valid_in_rel(xmax, ctx))
     992             :         {
     993           0 :             case XID_INVALID:
     994           0 :                 report_corruption(ctx,
     995             :                                   pstrdup("multitransaction ID is invalid"));
     996           0 :                 return true;
     997           2 :             case XID_PRECEDES_RELMIN:
     998           2 :                 report_corruption(ctx,
     999             :                                   psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u",
    1000             :                                            xmax, ctx->relminmxid));
    1001           2 :                 return true;
    1002           0 :             case XID_PRECEDES_CLUSTERMIN:
    1003           0 :                 report_corruption(ctx,
    1004             :                                   psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u",
    1005             :                                            xmax, ctx->oldest_mxact));
    1006           0 :                 return true;
    1007           2 :             case XID_IN_FUTURE:
    1008           2 :                 report_corruption(ctx,
    1009             :                                   psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u",
    1010             :                                            xmax,
    1011             :                                            ctx->next_mxact));
    1012           2 :                 return true;
    1013         112 :             case XID_BOUNDS_OK:
    1014         112 :                 break;
    1015             :         }
    1016      727760 :     }
    1017             : 
    1018      727872 :     if (tuphdr->t_infomask & HEAP_XMAX_INVALID)
    1019             :     {
    1020             :         /*
    1021             :          * This tuple is live.  A concurrently running transaction could
    1022             :          * delete it before we get around to checking the toast, but any such
    1023             :          * running transaction is surely not less than our safe_xmin, so the
    1024             :          * toast cannot be vacuumed out from under us.
    1025             :          */
    1026      727536 :         ctx->tuple_could_be_pruned = false;
    1027      727536 :         return true;
    1028             :     }
    1029             : 
    1030         336 :     if (HEAP_XMAX_IS_LOCKED_ONLY(tuphdr->t_infomask))
    1031             :     {
    1032             :         /*
    1033             :          * "Deleting" xact really only locked it, so the tuple is live in any
    1034             :          * case.  As above, a concurrently running transaction could delete
    1035             :          * it, but it cannot be vacuumed out from under us.
    1036             :          */
    1037          56 :         ctx->tuple_could_be_pruned = false;
    1038          56 :         return true;
    1039             :     }
    1040             : 
    1041         280 :     if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)
    1042             :     {
    1043             :         /*
    1044             :          * We already checked above that this multixact is within limits for
    1045             :          * this table.  Now check the update xid from this multixact.
    1046             :          */
    1047          56 :         xmax = HeapTupleGetUpdateXid(tuphdr);
    1048          56 :         switch (get_xid_status(xmax, ctx, &xmax_status))
    1049             :         {
    1050           0 :             case XID_INVALID:
    1051             :                 /* not LOCKED_ONLY, so it has to have an xmax */
    1052           0 :                 report_corruption(ctx,
    1053             :                                   pstrdup("update xid is invalid"));
    1054           0 :                 return true;
    1055           0 :             case XID_IN_FUTURE:
    1056           0 :                 report_corruption(ctx,
    1057             :                                   psprintf("update xid %u equals or exceeds next valid transaction ID %u:%u",
    1058             :                                            xmax,
    1059           0 :                                            EpochFromFullTransactionId(ctx->next_fxid),
    1060           0 :                                            XidFromFullTransactionId(ctx->next_fxid)));
    1061           0 :                 return true;
    1062           0 :             case XID_PRECEDES_RELMIN:
    1063           0 :                 report_corruption(ctx,
    1064             :                                   psprintf("update xid %u precedes relation freeze threshold %u:%u",
    1065             :                                            xmax,
    1066           0 :                                            EpochFromFullTransactionId(ctx->relfrozenfxid),
    1067           0 :                                            XidFromFullTransactionId(ctx->relfrozenfxid)));
    1068           0 :                 return true;
    1069           0 :             case XID_PRECEDES_CLUSTERMIN:
    1070           0 :                 report_corruption(ctx,
    1071             :                                   psprintf("update xid %u precedes oldest valid transaction ID %u:%u",
    1072             :                                            xmax,
    1073           0 :                                            EpochFromFullTransactionId(ctx->oldest_fxid),
    1074           0 :                                            XidFromFullTransactionId(ctx->oldest_fxid)));
    1075           0 :                 return true;
    1076          56 :             case XID_BOUNDS_OK:
    1077          56 :                 break;
    1078             :         }
    1079             : 
    1080          56 :         switch (xmax_status)
    1081             :         {
    1082           0 :             case XID_IS_CURRENT_XID:
    1083             :             case XID_IN_PROGRESS:
    1084             : 
    1085             :                 /*
    1086             :                  * The delete is in progress, so it cannot be visible to our
    1087             :                  * snapshot.
    1088             :                  */
    1089           0 :                 ctx->tuple_could_be_pruned = false;
    1090           0 :                 break;
    1091          56 :             case XID_COMMITTED:
    1092             : 
    1093             :                 /*
    1094             :                  * The delete committed.  Whether the toast can be vacuumed
    1095             :                  * away depends on how old the deleting transaction is.
    1096             :                  */
    1097          56 :                 ctx->tuple_could_be_pruned = TransactionIdPrecedes(xmax,
    1098             :                                                                    ctx->safe_xmin);
    1099          56 :                 break;
    1100           0 :             case XID_ABORTED:
    1101             : 
    1102             :                 /*
    1103             :                  * The delete aborted or crashed.  The tuple is still live.
    1104             :                  */
    1105           0 :                 ctx->tuple_could_be_pruned = false;
    1106           0 :                 break;
    1107             :         }
    1108             : 
    1109             :         /* Tuple itself is checkable even if it's dead. */
    1110          56 :         return true;
    1111             :     }
    1112             : 
    1113             :     /* xmax is an XID, not a MXID. Sanity check it. */
    1114         224 :     xmax = HeapTupleHeaderGetRawXmax(tuphdr);
    1115         224 :     switch (get_xid_status(xmax, ctx, &xmax_status))
    1116             :     {
    1117           2 :         case XID_IN_FUTURE:
    1118           2 :             report_corruption(ctx,
    1119             :                               psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
    1120             :                                        xmax,
    1121           2 :                                        EpochFromFullTransactionId(ctx->next_fxid),
    1122           2 :                                        XidFromFullTransactionId(ctx->next_fxid)));
    1123           2 :             return false;       /* corrupt */
    1124           0 :         case XID_PRECEDES_RELMIN:
    1125           0 :             report_corruption(ctx,
    1126             :                               psprintf("xmax %u precedes relation freeze threshold %u:%u",
    1127             :                                        xmax,
    1128           0 :                                        EpochFromFullTransactionId(ctx->relfrozenfxid),
    1129           0 :                                        XidFromFullTransactionId(ctx->relfrozenfxid)));
    1130           0 :             return false;       /* corrupt */
    1131           0 :         case XID_PRECEDES_CLUSTERMIN:
    1132           0 :             report_corruption(ctx,
    1133             :                               psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
    1134             :                                        xmax,
    1135           0 :                                        EpochFromFullTransactionId(ctx->oldest_fxid),
    1136           0 :                                        XidFromFullTransactionId(ctx->oldest_fxid)));
    1137           0 :             return false;       /* corrupt */
    1138         222 :         case XID_BOUNDS_OK:
    1139             :         case XID_INVALID:
    1140         222 :             break;
    1141             :     }
    1142             : 
    1143             :     /*
    1144             :      * Whether the toast can be vacuumed away depends on how old the deleting
    1145             :      * transaction is.
    1146             :      */
    1147         222 :     switch (xmax_status)
    1148             :     {
    1149           0 :         case XID_IS_CURRENT_XID:
    1150             :         case XID_IN_PROGRESS:
    1151             : 
    1152             :             /*
    1153             :              * The delete is in progress, so it cannot be visible to our
    1154             :              * snapshot.
    1155             :              */
    1156           0 :             ctx->tuple_could_be_pruned = false;
    1157           0 :             break;
    1158             : 
    1159         222 :         case XID_COMMITTED:
    1160             : 
    1161             :             /*
    1162             :              * The delete committed.  Whether the toast can be vacuumed away
    1163             :              * depends on how old the deleting transaction is.
    1164             :              */
    1165         222 :             ctx->tuple_could_be_pruned = TransactionIdPrecedes(xmax,
    1166             :                                                                ctx->safe_xmin);
    1167         222 :             break;
    1168             : 
    1169           0 :         case XID_ABORTED:
    1170             : 
    1171             :             /*
    1172             :              * The delete aborted or crashed.  The tuple is still live.
    1173             :              */
    1174           0 :             ctx->tuple_could_be_pruned = false;
    1175           0 :             break;
    1176             :     }
    1177             : 
    1178             :     /* Tuple itself is checkable even if it's dead. */
    1179         222 :     return true;
    1180             : }
    1181             : 
    1182             : 
    1183             : /*
    1184             :  * Check the current toast tuple against the state tracked in ctx, recording
    1185             :  * any corruption found in ctx->tupstore.
    1186             :  *
    1187             :  * This is not equivalent to running verify_heapam on the toast table itself,
    1188             :  * and is not hardened against corruption of the toast table.  Rather, when
    1189             :  * validating a toasted attribute in the main table, the sequence of toast
    1190             :  * tuples that store the toasted value are retrieved and checked in order, with
    1191             :  * each toast tuple being checked against where we are in the sequence, as well
    1192             :  * as each toast tuple having its varlena structure sanity checked.
    1193             :  *
    1194             :  * On entry, *expected_chunk_seq should be the chunk_seq value that we expect
    1195             :  * to find in toasttup. On exit, it will be updated to the value the next call
    1196             :  * to this function should expect to see.
    1197             :  */
    1198             : static void
    1199       79798 : check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
    1200             :                   ToastedAttribute *ta, int32 *expected_chunk_seq,
    1201             :                   uint32 extsize)
    1202             : {
    1203             :     int32       chunk_seq;
    1204       79798 :     int32       last_chunk_seq = (extsize - 1) / TOAST_MAX_CHUNK_SIZE;
    1205             :     Pointer     chunk;
    1206             :     bool        isnull;
    1207             :     int32       chunksize;
    1208             :     int32       expected_size;
    1209             : 
    1210             :     /* Sanity-check the sequence number. */
    1211       79798 :     chunk_seq = DatumGetInt32(fastgetattr(toasttup, 2,
    1212             :                                           ctx->toast_rel->rd_att, &isnull));
    1213       79798 :     if (isnull)
    1214             :     {
    1215           0 :         report_toast_corruption(ctx, ta,
    1216             :                                 psprintf("toast value %u has toast chunk with null sequence number",
    1217             :                                          ta->toast_pointer.va_valueid));
    1218           0 :         return;
    1219             :     }
    1220       79798 :     if (chunk_seq != *expected_chunk_seq)
    1221             :     {
    1222             :         /* Either the TOAST index is corrupt, or we don't have all chunks. */
    1223           0 :         report_toast_corruption(ctx, ta,
    1224             :                                 psprintf("toast value %u index scan returned chunk %d when expecting chunk %d",
    1225             :                                          ta->toast_pointer.va_valueid,
    1226             :                                          chunk_seq, *expected_chunk_seq));
    1227             :     }
    1228       79798 :     *expected_chunk_seq = chunk_seq + 1;
    1229             : 
    1230             :     /* Sanity-check the chunk data. */
    1231       79798 :     chunk = DatumGetPointer(fastgetattr(toasttup, 3,
    1232             :                                         ctx->toast_rel->rd_att, &isnull));
    1233       79798 :     if (isnull)
    1234             :     {
    1235           0 :         report_toast_corruption(ctx, ta,
    1236             :                                 psprintf("toast value %u chunk %d has null data",
    1237             :                                          ta->toast_pointer.va_valueid,
    1238             :                                          chunk_seq));
    1239           0 :         return;
    1240             :     }
    1241       79798 :     if (!VARATT_IS_EXTENDED(chunk))
    1242       79798 :         chunksize = VARSIZE(chunk) - VARHDRSZ;
    1243           0 :     else if (VARATT_IS_SHORT(chunk))
    1244             :     {
    1245             :         /*
    1246             :          * could happen due to heap_form_tuple doing its thing
    1247             :          */
    1248           0 :         chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
    1249             :     }
    1250             :     else
    1251             :     {
    1252             :         /* should never happen */
    1253           0 :         uint32      header = ((varattrib_4b *) chunk)->va_4byte.va_header;
    1254             : 
    1255           0 :         report_toast_corruption(ctx, ta,
    1256             :                                 psprintf("toast value %u chunk %d has invalid varlena header %0x",
    1257             :                                          ta->toast_pointer.va_valueid,
    1258             :                                          chunk_seq, header));
    1259           0 :         return;
    1260             :     }
    1261             : 
    1262             :     /*
    1263             :      * Some checks on the data we've found
    1264             :      */
    1265       79798 :     if (chunk_seq > last_chunk_seq)
    1266             :     {
    1267           0 :         report_toast_corruption(ctx, ta,
    1268             :                                 psprintf("toast value %u chunk %d follows last expected chunk %d",
    1269             :                                          ta->toast_pointer.va_valueid,
    1270             :                                          chunk_seq, last_chunk_seq));
    1271           0 :         return;
    1272             :     }
    1273             : 
    1274       79798 :     expected_size = chunk_seq < last_chunk_seq ? TOAST_MAX_CHUNK_SIZE
    1275       22604 :         : extsize - (last_chunk_seq * TOAST_MAX_CHUNK_SIZE);
    1276             : 
    1277       79798 :     if (chunksize != expected_size)
    1278           0 :         report_toast_corruption(ctx, ta,
    1279             :                                 psprintf("toast value %u chunk %d has size %u, but expected size %u",
    1280             :                                          ta->toast_pointer.va_valueid,
    1281             :                                          chunk_seq, chunksize, expected_size));
    1282             : }
    1283             : 
    1284             : /*
    1285             :  * Check the current attribute as tracked in ctx, recording any corruption
    1286             :  * found in ctx->tupstore.
    1287             :  *
    1288             :  * This function follows the logic performed by heap_deform_tuple(), and in the
    1289             :  * case of a toasted value, optionally stores the toast pointer so later it can
    1290             :  * be checked following the logic of detoast_external_attr(), checking for any
    1291             :  * conditions that would result in either of those functions Asserting or
    1292             :  * crashing the backend.  The checks performed by Asserts present in those two
    1293             :  * functions are also performed here and in check_toasted_attribute.  In cases
    1294             :  * where those two functions are a bit cavalier in their assumptions about data
    1295             :  * being correct, we perform additional checks not present in either of those
    1296             :  * two functions.  Where some condition is checked in both of those functions,
    1297             :  * we perform it here twice, as we parallel the logical flow of those two
    1298             :  * functions.  The presence of duplicate checks seems a reasonable price to pay
    1299             :  * for keeping this code tightly coupled with the code it protects.
    1300             :  *
    1301             :  * Returns true if the tuple attribute is sane enough for processing to
    1302             :  * continue on to the next attribute, false otherwise.
    1303             :  */
    1304             : static bool
    1305    10495688 : check_tuple_attribute(HeapCheckContext *ctx)
    1306             : {
    1307             :     Datum       attdatum;
    1308             :     struct varlena *attr;
    1309             :     char       *tp;             /* pointer to the tuple data */
    1310             :     uint16      infomask;
    1311             :     Form_pg_attribute thisatt;
    1312             :     struct varatt_external toast_pointer;
    1313             : 
    1314    10495688 :     infomask = ctx->tuphdr->t_infomask;
    1315    10495688 :     thisatt = TupleDescAttr(RelationGetDescr(ctx->rel), ctx->attnum);
    1316             : 
    1317    10495688 :     tp = (char *) ctx->tuphdr + ctx->tuphdr->t_hoff;
    1318             : 
    1319    10495688 :     if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
    1320             :     {
    1321           0 :         report_corruption(ctx,
    1322             :                           psprintf("attribute with length %u starts at offset %u beyond total tuple length %u",
    1323           0 :                                    thisatt->attlen,
    1324           0 :                                    ctx->tuphdr->t_hoff + ctx->offset,
    1325           0 :                                    ctx->lp_len));
    1326           0 :         return false;
    1327             :     }
    1328             : 
    1329             :     /* Skip null values */
    1330    10495688 :     if (infomask & HEAP_HASNULL && att_isnull(ctx->attnum, ctx->tuphdr->t_bits))
    1331     1632788 :         return true;
    1332             : 
    1333             :     /* Skip non-varlena values, but update offset first */
    1334     8862900 :     if (thisatt->attlen != -1)
    1335             :     {
    1336     8182028 :         ctx->offset = att_align_nominal(ctx->offset, thisatt->attalign);
    1337     8182028 :         ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
    1338             :                                             tp + ctx->offset);
    1339     8182028 :         if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
    1340             :         {
    1341           0 :             report_corruption(ctx,
    1342             :                               psprintf("attribute with length %u ends at offset %u beyond total tuple length %u",
    1343           0 :                                        thisatt->attlen,
    1344           0 :                                        ctx->tuphdr->t_hoff + ctx->offset,
    1345           0 :                                        ctx->lp_len));
    1346           0 :             return false;
    1347             :         }
    1348     8182028 :         return true;
    1349             :     }
    1350             : 
    1351             :     /* Ok, we're looking at a varlena attribute. */
    1352      680872 :     ctx->offset = att_align_pointer(ctx->offset, thisatt->attalign, -1,
    1353             :                                     tp + ctx->offset);
    1354             : 
    1355             :     /* Get the (possibly corrupt) varlena datum */
    1356      680872 :     attdatum = fetchatt(thisatt, tp + ctx->offset);
    1357             : 
    1358             :     /*
    1359             :      * We have the datum, but we cannot decode it carelessly, as it may still
    1360             :      * be corrupt.
    1361             :      */
    1362             : 
    1363             :     /*
    1364             :      * Check that VARTAG_SIZE won't hit a TrapMacro on a corrupt va_tag before
    1365             :      * risking a call into att_addlength_pointer
    1366             :      */
    1367      680872 :     if (VARATT_IS_EXTERNAL(tp + ctx->offset))
    1368             :     {
    1369       51486 :         uint8       va_tag = VARTAG_EXTERNAL(tp + ctx->offset);
    1370             : 
    1371       51486 :         if (va_tag != VARTAG_ONDISK)
    1372             :         {
    1373           0 :             report_corruption(ctx,
    1374             :                               psprintf("toasted attribute has unexpected TOAST tag %u",
    1375             :                                        va_tag));
    1376             :             /* We can't know where the next attribute begins */
    1377           0 :             return false;
    1378             :         }
    1379             :     }
    1380             : 
    1381             :     /* Ok, should be safe now */
    1382      680872 :     ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
    1383             :                                         tp + ctx->offset);
    1384             : 
    1385      680872 :     if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
    1386             :     {
    1387           2 :         report_corruption(ctx,
    1388             :                           psprintf("attribute with length %u ends at offset %u beyond total tuple length %u",
    1389           2 :                                    thisatt->attlen,
    1390           2 :                                    ctx->tuphdr->t_hoff + ctx->offset,
    1391           2 :                                    ctx->lp_len));
    1392             : 
    1393           2 :         return false;
    1394             :     }
    1395             : 
    1396             :     /*
    1397             :      * heap_deform_tuple would be done with this attribute at this point,
    1398             :      * having stored it in values[], and would continue to the next attribute.
    1399             :      * We go further, because we need to check if the toast datum is corrupt.
    1400             :      */
    1401             : 
    1402      680870 :     attr = (struct varlena *) DatumGetPointer(attdatum);
    1403             : 
    1404             :     /*
    1405             :      * Now we follow the logic of detoast_external_attr(), with the same
    1406             :      * caveats about being paranoid about corruption.
    1407             :      */
    1408             : 
    1409             :     /* Skip values that are not external */
    1410      680870 :     if (!VARATT_IS_EXTERNAL(attr))
    1411      629384 :         return true;
    1412             : 
    1413             :     /* It is external, and we're looking at a page on disk */
    1414             : 
    1415             :     /*
    1416             :      * Must copy attr into toast_pointer for alignment considerations
    1417             :      */
    1418       51486 :     VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
    1419             : 
    1420             :     /* Toasted attributes too large to be untoasted should never be stored */
    1421       51486 :     if (toast_pointer.va_rawsize > VARLENA_SIZE_LIMIT)
    1422           0 :         report_corruption(ctx,
    1423             :                           psprintf("toast value %u rawsize %d exceeds limit %d",
    1424             :                                    toast_pointer.va_valueid,
    1425             :                                    toast_pointer.va_rawsize,
    1426             :                                    VARLENA_SIZE_LIMIT));
    1427             : 
    1428       51486 :     if (VARATT_IS_COMPRESSED(&toast_pointer))
    1429             :     {
    1430             :         ToastCompressionId cmid;
    1431       24562 :         bool        valid = false;
    1432             : 
    1433             :         /* Compression should never expand the attribute */
    1434       24562 :         if (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) > toast_pointer.va_rawsize - VARHDRSZ)
    1435           0 :             report_corruption(ctx,
    1436             :                               psprintf("toast value %u external size %u exceeds maximum expected for rawsize %d",
    1437             :                                        toast_pointer.va_valueid,
    1438           0 :                                        VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer),
    1439             :                                        toast_pointer.va_rawsize));
    1440             : 
    1441             :         /* Compressed attributes should have a valid compression method */
    1442       24562 :         cmid = TOAST_COMPRESS_METHOD(&toast_pointer);
    1443       24562 :         switch (cmid)
    1444             :         {
    1445             :             /* List of all valid compression method IDs */
    1446       24562 :             case TOAST_PGLZ_COMPRESSION_ID:
    1447             :             case TOAST_LZ4_COMPRESSION_ID:
    1448       24562 :                 valid = true;
    1449       24562 :                 break;
    1450             : 
    1451             :             /* Recognized but invalid compression method ID */
    1452           0 :             case TOAST_INVALID_COMPRESSION_ID:
    1453           0 :                 break;
    1454             : 
    1455             :             /* Intentionally no default here */
    1456             :         }
    1457       24562 :         if (!valid)
    1458           0 :             report_corruption(ctx,
    1459             :                               psprintf("toast value %u has invalid compression method id %d",
    1460             :                                        toast_pointer.va_valueid, cmid));
    1461             :     }
    1462             : 
    1463             :     /* The tuple header better claim to contain toasted values */
    1464       51486 :     if (!(infomask & HEAP_HASEXTERNAL))
    1465             :     {
    1466           0 :         report_corruption(ctx,
    1467             :                           psprintf("toast value %u is external but tuple header flag HEAP_HASEXTERNAL not set",
    1468             :                                    toast_pointer.va_valueid));
    1469           0 :         return true;
    1470             :     }
    1471             : 
    1472             :     /* The relation better have a toast table */
    1473       51486 :     if (!ctx->rel->rd_rel->reltoastrelid)
    1474             :     {
    1475           0 :         report_corruption(ctx,
    1476             :                           psprintf("toast value %u is external but relation has no toast relation",
    1477             :                                    toast_pointer.va_valueid));
    1478           0 :         return true;
    1479             :     }
    1480             : 
    1481             :     /* If we were told to skip toast checking, then we're done. */
    1482       51486 :     if (ctx->toast_rel == NULL)
    1483       28858 :         return true;
    1484             : 
    1485             :     /*
    1486             :      * If this tuple is eligible to be pruned, we cannot check the toast.
    1487             :      * Otherwise, we push a copy of the toast tuple so we can check it after
    1488             :      * releasing the main table buffer lock.
    1489             :      */
    1490       22628 :     if (!ctx->tuple_could_be_pruned)
    1491             :     {
    1492             :         ToastedAttribute *ta;
    1493             : 
    1494       22624 :         ta = (ToastedAttribute *) palloc0(sizeof(ToastedAttribute));
    1495             : 
    1496       22624 :         VARATT_EXTERNAL_GET_POINTER(ta->toast_pointer, attr);
    1497       22624 :         ta->blkno = ctx->blkno;
    1498       22624 :         ta->offnum = ctx->offnum;
    1499       22624 :         ta->attnum = ctx->attnum;
    1500       22624 :         ctx->toasted_attributes = lappend(ctx->toasted_attributes, ta);
    1501             :     }
    1502             : 
    1503       22628 :     return true;
    1504             : }
    1505             : 
    1506             : /*
    1507             :  * Look up the value of one toasted attribute, ta (as previously collected in
    1508             :  * ctx->toasted_attributes), in the toast table and check every chunk found.
    1509             :  * Reports corruption if no chunk exists or the chunks end too early.  Should
    1510             :  * only be called on toast pointers which cannot be vacuumed away meanwhile.
    1511             :  */
    1512             : static void
    1513       22612 : check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
    1514             : {
    1515             :     SnapshotData SnapshotToast;
    1516             :     ScanKeyData toastkey;
    1517             :     SysScanDesc toastscan;
    1518             :     bool        found_toasttup;
    1519             :     HeapTuple   toasttup;
    1520             :     uint32      extsize;
    1521       22612 :     int32       expected_chunk_seq = 0; /* passed by reference to check_toast_tuple */
    1522             :     int32       last_chunk_seq;
    1523             : 
    1524       22612 :     extsize = VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer);
    1525       22612 :     last_chunk_seq = (extsize - 1) / TOAST_MAX_CHUNK_SIZE; /* zero-based number of the final expected chunk; NOTE(review): extsize is unsigned, so extsize == 0 would wrap here -- presumably rejected before we get here, confirm */
    1526             : 
    1527             :     /*
    1528             :      * Setup a scan key to find chunks in toast table with matching va_valueid
    1529             :      */
    1530       22612 :     ScanKeyInit(&toastkey,
    1531             :                 (AttrNumber) 1,
    1532             :                 BTEqualStrategyNumber, F_OIDEQ,
    1533       22612 :                 ObjectIdGetDatum(ta->toast_pointer.va_valueid));
    1534             : 
    1535             :     /*
    1536             :      * Check if any chunks for this toasted object exist in the toast table,
    1537             :      * accessible via the index.
    1538             :      */
    1539       22612 :     init_toast_snapshot(&SnapshotToast);
    1540       22612 :     toastscan = systable_beginscan_ordered(ctx->toast_rel,
    1541             :                                            ctx->valid_toast_index,
    1542             :                                            &SnapshotToast, 1,
    1543             :                                            &toastkey);
    1544       22612 :     found_toasttup = false;
    1545      125016 :     while ((toasttup =
    1546      102410 :             systable_getnext_ordered(toastscan,
    1547             :                                      ForwardScanDirection)) != NULL)
    1548             :     {
    1549       79798 :         found_toasttup = true;
    1550       79798 :         check_toast_tuple(toasttup, ctx, ta, &expected_chunk_seq, extsize);
    1551             :     }
    1552       22606 :     systable_endscan_ordered(toastscan);
    1553             : 
    1554       22606 :     if (!found_toasttup)
    1555           2 :         report_toast_corruption(ctx, ta,
    1556             :                                 psprintf("toast value %u not found in toast table",
    1557             :                                          ta->toast_pointer.va_valueid));
    1558       22604 :     else if (expected_chunk_seq <= last_chunk_seq) /* scan ended while still expecting a chunk at or before the last one */
    1559           0 :         report_toast_corruption(ctx, ta,
    1560             :                                 psprintf("toast value %u was expected to end at chunk %d, but ended while expecting chunk %d",
    1561             :                                          ta->toast_pointer.va_valueid,
    1562             :                                          last_chunk_seq, expected_chunk_seq));
    1563       22606 : }
    1564             : 
    1565             : /*
    1566             :  * Check the current tuple as tracked in ctx: header, visibility, attribute
    1567             :  * count, then each attribute.  Corruption found is recorded in ctx->tupstore.
    1568             :  */
    1569             : static void
    1570      727892 : check_tuple(HeapCheckContext *ctx)
    1571             : {
    1572             :     /*
    1573             :      * Check various forms of tuple header corruption, and if the header is
    1574             :      * too corrupt, do not continue with other checks.
    1575             :      */
    1576      727892 :     if (!check_tuple_header(ctx))
    1577          10 :         return;
    1578             : 
    1579             :     /*
    1580             :      * Check tuple visibility.  If the inserting transaction aborted, we
    1581             :      * cannot assume our relation description matches the tuple structure, and
    1582             :      * therefore cannot check it.
    1583             :      */
    1584      727882 :     if (!check_tuple_visibility(ctx))
    1585           8 :         return;
    1586             : 
    1587             :     /*
    1588             :      * The tuple is visible, so it must be compatible with the current version
    1589             :      * of the relation descriptor. It might have fewer columns than are
    1590             :      * present in the relation descriptor, but it cannot have more.
    1591             :      */
    1592      727874 :     if (RelationGetDescr(ctx->rel)->natts < ctx->natts)
    1593             :     {
    1594           4 :         report_corruption(ctx,
    1595             :                           psprintf("number of attributes %u exceeds maximum expected for table %u",
    1596             :                                    ctx->natts, /* NOTE(review): both counts are printed with %u; confirm the natts fields are unsigned-compatible */
    1597           4 :                                    RelationGetDescr(ctx->rel)->natts));
    1598           4 :         return;
    1599             :     }
    1600             : 
    1601             :     /*
    1602             :      * Check each attribute unless we hit corruption that confuses what to do
    1603             :      * next, at which point we abort further attribute checks for this tuple.
    1604             :      * Note that we don't abort for all types of corruption, only for those
    1605             :      * types where we don't know how to continue.  We also don't abort the
    1606             :      * checking of toasted attributes collected from the tuple prior to
    1607             :      * aborting.  Those will still be checked later along with other toasted
    1608             :      * attributes collected from the page.
    1609             :      */
    1610      727870 :     ctx->offset = 0; /* reset before the attribute loop below */
    1611    11223556 :     for (ctx->attnum = 0; ctx->attnum < ctx->natts; ctx->attnum++)
    1612    10495688 :         if (!check_tuple_attribute(ctx))
    1613           2 :             break;              /* cannot continue */
    1614             : 
    1615             :     /* revert attnum to -1 until we again examine individual attributes */
    1616      727870 :     ctx->attnum = -1;
    1617             : }
    1618             : 
    1619             : /*
    1620             :  * Convert a TransactionId into a FullTransactionId using the cached
    1621             :  * ctx->next_fxid and ctx->next_xid.  It is the caller's responsibility to
    1622             :  * have already updated those cached values, if necessary.
    1623             :  */
    1624             : static FullTransactionId
    1625      120886 : FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
    1626             : {
    1627             :     uint32      epoch;
    1628             : 
    1629      120886 :     if (!TransactionIdIsNormal(xid))
    1630         384 :         return FullTransactionIdFromEpochAndXid(0, xid); /* non-normal xids are given epoch 0 */
    1631      120502 :     epoch = EpochFromFullTransactionId(ctx->next_fxid);
    1632      120502 :     if (xid > ctx->next_xid) /* numerically beyond next_xid: place it in the previous epoch */
    1633        2408 :         epoch--; /* NOTE(review): epoch is uint32, so this wraps when epoch is 0 -- confirm that is acceptable */
    1634      120502 :     return FullTransactionIdFromEpochAndXid(epoch, xid);
    1635             : }
    1636             : 
    1637             : /*
    1638             :  * Refresh ctx's cached valid xid range from ShmemVariableCache, under XidGenLock.
    1639             :  */
    1640             : static void
    1641        3382 : update_cached_xid_range(HeapCheckContext *ctx)
    1642             : {
    1643             :     /* Make cached copies */
    1644        3382 :     LWLockAcquire(XidGenLock, LW_SHARED);
    1645        3382 :     ctx->next_fxid = ShmemVariableCache->nextXid;
    1646        3382 :     ctx->oldest_xid = ShmemVariableCache->oldestXid;
    1647        3382 :     LWLockRelease(XidGenLock);
    1648             : 
    1649             :     /* Derive next_xid first: FullTransactionIdFromXidAndCtx() reads it */
    1650        3382 :     ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
    1651        3382 :     ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
    1652        3382 : }
    1653             : 
    1654             : /*
    1655             :  * Refresh ctx->oldest_mxact and ctx->next_mxact via ReadMultiXactIdRange().
    1656             :  */
    1657             : static void
    1658        2404 : update_cached_mxid_range(HeapCheckContext *ctx)
    1659             : {
    1660        2404 :     ReadMultiXactIdRange(&ctx->oldest_mxact, &ctx->next_mxact);
    1661        2404 : }
    1662             : 
    1663             : /*
    1664             :  * Return whether fxid lies within our cached valid transaction ID range:
    1665             :  * at or after ctx->oldest_fxid and strictly before ctx->next_fxid.
    1666             :  */
    1667             : static inline bool
    1668      101828 : fxid_in_cached_range(FullTransactionId fxid, const HeapCheckContext *ctx)
    1669             : {
    1670      202678 :     return (FullTransactionIdPrecedesOrEquals(ctx->oldest_fxid, fxid) &&
    1671      100850 :             FullTransactionIdPrecedes(fxid, ctx->next_fxid));
    1672             : }
    1673             : 
    1674             : /*
    1675             :  * Checks whether a multitransaction ID is in the cached valid range, returning
    1676             :  * the nature of the range violation, if any.  The first failing test wins.
    1677             :  */
    1678             : static XidBoundsViolation
    1679         120 : check_mxid_in_range(MultiXactId mxid, HeapCheckContext *ctx)
    1680             : {
    1681         120 :     if (!TransactionIdIsValid(mxid)) /* NOTE(review): xid validity macro applied to a MultiXactId; presumably equivalent to the mxid one -- confirm */
    1682           0 :         return XID_INVALID;
    1683         120 :     if (MultiXactIdPrecedes(mxid, ctx->relminmxid))
    1684           4 :         return XID_PRECEDES_RELMIN;
    1685         116 :     if (MultiXactIdPrecedes(mxid, ctx->oldest_mxact))
    1686           0 :         return XID_PRECEDES_CLUSTERMIN;
    1687         116 :     if (MultiXactIdPrecedesOrEquals(ctx->next_mxact, mxid)) /* next_mxact itself counts as in the future */
    1688           4 :         return XID_IN_FUTURE;
    1689         112 :     return XID_BOUNDS_OK;
    1690             : }
    1691             : 
    1692             : /*
    1693             :  * Checks whether the given mxid is valid to appear in the heap being checked,
    1694             :  * returning the nature of the range violation, if any.
    1695             :  *
    1696             :  * This function attempts to return quickly by caching the known valid mxid
    1697             :  * range in ctx.  Callers should already have performed the initial setup of
    1698             :  * the cache prior to the first call to this function.
    1699             :  */
    1700             : static XidBoundsViolation
    1701         116 : check_mxid_valid_in_rel(MultiXactId mxid, HeapCheckContext *ctx)
    1702             : {
    1703             :     XidBoundsViolation result;
    1704             : 
    1705         116 :     result = check_mxid_in_range(mxid, ctx);
    1706         116 :     if (result == XID_BOUNDS_OK) /* fast path: the cached range already accepts mxid */
    1707         112 :         return XID_BOUNDS_OK;
    1708             : 
    1709             :     /* The range may have advanced.  Recheck. */
    1710           4 :     update_cached_mxid_range(ctx);
    1711           4 :     return check_mxid_in_range(mxid, ctx); /* result against the refreshed range is final */
    1712             : }
    1713             : 
    1714             : /*
    1715             :  * Checks whether the given transaction ID is (or was recently) valid to appear
    1716             :  * in the heap being checked, or whether it is too old or too new to appear in
    1717             :  * the relation, returning information about the nature of the bounds violation.
    1718             :  *
    1719             :  * We cache the range of valid transaction IDs.  If xid is in that range, we
    1720             :  * conclude that it is valid, even though concurrent changes to the table might
    1721             :  * invalidate it under certain corrupt conditions.  (For example, if the table
    1722             :  * contains corrupt all-frozen bits, a concurrent vacuum might skip the page(s)
    1723             :  * containing the xid and then truncate clog and advance the relfrozenxid
    1724             :  * beyond xid.) Reporting the xid as valid under such conditions seems
    1725             :  * acceptable, since if we had checked it earlier in our scan it would have
    1726             :  * truly been valid at that time.
    1727             :  *
    1728             :  * If the status argument is not NULL, and if and only if the transaction ID
    1729             :  * appears to be valid in this relation, the status argument will be set with
    1730             :  * the commit status of the transaction ID.
    1731             :  */
    1732             : static XidBoundsViolation
    1733      728162 : get_xid_status(TransactionId xid, HeapCheckContext *ctx,
    1734             :                XidCommitStatus *status)
    1735             : {
    1736             :     FullTransactionId fxid;
    1737             :     FullTransactionId clog_horizon;
    1738             : 
    1739             :     /* Quick check for special xids */
    1740      728162 :     if (!TransactionIdIsValid(xid))
    1741           0 :         return XID_INVALID;
    1742      728162 :     else if (xid == BootstrapTransactionId || xid == FrozenTransactionId)
    1743             :     {
    1744      626334 :         if (status != NULL)
    1745      626334 :             *status = XID_COMMITTED; /* bootstrap and frozen xids are reported as committed, with no bounds check */
    1746      626334 :         return XID_BOUNDS_OK;
    1747             :     }
    1748             : 
    1749             :     /* Check if the xid is within bounds */
    1750      101828 :     fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
    1751      101828 :     if (!fxid_in_cached_range(fxid, ctx))
    1752             :     {
    1753             :         /*
    1754             :          * We may have been checking against stale values.  Update the cached
    1755             :          * range to be sure, and since we relied on the cached range when we
    1756             :          * performed the full xid conversion, reconvert.
    1757             :          */
    1758         982 :         update_cached_xid_range(ctx);
    1759         982 :         fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
    1760             :     }
    1761             : 
    1762      101828 :     if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid))
    1763           4 :         return XID_IN_FUTURE;
    1764      101824 :     if (FullTransactionIdPrecedes(fxid, ctx->oldest_fxid))
    1765           2 :         return XID_PRECEDES_CLUSTERMIN;
    1766      101822 :     if (FullTransactionIdPrecedes(fxid, ctx->relfrozenfxid))
    1767           2 :         return XID_PRECEDES_RELMIN;
    1768             : 
    1769             :     /* Early return if the caller does not request clog checking */
    1770      101820 :     if (status == NULL)
    1771           0 :         return XID_BOUNDS_OK;
    1772             : 
    1773             :     /* Early return if we just checked this xid in a prior call */
    1774      101820 :     if (xid == ctx->cached_xid)
    1775             :     {
    1776       89526 :         *status = ctx->cached_status;
    1777       89526 :         return XID_BOUNDS_OK;
    1778             :     }
    1779             : 
    1780       12294 :     *status = XID_COMMITTED; /* default, kept when fxid precedes the clog horizon and no lookup is done */
    1781       12294 :     LWLockAcquire(XactTruncationLock, LW_SHARED); /* held while reading oldestClogXid and querying xid status */
    1782             :     clog_horizon =
    1783       12294 :         FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid,
    1784             :                                        ctx);
    1785       12294 :     if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid))
    1786             :     {
    1787       12294 :         if (TransactionIdIsCurrentTransactionId(xid))
    1788           0 :             *status = XID_IS_CURRENT_XID;
    1789       12294 :         else if (TransactionIdIsInProgress(xid))
    1790           0 :             *status = XID_IN_PROGRESS;
    1791       12294 :         else if (TransactionIdDidCommit(xid))
    1792       12294 :             *status = XID_COMMITTED;
    1793             :         else
    1794           0 :             *status = XID_ABORTED;
    1795             :     }
    1796       12294 :     LWLockRelease(XactTruncationLock);
    1797       12294 :     ctx->cached_xid = xid; /* remember the answer for the single-entry shortcut above */
    1798       12294 :     ctx->cached_status = *status;
    1799       12294 :     return XID_BOUNDS_OK;
    1800             : }

Generated by: LCOV version 1.14