LCOV - code coverage report
Current view: top level - contrib/pageinspect - btreefuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 286 333 85.9 %
Date: 2025-04-24 13:15:39 Functions: 20 20 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pageinspect/btreefuncs.c
       3             :  *
       4             :  *
       5             :  * btreefuncs.c
       6             :  *
       7             :  * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
       8             :  *
       9             :  * Permission to use, copy, modify, and distribute this software and
      10             :  * its documentation for any purpose, without fee, and without a
      11             :  * written agreement is hereby granted, provided that the above
      12             :  * copyright notice and this paragraph and the following two
      13             :  * paragraphs appear in all copies.
      14             :  *
      15             :  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
      16             :  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
      17             :  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
      18             :  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
      19             :  * OF THE POSSIBILITY OF SUCH DAMAGE.
      20             :  *
      21             :  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
      22             :  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      23             :  * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
      24             :  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
      25             :  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
      26             :  */
      27             : 
      28             : #include "postgres.h"
      29             : 
      30             : #include "access/nbtree.h"
      31             : #include "access/relation.h"
      32             : #include "catalog/namespace.h"
      33             : #include "catalog/pg_am.h"
      34             : #include "catalog/pg_type.h"
      35             : #include "funcapi.h"
      36             : #include "miscadmin.h"
      37             : #include "pageinspect.h"
      38             : #include "utils/array.h"
      39             : #include "utils/builtins.h"
      40             : #include "utils/rel.h"
      41             : #include "utils/varlena.h"
      42             : 
      43          50 : PG_FUNCTION_INFO_V1(bt_metap);
      44          14 : PG_FUNCTION_INFO_V1(bt_page_items_1_9);
      45          26 : PG_FUNCTION_INFO_V1(bt_page_items);
      46          26 : PG_FUNCTION_INFO_V1(bt_page_items_bytea);
      47          14 : PG_FUNCTION_INFO_V1(bt_page_stats_1_9);
      48          14 : PG_FUNCTION_INFO_V1(bt_page_stats);
      49          14 : PG_FUNCTION_INFO_V1(bt_multi_page_stats);
      50             : 
      51             : #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
      52             : #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
      53             : 
      54             : /* ------------------------------------------------
      55             :  * structure for single btree page statistics
      56             :  * ------------------------------------------------
      57             :  */
      58             : typedef struct BTPageStat
      59             : {
      60             :     uint32      blkno;
      61             :     uint32      live_items;
      62             :     uint32      dead_items;
      63             :     uint32      page_size;
      64             :     uint32      max_avail;
      65             :     uint32      free_size;
      66             :     uint32      avg_item_size;
      67             :     char        type;
      68             : 
      69             :     /* opaque data */
      70             :     BlockNumber btpo_prev;
      71             :     BlockNumber btpo_next;
      72             :     uint32      btpo_level;
      73             :     uint16      btpo_flags;
      74             :     BTCycleId   btpo_cycleid;
      75             : } BTPageStat;
      76             : 
      77             : /*
      78             :  * cross-call data structure for SRF for page stats
      79             :  */
      80             : typedef struct ua_page_stats
      81             : {
      82             :     Oid         relid;
      83             :     int64       blkno;
      84             :     int64       blk_count;
      85             :     bool        allpages;
      86             : } ua_page_stats;
      87             : 
      88             : /*
      89             :  * cross-call data structure for SRF for page items
      90             :  */
      91             : typedef struct ua_page_items
      92             : {
      93             :     Page        page;
      94             :     OffsetNumber offset;
      95             :     bool        leafpage;
      96             :     bool        rightmost;
      97             :     TupleDesc   tupd;
      98             : } ua_page_items;
      99             : 
     100             : 
     101             : /* -------------------------------------------------
     102             :  * GetBTPageStatistics()
     103             :  *
     104             :  * Collect statistics of single b-tree page
     105             :  * -------------------------------------------------
     106             :  */
     107             : static void
     108          20 : GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
     109             : {
     110          20 :     Page        page = BufferGetPage(buffer);
     111          20 :     PageHeader  phdr = (PageHeader) page;
     112          20 :     OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     113          20 :     BTPageOpaque opaque = BTPageGetOpaque(page);
     114          20 :     int         item_size = 0;
     115             :     int         off;
     116             : 
     117          20 :     stat->blkno = blkno;
     118             : 
     119          20 :     stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
     120             : 
     121          20 :     stat->dead_items = stat->live_items = 0;
     122             : 
     123          20 :     stat->page_size = PageGetPageSize(page);
     124             : 
     125             :     /* page type (flags) */
     126          20 :     if (P_ISDELETED(opaque))
     127             :     {
     128             :         /* We divide deleted pages into leaf ('d') or internal ('D') */
     129           0 :         if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque))
     130           0 :             stat->type = 'd';
     131             :         else
     132           0 :             stat->type = 'D';
     133             : 
     134             :         /*
     135             :          * Report safexid in a deleted page.
     136             :          *
     137             :          * Handle pg_upgrade'd deleted pages that used the previous safexid
     138             :          * representation in btpo_level field (this used to be a union type
     139             :          * called "btpo").
     140             :          */
     141           0 :         if (P_HAS_FULLXID(opaque))
     142             :         {
     143           0 :             FullTransactionId safexid = BTPageGetDeleteXid(page);
     144             : 
     145           0 :             elog(DEBUG2, "deleted page from block %u has safexid %u:%u",
     146             :                  blkno, EpochFromFullTransactionId(safexid),
     147             :                  XidFromFullTransactionId(safexid));
     148             :         }
     149             :         else
     150           0 :             elog(DEBUG2, "deleted page from block %u has safexid %u",
     151             :                  blkno, opaque->btpo_level);
     152             : 
     153             :         /* Don't interpret BTDeletedPageData as index tuples */
     154           0 :         maxoff = InvalidOffsetNumber;
     155             :     }
     156          20 :     else if (P_IGNORE(opaque))
     157           0 :         stat->type = 'e';
     158          20 :     else if (P_ISLEAF(opaque))
     159          16 :         stat->type = 'l';
     160           4 :     else if (P_ISROOT(opaque))
     161           4 :         stat->type = 'r';
     162             :     else
     163           0 :         stat->type = 'i';
     164             : 
     165             :     /* btpage opaque data */
     166          20 :     stat->btpo_prev = opaque->btpo_prev;
     167          20 :     stat->btpo_next = opaque->btpo_next;
     168          20 :     stat->btpo_level = opaque->btpo_level;
     169          20 :     stat->btpo_flags = opaque->btpo_flags;
     170          20 :     stat->btpo_cycleid = opaque->btpo_cycleid;
     171             : 
     172             :     /* count live and dead tuples, and free space */
     173        4044 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     174             :     {
     175             :         IndexTuple  itup;
     176             : 
     177        4024 :         ItemId      id = PageGetItemId(page, off);
     178             : 
     179        4024 :         itup = (IndexTuple) PageGetItem(page, id);
     180             : 
     181        4024 :         item_size += IndexTupleSize(itup);
     182             : 
     183        4024 :         if (!ItemIdIsDead(id))
     184        4024 :             stat->live_items++;
     185             :         else
     186           0 :             stat->dead_items++;
     187             :     }
     188          20 :     stat->free_size = PageGetFreeSpace(page);
     189             : 
     190          20 :     if ((stat->live_items + stat->dead_items) > 0)
     191          20 :         stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
     192             :     else
     193           0 :         stat->avg_item_size = 0;
     194          20 : }
     195             : 
     196             : /* -----------------------------------------------
     197             :  * check_relation_block_range()
     198             :  *
     199             :  * Verify that a block number (given as int64) is valid for the relation.
     200             :  * -----------------------------------------------
     201             :  */
     202             : static void
     203          30 : check_relation_block_range(Relation rel, int64 blkno)
     204             : {
     205             :     /* Ensure we can cast to BlockNumber */
     206          30 :     if (blkno < 0 || blkno > MaxBlockNumber)
     207           4 :         ereport(ERROR,
     208             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     209             :                  errmsg("invalid block number %" PRId64, blkno)));
     210             : 
     211          26 :     if ((BlockNumber) (blkno) >= RelationGetNumberOfBlocks(rel))
     212           6 :         ereport(ERROR,
     213             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     214             :                  errmsg("block number %" PRId64 " is out of range", blkno)));
     215          20 : }
     216             : 
     217             : /* -----------------------------------------------
     218             :  * bt_index_block_validate()
     219             :  *
     220             :  * Validate index type is btree and block number
     221             :  * is valid (and not the metapage).
     222             :  * -----------------------------------------------
     223             :  */
     224             : static void
     225          36 : bt_index_block_validate(Relation rel, int64 blkno)
     226             : {
     227          36 :     if (!IS_INDEX(rel) || !IS_BTREE(rel))
     228           4 :         ereport(ERROR,
     229             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     230             :                  errmsg("\"%s\" is not a %s index",
     231             :                         RelationGetRelationName(rel), "btree")));
     232             : 
     233             :     /*
     234             :      * Reject attempts to read non-local temporary relations; we would be
     235             :      * likely to get wrong data since we have no visibility into the owning
     236             :      * session's local buffers.
     237             :      */
     238          32 :     if (RELATION_IS_OTHER_TEMP(rel))
     239           0 :         ereport(ERROR,
     240             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     241             :                  errmsg("cannot access temporary tables of other sessions")));
     242             : 
     243          32 :     if (blkno == 0)
     244           6 :         ereport(ERROR,
     245             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     246             :                  errmsg("block 0 is a meta page")));
     247             : 
     248          26 :     check_relation_block_range(rel, blkno);
     249          16 : }
     250             : 
     251             : /* -----------------------------------------------
     252             :  * bt_page_stats()
     253             :  *
     254             :  * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
     255             :  * Arguments are index relation name and block number
     256             :  * -----------------------------------------------
     257             :  */
     258             : static Datum
     259          12 : bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
     260             : {
     261          12 :     text       *relname = PG_GETARG_TEXT_PP(0);
     262          12 :     int64       blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
     263             :     Buffer      buffer;
     264             :     Relation    rel;
     265             :     RangeVar   *relrv;
     266             :     Datum       result;
     267             :     HeapTuple   tuple;
     268             :     TupleDesc   tupleDesc;
     269             :     int         j;
     270             :     char       *values[11];
     271             :     BTPageStat  stat;
     272             : 
     273          12 :     if (!superuser())
     274           0 :         ereport(ERROR,
     275             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     276             :                  errmsg("must be superuser to use pageinspect functions")));
     277             : 
     278          12 :     relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     279          12 :     rel = relation_openrv(relrv, AccessShareLock);
     280             : 
     281          12 :     bt_index_block_validate(rel, blkno);
     282             : 
     283           4 :     buffer = ReadBuffer(rel, blkno);
     284           4 :     LockBuffer(buffer, BUFFER_LOCK_SHARE);
     285             : 
     286             :     /* keep compiler quiet */
     287           4 :     stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
     288           4 :     stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
     289             : 
     290           4 :     GetBTPageStatistics(blkno, buffer, &stat);
     291             : 
     292           4 :     UnlockReleaseBuffer(buffer);
     293           4 :     relation_close(rel, AccessShareLock);
     294             : 
     295             :     /* Build a tuple descriptor for our result type */
     296           4 :     if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     297           0 :         elog(ERROR, "return type must be a row type");
     298             : 
     299           4 :     j = 0;
     300           4 :     values[j++] = psprintf("%u", stat.blkno);
     301           4 :     values[j++] = psprintf("%c", stat.type);
     302           4 :     values[j++] = psprintf("%u", stat.live_items);
     303           4 :     values[j++] = psprintf("%u", stat.dead_items);
     304           4 :     values[j++] = psprintf("%u", stat.avg_item_size);
     305           4 :     values[j++] = psprintf("%u", stat.page_size);
     306           4 :     values[j++] = psprintf("%u", stat.free_size);
     307           4 :     values[j++] = psprintf("%u", stat.btpo_prev);
     308           4 :     values[j++] = psprintf("%u", stat.btpo_next);
     309           4 :     values[j++] = psprintf("%u", stat.btpo_level);
     310           4 :     values[j++] = psprintf("%d", stat.btpo_flags);
     311             : 
     312           4 :     tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
     313             :                                    values);
     314             : 
     315           4 :     result = HeapTupleGetDatum(tuple);
     316             : 
     317           4 :     PG_RETURN_DATUM(result);
     318             : }
     319             : 
     320             : Datum
     321          10 : bt_page_stats_1_9(PG_FUNCTION_ARGS)
     322             : {
     323          10 :     return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_9);
     324             : }
     325             : 
     326             : /* entry point for old extension version */
     327             : Datum
     328           2 : bt_page_stats(PG_FUNCTION_ARGS)
     329             : {
     330           2 :     return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_8);
     331             : }
     332             : 
     333             : 
     334             : /* -----------------------------------------------
     335             :  * bt_multi_page_stats()
     336             :  *
     337             :  * Usage: SELECT * FROM bt_multi_page_stats('t1_pkey', 1, 2);
     338             :  * Arguments are index relation name, first block number, number of blocks
     339             :  * (but number of blocks can be negative to mean "read all the rest")
     340             :  * -----------------------------------------------
     341             :  */
     342             : Datum
     343          28 : bt_multi_page_stats(PG_FUNCTION_ARGS)
     344             : {
     345             :     Relation    rel;
     346             :     ua_page_stats *uargs;
     347             :     FuncCallContext *fctx;
     348             :     MemoryContext mctx;
     349             : 
     350          28 :     if (!superuser())
     351           0 :         ereport(ERROR,
     352             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     353             :                  errmsg("must be superuser to use pageinspect functions")));
     354             : 
     355          28 :     if (SRF_IS_FIRSTCALL())
     356             :     {
     357          12 :         text       *relname = PG_GETARG_TEXT_PP(0);
     358          12 :         int64       blkno = PG_GETARG_INT64(1);
     359          12 :         int64       blk_count = PG_GETARG_INT64(2);
     360             :         RangeVar   *relrv;
     361             : 
     362          12 :         fctx = SRF_FIRSTCALL_INIT();
     363             : 
     364          12 :         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     365          12 :         rel = relation_openrv(relrv, AccessShareLock);
     366             : 
     367             :         /* Check that rel is a valid btree index and 1st block number is OK */
     368          12 :         bt_index_block_validate(rel, blkno);
     369             : 
     370             :         /*
     371             :          * Check if upper bound of the specified range is valid. If only one
     372             :          * page is requested, skip as we've already validated the page. (Also,
     373             :          * it's important to skip this if blk_count is negative.)
     374             :          */
     375           8 :         if (blk_count > 1)
     376           4 :             check_relation_block_range(rel, blkno + blk_count - 1);
     377             : 
     378             :         /* Save arguments for reuse */
     379           8 :         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
     380             : 
     381           8 :         uargs = palloc(sizeof(ua_page_stats));
     382             : 
     383           8 :         uargs->relid = RelationGetRelid(rel);
     384           8 :         uargs->blkno = blkno;
     385           8 :         uargs->blk_count = blk_count;
     386           8 :         uargs->allpages = (blk_count < 0);
     387             : 
     388           8 :         fctx->user_fctx = uargs;
     389             : 
     390           8 :         MemoryContextSwitchTo(mctx);
     391             : 
     392             :         /*
     393             :          * To avoid possibly leaking a relcache reference if the SRF isn't run
     394             :          * to completion, we close and re-open the index rel each time
     395             :          * through, using the index's OID for re-opens to ensure we get the
     396             :          * same rel.  Keep the AccessShareLock though, to ensure it doesn't go
     397             :          * away underneath us.
     398             :          */
     399           8 :         relation_close(rel, NoLock);
     400             :     }
     401             : 
     402          24 :     fctx = SRF_PERCALL_SETUP();
     403          24 :     uargs = fctx->user_fctx;
     404             : 
     405             :     /* We should have lock already */
     406          24 :     rel = relation_open(uargs->relid, NoLock);
     407             : 
     408             :     /* In all-pages mode, recheck the index length each time */
     409          24 :     if (uargs->allpages)
     410          10 :         uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno;
     411             : 
     412          24 :     if (uargs->blk_count > 0)
     413             :     {
     414             :         /* We need to fetch next block statistics */
     415             :         Buffer      buffer;
     416             :         Datum       result;
     417             :         HeapTuple   tuple;
     418             :         int         j;
     419             :         char       *values[11];
     420             :         BTPageStat  stat;
     421             :         TupleDesc   tupleDesc;
     422             : 
     423          16 :         buffer = ReadBuffer(rel, uargs->blkno);
     424          16 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     425             : 
     426             :         /* keep compiler quiet */
     427          16 :         stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
     428          16 :         stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
     429             : 
     430          16 :         GetBTPageStatistics(uargs->blkno, buffer, &stat);
     431             : 
     432          16 :         UnlockReleaseBuffer(buffer);
     433          16 :         relation_close(rel, NoLock);
     434             : 
     435             :         /* Build a tuple descriptor for our result type */
     436          16 :         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     437           0 :             elog(ERROR, "return type must be a row type");
     438             : 
     439          16 :         j = 0;
     440          16 :         values[j++] = psprintf("%u", stat.blkno);
     441          16 :         values[j++] = psprintf("%c", stat.type);
     442          16 :         values[j++] = psprintf("%u", stat.live_items);
     443          16 :         values[j++] = psprintf("%u", stat.dead_items);
     444          16 :         values[j++] = psprintf("%u", stat.avg_item_size);
     445          16 :         values[j++] = psprintf("%u", stat.page_size);
     446          16 :         values[j++] = psprintf("%u", stat.free_size);
     447          16 :         values[j++] = psprintf("%u", stat.btpo_prev);
     448          16 :         values[j++] = psprintf("%u", stat.btpo_next);
     449          16 :         values[j++] = psprintf("%u", stat.btpo_level);
     450          16 :         values[j++] = psprintf("%d", stat.btpo_flags);
     451             : 
     452             :         /* Construct tuple to be returned */
     453          16 :         tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
     454             :                                        values);
     455             : 
     456          16 :         result = HeapTupleGetDatum(tuple);
     457             : 
     458             :         /*
     459             :          * Move to the next block number and decrement the number of blocks
     460             :          * still to be fetched
     461             :          */
     462          16 :         uargs->blkno++;
     463          16 :         uargs->blk_count--;
     464             : 
     465          16 :         SRF_RETURN_NEXT(fctx, result);
     466             :     }
     467             : 
     468             :     /* Done, so finally we can release the index lock */
     469           8 :     relation_close(rel, AccessShareLock);
     470           8 :     SRF_RETURN_DONE(fctx);
     471             : }
     472             : 
     473             : /*-------------------------------------------------------
     474             :  * bt_page_print_tuples()
     475             :  *
     476             :  * Form a tuple describing index tuple at a given offset
     477             :  * ------------------------------------------------------
     478             :  */
     479             : static Datum
     480           6 : bt_page_print_tuples(ua_page_items *uargs)
     481             : {
     482           6 :     Page        page = uargs->page;
     483           6 :     OffsetNumber offset = uargs->offset;
     484           6 :     bool        leafpage = uargs->leafpage;
     485           6 :     bool        rightmost = uargs->rightmost;
     486             :     bool        ispivottuple;
     487             :     Datum       values[9];
     488             :     bool        nulls[9];
     489             :     HeapTuple   tuple;
     490             :     ItemId      id;
     491             :     IndexTuple  itup;
     492             :     int         j;
     493             :     int         off;
     494             :     int         dlen;
     495             :     char       *dump,
     496             :                *datacstring;
     497             :     char       *ptr;
     498             :     ItemPointer htid;
     499             : 
     500           6 :     id = PageGetItemId(page, offset);
     501             : 
     502           6 :     if (!ItemIdIsValid(id))
     503           0 :         elog(ERROR, "invalid ItemId");
     504             : 
     505           6 :     itup = (IndexTuple) PageGetItem(page, id);
     506             : 
     507           6 :     j = 0;
     508           6 :     memset(nulls, 0, sizeof(nulls));
     509           6 :     values[j++] = DatumGetInt16(offset);
     510           6 :     values[j++] = ItemPointerGetDatum(&itup->t_tid);
     511           6 :     values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
     512           6 :     values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
     513           6 :     values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
     514             : 
     515           6 :     ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
     516           6 :     dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
     517             : 
     518             :     /*
     519             :      * Make sure that "data" column does not include posting list or pivot
     520             :      * tuple representation of heap TID(s).
     521             :      *
     522             :      * Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
     523             :      * (those built before BTREE_VERSION 4), but we have no way of determining
     524             :      * if this page came from a !heapkeyspace index.  We may only have a bytea
     525             :      * nbtree page image to go on, so in general there is no metapage that we
     526             :      * can check.
     527             :      *
     528             :      * That's okay here because BTreeTupleIsPivot() can only return false for
     529             :      * a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot.  Since
     530             :      * heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
     531             :      * there cannot possibly be a pivot tuple heap TID representation that we
     532             :      * fail to make an adjustment for.  A !heapkeyspace index can have
     533             :      * BTreeTupleIsPivot() return true (due to things like suffix truncation
     534             :      * for INCLUDE indexes in Postgres v11), but when that happens
     535             :      * BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
     536             :      * NULL).
     537             :      *
     538             :      * Note: BTreeTupleIsPosting() always works reliably, even with
     539             :      * !heapkeyspace indexes.
     540             :      */
     541           6 :     if (BTreeTupleIsPosting(itup))
     542           0 :         dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
     543           6 :     else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
     544           0 :         dlen -= MAXALIGN(sizeof(ItemPointerData));
     545             : 
     546           6 :     if (dlen < 0 || dlen > INDEX_SIZE_MASK)
     547           0 :         elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
     548             :              dlen, offset);
     549           6 :     dump = palloc0(dlen * 3 + 1);
     550           6 :     datacstring = dump;
     551          54 :     for (off = 0; off < dlen; off++)
     552             :     {
     553          48 :         if (off > 0)
     554          42 :             *dump++ = ' ';
     555          48 :         sprintf(dump, "%02x", *(ptr + off) & 0xff);
     556          48 :         dump += 2;
     557             :     }
     558           6 :     values[j++] = CStringGetTextDatum(datacstring);
     559           6 :     pfree(datacstring);
     560             : 
     561             :     /*
     562             :      * We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
     563             :      * again.  Deduce whether or not tuple must be a pivot tuple based on
     564             :      * whether or not the page is a leaf page, as well as the page offset
     565             :      * number of the tuple.
     566             :      */
     567           6 :     ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
     568             : 
     569             :     /* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
     570           6 :     if (!ispivottuple)
     571           6 :         values[j++] = BoolGetDatum(ItemIdIsDead(id));
     572             :     else
     573             :     {
     574             :         Assert(!ItemIdIsDead(id));
     575           0 :         nulls[j++] = true;
     576             :     }
     577             : 
     578           6 :     htid = BTreeTupleGetHeapTID(itup);
     579           6 :     if (ispivottuple && !BTreeTupleIsPivot(itup))
     580             :     {
     581             :         /* Don't show bogus heap TID in !heapkeyspace pivot tuple */
     582           0 :         htid = NULL;
     583             :     }
     584             : 
     585           6 :     if (htid)
     586           6 :         values[j++] = ItemPointerGetDatum(htid);
     587             :     else
     588           0 :         nulls[j++] = true;
     589             : 
     590           6 :     if (BTreeTupleIsPosting(itup))
     591             :     {
     592             :         /* Build an array of item pointers */
     593             :         ItemPointer tids;
     594             :         Datum      *tids_datum;
     595             :         int         nposting;
     596             : 
     597           0 :         tids = BTreeTupleGetPosting(itup);
     598           0 :         nposting = BTreeTupleGetNPosting(itup);
     599           0 :         tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
     600           0 :         for (int i = 0; i < nposting; i++)
     601           0 :             tids_datum[i] = ItemPointerGetDatum(&tids[i]);
     602           0 :         values[j++] = PointerGetDatum(construct_array_builtin(tids_datum, nposting, TIDOID));
     603           0 :         pfree(tids_datum);
     604             :     }
     605             :     else
     606           6 :         nulls[j++] = true;
     607             : 
     608             :     /* Build and return the result tuple */
     609           6 :     tuple = heap_form_tuple(uargs->tupd, values, nulls);
     610             : 
     611           6 :     return HeapTupleGetDatum(tuple);
     612             : }
     613             : 
     614             : /*-------------------------------------------------------
     615             :  * bt_page_items()
     616             :  * (bt_page_items_internal() is the shared body of bt_page_items() and bt_page_items_1_9())
     617             :  * Get IndexTupleData set in a btree page: one row per offset number, from FirstOffsetNumber through PageGetMaxOffsetNumber()
     618             :  * ext_version selects how the block number argument is fetched (uint32 for PAGEINSPECT_V1_8, int64 otherwise)
     619             :  * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
     620             :  *-------------------------------------------------------
     621             :  */
     622             : static Datum
     623          16 : bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
     624             : {
     625          16 :     text       *relname = PG_GETARG_TEXT_PP(0);
     626          16 :     int64       blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1)); /* widened to int64 in both cases */
     627             :     Datum       result;
     628             :     FuncCallContext *fctx;
     629             :     MemoryContext mctx;
     630             :     ua_page_items *uargs;
     631             : 
     632          16 :     if (!superuser())
     633           0 :         ereport(ERROR,
     634             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     635             :                  errmsg("must be superuser to use pageinspect functions")));
     636             : 
     637          16 :     if (SRF_IS_FIRSTCALL()) /* first call: snapshot the page and set up cross-call state */
     638             :     {
     639             :         RangeVar   *relrv;
     640             :         Relation    rel;
     641             :         Buffer      buffer;
     642             :         BTPageOpaque opaque;
     643             :         TupleDesc   tupleDesc;
     644             : 
     645          12 :         fctx = SRF_FIRSTCALL_INIT();
     646             : 
     647          12 :         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     648          12 :         rel = relation_openrv(relrv, AccessShareLock);
     649             : 
     650          12 :         bt_index_block_validate(rel, blkno); /* NOTE(review): presumably errors out on a non-btree relation or invalid blkno -- confirm against its definition */
     651             : 
     652           4 :         buffer = ReadBuffer(rel, blkno);
     653           4 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     654             : 
     655             :         /*
     656             :          * We copy the page into local storage to avoid holding pin on the
     657             :          * buffer longer than we must, and possibly failing to release it at
     658             :          * all if the calling query doesn't fetch all rows.
     659             :          */
     660           4 :         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); /* allocations below are made in the multi-call context */
     661             : 
     662           4 :         uargs = palloc(sizeof(ua_page_items));
     663             : 
     664           4 :         uargs->page = palloc(BLCKSZ);
     665           4 :         memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
     666             : 
     667           4 :         UnlockReleaseBuffer(buffer); /* buffer and relation are released once the copy is made */
     668           4 :         relation_close(rel, AccessShareLock);
     669             : 
     670           4 :         uargs->offset = FirstOffsetNumber; /* iteration starts at the first item on the page */
     671             : 
     672           4 :         opaque = BTPageGetOpaque(uargs->page);
     673             : 
     674           4 :         if (!P_ISDELETED(opaque))
     675           4 :             fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); /* one result row per offset number */
     676             :         else
     677             :         {
     678             :             /* Don't interpret BTDeletedPageData as index tuples */
     679           0 :             elog(NOTICE, "page from block " INT64_FORMAT " is deleted", blkno);
     680           0 :             fctx->max_calls = 0; /* deleted page: return no rows */
     681             :         }
     682           4 :         uargs->leafpage = P_ISLEAF(opaque);
     683           4 :         uargs->rightmost = P_RIGHTMOST(opaque);
     684             : 
     685             :         /* Build a tuple descriptor for our result type */
     686           4 :         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     687           0 :             elog(ERROR, "return type must be a row type");
     688           4 :         tupleDesc = BlessTupleDesc(tupleDesc);
     689             : 
     690           4 :         uargs->tupd = tupleDesc;
     691             : 
     692           4 :         fctx->user_fctx = uargs; /* state picked up again after SRF_PERCALL_SETUP() */
     693             : 
     694           4 :         MemoryContextSwitchTo(mctx);
     695             :     }
     696             : 
     697           8 :     fctx = SRF_PERCALL_SETUP();
     698           8 :     uargs = fctx->user_fctx;
     699             : 
     700           8 :     if (fctx->call_cntr < fctx->max_calls) /* rows remain to be returned */
     701             :     {
     702           4 :         result = bt_page_print_tuples(uargs);
     703           4 :         uargs->offset++; /* advance to the next item for the following call */
     704           4 :         SRF_RETURN_NEXT(fctx, result);
     705             :     }
     706             : 
     707           4 :     SRF_RETURN_DONE(fctx);
     708             : }
     709             : 
     710             : Datum
     711          12 : bt_page_items_1_9(PG_FUNCTION_ARGS) /* entry point that selects PAGEINSPECT_V1_9 */
     712             : {
     713          12 :     return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_9); /* any version other than PAGEINSPECT_V1_8 reads blkno as int64 */
     714             : }
     715             : 
     716             : /* entry point for old extension version (PAGEINSPECT_V1_8): blkno is read as uint32 */
     717             : Datum
     718           4 : bt_page_items(PG_FUNCTION_ARGS)
     719             : {
     720           4 :     return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_8); /* same body as bt_page_items_1_9(), differing only in ext_version */
     721             : }
     722             : 
     723             : /*-------------------------------------------------------
     724             :  * bt_page_items_bytea()
     725             :  *
     726             :  * Get IndexTupleData set in a btree page supplied as a raw page image (bytea)
     727             :  * Returns NULL for a new page; errors out on a wrong special-space size, a meta page, or a leaf flag with nonzero level
     728             :  * Usage: SELECT * FROM bt_page_items(get_raw_page('t1_pkey', 1));
     729             :  *-------------------------------------------------------
     730             :  */
     731             : 
     732             : Datum
     733          18 : bt_page_items_bytea(PG_FUNCTION_ARGS)
     734             : {
     735          18 :     bytea      *raw_page = PG_GETARG_BYTEA_P(0);
     736             :     Datum       result;
     737             :     FuncCallContext *fctx;
     738             :     ua_page_items *uargs;
     739             : 
     740          18 :     if (!superuser())
     741           0 :         ereport(ERROR,
     742             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     743             :                  errmsg("must be superuser to use raw page functions")));
     744             : 
     745          18 :     if (SRF_IS_FIRSTCALL()) /* first call: validate the page and set up cross-call state */
     746             :     {
     747             :         BTPageOpaque opaque;
     748             :         MemoryContext mctx;
     749             :         TupleDesc   tupleDesc;
     750             : 
     751          16 :         fctx = SRF_FIRSTCALL_INIT();
     752          16 :         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); /* allocations below are made in the multi-call context */
     753             : 
     754          16 :         uargs = palloc(sizeof(ua_page_items));
     755             : 
     756          16 :         uargs->page = get_page_from_raw(raw_page); /* NOTE(review): presumably validates the bytea length and returns a page copy -- confirm against its definition */
     757             : 
     758          14 :         if (PageIsNew(uargs->page))
     759             :         {
     760           2 :             MemoryContextSwitchTo(mctx); /* restore the caller context before the early return */
     761           2 :             PG_RETURN_NULL(); /* a new page yields NULL rather than rows */
     762             :         }
     763             : 
     764          12 :         uargs->offset = FirstOffsetNumber; /* iteration starts at the first item on the page */
     765             : 
     766             :         /* verify the special space has the expected size */
     767          12 :         if (PageGetSpecialSize(uargs->page) != MAXALIGN(sizeof(BTPageOpaqueData)))
     768           4 :             ereport(ERROR,
     769             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     770             :                      errmsg("input page is not a valid %s page", "btree"),
     771             :                      errdetail("Expected special size %d, got %d.",
     772             :                                (int) MAXALIGN(sizeof(BTPageOpaqueData)),
     773             :                                (int) PageGetSpecialSize(uargs->page))));
     774             : 
     775           8 :         opaque = BTPageGetOpaque(uargs->page);
     776             : 
     777           8 :         if (P_ISMETA(opaque)) /* a meta page is rejected here */
     778           4 :             ereport(ERROR,
     779             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     780             :                      errmsg("block is a meta page")));
     781             : 
     782           4 :         if (P_ISLEAF(opaque) && opaque->btpo_level != 0) /* leaf flag with a nonzero level is rejected as inconsistent */
     783           2 :             ereport(ERROR,
     784             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     785             :                      errmsg("block is not a valid btree leaf page")));
     786             : 
     787           2 :         if (P_ISDELETED(opaque)) /* NOTE(review): the else-branch below also reports deletion, so a deleted page gets two NOTICEs */
     788           0 :             elog(NOTICE, "page is deleted");
     789             : 
     790           2 :         if (!P_ISDELETED(opaque))
     791           2 :             fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); /* one result row per offset number */
     792             :         else
     793             :         {
     794             :             /* Don't interpret BTDeletedPageData as index tuples */
     795           0 :             elog(NOTICE, "page from block is deleted");
     796           0 :             fctx->max_calls = 0; /* deleted page: return no rows */
     797             :         }
     798           2 :         uargs->leafpage = P_ISLEAF(opaque);
     799           2 :         uargs->rightmost = P_RIGHTMOST(opaque);
     800             : 
     801             :         /* Build a tuple descriptor for our result type */
     802           2 :         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     803           0 :             elog(ERROR, "return type must be a row type");
     804           2 :         tupleDesc = BlessTupleDesc(tupleDesc);
     805             : 
     806           2 :         uargs->tupd = tupleDesc;
     807             : 
     808           2 :         fctx->user_fctx = uargs; /* state picked up again after SRF_PERCALL_SETUP() */
     809             : 
     810           2 :         MemoryContextSwitchTo(mctx);
     811             :     }
     812             : 
     813           4 :     fctx = SRF_PERCALL_SETUP();
     814           4 :     uargs = fctx->user_fctx;
     815             : 
     816           4 :     if (fctx->call_cntr < fctx->max_calls) /* rows remain to be returned */
     817             :     {
     818           2 :         result = bt_page_print_tuples(uargs);
     819           2 :         uargs->offset++; /* advance to the next item for the following call */
     820           2 :         SRF_RETURN_NEXT(fctx, result);
     821             :     }
     822             : 
     823           2 :     SRF_RETURN_DONE(fctx);
     824             : }
     825             : 
     826             : /* Number of output arguments (columns) for bt_metap(); fewer declared columns are rejected there */
     827             : #define BT_METAP_COLS_V1_8      9
     828             : 
     829             : /* ------------------------------------------------
     830             :  * bt_metap()
     831             :  *
     832             :  * Get a btree's meta-page information
     833             :  * Returns a single row built from the BTMetaPageData fields found in block 0 of the index
     834             :  * Usage: SELECT * FROM bt_metap('t1_pkey')
     835             :  * ------------------------------------------------
     836             :  */
     837             : Datum
     838           4 : bt_metap(PG_FUNCTION_ARGS)
     839             : {
     840           4 :     text       *relname = PG_GETARG_TEXT_PP(0);
     841             :     Datum       result;
     842             :     Relation    rel;
     843             :     RangeVar   *relrv;
     844             :     BTMetaPageData *metad;
     845             :     TupleDesc   tupleDesc;
     846             :     int         j;
     847           :     char       *values[9]; /* one C string per output column; 9 matches BT_METAP_COLS_V1_8 */
     848             :     Buffer      buffer;
     849             :     Page        page;
     850             :     HeapTuple   tuple;
     851             : 
     852           4 :     if (!superuser())
     853           0 :         ereport(ERROR,
     854             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     855             :                  errmsg("must be superuser to use pageinspect functions")));
     856             : 
     857           4 :     relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     858           4 :     rel = relation_openrv(relrv, AccessShareLock);
     859             : 
     860           4 :     if (!IS_INDEX(rel) || !IS_BTREE(rel))
     861           2 :         ereport(ERROR,
     862             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     863             :                  errmsg("\"%s\" is not a %s index",
     864             :                         RelationGetRelationName(rel), "btree")));
     865             : 
     866             :     /*
     867             :      * Reject attempts to read non-local temporary relations; we would be
     868             :      * likely to get wrong data since we have no visibility into the owning
     869             :      * session's local buffers.
     870             :      */
     871           2 :     if (RELATION_IS_OTHER_TEMP(rel))
     872           0 :         ereport(ERROR,
     873             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     874             :                  errmsg("cannot access temporary tables of other sessions")));
     875             : 
     876           2 :     buffer = ReadBuffer(rel, 0); /* the metadata is read from block 0 */
     877           2 :     LockBuffer(buffer, BUFFER_LOCK_SHARE);
     878             : 
     879           2 :     page = BufferGetPage(buffer);
     880           2 :     metad = BTPageGetMeta(page); /* points into the locked buffer, so it is only used before UnlockReleaseBuffer() */
     881             : 
     882             :     /* Build a tuple descriptor for our result type */
     883           2 :     if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     884           0 :         elog(ERROR, "return type must be a row type");
     885             : 
     886             :     /*
     887             :      * We need a kluge here to detect API versions prior to 1.8.  Earlier
     888             :      * versions incorrectly used int4 for certain columns.
     889             :      *
     890             :      * There is no way to reliably avoid the problems created by the old
     891             :      * function definition at this point, so insist that the user update the
     892             :      * extension.
     893             :      */
     894           2 :     if (tupleDesc->natts < BT_METAP_COLS_V1_8)
     895           0 :         ereport(ERROR,
     896             :                 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
     897             :                  errmsg("function has wrong number of declared columns"),
     898             :                  errhint("To resolve the problem, update the \"pageinspect\" extension to the latest version.")));
     899             : 
     900           2 :     j = 0; /* index of the next output column to fill */
     901           2 :     values[j++] = psprintf("%d", metad->btm_magic);
     902           2 :     values[j++] = psprintf("%d", metad->btm_version);
     903           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_root);
     904           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_level);
     905           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastroot);
     906           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastlevel);
     907             : 
     908             :     /*
     909             :      * Get values of extended metadata if available, use default values
     910             :      * otherwise.  Note that we rely on the assumption that btm_allequalimage
     911             :      * is initialized to zero with indexes that were built on versions prior
     912             :      * to Postgres 13 (just like _bt_metaversion()).
     913             :      */
     914           2 :     if (metad->btm_version >= BTREE_NOVAC_VERSION)
     915             :     {
     916           4 :         values[j++] = psprintf(INT64_FORMAT,
     917           2 :                                (int64) metad->btm_last_cleanup_num_delpages);
     918           2 :         values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
     919           2 :         values[j++] = metad->btm_allequalimage ? "t" : "f"; /* string literal rather than a psprintf() result */
     920             :     }
     921             :     else
     922             :     {
     923           0 :         values[j++] = "0"; /* defaults for metapage versions below BTREE_NOVAC_VERSION */
     924           0 :         values[j++] = "-1";
     925           0 :         values[j++] = "f";
     926             :     }
     927             : 
     928           2 :     tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
     929             :                                    values);
     930             : 
     931           2 :     result = HeapTupleGetDatum(tuple);
     932             : 
     933           2 :     UnlockReleaseBuffer(buffer); /* released only after the result tuple has been built */
     934           2 :     relation_close(rel, AccessShareLock);
     935             : 
     936           2 :     PG_RETURN_DATUM(result);
     937             : }

Generated by: LCOV version 1.14