LCOV - code coverage report
Current view: top level - contrib/pageinspect - btreefuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 286 333 85.9 %
Date: 2025-01-18 05:15:39 Functions: 20 20 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * contrib/pageinspect/btreefuncs.c
       3             :  *
       4             :  *
       5             :  * btreefuncs.c
       6             :  *
       7             :  * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
       8             :  *
       9             :  * Permission to use, copy, modify, and distribute this software and
      10             :  * its documentation for any purpose, without fee, and without a
      11             :  * written agreement is hereby granted, provided that the above
      12             :  * copyright notice and this paragraph and the following two
      13             :  * paragraphs appear in all copies.
      14             :  *
      15             :  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
      16             :  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
      17             :  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
      18             :  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
      19             :  * OF THE POSSIBILITY OF SUCH DAMAGE.
      20             :  *
      21             :  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
      22             :  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      23             :  * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
      24             :  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
      25             :  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
      26             :  */
      27             : 
      28             : #include "postgres.h"
      29             : 
      30             : #include "access/nbtree.h"
      31             : #include "access/relation.h"
      32             : #include "catalog/namespace.h"
      33             : #include "catalog/pg_am.h"
      34             : #include "catalog/pg_type.h"
      35             : #include "funcapi.h"
      36             : #include "miscadmin.h"
      37             : #include "pageinspect.h"
      38             : #include "utils/array.h"
      39             : #include "utils/builtins.h"
      40             : #include "utils/rel.h"
      41             : #include "utils/varlena.h"
      42             : 
      43          50 : PG_FUNCTION_INFO_V1(bt_metap);              /* function-manager info records for the SQL-callable entry points follow */
      44          14 : PG_FUNCTION_INFO_V1(bt_page_items_1_9);
      45          26 : PG_FUNCTION_INFO_V1(bt_page_items);
      46          26 : PG_FUNCTION_INFO_V1(bt_page_items_bytea);
      47          14 : PG_FUNCTION_INFO_V1(bt_page_stats_1_9);     /* wrapper that passes PAGEINSPECT_V1_9 */
      48          14 : PG_FUNCTION_INFO_V1(bt_page_stats);         /* entry point for old extension version */
      49          14 : PG_FUNCTION_INFO_V1(bt_multi_page_stats);   /* set-returning variant over a block range */
      50             : 
      51             : #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) /* r is an index, judged by its relkind */
      52             : #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)    /* r uses the btree access method, judged by its relam */
      53             : 
      54             : /* ------------------------------------------------
      55             :  * structure for single btree page statistics
                     :  *
                     :  * Filled in by GetBTPageStatistics().  The two stats functions
                     :  * below emit every field except max_avail and btpo_cycleid.
      56             :  * ------------------------------------------------
      57             :  */
      58             : typedef struct BTPageStat
      59             : {
      60             :     uint32      blkno;          /* block number, as passed by the caller */
      61             :     uint32      live_items;     /* items whose line pointer is not marked dead */
      62             :     uint32      dead_items;     /* items whose line pointer is marked dead */
      63             :     uint32      page_size;      /* result of PageGetPageSize() */
      64             :     uint32      max_avail;      /* pd_special minus SizeOfPageHeaderData */
      65             :     uint32      free_size;      /* result of PageGetFreeSpace() */
      66             :     uint32      avg_item_size;  /* summed IndexTupleSize / item count, 0 if no items */
      67             :     char        type;           /* one of 'd', 'D', 'e', 'l', 'r', 'i' */
      68             : 
      69             :     /* opaque data */
      70             :     BlockNumber btpo_prev;      /* copied verbatim from the page's BTPageOpaque */
      71             :     BlockNumber btpo_next;
      72             :     uint32      btpo_level;
      73             :     uint16      btpo_flags;
      74             :     BTCycleId   btpo_cycleid;
      75             : } BTPageStat;
      76             : 
      77             : /*
      78             :  * cross-call data structure for SRF for page stats
                     :  *
                     :  * Allocated by bt_multi_page_stats() in the SRF's multi-call memory
                     :  * context on the first call and updated after every returned row.
      79             :  */
      80             : typedef struct ua_page_stats
      81             : {
      82             :     Oid         relid;          /* index OID, used to re-open the rel on each call */
      83             :     int64       blkno;          /* next block to report on */
      84             :     int64       blk_count;      /* blocks still to report; recomputed per call if allpages */
      85             :     bool        allpages;       /* true when the caller passed a negative block count */
      86             : } ua_page_stats;
      87             : 
      88             : /*
      89             :  * cross-call data structure for SRF for page items
                     :  *
                     :  * Read by bt_page_print_tuples(); the code that fills it in is not
                     :  * part of this excerpt.
      90             :  */
      91             : typedef struct ua_page_items
      92             : {
      93             :     Page        page;           /* page whose items are being printed */
      94             :     OffsetNumber offset;        /* offset number of the item to print */
      95             :     bool        leafpage;       /* with rightmost, used to deduce pivot tuples */
      96             :     bool        rightmost;      /* see leafpage */
      97             :     TupleDesc   tupd;           /* descriptor handed to heap_form_tuple() */
      98             : } ua_page_items;
      99             : 
     100             : 
     101             : /* -------------------------------------------------
     102             :  * GetBTPageStatistics()
     103             :  *
     104             :  * Collect statistics of single b-tree page
                     :  *
                     :  * blkno is stored into stat->blkno as given; it is not used to
                     :  * locate the page.  buffer is the buffer holding the page (the
                     :  * callers in this file hold a share lock on it).  stat receives
                     :  * the results; every field of *stat is assigned here.
                     :  *
                     :  * stat->type is 'd' or 'D' for deleted pages, 'e' for other pages
                     :  * matching P_IGNORE, 'l' for leaf pages (including a leaf root),
                     :  * 'r' for a non-leaf root, and 'i' otherwise.  For deleted pages
                     :  * no items are counted.
     105             :  * -------------------------------------------------
     106             :  */
     107             : static void
     108          20 : GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
     109             : {
     110          20 :     Page        page = BufferGetPage(buffer);
     111          20 :     PageHeader  phdr = (PageHeader) page;
     112          20 :     OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     113          20 :     BTPageOpaque opaque = BTPageGetOpaque(page);
     114          20 :     int         item_size = 0;  /* running sum of IndexTupleSize over all items */
     115             :     int         off;
     116             : 
     117          20 :     stat->blkno = blkno;
     118             : 
     119          20 :     stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);  /* simplifies to pd_special - SizeOfPageHeaderData */
     120             : 
     121          20 :     stat->dead_items = stat->live_items = 0;
     122             : 
     123          20 :     stat->page_size = PageGetPageSize(page);
     124             : 
     125             :     /* page type (flags) */
     126          20 :     if (P_ISDELETED(opaque))
     127             :     {
     128             :         /* We divide deleted pages into leaf ('d') or internal ('D') */
     129           0 :         if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque)) /* pages without a full XID are reported as 'd' too */
     130           0 :             stat->type = 'd';
     131             :         else
     132           0 :             stat->type = 'D';
     133             : 
     134             :         /*
     135             :          * Report safexid in a deleted page.
     136             :          *
     137             :          * Handle pg_upgrade'd deleted pages that used the previous safexid
     138             :          * representation in btpo_level field (this used to be a union type
     139             :          * called "btpo").
                     :          *
                     :          * The safexid is only written to the log at DEBUG2; it is not
                     :          * stored in *stat.
     140             :          */
     141           0 :         if (P_HAS_FULLXID(opaque))
     142             :         {
     143           0 :             FullTransactionId safexid = BTPageGetDeleteXid(page);
     144             : 
     145           0 :             elog(DEBUG2, "deleted page from block %u has safexid %u:%u",
     146             :                  blkno, EpochFromFullTransactionId(safexid),
     147             :                  XidFromFullTransactionId(safexid));
     148             :         }
     149             :         else
     150           0 :             elog(DEBUG2, "deleted page from block %u has safexid %u",
     151             :                  blkno, opaque->btpo_level);
     152             : 
     153             :         /* Don't interpret BTDeletedPageData as index tuples */
     154           0 :         maxoff = InvalidOffsetNumber;   /* makes the item loop below run zero times */
     155             :     }
     156          20 :     else if (P_IGNORE(opaque))  /* presumably half-dead pages, since deleted ones were handled above -- confirm against P_IGNORE */
     157           0 :         stat->type = 'e';
     158          20 :     else if (P_ISLEAF(opaque))  /* tested before P_ISROOT, so a leaf root reports 'l' */
     159          16 :         stat->type = 'l';
     160           4 :     else if (P_ISROOT(opaque))
     161           4 :         stat->type = 'r';
     162             :     else
     163           0 :         stat->type = 'i';
     164             : 
     165             :     /* btpage opaque data */
     166          20 :     stat->btpo_prev = opaque->btpo_prev;
     167          20 :     stat->btpo_next = opaque->btpo_next;
     168          20 :     stat->btpo_level = opaque->btpo_level;
     169          20 :     stat->btpo_flags = opaque->btpo_flags;
     170          20 :     stat->btpo_cycleid = opaque->btpo_cycleid;
     171             : 
     172             :     /* count live and dead tuples, and sum their sizes */
     173        4044 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     174             :     {
     175             :         IndexTuple  itup;
     176             : 
     177        4024 :         ItemId      id = PageGetItemId(page, off);
     178             : 
     179        4024 :         itup = (IndexTuple) PageGetItem(page, id);
     180             : 
     181        4024 :         item_size += IndexTupleSize(itup);  /* dead items contribute to the size sum as well */
     182             : 
     183        4024 :         if (!ItemIdIsDead(id))
     184        4024 :             stat->live_items++;
     185             :         else
     186           0 :             stat->dead_items++;
     187             :     }
     188          20 :     stat->free_size = PageGetFreeSpace(page);
     189             : 
     190          20 :     if ((stat->live_items + stat->dead_items) > 0)
     191          20 :         stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);    /* integer division, truncates */
     192             :     else
     193           0 :         stat->avg_item_size = 0;    /* avoid dividing by zero on an empty or deleted page */
     194          20 : }
     195             : 
     196             : /* -----------------------------------------------
     197             :  * check_relation_block_range()
     198             :  *
     199             :  * Verify that a block number (given as int64) is valid for the relation.
                     :  *
                     :  * Raises ERROR (ERRCODE_INVALID_PARAMETER_VALUE) with "invalid block
                     :  * number" when blkno is negative or above MaxBlockNumber, and with
                     :  * "block number ... is out of range" when it is not below the
                     :  * relation's current RelationGetNumberOfBlocks().  Returns normally
                     :  * otherwise.
     200             :  * -----------------------------------------------
     201             :  */
     202             : static void
     203          30 : check_relation_block_range(Relation rel, int64 blkno)
     204             : {
     205             :     /* Ensure we can cast to BlockNumber */
     206          30 :     if (blkno < 0 || blkno > MaxBlockNumber)
     207           4 :         ereport(ERROR,
     208             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     209             :                  errmsg("invalid block number %lld",
     210             :                         (long long) blkno)));
     211             : 
     212          26 :     if ((BlockNumber) (blkno) >= RelationGetNumberOfBlocks(rel))    /* cast is lossless after the range check above */
     213           6 :         ereport(ERROR,
     214             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     215             :                  errmsg("block number %lld is out of range",
     216             :                         (long long) blkno)));
     217          20 : }
     218             : 
     219             : /* -----------------------------------------------
     220             :  * bt_index_block_validate()
     221             :  *
     222             :  * Validate index type is btree and block number
     223             :  * is valid (and not the metapage).
                     :  *
                     :  * Checks run in this order, each raising ERROR on failure:
                     :  *   1. rel is an index and uses the btree access method;
                     :  *   2. rel is not another session's temporary relation;
                     :  *   3. blkno is not 0;
                     :  *   4. blkno passes check_relation_block_range().
     224             :  * -----------------------------------------------
     225             :  */
     226             : static void
     227          36 : bt_index_block_validate(Relation rel, int64 blkno)
     228             : {
     229          36 :     if (!IS_INDEX(rel) || !IS_BTREE(rel))
     230           4 :         ereport(ERROR,
     231             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     232             :                  errmsg("\"%s\" is not a %s index",
     233             :                         RelationGetRelationName(rel), "btree")));
     234             : 
     235             :     /*
     236             :      * Reject attempts to read non-local temporary relations; we would be
     237             :      * likely to get wrong data since we have no visibility into the owning
     238             :      * session's local buffers.
     239             :      */
     240          32 :     if (RELATION_IS_OTHER_TEMP(rel))
     241           0 :         ereport(ERROR,
     242             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     243             :                  errmsg("cannot access temporary tables of other sessions")));
     244             : 
     245          32 :     if (blkno == 0) /* tested before the range check, so block 0 always gets this message */
     246           6 :         ereport(ERROR,
     247             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     248             :                  errmsg("block 0 is a meta page")));
     249             : 
     250          26 :     check_relation_block_range(rel, blkno);
     251          16 : }
     252             : 
     253             : /* -----------------------------------------------
     254             :  * bt_page_stats()
     255             :  *
     256             :  * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
     257             :  * Arguments are index relation name and block number
                     :  *
                     :  * bt_page_stats_internal() is the shared implementation.  ext_version
                     :  * selects how argument 1 is fetched: as uint32 for PAGEINSPECT_V1_8,
                     :  * as int64 for anything else.  Requires superuser.
                     :  *
                     :  * The result row has 11 columns, built from text in this order:
                     :  * blkno, type, live_items, dead_items, avg_item_size, page_size,
                     :  * free_size, btpo_prev, btpo_next, btpo_level, btpo_flags.
     258             :  * -----------------------------------------------
     259             :  */
     260             : static Datum
     261          12 : bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
     262             : {
     263          12 :     text       *relname = PG_GETARG_TEXT_PP(0);
     264          12 :     int64       blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
     265             :     Buffer      buffer;
     266             :     Relation    rel;
     267             :     RangeVar   *relrv;
     268             :     Datum       result;
     269             :     HeapTuple   tuple;
     270             :     TupleDesc   tupleDesc;
     271             :     int         j;
     272             :     char       *values[11];     /* one C string per output column */
     273             :     BTPageStat  stat;
     274             : 
     275          12 :     if (!superuser())
     276           0 :         ereport(ERROR,
     277             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     278             :                  errmsg("must be superuser to use pageinspect functions")));
     279             : 
     280          12 :     relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     281          12 :     rel = relation_openrv(relrv, AccessShareLock);
     282             : 
     283          12 :     bt_index_block_validate(rel, blkno);
     284             : 
     285           4 :     buffer = ReadBuffer(rel, blkno);    /* blkno was range-checked above, so the narrowing is safe */
     286           4 :     LockBuffer(buffer, BUFFER_LOCK_SHARE);
     287             : 
     288             :     /* keep compiler quiet */
     289           4 :     stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
     290           4 :     stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
     291             : 
     292           4 :     GetBTPageStatistics(blkno, buffer, &stat);
     293             : 
     294           4 :     UnlockReleaseBuffer(buffer);
     295           4 :     relation_close(rel, AccessShareLock);   /* stats were copied into stat, so the page is no longer needed */
     296             : 
     297             :     /* Build a tuple descriptor for our result type */
     298           4 :     if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     299           0 :         elog(ERROR, "return type must be a row type");
     300             : 
     301           4 :     j = 0;
     302           4 :     values[j++] = psprintf("%u", stat.blkno);
     303           4 :     values[j++] = psprintf("%c", stat.type);
     304           4 :     values[j++] = psprintf("%u", stat.live_items);
     305           4 :     values[j++] = psprintf("%u", stat.dead_items);
     306           4 :     values[j++] = psprintf("%u", stat.avg_item_size);
     307           4 :     values[j++] = psprintf("%u", stat.page_size);
     308           4 :     values[j++] = psprintf("%u", stat.free_size);
     309           4 :     values[j++] = psprintf("%u", stat.btpo_prev);
     310           4 :     values[j++] = psprintf("%u", stat.btpo_next);
     311           4 :     values[j++] = psprintf("%u", stat.btpo_level);
     312           4 :     values[j++] = psprintf("%d", stat.btpo_flags);
     313             : 
     314           4 :     tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
     315             :                                    values);
     316             : 
     317           4 :     result = HeapTupleGetDatum(tuple);
     318             : 
     319           4 :     PG_RETURN_DATUM(result);
     320             : }
     321             : 
     322             : Datum
     323          10 : bt_page_stats_1_9(PG_FUNCTION_ARGS)
     324             : {
     325          10 :     return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_9);    /* with this version the block number is fetched as int64 */
     326             : }
     327             : 
     328             : /* entry point for old extension version; the block number is fetched as uint32 */
     329             : Datum
     330           2 : bt_page_stats(PG_FUNCTION_ARGS)
     331             : {
     332           2 :     return bt_page_stats_internal(fcinfo, PAGEINSPECT_V1_8);
     333             : }
     334             : 
     335             : 
     336             : /* -----------------------------------------------
     337             :  * bt_multi_page_stats()
     338             :  *
     339             :  * Usage: SELECT * FROM bt_multi_page_stats('t1_pkey', 1, 2);
     340             :  * Arguments are index relation name, first block number, number of blocks
     341             :  * (but number of blocks can be negative to mean "read all the rest")
                     :  *
                     :  * Set-returning function: each call returns the stats row for one
                     :  * block, with the same 11 columns as bt_page_stats.  Requires
                     :  * superuser.  A block count of zero validates the arguments and
                     :  * returns no rows.
     342             :  * -----------------------------------------------
     343             :  */
     344             : Datum
     345          28 : bt_multi_page_stats(PG_FUNCTION_ARGS)
     346             : {
     347             :     Relation    rel;
     348             :     ua_page_stats *uargs;
     349             :     FuncCallContext *fctx;
     350             :     MemoryContext mctx;
     351             : 
     352          28 :     if (!superuser())
     353           0 :         ereport(ERROR,
     354             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     355             :                  errmsg("must be superuser to use pageinspect functions")));
     356             : 
     357          28 :     if (SRF_IS_FIRSTCALL())
     358             :     {
     359          12 :         text       *relname = PG_GETARG_TEXT_PP(0);
     360          12 :         int64       blkno = PG_GETARG_INT64(1);
     361          12 :         int64       blk_count = PG_GETARG_INT64(2);
     362             :         RangeVar   *relrv;
     363             : 
     364          12 :         fctx = SRF_FIRSTCALL_INIT();
     365             : 
     366          12 :         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     367          12 :         rel = relation_openrv(relrv, AccessShareLock);
     368             : 
     369             :         /* Check that rel is a valid btree index and 1st block number is OK */
     370          12 :         bt_index_block_validate(rel, blkno);
     371             : 
     372             :         /*
     373             :          * Check if upper bound of the specified range is valid. If only one
     374             :          * page is requested, skip as we've already validated the page. (Also,
     375             :          * it's important to skip this if blk_count is negative.)
                     :          *
                     :          * NOTE(review): blk_count comes straight from the caller and is
                     :          * not bounded here, so blkno + blk_count - 1 can overflow int64
                     :          * for a very large blk_count before the range check sees it.
                     :          * Confirm whether an explicit bound is wanted.
     376             :          */
     377           8 :         if (blk_count > 1)
     378           4 :             check_relation_block_range(rel, blkno + blk_count - 1);
     379             : 
     380             :         /* Save arguments for reuse */
     381           8 :         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
     382             : 
     383           8 :         uargs = palloc(sizeof(ua_page_stats));
     384             : 
     385           8 :         uargs->relid = RelationGetRelid(rel);
     386           8 :         uargs->blkno = blkno;
     387           8 :         uargs->blk_count = blk_count;
     388           8 :         uargs->allpages = (blk_count < 0);
     389             : 
     390           8 :         fctx->user_fctx = uargs;
     391             : 
     392           8 :         MemoryContextSwitchTo(mctx);
     393             : 
     394             :         /*
     395             :          * To avoid possibly leaking a relcache reference if the SRF isn't run
     396             :          * to completion, we close and re-open the index rel each time
     397             :          * through, using the index's OID for re-opens to ensure we get the
     398             :          * same rel.  Keep the AccessShareLock though, to ensure it doesn't go
     399             :          * away underneath us.
     400             :          */
     401           8 :         relation_close(rel, NoLock);
     402             :     }
     403             : 
     404          24 :     fctx = SRF_PERCALL_SETUP();
     405          24 :     uargs = fctx->user_fctx;
     406             : 
     407             :     /* We should have lock already */
     408          24 :     rel = relation_open(uargs->relid, NoLock);
     409             : 
     410             :     /* In all-pages mode, recheck the index length each time */
     411          24 :     if (uargs->allpages)
     412          10 :         uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno;   /* may be <= 0, which ends the scan below */
     413             : 
     414          24 :     if (uargs->blk_count > 0)
     415             :     {
     416             :         /* We need to fetch next block statistics */
     417             :         Buffer      buffer;
     418             :         Datum       result;
     419             :         HeapTuple   tuple;
     420             :         int         j;
     421             :         char       *values[11];     /* one C string per output column */
     422             :         BTPageStat  stat;
     423             :         TupleDesc   tupleDesc;
     424             : 
     425          16 :         buffer = ReadBuffer(rel, uargs->blkno);
     426          16 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     427             : 
     428             :         /* keep compiler quiet */
     429          16 :         stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
     430          16 :         stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
     431             : 
     432          16 :         GetBTPageStatistics(uargs->blkno, buffer, &stat);
     433             : 
     434          16 :         UnlockReleaseBuffer(buffer);
     435          16 :         relation_close(rel, NoLock);    /* drop only the relcache reference; the lock is kept until the final call */
     436             : 
     437             :         /* Build a tuple descriptor for our result type */
     438          16 :         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     439           0 :             elog(ERROR, "return type must be a row type");
     440             : 
     441          16 :         j = 0;
     442          16 :         values[j++] = psprintf("%u", stat.blkno);
     443          16 :         values[j++] = psprintf("%c", stat.type);
     444          16 :         values[j++] = psprintf("%u", stat.live_items);
     445          16 :         values[j++] = psprintf("%u", stat.dead_items);
     446          16 :         values[j++] = psprintf("%u", stat.avg_item_size);
     447          16 :         values[j++] = psprintf("%u", stat.page_size);
     448          16 :         values[j++] = psprintf("%u", stat.free_size);
     449          16 :         values[j++] = psprintf("%u", stat.btpo_prev);
     450          16 :         values[j++] = psprintf("%u", stat.btpo_next);
     451          16 :         values[j++] = psprintf("%u", stat.btpo_level);
     452          16 :         values[j++] = psprintf("%d", stat.btpo_flags);
     453             : 
     454             :         /* Construct tuple to be returned */
     455          16 :         tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
     456             :                                        values);
     457             : 
     458          16 :         result = HeapTupleGetDatum(tuple);
     459             : 
     460             :         /*
     461             :          * Move to the next block number and decrement the number of blocks
     462             :          * still to be fetched
     463             :          */
     464          16 :         uargs->blkno++;
     465          16 :         uargs->blk_count--;
     466             : 
     467          16 :         SRF_RETURN_NEXT(fctx, result);
     468             :     }
     469             : 
     470             :     /* Done, so finally we can release the index lock */
     471           8 :     relation_close(rel, AccessShareLock);
     472           8 :     SRF_RETURN_DONE(fctx);
     473             : }
     474             : 
     475             : /*-------------------------------------------------------
     476             :  * bt_page_print_tuples()
     477             :  *
     478             :  * Form a tuple describing index tuple at a given offset
     479             :  * ------------------------------------------------------
     480             :  */
     481             : static Datum
     482           6 : bt_page_print_tuples(ua_page_items *uargs)
     483             : {
     484           6 :     Page        page = uargs->page;
     485           6 :     OffsetNumber offset = uargs->offset;
     486           6 :     bool        leafpage = uargs->leafpage;
     487           6 :     bool        rightmost = uargs->rightmost;
     488             :     bool        ispivottuple;
     489             :     Datum       values[9];
     490             :     bool        nulls[9];
     491             :     HeapTuple   tuple;
     492             :     ItemId      id;
     493             :     IndexTuple  itup;
     494             :     int         j;
     495             :     int         off;
     496             :     int         dlen;
     497             :     char       *dump,
     498             :                *datacstring;
     499             :     char       *ptr;
     500             :     ItemPointer htid;
     501             : 
     502           6 :     id = PageGetItemId(page, offset);
     503             : 
     504           6 :     if (!ItemIdIsValid(id))
     505           0 :         elog(ERROR, "invalid ItemId");
     506             : 
     507           6 :     itup = (IndexTuple) PageGetItem(page, id);
     508             : 
     509           6 :     j = 0;
     510           6 :     memset(nulls, 0, sizeof(nulls));
     511           6 :     values[j++] = DatumGetInt16(offset);
     512           6 :     values[j++] = ItemPointerGetDatum(&itup->t_tid);
     513           6 :     values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
     514           6 :     values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
     515           6 :     values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
     516             : 
     517           6 :     ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
     518           6 :     dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
     519             : 
     520             :     /*
     521             :      * Make sure that "data" column does not include posting list or pivot
     522             :      * tuple representation of heap TID(s).
     523             :      *
     524             :      * Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
     525             :      * (those built before BTREE_VERSION 4), but we have no way of determining
     526             :      * if this page came from a !heapkeyspace index.  We may only have a bytea
     527             :      * nbtree page image to go on, so in general there is no metapage that we
     528             :      * can check.
     529             :      *
     530             :      * That's okay here because BTreeTupleIsPivot() can only return false for
     531             :      * a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot.  Since
     532             :      * heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
     533             :      * there cannot possibly be a pivot tuple heap TID representation that we
     534             :      * fail to make an adjustment for.  A !heapkeyspace index can have
     535             :      * BTreeTupleIsPivot() return true (due to things like suffix truncation
     536             :      * for INCLUDE indexes in Postgres v11), but when that happens
     537             :      * BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
     538             :      * NULL).
     539             :      *
     540             :      * Note: BTreeTupleIsPosting() always works reliably, even with
     541             :      * !heapkeyspace indexes.
     542             :      */
     543           6 :     if (BTreeTupleIsPosting(itup))
     544           0 :         dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
     545           6 :     else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
     546           0 :         dlen -= MAXALIGN(sizeof(ItemPointerData));
     547             : 
     548           6 :     if (dlen < 0 || dlen > INDEX_SIZE_MASK)
     549           0 :         elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
     550             :              dlen, offset);
     551           6 :     dump = palloc0(dlen * 3 + 1);
     552           6 :     datacstring = dump;
     553          54 :     for (off = 0; off < dlen; off++)
     554             :     {
     555          48 :         if (off > 0)
     556          42 :             *dump++ = ' ';
     557          48 :         sprintf(dump, "%02x", *(ptr + off) & 0xff);
     558          48 :         dump += 2;
     559             :     }
     560           6 :     values[j++] = CStringGetTextDatum(datacstring);
     561           6 :     pfree(datacstring);
     562             : 
     563             :     /*
     564             :      * We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
     565             :      * again.  Deduce whether or not tuple must be a pivot tuple based on
     566             :      * whether or not the page is a leaf page, as well as the page offset
     567             :      * number of the tuple.
     568             :      */
     569           6 :     ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
     570             : 
     571             :     /* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
     572           6 :     if (!ispivottuple)
     573           6 :         values[j++] = BoolGetDatum(ItemIdIsDead(id));
     574             :     else
     575             :     {
     576             :         Assert(!ItemIdIsDead(id));
     577           0 :         nulls[j++] = true;
     578             :     }
     579             : 
     580           6 :     htid = BTreeTupleGetHeapTID(itup);
     581           6 :     if (ispivottuple && !BTreeTupleIsPivot(itup))
     582             :     {
     583             :         /* Don't show bogus heap TID in !heapkeyspace pivot tuple */
     584           0 :         htid = NULL;
     585             :     }
     586             : 
     587           6 :     if (htid)
     588           6 :         values[j++] = ItemPointerGetDatum(htid);
     589             :     else
     590           0 :         nulls[j++] = true;
     591             : 
     592           6 :     if (BTreeTupleIsPosting(itup))
     593             :     {
     594             :         /* Build an array of item pointers */
     595             :         ItemPointer tids;
     596             :         Datum      *tids_datum;
     597             :         int         nposting;
     598             : 
     599           0 :         tids = BTreeTupleGetPosting(itup);
     600           0 :         nposting = BTreeTupleGetNPosting(itup);
     601           0 :         tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
     602           0 :         for (int i = 0; i < nposting; i++)
     603           0 :             tids_datum[i] = ItemPointerGetDatum(&tids[i]);
     604           0 :         values[j++] = PointerGetDatum(construct_array_builtin(tids_datum, nposting, TIDOID));
     605           0 :         pfree(tids_datum);
     606             :     }
     607             :     else
     608           6 :         nulls[j++] = true;
     609             : 
     610             :     /* Build and return the result tuple */
     611           6 :     tuple = heap_form_tuple(uargs->tupd, values, nulls);
     612             : 
     613           6 :     return HeapTupleGetDatum(tuple);
     614             : }
     615             : 
     616             : /*-------------------------------------------------------
     617             :  * bt_page_items()
     618             :  *
     619             :  * Get IndexTupleData set in a btree page
     620             :  *
     621             :  * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
     622             :  *-------------------------------------------------------
     623             :  */
     624             : static Datum
     625          16 : bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
     626             : {
     627          16 :     text       *relname = PG_GETARG_TEXT_PP(0);
     628          16 :     int64       blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1));
     629             :     Datum       result;
     630             :     FuncCallContext *fctx;
     631             :     MemoryContext mctx;
     632             :     ua_page_items *uargs;
     633             : 
     634          16 :     if (!superuser())
     635           0 :         ereport(ERROR,
     636             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     637             :                  errmsg("must be superuser to use pageinspect functions")));
     638             : 
     639          16 :     if (SRF_IS_FIRSTCALL())
     640             :     {
     641             :         RangeVar   *relrv;
     642             :         Relation    rel;
     643             :         Buffer      buffer;
     644             :         BTPageOpaque opaque;
     645             :         TupleDesc   tupleDesc;
     646             : 
     647          12 :         fctx = SRF_FIRSTCALL_INIT();
     648             : 
     649          12 :         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     650          12 :         rel = relation_openrv(relrv, AccessShareLock);
     651             : 
     652          12 :         bt_index_block_validate(rel, blkno);
     653             : 
     654           4 :         buffer = ReadBuffer(rel, blkno);
     655           4 :         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     656             : 
     657             :         /*
     658             :          * We copy the page into local storage to avoid holding pin on the
     659             :          * buffer longer than we must, and possibly failing to release it at
     660             :          * all if the calling query doesn't fetch all rows.
     661             :          */
     662           4 :         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
     663             : 
     664           4 :         uargs = palloc(sizeof(ua_page_items));
     665             : 
     666           4 :         uargs->page = palloc(BLCKSZ);
     667           4 :         memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
     668             : 
     669           4 :         UnlockReleaseBuffer(buffer);
     670           4 :         relation_close(rel, AccessShareLock);
     671             : 
     672           4 :         uargs->offset = FirstOffsetNumber;
     673             : 
     674           4 :         opaque = BTPageGetOpaque(uargs->page);
     675             : 
     676           4 :         if (!P_ISDELETED(opaque))
     677           4 :             fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
     678             :         else
     679             :         {
     680             :             /* Don't interpret BTDeletedPageData as index tuples */
     681           0 :             elog(NOTICE, "page from block " INT64_FORMAT " is deleted", blkno);
     682           0 :             fctx->max_calls = 0;
     683             :         }
     684           4 :         uargs->leafpage = P_ISLEAF(opaque);
     685           4 :         uargs->rightmost = P_RIGHTMOST(opaque);
     686             : 
     687             :         /* Build a tuple descriptor for our result type */
     688           4 :         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     689           0 :             elog(ERROR, "return type must be a row type");
     690           4 :         tupleDesc = BlessTupleDesc(tupleDesc);
     691             : 
     692           4 :         uargs->tupd = tupleDesc;
     693             : 
     694           4 :         fctx->user_fctx = uargs;
     695             : 
     696           4 :         MemoryContextSwitchTo(mctx);
     697             :     }
     698             : 
     699           8 :     fctx = SRF_PERCALL_SETUP();
     700           8 :     uargs = fctx->user_fctx;
     701             : 
     702           8 :     if (fctx->call_cntr < fctx->max_calls)
     703             :     {
     704           4 :         result = bt_page_print_tuples(uargs);
     705           4 :         uargs->offset++;
     706           4 :         SRF_RETURN_NEXT(fctx, result);
     707             :     }
     708             : 
     709           4 :     SRF_RETURN_DONE(fctx);
     710             : }
     711             : 
     712             : Datum
     713          12 : bt_page_items_1_9(PG_FUNCTION_ARGS)
     714             : {
     715          12 :     return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_9);
     716             : }
     717             : 
     718             : /* entry point for old extension version */
     719             : Datum
     720           4 : bt_page_items(PG_FUNCTION_ARGS)
     721             : {
     722           4 :     return bt_page_items_internal(fcinfo, PAGEINSPECT_V1_8);
     723             : }
     724             : 
     725             : /*-------------------------------------------------------
     726             :  * bt_page_items_bytea()
     727             :  *
     728             :  * Get IndexTupleData set in a btree page
     729             :  *
     730             :  * Usage: SELECT * FROM bt_page_items(get_raw_page('t1_pkey', 1));
     731             :  *-------------------------------------------------------
     732             :  */
     733             : 
     734             : Datum
     735          18 : bt_page_items_bytea(PG_FUNCTION_ARGS)
     736             : {
     737          18 :     bytea      *raw_page = PG_GETARG_BYTEA_P(0);
     738             :     Datum       result;
     739             :     FuncCallContext *fctx;
     740             :     ua_page_items *uargs;
     741             : 
     742          18 :     if (!superuser())
     743           0 :         ereport(ERROR,
     744             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     745             :                  errmsg("must be superuser to use raw page functions")));
     746             : 
     747          18 :     if (SRF_IS_FIRSTCALL())
     748             :     {
     749             :         BTPageOpaque opaque;
     750             :         MemoryContext mctx;
     751             :         TupleDesc   tupleDesc;
     752             : 
     753          16 :         fctx = SRF_FIRSTCALL_INIT();
     754          16 :         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
     755             : 
     756          16 :         uargs = palloc(sizeof(ua_page_items));
     757             : 
     758          16 :         uargs->page = get_page_from_raw(raw_page);
     759             : 
     760          14 :         if (PageIsNew(uargs->page))
     761             :         {
     762           2 :             MemoryContextSwitchTo(mctx);
     763           2 :             PG_RETURN_NULL();
     764             :         }
     765             : 
     766          12 :         uargs->offset = FirstOffsetNumber;
     767             : 
     768             :         /* verify the special space has the expected size */
     769          12 :         if (PageGetSpecialSize(uargs->page) != MAXALIGN(sizeof(BTPageOpaqueData)))
     770           4 :             ereport(ERROR,
     771             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     772             :                      errmsg("input page is not a valid %s page", "btree"),
     773             :                      errdetail("Expected special size %d, got %d.",
     774             :                                (int) MAXALIGN(sizeof(BTPageOpaqueData)),
     775             :                                (int) PageGetSpecialSize(uargs->page))));
     776             : 
     777           8 :         opaque = BTPageGetOpaque(uargs->page);
     778             : 
     779           8 :         if (P_ISMETA(opaque))
     780           4 :             ereport(ERROR,
     781             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     782             :                      errmsg("block is a meta page")));
     783             : 
     784           4 :         if (P_ISLEAF(opaque) && opaque->btpo_level != 0)
     785           2 :             ereport(ERROR,
     786             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     787             :                      errmsg("block is not a valid btree leaf page")));
     788             : 
     789           2 :         if (P_ISDELETED(opaque))
     790           0 :             elog(NOTICE, "page is deleted");
     791             : 
     792           2 :         if (!P_ISDELETED(opaque))
     793           2 :             fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
     794             :         else
     795             :         {
     796             :             /* Don't interpret BTDeletedPageData as index tuples */
     797           0 :             elog(NOTICE, "page from block is deleted");
     798           0 :             fctx->max_calls = 0;
     799             :         }
     800           2 :         uargs->leafpage = P_ISLEAF(opaque);
     801           2 :         uargs->rightmost = P_RIGHTMOST(opaque);
     802             : 
     803             :         /* Build a tuple descriptor for our result type */
     804           2 :         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     805           0 :             elog(ERROR, "return type must be a row type");
     806           2 :         tupleDesc = BlessTupleDesc(tupleDesc);
     807             : 
     808           2 :         uargs->tupd = tupleDesc;
     809             : 
     810           2 :         fctx->user_fctx = uargs;
     811             : 
     812           2 :         MemoryContextSwitchTo(mctx);
     813             :     }
     814             : 
     815           4 :     fctx = SRF_PERCALL_SETUP();
     816           4 :     uargs = fctx->user_fctx;
     817             : 
     818           4 :     if (fctx->call_cntr < fctx->max_calls)
     819             :     {
     820           2 :         result = bt_page_print_tuples(uargs);
     821           2 :         uargs->offset++;
     822           2 :         SRF_RETURN_NEXT(fctx, result);
     823             :     }
     824             : 
     825           2 :     SRF_RETURN_DONE(fctx);
     826             : }
     827             : 
     828             : /* Number of output arguments (columns) for bt_metap() */
     829             : #define BT_METAP_COLS_V1_8      9
     830             : 
     831             : /* ------------------------------------------------
     832             :  * bt_metap()
     833             :  *
     834             :  * Get a btree's meta-page information
     835             :  *
     836             :  * Usage: SELECT * FROM bt_metap('t1_pkey')
     837             :  * ------------------------------------------------
     838             :  */
     839             : Datum
     840           4 : bt_metap(PG_FUNCTION_ARGS)
     841             : {
     842           4 :     text       *relname = PG_GETARG_TEXT_PP(0);
     843             :     Datum       result;
     844             :     Relation    rel;
     845             :     RangeVar   *relrv;
     846             :     BTMetaPageData *metad;
     847             :     TupleDesc   tupleDesc;
     848             :     int         j;
     849             :     char       *values[9];
     850             :     Buffer      buffer;
     851             :     Page        page;
     852             :     HeapTuple   tuple;
     853             : 
     854           4 :     if (!superuser())
     855           0 :         ereport(ERROR,
     856             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     857             :                  errmsg("must be superuser to use pageinspect functions")));
     858             : 
     859           4 :     relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
     860           4 :     rel = relation_openrv(relrv, AccessShareLock);
     861             : 
     862           4 :     if (!IS_INDEX(rel) || !IS_BTREE(rel))
     863           2 :         ereport(ERROR,
     864             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     865             :                  errmsg("\"%s\" is not a %s index",
     866             :                         RelationGetRelationName(rel), "btree")));
     867             : 
     868             :     /*
     869             :      * Reject attempts to read non-local temporary relations; we would be
     870             :      * likely to get wrong data since we have no visibility into the owning
     871             :      * session's local buffers.
     872             :      */
     873           2 :     if (RELATION_IS_OTHER_TEMP(rel))
     874           0 :         ereport(ERROR,
     875             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     876             :                  errmsg("cannot access temporary tables of other sessions")));
     877             : 
     878           2 :     buffer = ReadBuffer(rel, 0);
     879           2 :     LockBuffer(buffer, BUFFER_LOCK_SHARE);
     880             : 
     881           2 :     page = BufferGetPage(buffer);
     882           2 :     metad = BTPageGetMeta(page);
     883             : 
     884             :     /* Build a tuple descriptor for our result type */
     885           2 :     if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
     886           0 :         elog(ERROR, "return type must be a row type");
     887             : 
     888             :     /*
     889             :      * We need a kluge here to detect API versions prior to 1.8.  Earlier
     890             :      * versions incorrectly used int4 for certain columns.
     891             :      *
     892             :      * There is no way to reliably avoid the problems created by the old
     893             :      * function definition at this point, so insist that the user update the
     894             :      * extension.
     895             :      */
     896           2 :     if (tupleDesc->natts < BT_METAP_COLS_V1_8)
     897           0 :         ereport(ERROR,
     898             :                 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
     899             :                  errmsg("function has wrong number of declared columns"),
     900             :                  errhint("To resolve the problem, update the \"pageinspect\" extension to the latest version.")));
     901             : 
     902           2 :     j = 0;
     903           2 :     values[j++] = psprintf("%d", metad->btm_magic);
     904           2 :     values[j++] = psprintf("%d", metad->btm_version);
     905           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_root);
     906           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_level);
     907           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastroot);
     908           2 :     values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_fastlevel);
     909             : 
     910             :     /*
     911             :      * Get values of extended metadata if available, use default values
     912             :      * otherwise.  Note that we rely on the assumption that btm_allequalimage
     913             :      * is initialized to zero with indexes that were built on versions prior
     914             :      * to Postgres 13 (just like _bt_metaversion()).
     915             :      */
     916           2 :     if (metad->btm_version >= BTREE_NOVAC_VERSION)
     917             :     {
     918           4 :         values[j++] = psprintf(INT64_FORMAT,
     919           2 :                                (int64) metad->btm_last_cleanup_num_delpages);
     920           2 :         values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
     921           2 :         values[j++] = metad->btm_allequalimage ? "t" : "f";
     922             :     }
     923             :     else
     924             :     {
     925           0 :         values[j++] = "0";
     926           0 :         values[j++] = "-1";
     927           0 :         values[j++] = "f";
     928             :     }
     929             : 
     930           2 :     tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
     931             :                                    values);
     932             : 
     933           2 :     result = HeapTupleGetDatum(tuple);
     934             : 
     935           2 :     UnlockReleaseBuffer(buffer);
     936           2 :     relation_close(rel, AccessShareLock);
     937             : 
     938           2 :     PG_RETURN_DATUM(result);
     939             : }

Generated by: LCOV version 1.14