LCOV - code coverage report
Current view: top level - contrib/pgstattuple - pgstatapprox.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 50.9 % 114 58
Test Date: 2026-04-01 10:16:04 Functions: 85.7 % 7 6
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pgstatapprox.c
       4              :  *        Bloat estimation functions
       5              :  *
       6              :  * Copyright (c) 2014-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  * IDENTIFICATION
       9              :  *        contrib/pgstattuple/pgstatapprox.c
      10              :  *
      11              :  *-------------------------------------------------------------------------
      12              :  */
      13              : #include "postgres.h"
      14              : 
      15              : #include "access/heapam.h"
      16              : #include "access/htup_details.h"
      17              : #include "access/relation.h"
      18              : #include "access/visibilitymap.h"
      19              : #include "catalog/pg_am_d.h"
      20              : #include "commands/vacuum.h"
      21              : #include "funcapi.h"
      22              : #include "miscadmin.h"
      23              : #include "storage/bufmgr.h"
      24              : #include "storage/freespace.h"
      25              : #include "storage/procarray.h"
      26              : #include "storage/read_stream.h"
      27              : /*
                      :  * Function-manager declarations for the two entry points defined further
                      :  * down in this file, followed by the prototype of the worker function
                      :  * that both of them call.
                      :  */
      28            1 : PG_FUNCTION_INFO_V1(pgstattuple_approx);
      29            2 : PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
      30              : 
      31              : Datum       pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);
      32              : /*
                      :  * Statistics gathered for one relation.
                      :  *
                      :  * table_len is the number of blocks times BLCKSZ.  tuple_len and
                      :  * free_space cover every page: exact figures for the pages that were
                      :  * read, and free-space-map based approximations for all-visible pages
                      :  * that were skipped.  dead_tuple_count and dead_tuple_len come only from
                      :  * the pages that were read.  tuple_count is the live-tuple count from the
                      :  * pages read, extrapolated to the whole table.  scanned_percent is the
                      :  * share of blocks that were read.
                      :  *
                      :  * The percentage fields are only assigned when the relation has at least
                      :  * one block, and the totals are accumulated with +=, so users of this
                      :  * struct must start from a zeroed instance.
                      :  */
      33              : typedef struct output_type
      34              : {
      35              :     uint64      table_len;
      36              :     double      scanned_percent;
      37              :     uint64      tuple_count;
      38              :     uint64      tuple_len;
      39              :     double      tuple_percent;
      40              :     uint64      dead_tuple_count;
      41              :     uint64      dead_tuple_len;
      42              :     double      dead_tuple_percent;
      43              :     uint64      free_space;
      44              :     double      free_percent;
      45              : } output_type;
      46              : /* Number of result columns; must match the number of fields in output_type. */
      47              : #define NUM_OUTPUT_COLUMNS 10
      48              : 
      49              : /*
      50              :  * Struct for statapprox_heap read stream callback.
                      :  *
                      :  * statapprox_heap fills this in and passes its address as the callback's
                      :  * private data.  rel is the relation being examined and stat is the
                      :  * accumulator that the callback updates for pages it skips.
                      :  * current_blocknum is the next block number the callback will look at
                      :  * and nblocks is the relation size in blocks taken when the scan was set
                      :  * up.  vmbuffer is handed to VM_ALL_VISIBLE by the callback and released
                      :  * by statapprox_heap, if still valid, once the scan is finished.
      51              :  */
      52              : typedef struct StatApproxReadStreamPrivate
      53              : {
      54              :     Relation    rel;
      55              :     output_type *stat;
      56              :     BlockNumber current_blocknum;
      57              :     BlockNumber nblocks;
      58              :     BlockNumber scanned;        /* count of pages actually read */
      59              :     Buffer      vmbuffer;       /* for VM lookups */
      60              : } StatApproxReadStreamPrivate;
      61              : 
      62              : /*
      63              :  * Read stream callback for statapprox_heap.
      64              :  *
      65              :  * This callback checks the visibility map for each block.  If the block is
      66              :  * all-visible, we can get the free space from the FSM without reading the
      67              :  * actual page, and skip to the next block.  Only the blocks that are not
      68              :  * all-visible are returned; the caller reads and locks those pages itself.
                      :  *
                      :  * callback_private_data points to a StatApproxReadStreamPrivate.  Each
                      :  * call advances current_blocknum past every block it looks at, adds the
                      :  * FSM-based estimates for skipped blocks to stat->tuple_len and
                      :  * stat->free_space, and increments scanned for the block it hands back.
                      :  * Returns InvalidBlockNumber once current_blocknum reaches nblocks.
                      :  * stream and per_buffer_data are not used.
      69              :  */
      70              : static BlockNumber
      71            2 : statapprox_heap_read_stream_next(ReadStream *stream,
      72              :                                  void *callback_private_data,
      73              :                                  void *per_buffer_data)
      74              : {
      75            2 :     StatApproxReadStreamPrivate *p =
      76              :         (StatApproxReadStreamPrivate *) callback_private_data;
      77              : 
      78            2 :     while (p->current_blocknum < p->nblocks)
      79              :     {
      80            0 :         BlockNumber blkno = p->current_blocknum++;
      81              :         Size        freespace;
      82              : 
      83            0 :         CHECK_FOR_INTERRUPTS();
      84              : 
      85              :         /*
      86              :          * If the page has only visible tuples, then we can find out the free
      87              :          * space from the FSM and move on without reading the page.
                      :          * Everything on the page that is not recorded as free space is
                      :          * counted as tuple_len.
      88              :          */
      89            0 :         if (VM_ALL_VISIBLE(p->rel, blkno, &p->vmbuffer))
      90              :         {
      91            0 :             freespace = GetRecordedFreeSpace(p->rel, blkno);
      92            0 :             p->stat->tuple_len += BLCKSZ - freespace;
      93            0 :             p->stat->free_space += freespace;
      94            0 :             continue;
      95              :         }
      96              : 
      97              :         /* This block needs to be read */
      98            0 :         p->scanned++;
      99            0 :         return blkno;
     100              :     }
     101              : 
     102            2 :     return InvalidBlockNumber;
     103              : }
     104              : 
     105              : /*
     106              :  * This function takes an already open relation and scans its pages,
     107              :  * skipping those that have the corresponding visibility map bit set.
     108              :  * For pages we skip, we find the free space from the free space map
     109              :  * and approximate tuple_len on that basis. For the others, we count
     110              :  * the exact number of dead tuples etc.
     111              :  *
     112              :  * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
     113              :  * we do not try to avoid skipping single pages.
                      :  *
                      :  * stat must be zeroed by the caller: the byte and tuple totals are
                      :  * accumulated with += and the percentage fields are only assigned when
                      :  * the relation has at least one block.  The decision about which pages
                      :  * to skip is made in statapprox_heap_read_stream_next; this function
                      :  * only sees the pages that callback returns.
     114              :  */
     115              : static void
     116            2 : statapprox_heap(Relation rel, output_type *stat)
     117              : {
     118              :     BlockNumber nblocks;
     119              :     BufferAccessStrategy bstrategy;
     120              :     TransactionId OldestXmin;
     121              :     StatApproxReadStreamPrivate p;
     122              :     ReadStream *stream;
     123              : 
     124            2 :     OldestXmin = GetOldestNonRemovableTransactionId(rel);
     125            2 :     bstrategy = GetAccessStrategy(BAS_BULKREAD);
     126              : 
     127            2 :     nblocks = RelationGetNumberOfBlocks(rel);
     128              : 
     129              :     /* Initialize read stream private data */
     130            2 :     p.rel = rel;
     131            2 :     p.stat = stat;
     132            2 :     p.current_blocknum = 0;
     133            2 :     p.nblocks = nblocks;
     134            2 :     p.scanned = 0;
     135            2 :     p.vmbuffer = InvalidBuffer;
     136              : 
     137              :     /*
     138              :      * Create the read stream. We don't use READ_STREAM_USE_BATCHING because
     139              :      * the callback accesses the visibility map which may need to read VM
     140              :      * pages. While this shouldn't cause deadlocks, we err on the side of
     141              :      * caution.
     142              :      */
     143            2 :     stream = read_stream_begin_relation(READ_STREAM_FULL,
     144              :                                         bstrategy,
     145              :                                         rel,
     146              :                                         MAIN_FORKNUM,
     147              :                                         statapprox_heap_read_stream_next,
     148              :                                         &p,
     149              :                                         0);
     150              : 
     151              :     for (;;)
     152            0 :     {
     153              :         Buffer      buf;
     154              :         Page        page;
     155              :         OffsetNumber offnum,
     156              :                     maxoff;
     157              :         BlockNumber blkno;
     158              : 
     159            2 :         buf = read_stream_next_buffer(stream, NULL);
     160            2 :         if (buf == InvalidBuffer)
     161            2 :             break;
     162              : 
     163            0 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
     164              : 
     165            0 :         page = BufferGetPage(buf);
     166            0 :         blkno = BufferGetBlockNumber(buf);
     167              : 
     168            0 :         stat->free_space += PageGetExactFreeSpace(page);
     169              :         /* New or empty pages contribute only their free space. */
     170            0 :         if (PageIsNew(page) || PageIsEmpty(page))
     171              :         {
     172            0 :             UnlockReleaseBuffer(buf);
     173            0 :             continue;
     174              :         }
     175              : 
     176              :         /*
     177              :          * Look at each tuple on the page and decide whether it's live or
     178              :          * dead, then count it and its size. Unlike lazy_scan_heap, we can
     179              :          * afford to ignore problems and special cases.
     180              :          */
     181            0 :         maxoff = PageGetMaxOffsetNumber(page);
     182              : 
     183            0 :         for (offnum = FirstOffsetNumber;
     184            0 :              offnum <= maxoff;
     185            0 :              offnum = OffsetNumberNext(offnum))
     186              :         {
     187              :             ItemId      itemid;
     188              :             HeapTupleData tuple;
     189              : 
     190            0 :             itemid = PageGetItemId(page, offnum);
     191              : 
     192            0 :             if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
     193            0 :                 ItemIdIsDead(itemid))
     194              :             {
     195            0 :                 continue;
     196              :             }
     197              : 
     198              :             Assert(ItemIdIsNormal(itemid));
     199              : 
     200            0 :             ItemPointerSet(&(tuple.t_self), blkno, offnum);
     201              : 
     202            0 :             tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
     203            0 :             tuple.t_len = ItemIdGetLength(itemid);
     204            0 :             tuple.t_tableOid = RelationGetRelid(rel);
     205              : 
     206              :             /*
     207              :              * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
     208              :              * as "dead" while DELETE_IN_PROGRESS tuples are "live".  We don't
     209              :              * bother distinguishing tuples inserted/deleted by our own
     210              :              * transaction.
     211              :              */
     212            0 :             switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
     213              :             {
     214            0 :                 case HEAPTUPLE_LIVE:
     215              :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
     216            0 :                     stat->tuple_len += tuple.t_len;
     217            0 :                     stat->tuple_count++;
     218            0 :                     break;
     219            0 :                 case HEAPTUPLE_DEAD:
     220              :                 case HEAPTUPLE_RECENTLY_DEAD:
     221              :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
     222            0 :                     stat->dead_tuple_len += tuple.t_len;
     223            0 :                     stat->dead_tuple_count++;
     224            0 :                     break;
     225            0 :                 default:
     226            0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
     227              :                     break;
     228              :             }
     229              :         }
     230              : 
     231            0 :         UnlockReleaseBuffer(buf);
     232              :     }
     233              :     /* The callback should have consumed every block number by now. */
     234              :     Assert(p.current_blocknum == nblocks);
     235            2 :     read_stream_end(stream);
     236              : 
     237            2 :     stat->table_len = (uint64) nblocks * BLCKSZ;
     238              : 
     239              :     /*
     240              :      * We don't know how many tuples are in the pages we didn't scan, so
     241              :      * extrapolate the live-tuple count to the whole table in the same way
     242              :      * that VACUUM does.  (Like VACUUM, we're not taking a random sample, so
     243              :      * just extrapolating linearly seems unsafe.)  There should be no dead
     244              :      * tuples in all-visible pages, so no correction is needed for that, and
     245              :      * we already accounted for the space in those pages, too.
     246              :      */
     247            4 :     stat->tuple_count = vac_estimate_reltuples(rel, nblocks, p.scanned,
     248            2 :                                                stat->tuple_count);
     249              : 
     250              :     /*
                      :      * It's not clear if we could get -1 here, but be safe.
                      :      *
                      :      * NOTE(review): stat->tuple_count is declared uint64, so any negative
                      :      * value returned above has already been converted to an unsigned
                      :      * value by the assignment, and comparing that against 0 cannot
                      :      * change it.  If -1 really is possible, the clamp has to be applied
                      :      * to the raw return value before it is stored -- confirm against
                      :      * vac_estimate_reltuples().
                      :      */
     251            2 :     stat->tuple_count = Max(stat->tuple_count, 0);
     252              : 
     253              :     /*
     254              :      * Calculate percentages if the relation has one or more pages.
     255              :      */
     256            2 :     if (nblocks != 0)
     257              :     {
     258            0 :         stat->scanned_percent = 100.0 * p.scanned / nblocks;
     259            0 :         stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
     260            0 :         stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
     261            0 :         stat->free_percent = 100.0 * stat->free_space / stat->table_len;
     262              :     }
     263              :     /* Release the visibility map buffer if the callback left one behind. */
     264            2 :     if (BufferIsValid(p.vmbuffer))
     265              :     {
     266            0 :         ReleaseBuffer(p.vmbuffer);
     267            0 :         p.vmbuffer = InvalidBuffer;
     268              :     }
     269            2 : }
     270              : 
     271              : /*
     272              :  * Returns estimated live/dead tuple statistics for the given relid.
     273              :  *
     274              :  * The superuser() check here must be kept as the library might be upgraded
     275              :  * without the extension being upgraded, meaning that in pre-1.5 installations
     276              :  * these functions could be called by any user.
                      :  *
                      :  * Argument 0 is the OID of the relation.  Non-superusers get an
                      :  * insufficient-privilege error; otherwise the result is whatever
                      :  * pgstattuple_approx_internal returns for that relation.
     277              :  */
     278              : Datum
     279            0 : pgstattuple_approx(PG_FUNCTION_ARGS)
     280              : {
     281            0 :     Oid         relid = PG_GETARG_OID(0);
     282              : 
     283            0 :     if (!superuser())
     284            0 :         ereport(ERROR,
     285              :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     286              :                  errmsg("must be superuser to use pgstattuple functions")));
     287              : 
     288            0 :     PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
     289              : }
     290              : 
     291              : /*
     292              :  * As of pgstattuple version 1.5, we no longer need to check if the user
     293              :  * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
     294              :  * Users can then grant access to it based on their policies.
     295              :  *
     296              :  * Otherwise identical to pgstattuple_approx (above).
                      :  *
                      :  * Argument 0 is the OID of the relation; the result is whatever
                      :  * pgstattuple_approx_internal returns for it.
     297              :  */
     298              : Datum
     299            6 : pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
     300              : {
     301            6 :     Oid         relid = PG_GETARG_OID(0);
     302              : 
     303            6 :     PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
     304              : }
     305              : /*
                      :  * Common implementation of pgstattuple_approx and pgstattuple_approx_v1_5.
                      :  *
                      :  * Opens the relation identified by relid with AccessShareLock and raises
                      :  * an error for temporary tables of other sessions, for relation kinds
                      :  * other than RELKIND_RELATION, RELKIND_MATVIEW and RELKIND_TOASTVALUE,
                      :  * and for relations whose access method is not heap.  Otherwise it runs
                      :  * statapprox_heap on a zero-initialized output_type, closes the relation
                      :  * and returns the collected statistics as a composite Datum.
                      :  *
                      :  * fcinfo is only used to look up the expected result tuple descriptor,
                      :  * which must be a row type with NUM_OUTPUT_COLUMNS attributes.
                      :  */
     306              : Datum
     307            6 : pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
     308              : {
     309              :     Relation    rel;
     310            6 :     output_type stat = {0};
     311              :     TupleDesc   tupdesc;
     312              :     bool        nulls[NUM_OUTPUT_COLUMNS];
     313              :     Datum       values[NUM_OUTPUT_COLUMNS];
     314              :     HeapTuple   ret;
     315            6 :     int         i = 0;
     316              : 
     317            6 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
     318            0 :         elog(ERROR, "return type must be a row type");
     319              : 
     320            6 :     if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
     321            0 :         elog(ERROR, "incorrect number of output arguments");
     322              : 
     323            6 :     rel = relation_open(relid, AccessShareLock);
     324              : 
     325              :     /*
     326              :      * Reject attempts to read non-local temporary relations; we would be
     327              :      * likely to get wrong data since we have no visibility into the owning
     328              :      * session's local buffers.
     329              :      */
     330            6 :     if (RELATION_IS_OTHER_TEMP(rel))
     331            0 :         ereport(ERROR,
     332              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     333              :                  errmsg("cannot access temporary tables of other sessions")));
     334              : 
     335              :     /*
     336              :      * We support only relation kinds with a visibility map and a free space
     337              :      * map.
     338              :      */
     339            6 :     if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
     340            5 :           rel->rd_rel->relkind == RELKIND_MATVIEW ||
     341            5 :           rel->rd_rel->relkind == RELKIND_TOASTVALUE))
     342            4 :         ereport(ERROR,
     343              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     344              :                  errmsg("relation \"%s\" is of wrong relation kind",
     345              :                         RelationGetRelationName(rel)),
     346              :                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     347              : 
     348            2 :     if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
     349            0 :         ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     350              :                         errmsg("only heap AM is supported")));
     351              : 
     352            2 :     statapprox_heap(rel, &stat);
     353              : 
     354            2 :     relation_close(rel, AccessShareLock);
     355              : 
     356            2 :     memset(nulls, 0, sizeof(nulls));
     357              :     /* Emit the columns in the same order as the fields of output_type. */
     358            2 :     values[i++] = Int64GetDatum(stat.table_len);
     359            2 :     values[i++] = Float8GetDatum(stat.scanned_percent);
     360            2 :     values[i++] = Int64GetDatum(stat.tuple_count);
     361            2 :     values[i++] = Int64GetDatum(stat.tuple_len);
     362            2 :     values[i++] = Float8GetDatum(stat.tuple_percent);
     363            2 :     values[i++] = Int64GetDatum(stat.dead_tuple_count);
     364            2 :     values[i++] = Int64GetDatum(stat.dead_tuple_len);
     365            2 :     values[i++] = Float8GetDatum(stat.dead_tuple_percent);
     366            2 :     values[i++] = Int64GetDatum(stat.free_space);
     367            2 :     values[i++] = Float8GetDatum(stat.free_percent);
     368              : 
     369            2 :     ret = heap_form_tuple(tupdesc, values, nulls);
     370            2 :     return HeapTupleGetDatum(ret);
     371              : }
        

Generated by: LCOV version 2.0-1