LCOV - code coverage report
Current view: top level - contrib/pg_buffercache - pg_buffercache_pages.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 133 146 91.1 %
Date: 2025-04-01 15:15:16 Functions: 8 9 88.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_buffercache_pages.c
       4             :  *    display some contents of the buffer cache
       5             :  *
       6             :  *    contrib/pg_buffercache/pg_buffercache_pages.c
       7             :  *-------------------------------------------------------------------------
       8             :  */
       9             : #include "postgres.h"
      10             : 
      11             : #include "access/htup_details.h"
      12             : #include "catalog/pg_type.h"
      13             : #include "funcapi.h"
      14             : #include "storage/buf_internals.h"
      15             : #include "storage/bufmgr.h"
      16             : 
      17             : 
      18             : #define NUM_BUFFERCACHE_PAGES_MIN_ELEM  8   /* fewest result columns pg_buffercache_pages accepts (layout without pinning_backends) */
      19             : #define NUM_BUFFERCACHE_PAGES_ELEM  9   /* most result columns pg_buffercache_pages accepts; also sizes its values/nulls arrays */
      20             : #define NUM_BUFFERCACHE_SUMMARY_ELEM 5  /* size of the values/nulls arrays in pg_buffercache_summary */
      21             : #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4 /* size of the values/nulls arrays in pg_buffercache_usage_counts */
      22             : 
      23           2 : PG_MODULE_MAGIC_EXT(
      24             :                     .name = "pg_buffercache",
      25             :                     .version = PG_VERSION
      26             : );
      27             : 
      28             : /*
      29             :  * Per-buffer record holding the cache data to be exposed.
      30             :  */
      31             : typedef struct
      32             : {
      33             :     uint32      bufferid;       /* result of BufferDescriptorGetBuffer() */
      34             :     RelFileNumber relfilenumber;    /* this and the next four fields are copied from the buffer tag */
      35             :     Oid         reltablespace;
      36             :     Oid         reldatabase;
      37             :     ForkNumber  forknum;
      38             :     BlockNumber blocknum;
      39             :     bool        isvalid;        /* true only if both BM_VALID and BM_TAG_VALID were set */
      40             :     bool        isdirty;        /* true if BM_DIRTY was set */
      41             :     uint16      usagecount;     /* BUF_STATE_GET_USAGECOUNT() of the sampled state */
      42             : 
      43             :     /*
      44             :      * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
      45             :      * being pinned by too many backends and each backend will only pin once
      46             :      * because of bufmgr.c's PrivateRefCount infrastructure.
      47             :      */
      48             :     int32       pinning_backends;   /* BUF_STATE_GET_REFCOUNT() of the sampled state */
      49             : } BufferCachePagesRec;
      50             : 
      51             : 
      52             : /*
      53             :  * Cross-call state of pg_buffercache_pages, stored in funcctx->user_fctx.
      54             :  */
      55             : typedef struct
      56             : {
      57             :     TupleDesc   tupdesc;        /* blessed descriptor of the result rows */
      58             :     BufferCachePagesRec *record;    /* array of NBuffers records filled on the first call */
      59             : } BufferCachePagesContext;
      60             : 
      61             : 
      62             : /*
      63             :  * Entry points defined in this file.  pg_buffercache_pages returns data from
      64             :  * the shared buffer cache: buffer number, relation identity and buffer state.
      65             :  */
      66           4 : PG_FUNCTION_INFO_V1(pg_buffercache_pages);
      67           4 : PG_FUNCTION_INFO_V1(pg_buffercache_summary);
      68           4 : PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
      69           2 : PG_FUNCTION_INFO_V1(pg_buffercache_evict);
      70             : 
      71             : Datum
      72       65540 : pg_buffercache_pages(PG_FUNCTION_ARGS)  /* set-returning: emits one row per shared buffer */
      73             : {
      74             :     FuncCallContext *funcctx;
      75             :     Datum       result;
      76             :     MemoryContext oldcontext;
      77             :     BufferCachePagesContext *fctx;  /* User function context. */
      78             :     TupleDesc   tupledesc;
      79             :     TupleDesc   expected_tupledesc;
      80             :     HeapTuple   tuple;
      81             : 
      82       65540 :     if (SRF_IS_FIRSTCALL()) /* first call: build the descriptor and sample every buffer header once */
      83             :     {
      84             :         int         i;
      85             : 
      86           4 :         funcctx = SRF_FIRSTCALL_INIT();
      87             : 
      88             :         /* Switch context when allocating stuff to be used in later calls */
      89           4 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      90             : 
      91             :         /* Create a user function context for cross-call persistence */
      92           4 :         fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
      93             : 
      94             :         /*
      95             :          * To support upgrades from version 1.0 of this extension smoothly,
      96             :          * transparently handle the presence or absence of the pinning_backends
      97             :          * column.  That requires looking up the result type the caller
      98             :          * expects: we can't rely on the result type implied by the current
      99             :          * function definition without potentially crashing when somebody
     100             :          * uses the old (or even a wrong) function definition.
     101             :          */
     102           4 :         if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
     103           0 :             elog(ERROR, "return type must be a row type");
     104             : 
     105           4 :         if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
     106           4 :             expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
     107           0 :             elog(ERROR, "incorrect number of output arguments");
     108             : 
     109             :         /* Construct a tuple descriptor with as many columns as the caller expects. */
     110           4 :         tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
     111           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
     112             :                            INT4OID, -1, 0);
     113           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
     114             :                            OIDOID, -1, 0);
     115           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
     116             :                            OIDOID, -1, 0);
     117           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
     118             :                            OIDOID, -1, 0);
     119           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
     120             :                            INT2OID, -1, 0);
     121           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
     122             :                            INT8OID, -1, 0);
     123           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
     124             :                            BOOLOID, -1, 0);
     125           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
     126             :                            INT2OID, -1, 0);
     127             : 
     128           4 :         if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
     129           4 :             TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
     130             :                                INT4OID, -1, 0);
     131             : 
     132           4 :         fctx->tupdesc = BlessTupleDesc(tupledesc);
     133             : 
     134             :         /* Allocate NBuffers worth of BufferCachePagesRec records (still in the multi-call context). */
     135           4 :         fctx->record = (BufferCachePagesRec *)
     136           4 :             MemoryContextAllocHuge(CurrentMemoryContext,
     137             :                                    sizeof(BufferCachePagesRec) * NBuffers);
     138             : 
     139             :         /* Set max calls and remember the user function context. */
     140           4 :         funcctx->max_calls = NBuffers;
     141           4 :         funcctx->user_fctx = fctx;
     142             : 
     143             :         /* Return to original context when allocating transient memory */
     144           4 :         MemoryContextSwitchTo(oldcontext);
     145             : 
     146             :         /*
     147             :          * Scan through all the buffers, saving the relevant fields in the
     148             :          * fctx->record structure.
     149             :          *
     150             :          * We don't hold the partition locks, so we don't get a consistent
     151             :          * snapshot across all buffers, but we do grab the buffer header
     152             :          * locks, so the information of each buffer is self-consistent.
     153             :          */
     154       65540 :         for (i = 0; i < NBuffers; i++)
     155             :         {
     156             :             BufferDesc *bufHdr;
     157             :             uint32      buf_state;
     158             : 
     159       65536 :             bufHdr = GetBufferDescriptor(i);
     160             :             /* Lock each buffer header before inspecting. */
     161       65536 :             buf_state = LockBufHdr(bufHdr);
     162             : 
     163       65536 :             fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
     164       65536 :             fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
     165       65536 :             fctx->record[i].reltablespace = bufHdr->tag.spcOid;
     166       65536 :             fctx->record[i].reldatabase = bufHdr->tag.dbOid;
     167       65536 :             fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
     168       65536 :             fctx->record[i].blocknum = bufHdr->tag.blockNum;
     169       65536 :             fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
     170       65536 :             fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
     171             : 
     172       65536 :             if (buf_state & BM_DIRTY)
     173        3764 :                 fctx->record[i].isdirty = true;
     174             :             else
     175       61772 :                 fctx->record[i].isdirty = false;
     176             : 
     177             :             /* Note if the buffer is valid, and has storage created */
     178       65536 :             if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
     179        7428 :                 fctx->record[i].isvalid = true;
     180             :             else
     181       58108 :                 fctx->record[i].isvalid = false;
     182             : 
     183       65536 :             UnlockBufHdr(bufHdr, buf_state);
     184             :         }
     185             :     }
     186             : 
     187       65540 :     funcctx = SRF_PERCALL_SETUP();
     188             : 
     189             :     /* Get the saved state */
     190       65540 :     fctx = funcctx->user_fctx;
     191             : 
     192       65540 :     if (funcctx->call_cntr < funcctx->max_calls)
     193             :     {
     194       65536 :         uint32      i = funcctx->call_cntr; /* index of the saved record to emit on this call */
     195             :         Datum       values[NUM_BUFFERCACHE_PAGES_ELEM];
     196             :         bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];
     197             : 
     198       65536 :         values[0] = Int32GetDatum(fctx->record[i].bufferid);
     199       65536 :         nulls[0] = false;
     200             : 
     201             :         /*
     202             :          * Set all fields except the bufferid to null if the buffer is unused
     203             :          * or not valid.
     204             :          */
     205       65536 :         if (fctx->record[i].blocknum == InvalidBlockNumber ||
     206        7428 :             fctx->record[i].isvalid == false)
     207             :         {
     208       58108 :             nulls[1] = true;
     209       58108 :             nulls[2] = true;
     210       58108 :             nulls[3] = true;
     211       58108 :             nulls[4] = true;
     212       58108 :             nulls[5] = true;
     213       58108 :             nulls[6] = true;
     214       58108 :             nulls[7] = true;
     215             :             /* unused for v1.0 callers, but the array is always long enough */
     216       58108 :             nulls[8] = true;
     217             :         }
     218             :         else
     219             :         {
     220        7428 :             values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
     221        7428 :             nulls[1] = false;
     222        7428 :             values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
     223        7428 :             nulls[2] = false;
     224        7428 :             values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
     225        7428 :             nulls[3] = false;
     226        7428 :             values[4] = ObjectIdGetDatum(fctx->record[i].forknum);  /* NOTE(review): column 5 is declared INT2OID above; Int16GetDatum would match the declared type - confirm */
     227        7428 :             nulls[4] = false;
     228        7428 :             values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
     229        7428 :             nulls[5] = false;
     230        7428 :             values[6] = BoolGetDatum(fctx->record[i].isdirty);
     231        7428 :             nulls[6] = false;
     232        7428 :             values[7] = Int16GetDatum(fctx->record[i].usagecount);
     233        7428 :             nulls[7] = false;
     234             :             /* unused for v1.0 callers, but the array is always long enough */
     235        7428 :             values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
     236        7428 :             nulls[8] = false;
     237             :         }
     238             : 
     239             :         /* Build and return the tuple. */
     240       65536 :         tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
     241       65536 :         result = HeapTupleGetDatum(tuple);
     242             : 
     243       65536 :         SRF_RETURN_NEXT(funcctx, result);
     244             :     }
     245             :     else
     246           4 :         SRF_RETURN_DONE(funcctx);
     247             : }
     248             : 
     249             : Datum
     250           4 : pg_buffercache_summary(PG_FUNCTION_ARGS)    /* returns a single row of cache-wide counters */
     251             : {
     252             :     Datum       result;
     253             :     TupleDesc   tupledesc;
     254             :     HeapTuple   tuple;
     255             :     Datum       values[NUM_BUFFERCACHE_SUMMARY_ELEM];
     256             :     bool        nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
     257             : 
     258           4 :     int32       buffers_used = 0;
     259           4 :     int32       buffers_unused = 0;
     260           4 :     int32       buffers_dirty = 0;
     261           4 :     int32       buffers_pinned = 0;
     262           4 :     int64       usagecount_total = 0;
     263             : 
     264           4 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     265           0 :         elog(ERROR, "return type must be a row type");
     266             : 
     267       65540 :     for (int i = 0; i < NBuffers; i++)
     268             :     {
     269             :         BufferDesc *bufHdr;
     270             :         uint32      buf_state;
     271             : 
     272             :         /*
     273             :          * This function summarizes the state of all headers. Locking the
     274             :          * buffer headers wouldn't provide an improved result as the state of
     275             :          * the buffer can still change after we release the lock and it'd
     276             :          * noticeably increase the cost of the function.
     277             :          */
     278       65536 :         bufHdr = GetBufferDescriptor(i);
     279       65536 :         buf_state = pg_atomic_read_u32(&bufHdr->state);
     280             : 
     281       65536 :         if (buf_state & BM_VALID)   /* valid buffers count as used; dirty ones are counted among them */
     282             :         {
     283        7428 :             buffers_used++;
     284        7428 :             usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
     285             : 
     286        7428 :             if (buf_state & BM_DIRTY)
     287        3764 :                 buffers_dirty++;
     288             :         }
     289             :         else
     290       58108 :             buffers_unused++;
     291             : 
     292       65536 :         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)  /* pinned buffers are counted whether or not they are valid */
     293           0 :             buffers_pinned++;
     294             :     }
     295             : 
     296           4 :     memset(nulls, 0, sizeof(nulls));
     297           4 :     values[0] = Int32GetDatum(buffers_used);
     298           4 :     values[1] = Int32GetDatum(buffers_unused);
     299           4 :     values[2] = Int32GetDatum(buffers_dirty);
     300           4 :     values[3] = Int32GetDatum(buffers_pinned);
     301             : 
     302           4 :     if (buffers_used != 0)
     303           4 :         values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
     304             :     else
     305           0 :         nulls[4] = true;    /* no used buffers: the average usage count is returned as NULL */
     306             : 
     307             :     /* Build and return the tuple. */
     308           4 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     309           4 :     result = HeapTupleGetDatum(tuple);
     310             : 
     311           4 :     PG_RETURN_DATUM(result);
     312             : }
     313             : 
     314             : Datum
     315           4 : pg_buffercache_usage_counts(PG_FUNCTION_ARGS)   /* emits one row per usage count value, 0..BM_MAX_USAGE_COUNT */
     316             : {
     317           4 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     318           4 :     int         usage_counts[BM_MAX_USAGE_COUNT + 1] = {0}; /* buffers seen with each usage count */
     319           4 :     int         dirty[BM_MAX_USAGE_COUNT + 1] = {0};    /* of those, buffers with BM_DIRTY set */
     320           4 :     int         pinned[BM_MAX_USAGE_COUNT + 1] = {0};   /* of those, buffers with a nonzero refcount */
     321             :     Datum       values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
     322           4 :     bool        nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0}; /* no column is ever NULL */
     323             : 
     324           4 :     InitMaterializedSRF(fcinfo, 0); /* presumably sets up rsinfo->setResult and rsinfo->setDesc used below - confirm */
     325             : 
     326       65540 :     for (int i = 0; i < NBuffers; i++)
     327             :     {
     328       65536 :         BufferDesc *bufHdr = GetBufferDescriptor(i);
     329       65536 :         uint32      buf_state = pg_atomic_read_u32(&bufHdr->state); /* read without taking the buffer header lock */
     330             :         int         usage_count;
     331             : 
     332       65536 :         usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
     333       65536 :         usage_counts[usage_count]++;
     334             : 
     335       65536 :         if (buf_state & BM_DIRTY)
     336        3764 :             dirty[usage_count]++;
     337             : 
     338       65536 :         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
     339           0 :             pinned[usage_count]++;
     340             :     }
     341             : 
     342          28 :     for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)    /* one output row per usage count bucket */
     343             :     {
     344          24 :         values[0] = Int32GetDatum(i);
     345          24 :         values[1] = Int32GetDatum(usage_counts[i]);
     346          24 :         values[2] = Int32GetDatum(dirty[i]);
     347          24 :         values[3] = Int32GetDatum(pinned[i]);
     348             : 
     349          24 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     350             :     }
     351             : 
     352           4 :     return (Datum) 0;   /* the rows were already stored in rsinfo->setResult */
     353             : }
     354             : 
     355             : /*
     356             :  * Try to evict the shared buffer with the given buffer ID (superuser only).
     357             :  */
     358             : Datum
     359           0 : pg_buffercache_evict(PG_FUNCTION_ARGS)
     360             : {
     361           0 :     Buffer      buf = PG_GETARG_INT32(0);
     362             : 
     363           0 :     if (!superuser())
     364           0 :         ereport(ERROR,
     365             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     366             :                  errmsg("must be superuser to use pg_buffercache_evict function")));
     367             : 
     368           0 :     if (buf < 1 || buf > NBuffers)  /* accepted buffer IDs are 1..NBuffers */
     369           0 :         elog(ERROR, "bad buffer ID: %d", buf);
     370             : 
     371           0 :     PG_RETURN_BOOL(EvictUnpinnedBuffer(buf));   /* returns EvictUnpinnedBuffer()'s result as a boolean */
     372             : }

Generated by: LCOV version 1.14