LCOV - code coverage report
Current view: top level - contrib/pg_buffercache - pg_buffercache_pages.c (source / functions)
Test:         PostgreSQL 18devel
Date:         2024-11-21 08:14:44
                        Hit    Total    Coverage
            Lines:      133    146      91.1 %
            Functions:  8      9        88.9 %
Legend:       Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_buffercache_pages.c
       4             :  *    display some contents of the buffer cache
       5             :  *
       6             :  *    contrib/pg_buffercache/pg_buffercache_pages.c
       7             :  *-------------------------------------------------------------------------
       8             :  */
       9             : #include "postgres.h"
      10             : 
      11             : #include "access/htup_details.h"
      12             : #include "catalog/pg_type.h"
      13             : #include "funcapi.h"
      14             : #include "storage/buf_internals.h"
      15             : #include "storage/bufmgr.h"
      16             : 
      17             : 
      18             : #define NUM_BUFFERCACHE_PAGES_MIN_ELEM  8
      19             : #define NUM_BUFFERCACHE_PAGES_ELEM  9
      20             : #define NUM_BUFFERCACHE_SUMMARY_ELEM 5
      21             : #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
      22             : 
      23           2 : PG_MODULE_MAGIC;
      24             : 
      25             : /*
      26             :  * Record structure holding the cache data to be exposed.
      27             :  */
      28             : typedef struct
      29             : {
      30             :     uint32      bufferid;
      31             :     RelFileNumber relfilenumber;
      32             :     Oid         reltablespace;
      33             :     Oid         reldatabase;
      34             :     ForkNumber  forknum;
      35             :     BlockNumber blocknum;
      36             :     bool        isvalid;
      37             :     bool        isdirty;
      38             :     uint16      usagecount;
      39             : 
      40             :     /*
      41             :      * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
      42             :      * being pinned by too many backends and each backend will only pin once
      43             :      * because of bufmgr.c's PrivateRefCount infrastructure.
      44             :      */
      45             :     int32       pinning_backends;
      46             : } BufferCachePagesRec;
      47             : 
      48             : 
      49             : /*
      50             :  * Function context for data persisting over repeated calls.
      51             :  */
      52             : typedef struct
      53             : {
      54             :     TupleDesc   tupdesc;
      55             :     BufferCachePagesRec *record;
      56             : } BufferCachePagesContext;
      57             : 
      58             : 
      59             : /*
      60             :  * Function returning data from the shared buffer cache - buffer number,
      61             :  * relation node/tablespace/database/blocknum and dirty indicator.
      62             :  */
      63           4 : PG_FUNCTION_INFO_V1(pg_buffercache_pages);
      64           4 : PG_FUNCTION_INFO_V1(pg_buffercache_summary);
      65           4 : PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
      66           2 : PG_FUNCTION_INFO_V1(pg_buffercache_evict);
      67             : 
      68             : Datum
      69       65540 : pg_buffercache_pages(PG_FUNCTION_ARGS)
      70             : {
      71             :     FuncCallContext *funcctx;
      72             :     Datum       result;
      73             :     MemoryContext oldcontext;
      74             :     BufferCachePagesContext *fctx;  /* User function context. */
      75             :     TupleDesc   tupledesc;
      76             :     TupleDesc   expected_tupledesc;
      77             :     HeapTuple   tuple;
      78             : 
      79       65540 :     if (SRF_IS_FIRSTCALL())
      80             :     {
      81             :         int         i;
      82             : 
      83           4 :         funcctx = SRF_FIRSTCALL_INIT();
      84             : 
      85             :         /* Switch context when allocating stuff to be used in later calls */
      86           4 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      87             : 
      88             :         /* Create a user function context for cross-call persistence */
      89           4 :         fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
      90             : 
      91             :         /*
      92             :          * To smoothly support upgrades from version 1.0 of this extension,
      93             :          * transparently handle the (non-)existence of the pinning_backends
      94             :          * column. Unfortunately, we have to look up the call's result type
      95             :          * for that: we can't simply trust the result type implied by the
      96             :          * function definition, since that could crash when somebody is still
      97             :          * using the old (or even a wrong) function definition.
      98             :          */
      99           4 :         if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
     100           0 :             elog(ERROR, "return type must be a row type");
     101             : 
     102           4 :         if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
     103           4 :             expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
     104           0 :             elog(ERROR, "incorrect number of output arguments");
     105             : 
     106             :         /* Construct a tuple descriptor for the result rows. */
     107           4 :         tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
     108           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
     109             :                            INT4OID, -1, 0);
     110           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
     111             :                            OIDOID, -1, 0);
     112           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
     113             :                            OIDOID, -1, 0);
     114           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
     115             :                            OIDOID, -1, 0);
     116           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
     117             :                            INT2OID, -1, 0);
     118           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
     119             :                            INT8OID, -1, 0);
     120           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
     121             :                            BOOLOID, -1, 0);
     122           4 :         TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
     123             :                            INT2OID, -1, 0);
     124             : 
     125           4 :         if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
     126           4 :             TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
     127             :                                INT4OID, -1, 0);
     128             : 
     129           4 :         fctx->tupdesc = BlessTupleDesc(tupledesc);
     130             : 
     131             :         /* Allocate NBuffers worth of BufferCachePagesRec records. */
     132           4 :         fctx->record = (BufferCachePagesRec *)
     133           4 :             MemoryContextAllocHuge(CurrentMemoryContext,
     134             :                                    sizeof(BufferCachePagesRec) * NBuffers);
     135             : 
     136             :         /* Set max calls and remember the user function context. */
     137           4 :         funcctx->max_calls = NBuffers;
     138           4 :         funcctx->user_fctx = fctx;
     139             : 
     140             :         /* Return to original context when allocating transient memory */
     141           4 :         MemoryContextSwitchTo(oldcontext);
     142             : 
     143             :         /*
     144             :          * Scan through all the buffers, saving the relevant fields in the
     145             :          * fctx->record structure.
     146             :          *
     147             :          * We don't hold the partition locks, so we don't get a consistent
     148             :          * snapshot across all buffers, but we do grab the buffer header
     149             :          * locks, so the information of each buffer is self-consistent.
     150             :          */
     151       65540 :         for (i = 0; i < NBuffers; i++)
     152             :         {
     153             :             BufferDesc *bufHdr;
     154             :             uint32      buf_state;
     155             : 
     156       65536 :             bufHdr = GetBufferDescriptor(i);
     157             :             /* Lock each buffer header before inspecting. */
     158       65536 :             buf_state = LockBufHdr(bufHdr);
     159             : 
     160       65536 :             fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
     161       65536 :             fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
     162       65536 :             fctx->record[i].reltablespace = bufHdr->tag.spcOid;
     163       65536 :             fctx->record[i].reldatabase = bufHdr->tag.dbOid;
     164       65536 :             fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
     165       65536 :             fctx->record[i].blocknum = bufHdr->tag.blockNum;
     166       65536 :             fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
     167       65536 :             fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
     168             : 
     169       65536 :             if (buf_state & BM_DIRTY)
     170        3748 :                 fctx->record[i].isdirty = true;
     171             :             else
     172       61788 :                 fctx->record[i].isdirty = false;
     173             : 
     174             :             /* Note if the buffer is valid, and has storage created */
     175       65536 :             if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
     176        7308 :                 fctx->record[i].isvalid = true;
     177             :             else
     178       58228 :                 fctx->record[i].isvalid = false;
     179             : 
     180       65536 :             UnlockBufHdr(bufHdr, buf_state);
     181             :         }
     182             :     }
     183             : 
     184       65540 :     funcctx = SRF_PERCALL_SETUP();
     185             : 
     186             :     /* Get the saved state */
     187       65540 :     fctx = funcctx->user_fctx;
     188             : 
     189       65540 :     if (funcctx->call_cntr < funcctx->max_calls)
     190             :     {
     191       65536 :         uint32      i = funcctx->call_cntr;
     192             :         Datum       values[NUM_BUFFERCACHE_PAGES_ELEM];
     193             :         bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];
     194             : 
     195       65536 :         values[0] = Int32GetDatum(fctx->record[i].bufferid);
     196       65536 :         nulls[0] = false;
     197             : 
     198             :         /*
     199             :          * Set all fields except the bufferid to null if the buffer is unused
     200             :          * or not valid.
     201             :          */
     202       65536 :         if (fctx->record[i].blocknum == InvalidBlockNumber ||
     203        7308 :             fctx->record[i].isvalid == false)
     204             :         {
     205       58228 :             nulls[1] = true;
     206       58228 :             nulls[2] = true;
     207       58228 :             nulls[3] = true;
     208       58228 :             nulls[4] = true;
     209       58228 :             nulls[5] = true;
     210       58228 :             nulls[6] = true;
     211       58228 :             nulls[7] = true;
     212             :             /* unused for v1.0 callers, but the array is always long enough */
     213       58228 :             nulls[8] = true;
     214             :         }
     215             :         else
     216             :         {
     217        7308 :             values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
     218        7308 :             nulls[1] = false;
     219        7308 :             values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
     220        7308 :             nulls[2] = false;
     221        7308 :             values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
     222        7308 :             nulls[3] = false;
     223        7308 :             values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
     224        7308 :             nulls[4] = false;
     225        7308 :             values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
     226        7308 :             nulls[5] = false;
     227        7308 :             values[6] = BoolGetDatum(fctx->record[i].isdirty);
     228        7308 :             nulls[6] = false;
     229        7308 :             values[7] = Int16GetDatum(fctx->record[i].usagecount);
     230        7308 :             nulls[7] = false;
     231             :             /* unused for v1.0 callers, but the array is always long enough */
     232        7308 :             values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
     233        7308 :             nulls[8] = false;
     234             :         }
     235             : 
     236             :         /* Build and return the tuple. */
     237       65536 :         tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
     238       65536 :         result = HeapTupleGetDatum(tuple);
     239             : 
     240       65536 :         SRF_RETURN_NEXT(funcctx, result);
     241             :     }
     242             :     else
     243           4 :         SRF_RETURN_DONE(funcctx);
     244             : }
     245             : 
     246             : Datum
     247           4 : pg_buffercache_summary(PG_FUNCTION_ARGS)
     248             : {
     249             :     Datum       result;
     250             :     TupleDesc   tupledesc;
     251             :     HeapTuple   tuple;
     252             :     Datum       values[NUM_BUFFERCACHE_SUMMARY_ELEM];
     253             :     bool        nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
     254             : 
     255           4 :     int32       buffers_used = 0;
     256           4 :     int32       buffers_unused = 0;
     257           4 :     int32       buffers_dirty = 0;
     258           4 :     int32       buffers_pinned = 0;
     259           4 :     int64       usagecount_total = 0;
     260             : 
     261           4 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     262           0 :         elog(ERROR, "return type must be a row type");
     263             : 
     264       65540 :     for (int i = 0; i < NBuffers; i++)
     265             :     {
     266             :         BufferDesc *bufHdr;
     267             :         uint32      buf_state;
     268             : 
     269             :         /*
      270             :          * This function summarizes the state of all buffer headers. Locking
      271             :          * each header wouldn't give a better result, since a buffer's state
      272             :          * can still change the moment the lock is released, and it would
      273             :          * noticeably increase the cost of the function.
     274             :          */
     275       65536 :         bufHdr = GetBufferDescriptor(i);
     276       65536 :         buf_state = pg_atomic_read_u32(&bufHdr->state);
     277             : 
     278       65536 :         if (buf_state & BM_VALID)
     279             :         {
     280        7308 :             buffers_used++;
     281        7308 :             usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
     282             : 
     283        7308 :             if (buf_state & BM_DIRTY)
     284        3748 :                 buffers_dirty++;
     285             :         }
     286             :         else
     287       58228 :             buffers_unused++;
     288             : 
     289       65536 :         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
     290           0 :             buffers_pinned++;
     291             :     }
     292             : 
     293           4 :     memset(nulls, 0, sizeof(nulls));
     294           4 :     values[0] = Int32GetDatum(buffers_used);
     295           4 :     values[1] = Int32GetDatum(buffers_unused);
     296           4 :     values[2] = Int32GetDatum(buffers_dirty);
     297           4 :     values[3] = Int32GetDatum(buffers_pinned);
     298             : 
     299           4 :     if (buffers_used != 0)
     300           4 :         values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
     301             :     else
     302           0 :         nulls[4] = true;
     303             : 
     304             :     /* Build and return the tuple. */
     305           4 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     306           4 :     result = HeapTupleGetDatum(tuple);
     307             : 
     308           4 :     PG_RETURN_DATUM(result);
     309             : }
     310             : 
     311             : Datum
     312           4 : pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
     313             : {
     314           4 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     315           4 :     int         usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
     316           4 :     int         dirty[BM_MAX_USAGE_COUNT + 1] = {0};
     317           4 :     int         pinned[BM_MAX_USAGE_COUNT + 1] = {0};
     318             :     Datum       values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
     319           4 :     bool        nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
     320             : 
     321           4 :     InitMaterializedSRF(fcinfo, 0);
     322             : 
     323       65540 :     for (int i = 0; i < NBuffers; i++)
     324             :     {
     325       65536 :         BufferDesc *bufHdr = GetBufferDescriptor(i);
     326       65536 :         uint32      buf_state = pg_atomic_read_u32(&bufHdr->state);
     327             :         int         usage_count;
     328             : 
     329       65536 :         usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
     330       65536 :         usage_counts[usage_count]++;
     331             : 
     332       65536 :         if (buf_state & BM_DIRTY)
     333        3748 :             dirty[usage_count]++;
     334             : 
     335       65536 :         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
     336           0 :             pinned[usage_count]++;
     337             :     }
     338             : 
     339          28 :     for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
     340             :     {
     341          24 :         values[0] = Int32GetDatum(i);
     342          24 :         values[1] = Int32GetDatum(usage_counts[i]);
     343          24 :         values[2] = Int32GetDatum(dirty[i]);
     344          24 :         values[3] = Int32GetDatum(pinned[i]);
     345             : 
     346          24 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     347             :     }
     348             : 
     349           4 :     return (Datum) 0;
     350             : }
     351             : 
     352             : /*
     353             :  * Try to evict a shared buffer.
     354             :  */
     355             : Datum
     356           0 : pg_buffercache_evict(PG_FUNCTION_ARGS)
     357             : {
     358           0 :     Buffer      buf = PG_GETARG_INT32(0);
     359             : 
     360           0 :     if (!superuser())
     361           0 :         ereport(ERROR,
     362             :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     363             :                  errmsg("must be superuser to use pg_buffercache_evict function")));
     364             : 
     365           0 :     if (buf < 1 || buf > NBuffers)
     366           0 :         elog(ERROR, "bad buffer ID: %d", buf);
     367             : 
     368           0 :     PG_RETURN_BOOL(EvictUnpinnedBuffer(buf));
     369             : }

Generated by: LCOV version 1.14
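
Note on the calling conventions exercised above: pg_buffercache_pages() uses the value-per-call set-returning-function (SRF) protocol (SRF_FIRSTCALL_INIT, cross-call state in multi_call_memory_ctx, SRF_RETURN_NEXT), while pg_buffercache_usage_counts() materializes its whole result up front with InitMaterializedSRF() and tuplestore_putvalues(). The following is a minimal sketch of the value-per-call pattern distilled from the listing; the function name srf_sketch, its two-column result, and the SQL-level declaration it would need are assumptions for illustration, not part of pg_buffercache.

/*
 * srf_sketch.c - hypothetical, minimal value-per-call SRF, distilled from
 * pg_buffercache_pages() above.  Assumes a matching SQL declaration, e.g. a
 * SETOF record function called with a column definition list (or declared
 * with two OUT int4 columns); none of this is part of pg_buffercache.
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(srf_sketch);

Datum
srf_sketch(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;

    if (SRF_IS_FIRSTCALL())
    {
        MemoryContext oldcontext;
        TupleDesc   tupledesc;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Cross-call state must live in the multi-call memory context. */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Build and "bless" the result row type, as the listing does. */
        tupledesc = CreateTemplateTupleDesc(2);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "idx", INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "val", INT4OID, -1, 0);
        funcctx->tuple_desc = BlessTupleDesc(tupledesc);

        funcctx->max_calls = 3; /* emit three rows, one per call */

        MemoryContextSwitchTo(oldcontext);
    }

    funcctx = SRF_PERCALL_SETUP();

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        Datum       values[2];
        bool        nulls[2] = {false, false};
        HeapTuple   tuple;

        values[0] = Int32GetDatum((int32) funcctx->call_cntr);
        values[1] = Int32GetDatum((int32) funcctx->call_cntr * 10);

        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
        SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
    }

    SRF_RETURN_DONE(funcctx);
}

The sketch keeps only a tuple descriptor, using FuncCallContext's built-in tuple_desc field; pg_buffercache_pages() additionally allocates a private user_fctx on the first call to carry the scanned buffer array across calls, which is the standard way to hold larger per-query state in this protocol.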