LCOV - code coverage report
Current view: top level - contrib/pg_buffercache - pg_buffercache_pages.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.9 % 313 275
Test Date: 2026-04-28 05:16:27 Functions: 96.0 % 25 24
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pg_buffercache_pages.c
       4              :  *    display some contents of the buffer cache
       5              :  *
       6              :  *    contrib/pg_buffercache/pg_buffercache_pages.c
       7              :  *-------------------------------------------------------------------------
       8              :  */
       9              : #include "postgres.h"
      10              : 
      11              : #include "access/htup_details.h"
      12              : #include "access/relation.h"
      13              : #include "catalog/pg_type.h"
      14              : #include "funcapi.h"
      15              : #include "port/pg_numa.h"
      16              : #include "storage/buf_internals.h"
      17              : #include "storage/bufmgr.h"
      18              : #include "utils/rel.h"
      19              : #include "utils/tuplestore.h"
      20              : 
      21              : 
      22              : #define NUM_BUFFERCACHE_PAGES_MIN_ELEM  8 /* fewest result columns pg_buffercache_pages() accepts */
      23              : #define NUM_BUFFERCACHE_PAGES_ELEM  9 /* most result columns; sizes its values[]/nulls[] */
      24              : #define NUM_BUFFERCACHE_SUMMARY_ELEM 5 /* sizes values[]/nulls[] in pg_buffercache_summary() */
      25              : #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
      26              : #define NUM_BUFFERCACHE_EVICT_ELEM 2
      27              : #define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3
      28              : #define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
      29              : #define NUM_BUFFERCACHE_MARK_DIRTY_ELEM 2
      30              : #define NUM_BUFFERCACHE_MARK_DIRTY_RELATION_ELEM 3
      31              : #define NUM_BUFFERCACHE_MARK_DIRTY_ALL_ELEM 3
      32              : 
      33              : #define NUM_BUFFERCACHE_OS_PAGES_ELEM   3 /* bufferid, os_page_num, numa_node */
      34              : /* Module magic block, recording this module's name and version. */
      35            1 : PG_MODULE_MAGIC_EXT(
      36              :                     .name = "pg_buffercache",
      37              :                     .version = PG_VERSION
      38              : );
      39              : 
      40              : /*
      41              :  * One output row of OS-page data, collected up front and exposed later.
      42              :  * Filled by the common code behind pg_buffercache_os_pages() and
      43              :  * pg_buffercache_numa_pages(); NUMA information may or may not be included.
      44              :  */
      45              : typedef struct
      46              : {
      47              :     uint32      bufferid;       /* buffer ID from BufferDescriptorGetBuffer() */
      48              :     int64       page_num;       /* OS page index, counted from the page holding the first buffer */
      49              :     int32       numa_node;      /* NUMA inquiry status, or -1 when NUMA was not requested */
      50              : } BufferCacheOsPagesRec;
      51              : 
      52              : /*
      53              :  * Function context for data persisting over repeated calls.
      54              :  */
      55              : typedef struct
      56              : {
      57              :     TupleDesc   tupdesc;        /* blessed descriptor of the result row */
      58              :     bool        include_numa;   /* whether NUMA node data was requested */
      59              :     BufferCacheOsPagesRec *record;  /* collected rows, returned one per call */
      60              : } BufferCacheOsPagesContext;
      61              : 
      62              : 
      63              : /*
      64              :  * Declarations for the functions this module exports.  pg_buffercache_pages()
      65              :  * reports, per shared buffer, its number, relation identity, block and state.
      66              :  */
      67            2 : PG_FUNCTION_INFO_V1(pg_buffercache_pages);
      68            2 : PG_FUNCTION_INFO_V1(pg_buffercache_os_pages);
      69            1 : PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
      70            2 : PG_FUNCTION_INFO_V1(pg_buffercache_summary);
      71            2 : PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
      72            3 : PG_FUNCTION_INFO_V1(pg_buffercache_evict);
      73            2 : PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation);
      74            2 : PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
      75            2 : PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty);
      76            2 : PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_relation);
      77            2 : PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_all);
      78              : 
      79              : 
      80              : /* Memory is touched only while this is true; cleared after the first NUMA scan */
      81              : static bool firstNumaTouch = true;
      82              : 
      83              : /* pg_buffercache_pages: add one row per shared buffer to the result tuplestore. */
      84              : Datum
      85            2 : pg_buffercache_pages(PG_FUNCTION_ARGS)
      86              : {
      87            2 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
      88              :     TupleDesc   expected_tupledesc;
      89              :     int         i;
      90              : 
      91              :     /*
      92              :      * To support upgrades from version 1.0 of this extension smoothly, we
      93              :      * transparently handle the (non-)existence of the pinning_backends
      94              :      * column.  Unfortunately that means we must look up the result type the
      95              :      * caller expects: relying on the type from the function definition
      96              :      * could crash when somebody uses the old (or even wrong) function
      97              :      * definition.
      98              :      */
      99            2 :     if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
     100            0 :         elog(ERROR, "return type must be a row type");
     101              : 
     102            2 :     if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
     103            2 :         expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
     104            0 :         elog(ERROR, "incorrect number of output arguments");
     105              : 
     106            2 :     InitMaterializedSRF(fcinfo, 0);
     107              : 
     108              :     /*
     109              :      * Scan through all the buffers, adding one row for each of the buffers to
     110              :      * the tuplestore.
     111              :      *
     112              :      * We don't hold the partition locks, so we don't get a consistent
     113              :      * snapshot across all buffers, but we do grab the buffer header locks, so
     114              :      * the information of each buffer is self-consistent.
     115              :      */
     116        32770 :     for (i = 0; i < NBuffers; i++)
     117              :     {
     118              :         BufferDesc *bufHdr;
     119              :         uint64      buf_state;
     120              :         uint32      bufferid;
     121              :         RelFileNumber relfilenumber;
     122              :         Oid         reltablespace;
     123              :         Oid         reldatabase;
     124              :         ForkNumber  forknum;
     125              :         BlockNumber blocknum;
     126              :         bool        isvalid;
     127              :         bool        isdirty;
     128              :         uint16      usagecount;
     129              :         int32       pinning_backends;
     130              :         Datum       values[NUM_BUFFERCACHE_PAGES_ELEM];
     131              :         bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];
     132              : 
     133        32768 :         CHECK_FOR_INTERRUPTS();
     134              : 
     135        32768 :         bufHdr = GetBufferDescriptor(i);
     136              :         /* Lock each buffer header before inspecting. */
     137        32768 :         buf_state = LockBufHdr(bufHdr);
     138              : 
     139        32768 :         bufferid = BufferDescriptorGetBuffer(bufHdr);
     140        32768 :         relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
     141        32768 :         reltablespace = bufHdr->tag.spcOid;
     142        32768 :         reldatabase = bufHdr->tag.dbOid;
     143        32768 :         forknum = BufTagGetForkNum(&bufHdr->tag);
     144        32768 :         blocknum = bufHdr->tag.blockNum;
     145        32768 :         usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
     146        32768 :         pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
     147              : 
     148        32768 :         if (buf_state & BM_DIRTY)
     149         1950 :             isdirty = true;
     150              :         else
     151        30818 :             isdirty = false;
     152              : 
     153              :         /* Note if the buffer is valid and has storage created (both flags set) */
     154        32768 :         if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
     155         4100 :             isvalid = true;
     156              :         else
     157        28668 :             isvalid = false;
     158              : 
     159        32768 :         UnlockBufHdr(bufHdr);
     160              : 
     161              :         /* Build the tuple and add it to tuplestore */
     162        32768 :         values[0] = Int32GetDatum(bufferid);
     163        32768 :         nulls[0] = false;
     164              : 
     165              :         /*
     166              :          * Set all fields except the bufferid to null if the buffer is unused
     167              :          * or not valid.
     168              :          */
     169        32768 :         if (blocknum == InvalidBlockNumber || isvalid == false)
     170              :         {
     171        28668 :             nulls[1] = true;
     172        28668 :             nulls[2] = true;
     173        28668 :             nulls[3] = true;
     174        28668 :             nulls[4] = true;
     175        28668 :             nulls[5] = true;
     176        28668 :             nulls[6] = true;
     177        28668 :             nulls[7] = true;
     178              :             /* unused for v1.0 callers, but the array is always long enough */
     179        28668 :             nulls[8] = true;
     180              :         }
     181              :         else
     182              :         {
     183         4100 :             values[1] = ObjectIdGetDatum(relfilenumber);
     184         4100 :             nulls[1] = false;
     185         4100 :             values[2] = ObjectIdGetDatum(reltablespace);
     186         4100 :             nulls[2] = false;
     187         4100 :             values[3] = ObjectIdGetDatum(reldatabase);
     188         4100 :             nulls[3] = false;
     189         4100 :             values[4] = Int16GetDatum(forknum);
     190         4100 :             nulls[4] = false;
     191         4100 :             values[5] = Int64GetDatum((int64) blocknum);
     192         4100 :             nulls[5] = false;
     193         4100 :             values[6] = BoolGetDatum(isdirty);
     194         4100 :             nulls[6] = false;
     195         4100 :             values[7] = UInt16GetDatum(usagecount);
     196         4100 :             nulls[7] = false;
     197              :             /* unused for v1.0 callers, but the array is always long enough */
     198         4100 :             values[8] = Int32GetDatum(pinning_backends);
     199         4100 :             nulls[8] = false;
     200              :         }
     201              : 
     202        32768 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     203              :     }
     204              : 
     205            2 :     return (Datum) 0;           /* rows were handed over through the tuplestore */
     206              : }
     207              : 
     208              : /*
     209              :  * Inquire about OS page mappings for shared buffers, with NUMA information,
     210              :  * optionally.
     211              :  *
     212              :  * When "include_numa" is false, this routine ignores everything related
     213              :  * to NUMA (returned as NULL values), returning mapping information between
     214              :  * shared buffers and OS pages.
     215              :  *
     216              :  * When "include_numa" is true, NUMA is initialized and numa_node values
     217              :  * are generated.  In order to get reliable results we also need to touch
     218              :  * memory pages, so that the inquiry about NUMA memory node does not return
     219              :  * -2, indicating unmapped/unallocated pages.
     220              :  *
     221              :  * Buffers may be smaller or larger than OS memory pages. For each buffer we
     222              :  * return one entry for each memory page used by the buffer (if the buffer is
     223              :  * smaller, it only uses a part of one memory page).
     224              :  *
     225              :  * We expect both sizes (for buffers and memory pages) to be a power-of-2, so
     226              :  * one is always a multiple of the other.
     227              :  *
     228              :  */
     229              : static Datum
     230        65538 : pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa)
     231              : {
     232              :     FuncCallContext *funcctx;
     233              :     MemoryContext oldcontext;
     234              :     BufferCacheOsPagesContext *fctx;    /* User function context. */
     235              :     TupleDesc   tupledesc;
     236              :     TupleDesc   expected_tupledesc;
     237              :     HeapTuple   tuple;
     238              :     Datum       result;
     239              : 
     240        65538 :     if (SRF_IS_FIRSTCALL())
     241              :     {
     242              :         int         i,
     243              :                     idx;
     244              :         Size        os_page_size;
     245              :         int         pages_per_buffer;
     246            2 :         int        *os_page_status = NULL;
     247            2 :         uint64      os_page_count = 0;
     248              :         int         max_entries;
     249              :         char       *startptr,
     250              :                    *endptr;
     251              : 
     252              :         /* If NUMA information is requested, initialize NUMA support. */
     253            2 :         if (include_numa && pg_numa_init() == -1)
     254            0 :             elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
     255              : 
     256              :         /*
     257              :          * The database block size and OS memory page size are unlikely to be
     258              :          * the same. The block size is 1-32KB, the memory page size depends on
     259              :          * platform. On x86 it's usually 4KB, on ARM it's 4KB or 64KB, but
     260              :          * there are also features like THP etc. Moreover, we don't quite know
     261              :          * how the pages and buffers "align" in memory - the buffers may be
     262              :          * shifted in some way, using more memory pages than necessary.
     263              :          *
     264              :          * So we need to be careful about mapping buffers to memory pages. We
     265              :          * calculate the maximum number of pages a buffer might use, so that
     266              :          * we allocate enough space for the entries. And then we count the
     267              :          * actual number of entries as we scan the buffers.
     268              :          *
     269              :          * This information is needed before calling move_pages() for NUMA
     270              :          * node id inquiry.
     271              :          */
     272            2 :         os_page_size = pg_get_shmem_pagesize();
     273              : 
     274              :         /*
     275              :          * The page size and block size are both expected to be 2^k, so one
     276              :          * divides the other (we don't know in which direction). This does
     277              :          * not say anything about relative alignment of pages/buffers.
     278              :          */
     279              :         Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));
     280              : 
     281            2 :         if (include_numa)
     282              :         {
     283            0 :             void      **os_page_ptrs = NULL;
     284              : 
     285              :             /*
     286              :              * How many addresses we are going to query?  Simply get the page
     287              :              * for the first buffer, and first page after the last buffer, and
     288              :              * count the pages from that.
     289              :              */
     290            0 :             startptr = (char *) TYPEALIGN_DOWN(os_page_size,
     291              :                                                BufferGetBlock(1));
     292            0 :             endptr = (char *) TYPEALIGN(os_page_size,
     293              :                                         (char *) BufferGetBlock(NBuffers) + BLCKSZ);
     294            0 :             os_page_count = (endptr - startptr) / os_page_size;
     295              : 
     296              :             /* Used to determine the NUMA node for all OS pages at once */
     297            0 :             os_page_ptrs = palloc0_array(void *, os_page_count);
     298            0 :             os_page_status = palloc_array(int, os_page_count);
     299              : 
     300              :             /*
     301              :              * Fill pointers for all the memory pages.  This loop stores and
     302              :              * touches (if needed) addresses into os_page_ptrs[] as input to
     303              :              * one big move_pages(2) inquiry system call, as done in
     304              :              * pg_numa_query_pages().
     305              :              */
     306            0 :             idx = 0;
     307            0 :             for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
     308              :             {
     309            0 :                 os_page_ptrs[idx++] = ptr;
     310              : 
     311              :                 /* Only need to touch memory once per backend process lifetime */
     312            0 :                 if (firstNumaTouch)
     313              :                     pg_numa_touch_mem_if_required(ptr);
     314              :             }
     315              : 
     316              :             Assert(idx == os_page_count);
     317              : 
     318            0 :             elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
     319              :                  "os_page_size=%zu", NBuffers, os_page_count, os_page_size);
     320              : 
     321              :             /*
     322              :              * If we ever get 0xff back from the kernel inquiry, then we
     323              :              * probably have a bug in our buffer-to-OS-page mapping code here.
     324              :              */
     325            0 :             memset(os_page_status, 0xff, sizeof(int) * os_page_count);
     326              : 
     327              :             /* Query NUMA status for all the pointers */
     328            0 :             if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
     329            0 :                 elog(ERROR, "failed NUMA pages inquiry: %m");
     330              :         }
     331              : 
     332              :         /* Initialize the multi-call context, load entries about buffers */
     333              : 
     334            2 :         funcctx = SRF_FIRSTCALL_INIT();
     335              : 
     336              :         /* Switch context when allocating stuff to be used in later calls */
     337            2 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     338              : 
     339              :         /* Create a user function context for cross-call persistence */
     340            2 :         fctx = palloc_object(BufferCacheOsPagesContext);
     341              : 
     342            2 :         if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
     343            0 :             elog(ERROR, "return type must be a row type");
     344              : 
     345            2 :         if (expected_tupledesc->natts != NUM_BUFFERCACHE_OS_PAGES_ELEM)
     346            0 :             elog(ERROR, "incorrect number of output arguments");
     347              : 
     348              :         /* Construct a tuple descriptor for the result rows. */
     349            2 :         tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
     350            2 :         TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
     351              :                            INT4OID, -1, 0);
     352            2 :         TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
     353              :                            INT8OID, -1, 0);
     354            2 :         TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
     355              :                            INT4OID, -1, 0);
     356              : 
     357            2 :         TupleDescFinalize(tupledesc);
     358            2 :         fctx->tupdesc = BlessTupleDesc(tupledesc);
     359            2 :         fctx->include_numa = include_numa;
     360              : 
     361              :         /*
     362              :          * Each buffer needs at least one entry, but it might be offset in
     363              :          * some way, and use one extra entry. So we allocate space for the
     364              :          * maximum number of entries we might need, and then count the exact
     365              :          * number as we're walking buffers. That way we can do it in one pass,
     366              :          * without reallocating memory.
     367              :          */
     368            2 :         pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
     369            2 :         max_entries = NBuffers * pages_per_buffer;  /* NOTE(review): int product - confirm it cannot overflow for very large NBuffers */
     370              : 
     371              :         /* Allocate entries for BufferCacheOsPagesRec records. */
     372            2 :         fctx->record = (BufferCacheOsPagesRec *)
     373            2 :             MemoryContextAllocHuge(CurrentMemoryContext,
     374              :                                    sizeof(BufferCacheOsPagesRec) * max_entries);
     375              : 
     376              :         /* Return to original context when allocating transient memory */
     377            2 :         MemoryContextSwitchTo(oldcontext);
     378              : 
     379            2 :         if (include_numa && firstNumaTouch)
     380            0 :             elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");
     381              : 
     382              :         /*
     383              :          * Scan through all the buffers, saving the relevant fields in the
     384              :          * fctx->record structure.
     385              :          *
     386              :          * We don't hold the partition locks, so we don't get a consistent
     387              :          * snapshot across all buffers, but we do grab the buffer header
     388              :          * locks, so the information of each buffer is self-consistent.
     389              :          */
     390            2 :         startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
     391            2 :         idx = 0;
     392        32770 :         for (i = 0; i < NBuffers; i++)
     393              :         {
     394        32768 :             char       *buffptr = (char *) BufferGetBlock(i + 1);
     395              :             BufferDesc *bufHdr;
     396              :             uint32      bufferid;
     397              :             int32       page_num;   /* NOTE(review): stored into an int64 field - confirm int32 range suffices */
     398              :             char       *startptr_buff,
     399              :                        *endptr_buff;
     400              : 
     401        32768 :             CHECK_FOR_INTERRUPTS();
     402              : 
     403        32768 :             bufHdr = GetBufferDescriptor(i);
     404              : 
     405              :             /* Lock each buffer header before inspecting. */
     406        32768 :             LockBufHdr(bufHdr);
     407        32768 :             bufferid = BufferDescriptorGetBuffer(bufHdr);
     408        32768 :             UnlockBufHdr(bufHdr);
     409              : 
     410              :             /* start of the first page of this buffer */
     411        32768 :             startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
     412              : 
     413              :             /* end of the buffer (no need to align to memory page) */
     414        32768 :             endptr_buff = buffptr + BLCKSZ;
     415              : 
     416              :             Assert(startptr_buff < endptr_buff);
     417              : 
     418              :             /* calculate ID of the first page for this buffer */
     419        32768 :             page_num = (startptr_buff - startptr) / os_page_size;
     420              : 
     421              :             /* Add an entry for each OS page overlapping with this buffer. */
     422        98304 :             for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
     423              :             {
     424        65536 :                 fctx->record[idx].bufferid = bufferid;
     425        65536 :                 fctx->record[idx].page_num = page_num;
     426        65536 :                 fctx->record[idx].numa_node = include_numa ? os_page_status[page_num] : -1;
     427              : 
     428              :                 /* advance to the next entry/page */
     429        65536 :                 ++idx;
     430        65536 :                 ++page_num;
     431              :             }
     432              :         }
     433              : 
     434              :         Assert(idx <= max_entries);
     435              : 
     436              :         if (include_numa)
     437              :             Assert(idx >= os_page_count);
     438              : 
     439              :         /* Set max calls and remember the user function context. */
     440            2 :         funcctx->max_calls = idx;
     441            2 :         funcctx->user_fctx = fctx;
     442              : 
     443              :         /* Remember this backend touched the pages (only relevant for NUMA) */
     444            2 :         if (include_numa)
     445            0 :             firstNumaTouch = false;
     446              :     }
     447              : 
     448        65538 :     funcctx = SRF_PERCALL_SETUP();
     449              : 
     450              :     /* Get the saved state */
     451        65538 :     fctx = funcctx->user_fctx;
     452              : 
     453        65538 :     if (funcctx->call_cntr < funcctx->max_calls)
     454              :     {
     455        65536 :         uint32      i = funcctx->call_cntr;
     456              :         Datum       values[NUM_BUFFERCACHE_OS_PAGES_ELEM];
     457              :         bool        nulls[NUM_BUFFERCACHE_OS_PAGES_ELEM];
     458              : 
     459        65536 :         values[0] = Int32GetDatum(fctx->record[i].bufferid);
     460        65536 :         nulls[0] = false;
     461              : 
     462        65536 :         values[1] = Int64GetDatum(fctx->record[i].page_num);
     463        65536 :         nulls[1] = false;
     464              : 
     465        65536 :         if (fctx->include_numa)
     466              :         {
     467              :             /* status is valid node number */
     468            0 :             if (fctx->record[i].numa_node >= 0)
     469              :             {
     470            0 :                 values[2] = Int32GetDatum(fctx->record[i].numa_node);
     471            0 :                 nulls[2] = false;
     472              :             }
     473              :             else
     474              :             {
     475              :                 /* some kind of error (e.g. pages moved to swap) */
     476            0 :                 values[2] = (Datum) 0;
     477            0 :                 nulls[2] = true;
     478              :             }
     479              :         }
     480              :         else
     481              :         {
     482        65536 :             values[2] = (Datum) 0;
     483        65536 :             nulls[2] = true;
     484              :         }
     485              : 
     486              :         /* Build and return the tuple. */
     487        65536 :         tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
     488        65536 :         result = HeapTupleGetDatum(tuple);
     489              : 
     490        65536 :         SRF_RETURN_NEXT(funcctx, result);
     491              :     }
     492              :     else
     493            2 :         SRF_RETURN_DONE(funcctx);
     494              : }
     495              : 
     496              : /*
     497              :  * pg_buffercache_os_pages
     498              :  * Retrieve information about OS pages.  The boolean argument selects whether
     499              :  * NUMA information is collected; the work is done by the internal function.
     500              :  */
     501              : Datum
     502        65538 : pg_buffercache_os_pages(PG_FUNCTION_ARGS)
     503              : {
     504              :     bool        include_numa;
     505              : 
     506              :     /* Get the boolean parameter that controls the NUMA behavior. */
     507        65538 :     include_numa = PG_GETARG_BOOL(0);
     508              : 
     509        65538 :     return pg_buffercache_os_pages_internal(fcinfo, include_numa);
     510              : }
     511              : 
     512              : /* Backward-compatible wrapper for v1.6: always requests NUMA information. */
     513              : Datum
     514            0 : pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
     515              : {
     516              :     /* Delegate to the internal function with include_numa=true */
     517            0 :     return pg_buffercache_os_pages_internal(fcinfo, true);
     518              : }
     519              : 
     520              : Datum
     521            2 : pg_buffercache_summary(PG_FUNCTION_ARGS)
     522              : {
     523              :     Datum       result;
     524              :     TupleDesc   tupledesc;
     525              :     HeapTuple   tuple;
     526              :     Datum       values[NUM_BUFFERCACHE_SUMMARY_ELEM];
     527              :     bool        nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
     528              : 
     529            2 :     int32       buffers_used = 0;
     530            2 :     int32       buffers_unused = 0;
     531            2 :     int32       buffers_dirty = 0;
     532            2 :     int32       buffers_pinned = 0;
     533            2 :     int64       usagecount_total = 0;
     534              : 
     535            2 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     536            0 :         elog(ERROR, "return type must be a row type");
     537              : 
     538        32770 :     for (int i = 0; i < NBuffers; i++)
     539              :     {
     540              :         BufferDesc *bufHdr;
     541              :         uint64      buf_state;
     542              : 
     543        32768 :         CHECK_FOR_INTERRUPTS();
     544              : 
     545              :         /*
     546              :          * This function summarizes the state of all headers. Locking the
     547              :          * buffer headers wouldn't provide an improved result as the state of
     548              :          * the buffer can still change after we release the lock and it'd
     549              :          * noticeably increase the cost of the function.
     550              :          */
     551        32768 :         bufHdr = GetBufferDescriptor(i);
     552        32768 :         buf_state = pg_atomic_read_u64(&bufHdr->state);
     553              : 
     554        32768 :         if (buf_state & BM_VALID)
     555              :         {
     556         4100 :             buffers_used++;
     557         4100 :             usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
     558              : 
     559         4100 :             if (buf_state & BM_DIRTY)
     560         1950 :                 buffers_dirty++;
     561              :         }
     562              :         else
     563        28668 :             buffers_unused++;
     564              : 
     565        32768 :         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
     566            0 :             buffers_pinned++;
     567              :     }
     568              : 
     569            2 :     memset(nulls, 0, sizeof(nulls));
     570            2 :     values[0] = Int32GetDatum(buffers_used);
     571            2 :     values[1] = Int32GetDatum(buffers_unused);
     572            2 :     values[2] = Int32GetDatum(buffers_dirty);
     573            2 :     values[3] = Int32GetDatum(buffers_pinned);
     574              : 
     575            2 :     if (buffers_used != 0)
     576            2 :         values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
     577              :     else
     578            0 :         nulls[4] = true;
     579              : 
     580              :     /* Build and return the tuple. */
     581            2 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     582            2 :     result = HeapTupleGetDatum(tuple);
     583              : 
     584            2 :     PG_RETURN_DATUM(result);
     585              : }
     586              : 
     587              : Datum
     588            2 : pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
     589              : {
     590            2 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     591            2 :     int         usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
     592            2 :     int         dirty[BM_MAX_USAGE_COUNT + 1] = {0};
     593            2 :     int         pinned[BM_MAX_USAGE_COUNT + 1] = {0};
     594              :     Datum       values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
     595            2 :     bool        nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
     596              : 
     597            2 :     InitMaterializedSRF(fcinfo, 0);
     598              : 
     599        32770 :     for (int i = 0; i < NBuffers; i++)
     600              :     {
     601        32768 :         BufferDesc *bufHdr = GetBufferDescriptor(i);
     602        32768 :         uint64      buf_state = pg_atomic_read_u64(&bufHdr->state);
     603              :         int         usage_count;
     604              : 
     605        32768 :         CHECK_FOR_INTERRUPTS();
     606              : 
     607        32768 :         usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
     608        32768 :         usage_counts[usage_count]++;
     609              : 
     610        32768 :         if (buf_state & BM_DIRTY)
     611         1950 :             dirty[usage_count]++;
     612              : 
     613        32768 :         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
     614            0 :             pinned[usage_count]++;
     615              :     }
     616              : 
     617           14 :     for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
     618              :     {
     619           12 :         values[0] = Int32GetDatum(i);
     620           12 :         values[1] = Int32GetDatum(usage_counts[i]);
     621           12 :         values[2] = Int32GetDatum(dirty[i]);
     622           12 :         values[3] = Int32GetDatum(pinned[i]);
     623              : 
     624           12 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     625              :     }
     626              : 
     627            2 :     return (Datum) 0;
     628              : }
     629              : 
     630              : /*
     631              :  * Helper function to check if the user has superuser privileges.
     632              :  */
     633              : static void
     634           20 : pg_buffercache_superuser_check(char *func_name)
     635              : {
     636           20 :     if (!superuser())
     637            6 :         ereport(ERROR,
     638              :                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     639              :                  errmsg("must be superuser to use %s()",
     640              :                         func_name)));
     641           14 : }
     642              : 
     643              : /*
     644              :  * Try to evict a shared buffer.
     645              :  */
     646              : Datum
     647            5 : pg_buffercache_evict(PG_FUNCTION_ARGS)
     648              : {
     649              :     Datum       result;
     650              :     TupleDesc   tupledesc;
     651              :     HeapTuple   tuple;
     652              :     Datum       values[NUM_BUFFERCACHE_EVICT_ELEM];
     653            5 :     bool        nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};
     654              : 
     655            5 :     Buffer      buf = PG_GETARG_INT32(0);
     656              :     bool        buffer_flushed;
     657              : 
     658            5 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     659            0 :         elog(ERROR, "return type must be a row type");
     660              : 
     661            5 :     pg_buffercache_superuser_check("pg_buffercache_evict");
     662              : 
     663            4 :     if (buf < 1 || buf > NBuffers)
     664            3 :         elog(ERROR, "bad buffer ID: %d", buf);
     665              : 
     666            1 :     values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
     667            1 :     values[1] = BoolGetDatum(buffer_flushed);
     668              : 
     669            1 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     670            1 :     result = HeapTupleGetDatum(tuple);
     671              : 
     672            1 :     PG_RETURN_DATUM(result);
     673              : }
     674              : 
     675              : /*
     676              :  * Try to evict specified relation.
     677              :  */
     678              : Datum
     679            3 : pg_buffercache_evict_relation(PG_FUNCTION_ARGS)
     680              : {
     681              :     Datum       result;
     682              :     TupleDesc   tupledesc;
     683              :     HeapTuple   tuple;
     684              :     Datum       values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
     685            3 :     bool        nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};
     686              : 
     687              :     Oid         relOid;
     688              :     Relation    rel;
     689              : 
     690            3 :     int32       buffers_evicted = 0;
     691            3 :     int32       buffers_flushed = 0;
     692            3 :     int32       buffers_skipped = 0;
     693              : 
     694            3 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     695            0 :         elog(ERROR, "return type must be a row type");
     696              : 
     697            3 :     pg_buffercache_superuser_check("pg_buffercache_evict_relation");
     698              : 
     699            2 :     relOid = PG_GETARG_OID(0);
     700              : 
     701            2 :     rel = relation_open(relOid, AccessShareLock);
     702              : 
     703            2 :     if (RelationUsesLocalBuffers(rel))
     704            1 :         ereport(ERROR,
     705              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     706              :                  errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
     707              :                         "pg_buffercache_evict_relation")));
     708              : 
     709            1 :     EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
     710              :                             &buffers_skipped);
     711              : 
     712            1 :     relation_close(rel, AccessShareLock);
     713              : 
     714            1 :     values[0] = Int32GetDatum(buffers_evicted);
     715            1 :     values[1] = Int32GetDatum(buffers_flushed);
     716            1 :     values[2] = Int32GetDatum(buffers_skipped);
     717              : 
     718            1 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     719            1 :     result = HeapTupleGetDatum(tuple);
     720              : 
     721            1 :     PG_RETURN_DATUM(result);
     722              : }
     723              : 
     724              : 
     725              : /*
     726              :  * Try to evict all shared buffers.
     727              :  */
     728              : Datum
     729            2 : pg_buffercache_evict_all(PG_FUNCTION_ARGS)
     730              : {
     731              :     Datum       result;
     732              :     TupleDesc   tupledesc;
     733              :     HeapTuple   tuple;
     734              :     Datum       values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
     735            2 :     bool        nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};
     736              : 
     737            2 :     int32       buffers_evicted = 0;
     738            2 :     int32       buffers_flushed = 0;
     739            2 :     int32       buffers_skipped = 0;
     740              : 
     741            2 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     742            0 :         elog(ERROR, "return type must be a row type");
     743              : 
     744            2 :     pg_buffercache_superuser_check("pg_buffercache_evict_all");
     745              : 
     746            1 :     EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
     747              :                             &buffers_skipped);
     748              : 
     749            1 :     values[0] = Int32GetDatum(buffers_evicted);
     750            1 :     values[1] = Int32GetDatum(buffers_flushed);
     751            1 :     values[2] = Int32GetDatum(buffers_skipped);
     752              : 
     753            1 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     754            1 :     result = HeapTupleGetDatum(tuple);
     755              : 
     756            1 :     PG_RETURN_DATUM(result);
     757              : }
     758              : 
     759              : /*
     760              :  * Try to mark a shared buffer as dirty.
     761              :  */
     762              : Datum
     763            5 : pg_buffercache_mark_dirty(PG_FUNCTION_ARGS)
     764              : {
     765              : 
     766              :     Datum       result;
     767              :     TupleDesc   tupledesc;
     768              :     HeapTuple   tuple;
     769              :     Datum       values[NUM_BUFFERCACHE_MARK_DIRTY_ELEM];
     770            5 :     bool        nulls[NUM_BUFFERCACHE_MARK_DIRTY_ELEM] = {0};
     771              : 
     772            5 :     Buffer      buf = PG_GETARG_INT32(0);
     773              :     bool        buffer_already_dirty;
     774              : 
     775            5 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     776            0 :         elog(ERROR, "return type must be a row type");
     777              : 
     778            5 :     pg_buffercache_superuser_check("pg_buffercache_mark_dirty");
     779              : 
     780            4 :     if (buf < 1 || buf > NBuffers)
     781            3 :         elog(ERROR, "bad buffer ID: %d", buf);
     782              : 
     783            1 :     values[0] = BoolGetDatum(MarkDirtyUnpinnedBuffer(buf, &buffer_already_dirty));
     784            1 :     values[1] = BoolGetDatum(buffer_already_dirty);
     785              : 
     786            1 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     787            1 :     result = HeapTupleGetDatum(tuple);
     788              : 
     789            1 :     PG_RETURN_DATUM(result);
     790              : }
     791              : 
     792              : /*
     793              :  * Try to mark all the shared buffers of a relation as dirty.
     794              :  */
     795              : Datum
     796            3 : pg_buffercache_mark_dirty_relation(PG_FUNCTION_ARGS)
     797              : {
     798              :     Datum       result;
     799              :     TupleDesc   tupledesc;
     800              :     HeapTuple   tuple;
     801              :     Datum       values[NUM_BUFFERCACHE_MARK_DIRTY_RELATION_ELEM];
     802            3 :     bool        nulls[NUM_BUFFERCACHE_MARK_DIRTY_RELATION_ELEM] = {0};
     803              : 
     804              :     Oid         relOid;
     805              :     Relation    rel;
     806              : 
     807            3 :     int32       buffers_already_dirty = 0;
     808            3 :     int32       buffers_dirtied = 0;
     809            3 :     int32       buffers_skipped = 0;
     810              : 
     811            3 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     812            0 :         elog(ERROR, "return type must be a row type");
     813              : 
     814            3 :     pg_buffercache_superuser_check("pg_buffercache_mark_dirty_relation");
     815              : 
     816            2 :     relOid = PG_GETARG_OID(0);
     817              : 
     818            2 :     rel = relation_open(relOid, AccessShareLock);
     819              : 
     820            2 :     if (RelationUsesLocalBuffers(rel))
     821            1 :         ereport(ERROR,
     822              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     823              :                  errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
     824              :                         "pg_buffercache_mark_dirty_relation")));
     825              : 
     826            1 :     MarkDirtyRelUnpinnedBuffers(rel, &buffers_dirtied, &buffers_already_dirty,
     827              :                                 &buffers_skipped);
     828              : 
     829            1 :     relation_close(rel, AccessShareLock);
     830              : 
     831            1 :     values[0] = Int32GetDatum(buffers_dirtied);
     832            1 :     values[1] = Int32GetDatum(buffers_already_dirty);
     833            1 :     values[2] = Int32GetDatum(buffers_skipped);
     834              : 
     835            1 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     836            1 :     result = HeapTupleGetDatum(tuple);
     837              : 
     838            1 :     PG_RETURN_DATUM(result);
     839              : }
     840              : 
     841              : /*
     842              :  * Try to mark all the shared buffers as dirty.
     843              :  */
     844              : Datum
     845            2 : pg_buffercache_mark_dirty_all(PG_FUNCTION_ARGS)
     846              : {
     847              :     Datum       result;
     848              :     TupleDesc   tupledesc;
     849              :     HeapTuple   tuple;
     850              :     Datum       values[NUM_BUFFERCACHE_MARK_DIRTY_ALL_ELEM];
     851            2 :     bool        nulls[NUM_BUFFERCACHE_MARK_DIRTY_ALL_ELEM] = {0};
     852              : 
     853            2 :     int32       buffers_already_dirty = 0;
     854            2 :     int32       buffers_dirtied = 0;
     855            2 :     int32       buffers_skipped = 0;
     856              : 
     857            2 :     if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
     858            0 :         elog(ERROR, "return type must be a row type");
     859              : 
     860            2 :     pg_buffercache_superuser_check("pg_buffercache_mark_dirty_all");
     861              : 
     862            1 :     MarkDirtyAllUnpinnedBuffers(&buffers_dirtied, &buffers_already_dirty,
     863              :                                 &buffers_skipped);
     864              : 
     865            1 :     values[0] = Int32GetDatum(buffers_dirtied);
     866            1 :     values[1] = Int32GetDatum(buffers_already_dirty);
     867            1 :     values[2] = Int32GetDatum(buffers_skipped);
     868              : 
     869            1 :     tuple = heap_form_tuple(tupledesc, values, nulls);
     870            1 :     result = HeapTupleGetDatum(tuple);
     871              : 
     872            1 :     PG_RETURN_DATUM(result);
     873              : }
        

Generated by: LCOV version 2.0-1