/*-------------------------------------------------------------------------
 *
 * pg_buffercache_pages.c
 *    display some contents of the buffer cache
 *
 * contrib/pg_buffercache/pg_buffercache_pages.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"


#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
#define NUM_BUFFERCACHE_PAGES_ELEM 9
#define NUM_BUFFERCACHE_SUMMARY_ELEM 5
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4

PG_MODULE_MAGIC;

/*
 * Record structure holding the cache data to be exposed.
 */
typedef struct
{
    uint32      bufferid;
    RelFileNumber relfilenumber;
    Oid         reltablespace;
    Oid         reldatabase;
    ForkNumber  forknum;
    BlockNumber blocknum;
    bool        isvalid;
    bool        isdirty;
    uint16      usagecount;

    /*
     * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
     * being pinned by too many backends and each backend will only pin once
     * because of bufmgr.c's PrivateRefCount infrastructure.
     */
    int32       pinning_backends;
} BufferCachePagesRec;
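
/*
 * Aside (added commentary, not upstream code): the bound referenced above
 * comes from the refcount bits packed into each buffer's state word.  A
 * minimal sketch of asserting it at compile time, assuming MAX_BACKENDS is
 * visible in this translation unit:
 *
 *     StaticAssertDecl(MAX_BACKENDS <= PG_INT32_MAX,
 *                      "pinning_backends fits in an int32");
 */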

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
    TupleDesc   tupdesc;
    BufferCachePagesRec *record;
} BufferCachePagesContext;

/*
 * Function returning data from the shared buffer cache: buffer number,
 * relation file number, tablespace, database, fork, block number, dirty
 * flag, usage count and number of pinning backends.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
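
/*
 * For context (added commentary; the authoritative definitions live in the
 * extension's pg_buffercache--*.sql scripts): these functions are normally
 * reached through the extension's SQL interface, e.g.
 *
 *     SELECT * FROM pg_buffercache;            -- view over _pages()
 *     SELECT * FROM pg_buffercache_summary();
 *     SELECT * FROM pg_buffercache_usage_counts();
 */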

Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx;  /* User function context. */
    TupleDesc   tupledesc;
    TupleDesc   expected_tupledesc;
    HeapTuple   tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int         i;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /*
         * To smoothly support upgrades from version 1.0 of this extension,
         * transparently handle the (non-)existence of the pinning_backends
         * column.  Unfortunately we have to look up the call's result type
         * to do that: blindly trusting the result type implied by this C
         * function's current definition could crash if somebody is still
         * using the old (or even a wrong) SQL function definition.
         */
        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
            expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");
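
        /*
         * Illustration (added commentary): a cluster that installed the
         * extension with, e.g., CREATE EXTENSION pg_buffercache VERSION
         * '1.0' declares this function with only 8 OUT columns, so
         * expected_tupledesc->natts is 8 and the pinning_backends attribute
         * below is omitted; later versions declare all 9 columns.
         */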

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
            TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
                               INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
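
        /*
         * Aside (added commentary): LockBufHdr() spins until it can set the
         * header's lock bit and returns the state word it captured, so the
         * BUF_STATE_GET_* accessors below all decode a single atomic
         * snapshot of the buffer's flags, refcount and usage count.
         */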
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *bufHdr;
            uint32      buf_state;

            bufHdr = GetBufferDescriptor(i);
            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);

            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
            fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
            fctx->record[i].reltablespace = bufHdr->tag.spcOid;
            fctx->record[i].reldatabase = bufHdr->tag.dbOid;
            fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
            fctx->record[i].blocknum = bufHdr->tag.blockNum;
            fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
            fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                fctx->record[i].isdirty = true;
            else
                fctx->record[i].isdirty = false;

            /* Note if the buffer is valid, and has storage created */
            if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
                fctx->record[i].isvalid = true;
            else
                fctx->record[i].isvalid = false;

            UnlockBufHdr(bufHdr, buf_state);
        }
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32      i = funcctx->call_cntr;
        Datum       values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (fctx->record[i].blocknum == InvalidBlockNumber ||
            fctx->record[i].isvalid == false)
        {
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
            nulls[7] = true;
            /* unused for v1.0 callers, but the array is always long enough */
            nulls[8] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
            nulls[3] = false;
            values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(fctx->record[i].isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(fctx->record[i].usagecount);
            nulls[7] = false;
            /* unused for v1.0 callers, but the array is always long enough */
            values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
            nulls[8] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}

Datum
pg_buffercache_summary(PG_FUNCTION_ARGS)
{
    Datum       result;
    TupleDesc   tupledesc;
    HeapTuple   tuple;
    Datum       values[NUM_BUFFERCACHE_SUMMARY_ELEM];
    bool        nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];

    int32       buffers_used = 0;
    int32       buffers_unused = 0;
    int32       buffers_dirty = 0;
    int32       buffers_pinned = 0;
    int64       usagecount_total = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr;
        uint32      buf_state;

        /*
         * This function summarizes the state of all headers.  Locking the
         * buffer headers wouldn't improve the result, since a buffer's
         * state can still change as soon as we release the lock, and taking
         * the locks would noticeably increase the cost of the function.
         */
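        /*
         * Aside (added commentary): pg_atomic_read_u32() is a plain
         * unlocked atomic read, so the value may be stale by the time we
         * use it, but it can never be a torn mix of two states: the flags,
         * refcount and usage count are all packed into one 32-bit word.
         */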
        bufHdr = GetBufferDescriptor(i);
        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if (buf_state & BM_VALID)
        {
            buffers_used++;
            usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                buffers_dirty++;
        }
        else
            buffers_unused++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            buffers_pinned++;
    }

    memset(nulls, 0, sizeof(nulls));
    values[0] = Int32GetDatum(buffers_used);
    values[1] = Int32GetDatum(buffers_unused);
    values[2] = Int32GetDatum(buffers_dirty);
    values[3] = Int32GetDatum(buffers_pinned);

    if (buffers_used != 0)
        values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
    else
        nulls[4] = true;

    /* Build and return the tuple. */
    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

Datum
pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    int         usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
    int         dirty[BM_MAX_USAGE_COUNT + 1] = {0};
    int         pinned[BM_MAX_USAGE_COUNT + 1] = {0};
    Datum       values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
    bool        nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};

    InitMaterializedSRF(fcinfo, 0);
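
    /*
     * Added note: InitMaterializedSRF() has set up rsinfo->setResult (a
     * tuplestore) and rsinfo->setDesc for us, so unlike the value-per-call
     * SRF protocol used by pg_buffercache_pages() above, this function
     * materializes all of its rows in a single call and then returns
     * (Datum) 0.
     */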

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr = GetBufferDescriptor(i);
        uint32      buf_state = pg_atomic_read_u32(&bufHdr->state);
        int         usage_count;

        usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
        usage_counts[usage_count]++;

        if (buf_state & BM_DIRTY)
            dirty[usage_count]++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            pinned[usage_count]++;
    }

    for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
    {
        values[0] = Int32GetDatum(i);
        values[1] = Int32GetDatum(usage_counts[i]);
        values[2] = Int32GetDatum(dirty[i]);
        values[3] = Int32GetDatum(pinned[i]);

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    }

    return (Datum) 0;
}