Line  Hit count  Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_buffercache_pages.c
4 : * display some contents of the buffer cache
5 : *
6 : * contrib/pg_buffercache/pg_buffercache_pages.c
7 : *-------------------------------------------------------------------------
8 : */
9 : #include "postgres.h"
10 :
11 : #include "access/htup_details.h"
12 : #include "catalog/pg_type.h"
13 : #include "funcapi.h"
14 : #include "storage/buf_internals.h"
15 : #include "storage/bufmgr.h"
16 :
17 :
18 : #define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
19 : #define NUM_BUFFERCACHE_PAGES_ELEM 9
20 : #define NUM_BUFFERCACHE_SUMMARY_ELEM 5
21 : #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
22 :
23 2 : PG_MODULE_MAGIC_EXT(
24 : .name = "pg_buffercache",
25 : .version = PG_VERSION
26 : );
27 :
28 : /*
29 : * Record structure holding the cache data to be exposed.
30 : */
31 : typedef struct
32 : {
33 : uint32 bufferid;
34 : RelFileNumber relfilenumber;
35 : Oid reltablespace;
36 : Oid reldatabase;
37 : ForkNumber forknum;
38 : BlockNumber blocknum;
39 : bool isvalid;
40 : bool isdirty;
41 : uint16 usagecount;
42 :
43 : /*
44 : * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
45 : * being pinned by too many backends and each backend will only pin once
46 : * because of bufmgr.c's PrivateRefCount infrastructure.
47 : */
48 : int32 pinning_backends;
49 : } BufferCachePagesRec;
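The comment above argues that an int32 pin counter cannot overflow. As a standalone illustration (ILLUSTRATIVE_MAX_BACKENDS below is an assumption mirroring the 18-bit refcount limit described in PostgreSQL's headers, not the authoritative definition), a compile-time check makes the headroom explicit:

#include <stdint.h>

/* Assumed to match MAX_BACKENDS (2^18 - 1 = 262143); the authoritative
 * definition lives in the server headers, not here. */
#define ILLUSTRATIVE_MAX_BACKENDS ((1 << 18) - 1)

/* Even if every possible backend held one pin on the same buffer, the
 * count stays far below INT32_MAX, so an int32 pinning_backends is safe. */
_Static_assert(ILLUSTRATIVE_MAX_BACKENDS <= INT32_MAX,
               "pin count fits comfortably in int32");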
50 :
51 :
52 : /*
53 : * Function context for data persisting over repeated calls.
54 : */
55 : typedef struct
56 : {
57 : TupleDesc tupdesc;
58 : BufferCachePagesRec *record;
59 : } BufferCachePagesContext;
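Both structs above exist only to carry state across calls of the value-per-call set-returning-function (SRF) protocol from funcapi.h, which pg_buffercache_pages() follows below. A minimal, hypothetical sketch of that protocol (countdown_sketch is not part of this module; it assumes the module already declares its magic block and that a matching SQL function is declared RETURNS SETOF integer):

#include "postgres.h"
#include "funcapi.h"

PG_FUNCTION_INFO_V1(countdown_sketch);

Datum
countdown_sketch(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;

	if (SRF_IS_FIRSTCALL())
	{
		int32		nrows = PG_GETARG_INT32(0);

		/* One-time setup; anything that must survive later calls belongs
		 * in funcctx->multi_call_memory_ctx or funcctx->user_fctx. */
		funcctx = SRF_FIRSTCALL_INIT();
		funcctx->max_calls = (nrows > 0) ? nrows : 0;
	}

	funcctx = SRF_PERCALL_SETUP();

	/* Return one value per call until max_calls rows have been emitted. */
	if (funcctx->call_cntr < funcctx->max_calls)
		SRF_RETURN_NEXT(funcctx, Int32GetDatum((int32) funcctx->call_cntr));
	else
		SRF_RETURN_DONE(funcctx);
}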
60 :
61 :
62 : /*
63 : * Function returning one row per shared buffer: its identity (relation file
64 : * number, tablespace, database, fork, block) and state (dirty, usage, pins).
65 : */
66 4 : PG_FUNCTION_INFO_V1(pg_buffercache_pages);
67 4 : PG_FUNCTION_INFO_V1(pg_buffercache_summary);
68 4 : PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
69 2 : PG_FUNCTION_INFO_V1(pg_buffercache_evict);
70 :
71 : Datum
72 65540 : pg_buffercache_pages(PG_FUNCTION_ARGS)
73 : {
74 : FuncCallContext *funcctx;
75 : Datum result;
76 : MemoryContext oldcontext;
77 : BufferCachePagesContext *fctx; /* User function context. */
78 : TupleDesc tupledesc;
79 : TupleDesc expected_tupledesc;
80 : HeapTuple tuple;
81 :
82 65540 : if (SRF_IS_FIRSTCALL())
83 : {
84 : int i;
85 :
86 4 : funcctx = SRF_FIRSTCALL_INIT();
87 :
88 : /* Switch context when allocating stuff to be used in later calls */
89 4 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
90 :
91 : /* Create a user function context for cross-call persistence */
92 4 : fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
93 :
94 : /*
95 : * To support upgrades from version 1.0 of this extension smoothly,
96 : * transparently handle the (non-)existence of the pinning_backends
97 : * column. Unfortunately that means we have to look up the call's
98 : * actual result type: we can't just trust the result type implied by
99 : * this function's definition, because a caller still using the old
100 : * (or even a wrong) SQL function definition could otherwise crash.
101 : */
102 4 : if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
103 0 : elog(ERROR, "return type must be a row type");
104 :
105 4 : if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
106 4 : expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
107 0 : elog(ERROR, "incorrect number of output arguments");
108 :
109 : /* Construct a tuple descriptor for the result rows. */
110 4 : tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
111 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
112 : INT4OID, -1, 0);
113 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
114 : OIDOID, -1, 0);
115 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
116 : OIDOID, -1, 0);
117 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
118 : OIDOID, -1, 0);
119 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
120 : INT2OID, -1, 0);
121 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
122 : INT8OID, -1, 0);
123 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
124 : BOOLOID, -1, 0);
125 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
126 : INT2OID, -1, 0);
127 :
128 4 : if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
129 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
130 : INT4OID, -1, 0);
131 :
132 4 : fctx->tupdesc = BlessTupleDesc(tupledesc);
133 :
134 : /* Allocate NBuffers worth of BufferCachePagesRec records. */
135 4 : fctx->record = (BufferCachePagesRec *)
136 4 : MemoryContextAllocHuge(CurrentMemoryContext,
137 : sizeof(BufferCachePagesRec) * NBuffers);
138 :
139 : /* Set max calls and remember the user function context. */
140 4 : funcctx->max_calls = NBuffers;
141 4 : funcctx->user_fctx = fctx;
142 :
143 : /* Return to original context when allocating transient memory */
144 4 : MemoryContextSwitchTo(oldcontext);
145 :
146 : /*
147 : * Scan through all the buffers, saving the relevant fields in the
148 : * fctx->record structure.
149 : *
150 : * We don't hold the partition locks, so we don't get a consistent
151 : * snapshot across all buffers, but we do grab the buffer header
152 : * locks, so the information of each buffer is self-consistent.
153 : */
154 65540 : for (i = 0; i < NBuffers; i++)
155 : {
156 : BufferDesc *bufHdr;
157 : uint32 buf_state;
158 :
159 65536 : bufHdr = GetBufferDescriptor(i);
160 : /* Lock each buffer header before inspecting. */
161 65536 : buf_state = LockBufHdr(bufHdr);
162 :
163 65536 : fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
164 65536 : fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
165 65536 : fctx->record[i].reltablespace = bufHdr->tag.spcOid;
166 65536 : fctx->record[i].reldatabase = bufHdr->tag.dbOid;
167 65536 : fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
168 65536 : fctx->record[i].blocknum = bufHdr->tag.blockNum;
169 65536 : fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
170 65536 : fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
171 :
172 65536 : if (buf_state & BM_DIRTY)
173 3764 : fctx->record[i].isdirty = true;
174 : else
175 61772 : fctx->record[i].isdirty = false;
176 :
177 : /* Note if the buffer is valid, and has storage created */
178 65536 : if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
179 7428 : fctx->record[i].isvalid = true;
180 : else
181 58108 : fctx->record[i].isvalid = false;
182 :
183 65536 : UnlockBufHdr(bufHdr, buf_state);
184 : }
185 : }
186 :
187 65540 : funcctx = SRF_PERCALL_SETUP();
188 :
189 : /* Get the saved state */
190 65540 : fctx = funcctx->user_fctx;
191 :
192 65540 : if (funcctx->call_cntr < funcctx->max_calls)
193 : {
194 65536 : uint32 i = funcctx->call_cntr;
195 : Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
196 : bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
197 :
198 65536 : values[0] = Int32GetDatum(fctx->record[i].bufferid);
199 65536 : nulls[0] = false;
200 :
201 : /*
202 : * Set all fields except the bufferid to null if the buffer is unused
203 : * or not valid.
204 : */
205 65536 : if (fctx->record[i].blocknum == InvalidBlockNumber ||
206 7428 : fctx->record[i].isvalid == false)
207 : {
208 58108 : nulls[1] = true;
209 58108 : nulls[2] = true;
210 58108 : nulls[3] = true;
211 58108 : nulls[4] = true;
212 58108 : nulls[5] = true;
213 58108 : nulls[6] = true;
214 58108 : nulls[7] = true;
215 : /* unused for v1.0 callers, but the array is always long enough */
216 58108 : nulls[8] = true;
217 : }
218 : else
219 : {
220 7428 : values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
221 7428 : nulls[1] = false;
222 7428 : values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
223 7428 : nulls[2] = false;
224 7428 : values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
225 7428 : nulls[3] = false;
226 7428 : values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
227 7428 : nulls[4] = false;
228 7428 : values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
229 7428 : nulls[5] = false;
230 7428 : values[6] = BoolGetDatum(fctx->record[i].isdirty);
231 7428 : nulls[6] = false;
232 7428 : values[7] = Int16GetDatum(fctx->record[i].usagecount);
233 7428 : nulls[7] = false;
234 : /* unused for v1.0 callers, but the array is always long enough */
235 7428 : values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
236 7428 : nulls[8] = false;
237 : }
238 :
239 : /* Build and return the tuple. */
240 65536 : tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
241 65536 : result = HeapTupleGetDatum(tuple);
242 :
243 65536 : SRF_RETURN_NEXT(funcctx, result);
244 : }
245 : else
246 4 : SRF_RETURN_DONE(funcctx);
247 : }
248 :
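Throughout this file a buffer's refcount, usage count, and flag bits are unpacked from the single buf_state word via the BUF_STATE_GET_* macros and BM_* flags from storage/buf_internals.h. The standalone sketch below mirrors that packing with illustrative masks (an 18-bit refcount, a 4-bit usage count, flag bits above them); the SKETCH_* names and exact bit positions are assumptions for illustration, not the server's definitions:

#include <stdint.h>
#include <stdio.h>

#define SKETCH_REFCOUNT_MASK     ((1U << 18) - 1)
#define SKETCH_USAGECOUNT_SHIFT  18
#define SKETCH_USAGECOUNT_MASK   (0xFU << SKETCH_USAGECOUNT_SHIFT)
#define SKETCH_DIRTY_FLAG        (1U << 23)	/* stand-in for BM_DIRTY */

static uint32_t
get_refcount(uint32_t state)
{
	return state & SKETCH_REFCOUNT_MASK;
}

static uint32_t
get_usagecount(uint32_t state)
{
	return (state & SKETCH_USAGECOUNT_MASK) >> SKETCH_USAGECOUNT_SHIFT;
}

int
main(void)
{
	/* One pin, usage count 3, dirty. */
	uint32_t	state = 1U | (3U << SKETCH_USAGECOUNT_SHIFT) | SKETCH_DIRTY_FLAG;

	printf("refcount=%u usagecount=%u dirty=%d\n",
		   (unsigned) get_refcount(state),
		   (unsigned) get_usagecount(state),
		   (state & SKETCH_DIRTY_FLAG) != 0);
	return 0;
}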
249 : Datum
250 4 : pg_buffercache_summary(PG_FUNCTION_ARGS)
251 : {
252 : Datum result;
253 : TupleDesc tupledesc;
254 : HeapTuple tuple;
255 : Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM];
256 : bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
257 :
258 4 : int32 buffers_used = 0;
259 4 : int32 buffers_unused = 0;
260 4 : int32 buffers_dirty = 0;
261 4 : int32 buffers_pinned = 0;
262 4 : int64 usagecount_total = 0;
263 :
264 4 : if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
265 0 : elog(ERROR, "return type must be a row type");
266 :
267 65540 : for (int i = 0; i < NBuffers; i++)
268 : {
269 : BufferDesc *bufHdr;
270 : uint32 buf_state;
271 :
272 : /*
273 : * This function summarizes the state of all headers. Locking the
274 : * buffer headers wouldn't give a better answer: a buffer's state can
275 : * change again the moment its lock is released, and taking the locks
276 : * would noticeably increase the cost of the function.
277 : */
278 65536 : bufHdr = GetBufferDescriptor(i);
279 65536 : buf_state = pg_atomic_read_u32(&bufHdr->state);
280 :
281 65536 : if (buf_state & BM_VALID)
282 : {
283 7428 : buffers_used++;
284 7428 : usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
285 :
286 7428 : if (buf_state & BM_DIRTY)
287 3764 : buffers_dirty++;
288 : }
289 : else
290 58108 : buffers_unused++;
291 :
292 65536 : if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
293 0 : buffers_pinned++;
294 : }
295 :
296 4 : memset(nulls, 0, sizeof(nulls));
297 4 : values[0] = Int32GetDatum(buffers_used);
298 4 : values[1] = Int32GetDatum(buffers_unused);
299 4 : values[2] = Int32GetDatum(buffers_dirty);
300 4 : values[3] = Int32GetDatum(buffers_pinned);
301 :
302 4 : if (buffers_used != 0)
303 4 : values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
304 : else
305 0 : nulls[4] = true;
306 :
307 : /* Build and return the tuple. */
308 4 : tuple = heap_form_tuple(tupledesc, values, nulls);
309 4 : result = HeapTupleGetDatum(tuple);
310 :
311 4 : PG_RETURN_DATUM(result);
312 : }
313 :
314 : Datum
315 4 : pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
316 : {
317 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
318 4 : int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
319 4 : int dirty[BM_MAX_USAGE_COUNT + 1] = {0};
320 4 : int pinned[BM_MAX_USAGE_COUNT + 1] = {0};
321 : Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
322 4 : bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
323 :
324 4 : InitMaterializedSRF(fcinfo, 0);
325 :
326 65540 : for (int i = 0; i < NBuffers; i++)
327 : {
328 65536 : BufferDesc *bufHdr = GetBufferDescriptor(i);
329 65536 : uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
330 : int usage_count;
331 :
332 65536 : usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
333 65536 : usage_counts[usage_count]++;
334 :
335 65536 : if (buf_state & BM_DIRTY)
336 3764 : dirty[usage_count]++;
337 :
338 65536 : if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
339 0 : pinned[usage_count]++;
340 : }
341 :
342 28 : for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
343 : {
344 24 : values[0] = Int32GetDatum(i);
345 24 : values[1] = Int32GetDatum(usage_counts[i]);
346 24 : values[2] = Int32GetDatum(dirty[i]);
347 24 : values[3] = Int32GetDatum(pinned[i]);
348 :
349 24 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
350 : }
351 :
352 4 : return (Datum) 0;
353 : }
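Unlike pg_buffercache_pages(), the function above uses the materialized SRF style: InitMaterializedSRF() sets up a tuplestore, every row is appended with tuplestore_putvalues(), and the function returns only once. A minimal, hypothetical sketch of the same pattern (three_rows_sketch is not part of this module and assumes a matching RETURNS SETOF integer SQL declaration):

#include "postgres.h"
#include "funcapi.h"

PG_FUNCTION_INFO_V1(three_rows_sketch);

Datum
three_rows_sketch(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	Datum		values[1];
	bool		nulls[1] = {0};

	/* Creates rsinfo->setResult / rsinfo->setDesc for us. */
	InitMaterializedSRF(fcinfo, 0);

	/* Append all rows up front; the executor drains the tuplestore later. */
	for (int i = 0; i < 3; i++)
	{
		values[0] = Int32GetDatum(i);
		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
	}

	return (Datum) 0;
}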
354 :
355 : /*
356 : * Try to evict a shared buffer.
357 : */
358 : Datum
359 0 : pg_buffercache_evict(PG_FUNCTION_ARGS)
360 : {
361 0 : Buffer buf = PG_GETARG_INT32(0);
362 :
363 0 : if (!superuser())
364 0 : ereport(ERROR,
365 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
366 : errmsg("must be superuser to use pg_buffercache_evict function")));
367 :
368 0 : if (buf < 1 || buf > NBuffers)
369 0 : elog(ERROR, "bad buffer ID: %d", buf);
370 :
371 0 : PG_RETURN_BOOL(EvictUnpinnedBuffer(buf));
372 : }