Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_buffercache_pages.c
4 : * display some contents of the buffer cache
5 : *
6 : * contrib/pg_buffercache/pg_buffercache_pages.c
7 : *-------------------------------------------------------------------------
8 : */
9 : #include "postgres.h"
10 :
11 : #include "access/htup_details.h"
12 : #include "catalog/pg_type.h"
13 : #include "funcapi.h"
14 : #include "storage/buf_internals.h"
15 : #include "storage/bufmgr.h"
16 :
17 :
/*
 * Column counts of the result rows produced by the functions in this file.
 *
 * pg_buffercache_pages accepts a result row type with either the minimum
 * or the full number of columns; the extra column is pinning_backends.
 */
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM		8	/* pages, no pinning_backends */
#define NUM_BUFFERCACHE_PAGES_ELEM			9	/* pages, all columns */
#define NUM_BUFFERCACHE_SUMMARY_ELEM		5	/* pg_buffercache_summary */
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM	4	/* pg_buffercache_usage_counts */
22 :
23 2 : PG_MODULE_MAGIC;
24 :
25 : /*
26 : * Record structure holding the to be exposed cache data.
27 : */
28 : typedef struct
29 : {
30 : uint32 bufferid;
31 : RelFileNumber relfilenumber;
32 : Oid reltablespace;
33 : Oid reldatabase;
34 : ForkNumber forknum;
35 : BlockNumber blocknum;
36 : bool isvalid;
37 : bool isdirty;
38 : uint16 usagecount;
39 :
40 : /*
41 : * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
42 : * being pinned by too many backends and each backend will only pin once
43 : * because of bufmgr.c's PrivateRefCount infrastructure.
44 : */
45 : int32 pinning_backends;
46 : } BufferCachePagesRec;
47 :
48 :
49 : /*
50 : * Function context for data persisting over repeated calls.
51 : */
52 : typedef struct
53 : {
54 : TupleDesc tupdesc;
55 : BufferCachePagesRec *record;
56 : } BufferCachePagesContext;
57 :
58 :
/*
 * SQL-callable entry points defined in this file.
 *
 * pg_buffercache_pages returns data from the shared buffer cache: buffer
 * number, relation filenode/tablespace/database, fork and block number,
 * dirty indicator, usage count and, when the result row type has the
 * column, the number of pinning backends.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
PG_FUNCTION_INFO_V1(pg_buffercache_evict);
67 :
68 : Datum
69 65540 : pg_buffercache_pages(PG_FUNCTION_ARGS)
70 : {
71 : FuncCallContext *funcctx;
72 : Datum result;
73 : MemoryContext oldcontext;
74 : BufferCachePagesContext *fctx; /* User function context. */
75 : TupleDesc tupledesc;
76 : TupleDesc expected_tupledesc;
77 : HeapTuple tuple;
78 :
79 65540 : if (SRF_IS_FIRSTCALL())
80 : {
81 : int i;
82 :
83 4 : funcctx = SRF_FIRSTCALL_INIT();
84 :
85 : /* Switch context when allocating stuff to be used in later calls */
86 4 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
87 :
88 : /* Create a user function context for cross-call persistence */
89 4 : fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
90 :
91 : /*
92 : * To smoothly support upgrades from version 1.0 of this extension
93 : * transparently handle the (non-)existence of the pinning_backends
94 : * column. We unfortunately have to get the result type for that... -
95 : * we can't use the result type determined by the function definition
96 : * without potentially crashing when somebody uses the old (or even
97 : * wrong) function definition though.
98 : */
99 4 : if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
100 0 : elog(ERROR, "return type must be a row type");
101 :
102 4 : if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
103 4 : expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
104 0 : elog(ERROR, "incorrect number of output arguments");
105 :
106 : /* Construct a tuple descriptor for the result rows. */
107 4 : tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
108 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
109 : INT4OID, -1, 0);
110 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
111 : OIDOID, -1, 0);
112 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
113 : OIDOID, -1, 0);
114 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
115 : OIDOID, -1, 0);
116 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
117 : INT2OID, -1, 0);
118 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
119 : INT8OID, -1, 0);
120 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
121 : BOOLOID, -1, 0);
122 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
123 : INT2OID, -1, 0);
124 :
125 4 : if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
126 4 : TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
127 : INT4OID, -1, 0);
128 :
129 4 : fctx->tupdesc = BlessTupleDesc(tupledesc);
130 :
131 : /* Allocate NBuffers worth of BufferCachePagesRec records. */
132 4 : fctx->record = (BufferCachePagesRec *)
133 4 : MemoryContextAllocHuge(CurrentMemoryContext,
134 : sizeof(BufferCachePagesRec) * NBuffers);
135 :
136 : /* Set max calls and remember the user function context. */
137 4 : funcctx->max_calls = NBuffers;
138 4 : funcctx->user_fctx = fctx;
139 :
140 : /* Return to original context when allocating transient memory */
141 4 : MemoryContextSwitchTo(oldcontext);
142 :
143 : /*
144 : * Scan through all the buffers, saving the relevant fields in the
145 : * fctx->record structure.
146 : *
147 : * We don't hold the partition locks, so we don't get a consistent
148 : * snapshot across all buffers, but we do grab the buffer header
149 : * locks, so the information of each buffer is self-consistent.
150 : */
151 65540 : for (i = 0; i < NBuffers; i++)
152 : {
153 : BufferDesc *bufHdr;
154 : uint32 buf_state;
155 :
156 65536 : bufHdr = GetBufferDescriptor(i);
157 : /* Lock each buffer header before inspecting. */
158 65536 : buf_state = LockBufHdr(bufHdr);
159 :
160 65536 : fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
161 65536 : fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
162 65536 : fctx->record[i].reltablespace = bufHdr->tag.spcOid;
163 65536 : fctx->record[i].reldatabase = bufHdr->tag.dbOid;
164 65536 : fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
165 65536 : fctx->record[i].blocknum = bufHdr->tag.blockNum;
166 65536 : fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
167 65536 : fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
168 :
169 65536 : if (buf_state & BM_DIRTY)
170 3748 : fctx->record[i].isdirty = true;
171 : else
172 61788 : fctx->record[i].isdirty = false;
173 :
174 : /* Note if the buffer is valid, and has storage created */
175 65536 : if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
176 7308 : fctx->record[i].isvalid = true;
177 : else
178 58228 : fctx->record[i].isvalid = false;
179 :
180 65536 : UnlockBufHdr(bufHdr, buf_state);
181 : }
182 : }
183 :
184 65540 : funcctx = SRF_PERCALL_SETUP();
185 :
186 : /* Get the saved state */
187 65540 : fctx = funcctx->user_fctx;
188 :
189 65540 : if (funcctx->call_cntr < funcctx->max_calls)
190 : {
191 65536 : uint32 i = funcctx->call_cntr;
192 : Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
193 : bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
194 :
195 65536 : values[0] = Int32GetDatum(fctx->record[i].bufferid);
196 65536 : nulls[0] = false;
197 :
198 : /*
199 : * Set all fields except the bufferid to null if the buffer is unused
200 : * or not valid.
201 : */
202 65536 : if (fctx->record[i].blocknum == InvalidBlockNumber ||
203 7308 : fctx->record[i].isvalid == false)
204 : {
205 58228 : nulls[1] = true;
206 58228 : nulls[2] = true;
207 58228 : nulls[3] = true;
208 58228 : nulls[4] = true;
209 58228 : nulls[5] = true;
210 58228 : nulls[6] = true;
211 58228 : nulls[7] = true;
212 : /* unused for v1.0 callers, but the array is always long enough */
213 58228 : nulls[8] = true;
214 : }
215 : else
216 : {
217 7308 : values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
218 7308 : nulls[1] = false;
219 7308 : values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
220 7308 : nulls[2] = false;
221 7308 : values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
222 7308 : nulls[3] = false;
223 7308 : values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
224 7308 : nulls[4] = false;
225 7308 : values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
226 7308 : nulls[5] = false;
227 7308 : values[6] = BoolGetDatum(fctx->record[i].isdirty);
228 7308 : nulls[6] = false;
229 7308 : values[7] = Int16GetDatum(fctx->record[i].usagecount);
230 7308 : nulls[7] = false;
231 : /* unused for v1.0 callers, but the array is always long enough */
232 7308 : values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
233 7308 : nulls[8] = false;
234 : }
235 :
236 : /* Build and return the tuple. */
237 65536 : tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
238 65536 : result = HeapTupleGetDatum(tuple);
239 :
240 65536 : SRF_RETURN_NEXT(funcctx, result);
241 : }
242 : else
243 4 : SRF_RETURN_DONE(funcctx);
244 : }
245 :
246 : Datum
247 4 : pg_buffercache_summary(PG_FUNCTION_ARGS)
248 : {
249 : Datum result;
250 : TupleDesc tupledesc;
251 : HeapTuple tuple;
252 : Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM];
253 : bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
254 :
255 4 : int32 buffers_used = 0;
256 4 : int32 buffers_unused = 0;
257 4 : int32 buffers_dirty = 0;
258 4 : int32 buffers_pinned = 0;
259 4 : int64 usagecount_total = 0;
260 :
261 4 : if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
262 0 : elog(ERROR, "return type must be a row type");
263 :
264 65540 : for (int i = 0; i < NBuffers; i++)
265 : {
266 : BufferDesc *bufHdr;
267 : uint32 buf_state;
268 :
269 : /*
270 : * This function summarizes the state of all headers. Locking the
271 : * buffer headers wouldn't provide an improved result as the state of
272 : * the buffer can still change after we release the lock and it'd
273 : * noticeably increase the cost of the function.
274 : */
275 65536 : bufHdr = GetBufferDescriptor(i);
276 65536 : buf_state = pg_atomic_read_u32(&bufHdr->state);
277 :
278 65536 : if (buf_state & BM_VALID)
279 : {
280 7308 : buffers_used++;
281 7308 : usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
282 :
283 7308 : if (buf_state & BM_DIRTY)
284 3748 : buffers_dirty++;
285 : }
286 : else
287 58228 : buffers_unused++;
288 :
289 65536 : if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
290 0 : buffers_pinned++;
291 : }
292 :
293 4 : memset(nulls, 0, sizeof(nulls));
294 4 : values[0] = Int32GetDatum(buffers_used);
295 4 : values[1] = Int32GetDatum(buffers_unused);
296 4 : values[2] = Int32GetDatum(buffers_dirty);
297 4 : values[3] = Int32GetDatum(buffers_pinned);
298 :
299 4 : if (buffers_used != 0)
300 4 : values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
301 : else
302 0 : nulls[4] = true;
303 :
304 : /* Build and return the tuple. */
305 4 : tuple = heap_form_tuple(tupledesc, values, nulls);
306 4 : result = HeapTupleGetDatum(tuple);
307 :
308 4 : PG_RETURN_DATUM(result);
309 : }
310 :
311 : Datum
312 4 : pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
313 : {
314 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
315 4 : int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
316 4 : int dirty[BM_MAX_USAGE_COUNT + 1] = {0};
317 4 : int pinned[BM_MAX_USAGE_COUNT + 1] = {0};
318 : Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
319 4 : bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
320 :
321 4 : InitMaterializedSRF(fcinfo, 0);
322 :
323 65540 : for (int i = 0; i < NBuffers; i++)
324 : {
325 65536 : BufferDesc *bufHdr = GetBufferDescriptor(i);
326 65536 : uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
327 : int usage_count;
328 :
329 65536 : usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
330 65536 : usage_counts[usage_count]++;
331 :
332 65536 : if (buf_state & BM_DIRTY)
333 3748 : dirty[usage_count]++;
334 :
335 65536 : if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
336 0 : pinned[usage_count]++;
337 : }
338 :
339 28 : for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
340 : {
341 24 : values[0] = Int32GetDatum(i);
342 24 : values[1] = Int32GetDatum(usage_counts[i]);
343 24 : values[2] = Int32GetDatum(dirty[i]);
344 24 : values[3] = Int32GetDatum(pinned[i]);
345 :
346 24 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
347 : }
348 :
349 4 : return (Datum) 0;
350 : }
351 :
352 : /*
353 : * Try to evict a shared buffer.
354 : */
355 : Datum
356 0 : pg_buffercache_evict(PG_FUNCTION_ARGS)
357 : {
358 0 : Buffer buf = PG_GETARG_INT32(0);
359 :
360 0 : if (!superuser())
361 0 : ereport(ERROR,
362 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
363 : errmsg("must be superuser to use pg_buffercache_evict function")));
364 :
365 0 : if (buf < 1 || buf > NBuffers)
366 0 : elog(ERROR, "bad buffer ID: %d", buf);
367 :
368 0 : PG_RETURN_BOOL(EvictUnpinnedBuffer(buf));
369 : }
|