Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * instrument.c
4 : * functions for instrumentation of plan execution
5 : *
6 : *
7 : * Copyright (c) 2001-2026, PostgreSQL Global Development Group
8 : *
9 : * IDENTIFICATION
10 : * src/backend/executor/instrument.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <unistd.h>
17 :
18 : #include "executor/executor.h"
19 : #include "executor/instrument.h"
20 : #include "executor/tuptable.h"
21 : #include "nodes/execnodes.h"
22 : #include "portability/instr_time.h"
23 : #include "utils/guc_hooks.h"
24 :
25 : BufferUsage pgBufferUsage;
26 : static BufferUsage save_pgBufferUsage;
27 : WalUsage pgWalUsage;
28 : static WalUsage save_pgWalUsage;
29 :
30 : static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add);
31 : static void WalUsageAdd(WalUsage *dst, WalUsage *add);
32 :
33 :
34 : /* General purpose instrumentation handling */
35 : Instrumentation *
36 41427 : InstrAlloc(int instrument_options)
37 : {
38 41427 : Instrumentation *instr = palloc0_object(Instrumentation);
39 :
40 41427 : InstrInitOptions(instr, instrument_options);
41 41427 : return instr;
42 : }
43 :
44 : void
45 49535 : InstrInitOptions(Instrumentation *instr, int instrument_options)
46 : {
47 49535 : instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0;
48 49535 : instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0;
49 49535 : instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
50 49535 : }
51 :
52 : inline void
53 9039923 : InstrStart(Instrumentation *instr)
54 : {
55 9039923 : if (instr->need_timer)
56 : {
57 7140953 : if (!INSTR_TIME_IS_ZERO(instr->starttime))
58 0 : elog(ERROR, "InstrStart called twice in a row");
59 : else
60 7140953 : INSTR_TIME_SET_CURRENT_FAST(instr->starttime);
61 : }
62 :
63 : /* save buffer usage totals at start, if needed */
64 9039923 : if (instr->need_bufusage)
65 7088842 : instr->bufusage_start = pgBufferUsage;
66 :
67 9039923 : if (instr->need_walusage)
68 80708 : instr->walusage_start = pgWalUsage;
69 9039923 : }
70 :
71 : /*
72 : * Helper for InstrStop() and InstrStopNode(), to avoid code duplication
73 : * despite slightly different needs about how time is accumulated.
74 : */
75 : static inline void
76 9038241 : InstrStopCommon(Instrumentation *instr, instr_time *accum_time)
77 : {
78 : instr_time endtime;
79 :
80 : /* update the time only if the timer was requested */
81 9038241 : if (instr->need_timer)
82 : {
83 7139279 : if (INSTR_TIME_IS_ZERO(instr->starttime))
84 0 : elog(ERROR, "InstrStop called without start");
85 :
86 7139279 : INSTR_TIME_SET_CURRENT_FAST(endtime);
87 7139279 : INSTR_TIME_ACCUM_DIFF(*accum_time, endtime, instr->starttime);
88 :
89 7139279 : INSTR_TIME_SET_ZERO(instr->starttime);
90 : }
91 :
92 : /* Add delta of buffer usage since InstrStart to the totals */
93 9038241 : if (instr->need_bufusage)
94 7087168 : BufferUsageAccumDiff(&instr->bufusage,
95 7087168 : &pgBufferUsage, &instr->bufusage_start);
96 :
97 9038241 : if (instr->need_walusage)
98 79034 : WalUsageAccumDiff(&instr->walusage,
99 79034 : &pgWalUsage, &instr->walusage_start);
100 9038241 : }
101 :
102 : void
103 79056 : InstrStop(Instrumentation *instr)
104 : {
105 79056 : InstrStopCommon(instr, &instr->total);
106 79056 : }
107 :
108 : /* Node instrumentation handling */
109 :
110 : /* Allocate new node instrumentation structure */
111 : NodeInstrumentation *
112 6988 : InstrAllocNode(int instrument_options, bool async_mode)
113 : {
114 6988 : NodeInstrumentation *instr = palloc_object(NodeInstrumentation);
115 :
116 6988 : InstrInitNode(instr, instrument_options, async_mode);
117 :
118 6988 : return instr;
119 : }
120 :
121 : /* Initialize a pre-allocated instrumentation structure. */
122 : void
123 8108 : InstrInitNode(NodeInstrumentation *instr, int instrument_options, bool async_mode)
124 : {
125 8108 : memset(instr, 0, sizeof(NodeInstrumentation));
126 8108 : InstrInitOptions(&instr->instr, instrument_options);
127 8108 : instr->async_mode = async_mode;
128 8108 : }
129 :
130 : /* Entry to a plan node */
131 : inline void
132 8959193 : InstrStartNode(NodeInstrumentation *instr)
133 : {
134 8959193 : InstrStart(&instr->instr);
135 8959193 : }
136 :
137 : /* Exit from a plan node */
138 : inline void
139 8959185 : InstrStopNode(NodeInstrumentation *instr, double nTuples)
140 : {
141 8959185 : double save_tuplecount = instr->tuplecount;
142 :
143 : /* count the returned tuples */
144 8959185 : instr->tuplecount += nTuples;
145 :
146 : /*
147 : * Note that in contrast to InstrStop() the time is accumulated into
148 : * NodeInstrumentation->counter, with total only getting updated in
149 : * InstrEndLoop. We need the separate counter variable because we need to
150 : * calculate start-up time for the first tuple in each cycle, and then
151 : * accumulate it together.
152 : */
153 8959185 : InstrStopCommon(&instr->instr, &instr->counter);
154 :
155 : /* Is this the first tuple of this cycle? */
156 8959185 : if (!instr->running)
157 : {
158 34202 : instr->running = true;
159 34202 : instr->firsttuple = instr->counter;
160 : }
161 : else
162 : {
163 : /*
164 : * In async mode, if the plan node hadn't emitted any tuples before,
165 : * this might be the first tuple
166 : */
167 8924983 : if (instr->async_mode && save_tuplecount < 1.0)
168 22 : instr->firsttuple = instr->counter;
169 : }
170 8959185 : }
171 :
172 : /*
173 : * ExecProcNode wrapper that performs instrumentation calls. By keeping
174 : * this a separate function, we avoid overhead in the normal case where
175 : * no instrumentation is wanted.
176 : *
177 : * This is implemented in instrument.c as all the functions it calls directly
178 : * are here, allowing them to be inlined even when not using LTO.
179 : */
180 : TupleTableSlot *
181 8951895 : ExecProcNodeInstr(PlanState *node)
182 : {
183 : TupleTableSlot *result;
184 :
185 8951895 : InstrStartNode(node->instrument);
186 :
187 8951895 : result = node->ExecProcNodeReal(node);
188 :
189 8951887 : InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);
190 :
191 8951887 : return result;
192 : }
193 :
194 : /* Update tuple count */
195 : void
196 1 : InstrUpdateTupleCount(NodeInstrumentation *instr, double nTuples)
197 : {
198 : /* count the returned tuples */
199 1 : instr->tuplecount += nTuples;
200 1 : }
201 :
202 : /* Finish a run cycle for a plan node */
203 : void
204 35579 : InstrEndLoop(NodeInstrumentation *instr)
205 : {
206 : /* Skip if nothing has happened, or already shut down */
207 35579 : if (!instr->running)
208 1386 : return;
209 :
210 34193 : if (!INSTR_TIME_IS_ZERO(instr->instr.starttime))
211 0 : elog(ERROR, "InstrEndLoop called on running node");
212 :
213 : /* Accumulate per-cycle statistics into totals */
214 34193 : INSTR_TIME_ADD(instr->startup, instr->firsttuple);
215 34193 : INSTR_TIME_ADD(instr->instr.total, instr->counter);
216 34193 : instr->ntuples += instr->tuplecount;
217 34193 : instr->nloops += 1;
218 :
219 : /* Reset for next cycle (if any) */
220 34193 : instr->running = false;
221 34193 : INSTR_TIME_SET_ZERO(instr->instr.starttime);
222 34193 : INSTR_TIME_SET_ZERO(instr->counter);
223 34193 : INSTR_TIME_SET_ZERO(instr->firsttuple);
224 34193 : instr->tuplecount = 0;
225 : }
226 :
227 : /*
228 : * Aggregate instrumentation from parallel workers. Must be called after
229 : * InstrEndLoop.
230 : */
231 : void
232 2697 : InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add)
233 : {
234 : Assert(!add->running);
235 :
236 2697 : INSTR_TIME_ADD(dst->startup, add->startup);
237 2697 : INSTR_TIME_ADD(dst->instr.total, add->instr.total);
238 2697 : dst->ntuples += add->ntuples;
239 2697 : dst->ntuples2 += add->ntuples2;
240 2697 : dst->nloops += add->nloops;
241 2697 : dst->nfiltered1 += add->nfiltered1;
242 2697 : dst->nfiltered2 += add->nfiltered2;
243 :
244 2697 : if (dst->instr.need_bufusage)
245 1272 : BufferUsageAdd(&dst->instr.bufusage, &add->instr.bufusage);
246 :
247 2697 : if (dst->instr.need_walusage)
248 0 : WalUsageAdd(&dst->instr.walusage, &add->instr.walusage);
249 2697 : }
250 :
251 : /* Trigger instrumentation handling */
252 : TriggerInstrumentation *
253 0 : InstrAllocTrigger(int n, int instrument_options)
254 : {
255 0 : TriggerInstrumentation *tginstr = palloc0_array(TriggerInstrumentation, n);
256 : int i;
257 :
258 0 : for (i = 0; i < n; i++)
259 0 : InstrInitOptions(&tginstr[i].instr, instrument_options);
260 :
261 0 : return tginstr;
262 : }
263 :
264 : void
265 0 : InstrStartTrigger(TriggerInstrumentation *tginstr)
266 : {
267 0 : InstrStart(&tginstr->instr);
268 0 : }
269 :
270 : void
271 0 : InstrStopTrigger(TriggerInstrumentation *tginstr, int64 firings)
272 : {
273 0 : InstrStop(&tginstr->instr);
274 0 : tginstr->firings += firings;
275 0 : }
276 :
277 : /* note current values during parallel executor startup */
278 : void
279 2015 : InstrStartParallelQuery(void)
280 : {
281 2015 : save_pgBufferUsage = pgBufferUsage;
282 2015 : save_pgWalUsage = pgWalUsage;
283 2015 : }
284 :
285 : /* report usage after parallel executor shutdown */
286 : void
287 2007 : InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
288 : {
289 2007 : memset(bufusage, 0, sizeof(BufferUsage));
290 2007 : BufferUsageAccumDiff(bufusage, &pgBufferUsage, &save_pgBufferUsage);
291 2007 : memset(walusage, 0, sizeof(WalUsage));
292 2007 : WalUsageAccumDiff(walusage, &pgWalUsage, &save_pgWalUsage);
293 2007 : }
294 :
295 : /* accumulate work done by workers in leader's stats */
296 : void
297 2007 : InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
298 : {
299 2007 : BufferUsageAdd(&pgBufferUsage, bufusage);
300 2007 : WalUsageAdd(&pgWalUsage, walusage);
301 2007 : }
302 :
303 : /* dst += add */
304 : static void
305 3279 : BufferUsageAdd(BufferUsage *dst, const BufferUsage *add)
306 : {
307 3279 : dst->shared_blks_hit += add->shared_blks_hit;
308 3279 : dst->shared_blks_read += add->shared_blks_read;
309 3279 : dst->shared_blks_dirtied += add->shared_blks_dirtied;
310 3279 : dst->shared_blks_written += add->shared_blks_written;
311 3279 : dst->local_blks_hit += add->local_blks_hit;
312 3279 : dst->local_blks_read += add->local_blks_read;
313 3279 : dst->local_blks_dirtied += add->local_blks_dirtied;
314 3279 : dst->local_blks_written += add->local_blks_written;
315 3279 : dst->temp_blks_read += add->temp_blks_read;
316 3279 : dst->temp_blks_written += add->temp_blks_written;
317 3279 : INSTR_TIME_ADD(dst->shared_blk_read_time, add->shared_blk_read_time);
318 3279 : INSTR_TIME_ADD(dst->shared_blk_write_time, add->shared_blk_write_time);
319 3279 : INSTR_TIME_ADD(dst->local_blk_read_time, add->local_blk_read_time);
320 3279 : INSTR_TIME_ADD(dst->local_blk_write_time, add->local_blk_write_time);
321 3279 : INSTR_TIME_ADD(dst->temp_blk_read_time, add->temp_blk_read_time);
322 3279 : INSTR_TIME_ADD(dst->temp_blk_write_time, add->temp_blk_write_time);
323 3279 : }
324 :
325 : /* dst += add - sub */
326 : inline void
327 7212297 : BufferUsageAccumDiff(BufferUsage *dst,
328 : const BufferUsage *add,
329 : const BufferUsage *sub)
330 : {
331 7212297 : dst->shared_blks_hit += add->shared_blks_hit - sub->shared_blks_hit;
332 7212297 : dst->shared_blks_read += add->shared_blks_read - sub->shared_blks_read;
333 7212297 : dst->shared_blks_dirtied += add->shared_blks_dirtied - sub->shared_blks_dirtied;
334 7212297 : dst->shared_blks_written += add->shared_blks_written - sub->shared_blks_written;
335 7212297 : dst->local_blks_hit += add->local_blks_hit - sub->local_blks_hit;
336 7212297 : dst->local_blks_read += add->local_blks_read - sub->local_blks_read;
337 7212297 : dst->local_blks_dirtied += add->local_blks_dirtied - sub->local_blks_dirtied;
338 7212297 : dst->local_blks_written += add->local_blks_written - sub->local_blks_written;
339 7212297 : dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
340 7212297 : dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
341 7212297 : INSTR_TIME_ACCUM_DIFF(dst->shared_blk_read_time,
342 : add->shared_blk_read_time, sub->shared_blk_read_time);
343 7212297 : INSTR_TIME_ACCUM_DIFF(dst->shared_blk_write_time,
344 : add->shared_blk_write_time, sub->shared_blk_write_time);
345 7212297 : INSTR_TIME_ACCUM_DIFF(dst->local_blk_read_time,
346 : add->local_blk_read_time, sub->local_blk_read_time);
347 7212297 : INSTR_TIME_ACCUM_DIFF(dst->local_blk_write_time,
348 : add->local_blk_write_time, sub->local_blk_write_time);
349 7212297 : INSTR_TIME_ACCUM_DIFF(dst->temp_blk_read_time,
350 : add->temp_blk_read_time, sub->temp_blk_read_time);
351 7212297 : INSTR_TIME_ACCUM_DIFF(dst->temp_blk_write_time,
352 : add->temp_blk_write_time, sub->temp_blk_write_time);
353 7212297 : }
354 :
355 : /* helper functions for WAL usage accumulation */
356 : static inline void
357 2007 : WalUsageAdd(WalUsage *dst, WalUsage *add)
358 : {
359 2007 : dst->wal_bytes += add->wal_bytes;
360 2007 : dst->wal_records += add->wal_records;
361 2007 : dst->wal_fpi += add->wal_fpi;
362 2007 : dst->wal_fpi_bytes += add->wal_fpi_bytes;
363 2007 : dst->wal_buffers_full += add->wal_buffers_full;
364 2007 : }
365 :
366 : inline void
367 225964 : WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
368 : {
369 225964 : dst->wal_bytes += add->wal_bytes - sub->wal_bytes;
370 225964 : dst->wal_records += add->wal_records - sub->wal_records;
371 225964 : dst->wal_fpi += add->wal_fpi - sub->wal_fpi;
372 225964 : dst->wal_fpi_bytes += add->wal_fpi_bytes - sub->wal_fpi_bytes;
373 225964 : dst->wal_buffers_full += add->wal_buffers_full - sub->wal_buffers_full;
374 225964 : }
375 :
376 : /* GUC hooks for timing_clock_source */
377 :
378 : bool
379 1279 : check_timing_clock_source(int *newval, void **extra, GucSource source)
380 : {
381 : /*
382 : * Do nothing if timing is not initialized. This is only expected on child
383 : * processes in EXEC_BACKEND builds, as GUC hooks can be called during
384 : * InitializeGUCOptions() before InitProcessGlobals() has had a chance to
385 : * run pg_initialize_timing(). Instead, TSC will be initialized via
386 : * restore_backend_variables.
387 : */
388 : #ifdef EXEC_BACKEND
389 : if (!timing_initialized)
390 : return true;
391 : #else
392 : Assert(timing_initialized);
393 : #endif
394 :
395 : #if PG_INSTR_TSC_CLOCK
396 1279 : pg_initialize_timing_tsc();
397 :
398 1279 : if (*newval == TIMING_CLOCK_SOURCE_TSC && timing_tsc_frequency_khz <= 0)
399 : {
400 0 : GUC_check_errdetail("TSC is not supported as timing clock source");
401 0 : return false;
402 : }
403 : #endif
404 :
405 1279 : return true;
406 : }
407 :
408 : void
409 1279 : assign_timing_clock_source(int newval, void *extra)
410 : {
411 : #ifdef EXEC_BACKEND
412 : if (!timing_initialized)
413 : return;
414 : #else
415 : Assert(timing_initialized);
416 : #endif
417 :
418 : /*
419 : * Ignore the return code since the check hook already verified TSC is
420 : * usable if it's explicitly requested.
421 : */
422 1279 : pg_set_timing_clock_source(newval);
423 1279 : }
424 :
425 : const char *
426 1953 : show_timing_clock_source(void)
427 : {
428 1953 : switch (timing_clock_source)
429 : {
430 1953 : case TIMING_CLOCK_SOURCE_AUTO:
431 : #if PG_INSTR_TSC_CLOCK
432 1953 : if (pg_current_timing_clock_source() == TIMING_CLOCK_SOURCE_TSC)
433 1953 : return "auto (tsc)";
434 : #endif
435 0 : return "auto (system)";
436 0 : case TIMING_CLOCK_SOURCE_SYSTEM:
437 0 : return "system";
438 : #if PG_INSTR_TSC_CLOCK
439 0 : case TIMING_CLOCK_SOURCE_TSC:
440 0 : return "tsc";
441 : #endif
442 : }
443 :
444 : /* unreachable */
445 0 : return "?";
446 : }
|