Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_io.c
4 : * Implementation of IO statistics.
5 : *
6 : * This file contains the implementation of IO statistics. It is kept separate
7 : * from pgstat.c to enforce the line between the statistics access / storage
8 : * implementation and the details about individual types of statistics.
9 : *
10 : * Copyright (c) 2021-2024, PostgreSQL Global Development Group
11 : *
12 : * IDENTIFICATION
13 : * src/backend/utils/activity/pgstat_io.c
14 : * -------------------------------------------------------------------------
15 : */
16 :
17 : #include "postgres.h"
18 :
19 : #include "executor/instrument.h"
20 : #include "storage/bufmgr.h"
21 : #include "utils/pgstat_internal.h"
22 :
23 :
24 : typedef struct PgStat_PendingIO
25 : {
26 : PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
27 : instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
28 : } PgStat_PendingIO;
29 :
30 :
31 : static PgStat_PendingIO PendingIOStats;
32 : static bool have_iostats = false;
33 :
34 :
35 : /*
36 : * Check that stats have not been counted for any combination of IOObject,
37 : * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38 : * stats are tracked for this combination and IO times are non-zero, counts
39 : * should be non-zero.
40 : *
41 : * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42 : * specified by the second parameter. Caller is responsible for locking the
43 : * passed-in PgStat_BktypeIO, if needed.
44 : */
45 : bool
46 0 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
47 : BackendType bktype)
48 : {
49 0 : for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50 : {
51 0 : for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52 : {
53 0 : for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54 : {
55 : /* we do track it */
56 0 : if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57 : {
58 : /* ensure that if IO times are non-zero, counts are > 0 */
59 0 : if (backend_io->times[io_object][io_context][io_op] != 0 &&
60 0 : backend_io->counts[io_object][io_context][io_op] <= 0)
61 0 : return false;
62 :
63 0 : continue;
64 : }
65 :
66 : /* we don't track it, and it is not 0 */
67 0 : if (backend_io->counts[io_object][io_context][io_op] != 0)
68 0 : return false;
69 : }
70 : }
71 : }
72 :
73 0 : return true;
74 : }
75 :
76 : void
77 108011756 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 : {
79 108011756 : pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 108011756 : }
81 :
82 : void
83 111440840 : pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 : {
85 : Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86 : Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87 : Assert((unsigned int) io_op < IOOP_NUM_TYPES);
88 : Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 :
90 111440840 : PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 :
92 111440840 : have_iostats = true;
93 111440840 : }
94 :
95 : /*
96 : * Initialize the internal timing for an IO operation, depending on an
97 : * IO timing GUC.
98 : */
99 : instr_time
100 3429114 : pgstat_prepare_io_time(bool track_io_guc)
101 : {
102 : instr_time io_start;
103 :
104 3429114 : if (track_io_guc)
105 0 : INSTR_TIME_SET_CURRENT(io_start);
106 : else
107 : {
108 : /*
109 : * There is no need to set io_start when an IO timing GUC is disabled,
110 : * still initialize it to zero to avoid compiler warnings.
111 : */
112 3429114 : INSTR_TIME_SET_ZERO(io_start);
113 : }
114 :
115 3429114 : return io_start;
116 : }
117 :
118 : /*
119 : * Like pgstat_count_io_op_n() except it also accumulates time.
120 : */
121 : void
122 3429084 : pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
123 : instr_time start_time, uint32 cnt)
124 : {
125 3429084 : if (track_io_timing)
126 : {
127 : instr_time io_time;
128 :
129 0 : INSTR_TIME_SET_CURRENT(io_time);
130 0 : INSTR_TIME_SUBTRACT(io_time, start_time);
131 :
132 0 : if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
133 : {
134 0 : pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
135 0 : if (io_object == IOOBJECT_RELATION)
136 0 : INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time);
137 0 : else if (io_object == IOOBJECT_TEMP_RELATION)
138 0 : INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time);
139 : }
140 0 : else if (io_op == IOOP_READ)
141 : {
142 0 : pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
143 0 : if (io_object == IOOBJECT_RELATION)
144 0 : INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time);
145 0 : else if (io_object == IOOBJECT_TEMP_RELATION)
146 0 : INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time);
147 : }
148 :
149 0 : INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
150 : io_time);
151 : }
152 :
153 3429084 : pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
154 3429084 : }
155 :
156 : PgStat_IO *
157 112 : pgstat_fetch_stat_io(void)
158 : {
159 112 : pgstat_snapshot_fixed(PGSTAT_KIND_IO);
160 :
161 112 : return &pgStatLocal.snapshot.io;
162 : }
163 :
164 : /*
165 : * Check if there any IO stats waiting for flush.
166 : */
167 : bool
168 13266 : pgstat_io_have_pending_cb(void)
169 : {
170 13266 : return have_iostats;
171 : }
172 :
/*
 * Convenience wrapper around pgstat_io_flush_cb() for callers that do not
 * need its result; the return value is deliberately discarded.
 */
void
pgstat_flush_io(bool nowait)
{
	bool		flush_result;

	flush_result = pgstat_io_flush_cb(nowait);
	(void) flush_result;
}
181 :
182 : /*
183 : * Flush out locally pending IO statistics
184 : *
185 : * If no stats have been recorded, this function returns false.
186 : *
187 : * If nowait is true, this function returns true if the lock could not be
188 : * acquired. Otherwise, return false.
189 : */
190 : bool
191 233402 : pgstat_io_flush_cb(bool nowait)
192 : {
193 : LWLock *bktype_lock;
194 : PgStat_BktypeIO *bktype_shstats;
195 :
196 233402 : if (!have_iostats)
197 54794 : return false;
198 :
199 178608 : bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
200 178608 : bktype_shstats =
201 178608 : &pgStatLocal.shmem->io.stats.stats[MyBackendType];
202 :
203 178608 : if (!nowait)
204 152080 : LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
205 26528 : else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
206 0 : return true;
207 :
208 535824 : for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
209 : {
210 1786080 : for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
211 : {
212 12859776 : for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
213 : {
214 : instr_time time;
215 :
216 11430912 : bktype_shstats->counts[io_object][io_context][io_op] +=
217 11430912 : PendingIOStats.counts[io_object][io_context][io_op];
218 :
219 11430912 : time = PendingIOStats.pending_times[io_object][io_context][io_op];
220 :
221 11430912 : bktype_shstats->times[io_object][io_context][io_op] +=
222 11430912 : INSTR_TIME_GET_MICROSEC(time);
223 : }
224 : }
225 : }
226 :
227 : Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
228 :
229 178608 : LWLockRelease(bktype_lock);
230 :
231 178608 : memset(&PendingIOStats, 0, sizeof(PendingIOStats));
232 :
233 178608 : have_iostats = false;
234 :
235 178608 : return false;
236 : }
237 :
238 : const char *
239 8960 : pgstat_get_io_context_name(IOContext io_context)
240 : {
241 8960 : switch (io_context)
242 : {
243 2240 : case IOCONTEXT_BULKREAD:
244 2240 : return "bulkread";
245 2240 : case IOCONTEXT_BULKWRITE:
246 2240 : return "bulkwrite";
247 2240 : case IOCONTEXT_NORMAL:
248 2240 : return "normal";
249 2240 : case IOCONTEXT_VACUUM:
250 2240 : return "vacuum";
251 : }
252 :
253 0 : elog(ERROR, "unrecognized IOContext value: %d", io_context);
254 : pg_unreachable();
255 : }
256 :
257 : const char *
258 2240 : pgstat_get_io_object_name(IOObject io_object)
259 : {
260 2240 : switch (io_object)
261 : {
262 1120 : case IOOBJECT_RELATION:
263 1120 : return "relation";
264 1120 : case IOOBJECT_TEMP_RELATION:
265 1120 : return "temp relation";
266 : }
267 :
268 0 : elog(ERROR, "unrecognized IOObject value: %d", io_object);
269 : pg_unreachable();
270 : }
271 :
272 : void
273 1902 : pgstat_io_init_shmem_cb(void *stats)
274 : {
275 1902 : PgStatShared_IO *stat_shmem = (PgStatShared_IO *) stats;
276 :
277 34236 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
278 32334 : LWLockInitialize(&stat_shmem->locks[i], LWTRANCHE_PGSTATS_DATA);
279 1902 : }
280 :
281 : void
282 464 : pgstat_io_reset_all_cb(TimestampTz ts)
283 : {
284 8352 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
285 : {
286 7888 : LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
287 7888 : PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
288 :
289 7888 : LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
290 :
291 : /*
292 : * Use the lock in the first BackendType's PgStat_BktypeIO to protect
293 : * the reset timestamp as well.
294 : */
295 7888 : if (i == 0)
296 464 : pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
297 :
298 7888 : memset(bktype_shstats, 0, sizeof(*bktype_shstats));
299 7888 : LWLockRelease(bktype_lock);
300 : }
301 464 : }
302 :
303 : void
304 1246 : pgstat_io_snapshot_cb(void)
305 : {
306 22428 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
307 : {
308 21182 : LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
309 21182 : PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
310 21182 : PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
311 :
312 21182 : LWLockAcquire(bktype_lock, LW_SHARED);
313 :
314 : /*
315 : * Use the lock in the first BackendType's PgStat_BktypeIO to protect
316 : * the reset timestamp as well.
317 : */
318 21182 : if (i == 0)
319 1246 : pgStatLocal.snapshot.io.stat_reset_timestamp =
320 1246 : pgStatLocal.shmem->io.stats.stat_reset_timestamp;
321 :
322 : /* using struct assignment due to better type safety */
323 21182 : *bktype_snap = *bktype_shstats;
324 21182 : LWLockRelease(bktype_lock);
325 : }
326 1246 : }
327 :
328 : /*
329 : * IO statistics are not collected for all BackendTypes.
330 : *
331 : * The following BackendTypes do not participate in the cumulative stats
332 : * subsystem or do not perform IO on which we currently track:
333 : * - Dead-end backend because it is not connected to shared memory and
334 : * doesn't do any IO
335 : * - Syslogger because it is not connected to shared memory
336 : * - Archiver because most relevant archiving IO is delegated to a
337 : * specialized command or module
338 : * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
339 : * pg_stat_io for now
340 : *
341 : * Function returns true if BackendType participates in the cumulative stats
342 : * subsystem for IO and false if it does not.
343 : *
344 : * When adding a new BackendType, also consider adding relevant restrictions to
345 : * pgstat_tracks_io_object() and pgstat_tracks_io_op().
346 : */
347 : bool
348 42224 : pgstat_tracks_io_bktype(BackendType bktype)
349 : {
350 : /*
351 : * List every type so that new backend types trigger a warning about
352 : * needing to adjust this switch.
353 : */
354 42224 : switch (bktype)
355 : {
356 784 : case B_INVALID:
357 : case B_DEAD_END_BACKEND:
358 : case B_ARCHIVER:
359 : case B_LOGGER:
360 : case B_WAL_RECEIVER:
361 : case B_WAL_WRITER:
362 : case B_WAL_SUMMARIZER:
363 784 : return false;
364 :
365 41440 : case B_AUTOVAC_LAUNCHER:
366 : case B_AUTOVAC_WORKER:
367 : case B_BACKEND:
368 : case B_BG_WORKER:
369 : case B_BG_WRITER:
370 : case B_CHECKPOINTER:
371 : case B_SLOTSYNC_WORKER:
372 : case B_STANDALONE_BACKEND:
373 : case B_STARTUP:
374 : case B_WAL_SENDER:
375 41440 : return true;
376 : }
377 :
378 0 : return false;
379 : }
380 :
381 : /*
382 : * Some BackendTypes do not perform IO on certain IOObjects or in certain
383 : * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
384 : * that the given BackendType is expected to do IO in the given IOContext and
385 : * on the given IOObject and that the given IOObject is expected to be operated
386 : * on in the given IOContext.
387 : */
388 : bool
389 40320 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
390 : IOContext io_context)
391 : {
392 : bool no_temp_rel;
393 :
394 : /*
395 : * Some BackendTypes should never track IO statistics.
396 : */
397 40320 : if (!pgstat_tracks_io_bktype(bktype))
398 0 : return false;
399 :
400 : /*
401 : * Currently, IO on temporary relations can only occur in the
402 : * IOCONTEXT_NORMAL IOContext.
403 : */
404 40320 : if (io_context != IOCONTEXT_NORMAL &&
405 : io_object == IOOBJECT_TEMP_RELATION)
406 3360 : return false;
407 :
408 : /*
409 : * In core Postgres, only regular backends and WAL Sender processes
410 : * executing queries will use local buffers and operate on temporary
411 : * relations. Parallel workers will not use local buffers (see
412 : * InitLocalBuffers()); however, extensions leveraging background workers
413 : * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
414 : * BackendType B_BG_WORKER.
415 : */
416 34608 : no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
417 31696 : bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
418 71568 : bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
419 :
420 36960 : if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
421 : io_object == IOOBJECT_TEMP_RELATION)
422 672 : return false;
423 :
424 : /*
425 : * Some BackendTypes do not currently perform any IO in certain
426 : * IOContexts, and, while it may not be inherently incorrect for them to
427 : * do so, excluding those rows from the view makes the view easier to use.
428 : */
429 36288 : if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
430 2464 : (io_context == IOCONTEXT_BULKREAD ||
431 2240 : io_context == IOCONTEXT_BULKWRITE ||
432 : io_context == IOCONTEXT_VACUUM))
433 672 : return false;
434 :
435 35616 : if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
436 112 : return false;
437 :
438 35504 : if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
439 : io_context == IOCONTEXT_BULKWRITE)
440 224 : return false;
441 :
442 35280 : return true;
443 : }
444 :
445 : /*
446 : * Some BackendTypes will never do certain IOOps and some IOOps should not
447 : * occur in certain IOContexts or on certain IOObjects. Check that the given
448 : * IOOp is valid for the given BackendType in the given IOContext and on the
449 : * given IOObject. Note that there are currently no cases of an IOOp being
450 : * invalid for a particular BackendType only within a certain IOContext and/or
451 : * only on a certain IOObject.
452 : */
453 : bool
454 31360 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
455 : IOContext io_context, IOOp io_op)
456 : {
457 : bool strategy_io_context;
458 :
459 : /* if (io_context, io_object) will never collect stats, we're done */
460 31360 : if (!pgstat_tracks_io_object(bktype, io_object, io_context))
461 0 : return false;
462 :
463 : /*
464 : * Some BackendTypes will not do certain IOOps.
465 : */
466 31360 : if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
467 1568 : (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
468 672 : return false;
469 :
470 30688 : if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
471 2912 : bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
472 448 : return false;
473 :
474 : /*
475 : * Temporary tables are not logged and thus do not require fsync'ing.
476 : * Writeback is not requested for temporary tables.
477 : */
478 30240 : if (io_object == IOOBJECT_TEMP_RELATION &&
479 3136 : (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
480 896 : return false;
481 :
482 : /*
483 : * Some IOOps are not valid in certain IOContexts and some IOOps are only
484 : * valid in certain contexts.
485 : */
486 29344 : if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
487 784 : return false;
488 :
489 22288 : strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
490 50848 : io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
491 :
492 : /*
493 : * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
494 : */
495 28560 : if (!strategy_io_context && io_op == IOOP_REUSE)
496 1568 : return false;
497 :
498 : /*
499 : * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
500 : * counted in the IOCONTEXT_NORMAL IOContext. See comment in
501 : * register_dirty_segment() for more details.
502 : */
503 26992 : if (strategy_io_context && io_op == IOOP_FSYNC)
504 2352 : return false;
505 :
506 :
507 24640 : return true;
508 : }
|