Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_io.c
4 : * Implementation of IO statistics.
5 : *
6 : * This file contains the implementation of IO statistics. It is kept separate
7 : * from pgstat.c to enforce the line between the statistics access / storage
8 : * implementation and the details about individual types of statistics.
9 : *
10 : * Copyright (c) 2021-2023, PostgreSQL Global Development Group
11 : *
12 : * IDENTIFICATION
13 : * src/backend/utils/activity/pgstat_io.c
14 : * -------------------------------------------------------------------------
15 : */
16 :
17 : #include "postgres.h"
18 :
19 : #include "executor/instrument.h"
20 : #include "storage/bufmgr.h"
21 : #include "utils/pgstat_internal.h"
22 :
23 :
24 : typedef struct PgStat_PendingIO
25 : {
26 : PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
27 : instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
28 : } PgStat_PendingIO;
29 :
30 :
31 : static PgStat_PendingIO PendingIOStats;
32 : bool have_iostats = false;
33 :
34 :
35 : /*
36 : * Check that stats have not been counted for any combination of IOObject,
37 : * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38 : * stats are tracked for this combination and IO times are non-zero, counts
39 : * should be non-zero.
40 : *
41 : * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42 : * specified by the second parameter. Caller is responsible for locking the
43 : * passed-in PgStat_BktypeIO, if needed.
44 : */
45 : bool
46 0 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
47 : BackendType bktype)
48 : {
49 0 : for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50 : {
51 0 : for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52 : {
53 0 : for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54 : {
55 : /* we do track it */
56 0 : if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57 : {
58 : /* ensure that if IO times are non-zero, counts are > 0 */
59 0 : if (backend_io->times[io_object][io_context][io_op] != 0 &&
60 0 : backend_io->counts[io_object][io_context][io_op] <= 0)
61 0 : return false;
62 :
63 0 : continue;
64 : }
65 :
66 : /* we don't track it, and it is not 0 */
67 0 : if (backend_io->counts[io_object][io_context][io_op] != 0)
68 0 : return false;
69 : }
70 : }
71 : }
72 :
73 0 : return true;
74 : }
75 :
76 : void
77 89505330 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 : {
79 89505330 : pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 89505330 : }
81 :
82 : void
83 92741840 : pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 : {
85 : Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86 : Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87 : Assert((unsigned int) io_op < IOOP_NUM_TYPES);
88 : Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 :
90 92741840 : PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 :
92 92741840 : have_iostats = true;
93 92741840 : }
94 :
95 : instr_time
96 3236540 : pgstat_prepare_io_time(void)
97 : {
98 : instr_time io_start;
99 :
100 3236540 : if (track_io_timing)
101 4 : INSTR_TIME_SET_CURRENT(io_start);
102 : else
103 3236536 : INSTR_TIME_SET_ZERO(io_start);
104 :
105 3236540 : return io_start;
106 : }
107 :
108 : /*
109 : * Like pgstat_count_io_op_n() except it also accumulates time.
110 : */
111 : void
112 3236510 : pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
113 : instr_time start_time, uint32 cnt)
114 : {
115 3236510 : if (track_io_timing)
116 : {
117 : instr_time io_time;
118 :
119 4 : INSTR_TIME_SET_CURRENT(io_time);
120 4 : INSTR_TIME_SUBTRACT(io_time, start_time);
121 :
122 4 : if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
123 : {
124 2 : pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
125 2 : if (io_object == IOOBJECT_RELATION)
126 2 : INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time);
127 0 : else if (io_object == IOOBJECT_TEMP_RELATION)
128 0 : INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time);
129 : }
130 2 : else if (io_op == IOOP_READ)
131 : {
132 2 : pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
133 2 : if (io_object == IOOBJECT_RELATION)
134 2 : INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time);
135 0 : else if (io_object == IOOBJECT_TEMP_RELATION)
136 0 : INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time);
137 : }
138 :
139 4 : INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
140 : io_time);
141 : }
142 :
143 3236510 : pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
144 3236510 : }
145 :
146 : PgStat_IO *
147 112 : pgstat_fetch_stat_io(void)
148 : {
149 112 : pgstat_snapshot_fixed(PGSTAT_KIND_IO);
150 :
151 112 : return &pgStatLocal.snapshot.io;
152 : }
153 :
154 : /*
155 : * Flush out locally pending IO statistics
156 : *
157 : * If no stats have been recorded, this function returns false.
158 : *
159 : * If nowait is true, this function returns true if the lock could not be
160 : * acquired. Otherwise, return false.
161 : */
162 : bool
163 119946 : pgstat_flush_io(bool nowait)
164 : {
165 : LWLock *bktype_lock;
166 : PgStat_BktypeIO *bktype_shstats;
167 :
168 119946 : if (!have_iostats)
169 44542 : return false;
170 :
171 75404 : bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
172 75404 : bktype_shstats =
173 75404 : &pgStatLocal.shmem->io.stats.stats[MyBackendType];
174 :
175 75404 : if (!nowait)
176 55078 : LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
177 20326 : else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
178 0 : return true;
179 :
180 226212 : for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
181 : {
182 754040 : for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
183 : {
184 5429088 : for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
185 : {
186 : instr_time time;
187 :
188 4825856 : bktype_shstats->counts[io_object][io_context][io_op] +=
189 4825856 : PendingIOStats.counts[io_object][io_context][io_op];
190 :
191 4825856 : time = PendingIOStats.pending_times[io_object][io_context][io_op];
192 :
193 4825856 : bktype_shstats->times[io_object][io_context][io_op] +=
194 4825856 : INSTR_TIME_GET_MICROSEC(time);
195 : }
196 : }
197 : }
198 :
199 : Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
200 :
201 75404 : LWLockRelease(bktype_lock);
202 :
203 75404 : memset(&PendingIOStats, 0, sizeof(PendingIOStats));
204 :
205 75404 : have_iostats = false;
206 :
207 75404 : return false;
208 : }
209 :
210 : const char *
211 8064 : pgstat_get_io_context_name(IOContext io_context)
212 : {
213 8064 : switch (io_context)
214 : {
215 2016 : case IOCONTEXT_BULKREAD:
216 2016 : return "bulkread";
217 2016 : case IOCONTEXT_BULKWRITE:
218 2016 : return "bulkwrite";
219 2016 : case IOCONTEXT_NORMAL:
220 2016 : return "normal";
221 2016 : case IOCONTEXT_VACUUM:
222 2016 : return "vacuum";
223 : }
224 :
225 0 : elog(ERROR, "unrecognized IOContext value: %d", io_context);
226 : pg_unreachable();
227 : }
228 :
229 : const char *
230 2016 : pgstat_get_io_object_name(IOObject io_object)
231 : {
232 2016 : switch (io_object)
233 : {
234 1008 : case IOOBJECT_RELATION:
235 1008 : return "relation";
236 1008 : case IOOBJECT_TEMP_RELATION:
237 1008 : return "temp relation";
238 : }
239 :
240 0 : elog(ERROR, "unrecognized IOObject value: %d", io_object);
241 : pg_unreachable();
242 : }
243 :
244 : void
245 378 : pgstat_io_reset_all_cb(TimestampTz ts)
246 : {
247 5670 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
248 : {
249 5292 : LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
250 5292 : PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
251 :
252 5292 : LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
253 :
254 : /*
255 : * Use the lock in the first BackendType's PgStat_BktypeIO to protect
256 : * the reset timestamp as well.
257 : */
258 5292 : if (i == 0)
259 378 : pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
260 :
261 5292 : memset(bktype_shstats, 0, sizeof(*bktype_shstats));
262 5292 : LWLockRelease(bktype_lock);
263 : }
264 378 : }
265 :
266 : void
267 1040 : pgstat_io_snapshot_cb(void)
268 : {
269 15600 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
270 : {
271 14560 : LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
272 14560 : PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
273 14560 : PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
274 :
275 14560 : LWLockAcquire(bktype_lock, LW_SHARED);
276 :
277 : /*
278 : * Use the lock in the first BackendType's PgStat_BktypeIO to protect
279 : * the reset timestamp as well.
280 : */
281 14560 : if (i == 0)
282 1040 : pgStatLocal.snapshot.io.stat_reset_timestamp =
283 1040 : pgStatLocal.shmem->io.stats.stat_reset_timestamp;
284 :
285 : /* using struct assignment due to better type safety */
286 14560 : *bktype_snap = *bktype_shstats;
287 14560 : LWLockRelease(bktype_lock);
288 : }
289 1040 : }
290 :
291 : /*
292 : * IO statistics are not collected for all BackendTypes.
293 : *
294 : * The following BackendTypes do not participate in the cumulative stats
295 : * subsystem or do not perform IO on which we currently track:
296 : * - Syslogger because it is not connected to shared memory
297 : * - Archiver because most relevant archiving IO is delegated to a
298 : * specialized command or module
299 : * - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now
300 : *
301 : * Function returns true if BackendType participates in the cumulative stats
302 : * subsystem for IO and false if it does not.
303 : *
304 : * When adding a new BackendType, also consider adding relevant restrictions to
305 : * pgstat_tracks_io_object() and pgstat_tracks_io_op().
306 : */
307 : bool
308 36512 : pgstat_tracks_io_bktype(BackendType bktype)
309 : {
310 : /*
311 : * List every type so that new backend types trigger a warning about
312 : * needing to adjust this switch.
313 : */
314 36512 : switch (bktype)
315 : {
316 560 : case B_INVALID:
317 : case B_ARCHIVER:
318 : case B_LOGGER:
319 : case B_WAL_RECEIVER:
320 : case B_WAL_WRITER:
321 560 : return false;
322 :
323 35952 : case B_AUTOVAC_LAUNCHER:
324 : case B_AUTOVAC_WORKER:
325 : case B_BACKEND:
326 : case B_BG_WORKER:
327 : case B_BG_WRITER:
328 : case B_CHECKPOINTER:
329 : case B_STANDALONE_BACKEND:
330 : case B_STARTUP:
331 : case B_WAL_SENDER:
332 35952 : return true;
333 : }
334 :
335 0 : return false;
336 : }
337 :
338 : /*
339 : * Some BackendTypes do not perform IO on certain IOObjects or in certain
340 : * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
341 : * that the given BackendType is expected to do IO in the given IOContext and
342 : * on the given IOObject and that the given IOObject is expected to be operated
343 : * on in the given IOContext.
344 : */
345 : bool
346 34944 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
347 : IOContext io_context)
348 : {
349 : bool no_temp_rel;
350 :
351 : /*
352 : * Some BackendTypes should never track IO statistics.
353 : */
354 34944 : if (!pgstat_tracks_io_bktype(bktype))
355 0 : return false;
356 :
357 : /*
358 : * Currently, IO on temporary relations can only occur in the
359 : * IOCONTEXT_NORMAL IOContext.
360 : */
361 34944 : if (io_context != IOCONTEXT_NORMAL &&
362 : io_object == IOOBJECT_TEMP_RELATION)
363 3024 : return false;
364 :
365 : /*
366 : * In core Postgres, only regular backends and WAL Sender processes
367 : * executing queries will use local buffers and operate on temporary
368 : * relations. Parallel workers will not use local buffers (see
369 : * InitLocalBuffers()); however, extensions leveraging background workers
370 : * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
371 : * BackendType B_BG_WORKER.
372 : */
373 29568 : no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
374 26656 : bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
375 61488 : bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
376 :
377 31920 : if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
378 : io_object == IOOBJECT_TEMP_RELATION)
379 672 : return false;
380 :
381 : /*
382 : * Some BackendTypes do not currently perform any IO in certain
383 : * IOContexts, and, while it may not be inherently incorrect for them to
384 : * do so, excluding those rows from the view makes the view easier to use.
385 : */
386 31248 : if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
387 2464 : (io_context == IOCONTEXT_BULKREAD ||
388 2240 : io_context == IOCONTEXT_BULKWRITE ||
389 : io_context == IOCONTEXT_VACUUM))
390 672 : return false;
391 :
392 30576 : if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
393 112 : return false;
394 :
395 30464 : if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
396 : io_context == IOCONTEXT_BULKWRITE)
397 224 : return false;
398 :
399 30240 : return true;
400 : }
401 :
402 : /*
403 : * Some BackendTypes will never do certain IOOps and some IOOps should not
404 : * occur in certain IOContexts or on certain IOObjects. Check that the given
405 : * IOOp is valid for the given BackendType in the given IOContext and on the
406 : * given IOObject. Note that there are currently no cases of an IOOp being
407 : * invalid for a particular BackendType only within a certain IOContext and/or
408 : * only on a certain IOObject.
409 : */
410 : bool
411 26880 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
412 : IOContext io_context, IOOp io_op)
413 : {
414 : bool strategy_io_context;
415 :
416 : /* if (io_context, io_object) will never collect stats, we're done */
417 26880 : if (!pgstat_tracks_io_object(bktype, io_object, io_context))
418 0 : return false;
419 :
420 : /*
421 : * Some BackendTypes will not do certain IOOps.
422 : */
423 26880 : if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
424 1568 : (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
425 672 : return false;
426 :
427 26208 : if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
428 2912 : bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
429 448 : return false;
430 :
431 : /*
432 : * Temporary tables are not logged and thus do not require fsync'ing.
433 : * Writeback is not requested for temporary tables.
434 : */
435 25760 : if (io_object == IOOBJECT_TEMP_RELATION &&
436 2352 : (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
437 672 : return false;
438 :
439 : /*
440 : * Some IOOps are not valid in certain IOContexts and some IOOps are only
441 : * valid in certain contexts.
442 : */
443 25088 : if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
444 672 : return false;
445 :
446 18928 : strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
447 43344 : io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
448 :
449 : /*
450 : * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
451 : */
452 24416 : if (!strategy_io_context && io_op == IOOP_REUSE)
453 1344 : return false;
454 :
455 : /*
456 : * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
457 : * counted in the IOCONTEXT_NORMAL IOContext. See comment in
458 : * register_dirty_segment() for more details.
459 : */
460 23072 : if (strategy_io_context && io_op == IOOP_FSYNC)
461 2016 : return false;
462 :
463 :
464 21056 : return true;
465 : }
|