LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 135 172 78.5 %
Date: 2025-01-18 04:15:08 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_io.c
       4             :  *    Implementation of IO statistics.
       5             :  *
       6             :  * This file contains the implementation of IO statistics. It is kept separate
       7             :  * from pgstat.c to enforce the line between the statistics access / storage
       8             :  * implementation and the details about individual types of statistics.
       9             :  *
      10             :  * Copyright (c) 2021-2025, PostgreSQL Global Development Group
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/utils/activity/pgstat_io.c
      14             :  * -------------------------------------------------------------------------
      15             :  */
      16             : 
      17             : #include "postgres.h"
      18             : 
      19             : #include "executor/instrument.h"
      20             : #include "storage/bufmgr.h"
      21             : #include "utils/pgstat_internal.h"
      22             : 
      23             : static PgStat_PendingIO PendingIOStats;  /* process-local IO stats not yet flushed */
      24             : static bool have_iostats = false;  /* true while PendingIOStats holds unflushed data */
      25             : 
      26             : /*
      27             :  * Check that stats have not been counted for any combination of IOObject,
      28             :  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
      29             :  * stats are tracked for this combination and IO times are non-zero, counts
      30             :  * should be non-zero. Returns true if all checks pass, false otherwise.
      31             :  *
      32             :  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
      33             :  * specified by the second parameter. Caller is responsible for locking the
      34             :  * passed-in PgStat_BktypeIO, if needed.
      35             :  */
      36             : bool
      37           0 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
      38             :                              BackendType bktype)
      39             : {
      40           0 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
      41             :     {
      42           0 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
      43             :         {
      44           0 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
      45             :             {
      46             :                 /* we do track it */
      47           0 :                 if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
      48             :                 {
      49             :                     /* ensure that if IO times are non-zero, counts are > 0 */
      50           0 :                     if (backend_io->times[io_object][io_context][io_op] != 0 &&
      51           0 :                         backend_io->counts[io_object][io_context][io_op] <= 0)
      52           0 :                         return false;
      53             :                     /* tracked and consistent; skip the untracked-combination check */
      54           0 :                     continue;
      55             :                 }
      56             : 
      57             :                 /* we don't track it, and it is not 0 */
      58           0 :                 if (backend_io->counts[io_object][io_context][io_op] != 0)
      59           0 :                     return false;
      60             :             }
      61             :         }
      62             :     }
      63             : 
      64           0 :     return true;
      65             : }
      66             : /* Add cnt operations and bytes for (io_object, io_context, io_op) to the pending IO stats. */
      67             : void
      68   113053048 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
      69             :                    uint32 cnt, uint64 bytes)
      70             : {
      71             :     Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
      72             :     Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
      73             :     Assert(pgstat_is_ioop_tracked_in_bytes(io_op) || bytes == 0);
      74             :     Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
      75             :     /* Mirror the increment into the pending entry for MyProcNumber, if tracked. */
      76   113053048 :     if (pgstat_tracks_backend_bktype(MyBackendType))
      77             :     {
      78             :         PgStat_BackendPending *entry_ref;
      79             : 
      80   101807330 :         entry_ref = pgstat_prep_backend_pending(MyProcNumber);
      81   101807330 :         entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt;
      82   101807330 :         entry_ref->pending_io.bytes[io_object][io_context][io_op] += bytes;
      83             :     }
      84             :     /* Always accumulate into the file-local pending stats. */
      85   113053048 :     PendingIOStats.counts[io_object][io_context][io_op] += cnt;
      86   113053048 :     PendingIOStats.bytes[io_object][io_context][io_op] += bytes;
      87             :     /* Tell pgstat_io_have_pending_cb() and pgstat_io_flush_cb() there is data. */
      88   113053048 :     have_iostats = true;
      89   113053048 : }
      90             : 
      91             : /*
      92             :  * Return the start timestamp for an IO operation: the current time when
      93             :  * track_io_guc (the caller's IO timing GUC) is true, zero otherwise.
      94             :  */
      95             : instr_time
      96     3645586 : pgstat_prepare_io_time(bool track_io_guc)
      97             : {
      98             :     instr_time  io_start;
      99             : 
     100     3645586 :     if (track_io_guc)
     101           0 :         INSTR_TIME_SET_CURRENT(io_start);
     102             :     else
     103             :     {
     104             :         /*
     105             :          * There is no need to set io_start when an IO timing GUC is disabled,
     106             :          * still initialize it to zero to avoid compiler warnings.
     107             :          */
     108     3645586 :         INSTR_TIME_SET_ZERO(io_start);
     109             :     }
     110             : 
     111     3645586 :     return io_start;
     112             : }
     113             : 
     114             : /*
     115             :  * Like pgstat_count_io_op(), but also accumulates the time since start_time.
     116             :  */
     117             : void
     118     3645556 : pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
     119             :                         instr_time start_time, uint32 cnt, uint64 bytes)
     120             : {
     121     3645556 :     if (track_io_timing)
     122             :     {
     123             :         instr_time  io_time;
     124             : 
     125           0 :         INSTR_TIME_SET_CURRENT(io_time);
     126           0 :         INSTR_TIME_SUBTRACT(io_time, start_time);
     127             :         /* Report write/extend and read times to the buffer usage counters too. */
     128           0 :         if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
     129             :         {
     130           0 :             pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
     131           0 :             if (io_object == IOOBJECT_RELATION)
     132           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time);
     133           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     134           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time);
     135             :         }
     136           0 :         else if (io_op == IOOP_READ)
     137             :         {
     138           0 :             pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
     139           0 :             if (io_object == IOOBJECT_RELATION)
     140           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time);
     141           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     142           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time);
     143             :         }
     144             :         /* Accumulate the elapsed time in the file-local pending stats... */
     145           0 :         INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
     146             :                        io_time);
     147             :         /* ...and in the pending entry for MyProcNumber, if tracked. */
     148           0 :         if (pgstat_tracks_backend_bktype(MyBackendType))
     149             :         {
     150             :             PgStat_BackendPending *entry_ref;
     151             : 
     152           0 :             entry_ref = pgstat_prep_backend_pending(MyProcNumber);
     153           0 :             INSTR_TIME_ADD(entry_ref->pending_io.pending_times[io_object][io_context][io_op],
     154             :                            io_time);
     155             :         }
     156             :     }
     157             :     /* Counts and bytes are recorded whether or not track_io_timing is set. */
     158     3645556 :     pgstat_count_io_op(io_object, io_context, io_op, cnt, bytes);
     159     3645556 : }
     160             : /* Call pgstat_snapshot_fixed() for PGSTAT_KIND_IO, then return a pointer to the IO snapshot. */
     161             : PgStat_IO *
     162         112 : pgstat_fetch_stat_io(void)
     163             : {
     164         112 :     pgstat_snapshot_fixed(PGSTAT_KIND_IO);
     165             : 
     166         112 :     return &pgStatLocal.snapshot.io;
     167             : }
     168             : 
     169             : /*
     170             :  * Check if there are any IO stats waiting for flush (i.e. have_iostats is set).
     171             :  */
     172             : bool
     173       13078 : pgstat_io_have_pending_cb(void)
     174             : {
     175       13078 :     return have_iostats;
     176             : }
     177             : 
     178             : /*
     179             :  * Simple wrapper around pgstat_io_flush_cb() that discards its return value.
     180             :  */
     181             : void
     182      190166 : pgstat_flush_io(bool nowait)
     183             : {
     184      190166 :     (void) pgstat_io_flush_cb(nowait);
     185      190166 : }
     186             : 
     187             : /*
     188             :  * Flush out locally pending IO statistics for MyBackendType to shared memory
     189             :  *
     190             :  * If no stats have been recorded, this function returns false.
     191             :  *
     192             :  * If nowait is true, this function returns true if the lock could not be
     193             :  * acquired. Otherwise, return false.
     194             :  */
     195             : bool
     196      249568 : pgstat_io_flush_cb(bool nowait)
     197             : {
     198             :     LWLock     *bktype_lock;
     199             :     PgStat_BktypeIO *bktype_shstats;
     200             : 
     201      249568 :     if (!have_iostats)
     202       56096 :         return false;
     203             :     /* Each BackendType has its own lock and stats slot in shared memory. */
     204      193472 :     bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
     205      193472 :     bktype_shstats =
     206      193472 :         &pgStatLocal.shmem->io.stats.stats[MyBackendType];
     207             :     /* Wait for the lock unless nowait; with nowait, give up if it is busy. */
     208      193472 :     if (!nowait)
     209      169034 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     210       24438 :     else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
     211           0 :         return true;
     212             :     /* Add every pending counter to the shared totals; times in microseconds. */
     213      580416 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
     214             :     {
     215     1934720 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
     216             :         {
     217    13929984 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
     218             :             {
     219             :                 instr_time  time;
     220             : 
     221    12382208 :                 bktype_shstats->counts[io_object][io_context][io_op] +=
     222    12382208 :                     PendingIOStats.counts[io_object][io_context][io_op];
     223             : 
     224    12382208 :                 bktype_shstats->bytes[io_object][io_context][io_op] +=
     225    12382208 :                     PendingIOStats.bytes[io_object][io_context][io_op];
     226             : 
     227    12382208 :                 time = PendingIOStats.pending_times[io_object][io_context][io_op];
     228             : 
     229    12382208 :                 bktype_shstats->times[io_object][io_context][io_op] +=
     230    12382208 :                     INSTR_TIME_GET_MICROSEC(time);
     231             :             }
     232             :         }
     233             :     }
     234             : 
     235             :     Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
     236             : 
     237      193472 :     LWLockRelease(bktype_lock);
     238             :     /* Everything pending has been added to shared memory; start over locally. */
     239      193472 :     memset(&PendingIOStats, 0, sizeof(PendingIOStats));
     240             : 
     241      193472 :     have_iostats = false;
     242             : 
     243      193472 :     return false;
     244             : }
     245             : /* Return the name string for an IOContext; elog(ERROR) on an unrecognized value. */
     246             : const char *
     247        9296 : pgstat_get_io_context_name(IOContext io_context)
     248             : {
     249        9296 :     switch (io_context)
     250             :     {
     251        2324 :         case IOCONTEXT_BULKREAD:
     252        2324 :             return "bulkread";
     253        2324 :         case IOCONTEXT_BULKWRITE:
     254        2324 :             return "bulkwrite";
     255        2324 :         case IOCONTEXT_NORMAL:
     256        2324 :             return "normal";
     257        2324 :         case IOCONTEXT_VACUUM:
     258        2324 :             return "vacuum";
     259             :     }
     260             : 
     261           0 :     elog(ERROR, "unrecognized IOContext value: %d", io_context);
     262             :     pg_unreachable();
     263             : }
     264             : /* Return the name string for an IOObject; elog(ERROR) on an unrecognized value. */
     265             : const char *
     266        2324 : pgstat_get_io_object_name(IOObject io_object)
     267             : {
     268        2324 :     switch (io_object)
     269             :     {
     270        1162 :         case IOOBJECT_RELATION:
     271        1162 :             return "relation";
     272        1162 :         case IOOBJECT_TEMP_RELATION:
     273        1162 :             return "temp relation";
     274             :     }
     275             : 
     276           0 :     elog(ERROR, "unrecognized IOObject value: %d", io_object);
     277             :     pg_unreachable();
     278             : }
     279             : /* Initialize one LWLock per BackendType in the PgStatShared_IO passed as 'stats'. */
     280             : void
     281        1918 : pgstat_io_init_shmem_cb(void *stats)
     282             : {
     283        1918 :     PgStatShared_IO *stat_shmem = (PgStatShared_IO *) stats;
     284             : 
     285       34524 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     286       32606 :         LWLockInitialize(&stat_shmem->locks[i], LWTRANCHE_PGSTATS_DATA);
     287        1918 : }
     288             : /* Zero the shared IO stats of every BackendType and set the reset timestamp to ts. */
     289             : void
     290         464 : pgstat_io_reset_all_cb(TimestampTz ts)
     291             : {
     292        8352 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     293             :     {
     294        7888 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     295        7888 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     296             : 
     297        7888 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     298             : 
     299             :         /*
     300             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     301             :          * the reset timestamp as well.
     302             :          */
     303        7888 :         if (i == 0)
     304         464 :             pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
     305             : 
     306        7888 :         memset(bktype_shstats, 0, sizeof(*bktype_shstats));
     307        7888 :         LWLockRelease(bktype_lock);
     308             :     }
     309         464 : }
     310             : /* Copy every BackendType's shared IO stats, plus the reset timestamp, into the local snapshot. */
     311             : void
     312        1252 : pgstat_io_snapshot_cb(void)
     313             : {
     314       22536 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     315             :     {
     316       21284 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     317       21284 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     318       21284 :         PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
     319             : 
     320       21284 :         LWLockAcquire(bktype_lock, LW_SHARED);
     321             : 
     322             :         /*
     323             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     324             :          * the reset timestamp as well.
     325             :          */
     326       21284 :         if (i == 0)
     327        1252 :             pgStatLocal.snapshot.io.stat_reset_timestamp =
     328        1252 :                 pgStatLocal.shmem->io.stats.stat_reset_timestamp;
     329             : 
     330             :         /* using struct assignment due to better type safety */
     331       21284 :         *bktype_snap = *bktype_shstats;
     332       21284 :         LWLockRelease(bktype_lock);
     333             :     }
     334        1252 : }
     335             : 
     336             : /*
     337             :  * IO statistics are not collected for all BackendTypes.
     338             :  *
     339             :  * The following BackendTypes do not participate in the cumulative stats
     340             :  * subsystem or do not perform IO which we currently track:
     341             :  * - Dead-end backend because it is not connected to shared memory and
     342             :  *   doesn't do any IO
     343             :  * - Syslogger because it is not connected to shared memory
     344             :  * - Archiver because most relevant archiving IO is delegated to a
     345             :  *   specialized command or module
     346             :  * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
     347             :  *   pg_stat_io for now
     348             :  *
     349             :  * Function returns true if BackendType participates in the cumulative stats
     350             :  * subsystem for IO and false if it does not.
     351             :  *
     352             :  * When adding a new BackendType, also consider adding relevant restrictions to
     353             :  * pgstat_tracks_io_object() and pgstat_tracks_io_op().
     354             :  */
     355             : bool
     356       44240 : pgstat_tracks_io_bktype(BackendType bktype)
     357             : {
     358             :     /*
     359             :      * List every type so that new backend types trigger a warning about
     360             :      * needing to adjust this switch.
     361             :      */
     362       44240 :     switch (bktype)
     363             :     {
     364         784 :         case B_INVALID:
     365             :         case B_DEAD_END_BACKEND:
     366             :         case B_ARCHIVER:
     367             :         case B_LOGGER:
     368             :         case B_WAL_RECEIVER:
     369             :         case B_WAL_WRITER:
     370             :         case B_WAL_SUMMARIZER:
     371         784 :             return false;
     372             : 
     373       43456 :         case B_AUTOVAC_LAUNCHER:
     374             :         case B_AUTOVAC_WORKER:
     375             :         case B_BACKEND:
     376             :         case B_BG_WORKER:
     377             :         case B_BG_WRITER:
     378             :         case B_CHECKPOINTER:
     379             :         case B_SLOTSYNC_WORKER:
     380             :         case B_STANDALONE_BACKEND:
     381             :         case B_STARTUP:
     382             :         case B_WAL_SENDER:
     383       43456 :             return true;
     384             :     }
     385             :     /* Any value not handled by the switch is treated as not tracked. */
     386           0 :     return false;
     387             : }
     388             : 
     389             : /*
     390             :  * Some BackendTypes do not perform IO on certain IOObjects or in certain
     391             :  * IOContexts. Some IOObjects are never operated on in some IOContexts. Return
     392             :  * true if the given BackendType is expected to do IO in the given IOContext
     393             :  * and on the given IOObject, and the given IOObject is expected to be
     394             :  * operated on in the given IOContext; otherwise return false.
     395             :  */
     396             : bool
     397       42336 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
     398             :                         IOContext io_context)
     399             : {
     400             :     bool        no_temp_rel;
     401             : 
     402             :     /*
     403             :      * Some BackendTypes should never track IO statistics.
     404             :      */
     405       42336 :     if (!pgstat_tracks_io_bktype(bktype))
     406           0 :         return false;
     407             : 
     408             :     /*
     409             :      * Currently, IO on temporary relations can only occur in the
     410             :      * IOCONTEXT_NORMAL IOContext.
     411             :      */
     412       42336 :     if (io_context != IOCONTEXT_NORMAL &&
     413             :         io_object == IOOBJECT_TEMP_RELATION)
     414        3486 :         return false;
     415             : 
     416             :     /*
     417             :      * In core Postgres, only regular backends and WAL Sender processes
     418             :      * executing queries will use local buffers and operate on temporary
     419             :      * relations. Parallel workers will not use local buffers (see
     420             :      * InitLocalBuffers()); however, extensions leveraging background workers
     421             :      * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
     422             :      * BackendType B_BG_WORKER.
     423             :      */
     424       36498 :     no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     425       33586 :         bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
     426       75348 :         bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
     427             : 
     428       38850 :     if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
     429             :         io_object == IOOBJECT_TEMP_RELATION)
     430         672 :         return false;
     431             : 
     432             :     /*
     433             :      * Some BackendTypes do not currently perform any IO in certain
     434             :      * IOContexts, and, while it may not be inherently incorrect for them to
     435             :      * do so, excluding those rows from the view makes the view easier to use.
     436             :      */
     437       38178 :     if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
     438        2464 :         (io_context == IOCONTEXT_BULKREAD ||
     439        2240 :          io_context == IOCONTEXT_BULKWRITE ||
     440             :          io_context == IOCONTEXT_VACUUM))
     441         672 :         return false;
     442             :     /* The autovacuum launcher is not tracked in the vacuum IOContext. */
     443       37506 :     if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
     444         112 :         return false;
     445             :     /* Neither autovacuum process type is tracked in the bulkwrite IOContext. */
     446       37394 :     if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
     447             :         io_context == IOCONTEXT_BULKWRITE)
     448         224 :         return false;
     449             : 
     450       37170 :     return true;
     451             : }
     452             : 
     453             : /*
     454             :  * Some BackendTypes will never do certain IOOps and some IOOps should not
     455             :  * occur in certain IOContexts or on certain IOObjects. Return true if the
     456             :  * given IOOp is valid for the given BackendType in the given IOContext and on
     457             :  * the given IOObject, false otherwise. Note that there are currently no cases
     458             :  * of an IOOp being invalid for a particular BackendType only within a certain
     459             :  * IOContext and/or only on a certain IOObject.
     460             :  */
     461             : bool
     462       33040 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
     463             :                     IOContext io_context, IOOp io_op)
     464             : {
     465             :     bool        strategy_io_context;
     466             : 
     467             :     /* if (io_context, io_object) will never collect stats, we're done */
     468       33040 :     if (!pgstat_tracks_io_object(bktype, io_object, io_context))
     469           0 :         return false;
     470             : 
     471             :     /*
     472             :      * Some BackendTypes will not do certain IOOps.
     473             :      */
     474       33040 :     if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
     475        1568 :         (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
     476         672 :         return false;
     477             :     /* IOOP_EXTEND is not tracked for the launcher, bgwriter, or checkpointer. */
     478       32368 :     if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     479        2912 :          bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
     480         448 :         return false;
     481             : 
     482             :     /*
     483             :      * Temporary tables are not logged and thus do not require fsync'ing.
     484             :      * Writeback is not requested for temporary tables.
     485             :      */
     486       31920 :     if (io_object == IOOBJECT_TEMP_RELATION &&
     487        3430 :         (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
     488         980 :         return false;
     489             : 
     490             :     /*
     491             :      * Some IOOps are not valid in certain IOContexts and some IOOps are only
     492             :      * valid in certain contexts.
     493             :      */
     494       30940 :     if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
     495         826 :         return false;
     496             : 
     497       23548 :     strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
     498       53662 :         io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
     499             : 
     500             :     /*
     501             :      * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
     502             :      */
     503       30114 :     if (!strategy_io_context && io_op == IOOP_REUSE)
     504        1652 :         return false;
     505             : 
     506             :     /*
     507             :      * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
     508             :      * counted in the IOCONTEXT_NORMAL IOContext. See comment in
     509             :      * register_dirty_segment() for more details.
     510             :      */
     511       28462 :     if (strategy_io_context && io_op == IOOP_FSYNC)
     512        2478 :         return false;
     513             : 
     514             : 
     515       25984 :     return true;
     516             : }

Generated by: LCOV version 1.14