LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 131 165 79.4 %
Date: 2024-10-10 03:14:42 Functions: 16 17 94.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_io.c
       4             :  *    Implementation of IO statistics.
       5             :  *
       6             :  * This file contains the implementation of IO statistics. It is kept separate
       7             :  * from pgstat.c to enforce the line between the statistics access / storage
       8             :  * implementation and the details about individual types of statistics.
       9             :  *
      10             :  * Copyright (c) 2021-2024, PostgreSQL Global Development Group
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/utils/activity/pgstat_io.c
      14             :  * -------------------------------------------------------------------------
      15             :  */
      16             : 
      17             : #include "postgres.h"
      18             : 
      19             : #include "executor/instrument.h"
      20             : #include "storage/bufmgr.h"
      21             : #include "utils/pgstat_internal.h"
      22             : 
      23             : 
      24             : typedef struct PgStat_PendingIO
      25             : {
      26             :     PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
      27             :     instr_time  pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
      28             : } PgStat_PendingIO;
      29             : 
      30             : 
      31             : static PgStat_PendingIO PendingIOStats;
      32             : static bool have_iostats = false;
      33             : 
      34             : 
      35             : /*
      36             :  * Check that stats have not been counted for any combination of IOObject,
      37             :  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
      38             :  * stats are tracked for this combination and IO times are non-zero, counts
      39             :  * should be non-zero.
      40             :  *
      41             :  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
      42             :  * specified by the second parameter. Caller is responsible for locking the
      43             :  * passed-in PgStat_BktypeIO, if needed.
      44             :  */
      45             : bool
      46           0 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
      47             :                              BackendType bktype)
      48             : {
      49           0 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
      50             :     {
      51           0 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
      52             :         {
      53           0 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
      54             :             {
      55             :                 /* we do track it */
      56           0 :                 if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
      57             :                 {
      58             :                     /* ensure that if IO times are non-zero, counts are > 0 */
      59           0 :                     if (backend_io->times[io_object][io_context][io_op] != 0 &&
      60           0 :                         backend_io->counts[io_object][io_context][io_op] <= 0)
      61           0 :                         return false;
      62             : 
      63           0 :                     continue;
      64             :                 }
      65             : 
      66             :                 /* we don't track it, and it is not 0 */
      67           0 :                 if (backend_io->counts[io_object][io_context][io_op] != 0)
      68           0 :                     return false;
      69             :             }
      70             :         }
      71             :     }
      72             : 
      73           0 :     return true;
      74             : }
      75             : 
      76             : void
      77   104579414 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
      78             : {
      79   104579414 :     pgstat_count_io_op_n(io_object, io_context, io_op, 1);
      80   104579414 : }
      81             : 
      82             : void
      83   107872850 : pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
      84             : {
      85             :     Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
      86             :     Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
      87             :     Assert((unsigned int) io_op < IOOP_NUM_TYPES);
      88             :     Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
      89             : 
      90   107872850 :     PendingIOStats.counts[io_object][io_context][io_op] += cnt;
      91             : 
      92   107872850 :     have_iostats = true;
      93   107872850 : }
      94             : 
      95             : /*
      96             :  * Initialize the internal timing for an IO operation, depending on an
      97             :  * IO timing GUC.
      98             :  */
      99             : instr_time
     100     3293466 : pgstat_prepare_io_time(bool track_io_guc)
     101             : {
     102             :     instr_time  io_start;
     103             : 
     104     3293466 :     if (track_io_guc)
     105           0 :         INSTR_TIME_SET_CURRENT(io_start);
     106             :     else
     107             :     {
     108             :         /*
     109             :          * There is no need to set io_start when an IO timing GUC is disabled,
     110             :          * but still initialize it to zero to avoid compiler warnings.
     111             :          */
     112     3293466 :         INSTR_TIME_SET_ZERO(io_start);
     113             :     }
     114             : 
     115     3293466 :     return io_start;
     116             : }
     117             : 
     118             : /*
     119             :  * Like pgstat_count_io_op_n() except it also accumulates time.
     120             :  */
     121             : void
     122     3293436 : pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
     123             :                         instr_time start_time, uint32 cnt)
     124             : {
     125     3293436 :     if (track_io_timing)
     126             :     {
     127             :         instr_time  io_time;
     128             : 
     129           0 :         INSTR_TIME_SET_CURRENT(io_time);
     130           0 :         INSTR_TIME_SUBTRACT(io_time, start_time);
     131             : 
     132           0 :         if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
     133             :         {
     134           0 :             pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
     135           0 :             if (io_object == IOOBJECT_RELATION)
     136           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time);
     137           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     138           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time);
     139             :         }
     140           0 :         else if (io_op == IOOP_READ)
     141             :         {
     142           0 :             pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
     143           0 :             if (io_object == IOOBJECT_RELATION)
     144           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time);
     145           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     146           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time);
     147             :         }
     148             : 
     149           0 :         INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
     150             :                        io_time);
     151             :     }
     152             : 
     153     3293436 :     pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
     154     3293436 : }
     155             : 
     156             : PgStat_IO *
     157         112 : pgstat_fetch_stat_io(void)
     158             : {
     159         112 :     pgstat_snapshot_fixed(PGSTAT_KIND_IO);
     160             : 
     161         112 :     return &pgStatLocal.snapshot.io;
     162             : }
     163             : 
     164             : /*
     165             :  * Check if there are any IO stats waiting for flush.
     166             :  */
     167             : bool
     168       12560 : pgstat_io_have_pending_cb(void)
     169             : {
     170       12560 :     return have_iostats;
     171             : }
     172             : 
     173             : /*
     174             :  * Simple wrapper around pgstat_io_flush_cb()
     175             :  */
     176             : void
     177      186990 : pgstat_flush_io(bool nowait)
     178             : {
     179      186990 :     (void) pgstat_io_flush_cb(nowait);
     180      186990 : }
     181             : 
     182             : /*
     183             :  * Flush out locally pending IO statistics
     184             :  *
     185             :  * If no stats have been recorded, this function returns false.
     186             :  *
     187             :  * If nowait is true, this function returns true if the lock could not be
     188             :  * acquired. Otherwise, return false.
     189             :  */
     190             : bool
     191      243852 : pgstat_io_flush_cb(bool nowait)
     192             : {
     193             :     LWLock     *bktype_lock;
     194             :     PgStat_BktypeIO *bktype_shstats;
     195             : 
     196      243852 :     if (!have_iostats)
     197       53564 :         return false;
     198             : 
     199      190288 :     bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
     200      190288 :     bktype_shstats =
     201      190288 :         &pgStatLocal.shmem->io.stats.stats[MyBackendType];
     202             : 
     203      190288 :     if (!nowait)
     204      167004 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     205       23284 :     else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
     206           0 :         return true;
     207             : 
     208      570864 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
     209             :     {
     210     1902880 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
     211             :         {
     212    13700736 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
     213             :             {
     214             :                 instr_time  time;
     215             : 
     216    12178432 :                 bktype_shstats->counts[io_object][io_context][io_op] +=
     217    12178432 :                     PendingIOStats.counts[io_object][io_context][io_op];
     218             : 
     219    12178432 :                 time = PendingIOStats.pending_times[io_object][io_context][io_op];
     220             : 
     221    12178432 :                 bktype_shstats->times[io_object][io_context][io_op] +=
     222    12178432 :                     INSTR_TIME_GET_MICROSEC(time);
     223             :             }
     224             :         }
     225             :     }
     226             : 
     227             :     Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
     228             : 
     229      190288 :     LWLockRelease(bktype_lock);
     230             : 
     231      190288 :     memset(&PendingIOStats, 0, sizeof(PendingIOStats));
     232             : 
     233      190288 :     have_iostats = false;
     234             : 
     235      190288 :     return false;
     236             : }
     237             : 
     238             : const char *
     239        8960 : pgstat_get_io_context_name(IOContext io_context)
     240             : {
     241        8960 :     switch (io_context)
     242             :     {
     243        2240 :         case IOCONTEXT_BULKREAD:
     244        2240 :             return "bulkread";
     245        2240 :         case IOCONTEXT_BULKWRITE:
     246        2240 :             return "bulkwrite";
     247        2240 :         case IOCONTEXT_NORMAL:
     248        2240 :             return "normal";
     249        2240 :         case IOCONTEXT_VACUUM:
     250        2240 :             return "vacuum";
     251             :     }
     252             : 
     253           0 :     elog(ERROR, "unrecognized IOContext value: %d", io_context);
     254             :     pg_unreachable();
     255             : }
     256             : 
     257             : const char *
     258        2240 : pgstat_get_io_object_name(IOObject io_object)
     259             : {
     260        2240 :     switch (io_object)
     261             :     {
     262        1120 :         case IOOBJECT_RELATION:
     263        1120 :             return "relation";
     264        1120 :         case IOOBJECT_TEMP_RELATION:
     265        1120 :             return "temp relation";
     266             :     }
     267             : 
     268           0 :     elog(ERROR, "unrecognized IOObject value: %d", io_object);
     269             :     pg_unreachable();
     270             : }
     271             : 
     272             : void
     273        1828 : pgstat_io_init_shmem_cb(void *stats)
     274             : {
     275        1828 :     PgStatShared_IO *stat_shmem = (PgStatShared_IO *) stats;
     276             : 
     277       31076 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     278       29248 :         LWLockInitialize(&stat_shmem->locks[i], LWTRANCHE_PGSTATS_DATA);
     279        1828 : }
     280             : 
     281             : void
     282         448 : pgstat_io_reset_all_cb(TimestampTz ts)
     283             : {
     284        7616 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     285             :     {
     286        7168 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     287        7168 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     288             : 
     289        7168 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     290             : 
     291             :         /*
     292             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     293             :          * the reset timestamp as well.
     294             :          */
     295        7168 :         if (i == 0)
     296         448 :             pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
     297             : 
     298        7168 :         memset(bktype_shstats, 0, sizeof(*bktype_shstats));
     299        7168 :         LWLockRelease(bktype_lock);
     300             :     }
     301         448 : }
     302             : 
     303             : void
     304        1204 : pgstat_io_snapshot_cb(void)
     305             : {
     306       20468 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     307             :     {
     308       19264 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     309       19264 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     310       19264 :         PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
     311             : 
     312       19264 :         LWLockAcquire(bktype_lock, LW_SHARED);
     313             : 
     314             :         /*
     315             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     316             :          * the reset timestamp as well.
     317             :          */
     318       19264 :         if (i == 0)
     319        1204 :             pgStatLocal.snapshot.io.stat_reset_timestamp =
     320        1204 :                 pgStatLocal.shmem->io.stats.stat_reset_timestamp;
     321             : 
     322             :         /* using struct assignment due to better type safety */
     323       19264 :         *bktype_snap = *bktype_shstats;
     324       19264 :         LWLockRelease(bktype_lock);
     325             :     }
     326        1204 : }
     327             : 
     328             : /*
     329             : * IO statistics are not collected for all BackendTypes.
     330             : *
     331             : * The following BackendTypes do not participate in the cumulative stats
     332             : * subsystem or do not perform IO that we currently track:
     333             : * - Syslogger because it is not connected to shared memory
     334             : * - Archiver because most relevant archiving IO is delegated to a
     335             : *   specialized command or module
     336             : * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
     337             : *   pg_stat_io for now
     338             : *
     339             : * Function returns true if BackendType participates in the cumulative stats
     340             : * subsystem for IO and false if it does not.
     341             : *
     342             : * When adding a new BackendType, also consider adding relevant restrictions to
     343             : * pgstat_tracks_io_object() and pgstat_tracks_io_op().
     344             : */
     345             : bool
     346       42112 : pgstat_tracks_io_bktype(BackendType bktype)
     347             : {
     348             :     /*
     349             :      * List every type so that new backend types trigger a warning about
     350             :      * needing to adjust this switch.
     351             :      */
     352       42112 :     switch (bktype)
     353             :     {
     354         672 :         case B_INVALID:
     355             :         case B_ARCHIVER:
     356             :         case B_LOGGER:
     357             :         case B_WAL_RECEIVER:
     358             :         case B_WAL_WRITER:
     359             :         case B_WAL_SUMMARIZER:
     360         672 :             return false;
     361             : 
     362       41440 :         case B_AUTOVAC_LAUNCHER:
     363             :         case B_AUTOVAC_WORKER:
     364             :         case B_BACKEND:
     365             :         case B_BG_WORKER:
     366             :         case B_BG_WRITER:
     367             :         case B_CHECKPOINTER:
     368             :         case B_SLOTSYNC_WORKER:
     369             :         case B_STANDALONE_BACKEND:
     370             :         case B_STARTUP:
     371             :         case B_WAL_SENDER:
     372       41440 :             return true;
     373             :     }
     374             : 
     375           0 :     return false;
     376             : }
     377             : 
     378             : /*
     379             :  * Some BackendTypes do not perform IO on certain IOObjects or in certain
     380             :  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
     381             :  * that the given BackendType is expected to do IO in the given IOContext and
     382             :  * on the given IOObject and that the given IOObject is expected to be operated
     383             :  * on in the given IOContext.
     384             :  */
     385             : bool
     386       40320 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
     387             :                         IOContext io_context)
     388             : {
     389             :     bool        no_temp_rel;
     390             : 
     391             :     /*
     392             :      * Some BackendTypes should never track IO statistics.
     393             :      */
     394       40320 :     if (!pgstat_tracks_io_bktype(bktype))
     395           0 :         return false;
     396             : 
     397             :     /*
     398             :      * Currently, IO on temporary relations can only occur in the
     399             :      * IOCONTEXT_NORMAL IOContext.
     400             :      */
     401       40320 :     if (io_context != IOCONTEXT_NORMAL &&
     402             :         io_object == IOOBJECT_TEMP_RELATION)
     403        3360 :         return false;
     404             : 
     405             :     /*
     406             :      * In core Postgres, only regular backends and WAL Sender processes
     407             :      * executing queries will use local buffers and operate on temporary
     408             :      * relations. Parallel workers will not use local buffers (see
     409             :      * InitLocalBuffers()); however, extensions leveraging background workers
     410             :      * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
     411             :      * BackendType B_BG_WORKER.
     412             :      */
     413       34608 :     no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     414       31696 :         bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
     415       71568 :         bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
     416             : 
     417       36960 :     if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
     418             :         io_object == IOOBJECT_TEMP_RELATION)
     419         672 :         return false;
     420             : 
     421             :     /*
     422             :      * Some BackendTypes do not currently perform any IO in certain
     423             :      * IOContexts, and, while it may not be inherently incorrect for them to
     424             :      * do so, excluding those rows from the view makes the view easier to use.
     425             :      */
     426       36288 :     if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
     427        2464 :         (io_context == IOCONTEXT_BULKREAD ||
     428        2240 :          io_context == IOCONTEXT_BULKWRITE ||
     429             :          io_context == IOCONTEXT_VACUUM))
     430         672 :         return false;
     431             : 
     432       35616 :     if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
     433         112 :         return false;
     434             : 
     435       35504 :     if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
     436             :         io_context == IOCONTEXT_BULKWRITE)
     437         224 :         return false;
     438             : 
     439       35280 :     return true;
     440             : }
     441             : 
     442             : /*
     443             :  * Some BackendTypes will never do certain IOOps and some IOOps should not
     444             :  * occur in certain IOContexts or on certain IOObjects. Check that the given
     445             :  * IOOp is valid for the given BackendType in the given IOContext and on the
     446             :  * given IOObject. Note that there are currently no cases of an IOOp being
     447             :  * invalid for a particular BackendType only within a certain IOContext and/or
     448             :  * only on a certain IOObject.
     449             :  */
     450             : bool
     451       31360 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
     452             :                     IOContext io_context, IOOp io_op)
     453             : {
     454             :     bool        strategy_io_context;
     455             : 
     456             :     /* if (io_context, io_object) will never collect stats, we're done */
     457       31360 :     if (!pgstat_tracks_io_object(bktype, io_object, io_context))
     458           0 :         return false;
     459             : 
     460             :     /*
     461             :      * Some BackendTypes will not do certain IOOps.
     462             :      */
     463       31360 :     if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
     464        1568 :         (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
     465         672 :         return false;
     466             : 
     467       30688 :     if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     468        2912 :          bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
     469         448 :         return false;
     470             : 
     471             :     /*
     472             :      * Temporary tables are not logged and thus do not require fsync'ing.
     473             :      * Writeback is not requested for temporary tables.
     474             :      */
     475       30240 :     if (io_object == IOOBJECT_TEMP_RELATION &&
     476        3136 :         (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
     477         896 :         return false;
     478             : 
     479             :     /*
     480             :      * Some IOOps are not valid in certain IOContexts and some IOOps are only
     481             :      * valid in certain contexts.
     482             :      */
     483       29344 :     if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
     484         784 :         return false;
     485             : 
     486       22288 :     strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
     487       50848 :         io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
     488             : 
     489             :     /*
     490             :      * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
     491             :      */
     492       28560 :     if (!strategy_io_context && io_op == IOOP_REUSE)
     493        1568 :         return false;
     494             : 
     495             :     /*
     496             :      * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
     497             :      * counted in the IOCONTEXT_NORMAL IOContext. See comment in
     498             :      * register_dirty_segment() for more details.
     499             :      */
     500       26992 :     if (strategy_io_context && io_op == IOOP_FSYNC)
     501        2352 :         return false;
     502             : 
     503             : 
     504       24640 :     return true;
     505             : }

Generated by: LCOV version 1.14