LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 130 155 83.9 %
Date: 2024-03-28 10:11:15 Functions: 13 14 92.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_io.c
       4             :  *    Implementation of IO statistics.
       5             :  *
       6             :  * This file contains the implementation of IO statistics. It is kept separate
       7             :  * from pgstat.c to enforce the line between the statistics access / storage
       8             :  * implementation and the details about individual types of statistics.
       9             :  *
      10             :  * Copyright (c) 2021-2024, PostgreSQL Global Development Group
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/utils/activity/pgstat_io.c
      14             :  * -------------------------------------------------------------------------
      15             :  */
      16             : 
      17             : #include "postgres.h"
      18             : 
      19             : #include "executor/instrument.h"
      20             : #include "storage/bufmgr.h"
      21             : #include "utils/pgstat_internal.h"
      22             : 
      23             : 
      24             : typedef struct PgStat_PendingIO
      25             : {
      26             :     PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
      27             :     instr_time  pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
      28             : } PgStat_PendingIO;
      29             : 
      30             : 
      31             : static PgStat_PendingIO PendingIOStats;
      32             : bool        have_iostats = false;
      33             : 
      34             : 
      35             : /*
      36             :  * Check that stats have not been counted for any combination of IOObject,
      37             :  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
      38             :  * stats are tracked for this combination and IO times are non-zero, counts
      39             :  * should be non-zero.
      40             :  *
      41             :  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
      42             :  * specified by the second parameter. Caller is responsible for locking the
      43             :  * passed-in PgStat_BktypeIO, if needed.
      44             :  */
      45             : bool
      46           0 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
      47             :                              BackendType bktype)
      48             : {
      49           0 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
      50             :     {
      51           0 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
      52             :         {
      53           0 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
      54             :             {
      55             :                 /* we do track it */
      56           0 :                 if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
      57             :                 {
      58             :                     /* ensure that if IO times are non-zero, counts are > 0 */
      59           0 :                     if (backend_io->times[io_object][io_context][io_op] != 0 &&
      60           0 :                         backend_io->counts[io_object][io_context][io_op] <= 0)
      61           0 :                         return false;
      62             : 
      63           0 :                     continue;
      64             :                 }
      65             : 
      66             :                 /* we don't track it, and it is not 0 */
      67           0 :                 if (backend_io->counts[io_object][io_context][io_op] != 0)
      68           0 :                     return false;
      69             :             }
      70             :         }
      71             :     }
      72             : 
      73           0 :     return true;
      74             : }
      75             : 
      76             : void
      77    93107174 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
      78             : {
      79    93107174 :     pgstat_count_io_op_n(io_object, io_context, io_op, 1);
      80    93107174 : }
      81             : 
      82             : void
      83    96503352 : pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
      84             : {
      85             :     Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
      86             :     Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
      87             :     Assert((unsigned int) io_op < IOOP_NUM_TYPES);
      88             :     Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
      89             : 
      90    96503352 :     PendingIOStats.counts[io_object][io_context][io_op] += cnt;
      91             : 
      92    96503352 :     have_iostats = true;
      93    96503352 : }
      94             : 
      95             : /*
      96             :  * Initialize the internal timing for an IO operation, depending on an
      97             :  * IO timing GUC.
      98             :  */
      99             : instr_time
     100     3396208 : pgstat_prepare_io_time(bool track_io_guc)
     101             : {
     102             :     instr_time  io_start;
     103             : 
     104     3396208 :     if (track_io_guc)
     105           2 :         INSTR_TIME_SET_CURRENT(io_start);
     106             :     else
     107             :     {
     108             :         /*
     109             :          * There is no need to set io_start when an IO timing GUC is disabled,
     110             :          * but still initialize it to zero to avoid compiler warnings.
     111             :          */
     112     3396206 :         INSTR_TIME_SET_ZERO(io_start);
     113             :     }
     114             : 
     115     3396208 :     return io_start;
     116             : }
     117             : 
     118             : /*
     119             :  * Like pgstat_count_io_op_n() except it also accumulates time.
     120             :  */
     121             : void
     122     3396178 : pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
     123             :                         instr_time start_time, uint32 cnt)
     124             : {
     125     3396178 :     if (track_io_timing)
     126             :     {
     127             :         instr_time  io_time;
     128             : 
     129           2 :         INSTR_TIME_SET_CURRENT(io_time);
     130           2 :         INSTR_TIME_SUBTRACT(io_time, start_time);
     131             : 
     132           2 :         if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
     133             :         {
     134           0 :             pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
     135           0 :             if (io_object == IOOBJECT_RELATION)
     136           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time);
     137           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     138           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time);
     139             :         }
     140           2 :         else if (io_op == IOOP_READ)
     141             :         {
     142           2 :             pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
     143           2 :             if (io_object == IOOBJECT_RELATION)
     144           2 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time);
     145           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     146           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time);
     147             :         }
     148             : 
     149           2 :         INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
     150             :                        io_time);
     151             :     }
     152             : 
     153     3396178 :     pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
     154     3396178 : }
     155             : 
     156             : PgStat_IO *
     157         112 : pgstat_fetch_stat_io(void)
     158             : {
     159         112 :     pgstat_snapshot_fixed(PGSTAT_KIND_IO);
     160             : 
     161         112 :     return &pgStatLocal.snapshot.io;
     162             : }
     163             : 
     164             : /*
     165             :  * Flush out locally pending IO statistics
     166             :  *
     167             :  * If no stats have been recorded, this function returns false.
     168             :  *
     169             :  * If nowait is true, this function returns true if the lock could not be
     170             :  * acquired. Otherwise, return false.
     171             :  */
     172             : bool
     173      131362 : pgstat_flush_io(bool nowait)
     174             : {
     175             :     LWLock     *bktype_lock;
     176             :     PgStat_BktypeIO *bktype_shstats;
     177             : 
     178      131362 :     if (!have_iostats)
     179       48504 :         return false;
     180             : 
     181       82858 :     bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
     182       82858 :     bktype_shstats =
     183       82858 :         &pgStatLocal.shmem->io.stats.stats[MyBackendType];
     184             : 
     185       82858 :     if (!nowait)
     186       61090 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     187       21768 :     else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
     188           0 :         return true;
     189             : 
     190      248574 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
     191             :     {
     192      828580 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
     193             :         {
     194     5965776 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
     195             :             {
     196             :                 instr_time  time;
     197             : 
     198     5302912 :                 bktype_shstats->counts[io_object][io_context][io_op] +=
     199     5302912 :                     PendingIOStats.counts[io_object][io_context][io_op];
     200             : 
     201     5302912 :                 time = PendingIOStats.pending_times[io_object][io_context][io_op];
     202             : 
     203     5302912 :                 bktype_shstats->times[io_object][io_context][io_op] +=
     204     5302912 :                     INSTR_TIME_GET_MICROSEC(time);
     205             :             }
     206             :         }
     207             :     }
     208             : 
     209             :     Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
     210             : 
     211       82858 :     LWLockRelease(bktype_lock);
     212             : 
     213       82858 :     memset(&PendingIOStats, 0, sizeof(PendingIOStats));
     214             : 
     215       82858 :     have_iostats = false;
     216             : 
     217       82858 :     return false;
     218             : }
     219             : 
     220             : const char *
     221        8960 : pgstat_get_io_context_name(IOContext io_context)
     222             : {
     223        8960 :     switch (io_context)
     224             :     {
     225        2240 :         case IOCONTEXT_BULKREAD:
     226        2240 :             return "bulkread";
     227        2240 :         case IOCONTEXT_BULKWRITE:
     228        2240 :             return "bulkwrite";
     229        2240 :         case IOCONTEXT_NORMAL:
     230        2240 :             return "normal";
     231        2240 :         case IOCONTEXT_VACUUM:
     232        2240 :             return "vacuum";
     233             :     }
     234             : 
     235           0 :     elog(ERROR, "unrecognized IOContext value: %d", io_context);
     236             :     pg_unreachable();
     237             : }
     238             : 
     239             : const char *
     240        2240 : pgstat_get_io_object_name(IOObject io_object)
     241             : {
     242        2240 :     switch (io_object)
     243             :     {
     244        1120 :         case IOOBJECT_RELATION:
     245        1120 :             return "relation";
     246        1120 :         case IOOBJECT_TEMP_RELATION:
     247        1120 :             return "temp relation";
     248             :     }
     249             : 
     250           0 :     elog(ERROR, "unrecognized IOObject value: %d", io_object);
     251             :     pg_unreachable();
     252             : }
     253             : 
     254             : void
     255         418 : pgstat_io_reset_all_cb(TimestampTz ts)
     256             : {
     257        7106 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     258             :     {
     259        6688 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     260        6688 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     261             : 
     262        6688 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     263             : 
     264             :         /*
     265             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     266             :          * the reset timestamp as well.
     267             :          */
     268        6688 :         if (i == 0)
     269         418 :             pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
     270             : 
     271        6688 :         memset(bktype_shstats, 0, sizeof(*bktype_shstats));
     272        6688 :         LWLockRelease(bktype_lock);
     273             :     }
     274         418 : }
     275             : 
     276             : void
     277        1160 : pgstat_io_snapshot_cb(void)
     278             : {
     279       19720 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     280             :     {
     281       18560 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     282       18560 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     283       18560 :         PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
     284             : 
     285       18560 :         LWLockAcquire(bktype_lock, LW_SHARED);
     286             : 
     287             :         /*
     288             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     289             :          * the reset timestamp as well.
     290             :          */
     291       18560 :         if (i == 0)
     292        1160 :             pgStatLocal.snapshot.io.stat_reset_timestamp =
     293        1160 :                 pgStatLocal.shmem->io.stats.stat_reset_timestamp;
     294             : 
     295             :         /* using struct assignment due to better type safety */
     296       18560 :         *bktype_snap = *bktype_shstats;
     297       18560 :         LWLockRelease(bktype_lock);
     298             :     }
     299        1160 : }
     300             : 
     301             : /*
     302             : * IO statistics are not collected for all BackendTypes.
     303             : *
     304             : * The following BackendTypes do not participate in the cumulative stats
     305             : * subsystem or do not perform IO that we currently track:
     306             : * - Syslogger because it is not connected to shared memory
     307             : * - Archiver because most relevant archiving IO is delegated to a
     308             : *   specialized command or module
     309             : * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
     310             : *   pg_stat_io for now
     311             : *
     312             : * Function returns true if BackendType participates in the cumulative stats
     313             : * subsystem for IO and false if it does not.
     314             : *
     315             : * When adding a new BackendType, also consider adding relevant restrictions to
     316             : * pgstat_tracks_io_object() and pgstat_tracks_io_op().
     317             : */
     318             : bool
     319       42112 : pgstat_tracks_io_bktype(BackendType bktype)
     320             : {
     321             :     /*
     322             :      * List every type so that new backend types trigger a warning about
     323             :      * needing to adjust this switch.
     324             :      */
     325       42112 :     switch (bktype)
     326             :     {
     327         672 :         case B_INVALID:
     328             :         case B_ARCHIVER:
     329             :         case B_LOGGER:
     330             :         case B_WAL_RECEIVER:
     331             :         case B_WAL_WRITER:
     332             :         case B_WAL_SUMMARIZER:
     333         672 :             return false;
     334             : 
     335       41440 :         case B_AUTOVAC_LAUNCHER:
     336             :         case B_AUTOVAC_WORKER:
     337             :         case B_BACKEND:
     338             :         case B_BG_WORKER:
     339             :         case B_BG_WRITER:
     340             :         case B_CHECKPOINTER:
     341             :         case B_SLOTSYNC_WORKER:
     342             :         case B_STANDALONE_BACKEND:
     343             :         case B_STARTUP:
     344             :         case B_WAL_SENDER:
     345       41440 :             return true;
     346             :     }
     347             : 
     348           0 :     return false;
     349             : }
     350             : 
     351             : /*
     352             :  * Some BackendTypes do not perform IO on certain IOObjects or in certain
     353             :  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
     354             :  * that the given BackendType is expected to do IO in the given IOContext and
     355             :  * on the given IOObject and that the given IOObject is expected to be operated
     356             :  * on in the given IOContext.
     357             :  */
     358             : bool
     359       40320 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
     360             :                         IOContext io_context)
     361             : {
     362             :     bool        no_temp_rel;
     363             : 
     364             :     /*
     365             :      * Some BackendTypes should never track IO statistics.
     366             :      */
     367       40320 :     if (!pgstat_tracks_io_bktype(bktype))
     368           0 :         return false;
     369             : 
     370             :     /*
     371             :      * Currently, IO on temporary relations can only occur in the
     372             :      * IOCONTEXT_NORMAL IOContext.
     373             :      */
     374       40320 :     if (io_context != IOCONTEXT_NORMAL &&
     375             :         io_object == IOOBJECT_TEMP_RELATION)
     376        3360 :         return false;
     377             : 
     378             :     /*
     379             :      * In core Postgres, only regular backends and WAL Sender processes
     380             :      * executing queries will use local buffers and operate on temporary
     381             :      * relations. Parallel workers will not use local buffers (see
     382             :      * InitLocalBuffers()); however, extensions leveraging background workers
     383             :      * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
     384             :      * BackendType B_BG_WORKER.
     385             :      */
     386       34608 :     no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     387       31696 :         bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
     388       71568 :         bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
     389             : 
     390       36960 :     if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
     391             :         io_object == IOOBJECT_TEMP_RELATION)
     392         672 :         return false;
     393             : 
     394             :     /*
     395             :      * Some BackendTypes do not currently perform any IO in certain
     396             :      * IOContexts, and, while it may not be inherently incorrect for them to
     397             :      * do so, excluding those rows from the view makes the view easier to use.
     398             :      */
     399       36288 :     if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
     400        2464 :         (io_context == IOCONTEXT_BULKREAD ||
     401        2240 :          io_context == IOCONTEXT_BULKWRITE ||
     402             :          io_context == IOCONTEXT_VACUUM))
     403         672 :         return false;
     404             : 
     405       35616 :     if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
     406         112 :         return false;
     407             : 
     408       35504 :     if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
     409             :         io_context == IOCONTEXT_BULKWRITE)
     410         224 :         return false;
     411             : 
     412       35280 :     return true;
     413             : }
     414             : 
     415             : /*
     416             :  * Some BackendTypes will never do certain IOOps and some IOOps should not
     417             :  * occur in certain IOContexts or on certain IOObjects. Check that the given
     418             :  * IOOp is valid for the given BackendType in the given IOContext and on the
     419             :  * given IOObject. Note that there are currently no cases of an IOOp being
     420             :  * invalid for a particular BackendType only within a certain IOContext and/or
     421             :  * only on a certain IOObject.
     422             :  */
     423             : bool
     424       31360 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
     425             :                     IOContext io_context, IOOp io_op)
     426             : {
     427             :     bool        strategy_io_context;
     428             : 
     429             :     /* if (io_context, io_object) will never collect stats, we're done */
     430       31360 :     if (!pgstat_tracks_io_object(bktype, io_object, io_context))
     431           0 :         return false;
     432             : 
     433             :     /*
     434             :      * Some BackendTypes will not do certain IOOps.
     435             :      */
     436       31360 :     if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
     437        1568 :         (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
     438         672 :         return false;
     439             : 
     440       30688 :     if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     441        2912 :          bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
     442         448 :         return false;
     443             : 
     444             :     /*
     445             :      * Temporary tables are not logged and thus do not require fsync'ing.
     446             :      * Writeback is not requested for temporary tables.
     447             :      */
     448       30240 :     if (io_object == IOOBJECT_TEMP_RELATION &&
     449        3136 :         (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
     450         896 :         return false;
     451             : 
     452             :     /*
     453             :      * Some IOOps are not valid in certain IOContexts and some IOOps are only
     454             :      * valid in certain contexts.
     455             :      */
     456       29344 :     if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
     457         784 :         return false;
     458             : 
     459       22288 :     strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
     460       50848 :         io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
     461             : 
     462             :     /*
     463             :      * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
     464             :      */
     465       28560 :     if (!strategy_io_context && io_op == IOOP_REUSE)
     466        1568 :         return false;
     467             : 
     468             :     /*
     469             :      * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
     470             :      * counted in the IOCONTEXT_NORMAL IOContext. See comment in
     471             :      * register_dirty_segment() for more details.
     472             :      */
     473       26992 :     if (strategy_io_context && io_op == IOOP_FSYNC)
     474        2352 :         return false;
     475             : 
     476             : 
     477       24640 :     return true;
     478             : }

Generated by: LCOV version 1.14