LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_io.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 131 165 79.4 %
Date: 2024-11-21 08:14:44 Functions: 16 17 94.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_io.c
       4             :  *    Implementation of IO statistics.
       5             :  *
       6             :  * This file contains the implementation of IO statistics. It is kept separate
       7             :  * from pgstat.c to enforce the line between the statistics access / storage
       8             :  * implementation and the details about individual types of statistics.
       9             :  *
      10             :  * Copyright (c) 2021-2024, PostgreSQL Global Development Group
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/utils/activity/pgstat_io.c
      14             :  * -------------------------------------------------------------------------
      15             :  */
      16             : 
      17             : #include "postgres.h"
      18             : 
      19             : #include "executor/instrument.h"
      20             : #include "storage/bufmgr.h"
      21             : #include "utils/pgstat_internal.h"
      22             : 
      23             : 
      24             : typedef struct PgStat_PendingIO
      25             : {
      26             :     PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
      27             :     instr_time  pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
      28             : } PgStat_PendingIO;
      29             : 
      30             : 
      31             : static PgStat_PendingIO PendingIOStats;
      32             : static bool have_iostats = false;
      33             : 
      34             : 
      35             : /*
      36             :  * Check that stats have not been counted for any combination of IOObject,
      37             :  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
      38             :  * stats are tracked for this combination and IO times are non-zero, counts
      39             :  * should be non-zero.
      40             :  *
      41             :  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
      42             :  * specified by the second parameter. Caller is responsible for locking the
      43             :  * passed-in PgStat_BktypeIO, if needed.
      44             :  */
      45             : bool
      46           0 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
      47             :                              BackendType bktype)
      48             : {
      49           0 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
      50             :     {
      51           0 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
      52             :         {
      53           0 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
      54             :             {
      55             :                 /* we do track it */
      56           0 :                 if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
      57             :                 {
      58             :                     /* ensure that if IO times are non-zero, counts are > 0 */
      59           0 :                     if (backend_io->times[io_object][io_context][io_op] != 0 &&
      60           0 :                         backend_io->counts[io_object][io_context][io_op] <= 0)
      61           0 :                         return false;
      62             : 
      63           0 :                     continue;
      64             :                 }
      65             : 
      66             :                 /* we don't track it, and it is not 0 */
      67           0 :                 if (backend_io->counts[io_object][io_context][io_op] != 0)
      68           0 :                     return false;
      69             :             }
      70             :         }
      71             :     }
      72             : 
      73           0 :     return true;
      74             : }
      75             : 
      76             : void
      77   108011756 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
      78             : {
      79   108011756 :     pgstat_count_io_op_n(io_object, io_context, io_op, 1);
      80   108011756 : }
      81             : 
      82             : void
      83   111440840 : pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
      84             : {
      85             :     Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
      86             :     Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
      87             :     Assert((unsigned int) io_op < IOOP_NUM_TYPES);
      88             :     Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
      89             : 
      90   111440840 :     PendingIOStats.counts[io_object][io_context][io_op] += cnt;
      91             : 
      92   111440840 :     have_iostats = true;
      93   111440840 : }
      94             : 
      95             : /*
      96             :  * Initialize the internal timing for an IO operation, depending on an
      97             :  * IO timing GUC.
      98             :  */
      99             : instr_time
     100     3429114 : pgstat_prepare_io_time(bool track_io_guc)
     101             : {
     102             :     instr_time  io_start;
     103             : 
     104     3429114 :     if (track_io_guc)
     105           0 :         INSTR_TIME_SET_CURRENT(io_start);
     106             :     else
     107             :     {
     108             :         /*
     109             :          * There is no need to set io_start when an IO timing GUC is disabled,
     110             :          * but still initialize it to zero to avoid compiler warnings.
     111             :          */
     112     3429114 :         INSTR_TIME_SET_ZERO(io_start);
     113             :     }
     114             : 
     115     3429114 :     return io_start;
     116             : }
     117             : 
     118             : /*
     119             :  * Like pgstat_count_io_op_n() except it also accumulates time.
     120             :  */
     121             : void
     122     3429084 : pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
     123             :                         instr_time start_time, uint32 cnt)
     124             : {
     125     3429084 :     if (track_io_timing)
     126             :     {
     127             :         instr_time  io_time;
     128             : 
     129           0 :         INSTR_TIME_SET_CURRENT(io_time);
     130           0 :         INSTR_TIME_SUBTRACT(io_time, start_time);
     131             : 
     132           0 :         if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
     133             :         {
     134           0 :             pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
     135           0 :             if (io_object == IOOBJECT_RELATION)
     136           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time);
     137           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     138           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time);
     139             :         }
     140           0 :         else if (io_op == IOOP_READ)
     141             :         {
     142           0 :             pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
     143           0 :             if (io_object == IOOBJECT_RELATION)
     144           0 :                 INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time);
     145           0 :             else if (io_object == IOOBJECT_TEMP_RELATION)
     146           0 :                 INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time);
     147             :         }
     148             : 
     149           0 :         INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
     150             :                        io_time);
     151             :     }
     152             : 
     153     3429084 :     pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
     154     3429084 : }
     155             : 
     156             : PgStat_IO *
     157         112 : pgstat_fetch_stat_io(void)
     158             : {
     159         112 :     pgstat_snapshot_fixed(PGSTAT_KIND_IO);
     160             : 
     161         112 :     return &pgStatLocal.snapshot.io;
     162             : }
     163             : 
     164             : /*
     165             :  * Check if there are any IO stats waiting for flush.
     166             :  */
     167             : bool
     168       13266 : pgstat_io_have_pending_cb(void)
     169             : {
     170       13266 :     return have_iostats;
     171             : }
     172             : 
     173             : /*
     174             :  * Simple wrapper around pgstat_io_flush_cb()
     175             :  */
     176             : void
     177      171200 : pgstat_flush_io(bool nowait)
     178             : {
     179      171200 :     (void) pgstat_io_flush_cb(nowait);
     180      171200 : }
     181             : 
     182             : /*
     183             :  * Flush out locally pending IO statistics
     184             :  *
     185             :  * If no stats have been recorded, this function returns false.
     186             :  *
     187             :  * If nowait is true, this function returns true if the lock could not be
     188             :  * acquired. Otherwise, return false.
     189             :  */
     190             : bool
     191      233402 : pgstat_io_flush_cb(bool nowait)
     192             : {
     193             :     LWLock     *bktype_lock;
     194             :     PgStat_BktypeIO *bktype_shstats;
     195             : 
     196      233402 :     if (!have_iostats)
     197       54794 :         return false;
     198             : 
     199      178608 :     bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
     200      178608 :     bktype_shstats =
     201      178608 :         &pgStatLocal.shmem->io.stats.stats[MyBackendType];
     202             : 
     203      178608 :     if (!nowait)
     204      152080 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     205       26528 :     else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
     206           0 :         return true;
     207             : 
     208      535824 :     for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
     209             :     {
     210     1786080 :         for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
     211             :         {
     212    12859776 :             for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
     213             :             {
     214             :                 instr_time  time;
     215             : 
     216    11430912 :                 bktype_shstats->counts[io_object][io_context][io_op] +=
     217    11430912 :                     PendingIOStats.counts[io_object][io_context][io_op];
     218             : 
     219    11430912 :                 time = PendingIOStats.pending_times[io_object][io_context][io_op];
     220             : 
     221    11430912 :                 bktype_shstats->times[io_object][io_context][io_op] +=
     222    11430912 :                     INSTR_TIME_GET_MICROSEC(time);
     223             :             }
     224             :         }
     225             :     }
     226             : 
     227             :     Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
     228             : 
     229      178608 :     LWLockRelease(bktype_lock);
     230             : 
     231      178608 :     memset(&PendingIOStats, 0, sizeof(PendingIOStats));
     232             : 
     233      178608 :     have_iostats = false;
     234             : 
     235      178608 :     return false;
     236             : }
     237             : 
     238             : const char *
     239        8960 : pgstat_get_io_context_name(IOContext io_context)
     240             : {
     241        8960 :     switch (io_context)
     242             :     {
     243        2240 :         case IOCONTEXT_BULKREAD:
     244        2240 :             return "bulkread";
     245        2240 :         case IOCONTEXT_BULKWRITE:
     246        2240 :             return "bulkwrite";
     247        2240 :         case IOCONTEXT_NORMAL:
     248        2240 :             return "normal";
     249        2240 :         case IOCONTEXT_VACUUM:
     250        2240 :             return "vacuum";
     251             :     }
     252             : 
     253           0 :     elog(ERROR, "unrecognized IOContext value: %d", io_context);
     254             :     pg_unreachable();
     255             : }
     256             : 
     257             : const char *
     258        2240 : pgstat_get_io_object_name(IOObject io_object)
     259             : {
     260        2240 :     switch (io_object)
     261             :     {
     262        1120 :         case IOOBJECT_RELATION:
     263        1120 :             return "relation";
     264        1120 :         case IOOBJECT_TEMP_RELATION:
     265        1120 :             return "temp relation";
     266             :     }
     267             : 
     268           0 :     elog(ERROR, "unrecognized IOObject value: %d", io_object);
     269             :     pg_unreachable();
     270             : }
     271             : 
     272             : void
     273        1902 : pgstat_io_init_shmem_cb(void *stats)
     274             : {
     275        1902 :     PgStatShared_IO *stat_shmem = (PgStatShared_IO *) stats;
     276             : 
     277       34236 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     278       32334 :         LWLockInitialize(&stat_shmem->locks[i], LWTRANCHE_PGSTATS_DATA);
     279        1902 : }
     280             : 
     281             : void
     282         464 : pgstat_io_reset_all_cb(TimestampTz ts)
     283             : {
     284        8352 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     285             :     {
     286        7888 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     287        7888 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     288             : 
     289        7888 :         LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
     290             : 
     291             :         /*
     292             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     293             :          * the reset timestamp as well.
     294             :          */
     295        7888 :         if (i == 0)
     296         464 :             pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
     297             : 
     298        7888 :         memset(bktype_shstats, 0, sizeof(*bktype_shstats));
     299        7888 :         LWLockRelease(bktype_lock);
     300             :     }
     301         464 : }
     302             : 
     303             : void
     304        1246 : pgstat_io_snapshot_cb(void)
     305             : {
     306       22428 :     for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     307             :     {
     308       21182 :         LWLock     *bktype_lock = &pgStatLocal.shmem->io.locks[i];
     309       21182 :         PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
     310       21182 :         PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
     311             : 
     312       21182 :         LWLockAcquire(bktype_lock, LW_SHARED);
     313             : 
     314             :         /*
     315             :          * Use the lock in the first BackendType's PgStat_BktypeIO to protect
     316             :          * the reset timestamp as well.
     317             :          */
     318       21182 :         if (i == 0)
     319        1246 :             pgStatLocal.snapshot.io.stat_reset_timestamp =
     320        1246 :                 pgStatLocal.shmem->io.stats.stat_reset_timestamp;
     321             : 
     322             :         /* using struct assignment due to better type safety */
     323       21182 :         *bktype_snap = *bktype_shstats;
     324       21182 :         LWLockRelease(bktype_lock);
     325             :     }
     326        1246 : }
     327             : 
     328             : /*
     329             : * IO statistics are not collected for all BackendTypes.
     330             : *
     331             : * The following BackendTypes do not participate in the cumulative stats
     332             : * subsystem or do not perform IO that we currently track:
     333             : * - Dead-end backend because it is not connected to shared memory and
     334             : *   doesn't do any IO
     335             : * - Syslogger because it is not connected to shared memory
     336             : * - Archiver because most relevant archiving IO is delegated to a
     337             : *   specialized command or module
     338             : * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
     339             : *   pg_stat_io for now
     340             : *
     341             : * Function returns true if BackendType participates in the cumulative stats
     342             : * subsystem for IO and false if it does not.
     343             : *
     344             : * When adding a new BackendType, also consider adding relevant restrictions to
     345             : * pgstat_tracks_io_object() and pgstat_tracks_io_op().
     346             : */
     347             : bool
     348       42224 : pgstat_tracks_io_bktype(BackendType bktype)
     349             : {
     350             :     /*
     351             :      * List every type so that new backend types trigger a warning about
     352             :      * needing to adjust this switch.
     353             :      */
     354       42224 :     switch (bktype)
     355             :     {
     356         784 :         case B_INVALID:
     357             :         case B_DEAD_END_BACKEND:
     358             :         case B_ARCHIVER:
     359             :         case B_LOGGER:
     360             :         case B_WAL_RECEIVER:
     361             :         case B_WAL_WRITER:
     362             :         case B_WAL_SUMMARIZER:
     363         784 :             return false;
     364             : 
     365       41440 :         case B_AUTOVAC_LAUNCHER:
     366             :         case B_AUTOVAC_WORKER:
     367             :         case B_BACKEND:
     368             :         case B_BG_WORKER:
     369             :         case B_BG_WRITER:
     370             :         case B_CHECKPOINTER:
     371             :         case B_SLOTSYNC_WORKER:
     372             :         case B_STANDALONE_BACKEND:
     373             :         case B_STARTUP:
     374             :         case B_WAL_SENDER:
     375       41440 :             return true;
     376             :     }
     377             : 
     378           0 :     return false;
     379             : }
     380             : 
     381             : /*
     382             :  * Some BackendTypes do not perform IO on certain IOObjects or in certain
     383             :  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
     384             :  * that the given BackendType is expected to do IO in the given IOContext and
     385             :  * on the given IOObject and that the given IOObject is expected to be operated
     386             :  * on in the given IOContext.
     387             :  */
     388             : bool
     389       40320 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
     390             :                         IOContext io_context)
     391             : {
     392             :     bool        no_temp_rel;
     393             : 
     394             :     /*
     395             :      * Some BackendTypes should never track IO statistics.
     396             :      */
     397       40320 :     if (!pgstat_tracks_io_bktype(bktype))
     398           0 :         return false;
     399             : 
     400             :     /*
     401             :      * Currently, IO on temporary relations can only occur in the
     402             :      * IOCONTEXT_NORMAL IOContext.
     403             :      */
     404       40320 :     if (io_context != IOCONTEXT_NORMAL &&
     405             :         io_object == IOOBJECT_TEMP_RELATION)
     406        3360 :         return false;
     407             : 
     408             :     /*
     409             :      * In core Postgres, only regular backends and WAL Sender processes
     410             :      * executing queries will use local buffers and operate on temporary
     411             :      * relations. Parallel workers will not use local buffers (see
     412             :      * InitLocalBuffers()); however, extensions leveraging background workers
     413             :      * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
     414             :      * BackendType B_BG_WORKER.
     415             :      */
     416       34608 :     no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     417       31696 :         bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
     418       71568 :         bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
     419             : 
     420       36960 :     if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
     421             :         io_object == IOOBJECT_TEMP_RELATION)
     422         672 :         return false;
     423             : 
     424             :     /*
     425             :      * Some BackendTypes do not currently perform any IO in certain
     426             :      * IOContexts, and, while it may not be inherently incorrect for them to
     427             :      * do so, excluding those rows from the view makes the view easier to use.
     428             :      */
     429       36288 :     if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
     430        2464 :         (io_context == IOCONTEXT_BULKREAD ||
     431        2240 :          io_context == IOCONTEXT_BULKWRITE ||
     432             :          io_context == IOCONTEXT_VACUUM))
     433         672 :         return false;
     434             : 
     435       35616 :     if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
     436         112 :         return false;
     437             : 
     438       35504 :     if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
     439             :         io_context == IOCONTEXT_BULKWRITE)
     440         224 :         return false;
     441             : 
     442       35280 :     return true;
     443             : }
     444             : 
     445             : /*
     446             :  * Some BackendTypes will never do certain IOOps and some IOOps should not
     447             :  * occur in certain IOContexts or on certain IOObjects. Check that the given
     448             :  * IOOp is valid for the given BackendType in the given IOContext and on the
     449             :  * given IOObject. Note that there are currently no cases of an IOOp being
     450             :  * invalid for a particular BackendType only within a certain IOContext and/or
     451             :  * only on a certain IOObject.
     452             :  */
     453             : bool
     454       31360 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
     455             :                     IOContext io_context, IOOp io_op)
     456             : {
     457             :     bool        strategy_io_context;
     458             : 
     459             :     /* if (io_context, io_object) will never collect stats, we're done */
     460       31360 :     if (!pgstat_tracks_io_object(bktype, io_object, io_context))
     461           0 :         return false;
     462             : 
     463             :     /*
     464             :      * Some BackendTypes will not do certain IOOps.
     465             :      */
     466       31360 :     if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
     467        1568 :         (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
     468         672 :         return false;
     469             : 
     470       30688 :     if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
     471        2912 :          bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
     472         448 :         return false;
     473             : 
     474             :     /*
     475             :      * Temporary tables are not logged and thus do not require fsync'ing.
     476             :      * Writeback is not requested for temporary tables.
     477             :      */
     478       30240 :     if (io_object == IOOBJECT_TEMP_RELATION &&
     479        3136 :         (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
     480         896 :         return false;
     481             : 
     482             :     /*
     483             :      * Some IOOps are not valid in certain IOContexts and some IOOps are only
     484             :      * valid in certain contexts.
     485             :      */
     486       29344 :     if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
     487         784 :         return false;
     488             : 
     489       22288 :     strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
     490       50848 :         io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
     491             : 
     492             :     /*
     493             :      * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
     494             :      */
     495       28560 :     if (!strategy_io_context && io_op == IOOP_REUSE)
     496        1568 :         return false;
     497             : 
     498             :     /*
     499             :      * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
     500             :      * counted in the IOCONTEXT_NORMAL IOContext. See comment in
     501             :      * register_dirty_segment() for more details.
     502             :      */
     503       26992 :     if (strategy_io_context && io_op == IOOP_FSYNC)
     504        2352 :         return false;
     505             : 
     506             : 
     507       24640 :     return true;
     508             : }

Generated by: LCOV version 1.14