Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * aio_io.c 4 : * AIO - Low Level IO Handling 5 : * 6 : * Functions related to associating IO operations to IO Handles and IO-method 7 : * independent support functions for actually performing IO. 8 : * 9 : * 10 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 11 : * Portions Copyright (c) 1994, Regents of the University of California 12 : * 13 : * IDENTIFICATION 14 : * src/backend/storage/aio/aio_io.c 15 : * 16 : *------------------------------------------------------------------------- 17 : */ 18 : 19 : #include "postgres.h" 20 : 21 : #include "miscadmin.h" 22 : #include "storage/aio.h" 23 : #include "storage/aio_internal.h" 24 : #include "storage/fd.h" 25 : #include "utils/wait_event.h" 26 : 27 : 28 : static void pgaio_io_before_start(PgAioHandle *ioh); 29 : 30 : 31 : 32 : /* -------------------------------------------------------------------------------- 33 : * Public IO related functions operating on IO Handles 34 : * -------------------------------------------------------------------------------- 35 : */ 36 : 37 : /* 38 : * Scatter/gather IO needs to associate an iovec with the Handle. To support 39 : * worker mode this data needs to be in shared memory. 40 : */ 41 : int 42 2423296 : pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov) 43 : { 44 : Assert(ioh->state == PGAIO_HS_HANDED_OUT); 45 : 46 2423296 : *iov = &pgaio_ctl->iovecs[ioh->iovec_off]; 47 : 48 2423296 : return PG_IOV_MAX; 49 : } 50 : 51 : PgAioOp 52 972766 : pgaio_io_get_op(PgAioHandle *ioh) 53 : { 54 972766 : return ioh->op; 55 : } 56 : 57 : PgAioOpData * 58 972766 : pgaio_io_get_op_data(PgAioHandle *ioh) 59 : { 60 972766 : return &ioh->op_data; 61 : } 62 : 63 : 64 : 65 : /* -------------------------------------------------------------------------------- 66 : * "Start" routines for individual IO operations 67 : * 68 : * These are called by the code actually initiating an IO, to associate the IO 69 : * specific data with an AIO handle. 70 : * 71 : * Each of the "start" routines first needs to call pgaio_io_before_start(), 72 : * then fill IO specific fields in the handle and then finally call 73 : * pgaio_io_stage(). 74 : * -------------------------------------------------------------------------------- 75 : */ 76 : 77 : void 78 2423296 : pgaio_io_start_readv(PgAioHandle *ioh, 79 : int fd, int iovcnt, uint64 offset) 80 : { 81 2423296 : pgaio_io_before_start(ioh); 82 : 83 2423296 : ioh->op_data.read.fd = fd; 84 2423296 : ioh->op_data.read.offset = offset; 85 2423296 : ioh->op_data.read.iov_length = iovcnt; 86 : 87 2423296 : pgaio_io_stage(ioh, PGAIO_OP_READV); 88 2423296 : } 89 : 90 : void 91 0 : pgaio_io_start_writev(PgAioHandle *ioh, 92 : int fd, int iovcnt, uint64 offset) 93 : { 94 0 : pgaio_io_before_start(ioh); 95 : 96 0 : ioh->op_data.write.fd = fd; 97 0 : ioh->op_data.write.offset = offset; 98 0 : ioh->op_data.write.iov_length = iovcnt; 99 : 100 0 : pgaio_io_stage(ioh, PGAIO_OP_WRITEV); 101 0 : } 102 : 103 : 104 : 105 : /* -------------------------------------------------------------------------------- 106 : * Internal IO related functions operating on IO Handles 107 : * -------------------------------------------------------------------------------- 108 : */ 109 : 110 : /* 111 : * Execute IO operation synchronously. This is implemented here, not in 112 : * method_sync.c, because other IO methods also might use it / fall back to 113 : * it. 114 : */ 115 : void 116 2213712 : pgaio_io_perform_synchronously(PgAioHandle *ioh) 117 : { 118 2213712 : ssize_t result = 0; 119 2213712 : struct iovec *iov = &pgaio_ctl->iovecs[ioh->iovec_off]; 120 : 121 2213712 : START_CRIT_SECTION(); 122 : 123 : /* Perform IO. */ 124 2213712 : switch (ioh->op) 125 : { 126 2213712 : case PGAIO_OP_READV: 127 2213712 : pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_READ); 128 2213712 : result = pg_preadv(ioh->op_data.read.fd, iov, 129 2213712 : ioh->op_data.read.iov_length, 130 2213712 : ioh->op_data.read.offset); 131 2213712 : pgstat_report_wait_end(); 132 2213712 : break; 133 0 : case PGAIO_OP_WRITEV: 134 0 : pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_WRITE); 135 0 : result = pg_pwritev(ioh->op_data.write.fd, iov, 136 0 : ioh->op_data.write.iov_length, 137 0 : ioh->op_data.write.offset); 138 0 : pgstat_report_wait_end(); 139 0 : break; 140 0 : case PGAIO_OP_INVALID: 141 0 : elog(ERROR, "trying to execute invalid IO operation"); 142 : } 143 : 144 2213712 : ioh->result = result < 0 ? -errno : result; 145 : 146 2213712 : pgaio_io_process_completion(ioh, ioh->result); 147 : 148 2213712 : END_CRIT_SECTION(); 149 2213712 : } 150 : 151 : /* 152 : * Helper function to be called by IO operation preparation functions, before 153 : * any data in the handle is set. Mostly to centralize assertions. 154 : */ 155 : static void 156 2423296 : pgaio_io_before_start(PgAioHandle *ioh) 157 : { 158 : Assert(ioh->state == PGAIO_HS_HANDED_OUT); 159 : Assert(pgaio_my_backend->handed_out_io == ioh); 160 : Assert(pgaio_io_has_target(ioh)); 161 : Assert(ioh->op == PGAIO_OP_INVALID); 162 : 163 : /* 164 : * Otherwise the FDs referenced by the IO could be closed due to interrupt 165 : * processing. 166 : */ 167 : Assert(!INTERRUPTS_CAN_BE_PROCESSED()); 168 2423296 : } 169 : 170 : /* 171 : * Could be made part of the public interface, but it's not clear there's 172 : * really a use case for that. 173 : */ 174 : const char * 175 0 : pgaio_io_get_op_name(PgAioHandle *ioh) 176 : { 177 : Assert(ioh->op >= 0 && ioh->op < PGAIO_OP_COUNT); 178 : 179 0 : switch (ioh->op) 180 : { 181 0 : case PGAIO_OP_INVALID: 182 0 : return "invalid"; 183 0 : case PGAIO_OP_READV: 184 0 : return "read"; 185 0 : case PGAIO_OP_WRITEV: 186 0 : return "write"; 187 : } 188 : 189 0 : return NULL; /* silence compiler */ 190 : } 191 : 192 : /* 193 : * Used to determine if an IO needs to be waited upon before the file 194 : * descriptor can be closed. 195 : */ 196 : bool 197 0 : pgaio_io_uses_fd(PgAioHandle *ioh, int fd) 198 : { 199 : Assert(ioh->state >= PGAIO_HS_DEFINED); 200 : 201 0 : switch (ioh->op) 202 : { 203 0 : case PGAIO_OP_READV: 204 0 : return ioh->op_data.read.fd == fd; 205 0 : case PGAIO_OP_WRITEV: 206 0 : return ioh->op_data.write.fd == fd; 207 0 : case PGAIO_OP_INVALID: 208 0 : return false; 209 : } 210 : 211 0 : return false; /* silence compiler */ 212 : }