Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * aio_io.c
4 : * AIO - Low Level IO Handling
5 : *
6 : * Functions related to associating IO operations with IO Handles and IO-method
7 : * independent support functions for actually performing IO.
8 : *
9 : *
10 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/storage/aio/aio_io.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 :
19 : #include "postgres.h"
20 :
21 : #include "miscadmin.h"
22 : #include "storage/aio.h"
23 : #include "storage/aio_internal.h"
24 : #include "storage/fd.h"
25 : #include "utils/wait_event.h"
26 :
27 :
28 : static void pgaio_io_before_start(PgAioHandle *ioh);
29 :
30 :
31 :
32 : /* --------------------------------------------------------------------------------
33 : * Public IO related functions operating on IO Handles
34 : * --------------------------------------------------------------------------------
35 : */
36 :
37 : /*
38 : * Scatter/gather IO needs to associate an iovec with the Handle. To support
39 : * worker mode this data needs to be in shared memory.
40 : */
41 : int
42 1316121 : pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
43 : {
44 : Assert(ioh->state == PGAIO_HS_HANDED_OUT);
45 :
46 1316121 : *iov = &pgaio_ctl->iovecs[ioh->iovec_off];
47 :
48 1316121 : return PG_IOV_MAX;
49 : }
50 :
51 : PgAioOp
52 480945 : pgaio_io_get_op(PgAioHandle *ioh)
53 : {
54 480945 : return ioh->op;
55 : }
56 :
57 : PgAioOpData *
58 480945 : pgaio_io_get_op_data(PgAioHandle *ioh)
59 : {
60 480945 : return &ioh->op_data;
61 : }
62 :
63 :
64 :
65 : /* --------------------------------------------------------------------------------
66 : * "Start" routines for individual IO operations
67 : *
68 : * These are called by the code actually initiating an IO, to associate the IO
69 : * specific data with an AIO handle.
70 : *
71 : * Each of the "start" routines first needs to call pgaio_io_before_start(),
72 : * then fill IO specific fields in the handle and then finally call
73 : * pgaio_io_stage().
74 : * --------------------------------------------------------------------------------
75 : */
76 :
77 : void
78 1316121 : pgaio_io_start_readv(PgAioHandle *ioh,
79 : int fd, int iovcnt, uint64 offset)
80 : {
81 1316121 : pgaio_io_before_start(ioh);
82 :
83 1316121 : ioh->op_data.read.fd = fd;
84 1316121 : ioh->op_data.read.offset = offset;
85 1316121 : ioh->op_data.read.iov_length = iovcnt;
86 :
87 1316121 : pgaio_io_stage(ioh, PGAIO_OP_READV);
88 1316121 : }
89 :
90 : void
91 0 : pgaio_io_start_writev(PgAioHandle *ioh,
92 : int fd, int iovcnt, uint64 offset)
93 : {
94 0 : pgaio_io_before_start(ioh);
95 :
96 0 : ioh->op_data.write.fd = fd;
97 0 : ioh->op_data.write.offset = offset;
98 0 : ioh->op_data.write.iov_length = iovcnt;
99 :
100 0 : pgaio_io_stage(ioh, PGAIO_OP_WRITEV);
101 0 : }
102 :
103 :
104 :
105 : /* --------------------------------------------------------------------------------
106 : * Internal IO related functions operating on IO Handles
107 : * --------------------------------------------------------------------------------
108 : */
109 :
110 : /*
111 : * Execute IO operation synchronously. This is implemented here, not in
112 : * method_sync.c, because other IO methods also might use it / fall back to
113 : * it.
114 : */
115 : void
116 1210123 : pgaio_io_perform_synchronously(PgAioHandle *ioh)
117 : {
118 1210123 : ssize_t result = 0;
119 1210123 : struct iovec *iov = &pgaio_ctl->iovecs[ioh->iovec_off];
120 :
121 1210123 : START_CRIT_SECTION();
122 :
123 : /* Perform IO. */
124 1210123 : switch ((PgAioOp) ioh->op)
125 : {
126 1210123 : case PGAIO_OP_READV:
127 1210123 : pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_READ);
128 1210123 : result = pg_preadv(ioh->op_data.read.fd, iov,
129 1210123 : ioh->op_data.read.iov_length,
130 1210123 : ioh->op_data.read.offset);
131 1210123 : pgstat_report_wait_end();
132 1210123 : break;
133 0 : case PGAIO_OP_WRITEV:
134 0 : pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_WRITE);
135 0 : result = pg_pwritev(ioh->op_data.write.fd, iov,
136 0 : ioh->op_data.write.iov_length,
137 0 : ioh->op_data.write.offset);
138 0 : pgstat_report_wait_end();
139 0 : break;
140 0 : case PGAIO_OP_INVALID:
141 0 : elog(ERROR, "trying to execute invalid IO operation");
142 : }
143 :
144 1210123 : ioh->result = result < 0 ? -errno : result;
145 :
146 1210123 : pgaio_io_process_completion(ioh, ioh->result);
147 :
148 1210123 : END_CRIT_SECTION();
149 1210123 : }
150 :
151 : /*
152 : * Helper function to be called by IO operation preparation functions, before
153 : * any data in the handle is set. Mostly to centralize assertions.
154 : */
155 : static void
156 1316121 : pgaio_io_before_start(PgAioHandle *ioh)
157 : {
158 : Assert(ioh->state == PGAIO_HS_HANDED_OUT);
159 : Assert(pgaio_my_backend->handed_out_io == ioh);
160 : Assert(pgaio_io_has_target(ioh));
161 : Assert(ioh->op == PGAIO_OP_INVALID);
162 :
163 : /*
164 : * Otherwise the FDs referenced by the IO could be closed due to interrupt
165 : * processing.
166 : */
167 : Assert(!INTERRUPTS_CAN_BE_PROCESSED());
168 1316121 : }
169 :
170 : /*
171 : * Could be made part of the public interface, but it's not clear there's
172 : * really a use case for that.
173 : */
174 : const char *
175 7238 : pgaio_io_get_op_name(PgAioHandle *ioh)
176 : {
177 : Assert(ioh->op >= 0 && ioh->op < PGAIO_OP_COUNT);
178 :
179 7238 : switch ((PgAioOp) ioh->op)
180 : {
181 2386 : case PGAIO_OP_INVALID:
182 2386 : return "invalid";
183 4852 : case PGAIO_OP_READV:
184 4852 : return "readv";
185 0 : case PGAIO_OP_WRITEV:
186 0 : return "writev";
187 : }
188 :
189 0 : return NULL; /* silence compiler */
190 : }
191 :
192 : /*
193 : * Used to determine if an IO needs to be waited upon before the file
194 : * descriptor can be closed.
195 : */
196 : bool
197 0 : pgaio_io_uses_fd(PgAioHandle *ioh, int fd)
198 : {
199 : Assert(ioh->state >= PGAIO_HS_DEFINED);
200 :
201 0 : switch ((PgAioOp) ioh->op)
202 : {
203 0 : case PGAIO_OP_READV:
204 0 : return ioh->op_data.read.fd == fd;
205 0 : case PGAIO_OP_WRITEV:
206 0 : return ioh->op_data.write.fd == fd;
207 0 : case PGAIO_OP_INVALID:
208 0 : return false;
209 : }
210 :
211 0 : return false; /* silence compiler */
212 : }
213 :
214 : /*
215 : * Return the iovec and its length. Currently only expected to be used by
216 : * debugging infrastructure
217 : */
218 : int
219 0 : pgaio_io_get_iovec_length(PgAioHandle *ioh, struct iovec **iov)
220 : {
221 : Assert(ioh->state >= PGAIO_HS_DEFINED);
222 :
223 0 : *iov = &pgaio_ctl->iovecs[ioh->iovec_off];
224 :
225 0 : switch ((PgAioOp) ioh->op)
226 : {
227 0 : case PGAIO_OP_READV:
228 0 : return ioh->op_data.read.iov_length;
229 0 : case PGAIO_OP_WRITEV:
230 0 : return ioh->op_data.write.iov_length;
231 0 : default:
232 0 : pg_unreachable();
233 : return 0;
234 : }
235 : }
|