Line data Source code
1 : /*
2 : * slru_io.c
3 : *
4 : * Routines for reading and writing SLRU files during upgrade.
5 : *
6 : * Copyright (c) 2025, PostgreSQL Global Development Group
7 : * src/bin/pg_upgrade/slru_io.c
8 : */
9 :
10 : #include "postgres_fe.h"
11 :
12 : #include <fcntl.h>
13 :
14 : #include "common/fe_memutils.h"
15 : #include "common/file_perm.h"
16 : #include "common/file_utils.h"
17 : #include "pg_upgrade.h"
18 : #include "port/pg_iovec.h"
19 : #include "slru_io.h"
20 :
21 : static SlruSegState *AllocSlruSegState(const char *dir);
22 : static char *SlruFileName(SlruSegState *state, int64 segno);
23 : static void SlruFlush(SlruSegState *state);
24 :
25 : /* common parts of AllocSlruRead and AllocSlruWrite */
26 : static SlruSegState *
27 0 : AllocSlruSegState(const char *dir)
28 : {
29 0 : SlruSegState *state = pg_malloc(sizeof(*state));
30 :
31 0 : state->dir = pstrdup(dir);
32 0 : state->fn = NULL;
33 0 : state->fd = -1;
34 0 : state->segno = -1;
35 0 : state->pageno = 0;
36 :
37 : /* state->writing and state->long_segment_names must be set by caller! */
38 :
39 0 : return state;
40 : }
41 :
42 : /* similar to the backend function with the same name */
43 : static char *
44 0 : SlruFileName(SlruSegState *state, int64 segno)
45 : {
46 0 : if (state->long_segment_names)
47 : {
48 : Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
49 0 : return psprintf("%s/%015" PRIX64, state->dir, segno);
50 : }
51 : else
52 : {
53 : Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
54 0 : return psprintf("%s/%04X", state->dir, (unsigned int) segno);
55 : }
56 : }
57 :
58 : /*
59 : * Create SLRU reader for dir.
60 : */
61 : SlruSegState *
62 0 : AllocSlruRead(const char *dir, bool long_segment_names)
63 : {
64 0 : SlruSegState *state = AllocSlruSegState(dir);
65 :
66 0 : state->writing = false;
67 0 : state->long_segment_names = long_segment_names;
68 :
69 0 : return state;
70 : }
71 :
72 : /*
73 : * Read the given page into memory buffer.
74 : *
75 : * Reading can be done in random order.
76 : *
77 : * If the file containing 'pageno' does not exist, a fatal error is raised.
78 : * If the file exists but is shorter than expected, the missing part is read
79 : * as zeros and a warning is logged. That is reasonable behavior for current
80 : * callers.
81 : *
82 : * This is the slow path of the inlineable SlruReadSwitchPage() function.
83 : */
84 : char *
85 0 : SlruReadSwitchPageSlow(SlruSegState *state, uint64 pageno)
86 : {
87 : int64 segno;
88 : off_t offset;
89 : ssize_t bytes_read;
90 :
91 : Assert(!state->writing); /* read only mode */
92 :
93 0 : if (state->segno != -1 && pageno == state->pageno)
94 0 : return state->buf.data;
95 :
96 : /* If the new page is on a different SLRU segment, open the new segment */
97 0 : segno = pageno / SLRU_PAGES_PER_SEGMENT;
98 0 : if (segno != state->segno)
99 : {
100 0 : if (state->segno != -1)
101 : {
102 0 : close(state->fd);
103 0 : state->fd = -1;
104 :
105 0 : pg_free(state->fn);
106 0 : state->fn = NULL;
107 :
108 0 : state->segno = -1;
109 : }
110 :
111 0 : state->fn = SlruFileName(state, segno);
112 0 : if ((state->fd = open(state->fn, O_RDONLY | PG_BINARY, 0)) < 0)
113 0 : pg_fatal("could not open file \"%s\": %m", state->fn);
114 0 : state->segno = segno;
115 : }
116 :
117 0 : offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
118 0 : bytes_read = 0;
119 0 : while (bytes_read < BLCKSZ)
120 : {
121 : ssize_t rc;
122 :
123 0 : rc = pg_pread(state->fd,
124 0 : &state->buf.data + bytes_read,
125 0 : BLCKSZ - bytes_read,
126 : offset);
127 0 : if (rc < 0)
128 : {
129 0 : if (errno == EINTR)
130 0 : continue;
131 0 : pg_fatal("could not read file \"%s\": %m", state->fn);
132 : }
133 0 : if (rc == 0)
134 : {
135 : /* unexpected EOF */
136 0 : pg_log(PG_WARNING, "unexpected EOF reading file \"%s\" at offset %u, reading as zeros",
137 : state->fn, (unsigned int) offset);
138 0 : memset(&state->buf.data + bytes_read, 0, BLCKSZ - bytes_read);
139 0 : break;
140 : }
141 0 : bytes_read += rc;
142 0 : offset += rc;
143 : }
144 0 : state->pageno = pageno;
145 :
146 0 : return state->buf.data;
147 : }
148 :
149 : /*
150 : * Free the reader.
151 : */
152 : void
153 0 : FreeSlruRead(SlruSegState *state)
154 : {
155 : Assert(!state->writing); /* read only mode */
156 :
157 0 : if (state->fd != -1)
158 0 : close(state->fd);
159 0 : pg_free(state);
160 0 : }
161 :
162 : /*
163 : * Create SLRU writer for dir.
164 : */
165 : SlruSegState *
166 0 : AllocSlruWrite(const char *dir, bool long_segment_names)
167 : {
168 0 : SlruSegState *state = AllocSlruSegState(dir);
169 :
170 0 : state->writing = true;
171 0 : state->long_segment_names = long_segment_names;
172 :
173 0 : return state;
174 : }
175 :
176 : /*
177 : * Open the given page for writing.
178 : *
179 : * NOTE: This uses O_EXCL when stepping to a new segment, so this assumes that
180 : * each segment is written in full before moving on to the next one. This
181 : * limitation would be easy to lift if needed, but it fits the usage pattern
182 : * of current callers.
183 : *
184 : * This is the slow path of the inlineable SlruWriteSwitchPage() function.
185 : */
186 : char *
187 0 : SlruWriteSwitchPageSlow(SlruSegState *state, uint64 pageno)
188 : {
189 : int64 segno;
190 : off_t offset;
191 :
192 : Assert(state->writing);
193 :
194 0 : if (state->segno != -1 && pageno == state->pageno)
195 0 : return state->buf.data;
196 :
197 0 : segno = pageno / SLRU_PAGES_PER_SEGMENT;
198 0 : offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
199 :
200 0 : SlruFlush(state);
201 0 : memset(state->buf.data, 0, BLCKSZ);
202 :
203 0 : if (segno != state->segno)
204 : {
205 0 : if (state->segno != -1)
206 : {
207 0 : close(state->fd);
208 0 : state->fd = -1;
209 :
210 0 : pg_free(state->fn);
211 0 : state->fn = NULL;
212 :
213 0 : state->segno = -1;
214 : }
215 :
216 : /* Create the segment */
217 0 : state->fn = SlruFileName(state, segno);
218 0 : if ((state->fd = open(state->fn, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
219 : pg_file_create_mode)) < 0)
220 : {
221 0 : pg_fatal("could not create file \"%s\": %m", state->fn);
222 : }
223 :
224 0 : state->segno = segno;
225 :
226 0 : if (offset > 0)
227 : {
228 0 : if (pg_pwrite_zeros(state->fd, offset, 0) < 0)
229 0 : pg_fatal("could not write file \"%s\": %m", state->fn);
230 : }
231 : }
232 :
233 0 : state->pageno = pageno;
234 :
235 0 : return state->buf.data;
236 : }
237 :
238 : static void
239 0 : SlruFlush(SlruSegState *state)
240 : {
241 0 : struct iovec iovec = {
242 0 : .iov_base = &state->buf,
243 : .iov_len = BLCKSZ,
244 : };
245 : off_t offset;
246 :
247 0 : if (state->segno == -1)
248 0 : return;
249 :
250 0 : offset = (state->pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
251 :
252 0 : if (pg_pwritev_with_retry(state->fd, &iovec, 1, offset) < 0)
253 0 : pg_fatal("could not write file \"%s\": %m", state->fn);
254 : }
255 :
256 : /*
257 : * Free the writer.
258 : */
259 : void
260 0 : FreeSlruWrite(SlruSegState *state)
261 : {
262 : Assert(state->writing);
263 :
264 0 : SlruFlush(state);
265 :
266 0 : if (state->fd != -1)
267 0 : close(state->fd);
268 0 : pg_free(state);
269 0 : }
|