Line data Source code
1 : /*
2 : * multixact_read_v18.c
3 : *
4 : * Functions to read multixact SLRUs from clusters of PostgreSQL version 18
5 : * and older. In version 19, the multixid offsets were expanded from 32 to 64
6 : * bits.
7 : *
8 : * Copyright (c) 2025-2026, PostgreSQL Global Development Group
9 : * src/bin/pg_upgrade/multixact_read_v18.c
10 : */
11 :
12 : #include "postgres_fe.h"
13 :
14 : #include "multixact_read_v18.h"
15 : #include "pg_upgrade.h"
16 :
17 : /*
18 : * NOTE: below are a bunch of definitions that are copy-pasted from
19 : * multixact.c from version 18. It's important that this file doesn't
20 : * #include the new definitions with same names from "multixact_internal.h"!
21 : *
22 : * To further avoid confusion in the functions exposed outside this source
23 : * file, we use MultiXactOffset32 to represent the old-style 32-bit multixid
24 : * offsets. The new 64-bit MultiXactOffset should not be used anywhere in
25 : * this file.
26 : */
27 : #ifdef MULTIXACT_INTERNAL_H
28 : #error multixact_internal.h should not be included in multixact_read_v18.c
29 : #endif
30 : #define MultiXactOffset should_not_be_used
31 :
32 : /* We need four bytes per offset; a page holds nothing but offsets. */
33 : #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset32))
34 :
35 : static inline int64
36 0 : MultiXactIdToOffsetPage(MultiXactId multi)
37 : {
38 0 : return multi / MULTIXACT_OFFSETS_PER_PAGE;
39 : }
40 :
41 : static inline int
42 0 : MultiXactIdToOffsetEntry(MultiXactId multi)
43 : {
44 0 : return multi % MULTIXACT_OFFSETS_PER_PAGE;
45 : }
46 :
47 : /*
48 : * The situation for members is a bit more complex: we store one byte of
49 : * additional flag bits for each TransactionId. To do this without getting
50 : * into alignment issues, we store four bytes of flags, and then the
51 : * corresponding 4 Xids. We call each such 5-word (20-byte) set a "group", and
52 : * groups are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
53 : * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
54 : * performance) trumps space efficiency here.
55 : *
56 : * Note that the "offset" macros work with byte offset, not array indexes, so
57 : * arithmetic must be done using "char *" pointers.
58 : */
59 : /* We need eight bits per xact, so one xact fits in a byte */
60 : #define MXACT_MEMBER_BITS_PER_XACT 8
61 : #define MXACT_MEMBER_FLAGS_PER_BYTE 1
62 : #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
63 :
64 : /* how many full bytes of flags are there in a group? */
65 : #define MULTIXACT_FLAGBYTES_PER_GROUP 4
66 : #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
67 : (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
68 : /* size in bytes of a complete group */
69 : #define MULTIXACT_MEMBERGROUP_SIZE \
70 : (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
71 : #define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
72 : #define MULTIXACT_MEMBERS_PER_PAGE \
73 : (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
74 :
75 : /* page in which a member is to be found */
76 : static inline int64
77 0 : MXOffsetToMemberPage(MultiXactOffset32 offset)
78 : {
79 0 : return offset / MULTIXACT_MEMBERS_PER_PAGE;
80 : }
81 :
82 : /* Location (byte offset within page) of flag word for a given member */
83 : static inline int
84 0 : MXOffsetToFlagsOffset(MultiXactOffset32 offset)
85 : {
86 0 : MultiXactOffset32 group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
87 0 : int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
88 0 : int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
89 :
90 0 : return byteoff;
91 : }
92 :
93 : /* Location (byte offset within page) of TransactionId of given member */
94 : static inline int
95 0 : MXOffsetToMemberOffset(MultiXactOffset32 offset)
96 : {
97 0 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
98 :
99 0 : return MXOffsetToFlagsOffset(offset) +
100 0 : MULTIXACT_FLAGBYTES_PER_GROUP +
101 : member_in_group * sizeof(TransactionId);
102 : }
103 :
104 : static inline int
105 0 : MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
106 : {
107 0 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
108 0 : int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
109 :
110 0 : return bshift;
111 : }
112 :
113 : /*
114 : * Construct reader of old multixacts.
115 : *
116 : * Returns the malloced memory used by the all other calls in this module.
117 : */
118 : OldMultiXactReader *
119 0 : AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
120 : MultiXactOffset32 nextOffset)
121 : {
122 0 : OldMultiXactReader *state = pg_malloc_object(OldMultiXactReader);
123 0 : char dir[MAXPGPATH] = {0};
124 :
125 0 : state->nextMXact = nextMulti;
126 0 : state->nextOffset = nextOffset;
127 :
128 0 : pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
129 0 : state->offset = AllocSlruRead(dir, false);
130 :
131 0 : pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
132 0 : state->members = AllocSlruRead(dir, false);
133 :
134 0 : return state;
135 : }
136 :
137 : /*
138 : * This is a simplified version of the GetMultiXactIdMembers() server
139 : * function:
140 : *
141 : * - Only return the updating member, if any. Upgrade only cares about the
142 : * updaters. If there is no updating member, return somewhat arbitrarily
143 : * the first locking-only member, because we don't have any way to represent
144 : * "no members".
145 : *
146 : * - Because there's no concurrent activity, we don't need to worry about
147 : * locking and some corner cases.
148 : *
149 : * - Don't bail out on invalid entries that could've been left behind after a
150 : * server crash. Such multixids won't appear anywhere else on disk, so the
151 : * server will never try to read them. During upgrade, however, we scan
152 : * through all multixids in order, and will encounter such invalid but
153 : * unreferenced multixids too. We try to distinguish between entries that
154 : * are invalid because of missed disk writes, like entries with zeros in
155 : * offsets or members, and entries that look corrupt in other ways that
156 : * should not happen even on a server crash.
157 : *
158 : * Returns true on success, false if the multixact was invalid.
159 : */
160 : bool
161 0 : GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
162 : MultiXactMember *member)
163 : {
164 : MultiXactId nextMXact,
165 : nextOffset,
166 : tmpMXact;
167 : int64 pageno,
168 : prev_pageno;
169 : int entryno,
170 : length;
171 : char *buf;
172 : MultiXactOffset32 *offptr,
173 : offset;
174 : MultiXactOffset32 nextMXOffset;
175 0 : TransactionId result_xid = InvalidTransactionId;
176 0 : MultiXactStatus result_status = 0;
177 :
178 0 : nextMXact = state->nextMXact;
179 0 : nextOffset = state->nextOffset;
180 :
181 : /*
182 : * Comment copied from GetMultiXactIdMembers in PostgreSQL v18
183 : * multixact.c:
184 : *
185 : * Find out the offset at which we need to start reading MultiXactMembers
186 : * and the number of members in the multixact. We determine the latter as
187 : * the difference between this multixact's starting offset and the next
188 : * one's. However, there are some corner cases to worry about:
189 : *
190 : * 1. This multixact may be the latest one created, in which case there is
191 : * no next one to look at. The next multixact's offset should be set
192 : * already, as we set it in RecordNewMultiXact(), but we used to not do
193 : * that in older minor versions. To cope with that case, if this
194 : * multixact is the latest one created, use the nextOffset value we read
195 : * above as the endpoint.
196 : *
197 : * 2. Because GetNewMultiXactId skips over offset zero, to reserve zero
198 : * for to mean "unset", there is an ambiguity near the point of offset
199 : * wraparound. If we see next multixact's offset is one, is that our
200 : * multixact's actual endpoint, or did it end at zero with a subsequent
201 : * increment? We handle this using the knowledge that if the zero'th
202 : * member slot wasn't filled, it'll contain zero, and zero isn't a valid
203 : * transaction ID so it can't be a multixact member. Therefore, if we
204 : * read a zero from the members array, just ignore it.
205 : */
206 :
207 0 : pageno = MultiXactIdToOffsetPage(multi);
208 0 : entryno = MultiXactIdToOffsetEntry(multi);
209 :
210 0 : buf = SlruReadSwitchPage(state->offset, pageno);
211 0 : offptr = (MultiXactOffset32 *) buf;
212 0 : offptr += entryno;
213 0 : offset = *offptr;
214 :
215 0 : if (offset == 0)
216 : {
217 : /* Invalid entry. These can be left behind on a server crash. */
218 0 : return false;
219 : }
220 :
221 : /*
222 : * Use the same increment rule as GetNewMultiXactId(), that is, don't
223 : * handle wraparound explicitly until needed.
224 : */
225 0 : tmpMXact = multi + 1;
226 :
227 0 : if (nextMXact == tmpMXact)
228 : {
229 : /* Corner case 1: there is no next multixact */
230 0 : nextMXOffset = nextOffset;
231 : }
232 : else
233 : {
234 : /* handle wraparound if needed */
235 0 : if (tmpMXact < FirstMultiXactId)
236 0 : tmpMXact = FirstMultiXactId;
237 :
238 0 : prev_pageno = pageno;
239 :
240 0 : pageno = MultiXactIdToOffsetPage(tmpMXact);
241 0 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
242 :
243 0 : if (pageno != prev_pageno)
244 0 : buf = SlruReadSwitchPage(state->offset, pageno);
245 :
246 0 : offptr = (MultiXactOffset32 *) buf;
247 0 : offptr += entryno;
248 0 : nextMXOffset = *offptr;
249 : }
250 :
251 0 : if (nextMXOffset == 0)
252 : {
253 : /* Invalid entry. These can be left behind on a server crash. */
254 0 : return false;
255 : }
256 0 : length = nextMXOffset - offset;
257 :
258 0 : if (length < 0)
259 : {
260 : /*
261 : * This entry is corrupt. We should not see these even after a server
262 : * crash.
263 : */
264 0 : pg_fatal("multixact %u has an invalid length (%d)", multi, length);
265 : }
266 0 : if (length == 0)
267 : {
268 : /*
269 : * Invalid entry. The server never writes multixids with zero
270 : * members, but it's not clear if a server crash or using pg_resetwal
271 : * could leave them behind. Seems best to accept them.
272 : */
273 0 : return false;
274 : }
275 :
276 : /* read the members */
277 0 : prev_pageno = -1;
278 0 : for (int i = 0; i < length; i++, offset++)
279 : {
280 : TransactionId *xactptr;
281 : uint32 *flagsptr;
282 : int flagsoff;
283 : int bshift;
284 : int memberoff;
285 : MultiXactStatus status;
286 :
287 0 : pageno = MXOffsetToMemberPage(offset);
288 0 : memberoff = MXOffsetToMemberOffset(offset);
289 :
290 0 : if (pageno != prev_pageno)
291 : {
292 0 : buf = SlruReadSwitchPage(state->members, pageno);
293 0 : prev_pageno = pageno;
294 : }
295 :
296 0 : xactptr = (TransactionId *) (buf + memberoff);
297 0 : if (!TransactionIdIsValid(*xactptr))
298 : {
299 : /*
300 : * Corner case 2: offset must have wrapped around to unused slot
301 : * zero.
302 : */
303 0 : if (offset == 0)
304 0 : continue;
305 :
306 : /*
307 : * Otherwise this is an invalid entry that should not be
308 : * referenced from anywhere in the heap. These can be left behind
309 : * on a server crash. We could return 'false' here, but we prefer
310 : * to continue reading the members and converting them the best we
311 : * can, to preserve evidence in case this is corruption that
312 : * should not have happened.
313 : */
314 : }
315 :
316 0 : flagsoff = MXOffsetToFlagsOffset(offset);
317 0 : bshift = MXOffsetToFlagsBitShift(offset);
318 0 : flagsptr = (uint32 *) (buf + flagsoff);
319 :
320 0 : status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
321 :
322 : /*
323 : * Remember the updating XID among the members, or first locking XID
324 : * if no updating XID.
325 : */
326 0 : if (ISUPDATE_from_mxstatus(status))
327 : {
328 : /* sanity check */
329 0 : if (ISUPDATE_from_mxstatus(result_status))
330 : {
331 : /*
332 : * We don't expect to see more than one updating member, even
333 : * if the server had crashed.
334 : */
335 0 : pg_fatal("multixact %u has more than one updating member",
336 : multi);
337 : }
338 0 : result_xid = *xactptr;
339 0 : result_status = status;
340 : }
341 0 : else if (!TransactionIdIsValid(result_xid))
342 : {
343 0 : result_xid = *xactptr;
344 0 : result_status = status;
345 : }
346 : }
347 :
348 0 : member->xid = result_xid;
349 0 : member->status = result_status;
350 0 : return true;
351 : }
352 :
353 : /*
354 : * Frees the malloced reader.
355 : */
356 : void
357 0 : FreeOldMultiXactReader(OldMultiXactReader *state)
358 : {
359 0 : FreeSlruRead(state->offset);
360 0 : FreeSlruRead(state->members);
361 :
362 0 : pfree(state);
363 0 : }
|