Line data Source code
1 : /*
2 : * multixact_read_v18.c
3 : *
4 : * Functions to read multixact SLRUs from clusters of PostgreSQL version 18
5 : * and older. In version 19, the multixid offsets were expanded from 32 to 64
6 : * bits.
7 : *
8 : * Copyright (c) 2025, PostgreSQL Global Development Group
9 : * src/bin/pg_upgrade/multixact_read_v18.c
10 : */
11 :
12 : #include "postgres_fe.h"
13 :
14 : #include "multixact_read_v18.h"
15 : #include "pg_upgrade.h"
16 :
17 : /*
18 : * NOTE: below are a bunch of definitions that are copy-pasted from
19 : * multixact.c from version 18. It's important that this file doesn't
20 : * #include the new definitions with same names from "multixact_internal.h"!
21 : *
22 : * To further avoid confusion in the functions exposed outside this source
23 : * file, we use MultiXactOffset32 to represent the old-style 32-bit multixid
24 : * offsets. The new 64-bit MultiXactOffset should not be used anywhere in
25 : * this file.
26 : */
27 : #ifdef MULTIXACT_INTERNAL_H
28 : #error multixact_internal.h should not be included in multixact_read_v18.c
29 : #endif
30 : #define MultiXactOffset should_not_be_used /* poison the new 64-bit type name in this file; see NOTE above */
31 :
32 : /* Each old-style offset is a MultiXactOffset32; an offsets page is an array of them. */
33 : #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset32))
34 :
35 : static inline int64
36 0 : MultiXactIdToOffsetPage(MultiXactId multi)
37 : {
38 0 : return multi / MULTIXACT_OFFSETS_PER_PAGE;
39 : }
40 :
41 : static inline int
42 0 : MultiXactIdToOffsetEntry(MultiXactId multi)
43 : {
44 0 : return multi % MULTIXACT_OFFSETS_PER_PAGE;
45 : }
46 :
47 : /*
48 : * The situation for members is a bit more complex: we store one byte of
49 : * additional flag bits for each TransactionId. To do this without getting
50 : * into alignment issues, we store four bytes of flags, and then the
51 : * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
52 : * groups are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409
53 : * groups per page. This wastes 12 bytes per page, but that's OK -- simplicity
54 : * (and performance) trumps space efficiency here.
55 : *
56 : * Note that the "offset" helpers work with byte offsets, not array indexes, so
57 : * arithmetic must be done using "char *" pointers.
58 : */
59 : /* We need eight bits per xact, so one xact fits in a byte */
60 : #define MXACT_MEMBER_BITS_PER_XACT 8
61 : #define MXACT_MEMBER_FLAGS_PER_BYTE 1
62 : #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) /* selects one member's flag bits once shifted down */
63 :
64 : /* how many full bytes of flags are there in a group? */
65 : #define MULTIXACT_FLAGBYTES_PER_GROUP 4
66 : #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
67 : (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
68 : /* size in bytes of a complete group */
69 : #define MULTIXACT_MEMBERGROUP_SIZE \
70 : (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
71 : #define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE) /* whole groups only; leftover bytes stay unused */
72 : #define MULTIXACT_MEMBERS_PER_PAGE \
73 : (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
74 :
75 : /* page in which a member is to be found */
76 : static inline int64
77 0 : MXOffsetToMemberPage(MultiXactOffset32 offset)
78 : {
79 0 : return offset / MULTIXACT_MEMBERS_PER_PAGE;
80 : }
81 :
82 : /* Location (byte offset within page) of flag word for a given member */
83 : static inline int
84 0 : MXOffsetToFlagsOffset(MultiXactOffset32 offset)
85 : {
86 0 : MultiXactOffset32 group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
87 0 : int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
88 0 : int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
89 :
90 0 : return byteoff;
91 : }
92 :
93 : /* Location (byte offset within page) of TransactionId of given member */
94 : static inline int
95 0 : MXOffsetToMemberOffset(MultiXactOffset32 offset)
96 : {
97 0 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
98 :
99 0 : return MXOffsetToFlagsOffset(offset) +
100 0 : MULTIXACT_FLAGBYTES_PER_GROUP +
101 : member_in_group * sizeof(TransactionId);
102 : }
103 :
104 : static inline int
105 0 : MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
106 : {
107 0 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
108 0 : int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
109 :
110 0 : return bshift;
111 : }
112 :
113 : /*
114 : * Construct reader of old multixacts.
115 : *
116 : * Returns the malloced memory used by the all other calls in this module.
117 : */
118 : OldMultiXactReader *
119 0 : AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
120 : MultiXactOffset32 nextOffset)
121 : {
122 0 : OldMultiXactReader *state = state = pg_malloc(sizeof(*state));
123 0 : char dir[MAXPGPATH] = {0};
124 :
125 0 : state->nextMXact = nextMulti;
126 0 : state->nextOffset = nextOffset;
127 :
128 0 : pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
129 0 : state->offset = AllocSlruRead(dir, false);
130 :
131 0 : pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
132 0 : state->members = AllocSlruRead(dir, false);
133 :
134 0 : return state;
135 : }
136 :
137 : /*
138 : * This is a simplified version of the GetMultiXactIdMembers() server
139 : * function:
140 : *
141 : * - Only return the updating member, if any. Upgrade only cares about the
142 : * updaters. If there is no updating member, return somewhat arbitrarily
143 : * the first locking-only member, because we don't have any way to represent
144 : * "no members".
145 : *
146 : * - Because there's no concurrent activity, we don't need to worry about
147 : * locking and some corner cases.
148 : *
149 : * - Don't bail out on invalid entries. If the server crashes, it can leave
150 : * invalid or half-written entries on disk. Such multixids won't appear
151 : * anywhere else on disk, so the server will never try to read them. During
152 : * upgrade, however, we scan through all multixids in order, and will
153 : * encounter such invalid but unreferenced multixids too.
154 : *
155 : * Returns true on success, false if the multixact was invalid.
156 : */
157 : bool
158 0 : GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
159 : MultiXactMember *member)
160 : {
161 : MultiXactId nextMXact,
162 : nextOffset,
163 : tmpMXact;
164 : int64 pageno,
165 : prev_pageno;
166 : int entryno,
167 : length;
168 : char *buf;
169 : MultiXactOffset32 *offptr,
170 : offset;
171 : MultiXactOffset32 nextMXOffset;
172 0 : TransactionId result_xid = InvalidTransactionId;
173 0 : MultiXactStatus result_status = 0;
174 :
175 0 : nextMXact = state->nextMXact;
176 0 : nextOffset = state->nextOffset;
177 :
178 : /*
179 : * Comment copied from GetMultiXactIdMembers in PostgreSQL v18
180 : * multixact.c:
181 : *
182 : * Find out the offset at which we need to start reading MultiXactMembers
183 : * and the number of members in the multixact. We determine the latter as
184 : * the difference between this multixact's starting offset and the next
185 : * one's. However, there are some corner cases to worry about:
186 : *
187 : * 1. This multixact may be the latest one created, in which case there is
188 : * no next one to look at. The next multixact's offset should be set
189 : * already, as we set it in RecordNewMultiXact(), but we used to not do
190 : * that in older minor versions. To cope with that case, if this
191 : * multixact is the latest one created, use the nextOffset value we read
192 : * above as the endpoint.
193 : *
194 : * 2. Because GetNewMultiXactId skips over offset zero, to reserve zero
195 : * for to mean "unset", there is an ambiguity near the point of offset
196 : * wraparound. If we see next multixact's offset is one, is that our
197 : * multixact's actual endpoint, or did it end at zero with a subsequent
198 : * increment? We handle this using the knowledge that if the zero'th
199 : * member slot wasn't filled, it'll contain zero, and zero isn't a valid
200 : * transaction ID so it can't be a multixact member. Therefore, if we
201 : * read a zero from the members array, just ignore it.
202 : */
203 :
204 0 : pageno = MultiXactIdToOffsetPage(multi);
205 0 : entryno = MultiXactIdToOffsetEntry(multi);
206 :
207 0 : buf = SlruReadSwitchPage(state->offset, pageno);
208 0 : offptr = (MultiXactOffset32 *) buf;
209 0 : offptr += entryno;
210 0 : offset = *offptr;
211 :
212 0 : if (offset == 0)
213 : {
214 : /* Invalid entry */
215 0 : return false;
216 : }
217 :
218 : /*
219 : * Use the same increment rule as GetNewMultiXactId(), that is, don't
220 : * handle wraparound explicitly until needed.
221 : */
222 0 : tmpMXact = multi + 1;
223 :
224 0 : if (nextMXact == tmpMXact)
225 : {
226 : /* Corner case 1: there is no next multixact */
227 0 : nextMXOffset = nextOffset;
228 : }
229 : else
230 : {
231 : /* handle wraparound if needed */
232 0 : if (tmpMXact < FirstMultiXactId)
233 0 : tmpMXact = FirstMultiXactId;
234 :
235 0 : prev_pageno = pageno;
236 :
237 0 : pageno = MultiXactIdToOffsetPage(tmpMXact);
238 0 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
239 :
240 0 : if (pageno != prev_pageno)
241 0 : buf = SlruReadSwitchPage(state->offset, pageno);
242 :
243 0 : offptr = (MultiXactOffset32 *) buf;
244 0 : offptr += entryno;
245 0 : nextMXOffset = *offptr;
246 : }
247 :
248 0 : if (nextMXOffset == 0)
249 : {
250 : /* Invalid entry */
251 0 : return false;
252 : }
253 0 : length = nextMXOffset - offset;
254 :
255 : /* read the members */
256 0 : prev_pageno = -1;
257 0 : for (int i = 0; i < length; i++, offset++)
258 : {
259 : TransactionId *xactptr;
260 : uint32 *flagsptr;
261 : int flagsoff;
262 : int bshift;
263 : int memberoff;
264 : MultiXactStatus status;
265 :
266 0 : pageno = MXOffsetToMemberPage(offset);
267 0 : memberoff = MXOffsetToMemberOffset(offset);
268 :
269 0 : if (pageno != prev_pageno)
270 : {
271 0 : buf = SlruReadSwitchPage(state->members, pageno);
272 0 : prev_pageno = pageno;
273 : }
274 :
275 0 : xactptr = (TransactionId *) (buf + memberoff);
276 0 : if (!TransactionIdIsValid(*xactptr))
277 : {
278 : /*
279 : * Corner case 2: we are looking at unused slot zero
280 : */
281 0 : if (offset == 0)
282 0 : continue;
283 :
284 : /*
285 : * Otherwise this is an invalid entry that should not be
286 : * referenced from anywhere in the heap. We could return 'false'
287 : * here, but we prefer to continue reading the members and
288 : * converting them the best we can, to preserve evidence in case
289 : * this is corruption that should not happen.
290 : */
291 : }
292 :
293 0 : flagsoff = MXOffsetToFlagsOffset(offset);
294 0 : bshift = MXOffsetToFlagsBitShift(offset);
295 0 : flagsptr = (uint32 *) (buf + flagsoff);
296 :
297 0 : status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
298 :
299 : /*
300 : * Remember the updating XID among the members, or first locking XID
301 : * if no updating XID.
302 : */
303 0 : if (ISUPDATE_from_mxstatus(status))
304 : {
305 : /* sanity check */
306 0 : if (ISUPDATE_from_mxstatus(result_status))
307 : {
308 : /*
309 : * We don't expect to see more than one updating member, even
310 : * if the server had crashed.
311 : */
312 0 : pg_fatal("multixact %u has more than one updating member",
313 : multi);
314 : }
315 0 : result_xid = *xactptr;
316 0 : result_status = status;
317 : }
318 0 : else if (!TransactionIdIsValid(result_xid))
319 : {
320 0 : result_xid = *xactptr;
321 0 : result_status = status;
322 : }
323 : }
324 :
325 0 : member->xid = result_xid;
326 0 : member->status = result_status;
327 0 : return true;
328 : }
329 :
330 : /*
331 : * Frees the malloced reader.
332 : */
333 : void
334 0 : FreeOldMultiXactReader(OldMultiXactReader *state)
335 : {
336 0 : FreeSlruRead(state->offset);
337 0 : FreeSlruRead(state->members);
338 :
339 0 : pfree(state);
340 0 : }
|