Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * subtrans.c
4 : * PostgreSQL subtransaction-log manager
5 : *
6 : * The pg_subtrans manager is a pg_xact-like manager that stores the parent
7 : * transaction Id for each transaction. It is a fundamental part of the
8 : * nested transactions implementation. A main transaction has a parent
9 : * of InvalidTransactionId, and each subtransaction has its immediate parent.
10 : * The tree can easily be walked from child to parent, but not in the
11 : * opposite direction.
12 : *
13 : * This code is based on clog.c, but the robustness requirements
14 : * are completely different from pg_xact, because we only need to remember
15 : * pg_subtrans information for currently-open transactions. Thus, there is
16 : * no need to preserve data over a crash and restart.
17 : *
18 : * There are no XLOG interactions since we do not care about preserving
19 : * data across crashes. During database startup, we simply force the
20 : * currently-active page of SUBTRANS to zeroes.
21 : *
22 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
23 : * Portions Copyright (c) 1994, Regents of the University of California
24 : *
25 : * src/backend/access/transam/subtrans.c
26 : *
27 : *-------------------------------------------------------------------------
28 : */
29 : #include "postgres.h"
30 :
31 : #include "access/slru.h"
32 : #include "access/subtrans.h"
33 : #include "access/transam.h"
34 : #include "miscadmin.h"
35 : #include "pg_trace.h"
36 : #include "storage/subsystems.h"
37 : #include "utils/guc_hooks.h"
38 : #include "utils/snapmgr.h"
39 :
40 :
41 : /*
42 : * Defines for SubTrans page sizes. A page is the same BLCKSZ as is used
43 : * everywhere else in Postgres.
44 : *
45 : * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
46 : * SubTrans page numbering also wraps around at
47 : * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
48 : * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no
49 : * explicit notice of that fact in this module, except when comparing segment
50 : * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes) and zeroing
51 : * them in StartupSUBTRANS.
52 : */
53 :
/* One TransactionId-sized (four byte) entry is stored per xact */
#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
56 :
57 : /*
58 : * Although we return an int64 the actual value can't currently exceed
59 : * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE.
60 : */
61 : static inline int64
62 6925442 : TransactionIdToPage(TransactionId xid)
63 : {
64 6925442 : return xid / (int64) SUBTRANS_XACTS_PER_PAGE;
65 : }
66 :
67 : #define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
68 :
69 :
70 : static void SUBTRANSShmemRequest(void *arg);
71 : static void SUBTRANSShmemInit(void *arg);
72 : static bool SubTransPagePrecedes(int64 page1, int64 page2);
73 : static int subtrans_errdetail_for_io_error(const void *opaque_data);
74 :
75 : const ShmemCallbacks SUBTRANSShmemCallbacks = {
76 : .request_fn = SUBTRANSShmemRequest,
77 : .init_fn = SUBTRANSShmemInit,
78 : };
79 :
80 : /*
81 : * Link to shared-memory data structures for SUBTRANS control
82 : */
83 : static SlruDesc SubTransSlruDesc;
84 :
85 : #define SubTransCtl (&SubTransSlruDesc)
86 :
87 :
88 : /*
89 : * Record the parent of a subtransaction in the subtrans log.
90 : */
91 : void
92 6763 : SubTransSetParent(TransactionId xid, TransactionId parent)
93 : {
94 6763 : int64 pageno = TransactionIdToPage(xid);
95 6763 : int entryno = TransactionIdToEntry(xid);
96 : int slotno;
97 : LWLock *lock;
98 : TransactionId *ptr;
99 :
100 : Assert(TransactionIdIsValid(parent));
101 : Assert(TransactionIdFollows(xid, parent));
102 :
103 6763 : lock = SimpleLruGetBankLock(SubTransCtl, pageno);
104 6763 : LWLockAcquire(lock, LW_EXCLUSIVE);
105 :
106 6763 : slotno = SimpleLruReadPage(SubTransCtl, pageno, true, &xid);
107 6763 : ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
108 6763 : ptr += entryno;
109 :
110 : /*
111 : * It's possible we'll try to set the parent xid multiple times but we
112 : * shouldn't ever be changing the xid from one valid xid to another valid
113 : * xid, which would corrupt the data structure.
114 : */
115 6763 : if (*ptr != parent)
116 : {
117 : Assert(*ptr == InvalidTransactionId);
118 6176 : *ptr = parent;
119 6176 : SubTransCtl->shared->page_dirty[slotno] = true;
120 : }
121 :
122 6763 : LWLockRelease(lock);
123 6763 : }
124 :
125 : /*
126 : * Interrogate the parent of a transaction in the subtrans log.
127 : */
128 : TransactionId
129 3138 : SubTransGetParent(TransactionId xid)
130 : {
131 3138 : int64 pageno = TransactionIdToPage(xid);
132 3138 : int entryno = TransactionIdToEntry(xid);
133 : int slotno;
134 : TransactionId *ptr;
135 : TransactionId parent;
136 :
137 : /* Can't ask about stuff that might not be around anymore */
138 : Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
139 :
140 : /* Bootstrap and frozen XIDs have no parent */
141 3138 : if (!TransactionIdIsNormal(xid))
142 0 : return InvalidTransactionId;
143 :
144 : /* lock is acquired by SimpleLruReadPage_ReadOnly */
145 :
146 3138 : slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, &xid);
147 3138 : ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
148 3138 : ptr += entryno;
149 :
150 3138 : parent = *ptr;
151 :
152 3138 : LWLockRelease(SimpleLruGetBankLock(SubTransCtl, pageno));
153 :
154 3138 : return parent;
155 : }
156 :
157 : /*
158 : * SubTransGetTopmostTransaction
159 : *
160 : * Returns the topmost transaction of the given transaction id.
161 : *
162 : * Because we cannot look back further than TransactionXmin, it is possible
163 : * that this function will lie and return an intermediate subtransaction ID
164 : * instead of the true topmost parent ID. This is OK, because in practice
165 : * we only care about detecting whether the topmost parent is still running
166 : * or is part of a current snapshot's list of still-running transactions.
167 : * Therefore, any XID before TransactionXmin is as good as any other.
168 : */
169 : TransactionId
170 1133 : SubTransGetTopmostTransaction(TransactionId xid)
171 : {
172 1133 : TransactionId parentXid = xid,
173 1133 : previousXid = xid;
174 :
175 : /* Can't ask about stuff that might not be around anymore */
176 : Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
177 :
178 4271 : while (TransactionIdIsValid(parentXid))
179 : {
180 3138 : previousXid = parentXid;
181 3138 : if (TransactionIdPrecedes(parentXid, TransactionXmin))
182 0 : break;
183 3138 : parentXid = SubTransGetParent(parentXid);
184 :
185 : /*
186 : * By convention the parent xid gets allocated first, so should always
187 : * precede the child xid. Anything else points to a corrupted data
188 : * structure that could lead to an infinite loop, so exit.
189 : */
190 3138 : if (!TransactionIdPrecedes(parentXid, previousXid))
191 0 : elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
192 : previousXid, parentXid);
193 : }
194 :
195 : Assert(TransactionIdIsValid(previousXid));
196 :
197 1133 : return previousXid;
198 : }
199 :
200 : /*
201 : * Number of shared SUBTRANS buffers.
202 : *
203 : * If asked to autotune, use 2MB for every 1GB of shared buffers, up to 8MB.
204 : * Otherwise just cap the configured amount to be between 16 and the maximum
205 : * allowed.
206 : */
207 : static int
208 2464 : SUBTRANSShmemBuffers(void)
209 : {
210 : /* auto-tune based on shared buffers */
211 2464 : if (subtransaction_buffers == 0)
212 1226 : return SimpleLruAutotuneBuffers(512, 1024);
213 :
214 1238 : return Min(Max(16, subtransaction_buffers), SLRU_MAX_ALLOWED_BUFFERS);
215 : }
216 :
217 :
218 :
219 : /*
220 : * Register shared memory for SUBTRANS
221 : */
222 : static void
223 1238 : SUBTRANSShmemRequest(void *arg)
224 : {
225 : /* If auto-tuning is requested, now is the time to do it */
226 1238 : if (subtransaction_buffers == 0)
227 : {
228 : char buf[32];
229 :
230 1226 : snprintf(buf, sizeof(buf), "%d", SUBTRANSShmemBuffers());
231 1226 : SetConfigOption("subtransaction_buffers", buf, PGC_POSTMASTER,
232 : PGC_S_DYNAMIC_DEFAULT);
233 :
234 : /*
235 : * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
236 : * However, if the DBA explicitly set subtransaction_buffers = 0 in
237 : * the config file, then PGC_S_DYNAMIC_DEFAULT will fail to override
238 : * that and we must force the matter with PGC_S_OVERRIDE.
239 : */
240 1226 : if (subtransaction_buffers == 0) /* failed to apply it? */
241 0 : SetConfigOption("subtransaction_buffers", buf, PGC_POSTMASTER,
242 : PGC_S_OVERRIDE);
243 : }
244 : Assert(subtransaction_buffers != 0);
245 :
246 1238 : SimpleLruRequest(.desc = &SubTransSlruDesc,
247 : .name = "subtransaction",
248 : .Dir = "pg_subtrans",
249 : .long_segment_names = false,
250 :
251 : .nslots = SUBTRANSShmemBuffers(),
252 :
253 : .sync_handler = SYNC_HANDLER_NONE,
254 : .PagePrecedes = SubTransPagePrecedes,
255 : .errdetail_for_io_error = subtrans_errdetail_for_io_error,
256 :
257 : .buffer_tranche_id = LWTRANCHE_SUBTRANS_BUFFER,
258 : .bank_tranche_id = LWTRANCHE_SUBTRANS_SLRU,
259 : );
260 1238 : }
261 :
262 : static void
263 1235 : SUBTRANSShmemInit(void *arg)
264 : {
265 : SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
266 1235 : }
267 :
268 : /*
269 : * GUC check_hook for subtransaction_buffers
270 : */
271 : bool
272 2529 : check_subtrans_buffers(int *newval, void **extra, GucSource source)
273 : {
274 2529 : return check_slru_buffers("subtransaction_buffers", newval);
275 : }
276 :
277 : /*
278 : * This func must be called ONCE on system install. It creates
279 : * the initial SUBTRANS segment. (The SUBTRANS directory is assumed to
280 : * have been created by the initdb shell script, and SUBTRANSShmemInit
281 : * must have been called already.)
282 : *
283 : * Note: it's not really necessary to create the initial segment now,
284 : * since slru.c would create it on first write anyway. But we may as well
285 : * do it to be sure the directory is set up correctly.
286 : */
287 : void
288 57 : BootStrapSUBTRANS(void)
289 : {
290 : /* Zero the initial page and flush it to disk */
291 57 : SimpleLruZeroAndWritePage(SubTransCtl, 0);
292 57 : }
293 :
294 : /*
295 : * This must be called ONCE during postmaster or standalone-backend startup,
296 : * after StartupXLOG has initialized TransamVariables->nextXid.
297 : *
298 : * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
299 : * if there are none.
300 : */
301 : void
302 1072 : StartupSUBTRANS(TransactionId oldestActiveXID)
303 : {
304 : FullTransactionId nextXid;
305 : int64 startPage;
306 : int64 endPage;
307 1072 : LWLock *prevlock = NULL;
308 : LWLock *lock;
309 :
310 : /*
311 : * Since we don't expect pg_subtrans to be valid across crashes, we
312 : * initialize the currently-active page(s) to zeroes during startup.
313 : * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
314 : * the new page without regard to whatever was previously on disk.
315 : */
316 1072 : startPage = TransactionIdToPage(oldestActiveXID);
317 1072 : nextXid = TransamVariables->nextXid;
318 1072 : endPage = TransactionIdToPage(XidFromFullTransactionId(nextXid));
319 :
320 : for (;;)
321 : {
322 1074 : lock = SimpleLruGetBankLock(SubTransCtl, startPage);
323 1074 : if (prevlock != lock)
324 : {
325 1074 : if (prevlock)
326 2 : LWLockRelease(prevlock);
327 1074 : LWLockAcquire(lock, LW_EXCLUSIVE);
328 1074 : prevlock = lock;
329 : }
330 :
331 1074 : (void) SimpleLruZeroPage(SubTransCtl, startPage);
332 1074 : if (startPage == endPage)
333 1072 : break;
334 :
335 2 : startPage++;
336 : /* must account for wraparound */
337 2 : if (startPage > TransactionIdToPage(MaxTransactionId))
338 0 : startPage = 0;
339 : }
340 :
341 1072 : LWLockRelease(lock);
342 1072 : }
343 :
344 : /*
345 : * Perform a checkpoint --- either during shutdown, or on-the-fly
346 : */
347 : void
348 1939 : CheckPointSUBTRANS(void)
349 : {
350 : /*
351 : * Write dirty SUBTRANS pages to disk
352 : *
353 : * This is not actually necessary from a correctness point of view. We do
354 : * it merely to improve the odds that writing of dirty pages is done by
355 : * the checkpoint process and not by backends.
356 : */
357 : TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
358 1939 : SimpleLruWriteAll(SubTransCtl, true);
359 : TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
360 1939 : }
361 :
362 :
363 : /*
364 : * Make sure that SUBTRANS has room for a newly-allocated XID.
365 : *
366 : * NB: this is called while holding XidGenLock. We want it to be very fast
367 : * most of the time; even when it's not so fast, no actual I/O need happen
368 : * unless we're forced to write out a dirty subtrans page to make room
369 : * in shared memory.
370 : */
371 : void
372 24568459 : ExtendSUBTRANS(TransactionId newestXact)
373 : {
374 : int64 pageno;
375 : LWLock *lock;
376 :
377 : /*
378 : * No work except at first XID of a page. But beware: just after
379 : * wraparound, the first XID of page zero is FirstNormalTransactionId.
380 : */
381 24568459 : if (TransactionIdToEntry(newestXact) != 0 &&
382 : !TransactionIdEquals(newestXact, FirstNormalTransactionId))
383 17656973 : return;
384 :
385 6911486 : pageno = TransactionIdToPage(newestXact);
386 :
387 6911486 : lock = SimpleLruGetBankLock(SubTransCtl, pageno);
388 6911486 : LWLockAcquire(lock, LW_EXCLUSIVE);
389 :
390 : /* Zero the page */
391 6911486 : SimpleLruZeroPage(SubTransCtl, pageno);
392 :
393 6911486 : LWLockRelease(lock);
394 : }
395 :
396 :
397 : /*
398 : * Remove all SUBTRANS segments before the one holding the passed transaction ID
399 : *
400 : * oldestXact is the oldest TransactionXmin of any running transaction. This
401 : * is called only during checkpoint.
402 : */
403 : void
404 1909 : TruncateSUBTRANS(TransactionId oldestXact)
405 : {
406 : int64 cutoffPage;
407 :
408 : /*
409 : * The cutoff point is the start of the segment containing oldestXact. We
410 : * pass the *page* containing oldestXact to SimpleLruTruncate. We step
411 : * back one transaction to avoid passing a cutoff page that hasn't been
412 : * created yet in the rare case that oldestXact would be the first item on
413 : * a page and oldestXact == next XID. In that case, if we didn't subtract
414 : * one, we'd trigger SimpleLruTruncate's wraparound detection.
415 : */
416 2086 : TransactionIdRetreat(oldestXact);
417 1909 : cutoffPage = TransactionIdToPage(oldestXact);
418 :
419 1909 : SimpleLruTruncate(SubTransCtl, cutoffPage);
420 1909 : }
421 :
422 :
423 : /*
424 : * Decide whether a SUBTRANS page number is "older" for truncation purposes.
425 : * Analogous to CLOGPagePrecedes().
426 : */
427 : static bool
428 317346 : SubTransPagePrecedes(int64 page1, int64 page2)
429 : {
430 : TransactionId xid1;
431 : TransactionId xid2;
432 :
433 317346 : xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE;
434 317346 : xid1 += FirstNormalTransactionId + 1;
435 317346 : xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE;
436 317346 : xid2 += FirstNormalTransactionId + 1;
437 :
438 575442 : return (TransactionIdPrecedes(xid1, xid2) &&
439 258096 : TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1));
440 : }
441 :
442 : static int
443 0 : subtrans_errdetail_for_io_error(const void *opaque_data)
444 : {
445 0 : TransactionId xid = *(const TransactionId *) opaque_data;
446 :
447 0 : return errdetail("Could not access subtransaction status of transaction %u.", xid);
448 : }
|