Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records.
28 : */
29 : static void
30 25180 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 25180 : XLogRecPtr lsn = record->EndRecPtr;
33 25180 : char *maindataptr = XLogRecGetData(record);
34 : xl_heap_prune xlrec;
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 : XLogRedoAction action;
39 :
40 25180 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
41 25180 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
42 25180 : maindataptr += SizeOfHeapPrune;
43 :
44 : /*
45 : * We will take an ordinary exclusive lock or a cleanup lock depending on
46 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
47 : * lock, we had better not be doing anything that requires moving existing
48 : * tuple data.
49 : */
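 : /*
 : * Moving tuple data (i.e. defragmenting the page) requires the stronger
 : * cleanup lock because a concurrent backend holding only a buffer pin
 : * may retain pointers into the page; see storage/buffer/README for the
 : * pin and cleanup-lock rules.
 : */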
50 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
51 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
52 :
53 : /*
54 : * We are about to remove and/or freeze tuples. In Hot Standby mode,
55 : * ensure that there are no queries running for which the removed tuples
56 : * are still visible or which still consider the frozen xids as running.
57 : * The conflict horizon XID comes after xl_heap_prune.
58 : */
59 25180 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
60 : {
61 : TransactionId snapshot_conflict_horizon;
62 :
63 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
64 18948 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
65 18948 : maindataptr += sizeof(TransactionId);
66 :
67 18948 : if (InHotStandby)
68 18484 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
69 18484 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
70 : rlocator);
71 : }
72 :
73 : /*
74 : * If we have a full-page image, restore it and we're done.
75 : */
76 25180 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
77 25180 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
78 : &buffer);
79 25180 : if (action == BLK_NEEDS_REDO)
80 : {
81 16234 : Page page = BufferGetPage(buffer);
82 : OffsetNumber *redirected;
83 : OffsetNumber *nowdead;
84 : OffsetNumber *nowunused;
85 : int nredirected;
86 : int ndead;
87 : int nunused;
88 : int nplans;
89 : Size datalen;
90 : xlhp_freeze_plan *plans;
91 : OffsetNumber *frz_offsets;
92 16234 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
93 :
94 16234 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
95 : &nplans, &plans, &frz_offsets,
96 : &nredirected, &redirected,
97 : &ndead, &nowdead,
98 : &nunused, &nowunused);
99 :
100 : /*
101 : * Update all line pointers per the record, and repair fragmentation
102 : * if needed.
103 : */
104 16234 : if (nredirected > 0 || ndead > 0 || nunused > 0)
105 15174 : heap_page_prune_execute(buffer,
106 15174 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
107 : redirected, nredirected,
108 : nowdead, ndead,
109 : nowunused, nunused);
110 :
111 : /* Freeze tuples */
112 18304 : for (int p = 0; p < nplans; p++)
113 : {
114 : HeapTupleFreeze frz;
115 :
116 : /*
117 : * Convert freeze plan representation from WAL record into
118 : * per-tuple format used by heap_execute_freeze_tuple
119 : */
120 2070 : frz.xmax = plans[p].xmax;
121 2070 : frz.t_infomask2 = plans[p].t_infomask2;
122 2070 : frz.t_infomask = plans[p].t_infomask;
123 2070 : frz.frzflags = plans[p].frzflags;
124 2070 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
125 :
126 99876 : for (int i = 0; i < plans[p].ntuples; i++)
127 : {
128 97806 : OffsetNumber offset = *(frz_offsets++);
129 : ItemId lp;
130 : HeapTupleHeader tuple;
131 :
132 97806 : lp = PageGetItemId(page, offset);
133 97806 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
134 97806 : heap_execute_freeze_tuple(tuple, &frz);
135 : }
136 : }
137 :
138 : /* There should be no more data */
139 : Assert((char *) frz_offsets == dataptr + datalen);
140 :
141 : /*
142 : * Note: we don't worry about updating the page's prunability hints.
143 : * At worst this will cause an extra prune cycle to occur soon.
144 : */
145 :
146 16234 : PageSetLSN(page, lsn);
147 16234 : MarkBufferDirty(buffer);
148 : }
149 :
150 : /*
151 : * If we released any space or line pointers, update the free space map.
152 : *
153 : * Do this regardless of a full-page image being applied, since the FSM
154 : * data is not in the page anyway.
155 : */
156 25180 : if (BufferIsValid(buffer))
157 : {
158 25180 : if (xlrec.flags & (XLHP_HAS_REDIRECTIONS |
159 : XLHP_HAS_DEAD_ITEMS |
160 : XLHP_HAS_NOW_UNUSED_ITEMS))
161 : {
162 21568 : Size freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
163 :
164 21568 : UnlockReleaseBuffer(buffer);
165 :
166 21568 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
167 : }
168 : else
169 3612 : UnlockReleaseBuffer(buffer);
170 : }
171 25180 : }
172 :
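 : /*
 : * A sketch of the serialized XLOG_HEAP2_PRUNE_* payload consumed above,
 : * following the layout documented in access/heapam_xlog.h (shown here
 : * for orientation only):
 : *
 : *   main data:    xl_heap_prune (reason, flags)
 : *                 [TransactionId conflict horizon, stored unaligned,
 : *                  if XLHP_HAS_CONFLICT_HORIZON]
 : *   block 0 data: [xlhp_freeze_plans, if XLHP_HAS_FREEZE_PLANS]
 : *                 [xlhp_prune_items of redirect pairs, if XLHP_HAS_REDIRECTIONS]
 : *                 [xlhp_prune_items of dead items, if XLHP_HAS_DEAD_ITEMS]
 : *                 [xlhp_prune_items of now-unused items, if XLHP_HAS_NOW_UNUSED_ITEMS]
 : *                 [freeze plan offset arrays, one per freeze plan]
 : *
 : * heap_xlog_deserialize_prune_and_freeze() walks this structure in
 : * exactly that order.
 : */
 :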
173 : /*
174 : * Replay XLOG_HEAP2_VISIBLE records.
175 : *
176 : * The critical integrity requirement here is that we must never end up with
177 : * a situation where the visibility map bit is set, and the page-level
178 : * PD_ALL_VISIBLE bit is clear. If that were to occur, then a subsequent
179 : * page modification would fail to clear the visibility map bit.
180 : */
181 : static void
182 14750 : heap_xlog_visible(XLogReaderState *record)
183 : {
184 14750 : XLogRecPtr lsn = record->EndRecPtr;
185 14750 : xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
186 14750 : Buffer vmbuffer = InvalidBuffer;
187 : Buffer buffer;
188 : Page page;
189 : RelFileLocator rlocator;
190 : BlockNumber blkno;
191 : XLogRedoAction action;
192 :
193 : Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
194 :
195 14750 : XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
196 :
197 : /*
198 : * If there are any Hot Standby transactions running that have an xmin
199 : * horizon old enough that this page isn't all-visible for them, they
200 : * might incorrectly decide that an index-only scan can skip a heap fetch.
201 : *
202 : * NB: It might be better to throw some kind of "soft" conflict here that
203 : * forces any index-only scan that is in flight to perform heap fetches,
204 : * rather than killing the transaction outright.
205 : */
206 14750 : if (InHotStandby)
207 14402 : ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
208 14402 : xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
209 : rlocator);
210 :
211 : /*
212 : * Read the heap page, if it still exists. If the heap file has been
213 : * dropped or truncated later in recovery, we don't need to update the
214 : * page, but we'd better still update the visibility map.
215 : */
216 14750 : action = XLogReadBufferForRedo(record, 1, &buffer);
217 14750 : if (action == BLK_NEEDS_REDO)
218 : {
219 : /*
220 : * We don't bump the LSN of the heap page when setting the visibility
221 : * map bit (unless checksums or wal_log_hints is enabled, in which
222 : * case we must). This exposes us to torn page hazards, but since
223 : * we're not inspecting the existing page contents in any way, we
224 : * don't care.
225 : */
226 10902 : page = BufferGetPage(buffer);
227 :
228 10902 : PageSetAllVisible(page);
229 :
230 10902 : if (XLogHintBitIsNeeded())
231 10902 : PageSetLSN(page, lsn);
232 :
233 10902 : MarkBufferDirty(buffer);
234 : }
235 : else if (action == BLK_RESTORED)
236 : {
237 : /*
238 : * If heap block was backed up, we already restored it and there's
239 : * nothing more to do. (This can only happen with checksums or
240 : * wal_log_hints enabled.)
241 : */
242 : }
243 :
244 14750 : if (BufferIsValid(buffer))
245 : {
246 14750 : Size space = PageGetFreeSpace(BufferGetPage(buffer));
247 :
248 14750 : UnlockReleaseBuffer(buffer);
249 :
250 : /*
251 : * Since FSM is not WAL-logged and only updated heuristically, it
252 : * easily becomes stale in standbys. If the standby is later promoted
253 : * and runs VACUUM, it will skip updating individual free space
254 : * figures for pages that became all-visible (or all-frozen, depending
255 : * on the vacuum mode), which is troublesome when FreeSpaceMapVacuum
256 : * propagates overly optimistic free space values to upper FSM layers;
257 : * later inserters try to use such pages only to find out that they
258 : * are unusable. This can cause long stalls when there are many such
259 : * pages.
260 : *
261 : * Forestall those problems by updating FSM's idea about a page that
262 : * is becoming all-visible or all-frozen.
263 : *
264 : * Do this regardless of a full-page image being applied, since the
265 : * FSM data is not in the page anyway.
266 : */
267 14750 : if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
268 14750 : XLogRecordPageWithFreeSpace(rlocator, blkno, space);
269 : }
270 :
271 : /*
272 : * Even if we skipped the heap page update due to the LSN interlock, it's
273 : * still safe to update the visibility map. Any WAL record that clears
274 : * the visibility map bit does so before checking the page LSN, so any
275 : * bits that need to be cleared will still be cleared.
276 : */
277 14750 : if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
278 : &vmbuffer) == BLK_NEEDS_REDO)
279 : {
280 14212 : Page vmpage = BufferGetPage(vmbuffer);
281 : Relation reln;
282 : uint8 vmbits;
283 :
284 : /* initialize the page if it was read as zeros */
285 14212 : if (PageIsNew(vmpage))
286 0 : PageInit(vmpage, BLCKSZ, 0);
287 :
288 : /* remove VISIBILITYMAP_XLOG_* */
289 14212 : vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
290 :
291 : /*
292 : * XLogReadBufferForRedoExtended locked the buffer. But
293 : * visibilitymap_set will handle locking itself.
294 : */
295 14212 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
296 :
297 14212 : reln = CreateFakeRelcacheEntry(rlocator);
298 :
299 14212 : visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
300 : xlrec->snapshotConflictHorizon, vmbits);
301 :
302 14212 : ReleaseBuffer(vmbuffer);
303 14212 : FreeFakeRelcacheEntry(reln);
304 : }
305 538 : else if (BufferIsValid(vmbuffer))
306 538 : UnlockReleaseBuffer(vmbuffer);
307 14750 : }
308 :
309 : /*
310 : * Given an "infobits" field from an XLog record, set the correct bits in the
311 : * given infomask and infomask2 for the tuple touched by the record.
312 : *
313 : * (This is the reverse of compute_infobits).
314 : */
315 : static void
316 891654 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
317 : {
318 891654 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
319 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
320 891654 : *infomask2 &= ~HEAP_KEYS_UPDATED;
321 :
322 891654 : if (infobits & XLHL_XMAX_IS_MULTI)
323 4 : *infomask |= HEAP_XMAX_IS_MULTI;
324 891654 : if (infobits & XLHL_XMAX_LOCK_ONLY)
325 110232 : *infomask |= HEAP_XMAX_LOCK_ONLY;
326 891654 : if (infobits & XLHL_XMAX_EXCL_LOCK)
327 109438 : *infomask |= HEAP_XMAX_EXCL_LOCK;
328 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
329 891654 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
330 816 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
331 :
332 891654 : if (infobits & XLHL_KEYS_UPDATED)
333 597968 : *infomask2 |= HEAP_KEYS_UPDATED;
334 891654 : }
335 :
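 : /*
 : * For contrast, a minimal sketch of the forward direction, modeled on
 : * compute_infobits() in heapam.c; the sketch (including its name) is
 : * illustrative rather than a copy of that function.
 : */
 : static inline uint8
 : compute_infobits_sketch(uint16 infomask, uint16 infomask2)
 : {
 : 	return
 : 		((infomask & HEAP_XMAX_IS_MULTI) != 0 ? XLHL_XMAX_IS_MULTI : 0) |
 : 		((infomask & HEAP_XMAX_LOCK_ONLY) != 0 ? XLHL_XMAX_LOCK_ONLY : 0) |
 : 		((infomask & HEAP_XMAX_EXCL_LOCK) != 0 ? XLHL_XMAX_EXCL_LOCK : 0) |
 : 		/* HEAP_XMAX_SHR_LOCK is deliberately not mapped, matching the redo side */
 : 		((infomask & HEAP_XMAX_KEYSHR_LOCK) != 0 ? XLHL_XMAX_KEYSHR_LOCK : 0) |
 : 		((infomask2 & HEAP_KEYS_UPDATED) != 0 ? XLHL_KEYS_UPDATED : 0);
 : }
 :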
336 : /*
337 : * Replay XLOG_HEAP_DELETE records.
338 : */
339 : static void
340 599260 : heap_xlog_delete(XLogReaderState *record)
341 : {
342 599260 : XLogRecPtr lsn = record->EndRecPtr;
343 599260 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
344 : Buffer buffer;
345 : Page page;
346 599260 : ItemId lp = NULL;
347 : HeapTupleHeader htup;
348 : BlockNumber blkno;
349 : RelFileLocator target_locator;
350 : ItemPointerData target_tid;
351 :
352 599260 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
353 599260 : ItemPointerSetBlockNumber(&target_tid, blkno);
354 599260 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
355 :
356 : /*
357 : * The visibility map may need to be fixed even if the heap page is
358 : * already up-to-date.
359 : */
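 : /*
 : * The visibilitymap_* routines take a Relation, but no relcache is
 : * available during recovery, so redo routines fabricate a throwaway
 : * entry with CreateFakeRelcacheEntry(). The same pattern recurs in
 : * the other redo functions in this file.
 : */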
360 599260 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
361 : {
362 40 : Relation reln = CreateFakeRelcacheEntry(target_locator);
363 40 : Buffer vmbuffer = InvalidBuffer;
364 :
365 40 : visibilitymap_pin(reln, blkno, &vmbuffer);
366 40 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
367 40 : ReleaseBuffer(vmbuffer);
368 40 : FreeFakeRelcacheEntry(reln);
369 : }
370 :
371 599260 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
372 : {
373 595262 : page = BufferGetPage(buffer);
374 :
375 595262 : if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
376 595262 : lp = PageGetItemId(page, xlrec->offnum);
377 :
378 595262 : if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
379 0 : elog(PANIC, "invalid lp");
380 :
381 595262 : htup = (HeapTupleHeader) PageGetItem(page, lp);
382 :
383 595262 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
384 595262 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
385 595262 : HeapTupleHeaderClearHotUpdated(htup);
386 595262 : fix_infomask_from_infobits(xlrec->infobits_set,
387 : &htup->t_infomask, &htup->t_infomask2);
388 595262 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
389 595262 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
390 : else
391 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
392 595262 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
393 :
394 : /* Mark the page as a candidate for pruning */
395 595262 : PageSetPrunable(page, XLogRecGetXid(record));
396 :
397 595262 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
398 24 : PageClearAllVisible(page);
399 :
400 : /* Make sure t_ctid is set correctly */
401 595262 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
402 284 : HeapTupleHeaderSetMovedPartitions(htup);
403 : else
404 594978 : htup->t_ctid = target_tid;
405 595262 : PageSetLSN(page, lsn);
406 595262 : MarkBufferDirty(buffer);
407 : }
408 599260 : if (BufferIsValid(buffer))
409 599260 : UnlockReleaseBuffer(buffer);
410 599260 : }
411 :
412 : /*
413 : * Replay XLOG_HEAP_INSERT records.
414 : */
415 : static void
416 2569566 : heap_xlog_insert(XLogReaderState *record)
417 : {
418 2569566 : XLogRecPtr lsn = record->EndRecPtr;
419 2569566 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
420 : Buffer buffer;
421 : Page page;
422 : union
423 : {
424 : HeapTupleHeaderData hdr;
425 : char data[MaxHeapTupleSize];
426 : } tbuf;
427 : HeapTupleHeader htup;
428 : xl_heap_header xlhdr;
429 : uint32 newlen;
430 2569566 : Size freespace = 0;
431 : RelFileLocator target_locator;
432 : BlockNumber blkno;
433 : ItemPointerData target_tid;
434 : XLogRedoAction action;
435 :
436 2569566 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
437 2569566 : ItemPointerSetBlockNumber(&target_tid, blkno);
438 2569566 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
439 :
440 : /* No freezing in the heap_insert() code path */
441 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
442 :
443 : /*
444 : * The visibility map may need to be fixed even if the heap page is
445 : * already up-to-date.
446 : */
447 2569566 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
448 : {
449 1804 : Relation reln = CreateFakeRelcacheEntry(target_locator);
450 1804 : Buffer vmbuffer = InvalidBuffer;
451 :
452 1804 : visibilitymap_pin(reln, blkno, &vmbuffer);
453 1804 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
454 1804 : ReleaseBuffer(vmbuffer);
455 1804 : FreeFakeRelcacheEntry(reln);
456 : }
457 :
458 : /*
459 : * If we inserted the first and only tuple on the page, re-initialize the
460 : * page from scratch.
461 : */
462 2569566 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
463 : {
464 34300 : buffer = XLogInitBufferForRedo(record, 0);
465 34300 : page = BufferGetPage(buffer);
466 34300 : PageInit(page, BufferGetPageSize(buffer), 0);
467 34300 : action = BLK_NEEDS_REDO;
468 : }
469 : else
470 2535266 : action = XLogReadBufferForRedo(record, 0, &buffer);
471 2569566 : if (action == BLK_NEEDS_REDO)
472 : {
473 : Size datalen;
474 : char *data;
475 :
476 2564436 : page = BufferGetPage(buffer);
477 :
478 2564436 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
479 0 : elog(PANIC, "invalid max offset number");
480 :
481 2564436 : data = XLogRecGetBlockData(record, 0, &datalen);
482 :
483 2564436 : newlen = datalen - SizeOfHeapHeader;
484 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
485 2564436 : memcpy(&xlhdr, data, SizeOfHeapHeader);
486 2564436 : data += SizeOfHeapHeader;
487 :
488 2564436 : htup = &tbuf.hdr;
489 2564436 : MemSet(htup, 0, SizeofHeapTupleHeader);
490 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
491 2564436 : memcpy((char *) htup + SizeofHeapTupleHeader,
492 : data,
493 : newlen);
494 2564436 : newlen += SizeofHeapTupleHeader;
495 2564436 : htup->t_infomask2 = xlhdr.t_infomask2;
496 2564436 : htup->t_infomask = xlhdr.t_infomask;
497 2564436 : htup->t_hoff = xlhdr.t_hoff;
498 2564436 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
499 2564436 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
500 2564436 : htup->t_ctid = target_tid;
501 :
502 2564436 : if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
503 : true, true) == InvalidOffsetNumber)
504 0 : elog(PANIC, "failed to add tuple");
505 :
506 2564436 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
507 :
508 2564436 : PageSetLSN(page, lsn);
509 :
510 2564436 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
511 612 : PageClearAllVisible(page);
512 :
513 2564436 : MarkBufferDirty(buffer);
514 : }
515 2569566 : if (BufferIsValid(buffer))
516 2569566 : UnlockReleaseBuffer(buffer);
517 :
518 : /*
519 : * If the page is running low on free space, update the FSM as well.
520 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
521 : * better than that without knowing the fill-factor for the table.
522 : *
523 : * XXX: Don't do this if the page was restored from a full-page image. We
524 : * don't bother to update the FSM in that case; it doesn't need to be
525 : * totally accurate anyway.
526 : */
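 : /*
 : * With the default 8 kB BLCKSZ, the threshold below works out to
 : * BLCKSZ / 5 = 1638 bytes of remaining free space.
 : */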
527 2569566 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
528 504232 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
529 2569566 : }
530 :
531 : /*
532 : * Replay XLOG_HEAP2_MULTI_INSERT records.
533 : */
534 : static void
535 114894 : heap_xlog_multi_insert(XLogReaderState *record)
536 : {
537 114894 : XLogRecPtr lsn = record->EndRecPtr;
538 : xl_heap_multi_insert *xlrec;
539 : RelFileLocator rlocator;
540 : BlockNumber blkno;
541 : Buffer buffer;
542 : Page page;
543 : union
544 : {
545 : HeapTupleHeaderData hdr;
546 : char data[MaxHeapTupleSize];
547 : } tbuf;
548 : HeapTupleHeader htup;
549 : uint32 newlen;
550 114894 : Size freespace = 0;
551 : int i;
552 114894 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
553 : XLogRedoAction action;
554 :
555 : /*
556 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
557 : * required.
558 : */
559 114894 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
560 :
561 114894 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
562 :
563 : /* check that the mutually exclusive flags are not both set */
564 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
565 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
566 :
567 : /*
568 : * The visibility map may need to be fixed even if the heap page is
569 : * already up-to-date.
570 : */
571 114894 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
572 : {
573 1752 : Relation reln = CreateFakeRelcacheEntry(rlocator);
574 1752 : Buffer vmbuffer = InvalidBuffer;
575 :
576 1752 : visibilitymap_pin(reln, blkno, &vmbuffer);
577 1752 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
578 1752 : ReleaseBuffer(vmbuffer);
579 1752 : FreeFakeRelcacheEntry(reln);
580 : }
581 :
582 114894 : if (isinit)
583 : {
584 3664 : buffer = XLogInitBufferForRedo(record, 0);
585 3664 : page = BufferGetPage(buffer);
586 3664 : PageInit(page, BufferGetPageSize(buffer), 0);
587 3664 : action = BLK_NEEDS_REDO;
588 : }
589 : else
590 111230 : action = XLogReadBufferForRedo(record, 0, &buffer);
591 114894 : if (action == BLK_NEEDS_REDO)
592 : {
593 : char *tupdata;
594 : char *endptr;
595 : Size len;
596 :
597 : /* Tuples are stored as block data */
598 112066 : tupdata = XLogRecGetBlockData(record, 0, &len);
599 112066 : endptr = tupdata + len;
600 :
601 112066 : page = BufferGetPage(buffer);
602 :
603 524620 : for (i = 0; i < xlrec->ntuples; i++)
604 : {
605 : OffsetNumber offnum;
606 : xl_multi_insert_tuple *xlhdr;
607 :
608 : /*
609 : * If we're reinitializing the page, the tuples are stored in
610 : * order from FirstOffsetNumber. Otherwise there's an array of
611 : * offsets in the WAL record, and the tuples come after that.
612 : */
613 412554 : if (isinit)
614 198654 : offnum = FirstOffsetNumber + i;
615 : else
616 213900 : offnum = xlrec->offsets[i];
617 412554 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
618 0 : elog(PANIC, "invalid max offset number");
619 :
620 412554 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
621 412554 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
622 :
623 412554 : newlen = xlhdr->datalen;
624 : Assert(newlen <= MaxHeapTupleSize);
625 412554 : htup = &tbuf.hdr;
626 412554 : MemSet(htup, 0, SizeofHeapTupleHeader);
627 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
628 412554 : memcpy((char *) htup + SizeofHeapTupleHeader,
629 : tupdata,
630 : newlen);
631 412554 : tupdata += newlen;
632 :
633 412554 : newlen += SizeofHeapTupleHeader;
634 412554 : htup->t_infomask2 = xlhdr->t_infomask2;
635 412554 : htup->t_infomask = xlhdr->t_infomask;
636 412554 : htup->t_hoff = xlhdr->t_hoff;
637 412554 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
638 412554 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
639 412554 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
640 412554 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
641 :
642 412554 : offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
643 412554 : if (offnum == InvalidOffsetNumber)
644 0 : elog(PANIC, "failed to add tuple");
645 : }
646 112066 : if (tupdata != endptr)
647 0 : elog(PANIC, "total tuple length mismatch");
648 :
649 112066 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
650 :
651 112066 : PageSetLSN(page, lsn);
652 :
653 112066 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
654 218 : PageClearAllVisible(page);
655 :
656 : /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
657 112066 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
658 8 : PageSetAllVisible(page);
659 :
660 112066 : MarkBufferDirty(buffer);
661 : }
662 114894 : if (BufferIsValid(buffer))
663 114894 : UnlockReleaseBuffer(buffer);
664 :
665 : /*
666 : * If the page is running low on free space, update the FSM as well.
667 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
668 : * better than that without knowing the fill-factor for the table.
669 : *
670 : * XXX: Don't do this if the page was restored from a full-page image. We
671 : * don't bother to update the FSM in that case; it doesn't need to be
672 : * totally accurate anyway.
673 : */
674 114894 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
675 30710 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
676 114894 : }
677 :
678 : /*
679 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
680 : */
681 : static void
682 186794 : heap_xlog_update(XLogReaderState *record, bool hot_update)
683 : {
684 186794 : XLogRecPtr lsn = record->EndRecPtr;
685 186794 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
686 : RelFileLocator rlocator;
687 : BlockNumber oldblk;
688 : BlockNumber newblk;
689 : ItemPointerData newtid;
690 : Buffer obuffer,
691 : nbuffer;
692 : Page page;
693 : OffsetNumber offnum;
694 186794 : ItemId lp = NULL;
695 : HeapTupleData oldtup;
696 : HeapTupleHeader htup;
697 186794 : uint16 prefixlen = 0,
698 186794 : suffixlen = 0;
699 : char *newp;
700 : union
701 : {
702 : HeapTupleHeaderData hdr;
703 : char data[MaxHeapTupleSize];
704 : } tbuf;
705 : xl_heap_header xlhdr;
706 : uint32 newlen;
707 186794 : Size freespace = 0;
708 : XLogRedoAction oldaction;
709 : XLogRedoAction newaction;
710 :
711 : /* initialize to keep the compiler quiet */
712 186794 : oldtup.t_data = NULL;
713 186794 : oldtup.t_len = 0;
714 :
715 186794 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
716 186794 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
717 : {
718 : /* HOT updates are never done across pages */
719 : Assert(!hot_update);
720 : }
721 : else
722 77928 : oldblk = newblk;
723 :
724 186794 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
725 :
726 : /*
727 : * The visibility map may need to be fixed even if the heap page is
728 : * already up-to-date.
729 : */
730 186794 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
731 : {
732 418 : Relation reln = CreateFakeRelcacheEntry(rlocator);
733 418 : Buffer vmbuffer = InvalidBuffer;
734 :
735 418 : visibilitymap_pin(reln, oldblk, &vmbuffer);
736 418 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
737 418 : ReleaseBuffer(vmbuffer);
738 418 : FreeFakeRelcacheEntry(reln);
739 : }
740 :
741 : /*
742 : * In normal operation, it is important to lock the two pages in
743 : * page-number order, to avoid possible deadlocks against other update
744 : * operations going the other way. However, during WAL replay there can
745 : * be no other update happening, so we don't need to worry about that. But
746 : * we *do* need to worry that we don't expose an inconsistent state to Hot
747 : * Standby queries --- so the original page can't be unlocked before we've
748 : * added the new tuple to the new page.
749 : */
750 :
751 : /* Deal with old tuple version */
752 186794 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
753 : &obuffer);
754 186794 : if (oldaction == BLK_NEEDS_REDO)
755 : {
756 186160 : page = BufferGetPage(obuffer);
757 186160 : offnum = xlrec->old_offnum;
758 186160 : if (PageGetMaxOffsetNumber(page) >= offnum)
759 186160 : lp = PageGetItemId(page, offnum);
760 :
761 186160 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
762 0 : elog(PANIC, "invalid lp");
763 :
764 186160 : htup = (HeapTupleHeader) PageGetItem(page, lp);
765 :
766 186160 : oldtup.t_data = htup;
767 186160 : oldtup.t_len = ItemIdGetLength(lp);
768 :
769 186160 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
770 186160 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
771 186160 : if (hot_update)
772 71904 : HeapTupleHeaderSetHotUpdated(htup);
773 : else
774 114256 : HeapTupleHeaderClearHotUpdated(htup);
775 186160 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
776 : &htup->t_infomask2);
777 186160 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
778 186160 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
779 : /* Set forward chain link in t_ctid */
780 186160 : htup->t_ctid = newtid;
781 :
782 : /* Mark the page as a candidate for pruning */
783 186160 : PageSetPrunable(page, XLogRecGetXid(record));
784 :
785 186160 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
786 396 : PageClearAllVisible(page);
787 :
788 186160 : PageSetLSN(page, lsn);
789 186160 : MarkBufferDirty(obuffer);
790 : }
791 :
792 : /*
793 : * Read the page the new tuple goes into, if different from old.
794 : */
795 186794 : if (oldblk == newblk)
796 : {
797 77928 : nbuffer = obuffer;
798 77928 : newaction = oldaction;
799 : }
800 108866 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
801 : {
802 1222 : nbuffer = XLogInitBufferForRedo(record, 0);
803 1222 : page = BufferGetPage(nbuffer);
804 1222 : PageInit(page, BufferGetPageSize(nbuffer), 0);
805 1222 : newaction = BLK_NEEDS_REDO;
806 : }
807 : else
808 107644 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
809 :
810 : /*
811 : * The visibility map may need to be fixed even if the heap page is
812 : * already up-to-date.
813 : */
814 186794 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
815 : {
816 326 : Relation reln = CreateFakeRelcacheEntry(rlocator);
817 326 : Buffer vmbuffer = InvalidBuffer;
818 :
819 326 : visibilitymap_pin(reln, newblk, &vmbuffer);
820 326 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
821 326 : ReleaseBuffer(vmbuffer);
822 326 : FreeFakeRelcacheEntry(reln);
823 : }
824 :
825 : /* Deal with new tuple */
826 186794 : if (newaction == BLK_NEEDS_REDO)
827 : {
828 : char *recdata;
829 : char *recdata_end;
830 : Size datalen;
831 : Size tuplen;
832 :
833 185742 : recdata = XLogRecGetBlockData(record, 0, &datalen);
834 185742 : recdata_end = recdata + datalen;
835 :
836 185742 : page = BufferGetPage(nbuffer);
837 :
838 185742 : offnum = xlrec->new_offnum;
839 185742 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
840 0 : elog(PANIC, "invalid max offset number");
841 :
842 185742 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
843 : {
844 : Assert(newblk == oldblk);
845 30140 : memcpy(&prefixlen, recdata, sizeof(uint16));
846 30140 : recdata += sizeof(uint16);
847 : }
848 185742 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
849 : {
850 : Assert(newblk == oldblk);
851 66982 : memcpy(&suffixlen, recdata, sizeof(uint16));
852 66982 : recdata += sizeof(uint16);
853 : }
854 :
855 185742 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
856 185742 : recdata += SizeOfHeapHeader;
857 :
858 185742 : tuplen = recdata_end - recdata;
859 : Assert(tuplen <= MaxHeapTupleSize);
860 :
861 185742 : htup = &tbuf.hdr;
862 185742 : MemSet(htup, 0, SizeofHeapTupleHeader);
863 :
864 : /*
865 : * Reconstruct the new tuple using the prefix and/or suffix from the
866 : * old tuple, and the data stored in the WAL record.
867 : */
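 : /*
 : * Layout of the reconstructed tuple, with lengths as computed below
 : * (the prefix and suffix pieces exist only when the corresponding
 : * XLH_UPDATE_*_FROM_OLD flags were set):
 : *
 : *   SizeofHeapTupleHeader       header, fields filled in from xlhdr
 : *   t_hoff - header bytes       null bitmap [+ padding] [+ oid], from WAL
 : *   prefixlen bytes             copied from the old tuple's data
 : *   remaining WAL tuple data    the part that actually changed
 : *   suffixlen bytes             copied from the old tuple's data
 : */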
868 185742 : newp = (char *) htup + SizeofHeapTupleHeader;
869 185742 : if (prefixlen > 0)
870 : {
871 : int len;
872 :
873 : /* copy bitmap [+ padding] [+ oid] from WAL record */
874 30140 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
875 30140 : memcpy(newp, recdata, len);
876 30140 : recdata += len;
877 30140 : newp += len;
878 :
879 : /* copy prefix from old tuple */
880 30140 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
881 30140 : newp += prefixlen;
882 :
883 : /* copy new tuple data from WAL record */
884 30140 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
885 30140 : memcpy(newp, recdata, len);
886 30140 : recdata += len;
887 30140 : newp += len;
888 : }
889 : else
890 : {
891 : /*
892 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
893 : * go
894 : */
895 155602 : memcpy(newp, recdata, tuplen);
896 155602 : recdata += tuplen;
897 155602 : newp += tuplen;
898 : }
899 : Assert(recdata == recdata_end);
900 :
901 : /* copy suffix from old tuple */
902 185742 : if (suffixlen > 0)
903 66982 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
904 :
905 185742 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
906 185742 : htup->t_infomask2 = xlhdr.t_infomask2;
907 185742 : htup->t_infomask = xlhdr.t_infomask;
908 185742 : htup->t_hoff = xlhdr.t_hoff;
909 :
910 185742 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
911 185742 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
912 185742 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
913 : /* Make sure there is no forward chain link in t_ctid */
914 185742 : htup->t_ctid = newtid;
915 :
916 185742 : offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
917 185742 : if (offnum == InvalidOffsetNumber)
918 0 : elog(PANIC, "failed to add tuple");
919 :
920 185742 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
921 122 : PageClearAllVisible(page);
922 :
923 185742 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
924 :
925 185742 : PageSetLSN(page, lsn);
926 185742 : MarkBufferDirty(nbuffer);
927 : }
928 :
929 186794 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
930 108866 : UnlockReleaseBuffer(nbuffer);
931 186794 : if (BufferIsValid(obuffer))
932 186794 : UnlockReleaseBuffer(obuffer);
933 :
934 : /*
935 : * If the new page is running low on free space, update the FSM as well.
936 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
937 : * better than that without knowing the fill-factor for the table.
938 : *
939 : * However, don't update the FSM on HOT updates, because after crash
940 : * recovery, either the old or the new tuple will certainly be dead and
941 : * prunable. After pruning, the page will have roughly as much free space
942 : * as it did before the update, assuming the new tuple is about the same
943 : * size as the old one.
944 : *
945 : * XXX: Don't do this if the page was restored from a full-page image. We
946 : * don't bother to update the FSM in that case; it doesn't need to be
947 : * totally accurate anyway.
948 : */
949 186794 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
950 23212 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
951 186794 : }
952 :
953 : /*
954 : * Replay XLOG_HEAP_CONFIRM records.
955 : */
956 : static void
957 158 : heap_xlog_confirm(XLogReaderState *record)
958 : {
959 158 : XLogRecPtr lsn = record->EndRecPtr;
960 158 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
961 : Buffer buffer;
962 : Page page;
963 : OffsetNumber offnum;
964 158 : ItemId lp = NULL;
965 : HeapTupleHeader htup;
966 :
967 158 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
968 : {
969 158 : page = BufferGetPage(buffer);
970 :
971 158 : offnum = xlrec->offnum;
972 158 : if (PageGetMaxOffsetNumber(page) >= offnum)
973 158 : lp = PageGetItemId(page, offnum);
974 :
975 158 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
976 0 : elog(PANIC, "invalid lp");
977 :
978 158 : htup = (HeapTupleHeader) PageGetItem(page, lp);
979 :
980 : /*
981 : * Confirm tuple as actually inserted
982 : */
983 158 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
984 :
985 158 : PageSetLSN(page, lsn);
986 158 : MarkBufferDirty(buffer);
987 : }
988 158 : if (BufferIsValid(buffer))
989 158 : UnlockReleaseBuffer(buffer);
990 158 : }
991 :
992 : /*
993 : * Replay XLOG_HEAP_LOCK records.
994 : */
995 : static void
996 110648 : heap_xlog_lock(XLogReaderState *record)
997 : {
998 110648 : XLogRecPtr lsn = record->EndRecPtr;
999 110648 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1000 : Buffer buffer;
1001 : Page page;
1002 : OffsetNumber offnum;
1003 110648 : ItemId lp = NULL;
1004 : HeapTupleHeader htup;
1005 :
1006 : /*
1007 : * The visibility map may need to be fixed even if the heap page is
1008 : * already up-to-date.
1009 : */
1010 110648 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1011 : {
1012 : RelFileLocator rlocator;
1013 96 : Buffer vmbuffer = InvalidBuffer;
1014 : BlockNumber block;
1015 : Relation reln;
1016 :
1017 96 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1018 96 : reln = CreateFakeRelcacheEntry(rlocator);
1019 :
1020 96 : visibilitymap_pin(reln, block, &vmbuffer);
1021 96 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1022 :
1023 96 : ReleaseBuffer(vmbuffer);
1024 96 : FreeFakeRelcacheEntry(reln);
1025 : }
1026 :
1027 110648 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1028 : {
1029 110232 : page = BufferGetPage(buffer);
1030 :
1031 110232 : offnum = xlrec->offnum;
1032 110232 : if (PageGetMaxOffsetNumber(page) >= offnum)
1033 110232 : lp = PageGetItemId(page, offnum);
1034 :
1035 110232 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1036 0 : elog(PANIC, "invalid lp");
1037 :
1038 110232 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1039 :
1040 110232 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1041 110232 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1042 110232 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1043 : &htup->t_infomask2);
1044 :
1045 : /*
1046 : * Clear relevant update flags, but only if the modified infomask says
1047 : * there's no update.
1048 : */
1049 110232 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1050 : {
1051 110232 : HeapTupleHeaderClearHotUpdated(htup);
1052 : /* Make sure there is no forward chain link in t_ctid */
1053 110232 : ItemPointerSet(&htup->t_ctid,
1054 : BufferGetBlockNumber(buffer),
1055 : offnum);
1056 : }
1057 110232 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1058 110232 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1059 110232 : PageSetLSN(page, lsn);
1060 110232 : MarkBufferDirty(buffer);
1061 : }
1062 110648 : if (BufferIsValid(buffer))
1063 110648 : UnlockReleaseBuffer(buffer);
1064 110648 : }
1065 :
1066 : /*
1067 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1068 : */
1069 : static void
1070 0 : heap_xlog_lock_updated(XLogReaderState *record)
1071 : {
1072 0 : XLogRecPtr lsn = record->EndRecPtr;
1073 : xl_heap_lock_updated *xlrec;
1074 : Buffer buffer;
1075 : Page page;
1076 : OffsetNumber offnum;
1077 0 : ItemId lp = NULL;
1078 : HeapTupleHeader htup;
1079 :
1080 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1081 :
1082 : /*
1083 : * The visibility map may need to be fixed even if the heap page is
1084 : * already up-to-date.
1085 : */
1086 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1087 : {
1088 : RelFileLocator rlocator;
1089 0 : Buffer vmbuffer = InvalidBuffer;
1090 : BlockNumber block;
1091 : Relation reln;
1092 :
1093 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1094 0 : reln = CreateFakeRelcacheEntry(rlocator);
1095 :
1096 0 : visibilitymap_pin(reln, block, &vmbuffer);
1097 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1098 :
1099 0 : ReleaseBuffer(vmbuffer);
1100 0 : FreeFakeRelcacheEntry(reln);
1101 : }
1102 :
1103 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1104 : {
1105 0 : page = BufferGetPage(buffer);
1106 :
1107 0 : offnum = xlrec->offnum;
1108 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
1109 0 : lp = PageGetItemId(page, offnum);
1110 :
1111 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1112 0 : elog(PANIC, "invalid lp");
1113 :
1114 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1115 :
1116 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1117 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1118 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1119 : &htup->t_infomask2);
1120 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1121 :
1122 0 : PageSetLSN(page, lsn);
1123 0 : MarkBufferDirty(buffer);
1124 : }
1125 0 : if (BufferIsValid(buffer))
1126 0 : UnlockReleaseBuffer(buffer);
1127 0 : }
1128 :
1129 : /*
1130 : * Replay XLOG_HEAP_INPLACE records.
1131 : */
1132 : static void
1133 15018 : heap_xlog_inplace(XLogReaderState *record)
1134 : {
1135 15018 : XLogRecPtr lsn = record->EndRecPtr;
1136 15018 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1137 : Buffer buffer;
1138 : Page page;
1139 : OffsetNumber offnum;
1140 15018 : ItemId lp = NULL;
1141 : HeapTupleHeader htup;
1142 : uint32 oldlen;
1143 : Size newlen;
1144 :
1145 15018 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1146 : {
1147 14608 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1148 :
1149 14608 : page = BufferGetPage(buffer);
1150 :
1151 14608 : offnum = xlrec->offnum;
1152 14608 : if (PageGetMaxOffsetNumber(page) >= offnum)
1153 14608 : lp = PageGetItemId(page, offnum);
1154 :
1155 14608 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1156 0 : elog(PANIC, "invalid lp");
1157 :
1158 14608 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1159 :
1160 14608 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1161 14608 : if (oldlen != newlen)
1162 0 : elog(PANIC, "wrong tuple length");
1163 :
1164 14608 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1165 :
1166 14608 : PageSetLSN(page, lsn);
1167 14608 : MarkBufferDirty(buffer);
1168 : }
1169 15018 : if (BufferIsValid(buffer))
1170 15018 : UnlockReleaseBuffer(buffer);
1171 :
1172 15018 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1173 : xlrec->nmsgs,
1174 15018 : xlrec->relcacheInitFileInval,
1175 : xlrec->dbId,
1176 : xlrec->tsId);
1177 15018 : }
1178 :
1179 : void
1180 3481450 : heap_redo(XLogReaderState *record)
1181 : {
1182 3481450 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1183 :
1184 : /*
1185 : * These operations don't overwrite MVCC data, so no conflict processing is
1186 : * required. The ones in heap2 rmgr do.
1187 : */
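 : /*
 : * The bits selected by XLOG_HEAP_OPMASK carry the opcode; the
 : * XLOG_HEAP_INIT_PAGE flag shares the same info byte and is checked
 : * separately inside the individual redo routines.
 : */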
1188 :
1189 3481450 : switch (info & XLOG_HEAP_OPMASK)
1190 : {
1191 2569566 : case XLOG_HEAP_INSERT:
1192 2569566 : heap_xlog_insert(record);
1193 2569566 : break;
1194 599260 : case XLOG_HEAP_DELETE:
1195 599260 : heap_xlog_delete(record);
1196 599260 : break;
1197 114336 : case XLOG_HEAP_UPDATE:
1198 114336 : heap_xlog_update(record, false);
1199 114336 : break;
1200 6 : case XLOG_HEAP_TRUNCATE:
1201 :
1202 : /*
1203 : * TRUNCATE is a no-op because the actions are already logged as
1204 : * SMGR WAL records. TRUNCATE WAL record only exists for logical
1205 : * decoding.
1206 : */
1207 6 : break;
1208 72458 : case XLOG_HEAP_HOT_UPDATE:
1209 72458 : heap_xlog_update(record, true);
1210 72458 : break;
1211 158 : case XLOG_HEAP_CONFIRM:
1212 158 : heap_xlog_confirm(record);
1213 158 : break;
1214 110648 : case XLOG_HEAP_LOCK:
1215 110648 : heap_xlog_lock(record);
1216 110648 : break;
1217 15018 : case XLOG_HEAP_INPLACE:
1218 15018 : heap_xlog_inplace(record);
1219 15018 : break;
1220 0 : default:
1221 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1222 : }
1223 3481450 : }
1224 :
1225 : void
1226 156820 : heap2_redo(XLogReaderState *record)
1227 : {
1228 156820 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1229 :
1230 156820 : switch (info & XLOG_HEAP_OPMASK)
1231 : {
1232 25180 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1233 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1234 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1235 25180 : heap_xlog_prune_freeze(record);
1236 25180 : break;
1237 14750 : case XLOG_HEAP2_VISIBLE:
1238 14750 : heap_xlog_visible(record);
1239 14750 : break;
1240 114894 : case XLOG_HEAP2_MULTI_INSERT:
1241 114894 : heap_xlog_multi_insert(record);
1242 114894 : break;
1243 0 : case XLOG_HEAP2_LOCK_UPDATED:
1244 0 : heap_xlog_lock_updated(record);
1245 0 : break;
1246 1996 : case XLOG_HEAP2_NEW_CID:
1247 :
1248 : /*
1249 : * Nothing to do on a real replay, only used during logical
1250 : * decoding.
1251 : */
1252 1996 : break;
1253 0 : case XLOG_HEAP2_REWRITE:
1254 0 : heap_xlog_logical_rewrite(record);
1255 0 : break;
1256 0 : default:
1257 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1258 : }
1259 156820 : }
1260 :
1261 : /*
1262 : * Mask a heap page before performing consistency checks on it.
1263 : */
1264 : void
1265 5814292 : heap_mask(char *pagedata, BlockNumber blkno)
1266 : {
1267 5814292 : Page page = (Page) pagedata;
1268 : OffsetNumber off;
1269 :
1270 5814292 : mask_page_lsn_and_checksum(page);
1271 :
1272 5814292 : mask_page_hint_bits(page);
1273 5814292 : mask_unused_space(page);
1274 :
1275 476298024 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1276 : {
1277 470483732 : ItemId iid = PageGetItemId(page, off);
1278 : char *page_item;
1279 :
1280 470483732 : page_item = (char *) (page + ItemIdGetOffset(iid));
1281 :
1282 470483732 : if (ItemIdIsNormal(iid))
1283 : {
1284 445301960 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1285 :
1286 : /*
1287 : * If xmin of a tuple is not yet frozen, we should ignore
1288 : * differences in hint bits, since they can be set without
1289 : * emitting WAL.
1290 : */
1291 445301960 : if (!HeapTupleHeaderXminFrozen(page_htup))
1292 440954772 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1293 : else
1294 : {
1295 : /* Still we need to mask xmax hint bits. */
1296 4347188 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1297 4347188 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1298 : }
1299 :
1300 : /*
1301 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1302 : * it. See heap_xlog_insert() for details.
1303 : */
1304 445301960 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1305 :
1306 : /*
1307 : * For a speculative tuple, heap_insert() does not set ctid in the
1308 : * caller-passed heap tuple itself, leaving the ctid field to
1309 : * contain a speculative token value - a per-backend monotonically
1310 : * increasing identifier. Besides, it does not WAL-log ctid under
1311 : * any circumstances.
1312 : *
1313 : * During redo, heap_xlog_insert() sets t_ctid to current block
1314 : * number and self offset number. It doesn't care about any
1315 : * speculative insertions on the primary. Hence, we set t_ctid to
1316 : * current block number and self offset number to ignore any
1317 : * inconsistency.
1318 : */
1319 445301960 : if (HeapTupleHeaderIsSpeculative(page_htup))
1320 160 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1321 :
1322 : /*
1323 : * NB: Not ignoring ctid changes due to the tuple having moved
1324 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1325 : * important information that needs to be in-sync between primary
1326 : * and standby, and thus is WAL logged.
1327 : */
1328 : }
1329 :
1330 : /*
1331 : * Ignore any padding bytes after the tuple, when the length of the
1332 : * item is not MAXALIGNed.
1333 : */
1334 470483732 : if (ItemIdHasStorage(iid))
1335 : {
1336 445301960 : int len = ItemIdGetLength(iid);
1337 445301960 : int padlen = MAXALIGN(len) - len;
1338 :
1339 445301960 : if (padlen > 0)
1340 237827644 : memset(page_item + len, MASK_MARKER, padlen);
1341 : }
1342 : }
1343 5814292 : }