Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
/*
 * Replay XLOG_HEAP2_PRUNE_* records.
 *
 * These records describe some combination of: pruning line pointers
 * (redirect/dead/unused), freezing tuples, and setting the page's
 * all-visible/all-frozen bits in the visibility map.  The heap page
 * (block 0 of the record) is redone first, then the VM page (block 1),
 * so that the invariant "VM bit set implies PD_ALL_VISIBLE set" holds
 * at every intermediate state.
 */
static void
heap_xlog_prune_freeze(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	char	   *maindataptr = XLogRecGetData(record);
	xl_heap_prune xlrec;
	Buffer		buffer;
	RelFileLocator rlocator;
	BlockNumber blkno;
	Buffer		vmbuffer = InvalidBuffer;
	uint8		vmflags = 0;
	Size		freespace = 0;

	XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
	/* copy out the fixed-size header; the record data may be unaligned */
	memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
	maindataptr += SizeOfHeapPrune;

	/*
	 * We will take an ordinary exclusive lock or a cleanup lock depending on
	 * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
	 * lock, we better not be doing anything that requires moving existing
	 * tuple data.
	 */
	Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
		   (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);

	/* Translate the record's VM flags into visibilitymap.h bits */
	if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
	{
		vmflags = VISIBILITYMAP_ALL_VISIBLE;
		if (xlrec.flags & XLHP_VM_ALL_FROZEN)
			vmflags |= VISIBILITYMAP_ALL_FROZEN;
	}

	/*
	 * After xl_heap_prune is the optional snapshot conflict horizon.
	 *
	 * In Hot Standby mode, we must ensure that there are no running queries
	 * which would conflict with the changes in this record. That means we
	 * can't replay this record if it removes tuples that are still visible to
	 * transactions on the standby, freeze tuples with xids that are still
	 * considered running on the standby, or set a page as all-visible in the
	 * VM if it isn't all-visible to all transactions on the standby.
	 */
	if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
	{
		TransactionId snapshot_conflict_horizon;

		/* memcpy() because snapshot_conflict_horizon is stored unaligned */
		memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
		maindataptr += sizeof(TransactionId);

		if (InHotStandby)
			ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
												(xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
												rlocator);
	}

	/*
	 * If we have a full-page image of the heap block, restore it and we're
	 * done with the heap block.
	 */
	if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
									  (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
									  &buffer) == BLK_NEEDS_REDO)
	{
		Page		page = BufferGetPage(buffer);
		OffsetNumber *redirected;
		OffsetNumber *nowdead;
		OffsetNumber *nowunused;
		int			nredirected;
		int			ndead;
		int			nunused;
		int			nplans;
		Size		datalen;
		xlhp_freeze_plan *plans;
		OffsetNumber *frz_offsets;
		char	   *dataptr = XLogRecGetBlockData(record, 0, &datalen);
		bool		do_prune;

		/* Unpack the variable-length arrays stored in the block data */
		heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
											   &nplans, &plans, &frz_offsets,
											   &nredirected, &redirected,
											   &ndead, &nowdead,
											   &nunused, &nowunused);

		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;

		/* Ensure the record does something */
		Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);

		/*
		 * Update all line pointers per the record, and repair fragmentation
		 * if needed.
		 */
		if (do_prune)
			heap_page_prune_execute(buffer,
									(xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
									redirected, nredirected,
									nowdead, ndead,
									nowunused, nunused);

		/* Freeze tuples */
		for (int p = 0; p < nplans; p++)
		{
			HeapTupleFreeze frz;

			/*
			 * Convert freeze plan representation from WAL record into
			 * per-tuple format used by heap_execute_freeze_tuple
			 */
			frz.xmax = plans[p].xmax;
			frz.t_infomask2 = plans[p].t_infomask2;
			frz.t_infomask = plans[p].t_infomask;
			frz.frzflags = plans[p].frzflags;
			frz.offset = InvalidOffsetNumber;	/* unused, but be tidy */

			/* apply this plan to each tuple offset it covers */
			for (int i = 0; i < plans[p].ntuples; i++)
			{
				OffsetNumber offset = *(frz_offsets++);
				ItemId		lp;
				HeapTupleHeader tuple;

				lp = PageGetItemId(page, offset);
				tuple = (HeapTupleHeader) PageGetItem(page, lp);
				heap_execute_freeze_tuple(tuple, &frz);
			}
		}

		/* There should be no more data */
		Assert((char *) frz_offsets == dataptr + datalen);

		/*
		 * The critical integrity requirement here is that we must never end
		 * up with the visibility map bit set and the page-level
		 * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
		 * modification would fail to clear the visibility map bit.
		 */
		if (vmflags & VISIBILITYMAP_VALID_BITS)
		{
			PageSetAllVisible(page);
			PageClearPrunable(page);
		}

		MarkBufferDirty(buffer);

		/*
		 * See log_heap_prune_and_freeze() for commentary on when we set the
		 * heap page LSN.
		 */
		if (do_prune || nplans > 0 ||
			((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
			PageSetLSN(page, lsn);

		/*
		 * Note: we don't worry about updating the page's prunability hints.
		 * At worst this will cause an extra prune cycle to occur soon.
		 */
	}

	/*
	 * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
	 * or the VM, update the freespace map.
	 *
	 * Even when no actual space is freed (when only marking the page
	 * all-visible or frozen), we still update the FSM. Because the FSM is
	 * unlogged and maintained heuristically, it often becomes stale on
	 * standbys. If such a standby is later promoted and runs VACUUM, it will
	 * skip recalculating free space for pages that were marked
	 * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
	 * optimistic free space values upward, causing future insertions to
	 * select pages that turn out to be unusable. In bulk, this can lead to
	 * long stalls.
	 *
	 * To prevent this, always update the FSM even when only marking a page
	 * all-visible/all-frozen.
	 *
	 * Do this regardless of whether a full-page image is logged, since FSM
	 * data is not part of the page itself.
	 */
	if (BufferIsValid(buffer))
	{
		if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
							XLHP_HAS_DEAD_ITEMS |
							XLHP_HAS_NOW_UNUSED_ITEMS)) ||
			(vmflags & VISIBILITYMAP_VALID_BITS))
			freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));

		/*
		 * We want to avoid holding an exclusive lock on the heap buffer while
		 * doing IO (either of the FSM or the VM), so we'll release it now.
		 */
		UnlockReleaseBuffer(buffer);
	}

	/*
	 * Now read and update the VM block.
	 *
	 * We must redo changes to the VM even if the heap page was skipped due to
	 * LSN interlock. See comment in heap_xlog_multi_insert() for more details
	 * on replaying changes to the VM.
	 */
	if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
		XLogReadBufferForRedoExtended(record, 1,
									  RBM_ZERO_ON_ERROR,
									  false,
									  &vmbuffer) == BLK_NEEDS_REDO)
	{
		Page		vmpage = BufferGetPage(vmbuffer);

		/* initialize the page if it was read as zeros */
		if (PageIsNew(vmpage))
			PageInit(vmpage, BLCKSZ, 0);

		visibilitymap_set(blkno, vmbuffer, vmflags, rlocator);

		Assert(BufferIsDirty(vmbuffer));
		PageSetLSN(vmpage, lsn);
	}

	if (BufferIsValid(vmbuffer))
		UnlockReleaseBuffer(vmbuffer);

	/* finally, record the page's free space in the FSM (outside any lock) */
	if (freespace > 0)
		XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}
254 :
255 : /*
256 : * Given an "infobits" field from an XLog record, set the correct bits in the
257 : * given infomask and infomask2 for the tuple touched by the record.
258 : *
259 : * (This is the reverse of compute_infobits).
260 : */
261 : static void
262 466384 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
263 : {
264 466384 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
265 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
266 466384 : *infomask2 &= ~HEAP_KEYS_UPDATED;
267 :
268 466384 : if (infobits & XLHL_XMAX_IS_MULTI)
269 3 : *infomask |= HEAP_XMAX_IS_MULTI;
270 466384 : if (infobits & XLHL_XMAX_LOCK_ONLY)
271 55658 : *infomask |= HEAP_XMAX_LOCK_ONLY;
272 466384 : if (infobits & XLHL_XMAX_EXCL_LOCK)
273 54894 : *infomask |= HEAP_XMAX_EXCL_LOCK;
274 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
275 466384 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
276 778 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
277 :
278 466384 : if (infobits & XLHL_KEYS_UPDATED)
279 316159 : *infomask2 |= HEAP_KEYS_UPDATED;
280 466384 : }
281 :
/*
 * Replay XLOG_HEAP_DELETE records.
 *
 * Marks the target tuple deleted by setting its xmax (or, for the
 * XLH_DELETE_IS_SUPER case, clearing its xmin) and restoring the
 * lock/update infomask bits carried in the record.
 */
static void
heap_xlog_delete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;
	ItemId		lp;
	HeapTupleHeader htup;
	BlockNumber blkno;
	RelFileLocator target_locator;
	ItemPointerData target_tid;

	XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
	ItemPointerSetBlockNumber(&target_tid, blkno);
	ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

	/*
	 * The visibility map may need to be fixed even if the heap page is
	 * already up-to-date.
	 */
	if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
	{
		Relation	reln = CreateFakeRelcacheEntry(target_locator);
		Buffer		vmbuffer = InvalidBuffer;

		visibilitymap_pin(reln, blkno, &vmbuffer);
		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
		ReleaseBuffer(vmbuffer);
		FreeFakeRelcacheEntry(reln);
	}

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(buffer);

		/* sanity-check the target line pointer before touching it */
		if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
			elog(PANIC, "offnum out of range");
		lp = PageGetItemId(page, xlrec->offnum);
		if (!ItemIdIsNormal(lp))
			elog(PANIC, "invalid lp");

		htup = (HeapTupleHeader) PageGetItem(page, lp);

		/* clear stale xmax-related bits, then set those from the record */
		htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
		htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
		HeapTupleHeaderClearHotUpdated(htup);
		fix_infomask_from_infobits(xlrec->infobits_set,
								   &htup->t_infomask, &htup->t_infomask2);
		if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
			HeapTupleHeaderSetXmax(htup, xlrec->xmax);
		else
			HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
		HeapTupleHeaderSetCmax(htup, FirstCommandId, false);

		/* Mark the page as a candidate for pruning */
		PageSetPrunable(page, XLogRecGetXid(record));

		if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
			PageClearAllVisible(page);

		/* Make sure t_ctid is set correctly */
		if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
			HeapTupleHeaderSetMovedPartitions(htup);
		else
			htup->t_ctid = target_tid;
		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
357 :
/*
 * Replay XLOG_HEAP_INSERT records.
 *
 * Reconstructs the inserted tuple from the xl_heap_header plus tuple data
 * stored as block data, and places it at the recorded offset.  If the
 * record carries XLOG_HEAP_INIT_PAGE, the page is reinitialized from
 * scratch instead of being read.
 */
static void
heap_xlog_insert(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;
	union
	{
		HeapTupleHeaderData hdr;
		char		data[MaxHeapTupleSize];
	}			tbuf;			/* staging area for the reconstructed tuple */
	HeapTupleHeader htup;
	xl_heap_header xlhdr;
	uint32		newlen;
	Size		freespace = 0;
	RelFileLocator target_locator;
	BlockNumber blkno;
	ItemPointerData target_tid;
	XLogRedoAction action;

	XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
	ItemPointerSetBlockNumber(&target_tid, blkno);
	ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

	/* No freezing in the heap_insert() code path */
	Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));

	/*
	 * The visibility map may need to be fixed even if the heap page is
	 * already up-to-date.
	 */
	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
	{
		Relation	reln = CreateFakeRelcacheEntry(target_locator);
		Buffer		vmbuffer = InvalidBuffer;

		visibilitymap_pin(reln, blkno, &vmbuffer);
		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
		ReleaseBuffer(vmbuffer);
		FreeFakeRelcacheEntry(reln);
	}

	/*
	 * If we inserted the first and only tuple on the page, re-initialize the
	 * page from scratch.
	 */
	if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
	{
		buffer = XLogInitBufferForRedo(record, 0);
		page = BufferGetPage(buffer);
		PageInit(page, BufferGetPageSize(buffer), 0);
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(record, 0, &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		Size		datalen;
		char	   *data;

		page = BufferGetPage(buffer);

		if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
			elog(PANIC, "invalid max offset number");

		data = XLogRecGetBlockData(record, 0, &datalen);

		newlen = datalen - SizeOfHeapHeader;
		Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
		memcpy(&xlhdr, data, SizeOfHeapHeader);
		data += SizeOfHeapHeader;

		/* rebuild the tuple: zeroed header, then payload from the record */
		htup = &tbuf.hdr;
		MemSet(htup, 0, SizeofHeapTupleHeader);
		/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
		memcpy((char *) htup + SizeofHeapTupleHeader,
			   data,
			   newlen);
		newlen += SizeofHeapTupleHeader;
		htup->t_infomask2 = xlhdr.t_infomask2;
		htup->t_infomask = xlhdr.t_infomask;
		htup->t_hoff = xlhdr.t_hoff;
		HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
		HeapTupleHeaderSetCmin(htup, FirstCommandId);
		htup->t_ctid = target_tid;

		if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
			elog(PANIC, "failed to add tuple");

		freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

		/*
		 * Set the page prunable to trigger on-access pruning later, which may
		 * set the page all-visible in the VM. See comments in heap_insert().
		 */
		if (TransactionIdIsNormal(XLogRecGetXid(record)) &&
			!HeapTupleHeaderXminFrozen(htup))
			PageSetPrunable(page, XLogRecGetXid(record));

		PageSetLSN(page, lsn);

		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
			PageClearAllVisible(page);

		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/*
	 * If the page is running low on free space, update the FSM as well.
	 * Arbitrarily, our definition of "low" is less than 20%. We can't do much
	 * better than that without knowing the fill-factor for the table.
	 *
	 * XXX: Don't do this if the page was restored from full page image. We
	 * don't bother to update the FSM in that case, it doesn't need to be
	 * totally accurate anyway.
	 */
	if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
		XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
}
483 :
/*
 * Replay XLOG_HEAP2_MULTI_INSERT records.
 *
 * Like heap_xlog_insert(), but reconstructs several tuples from one record.
 * When reinitializing the page, tuples land at consecutive offsets starting
 * at FirstOffsetNumber; otherwise their offsets come from the record.  May
 * also set the page all-visible/all-frozen (XLH_INSERT_ALL_FROZEN_SET),
 * in which case the VM block is replayed after the heap block.
 */
static void
heap_xlog_multi_insert(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_heap_multi_insert *xlrec;
	RelFileLocator rlocator;
	BlockNumber blkno;
	Buffer		buffer;
	Page		page;
	union
	{
		HeapTupleHeaderData hdr;
		char		data[MaxHeapTupleSize];
	}			tbuf;			/* staging area for each reconstructed tuple */
	HeapTupleHeader htup;
	uint32		newlen;
	Size		freespace = 0;
	int			i;
	bool		isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
	XLogRedoAction action;
	Buffer		vmbuffer = InvalidBuffer;

	/*
	 * Insertion doesn't overwrite MVCC data, so no conflict processing is
	 * required.
	 */
	xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);

	XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);

	/* check that the mutually exclusive flags are not both set */
	Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
			 (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));

	/*
	 * The visibility map may need to be fixed even if the heap page is
	 * already up-to-date.
	 */
	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
	{
		Relation	reln = CreateFakeRelcacheEntry(rlocator);

		visibilitymap_pin(reln, blkno, &vmbuffer);
		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
		ReleaseBuffer(vmbuffer);
		vmbuffer = InvalidBuffer;
		FreeFakeRelcacheEntry(reln);
	}

	if (isinit)
	{
		buffer = XLogInitBufferForRedo(record, 0);
		page = BufferGetPage(buffer);
		PageInit(page, BufferGetPageSize(buffer), 0);
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(record, 0, &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		char	   *tupdata;
		char	   *endptr;
		Size		len;

		/* Tuples are stored as block data */
		tupdata = XLogRecGetBlockData(record, 0, &len);
		endptr = tupdata + len;

		page = BufferGetPage(buffer);

		for (i = 0; i < xlrec->ntuples; i++)
		{
			OffsetNumber offnum;
			xl_multi_insert_tuple *xlhdr;

			/*
			 * If we're reinitializing the page, the tuples are stored in
			 * order from FirstOffsetNumber. Otherwise there's an array of
			 * offsets in the WAL record, and the tuples come after that.
			 */
			if (isinit)
				offnum = FirstOffsetNumber + i;
			else
				offnum = xlrec->offsets[i];
			if (PageGetMaxOffsetNumber(page) + 1 < offnum)
				elog(PANIC, "invalid max offset number");

			/* per-tuple headers are SHORTALIGN'd within the block data */
			xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
			tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;

			newlen = xlhdr->datalen;
			Assert(newlen <= MaxHeapTupleSize);
			htup = &tbuf.hdr;
			MemSet(htup, 0, SizeofHeapTupleHeader);
			/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
			memcpy((char *) htup + SizeofHeapTupleHeader,
				   tupdata,
				   newlen);
			tupdata += newlen;

			newlen += SizeofHeapTupleHeader;
			htup->t_infomask2 = xlhdr->t_infomask2;
			htup->t_infomask = xlhdr->t_infomask;
			htup->t_hoff = xlhdr->t_hoff;
			HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
			HeapTupleHeaderSetCmin(htup, FirstCommandId);
			ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
			ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);

			offnum = PageAddItem(page, htup, newlen, offnum, true, true);
			if (offnum == InvalidOffsetNumber)
				elog(PANIC, "failed to add tuple");
		}
		/* we must have consumed exactly the block data */
		if (tupdata != endptr)
			elog(PANIC, "total tuple length mismatch");

		freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

		PageSetLSN(page, lsn);

		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
			PageClearAllVisible(page);

		/*
		 * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If
		 * we are not setting the page frozen, then set the page's prunable
		 * hint so that we trigger on-access pruning later which may set the
		 * page all-visible in the VM.
		 */
		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
		{
			PageSetAllVisible(page);
			PageClearPrunable(page);
		}
		else
			PageSetPrunable(page, XLogRecGetXid(record));

		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	buffer = InvalidBuffer;

	/*
	 * Read and update the visibility map (VM) block.
	 *
	 * We must always redo VM changes, even if the corresponding heap page
	 * update was skipped due to the LSN interlock. Each VM block covers
	 * multiple heap pages, so later WAL records may update other bits in the
	 * same block. If this record includes an FPI (full-page image),
	 * subsequent WAL records may depend on it to guard against torn pages.
	 *
	 * Heap page changes are replayed first to preserve the invariant:
	 * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
	 *
	 * Note that we released the heap page lock above. During normal
	 * operation, this would be unsafe — a concurrent modification could
	 * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
	 * invariant.
	 *
	 * During recovery, however, no concurrent writers exist. Therefore,
	 * updating the VM without holding the heap page lock is safe enough. This
	 * same approach is taken when replaying XLOG_HEAP2_PRUNE* records (see
	 * heap_xlog_prune_freeze()).
	 */
	if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
		XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
									  &vmbuffer) == BLK_NEEDS_REDO)
	{
		Page		vmpage = BufferGetPage(vmbuffer);

		/* initialize the page if it was read as zeros */
		if (PageIsNew(vmpage))
			PageInit(vmpage, BLCKSZ, 0);

		visibilitymap_set(blkno,
						  vmbuffer,
						  VISIBILITYMAP_ALL_VISIBLE |
						  VISIBILITYMAP_ALL_FROZEN,
						  rlocator);

		Assert(BufferIsDirty(vmbuffer));
		PageSetLSN(vmpage, lsn);
	}

	if (BufferIsValid(vmbuffer))
		UnlockReleaseBuffer(vmbuffer);

	/*
	 * If the page is running low on free space, update the FSM as well.
	 * Arbitrarily, our definition of "low" is less than 20%. We can't do much
	 * better than that without knowing the fill-factor for the table.
	 *
	 * XXX: Don't do this if the page was restored from full page image. We
	 * don't bother to update the FSM in that case, it doesn't need to be
	 * totally accurate anyway.
	 */
	if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
		XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}
688 :
689 : /*
690 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
691 : */
692 : static void
693 96285 : heap_xlog_update(XLogReaderState *record, bool hot_update)
694 : {
695 96285 : XLogRecPtr lsn = record->EndRecPtr;
696 96285 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
697 : RelFileLocator rlocator;
698 : BlockNumber oldblk;
699 : BlockNumber newblk;
700 : ItemPointerData newtid;
701 : Buffer obuffer,
702 : nbuffer;
703 : Page opage,
704 : npage;
705 : OffsetNumber offnum;
706 : ItemId lp;
707 : HeapTupleData oldtup;
708 : HeapTupleHeader htup;
709 96285 : uint16 prefixlen = 0,
710 96285 : suffixlen = 0;
711 : char *newp;
712 : union
713 : {
714 : HeapTupleHeaderData hdr;
715 : char data[MaxHeapTupleSize];
716 : } tbuf;
717 : xl_heap_header xlhdr;
718 : uint32 newlen;
719 96285 : Size freespace = 0;
720 : XLogRedoAction oldaction;
721 : XLogRedoAction newaction;
722 :
723 : /* initialize to keep the compiler quiet */
724 96285 : oldtup.t_data = NULL;
725 96285 : oldtup.t_len = 0;
726 :
727 96285 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
728 96285 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
729 : {
730 : /* HOT updates are never done across pages */
731 : Assert(!hot_update);
732 : }
733 : else
734 41704 : oldblk = newblk;
735 :
736 96285 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
737 :
738 : /*
739 : * The visibility map may need to be fixed even if the heap page is
740 : * already up-to-date.
741 : */
742 96285 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
743 : {
744 384 : Relation reln = CreateFakeRelcacheEntry(rlocator);
745 384 : Buffer vmbuffer = InvalidBuffer;
746 :
747 384 : visibilitymap_pin(reln, oldblk, &vmbuffer);
748 384 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
749 384 : ReleaseBuffer(vmbuffer);
750 384 : FreeFakeRelcacheEntry(reln);
751 : }
752 :
753 : /*
754 : * In normal operation, it is important to lock the two pages in
755 : * page-number order, to avoid possible deadlocks against other update
756 : * operations going the other way. However, during WAL replay there can
757 : * be no other update happening, so we don't need to worry about that. But
758 : * we *do* need to worry that we don't expose an inconsistent state to Hot
759 : * Standby queries --- so the original page can't be unlocked before we've
760 : * added the new tuple to the new page.
761 : */
762 :
763 : /* Deal with old tuple version */
764 96285 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
765 : &obuffer);
766 96285 : if (oldaction == BLK_NEEDS_REDO)
767 : {
768 95995 : opage = BufferGetPage(obuffer);
769 95995 : offnum = xlrec->old_offnum;
770 95995 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(opage))
771 0 : elog(PANIC, "offnum out of range");
772 95995 : lp = PageGetItemId(opage, offnum);
773 95995 : if (!ItemIdIsNormal(lp))
774 0 : elog(PANIC, "invalid lp");
775 :
776 95995 : htup = (HeapTupleHeader) PageGetItem(opage, lp);
777 :
778 95995 : oldtup.t_data = htup;
779 95995 : oldtup.t_len = ItemIdGetLength(lp);
780 :
781 95995 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
782 95995 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
783 95995 : if (hot_update)
784 38265 : HeapTupleHeaderSetHotUpdated(htup);
785 : else
786 57730 : HeapTupleHeaderClearHotUpdated(htup);
787 95995 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
788 : &htup->t_infomask2);
789 95995 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
790 95995 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
791 : /* Set forward chain link in t_ctid */
792 95995 : htup->t_ctid = newtid;
793 :
794 : /* Mark the page as a candidate for pruning */
795 95995 : PageSetPrunable(opage, XLogRecGetXid(record));
796 :
797 95995 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
798 369 : PageClearAllVisible(opage);
799 :
800 95995 : PageSetLSN(opage, lsn);
801 95995 : MarkBufferDirty(obuffer);
802 : }
803 :
804 : /*
805 : * Read the page the new tuple goes into, if different from old.
806 : */
807 96285 : if (oldblk == newblk)
808 : {
809 41704 : nbuffer = obuffer;
810 41704 : newaction = oldaction;
811 : }
812 54581 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
813 : {
814 620 : nbuffer = XLogInitBufferForRedo(record, 0);
815 620 : npage = BufferGetPage(nbuffer);
816 620 : PageInit(npage, BufferGetPageSize(nbuffer), 0);
817 620 : newaction = BLK_NEEDS_REDO;
818 : }
819 : else
820 53961 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
821 :
822 : /*
823 : * The visibility map may need to be fixed even if the heap page is
824 : * already up-to-date.
825 : */
826 96285 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
827 : {
828 161 : Relation reln = CreateFakeRelcacheEntry(rlocator);
829 161 : Buffer vmbuffer = InvalidBuffer;
830 :
831 161 : visibilitymap_pin(reln, newblk, &vmbuffer);
832 161 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
833 161 : ReleaseBuffer(vmbuffer);
834 161 : FreeFakeRelcacheEntry(reln);
835 : }
836 :
837 : /* Deal with new tuple */
838 96285 : if (newaction == BLK_NEEDS_REDO)
839 : {
840 : char *recdata;
841 : char *recdata_end;
842 : Size datalen;
843 : Size tuplen;
844 :
845 95764 : recdata = XLogRecGetBlockData(record, 0, &datalen);
846 95764 : recdata_end = recdata + datalen;
847 :
848 95764 : npage = BufferGetPage(nbuffer);
849 :
850 95764 : offnum = xlrec->new_offnum;
851 95764 : if (PageGetMaxOffsetNumber(npage) + 1 < offnum)
852 0 : elog(PANIC, "invalid max offset number");
853 :
854 95764 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
855 : {
856 : Assert(newblk == oldblk);
857 17818 : memcpy(&prefixlen, recdata, sizeof(uint16));
858 17818 : recdata += sizeof(uint16);
859 : }
860 95764 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
861 : {
862 : Assert(newblk == oldblk);
863 35475 : memcpy(&suffixlen, recdata, sizeof(uint16));
864 35475 : recdata += sizeof(uint16);
865 : }
866 :
867 95764 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
868 95764 : recdata += SizeOfHeapHeader;
869 :
870 95764 : tuplen = recdata_end - recdata;
871 : Assert(tuplen <= MaxHeapTupleSize);
872 :
873 95764 : htup = &tbuf.hdr;
874 95764 : MemSet(htup, 0, SizeofHeapTupleHeader);
875 :
876 : /*
877 : * Reconstruct the new tuple using the prefix and/or suffix from the
878 : * old tuple, and the data stored in the WAL record.
879 : */
880 95764 : newp = (char *) htup + SizeofHeapTupleHeader;
881 95764 : if (prefixlen > 0)
882 : {
883 : int len;
884 :
885 : /* copy bitmap [+ padding] [+ oid] from WAL record */
886 17818 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
887 17818 : memcpy(newp, recdata, len);
888 17818 : recdata += len;
889 17818 : newp += len;
890 :
891 : /* copy prefix from old tuple */
892 17818 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
893 17818 : newp += prefixlen;
894 :
895 : /* copy new tuple data from WAL record */
896 17818 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
897 17818 : memcpy(newp, recdata, len);
898 17818 : recdata += len;
899 17818 : newp += len;
900 : }
901 : else
902 : {
903 : /*
904 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
905 : * go
906 : */
907 77946 : memcpy(newp, recdata, tuplen);
908 77946 : recdata += tuplen;
909 77946 : newp += tuplen;
910 : }
911 : Assert(recdata == recdata_end);
912 :
913 : /* copy suffix from old tuple */
914 95764 : if (suffixlen > 0)
915 35475 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
916 :
917 95764 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
918 95764 : htup->t_infomask2 = xlhdr.t_infomask2;
919 95764 : htup->t_infomask = xlhdr.t_infomask;
920 95764 : htup->t_hoff = xlhdr.t_hoff;
921 :
922 95764 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
923 95764 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
924 95764 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
925 : /* Make sure there is no forward chain link in t_ctid */
926 95764 : htup->t_ctid = newtid;
927 :
928 95764 : offnum = PageAddItem(npage, htup, newlen, offnum, true, true);
929 95764 : if (offnum == InvalidOffsetNumber)
930 0 : elog(PANIC, "failed to add tuple");
931 :
932 95764 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
933 80 : PageClearAllVisible(npage);
934 :
935 : /* needed to update FSM below */
936 95764 : freespace = PageGetHeapFreeSpace(npage);
937 :
938 95764 : PageSetLSN(npage, lsn);
939 : /* See heap_insert() for why we set pd_prune_xid on insert */
940 95764 : PageSetPrunable(npage, XLogRecGetXid(record));
941 95764 : MarkBufferDirty(nbuffer);
942 : }
943 :
944 96285 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
945 54581 : UnlockReleaseBuffer(nbuffer);
946 96285 : if (BufferIsValid(obuffer))
947 96285 : UnlockReleaseBuffer(obuffer);
948 :
949 : /*
950 : * If the new page is running low on free space, update the FSM as well.
951 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
952 : * better than that without knowing the fill-factor for the table.
953 : *
954 : * However, don't update the FSM on HOT updates, because after crash
955 : * recovery, either the old or the new tuple will certainly be dead and
956 : * prunable. After pruning, the page will have roughly as much free space
957 : * as it did before the update, assuming the new tuple is about the same
958 : * size as the old one.
959 : *
960 : * XXX: Don't do this if the page was restored from full page image. We
961 : * don't bother to update the FSM in that case, it doesn't need to be
962 : * totally accurate anyway.
963 : */
964 96285 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
965 11770 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
966 96285 : }
967 :
968 : /*
969 : * Replay XLOG_HEAP_CONFIRM records.
970 : */
971 : static void
972 93 : heap_xlog_confirm(XLogReaderState *record)
973 : {
974 93 : XLogRecPtr lsn = record->EndRecPtr;
975 93 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
976 : Buffer buffer;
977 : Page page;
978 : OffsetNumber offnum;
979 : ItemId lp;
980 : HeapTupleHeader htup;
981 :
982 93 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
983 : {
984 93 : page = BufferGetPage(buffer);
985 :
986 93 : offnum = xlrec->offnum;
987 93 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
988 0 : elog(PANIC, "offnum out of range");
989 93 : lp = PageGetItemId(page, offnum);
990 93 : if (!ItemIdIsNormal(lp))
991 0 : elog(PANIC, "invalid lp");
992 :
993 93 : htup = (HeapTupleHeader) PageGetItem(page, lp);
994 :
995 : /*
996 : * Confirm tuple as actually inserted
997 : */
998 93 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
999 :
1000 93 : PageSetLSN(page, lsn);
1001 93 : MarkBufferDirty(buffer);
1002 : }
1003 93 : if (BufferIsValid(buffer))
1004 93 : UnlockReleaseBuffer(buffer);
1005 93 : }
1006 :
1007 : /*
1008 : * Replay XLOG_HEAP_LOCK records.
1009 : */
1010 : static void
1011 55879 : heap_xlog_lock(XLogReaderState *record)
1012 : {
1013 55879 : XLogRecPtr lsn = record->EndRecPtr;
1014 55879 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1015 : Buffer buffer;
1016 : Page page;
1017 : OffsetNumber offnum;
1018 : ItemId lp;
1019 : HeapTupleHeader htup;
1020 :
1021 : /*
1022 : * The visibility map may need to be fixed even if the heap page is
1023 : * already up-to-date.
1024 : */
1025 55879 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1026 : {
1027 : RelFileLocator rlocator;
1028 54 : Buffer vmbuffer = InvalidBuffer;
1029 : BlockNumber block;
1030 : Relation reln;
1031 :
1032 54 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1033 54 : reln = CreateFakeRelcacheEntry(rlocator);
1034 :
1035 54 : visibilitymap_pin(reln, block, &vmbuffer);
1036 54 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1037 :
1038 54 : ReleaseBuffer(vmbuffer);
1039 54 : FreeFakeRelcacheEntry(reln);
1040 : }
1041 :
1042 55879 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1043 : {
1044 55658 : page = BufferGetPage(buffer);
1045 :
1046 55658 : offnum = xlrec->offnum;
1047 55658 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1048 0 : elog(PANIC, "offnum out of range");
1049 55658 : lp = PageGetItemId(page, offnum);
1050 55658 : if (!ItemIdIsNormal(lp))
1051 0 : elog(PANIC, "invalid lp");
1052 :
1053 55658 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1054 :
1055 55658 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1056 55658 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1057 55658 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1058 : &htup->t_infomask2);
1059 :
1060 : /*
1061 : * Clear relevant update flags, but only if the modified infomask says
1062 : * there's no update.
1063 : */
1064 55658 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1065 : {
1066 55658 : HeapTupleHeaderClearHotUpdated(htup);
1067 : /* Make sure there is no forward chain link in t_ctid */
1068 55658 : ItemPointerSet(&htup->t_ctid,
1069 : BufferGetBlockNumber(buffer),
1070 : offnum);
1071 : }
1072 55658 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1073 55658 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1074 55658 : PageSetLSN(page, lsn);
1075 55658 : MarkBufferDirty(buffer);
1076 : }
1077 55879 : if (BufferIsValid(buffer))
1078 55879 : UnlockReleaseBuffer(buffer);
1079 55879 : }
1080 :
1081 : /*
1082 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1083 : */
1084 : static void
1085 0 : heap_xlog_lock_updated(XLogReaderState *record)
1086 : {
1087 0 : XLogRecPtr lsn = record->EndRecPtr;
1088 : xl_heap_lock_updated *xlrec;
1089 : Buffer buffer;
1090 : Page page;
1091 : OffsetNumber offnum;
1092 : ItemId lp;
1093 : HeapTupleHeader htup;
1094 :
1095 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1096 :
1097 : /*
1098 : * The visibility map may need to be fixed even if the heap page is
1099 : * already up-to-date.
1100 : */
1101 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1102 : {
1103 : RelFileLocator rlocator;
1104 0 : Buffer vmbuffer = InvalidBuffer;
1105 : BlockNumber block;
1106 : Relation reln;
1107 :
1108 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1109 0 : reln = CreateFakeRelcacheEntry(rlocator);
1110 :
1111 0 : visibilitymap_pin(reln, block, &vmbuffer);
1112 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1113 :
1114 0 : ReleaseBuffer(vmbuffer);
1115 0 : FreeFakeRelcacheEntry(reln);
1116 : }
1117 :
1118 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1119 : {
1120 0 : page = BufferGetPage(buffer);
1121 :
1122 0 : offnum = xlrec->offnum;
1123 0 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1124 0 : elog(PANIC, "offnum out of range");
1125 0 : lp = PageGetItemId(page, offnum);
1126 0 : if (!ItemIdIsNormal(lp))
1127 0 : elog(PANIC, "invalid lp");
1128 :
1129 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1130 :
1131 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1132 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1133 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1134 : &htup->t_infomask2);
1135 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1136 :
1137 0 : PageSetLSN(page, lsn);
1138 0 : MarkBufferDirty(buffer);
1139 : }
1140 0 : if (BufferIsValid(buffer))
1141 0 : UnlockReleaseBuffer(buffer);
1142 0 : }
1143 :
1144 : /*
1145 : * Replay XLOG_HEAP_INPLACE records.
1146 : */
1147 : static void
1148 8227 : heap_xlog_inplace(XLogReaderState *record)
1149 : {
1150 8227 : XLogRecPtr lsn = record->EndRecPtr;
1151 8227 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1152 : Buffer buffer;
1153 : Page page;
1154 : OffsetNumber offnum;
1155 : ItemId lp;
1156 : HeapTupleHeader htup;
1157 : uint32 oldlen;
1158 : Size newlen;
1159 :
1160 8227 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1161 : {
1162 8022 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1163 :
1164 8022 : page = BufferGetPage(buffer);
1165 :
1166 8022 : offnum = xlrec->offnum;
1167 8022 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1168 0 : elog(PANIC, "offnum out of range");
1169 8022 : lp = PageGetItemId(page, offnum);
1170 8022 : if (!ItemIdIsNormal(lp))
1171 0 : elog(PANIC, "invalid lp");
1172 :
1173 8022 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1174 :
1175 8022 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1176 8022 : if (oldlen != newlen)
1177 0 : elog(PANIC, "wrong tuple length");
1178 :
1179 8022 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1180 :
1181 8022 : PageSetLSN(page, lsn);
1182 8022 : MarkBufferDirty(buffer);
1183 : }
1184 8227 : if (BufferIsValid(buffer))
1185 8227 : UnlockReleaseBuffer(buffer);
1186 :
1187 8227 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1188 : xlrec->nmsgs,
1189 8227 : xlrec->relcacheInitFileInval,
1190 : xlrec->dbId,
1191 : xlrec->tsId);
1192 8227 : }
1193 :
1194 : void
1195 1799216 : heap_redo(XLogReaderState *record)
1196 : {
1197 1799216 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1198 :
1199 : /*
1200 : * These operations don't overwrite MVCC data so no conflict processing is
1201 : * required. The ones in heap2 rmgr do.
1202 : */
1203 :
1204 1799216 : switch (info & XLOG_HEAP_OPMASK)
1205 : {
1206 1321866 : case XLOG_HEAP_INSERT:
1207 1321866 : heap_xlog_insert(record);
1208 1321866 : break;
1209 316864 : case XLOG_HEAP_DELETE:
1210 316864 : heap_xlog_delete(record);
1211 316864 : break;
1212 57770 : case XLOG_HEAP_UPDATE:
1213 57770 : heap_xlog_update(record, false);
1214 57770 : break;
1215 2 : case XLOG_HEAP_TRUNCATE:
1216 :
1217 : /*
1218 : * TRUNCATE is a no-op because the actions are already logged as
1219 : * SMGR WAL records. TRUNCATE WAL record only exists for logical
1220 : * decoding.
1221 : */
1222 2 : break;
1223 38515 : case XLOG_HEAP_HOT_UPDATE:
1224 38515 : heap_xlog_update(record, true);
1225 38515 : break;
1226 93 : case XLOG_HEAP_CONFIRM:
1227 93 : heap_xlog_confirm(record);
1228 93 : break;
1229 55879 : case XLOG_HEAP_LOCK:
1230 55879 : heap_xlog_lock(record);
1231 55879 : break;
1232 8227 : case XLOG_HEAP_INPLACE:
1233 8227 : heap_xlog_inplace(record);
1234 8227 : break;
1235 0 : default:
1236 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1237 : }
1238 1799216 : }
1239 :
1240 : void
1241 87881 : heap2_redo(XLogReaderState *record)
1242 : {
1243 87881 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1244 :
1245 87881 : switch (info & XLOG_HEAP_OPMASK)
1246 : {
1247 20009 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1248 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1249 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1250 20009 : heap_xlog_prune_freeze(record);
1251 20009 : break;
1252 66824 : case XLOG_HEAP2_MULTI_INSERT:
1253 66824 : heap_xlog_multi_insert(record);
1254 66824 : break;
1255 0 : case XLOG_HEAP2_LOCK_UPDATED:
1256 0 : heap_xlog_lock_updated(record);
1257 0 : break;
1258 1048 : case XLOG_HEAP2_NEW_CID:
1259 :
1260 : /*
1261 : * Nothing to do on a real replay, only used during logical
1262 : * decoding.
1263 : */
1264 1048 : break;
1265 0 : case XLOG_HEAP2_REWRITE:
1266 0 : heap_xlog_logical_rewrite(record);
1267 0 : break;
1268 0 : default:
1269 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1270 : }
1271 87881 : }
1272 :
1273 : /*
1274 : * Mask a heap page before performing consistency checks on it.
1275 : */
1276 : void
1277 3005114 : heap_mask(char *pagedata, BlockNumber blkno)
1278 : {
1279 3005114 : Page page = (Page) pagedata;
1280 : OffsetNumber off;
1281 :
1282 3005114 : mask_page_lsn_and_checksum(page);
1283 :
1284 3005114 : mask_page_hint_bits(page);
1285 3005114 : mask_unused_space(page);
1286 :
1287 248276240 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1288 : {
1289 245271126 : ItemId iid = PageGetItemId(page, off);
1290 : char *page_item;
1291 :
1292 245271126 : page_item = (char *) (page + ItemIdGetOffset(iid));
1293 :
1294 245271126 : if (ItemIdIsNormal(iid))
1295 : {
1296 227941370 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1297 :
1298 : /*
1299 : * If xmin of a tuple is not yet frozen, we should ignore
1300 : * differences in hint bits, since they can be set without
1301 : * emitting WAL.
1302 : */
1303 227941370 : if (!HeapTupleHeaderXminFrozen(page_htup))
1304 220265474 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1305 : else
1306 : {
1307 : /* Still we need to mask xmax hint bits. */
1308 7675896 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1309 7675896 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1310 : }
1311 :
1312 : /*
1313 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1314 : * it. See heap_xlog_insert() for details.
1315 : */
1316 227941370 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1317 :
1318 : /*
1319 : * For a speculative tuple, heap_insert() does not set ctid in the
1320 : * caller-passed heap tuple itself, leaving the ctid field to
1321 : * contain a speculative token value - a per-backend monotonically
1322 : * increasing identifier. Besides, it does not WAL-log ctid under
1323 : * any circumstances.
1324 : *
1325 : * During redo, heap_xlog_insert() sets t_ctid to current block
1326 : * number and self offset number. It doesn't care about any
1327 : * speculative insertions on the primary. Hence, we set t_ctid to
1328 : * current block number and self offset number to ignore any
1329 : * inconsistency.
1330 : */
1331 227941370 : if (HeapTupleHeaderIsSpeculative(page_htup))
1332 93 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1333 :
1334 : /*
1335 : * NB: Not ignoring ctid changes due to the tuple having moved
1336 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1337 : * important information that needs to be in-sync between primary
1338 : * and standby, and thus is WAL logged.
1339 : */
1340 : }
1341 :
1342 : /*
1343 : * Ignore any padding bytes after the tuple, when the length of the
1344 : * item is not MAXALIGNed.
1345 : */
1346 245271126 : if (ItemIdHasStorage(iid))
1347 : {
1348 227941370 : int len = ItemIdGetLength(iid);
1349 227941370 : int padlen = MAXALIGN(len) - len;
1350 :
1351 227941370 : if (padlen > 0)
1352 122771186 : memset(page_item + len, MASK_MARKER, padlen);
1353 : }
1354 : }
1355 3005114 : }
|