Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hash_xlog.c
4 : * WAL replay logic for hash index.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hash_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/hash.h"
19 : #include "access/hash_xlog.h"
20 : #include "access/xlogutils.h"
21 : #include "storage/standby.h"
22 :
23 : /*
24 : * replay a hash index meta page
25 : */
26 : static void
27 52 : hash_xlog_init_meta_page(XLogReaderState *record)
28 : {
29 52 : XLogRecPtr lsn = record->EndRecPtr;
30 : Page page;
31 : Buffer metabuf;
32 : ForkNumber forknum;
33 :
34 52 : xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
35 :
36 : /* create the index' metapage */
37 52 : metabuf = XLogInitBufferForRedo(record, 0);
38 : Assert(BufferIsValid(metabuf));
39 52 : _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
40 52 : xlrec->ffactor, true);
41 52 : page = (Page) BufferGetPage(metabuf);
42 52 : PageSetLSN(page, lsn);
43 52 : MarkBufferDirty(metabuf);
44 :
45 : /*
46 : * Force the on-disk state of init forks to always be in sync with the
47 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
48 : * special handling for init forks as create index operations don't log a
49 : * full page image of the metapage.
50 : */
51 52 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
52 52 : if (forknum == INIT_FORKNUM)
53 2 : FlushOneBuffer(metabuf);
54 :
55 : /* all done */
56 52 : UnlockReleaseBuffer(metabuf);
57 52 : }
58 :
59 : /*
60 : * replay a hash index bitmap page
61 : */
62 : static void
63 52 : hash_xlog_init_bitmap_page(XLogReaderState *record)
64 : {
65 52 : XLogRecPtr lsn = record->EndRecPtr;
66 : Buffer bitmapbuf;
67 : Buffer metabuf;
68 : Page page;
69 : HashMetaPage metap;
70 : uint32 num_buckets;
71 : ForkNumber forknum;
72 :
73 52 : xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
74 :
75 : /*
76 : * Initialize bitmap page
77 : */
78 52 : bitmapbuf = XLogInitBufferForRedo(record, 0);
79 52 : _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
80 52 : PageSetLSN(BufferGetPage(bitmapbuf), lsn);
81 52 : MarkBufferDirty(bitmapbuf);
82 :
83 : /*
84 : * Force the on-disk state of init forks to always be in sync with the
85 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
86 : * special handling for init forks as create index operations don't log a
87 : * full page image of the metapage.
88 : */
89 52 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
90 52 : if (forknum == INIT_FORKNUM)
91 2 : FlushOneBuffer(bitmapbuf);
92 52 : UnlockReleaseBuffer(bitmapbuf);
93 :
94 : /* add the new bitmap page to the metapage's list of bitmaps */
95 52 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
96 : {
97 : /*
98 : * Note: in normal operation, we'd update the metapage while still
99 : * holding lock on the bitmap page. But during replay it's not
100 : * necessary to hold that lock, since nobody can see it yet; the
101 : * creating transaction hasn't yet committed.
102 : */
103 52 : page = BufferGetPage(metabuf);
104 52 : metap = HashPageGetMeta(page);
105 :
106 52 : num_buckets = metap->hashm_maxbucket + 1;
107 52 : metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
108 52 : metap->hashm_nmaps++;
109 :
110 52 : PageSetLSN(page, lsn);
111 52 : MarkBufferDirty(metabuf);
112 :
113 52 : XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
114 52 : if (forknum == INIT_FORKNUM)
115 2 : FlushOneBuffer(metabuf);
116 : }
117 52 : if (BufferIsValid(metabuf))
118 52 : UnlockReleaseBuffer(metabuf);
119 52 : }
120 :
121 : /*
122 : * replay a hash index insert without split
123 : */
124 : static void
125 239288 : hash_xlog_insert(XLogReaderState *record)
126 : {
127 : HashMetaPage metap;
128 239288 : XLogRecPtr lsn = record->EndRecPtr;
129 239288 : xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
130 : Buffer buffer;
131 : Page page;
132 :
133 239288 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
134 : {
135 : Size datalen;
136 236382 : char *datapos = XLogRecGetBlockData(record, 0, &datalen);
137 :
138 236382 : page = BufferGetPage(buffer);
139 :
140 236382 : if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
141 : false, false) == InvalidOffsetNumber)
142 0 : elog(PANIC, "hash_xlog_insert: failed to add item");
143 :
144 236382 : PageSetLSN(page, lsn);
145 236382 : MarkBufferDirty(buffer);
146 : }
147 239288 : if (BufferIsValid(buffer))
148 239288 : UnlockReleaseBuffer(buffer);
149 :
150 239288 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
151 : {
152 : /*
153 : * Note: in normal operation, we'd update the metapage while still
154 : * holding lock on the page we inserted into. But during replay it's
155 : * not necessary to hold that lock, since no other index updates can
156 : * be happening concurrently.
157 : */
158 239236 : page = BufferGetPage(buffer);
159 239236 : metap = HashPageGetMeta(page);
160 239236 : metap->hashm_ntuples += 1;
161 :
162 239236 : PageSetLSN(page, lsn);
163 239236 : MarkBufferDirty(buffer);
164 : }
165 239288 : if (BufferIsValid(buffer))
166 239288 : UnlockReleaseBuffer(buffer);
167 239288 : }
168 :
169 : /*
170 : * replay addition of overflow page for hash index
171 : */
172 : static void
173 132 : hash_xlog_add_ovfl_page(XLogReaderState *record)
174 : {
175 132 : XLogRecPtr lsn = record->EndRecPtr;
176 132 : xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
177 : Buffer leftbuf;
178 : Buffer ovflbuf;
179 : Buffer metabuf;
180 : BlockNumber leftblk;
181 : BlockNumber rightblk;
182 132 : BlockNumber newmapblk = InvalidBlockNumber;
183 : Page ovflpage;
184 : HashPageOpaque ovflopaque;
185 : uint32 *num_bucket;
186 : char *data;
187 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
188 132 : bool new_bmpage = false;
189 :
190 132 : XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
191 132 : XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
192 :
193 132 : ovflbuf = XLogInitBufferForRedo(record, 0);
194 : Assert(BufferIsValid(ovflbuf));
195 :
196 132 : data = XLogRecGetBlockData(record, 0, &datalen);
197 132 : num_bucket = (uint32 *) data;
198 : Assert(datalen == sizeof(uint32));
199 132 : _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
200 : true);
201 : /* update backlink */
202 132 : ovflpage = BufferGetPage(ovflbuf);
203 132 : ovflopaque = HashPageGetOpaque(ovflpage);
204 132 : ovflopaque->hasho_prevblkno = leftblk;
205 :
206 132 : PageSetLSN(ovflpage, lsn);
207 132 : MarkBufferDirty(ovflbuf);
208 :
209 132 : if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
210 : {
211 : Page leftpage;
212 : HashPageOpaque leftopaque;
213 :
214 132 : leftpage = BufferGetPage(leftbuf);
215 132 : leftopaque = HashPageGetOpaque(leftpage);
216 132 : leftopaque->hasho_nextblkno = rightblk;
217 :
218 132 : PageSetLSN(leftpage, lsn);
219 132 : MarkBufferDirty(leftbuf);
220 : }
221 :
222 132 : if (BufferIsValid(leftbuf))
223 132 : UnlockReleaseBuffer(leftbuf);
224 132 : UnlockReleaseBuffer(ovflbuf);
225 :
226 : /*
227 : * Note: in normal operation, we'd update the bitmap and meta page while
228 : * still holding lock on the overflow pages. But during replay it's not
229 : * necessary to hold those locks, since no other index updates can be
230 : * happening concurrently.
231 : */
232 132 : if (XLogRecHasBlockRef(record, 2))
233 : {
234 : Buffer mapbuffer;
235 :
236 22 : if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
237 : {
238 18 : Page mappage = (Page) BufferGetPage(mapbuffer);
239 18 : uint32 *freep = NULL;
240 : uint32 *bitmap_page_bit;
241 :
242 18 : freep = HashPageGetBitmap(mappage);
243 :
244 18 : data = XLogRecGetBlockData(record, 2, &datalen);
245 18 : bitmap_page_bit = (uint32 *) data;
246 :
247 18 : SETBIT(freep, *bitmap_page_bit);
248 :
249 18 : PageSetLSN(mappage, lsn);
250 18 : MarkBufferDirty(mapbuffer);
251 : }
252 22 : if (BufferIsValid(mapbuffer))
253 22 : UnlockReleaseBuffer(mapbuffer);
254 : }
255 :
256 132 : if (XLogRecHasBlockRef(record, 3))
257 : {
258 : Buffer newmapbuf;
259 :
260 0 : newmapbuf = XLogInitBufferForRedo(record, 3);
261 :
262 0 : _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
263 :
264 0 : new_bmpage = true;
265 0 : newmapblk = BufferGetBlockNumber(newmapbuf);
266 :
267 0 : MarkBufferDirty(newmapbuf);
268 0 : PageSetLSN(BufferGetPage(newmapbuf), lsn);
269 :
270 0 : UnlockReleaseBuffer(newmapbuf);
271 : }
272 :
273 132 : if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
274 : {
275 : HashMetaPage metap;
276 : Page page;
277 : uint32 *firstfree_ovflpage;
278 :
279 132 : data = XLogRecGetBlockData(record, 4, &datalen);
280 132 : firstfree_ovflpage = (uint32 *) data;
281 :
282 132 : page = BufferGetPage(metabuf);
283 132 : metap = HashPageGetMeta(page);
284 132 : metap->hashm_firstfree = *firstfree_ovflpage;
285 :
286 132 : if (!xlrec->bmpage_found)
287 : {
288 110 : metap->hashm_spares[metap->hashm_ovflpoint]++;
289 :
290 110 : if (new_bmpage)
291 : {
292 : Assert(BlockNumberIsValid(newmapblk));
293 :
294 0 : metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
295 0 : metap->hashm_nmaps++;
296 0 : metap->hashm_spares[metap->hashm_ovflpoint]++;
297 : }
298 : }
299 :
300 132 : PageSetLSN(page, lsn);
301 132 : MarkBufferDirty(metabuf);
302 : }
303 132 : if (BufferIsValid(metabuf))
304 132 : UnlockReleaseBuffer(metabuf);
305 132 : }
306 :
307 : /*
308 : * replay allocation of page for split operation
309 : */
310 : static void
311 448 : hash_xlog_split_allocate_page(XLogReaderState *record)
312 : {
313 448 : XLogRecPtr lsn = record->EndRecPtr;
314 448 : xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
315 : Buffer oldbuf;
316 : Buffer newbuf;
317 : Buffer metabuf;
318 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
319 : char *data;
320 : XLogRedoAction action;
321 :
322 : /*
323 : * To be consistent with normal operation, here we take cleanup locks on
324 : * both the old and new buckets even though there can't be any concurrent
325 : * inserts.
326 : */
327 :
328 : /* replay the record for old bucket */
329 448 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
330 :
331 : /*
332 : * Note that we still update the page even if it was restored from a full
333 : * page image, because the special space is not included in the image.
334 : */
335 448 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
336 : {
337 : Page oldpage;
338 : HashPageOpaque oldopaque;
339 :
340 448 : oldpage = BufferGetPage(oldbuf);
341 448 : oldopaque = HashPageGetOpaque(oldpage);
342 :
343 448 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
344 448 : oldopaque->hasho_prevblkno = xlrec->new_bucket;
345 :
346 448 : PageSetLSN(oldpage, lsn);
347 448 : MarkBufferDirty(oldbuf);
348 : }
349 :
350 : /* replay the record for new bucket */
351 448 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
352 : &newbuf);
353 448 : _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
354 448 : xlrec->new_bucket_flag, true);
355 448 : MarkBufferDirty(newbuf);
356 448 : PageSetLSN(BufferGetPage(newbuf), lsn);
357 :
358 : /*
359 : * We can release the lock on old bucket early as well but doing here to
360 : * consistent with normal operation.
361 : */
362 448 : if (BufferIsValid(oldbuf))
363 448 : UnlockReleaseBuffer(oldbuf);
364 448 : if (BufferIsValid(newbuf))
365 448 : UnlockReleaseBuffer(newbuf);
366 :
367 : /*
368 : * Note: in normal operation, we'd update the meta page while still
369 : * holding lock on the old and new bucket pages. But during replay it's
370 : * not necessary to hold those locks, since no other bucket splits can be
371 : * happening concurrently.
372 : */
373 :
374 : /* replay the record for metapage changes */
375 448 : if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
376 : {
377 : Page page;
378 : HashMetaPage metap;
379 :
380 448 : page = BufferGetPage(metabuf);
381 448 : metap = HashPageGetMeta(page);
382 448 : metap->hashm_maxbucket = xlrec->new_bucket;
383 :
384 448 : data = XLogRecGetBlockData(record, 2, &datalen);
385 :
386 448 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
387 : {
388 : uint32 lowmask;
389 : uint32 *highmask;
390 :
391 : /* extract low and high masks. */
392 8 : memcpy(&lowmask, data, sizeof(uint32));
393 8 : highmask = (uint32 *) ((char *) data + sizeof(uint32));
394 :
395 : /* update metapage */
396 8 : metap->hashm_lowmask = lowmask;
397 8 : metap->hashm_highmask = *highmask;
398 :
399 8 : data += sizeof(uint32) * 2;
400 : }
401 :
402 448 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
403 : {
404 : uint32 ovflpoint;
405 : uint32 *ovflpages;
406 :
407 : /* extract information of overflow pages. */
408 20 : memcpy(&ovflpoint, data, sizeof(uint32));
409 20 : ovflpages = (uint32 *) ((char *) data + sizeof(uint32));
410 :
411 : /* update metapage */
412 20 : metap->hashm_spares[ovflpoint] = *ovflpages;
413 20 : metap->hashm_ovflpoint = ovflpoint;
414 : }
415 :
416 448 : MarkBufferDirty(metabuf);
417 448 : PageSetLSN(BufferGetPage(metabuf), lsn);
418 : }
419 :
420 448 : if (BufferIsValid(metabuf))
421 448 : UnlockReleaseBuffer(metabuf);
422 448 : }
423 :
424 : /*
425 : * replay of split operation
426 : */
427 : static void
428 474 : hash_xlog_split_page(XLogReaderState *record)
429 : {
430 : Buffer buf;
431 :
432 474 : if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
433 0 : elog(ERROR, "Hash split record did not contain a full-page image");
434 :
435 474 : UnlockReleaseBuffer(buf);
436 474 : }
437 :
438 : /*
439 : * replay completion of split operation
440 : */
441 : static void
442 448 : hash_xlog_split_complete(XLogReaderState *record)
443 : {
444 448 : XLogRecPtr lsn = record->EndRecPtr;
445 448 : xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
446 : Buffer oldbuf;
447 : Buffer newbuf;
448 : XLogRedoAction action;
449 :
450 : /* replay the record for old bucket */
451 448 : action = XLogReadBufferForRedo(record, 0, &oldbuf);
452 :
453 : /*
454 : * Note that we still update the page even if it was restored from a full
455 : * page image, because the bucket flag is not included in the image.
456 : */
457 448 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
458 : {
459 : Page oldpage;
460 : HashPageOpaque oldopaque;
461 :
462 448 : oldpage = BufferGetPage(oldbuf);
463 448 : oldopaque = HashPageGetOpaque(oldpage);
464 :
465 448 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
466 :
467 448 : PageSetLSN(oldpage, lsn);
468 448 : MarkBufferDirty(oldbuf);
469 : }
470 448 : if (BufferIsValid(oldbuf))
471 448 : UnlockReleaseBuffer(oldbuf);
472 :
473 : /* replay the record for new bucket */
474 448 : action = XLogReadBufferForRedo(record, 1, &newbuf);
475 :
476 : /*
477 : * Note that we still update the page even if it was restored from a full
478 : * page image, because the bucket flag is not included in the image.
479 : */
480 448 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
481 : {
482 : Page newpage;
483 : HashPageOpaque nopaque;
484 :
485 448 : newpage = BufferGetPage(newbuf);
486 448 : nopaque = HashPageGetOpaque(newpage);
487 :
488 448 : nopaque->hasho_flag = xlrec->new_bucket_flag;
489 :
490 448 : PageSetLSN(newpage, lsn);
491 448 : MarkBufferDirty(newbuf);
492 : }
493 448 : if (BufferIsValid(newbuf))
494 448 : UnlockReleaseBuffer(newbuf);
495 448 : }
496 :
497 : /*
498 : * replay move of page contents for squeeze operation of hash index
499 : */
500 : static void
501 2 : hash_xlog_move_page_contents(XLogReaderState *record)
502 : {
503 2 : XLogRecPtr lsn = record->EndRecPtr;
504 2 : xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
505 2 : Buffer bucketbuf = InvalidBuffer;
506 2 : Buffer writebuf = InvalidBuffer;
507 2 : Buffer deletebuf = InvalidBuffer;
508 : XLogRedoAction action;
509 :
510 : /*
511 : * Ensure we have a cleanup lock on primary bucket page before we start
512 : * with the actual replay operation. This is to ensure that neither a
513 : * scan can start nor a scan can be already-in-progress during the replay
514 : * of this operation. If we allow scans during this operation, then they
515 : * can miss some records or show the same record multiple times.
516 : */
517 2 : if (xldata->is_prim_bucket_same_wrt)
518 2 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
519 : else
520 : {
521 : /*
522 : * we don't care for return value as the purpose of reading bucketbuf
523 : * is to ensure a cleanup lock on primary bucket page.
524 : */
525 0 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
526 :
527 0 : action = XLogReadBufferForRedo(record, 1, &writebuf);
528 : }
529 :
530 : /* replay the record for adding entries in overflow buffer */
531 2 : if (action == BLK_NEEDS_REDO)
532 : {
533 : Page writepage;
534 : char *begin;
535 : char *data;
536 : Size datalen;
537 2 : uint16 ninserted = 0;
538 :
539 2 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
540 :
541 2 : writepage = (Page) BufferGetPage(writebuf);
542 :
543 2 : if (xldata->ntups > 0)
544 : {
545 2 : OffsetNumber *towrite = (OffsetNumber *) data;
546 :
547 2 : data += sizeof(OffsetNumber) * xldata->ntups;
548 :
549 686 : while (data - begin < datalen)
550 : {
551 684 : IndexTuple itup = (IndexTuple) data;
552 : Size itemsz;
553 : OffsetNumber l;
554 :
555 684 : itemsz = IndexTupleSize(itup);
556 684 : itemsz = MAXALIGN(itemsz);
557 :
558 684 : data += itemsz;
559 :
560 684 : l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
561 684 : if (l == InvalidOffsetNumber)
562 0 : elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
563 : (int) itemsz);
564 :
565 684 : ninserted++;
566 : }
567 : }
568 :
569 : /*
570 : * number of tuples inserted must be same as requested in REDO record.
571 : */
572 : Assert(ninserted == xldata->ntups);
573 :
574 2 : PageSetLSN(writepage, lsn);
575 2 : MarkBufferDirty(writebuf);
576 : }
577 :
578 : /* replay the record for deleting entries from overflow buffer */
579 2 : if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
580 : {
581 : Page page;
582 : char *ptr;
583 : Size len;
584 :
585 2 : ptr = XLogRecGetBlockData(record, 2, &len);
586 :
587 2 : page = (Page) BufferGetPage(deletebuf);
588 :
589 2 : if (len > 0)
590 : {
591 : OffsetNumber *unused;
592 : OffsetNumber *unend;
593 :
594 2 : unused = (OffsetNumber *) ptr;
595 2 : unend = (OffsetNumber *) ((char *) ptr + len);
596 :
597 2 : if ((unend - unused) > 0)
598 2 : PageIndexMultiDelete(page, unused, unend - unused);
599 : }
600 :
601 2 : PageSetLSN(page, lsn);
602 2 : MarkBufferDirty(deletebuf);
603 : }
604 :
605 : /*
606 : * Replay is complete, now we can release the buffers. We release locks at
607 : * end of replay operation to ensure that we hold lock on primary bucket
608 : * page till end of operation. We can optimize by releasing the lock on
609 : * write buffer as soon as the operation for same is complete, if it is
610 : * not same as primary bucket page, but that doesn't seem to be worth
611 : * complicating the code.
612 : */
613 2 : if (BufferIsValid(deletebuf))
614 2 : UnlockReleaseBuffer(deletebuf);
615 :
616 2 : if (BufferIsValid(writebuf))
617 2 : UnlockReleaseBuffer(writebuf);
618 :
619 2 : if (BufferIsValid(bucketbuf))
620 0 : UnlockReleaseBuffer(bucketbuf);
621 2 : }
622 :
623 : /*
624 : * replay squeeze page operation of hash index
625 : */
626 : static void
627 62 : hash_xlog_squeeze_page(XLogReaderState *record)
628 : {
629 62 : XLogRecPtr lsn = record->EndRecPtr;
630 62 : xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
631 62 : Buffer bucketbuf = InvalidBuffer;
632 62 : Buffer writebuf = InvalidBuffer;
633 : Buffer ovflbuf;
634 62 : Buffer prevbuf = InvalidBuffer;
635 : Buffer mapbuf;
636 : XLogRedoAction action;
637 :
638 : /*
639 : * Ensure we have a cleanup lock on primary bucket page before we start
640 : * with the actual replay operation. This is to ensure that neither a
641 : * scan can start nor a scan can be already-in-progress during the replay
642 : * of this operation. If we allow scans during this operation, then they
643 : * can miss some records or show the same record multiple times.
644 : */
645 62 : if (xldata->is_prim_bucket_same_wrt)
646 46 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
647 : else
648 : {
649 : /*
650 : * we don't care for return value as the purpose of reading bucketbuf
651 : * is to ensure a cleanup lock on primary bucket page.
652 : */
653 16 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
654 :
655 16 : if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
656 14 : action = XLogReadBufferForRedo(record, 1, &writebuf);
657 : else
658 2 : action = BLK_NOTFOUND;
659 : }
660 :
661 : /* replay the record for adding entries in overflow buffer */
662 62 : if (action == BLK_NEEDS_REDO)
663 : {
664 : Page writepage;
665 : char *begin;
666 : char *data;
667 : Size datalen;
668 56 : uint16 ninserted = 0;
669 56 : bool mod_wbuf = false;
670 :
671 56 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
672 :
673 56 : writepage = (Page) BufferGetPage(writebuf);
674 :
675 56 : if (xldata->ntups > 0)
676 : {
677 28 : OffsetNumber *towrite = (OffsetNumber *) data;
678 :
679 28 : data += sizeof(OffsetNumber) * xldata->ntups;
680 :
681 980 : while (data - begin < datalen)
682 : {
683 952 : IndexTuple itup = (IndexTuple) data;
684 : Size itemsz;
685 : OffsetNumber l;
686 :
687 952 : itemsz = IndexTupleSize(itup);
688 952 : itemsz = MAXALIGN(itemsz);
689 :
690 952 : data += itemsz;
691 :
692 952 : l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
693 952 : if (l == InvalidOffsetNumber)
694 0 : elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
695 : (int) itemsz);
696 :
697 952 : ninserted++;
698 : }
699 :
700 28 : mod_wbuf = true;
701 : }
702 : else
703 : {
704 : /*
705 : * Ensure that the required flags are set when there are no
706 : * tuples. See _hash_freeovflpage().
707 : */
708 : Assert(xldata->is_prim_bucket_same_wrt ||
709 : xldata->is_prev_bucket_same_wrt);
710 : }
711 :
712 : /*
713 : * number of tuples inserted must be same as requested in REDO record.
714 : */
715 : Assert(ninserted == xldata->ntups);
716 :
717 : /*
718 : * if the page on which are adding tuples is a page previous to freed
719 : * overflow page, then update its nextblkno.
720 : */
721 56 : if (xldata->is_prev_bucket_same_wrt)
722 : {
723 20 : HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
724 :
725 20 : writeopaque->hasho_nextblkno = xldata->nextblkno;
726 20 : mod_wbuf = true;
727 : }
728 :
729 : /* Set LSN and mark writebuf dirty iff it is modified */
730 56 : if (mod_wbuf)
731 : {
732 34 : PageSetLSN(writepage, lsn);
733 34 : MarkBufferDirty(writebuf);
734 : }
735 : }
736 :
737 : /* replay the record for initializing overflow buffer */
738 62 : if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
739 : {
740 : Page ovflpage;
741 : HashPageOpaque ovflopaque;
742 :
743 0 : ovflpage = BufferGetPage(ovflbuf);
744 :
745 0 : _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
746 :
747 0 : ovflopaque = HashPageGetOpaque(ovflpage);
748 :
749 0 : ovflopaque->hasho_prevblkno = InvalidBlockNumber;
750 0 : ovflopaque->hasho_nextblkno = InvalidBlockNumber;
751 0 : ovflopaque->hasho_bucket = InvalidBucket;
752 0 : ovflopaque->hasho_flag = LH_UNUSED_PAGE;
753 0 : ovflopaque->hasho_page_id = HASHO_PAGE_ID;
754 :
755 0 : PageSetLSN(ovflpage, lsn);
756 0 : MarkBufferDirty(ovflbuf);
757 : }
758 62 : if (BufferIsValid(ovflbuf))
759 62 : UnlockReleaseBuffer(ovflbuf);
760 :
761 : /* replay the record for page previous to the freed overflow page */
762 104 : if (!xldata->is_prev_bucket_same_wrt &&
763 42 : XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
764 : {
765 40 : Page prevpage = BufferGetPage(prevbuf);
766 40 : HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
767 :
768 40 : prevopaque->hasho_nextblkno = xldata->nextblkno;
769 :
770 40 : PageSetLSN(prevpage, lsn);
771 40 : MarkBufferDirty(prevbuf);
772 : }
773 62 : if (BufferIsValid(prevbuf))
774 42 : UnlockReleaseBuffer(prevbuf);
775 :
776 : /* replay the record for page next to the freed overflow page */
777 62 : if (XLogRecHasBlockRef(record, 4))
778 : {
779 : Buffer nextbuf;
780 :
781 0 : if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
782 : {
783 0 : Page nextpage = BufferGetPage(nextbuf);
784 0 : HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
785 :
786 0 : nextopaque->hasho_prevblkno = xldata->prevblkno;
787 :
788 0 : PageSetLSN(nextpage, lsn);
789 0 : MarkBufferDirty(nextbuf);
790 : }
791 0 : if (BufferIsValid(nextbuf))
792 0 : UnlockReleaseBuffer(nextbuf);
793 : }
794 :
795 62 : if (BufferIsValid(writebuf))
796 60 : UnlockReleaseBuffer(writebuf);
797 :
798 62 : if (BufferIsValid(bucketbuf))
799 16 : UnlockReleaseBuffer(bucketbuf);
800 :
801 : /*
802 : * Note: in normal operation, we'd update the bitmap and meta page while
803 : * still holding lock on the primary bucket page and overflow pages. But
804 : * during replay it's not necessary to hold those locks, since no other
805 : * index updates can be happening concurrently.
806 : */
807 : /* replay the record for bitmap page */
808 62 : if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
809 : {
810 50 : Page mappage = (Page) BufferGetPage(mapbuf);
811 50 : uint32 *freep = NULL;
812 : char *data;
813 : uint32 *bitmap_page_bit;
814 : Size datalen;
815 :
816 50 : freep = HashPageGetBitmap(mappage);
817 :
818 50 : data = XLogRecGetBlockData(record, 5, &datalen);
819 50 : bitmap_page_bit = (uint32 *) data;
820 :
821 50 : CLRBIT(freep, *bitmap_page_bit);
822 :
823 50 : PageSetLSN(mappage, lsn);
824 50 : MarkBufferDirty(mapbuf);
825 : }
826 62 : if (BufferIsValid(mapbuf))
827 62 : UnlockReleaseBuffer(mapbuf);
828 :
829 : /* replay the record for meta page */
830 62 : if (XLogRecHasBlockRef(record, 6))
831 : {
832 : Buffer metabuf;
833 :
834 60 : if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
835 : {
836 : HashMetaPage metap;
837 : Page page;
838 : char *data;
839 : uint32 *firstfree_ovflpage;
840 : Size datalen;
841 :
842 54 : data = XLogRecGetBlockData(record, 6, &datalen);
843 54 : firstfree_ovflpage = (uint32 *) data;
844 :
845 54 : page = BufferGetPage(metabuf);
846 54 : metap = HashPageGetMeta(page);
847 54 : metap->hashm_firstfree = *firstfree_ovflpage;
848 :
849 54 : PageSetLSN(page, lsn);
850 54 : MarkBufferDirty(metabuf);
851 : }
852 60 : if (BufferIsValid(metabuf))
853 60 : UnlockReleaseBuffer(metabuf);
854 : }
855 62 : }
856 :
857 : /*
858 : * replay delete operation of hash index
859 : */
860 : static void
861 510 : hash_xlog_delete(XLogReaderState *record)
862 : {
863 510 : XLogRecPtr lsn = record->EndRecPtr;
864 510 : xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
865 510 : Buffer bucketbuf = InvalidBuffer;
866 : Buffer deletebuf;
867 : Page page;
868 : XLogRedoAction action;
869 :
870 : /*
871 : * Ensure we have a cleanup lock on primary bucket page before we start
872 : * with the actual replay operation. This is to ensure that neither a
873 : * scan can start nor a scan can be already-in-progress during the replay
874 : * of this operation. If we allow scans during this operation, then they
875 : * can miss some records or show the same record multiple times.
876 : */
877 510 : if (xldata->is_primary_bucket_page)
878 444 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
879 : else
880 : {
881 : /*
882 : * we don't care for return value as the purpose of reading bucketbuf
883 : * is to ensure a cleanup lock on primary bucket page.
884 : */
885 66 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
886 :
887 66 : action = XLogReadBufferForRedo(record, 1, &deletebuf);
888 : }
889 :
890 : /* replay the record for deleting entries in bucket page */
891 510 : if (action == BLK_NEEDS_REDO)
892 : {
893 : char *ptr;
894 : Size len;
895 :
896 462 : ptr = XLogRecGetBlockData(record, 1, &len);
897 :
898 462 : page = (Page) BufferGetPage(deletebuf);
899 :
900 462 : if (len > 0)
901 : {
902 : OffsetNumber *unused;
903 : OffsetNumber *unend;
904 :
905 462 : unused = (OffsetNumber *) ptr;
906 462 : unend = (OffsetNumber *) ((char *) ptr + len);
907 :
908 462 : if ((unend - unused) > 0)
909 462 : PageIndexMultiDelete(page, unused, unend - unused);
910 : }
911 :
912 : /*
913 : * Mark the page as not containing any LP_DEAD items only if
914 : * clear_dead_marking flag is set to true. See comments in
915 : * hashbucketcleanup() for details.
916 : */
917 462 : if (xldata->clear_dead_marking)
918 : {
919 : HashPageOpaque pageopaque;
920 :
921 0 : pageopaque = HashPageGetOpaque(page);
922 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
923 : }
924 :
925 462 : PageSetLSN(page, lsn);
926 462 : MarkBufferDirty(deletebuf);
927 : }
928 510 : if (BufferIsValid(deletebuf))
929 510 : UnlockReleaseBuffer(deletebuf);
930 :
931 510 : if (BufferIsValid(bucketbuf))
932 66 : UnlockReleaseBuffer(bucketbuf);
933 510 : }
934 :
935 : /*
936 : * replay split cleanup flag operation for primary bucket page.
937 : */
938 : static void
939 448 : hash_xlog_split_cleanup(XLogReaderState *record)
940 : {
941 448 : XLogRecPtr lsn = record->EndRecPtr;
942 : Buffer buffer;
943 : Page page;
944 :
945 448 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
946 : {
947 : HashPageOpaque bucket_opaque;
948 :
949 448 : page = (Page) BufferGetPage(buffer);
950 :
951 448 : bucket_opaque = HashPageGetOpaque(page);
952 448 : bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
953 448 : PageSetLSN(page, lsn);
954 448 : MarkBufferDirty(buffer);
955 : }
956 448 : if (BufferIsValid(buffer))
957 448 : UnlockReleaseBuffer(buffer);
958 448 : }
959 :
960 : /*
961 : * replay for update meta page
962 : */
963 : static void
964 8 : hash_xlog_update_meta_page(XLogReaderState *record)
965 : {
966 : HashMetaPage metap;
967 8 : XLogRecPtr lsn = record->EndRecPtr;
968 8 : xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
969 : Buffer metabuf;
970 : Page page;
971 :
972 8 : if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
973 : {
974 8 : page = BufferGetPage(metabuf);
975 8 : metap = HashPageGetMeta(page);
976 :
977 8 : metap->hashm_ntuples = xldata->ntuples;
978 :
979 8 : PageSetLSN(page, lsn);
980 8 : MarkBufferDirty(metabuf);
981 : }
982 8 : if (BufferIsValid(metabuf))
983 8 : UnlockReleaseBuffer(metabuf);
984 8 : }
985 :
986 : /*
987 : * replay delete operation in hash index to remove
988 : * tuples marked as DEAD during index tuple insertion. Block 0 of the record is the page the items are deleted from; block 1 is the metapage, whose hashm_ntuples is reduced by the number of deleted items (xldata->ntuples).
989 : */
990 : static void
991 0 : hash_xlog_vacuum_one_page(XLogReaderState *record)
992 : {
993 0 : XLogRecPtr lsn = record->EndRecPtr; /* LSN stamped on every page this record modifies */
994 : xl_hash_vacuum_one_page *xldata;
995 : Buffer buffer;
996 : Buffer metabuf;
997 : Page page;
998 : XLogRedoAction action;
999 : HashPageOpaque pageopaque;
1000 : OffsetNumber *toDelete;
1001 :
1002 0 : xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
1003 0 : toDelete = xldata->offsets; /* offset numbers to delete; xldata->ntuples is passed as their count below */
1004 :
1005 : /*
1006 : * If we have any conflict processing to do, it must happen before we
1007 : * update the page.
1008 : *
1009 : * Hash index records that are marked as LP_DEAD and being removed during
1010 : * hash index tuple insertion can conflict with standby queries. You might
1011 : * think that vacuum records would conflict as well, but we've handled
1012 : * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
1013 : * xid cleaned by the vacuum of the heap and so we can resolve any
1014 : * conflicts just once when that arrives. After that we know that no
1015 : * conflicts exist from individual hash index vacuum records on that
1016 : * index.
1017 : */
1018 0 : if (InHotStandby)
1019 : {
1020 : RelFileLocator rlocator;
1021 :
1022 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL); /* only the relation locator of block 0 is requested; the other outputs are passed as NULL */
1023 0 : ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1024 0 : xldata->isCatalogRel,
1025 : rlocator);
1026 : }
1027 :
1028 0 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer); /* NOTE(review): the 'true' argument presumably requests a cleanup lock on the page - confirm against the prototype in access/xlogutils.h */
1029 :
1030 0 : if (action == BLK_NEEDS_REDO)
1031 : {
1032 0 : page = (Page) BufferGetPage(buffer);
1033 :
1034 0 : PageIndexMultiDelete(page, toDelete, xldata->ntuples); /* remove all listed items in one call */
1035 :
1036 : /*
1037 : * Mark the page as not containing any LP_DEAD items. See comments in
1038 : * _hash_vacuum_one_page() for details.
1039 : */
1040 0 : pageopaque = HashPageGetOpaque(page);
1041 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1042 :
1043 0 : PageSetLSN(page, lsn);
1044 0 : MarkBufferDirty(buffer);
1045 : }
1046 0 : if (BufferIsValid(buffer)) /* the data page is released here, before the metapage is read */
1047 0 : UnlockReleaseBuffer(buffer);
1048 :
1049 0 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO) /* block 1 is the metapage */
1050 : {
1051 : Page metapage;
1052 : HashMetaPage metap;
1053 :
1054 0 : metapage = BufferGetPage(metabuf);
1055 0 : metap = HashPageGetMeta(metapage);
1056 :
1057 0 : metap->hashm_ntuples -= xldata->ntuples; /* subtract the same count that was passed to PageIndexMultiDelete above */
1058 :
1059 0 : PageSetLSN(metapage, lsn);
1060 0 : MarkBufferDirty(metabuf);
1061 : }
1062 0 : if (BufferIsValid(metabuf)) /* release the metapage buffer whether or not redo was performed */
1063 0 : UnlockReleaseBuffer(metabuf);
1064 0 : }
1065 :
1066 : void
1067 241924 : hash_redo(XLogReaderState *record) /* replay one hash index WAL record by dispatching on its XLOG_HASH_* op code to the matching hash_xlog_* routine */
1068 : {
1069 241924 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; /* clear the XLR_INFO_MASK bits; the remaining bits are the op code switched on below */
1070 :
1071 241924 : switch (info)
1072 : {
1073 52 : case XLOG_HASH_INIT_META_PAGE:
1074 52 : hash_xlog_init_meta_page(record);
1075 52 : break;
1076 52 : case XLOG_HASH_INIT_BITMAP_PAGE:
1077 52 : hash_xlog_init_bitmap_page(record);
1078 52 : break;
1079 239288 : case XLOG_HASH_INSERT:
1080 239288 : hash_xlog_insert(record);
1081 239288 : break;
1082 132 : case XLOG_HASH_ADD_OVFL_PAGE:
1083 132 : hash_xlog_add_ovfl_page(record);
1084 132 : break;
1085 448 : case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1086 448 : hash_xlog_split_allocate_page(record);
1087 448 : break;
1088 474 : case XLOG_HASH_SPLIT_PAGE:
1089 474 : hash_xlog_split_page(record);
1090 474 : break;
1091 448 : case XLOG_HASH_SPLIT_COMPLETE:
1092 448 : hash_xlog_split_complete(record);
1093 448 : break;
1094 2 : case XLOG_HASH_MOVE_PAGE_CONTENTS:
1095 2 : hash_xlog_move_page_contents(record);
1096 2 : break;
1097 62 : case XLOG_HASH_SQUEEZE_PAGE:
1098 62 : hash_xlog_squeeze_page(record);
1099 62 : break;
1100 510 : case XLOG_HASH_DELETE:
1101 510 : hash_xlog_delete(record);
1102 510 : break;
1103 448 : case XLOG_HASH_SPLIT_CLEANUP:
1104 448 : hash_xlog_split_cleanup(record);
1105 448 : break;
1106 8 : case XLOG_HASH_UPDATE_META_PAGE:
1107 8 : hash_xlog_update_meta_page(record);
1108 8 : break;
1109 0 : case XLOG_HASH_VACUUM_ONE_PAGE:
1110 0 : hash_xlog_vacuum_one_page(record);
1111 0 : break;
1112 0 : default: /* op code with no replay routine in this switch */
1113 0 : elog(PANIC, "hash_redo: unknown op code %u", info); /* reported at PANIC level rather than skipping the record */
1114 : }
1115 241924 : }
1116 :
1117 : /*
1118 : * Mask a hash page before performing consistency checks on it. The page image in pagedata is modified in place; blkno is not used by this routine.
1119 : */
1120 : void
1121 958732 : hash_mask(char *pagedata, BlockNumber blkno)
1122 : {
1123 958732 : Page page = (Page) pagedata;
1124 : HashPageOpaque opaque;
1125 : int pagetype;
1126 :
1127 958732 : mask_page_lsn_and_checksum(page); /* these three maskings are applied to every page, whatever its type */
1128 :
1129 958732 : mask_page_hint_bits(page);
1130 958732 : mask_unused_space(page);
1131 :
1132 958732 : opaque = HashPageGetOpaque(page);
1133 :
1134 958732 : pagetype = opaque->hasho_flag & LH_PAGE_TYPE; /* keep only the LH_PAGE_TYPE bits of the flag word */
1135 958732 : if (pagetype == LH_UNUSED_PAGE)
1136 : {
1137 : /*
1138 : * Mask everything on an UNUSED page.
1139 : */
1140 0 : mask_page_content(page);
1141 : }
1142 958732 : else if (pagetype == LH_BUCKET_PAGE ||
1143 : pagetype == LH_OVERFLOW_PAGE)
1144 : {
1145 : /*
1146 : * In hash bucket and overflow pages, it is possible to modify the
1147 : * LP_FLAGS without emitting any WAL record. Hence, mask the line
1148 : * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1149 : */
1150 478528 : mask_lp_flags(page);
1151 : }
1152 :
1153 : /*
1154 : * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1155 : * unlogged. So, mask it. See _hash_kill_items() for details. This is
1156 : * done for every page type, including those not matched above.
1157 958732 : opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1158 958732 : }
|