Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hash_xlog.c
4 : * WAL replay logic for hash index.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hash_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/hash.h"
19 : #include "access/hash_xlog.h"
20 : #include "access/xlogutils.h"
21 : #include "storage/standby.h"
22 :
23 : /*
24 : * replay a hash index meta page
25 : */
26 : static void
27 30 : hash_xlog_init_meta_page(XLogReaderState *record)
28 : {
29 30 : XLogRecPtr lsn = record->EndRecPtr;
30 : Page page;
31 : Buffer metabuf;
32 :
33 30 : xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
34 :
35 : /* create the index' metapage */
36 30 : metabuf = XLogInitBufferForRedo(record, 0);
37 : Assert(BufferIsValid(metabuf));
38 30 : _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
39 30 : xlrec->ffactor, true);
40 30 : page = BufferGetPage(metabuf);
41 30 : PageSetLSN(page, lsn);
42 30 : MarkBufferDirty(metabuf);
43 30 : XLogFlushBufferForRedoIfInit(record, 0, metabuf);
44 :
45 : /* all done */
46 30 : UnlockReleaseBuffer(metabuf);
47 30 : }
48 :
49 : /*
50 : * replay a hash index bitmap page
51 : */
52 : static void
53 30 : hash_xlog_init_bitmap_page(XLogReaderState *record)
54 : {
55 30 : XLogRecPtr lsn = record->EndRecPtr;
56 : Buffer bitmapbuf;
57 : Buffer metabuf;
58 : Page page;
59 : HashMetaPage metap;
60 : uint32 num_buckets;
61 :
62 30 : xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
63 :
64 : /*
65 : * Initialize bitmap page
66 : */
67 30 : bitmapbuf = XLogInitBufferForRedo(record, 0);
68 30 : _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
69 30 : PageSetLSN(BufferGetPage(bitmapbuf), lsn);
70 30 : MarkBufferDirty(bitmapbuf);
71 30 : XLogFlushBufferForRedoIfInit(record, 0, bitmapbuf);
72 30 : UnlockReleaseBuffer(bitmapbuf);
73 :
74 : /* add the new bitmap page to the metapage's list of bitmaps */
75 30 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
76 : {
77 : /*
78 : * Note: in normal operation, we'd update the metapage while still
79 : * holding lock on the bitmap page. But during replay it's not
80 : * necessary to hold that lock, since nobody can see it yet; the
81 : * creating transaction hasn't yet committed.
82 : */
83 30 : page = BufferGetPage(metabuf);
84 30 : metap = HashPageGetMeta(page);
85 :
86 30 : num_buckets = metap->hashm_maxbucket + 1;
87 30 : metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
88 30 : metap->hashm_nmaps++;
89 :
90 30 : PageSetLSN(page, lsn);
91 30 : MarkBufferDirty(metabuf);
92 30 : XLogFlushBufferForRedoIfInit(record, 1, metabuf);
93 : }
94 30 : if (BufferIsValid(metabuf))
95 30 : UnlockReleaseBuffer(metabuf);
96 30 : }
97 :
98 : /*
99 : * replay a hash index insert without split
100 : */
101 : static void
102 121315 : hash_xlog_insert(XLogReaderState *record)
103 : {
104 : HashMetaPage metap;
105 121315 : XLogRecPtr lsn = record->EndRecPtr;
106 121315 : xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
107 : Buffer buffer;
108 : Page page;
109 :
110 121315 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
111 : {
112 : Size datalen;
113 119816 : char *datapos = XLogRecGetBlockData(record, 0, &datalen);
114 :
115 119816 : page = BufferGetPage(buffer);
116 :
117 119816 : if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
118 0 : elog(PANIC, "hash_xlog_insert: failed to add item");
119 :
120 119816 : PageSetLSN(page, lsn);
121 119816 : MarkBufferDirty(buffer);
122 : }
123 121315 : if (BufferIsValid(buffer))
124 121315 : UnlockReleaseBuffer(buffer);
125 :
126 121315 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
127 : {
128 : /*
129 : * Note: in normal operation, we'd update the metapage while still
130 : * holding lock on the page we inserted into. But during replay it's
131 : * not necessary to hold that lock, since no other index updates can
132 : * be happening concurrently.
133 : */
134 121286 : page = BufferGetPage(buffer);
135 121286 : metap = HashPageGetMeta(page);
136 121286 : metap->hashm_ntuples += 1;
137 :
138 121286 : PageSetLSN(page, lsn);
139 121286 : MarkBufferDirty(buffer);
140 : }
141 121315 : if (BufferIsValid(buffer))
142 121315 : UnlockReleaseBuffer(buffer);
143 121315 : }
144 :
145 : /*
146 : * replay addition of overflow page for hash index
147 : */
148 : static void
149 70 : hash_xlog_add_ovfl_page(XLogReaderState *record)
150 : {
151 70 : XLogRecPtr lsn = record->EndRecPtr;
152 70 : xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
153 : Buffer leftbuf;
154 : Buffer ovflbuf;
155 : Buffer metabuf;
156 : BlockNumber leftblk;
157 : BlockNumber rightblk;
158 70 : BlockNumber newmapblk = InvalidBlockNumber;
159 : Page ovflpage;
160 : HashPageOpaque ovflopaque;
161 : uint32 *num_bucket;
162 : char *data;
163 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
164 70 : bool new_bmpage = false;
165 :
166 70 : XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
167 70 : XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
168 :
169 70 : ovflbuf = XLogInitBufferForRedo(record, 0);
170 : Assert(BufferIsValid(ovflbuf));
171 :
172 70 : data = XLogRecGetBlockData(record, 0, &datalen);
173 70 : num_bucket = (uint32 *) data;
174 : Assert(datalen == sizeof(uint32));
175 70 : _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
176 : true);
177 : /* update backlink */
178 70 : ovflpage = BufferGetPage(ovflbuf);
179 70 : ovflopaque = HashPageGetOpaque(ovflpage);
180 70 : ovflopaque->hasho_prevblkno = leftblk;
181 :
182 70 : PageSetLSN(ovflpage, lsn);
183 70 : MarkBufferDirty(ovflbuf);
184 :
185 70 : if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
186 : {
187 : Page leftpage;
188 : HashPageOpaque leftopaque;
189 :
190 69 : leftpage = BufferGetPage(leftbuf);
191 69 : leftopaque = HashPageGetOpaque(leftpage);
192 69 : leftopaque->hasho_nextblkno = rightblk;
193 :
194 69 : PageSetLSN(leftpage, lsn);
195 69 : MarkBufferDirty(leftbuf);
196 : }
197 :
198 70 : if (BufferIsValid(leftbuf))
199 70 : UnlockReleaseBuffer(leftbuf);
200 70 : UnlockReleaseBuffer(ovflbuf);
201 :
202 : /*
203 : * Note: in normal operation, we'd update the bitmap and meta page while
204 : * still holding lock on the overflow pages. But during replay it's not
205 : * necessary to hold those locks, since no other index updates can be
206 : * happening concurrently.
207 : */
208 70 : if (XLogRecHasBlockRef(record, 2))
209 : {
210 : Buffer mapbuffer;
211 :
212 12 : if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
213 : {
214 7 : Page mappage = BufferGetPage(mapbuffer);
215 7 : uint32 *freep = NULL;
216 : uint32 *bitmap_page_bit;
217 :
218 7 : freep = HashPageGetBitmap(mappage);
219 :
220 7 : data = XLogRecGetBlockData(record, 2, &datalen);
221 7 : bitmap_page_bit = (uint32 *) data;
222 :
223 7 : SETBIT(freep, *bitmap_page_bit);
224 :
225 7 : PageSetLSN(mappage, lsn);
226 7 : MarkBufferDirty(mapbuffer);
227 : }
228 12 : if (BufferIsValid(mapbuffer))
229 12 : UnlockReleaseBuffer(mapbuffer);
230 : }
231 :
232 70 : if (XLogRecHasBlockRef(record, 3))
233 : {
234 : Buffer newmapbuf;
235 :
236 0 : newmapbuf = XLogInitBufferForRedo(record, 3);
237 :
238 0 : _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
239 :
240 0 : new_bmpage = true;
241 0 : newmapblk = BufferGetBlockNumber(newmapbuf);
242 :
243 0 : MarkBufferDirty(newmapbuf);
244 0 : PageSetLSN(BufferGetPage(newmapbuf), lsn);
245 :
246 0 : UnlockReleaseBuffer(newmapbuf);
247 : }
248 :
249 70 : if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
250 : {
251 : HashMetaPage metap;
252 : Page page;
253 : uint32 *firstfree_ovflpage;
254 :
255 70 : data = XLogRecGetBlockData(record, 4, &datalen);
256 70 : firstfree_ovflpage = (uint32 *) data;
257 :
258 70 : page = BufferGetPage(metabuf);
259 70 : metap = HashPageGetMeta(page);
260 70 : metap->hashm_firstfree = *firstfree_ovflpage;
261 :
262 70 : if (!xlrec->bmpage_found)
263 : {
264 58 : metap->hashm_spares[metap->hashm_ovflpoint]++;
265 :
266 58 : if (new_bmpage)
267 : {
268 : Assert(BlockNumberIsValid(newmapblk));
269 :
270 0 : metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
271 0 : metap->hashm_nmaps++;
272 0 : metap->hashm_spares[metap->hashm_ovflpoint]++;
273 : }
274 : }
275 :
276 70 : PageSetLSN(page, lsn);
277 70 : MarkBufferDirty(metabuf);
278 : }
279 70 : if (BufferIsValid(metabuf))
280 70 : UnlockReleaseBuffer(metabuf);
281 70 : }
282 :
283 : /*
284 : * replay allocation of page for split operation
285 : */
286 : static void
287 221 : hash_xlog_split_allocate_page(XLogReaderState *record)
288 : {
289 221 : XLogRecPtr lsn = record->EndRecPtr;
290 221 : xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
291 : Buffer oldbuf;
292 : Buffer newbuf;
293 : Buffer metabuf;
294 : XLogRedoAction action;
295 :
296 : /*
297 : * To be consistent with normal operation, here we take cleanup locks on
298 : * both the old and new buckets even though there can't be any concurrent
299 : * inserts.
300 : */
301 :
302 : /* replay the record for old bucket */
303 221 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
304 :
305 : /*
306 : * Note that we still update the page even if it was restored from a full
307 : * page image, because the special space is not included in the image.
308 : */
309 221 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
310 : {
311 : Page oldpage;
312 : HashPageOpaque oldopaque;
313 :
314 221 : oldpage = BufferGetPage(oldbuf);
315 221 : oldopaque = HashPageGetOpaque(oldpage);
316 :
317 221 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
318 221 : oldopaque->hasho_prevblkno = xlrec->new_bucket;
319 :
320 221 : PageSetLSN(oldpage, lsn);
321 221 : MarkBufferDirty(oldbuf);
322 : }
323 :
324 : /* replay the record for new bucket */
325 221 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
326 : &newbuf);
327 221 : _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
328 221 : xlrec->new_bucket_flag, true);
329 221 : MarkBufferDirty(newbuf);
330 221 : PageSetLSN(BufferGetPage(newbuf), lsn);
331 :
332 : /*
333 : * We can release the lock on old bucket early as well but doing here to
334 : * consistent with normal operation.
335 : */
336 221 : if (BufferIsValid(oldbuf))
337 221 : UnlockReleaseBuffer(oldbuf);
338 221 : if (BufferIsValid(newbuf))
339 221 : UnlockReleaseBuffer(newbuf);
340 :
341 : /*
342 : * Note: in normal operation, we'd update the meta page while still
343 : * holding lock on the old and new bucket pages. But during replay it's
344 : * not necessary to hold those locks, since no other bucket splits can be
345 : * happening concurrently.
346 : */
347 :
348 : /* replay the record for metapage changes */
349 221 : if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
350 : {
351 : Page page;
352 : HashMetaPage metap;
353 : Size datalen;
354 : char *data;
355 : uint32 *uidata;
356 : int uidatacount;
357 :
358 221 : page = BufferGetPage(metabuf);
359 221 : metap = HashPageGetMeta(page);
360 221 : metap->hashm_maxbucket = xlrec->new_bucket;
361 :
362 221 : data = XLogRecGetBlockData(record, 2, &datalen);
363 :
364 : /*
365 : * This cast is ok because XLogRecGetBlockData() returns a MAXALIGNed
366 : * buffer.
367 : */
368 221 : uidata = (uint32 *) data;
369 221 : uidatacount = 0;
370 :
371 221 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
372 : {
373 3 : uint32 lowmask = uidata[uidatacount++];
374 3 : uint32 highmask = uidata[uidatacount++];
375 :
376 : /* update metapage */
377 3 : metap->hashm_lowmask = lowmask;
378 3 : metap->hashm_highmask = highmask;
379 : }
380 :
381 221 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
382 : {
383 9 : uint32 ovflpoint = uidata[uidatacount++];
384 9 : uint32 ovflpages = uidata[uidatacount++];
385 :
386 : /* update metapage */
387 9 : metap->hashm_ovflpoint = ovflpoint;
388 9 : metap->hashm_spares[ovflpoint] = ovflpages;
389 : }
390 :
391 221 : MarkBufferDirty(metabuf);
392 221 : PageSetLSN(BufferGetPage(metabuf), lsn);
393 : }
394 :
395 221 : if (BufferIsValid(metabuf))
396 221 : UnlockReleaseBuffer(metabuf);
397 221 : }
398 :
399 : /*
400 : * replay of split operation
401 : */
402 : static void
403 234 : hash_xlog_split_page(XLogReaderState *record)
404 : {
405 : Buffer buf;
406 :
407 234 : if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
408 0 : elog(ERROR, "Hash split record did not contain a full-page image");
409 :
410 234 : UnlockReleaseBuffer(buf);
411 234 : }
412 :
413 : /*
414 : * replay completion of split operation
415 : */
416 : static void
417 221 : hash_xlog_split_complete(XLogReaderState *record)
418 : {
419 221 : XLogRecPtr lsn = record->EndRecPtr;
420 221 : xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
421 : Buffer oldbuf;
422 : Buffer newbuf;
423 : XLogRedoAction action;
424 :
425 : /* replay the record for old bucket */
426 221 : action = XLogReadBufferForRedo(record, 0, &oldbuf);
427 :
428 : /*
429 : * Note that we still update the page even if it was restored from a full
430 : * page image, because the bucket flag is not included in the image.
431 : */
432 221 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
433 : {
434 : Page oldpage;
435 : HashPageOpaque oldopaque;
436 :
437 221 : oldpage = BufferGetPage(oldbuf);
438 221 : oldopaque = HashPageGetOpaque(oldpage);
439 :
440 221 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
441 :
442 221 : PageSetLSN(oldpage, lsn);
443 221 : MarkBufferDirty(oldbuf);
444 : }
445 221 : if (BufferIsValid(oldbuf))
446 221 : UnlockReleaseBuffer(oldbuf);
447 :
448 : /* replay the record for new bucket */
449 221 : action = XLogReadBufferForRedo(record, 1, &newbuf);
450 :
451 : /*
452 : * Note that we still update the page even if it was restored from a full
453 : * page image, because the bucket flag is not included in the image.
454 : */
455 221 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
456 : {
457 : Page newpage;
458 : HashPageOpaque nopaque;
459 :
460 221 : newpage = BufferGetPage(newbuf);
461 221 : nopaque = HashPageGetOpaque(newpage);
462 :
463 221 : nopaque->hasho_flag = xlrec->new_bucket_flag;
464 :
465 221 : PageSetLSN(newpage, lsn);
466 221 : MarkBufferDirty(newbuf);
467 : }
468 221 : if (BufferIsValid(newbuf))
469 221 : UnlockReleaseBuffer(newbuf);
470 221 : }
471 :
472 : /*
473 : * replay move of page contents for squeeze operation of hash index
474 : */
475 : static void
476 1 : hash_xlog_move_page_contents(XLogReaderState *record)
477 : {
478 1 : XLogRecPtr lsn = record->EndRecPtr;
479 1 : xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
480 1 : Buffer bucketbuf = InvalidBuffer;
481 1 : Buffer writebuf = InvalidBuffer;
482 1 : Buffer deletebuf = InvalidBuffer;
483 : XLogRedoAction action;
484 :
485 : /*
486 : * Ensure we have a cleanup lock on primary bucket page before we start
487 : * with the actual replay operation. This is to ensure that neither a
488 : * scan can start nor a scan can be already-in-progress during the replay
489 : * of this operation. If we allow scans during this operation, then they
490 : * can miss some records or show the same record multiple times.
491 : */
492 1 : if (xldata->is_prim_bucket_same_wrt)
493 1 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
494 : else
495 : {
496 : /*
497 : * we don't care for return value as the purpose of reading bucketbuf
498 : * is to ensure a cleanup lock on primary bucket page.
499 : */
500 0 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
501 :
502 0 : action = XLogReadBufferForRedo(record, 1, &writebuf);
503 : }
504 :
505 : /* replay the record for adding entries in overflow buffer */
506 1 : if (action == BLK_NEEDS_REDO)
507 : {
508 : Page writepage;
509 : char *begin;
510 : char *data;
511 : Size datalen;
512 1 : uint16 ninserted = 0;
513 :
514 1 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
515 :
516 1 : writepage = BufferGetPage(writebuf);
517 :
518 1 : if (xldata->ntups > 0)
519 : {
520 1 : OffsetNumber *towrite = (OffsetNumber *) data;
521 :
522 1 : data += sizeof(OffsetNumber) * xldata->ntups;
523 :
524 343 : while (data - begin < datalen)
525 : {
526 342 : IndexTuple itup = (IndexTuple) data;
527 : Size itemsz;
528 : OffsetNumber l;
529 :
530 342 : itemsz = IndexTupleSize(itup);
531 342 : itemsz = MAXALIGN(itemsz);
532 :
533 342 : data += itemsz;
534 :
535 342 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
536 342 : if (l == InvalidOffsetNumber)
537 0 : elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %zu bytes", itemsz);
538 :
539 342 : ninserted++;
540 : }
541 : }
542 :
543 : /*
544 : * number of tuples inserted must be same as requested in REDO record.
545 : */
546 : Assert(ninserted == xldata->ntups);
547 :
548 1 : PageSetLSN(writepage, lsn);
549 1 : MarkBufferDirty(writebuf);
550 : }
551 :
552 : /* replay the record for deleting entries from overflow buffer */
553 1 : if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
554 : {
555 : Page page;
556 : char *ptr;
557 : Size len;
558 :
559 1 : ptr = XLogRecGetBlockData(record, 2, &len);
560 :
561 1 : page = BufferGetPage(deletebuf);
562 :
563 1 : if (len > 0)
564 : {
565 : OffsetNumber *unused;
566 : OffsetNumber *unend;
567 :
568 1 : unused = (OffsetNumber *) ptr;
569 1 : unend = (OffsetNumber *) (ptr + len);
570 :
571 1 : if ((unend - unused) > 0)
572 1 : PageIndexMultiDelete(page, unused, unend - unused);
573 : }
574 :
575 1 : PageSetLSN(page, lsn);
576 1 : MarkBufferDirty(deletebuf);
577 : }
578 :
579 : /*
580 : * Replay is complete, now we can release the buffers. We release locks at
581 : * end of replay operation to ensure that we hold lock on primary bucket
582 : * page till end of operation. We can optimize by releasing the lock on
583 : * write buffer as soon as the operation for same is complete, if it is
584 : * not same as primary bucket page, but that doesn't seem to be worth
585 : * complicating the code.
586 : */
587 1 : if (BufferIsValid(deletebuf))
588 1 : UnlockReleaseBuffer(deletebuf);
589 :
590 1 : if (BufferIsValid(writebuf))
591 1 : UnlockReleaseBuffer(writebuf);
592 :
593 1 : if (BufferIsValid(bucketbuf))
594 0 : UnlockReleaseBuffer(bucketbuf);
595 1 : }
596 :
597 : /*
598 : * replay squeeze page operation of hash index
599 : */
600 : static void
601 31 : hash_xlog_squeeze_page(XLogReaderState *record)
602 : {
603 31 : XLogRecPtr lsn = record->EndRecPtr;
604 31 : xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
605 31 : Buffer bucketbuf = InvalidBuffer;
606 31 : Buffer writebuf = InvalidBuffer;
607 : Buffer ovflbuf;
608 31 : Buffer prevbuf = InvalidBuffer;
609 : Buffer mapbuf;
610 : XLogRedoAction action;
611 :
612 : /*
613 : * Ensure we have a cleanup lock on primary bucket page before we start
614 : * with the actual replay operation. This is to ensure that neither a
615 : * scan can start nor a scan can be already-in-progress during the replay
616 : * of this operation. If we allow scans during this operation, then they
617 : * can miss some records or show the same record multiple times.
618 : */
619 31 : if (xldata->is_prim_bucket_same_wrt)
620 23 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
621 : else
622 : {
623 : /*
624 : * we don't care for return value as the purpose of reading bucketbuf
625 : * is to ensure a cleanup lock on primary bucket page.
626 : */
627 8 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
628 :
629 8 : if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
630 7 : action = XLogReadBufferForRedo(record, 1, &writebuf);
631 : else
632 1 : action = BLK_NOTFOUND;
633 : }
634 :
635 : /* replay the record for adding entries in overflow buffer */
636 31 : if (action == BLK_NEEDS_REDO)
637 : {
638 : Page writepage;
639 : char *begin;
640 : char *data;
641 : Size datalen;
642 28 : uint16 ninserted = 0;
643 28 : bool mod_wbuf = false;
644 :
645 28 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
646 :
647 28 : writepage = BufferGetPage(writebuf);
648 :
649 28 : if (xldata->ntups > 0)
650 : {
651 14 : OffsetNumber *towrite = (OffsetNumber *) data;
652 :
653 14 : data += sizeof(OffsetNumber) * xldata->ntups;
654 :
655 522 : while (data - begin < datalen)
656 : {
657 508 : IndexTuple itup = (IndexTuple) data;
658 : Size itemsz;
659 : OffsetNumber l;
660 :
661 508 : itemsz = IndexTupleSize(itup);
662 508 : itemsz = MAXALIGN(itemsz);
663 :
664 508 : data += itemsz;
665 :
666 508 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
667 508 : if (l == InvalidOffsetNumber)
668 0 : elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %zu bytes", itemsz);
669 :
670 508 : ninserted++;
671 : }
672 :
673 14 : mod_wbuf = true;
674 : }
675 : else
676 : {
677 : /*
678 : * Ensure that the required flags are set when there are no
679 : * tuples. See _hash_freeovflpage().
680 : */
681 : Assert(xldata->is_prim_bucket_same_wrt ||
682 : xldata->is_prev_bucket_same_wrt);
683 : }
684 :
685 : /*
686 : * number of tuples inserted must be same as requested in REDO record.
687 : */
688 : Assert(ninserted == xldata->ntups);
689 :
690 : /*
691 : * if the page on which are adding tuples is a page previous to freed
692 : * overflow page, then update its nextblkno.
693 : */
694 28 : if (xldata->is_prev_bucket_same_wrt)
695 : {
696 10 : HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
697 :
698 10 : writeopaque->hasho_nextblkno = xldata->nextblkno;
699 10 : mod_wbuf = true;
700 : }
701 :
702 : /* Set LSN and mark writebuf dirty iff it is modified */
703 28 : if (mod_wbuf)
704 : {
705 17 : PageSetLSN(writepage, lsn);
706 17 : MarkBufferDirty(writebuf);
707 : }
708 : }
709 :
710 : /* replay the record for initializing overflow buffer */
711 31 : if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
712 : {
713 : Page ovflpage;
714 : HashPageOpaque ovflopaque;
715 :
716 0 : ovflpage = BufferGetPage(ovflbuf);
717 :
718 0 : _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
719 :
720 0 : ovflopaque = HashPageGetOpaque(ovflpage);
721 :
722 0 : ovflopaque->hasho_prevblkno = InvalidBlockNumber;
723 0 : ovflopaque->hasho_nextblkno = InvalidBlockNumber;
724 0 : ovflopaque->hasho_bucket = InvalidBucket;
725 0 : ovflopaque->hasho_flag = LH_UNUSED_PAGE;
726 0 : ovflopaque->hasho_page_id = HASHO_PAGE_ID;
727 :
728 0 : PageSetLSN(ovflpage, lsn);
729 0 : MarkBufferDirty(ovflbuf);
730 : }
731 31 : if (BufferIsValid(ovflbuf))
732 31 : UnlockReleaseBuffer(ovflbuf);
733 :
734 : /* replay the record for page previous to the freed overflow page */
735 52 : if (!xldata->is_prev_bucket_same_wrt &&
736 21 : XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
737 : {
738 20 : Page prevpage = BufferGetPage(prevbuf);
739 20 : HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
740 :
741 20 : prevopaque->hasho_nextblkno = xldata->nextblkno;
742 :
743 20 : PageSetLSN(prevpage, lsn);
744 20 : MarkBufferDirty(prevbuf);
745 : }
746 31 : if (BufferIsValid(prevbuf))
747 21 : UnlockReleaseBuffer(prevbuf);
748 :
749 : /* replay the record for page next to the freed overflow page */
750 31 : if (XLogRecHasBlockRef(record, 4))
751 : {
752 : Buffer nextbuf;
753 :
754 0 : if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
755 : {
756 0 : Page nextpage = BufferGetPage(nextbuf);
757 0 : HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
758 :
759 0 : nextopaque->hasho_prevblkno = xldata->prevblkno;
760 :
761 0 : PageSetLSN(nextpage, lsn);
762 0 : MarkBufferDirty(nextbuf);
763 : }
764 0 : if (BufferIsValid(nextbuf))
765 0 : UnlockReleaseBuffer(nextbuf);
766 : }
767 :
768 31 : if (BufferIsValid(writebuf))
769 30 : UnlockReleaseBuffer(writebuf);
770 :
771 31 : if (BufferIsValid(bucketbuf))
772 8 : UnlockReleaseBuffer(bucketbuf);
773 :
774 : /*
775 : * Note: in normal operation, we'd update the bitmap and meta page while
776 : * still holding lock on the primary bucket page and overflow pages. But
777 : * during replay it's not necessary to hold those locks, since no other
778 : * index updates can be happening concurrently.
779 : */
780 : /* replay the record for bitmap page */
781 31 : if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
782 : {
783 25 : Page mappage = BufferGetPage(mapbuf);
784 25 : uint32 *freep = NULL;
785 : char *data;
786 : uint32 *bitmap_page_bit;
787 : Size datalen;
788 :
789 25 : freep = HashPageGetBitmap(mappage);
790 :
791 25 : data = XLogRecGetBlockData(record, 5, &datalen);
792 25 : bitmap_page_bit = (uint32 *) data;
793 :
794 25 : CLRBIT(freep, *bitmap_page_bit);
795 :
796 25 : PageSetLSN(mappage, lsn);
797 25 : MarkBufferDirty(mapbuf);
798 : }
799 31 : if (BufferIsValid(mapbuf))
800 31 : UnlockReleaseBuffer(mapbuf);
801 :
802 : /* replay the record for meta page */
803 31 : if (XLogRecHasBlockRef(record, 6))
804 : {
805 : Buffer metabuf;
806 :
807 30 : if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
808 : {
809 : HashMetaPage metap;
810 : Page page;
811 : char *data;
812 : uint32 *firstfree_ovflpage;
813 : Size datalen;
814 :
815 27 : data = XLogRecGetBlockData(record, 6, &datalen);
816 27 : firstfree_ovflpage = (uint32 *) data;
817 :
818 27 : page = BufferGetPage(metabuf);
819 27 : metap = HashPageGetMeta(page);
820 27 : metap->hashm_firstfree = *firstfree_ovflpage;
821 :
822 27 : PageSetLSN(page, lsn);
823 27 : MarkBufferDirty(metabuf);
824 : }
825 30 : if (BufferIsValid(metabuf))
826 30 : UnlockReleaseBuffer(metabuf);
827 : }
828 31 : }
829 :
830 : /*
831 : * replay delete operation of hash index
832 : */
833 : static void
834 254 : hash_xlog_delete(XLogReaderState *record)
835 : {
836 254 : XLogRecPtr lsn = record->EndRecPtr;
837 254 : xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
838 254 : Buffer bucketbuf = InvalidBuffer;
839 : Buffer deletebuf;
840 : Page page;
841 : XLogRedoAction action;
842 :
843 : /*
844 : * Ensure we have a cleanup lock on primary bucket page before we start
845 : * with the actual replay operation. This is to ensure that neither a
846 : * scan can start nor a scan can be already-in-progress during the replay
847 : * of this operation. If we allow scans during this operation, then they
848 : * can miss some records or show the same record multiple times.
849 : */
850 254 : if (xldata->is_primary_bucket_page)
851 222 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
852 : else
853 : {
854 : /*
855 : * we don't care for return value as the purpose of reading bucketbuf
856 : * is to ensure a cleanup lock on primary bucket page.
857 : */
858 32 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
859 :
860 32 : action = XLogReadBufferForRedo(record, 1, &deletebuf);
861 : }
862 :
863 : /* replay the record for deleting entries in bucket page */
864 254 : if (action == BLK_NEEDS_REDO)
865 : {
866 : char *ptr;
867 : Size len;
868 :
869 223 : ptr = XLogRecGetBlockData(record, 1, &len);
870 :
871 223 : page = BufferGetPage(deletebuf);
872 :
873 223 : if (len > 0)
874 : {
875 : OffsetNumber *unused;
876 : OffsetNumber *unend;
877 :
878 223 : unused = (OffsetNumber *) ptr;
879 223 : unend = (OffsetNumber *) (ptr + len);
880 :
881 223 : if ((unend - unused) > 0)
882 223 : PageIndexMultiDelete(page, unused, unend - unused);
883 : }
884 :
885 : /*
886 : * Mark the page as not containing any LP_DEAD items only if
887 : * clear_dead_marking flag is set to true. See comments in
888 : * hashbucketcleanup() for details.
889 : */
890 223 : if (xldata->clear_dead_marking)
891 : {
892 : HashPageOpaque pageopaque;
893 :
894 0 : pageopaque = HashPageGetOpaque(page);
895 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
896 : }
897 :
898 223 : PageSetLSN(page, lsn);
899 223 : MarkBufferDirty(deletebuf);
900 : }
901 254 : if (BufferIsValid(deletebuf))
902 254 : UnlockReleaseBuffer(deletebuf);
903 :
904 254 : if (BufferIsValid(bucketbuf))
905 32 : UnlockReleaseBuffer(bucketbuf);
906 254 : }
907 :
908 : /*
909 : * replay split cleanup flag operation for primary bucket page.
910 : */
911 : static void
912 221 : hash_xlog_split_cleanup(XLogReaderState *record)
913 : {
914 221 : XLogRecPtr lsn = record->EndRecPtr;
915 : Buffer buffer;
916 : Page page;
917 :
918 221 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
919 : {
920 : HashPageOpaque bucket_opaque;
921 :
922 221 : page = BufferGetPage(buffer);
923 :
924 221 : bucket_opaque = HashPageGetOpaque(page);
925 221 : bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
926 221 : PageSetLSN(page, lsn);
927 221 : MarkBufferDirty(buffer);
928 : }
929 221 : if (BufferIsValid(buffer))
930 221 : UnlockReleaseBuffer(buffer);
931 221 : }
932 :
933 : /*
934 : * replay for update meta page
935 : */
936 : static void
937 8 : hash_xlog_update_meta_page(XLogReaderState *record)
938 : {
939 : HashMetaPage metap;
940 8 : XLogRecPtr lsn = record->EndRecPtr;
941 8 : xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
942 : Buffer metabuf;
943 : Page page;
944 :
945 8 : if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
946 : {
947 4 : page = BufferGetPage(metabuf);
948 4 : metap = HashPageGetMeta(page);
949 :
950 4 : metap->hashm_ntuples = xldata->ntuples;
951 :
952 4 : PageSetLSN(page, lsn);
953 4 : MarkBufferDirty(metabuf);
954 : }
955 8 : if (BufferIsValid(metabuf))
956 8 : UnlockReleaseBuffer(metabuf);
957 8 : }
958 :
959 : /*
960 : * replay delete operation in hash index to remove
961 : * tuples marked as DEAD during index tuple insertion.
962 : */
963 : static void
964 1 : hash_xlog_vacuum_one_page(XLogReaderState *record)
965 : {
966 1 : XLogRecPtr lsn = record->EndRecPtr;
967 : xl_hash_vacuum_one_page *xldata;
968 : Buffer buffer;
969 : Buffer metabuf;
970 : Page page;
971 : XLogRedoAction action;
972 : HashPageOpaque pageopaque;
973 : OffsetNumber *toDelete;
974 :
975 1 : xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
976 1 : toDelete = xldata->offsets;
977 :
978 : /*
979 : * If we have any conflict processing to do, it must happen before we
980 : * update the page.
981 : *
982 : * Hash index records that are marked as LP_DEAD and being removed during
983 : * hash index tuple insertion can conflict with standby queries. You might
984 : * think that vacuum records would conflict as well, but we've handled
985 : * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
986 : * xid cleaned by the vacuum of the heap and so we can resolve any
987 : * conflicts just once when that arrives. After that we know that no
988 : * conflicts exist from individual hash index vacuum records on that
989 : * index.
990 : */
991 1 : if (InHotStandby)
992 : {
993 : RelFileLocator rlocator;
994 :
995 1 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
996 1 : ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
997 1 : xldata->isCatalogRel,
998 : rlocator);
999 : }
1000 :
1001 1 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1002 :
1003 1 : if (action == BLK_NEEDS_REDO)
1004 : {
1005 1 : page = BufferGetPage(buffer);
1006 :
1007 1 : PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1008 :
1009 : /*
1010 : * Mark the page as not containing any LP_DEAD items. See comments in
1011 : * _hash_vacuum_one_page() for details.
1012 : */
1013 1 : pageopaque = HashPageGetOpaque(page);
1014 1 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1015 :
1016 1 : PageSetLSN(page, lsn);
1017 1 : MarkBufferDirty(buffer);
1018 : }
1019 1 : if (BufferIsValid(buffer))
1020 1 : UnlockReleaseBuffer(buffer);
1021 :
1022 1 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1023 : {
1024 : Page metapage;
1025 : HashMetaPage metap;
1026 :
1027 1 : metapage = BufferGetPage(metabuf);
1028 1 : metap = HashPageGetMeta(metapage);
1029 :
1030 1 : metap->hashm_ntuples -= xldata->ntuples;
1031 :
1032 1 : PageSetLSN(metapage, lsn);
1033 1 : MarkBufferDirty(metabuf);
1034 : }
1035 1 : if (BufferIsValid(metabuf))
1036 1 : UnlockReleaseBuffer(metabuf);
1037 1 : }
1038 :
1039 : void
1040 122637 : hash_redo(XLogReaderState *record)
1041 : {
1042 122637 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1043 :
1044 122637 : switch (info)
1045 : {
1046 30 : case XLOG_HASH_INIT_META_PAGE:
1047 30 : hash_xlog_init_meta_page(record);
1048 30 : break;
1049 30 : case XLOG_HASH_INIT_BITMAP_PAGE:
1050 30 : hash_xlog_init_bitmap_page(record);
1051 30 : break;
1052 121315 : case XLOG_HASH_INSERT:
1053 121315 : hash_xlog_insert(record);
1054 121315 : break;
1055 70 : case XLOG_HASH_ADD_OVFL_PAGE:
1056 70 : hash_xlog_add_ovfl_page(record);
1057 70 : break;
1058 221 : case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1059 221 : hash_xlog_split_allocate_page(record);
1060 221 : break;
1061 234 : case XLOG_HASH_SPLIT_PAGE:
1062 234 : hash_xlog_split_page(record);
1063 234 : break;
1064 221 : case XLOG_HASH_SPLIT_COMPLETE:
1065 221 : hash_xlog_split_complete(record);
1066 221 : break;
1067 1 : case XLOG_HASH_MOVE_PAGE_CONTENTS:
1068 1 : hash_xlog_move_page_contents(record);
1069 1 : break;
1070 31 : case XLOG_HASH_SQUEEZE_PAGE:
1071 31 : hash_xlog_squeeze_page(record);
1072 31 : break;
1073 254 : case XLOG_HASH_DELETE:
1074 254 : hash_xlog_delete(record);
1075 254 : break;
1076 221 : case XLOG_HASH_SPLIT_CLEANUP:
1077 221 : hash_xlog_split_cleanup(record);
1078 221 : break;
1079 8 : case XLOG_HASH_UPDATE_META_PAGE:
1080 8 : hash_xlog_update_meta_page(record);
1081 8 : break;
1082 1 : case XLOG_HASH_VACUUM_ONE_PAGE:
1083 1 : hash_xlog_vacuum_one_page(record);
1084 1 : break;
1085 0 : default:
1086 0 : elog(PANIC, "hash_redo: unknown op code %u", info);
1087 : }
1088 122637 : }
1089 :
1090 : /*
1091 : * Mask a hash page before performing consistency checks on it.
1092 : */
1093 : void
1094 485930 : hash_mask(char *pagedata, BlockNumber blkno)
1095 : {
1096 485930 : Page page = (Page) pagedata;
1097 : HashPageOpaque opaque;
1098 : int pagetype;
1099 :
1100 485930 : mask_page_lsn_and_checksum(page);
1101 :
1102 485930 : mask_page_hint_bits(page);
1103 485930 : mask_unused_space(page);
1104 :
1105 485930 : opaque = HashPageGetOpaque(page);
1106 :
1107 485930 : pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1108 485930 : if (pagetype == LH_UNUSED_PAGE)
1109 : {
1110 : /*
1111 : * Mask everything on a UNUSED page.
1112 : */
1113 0 : mask_page_content(page);
1114 : }
1115 485930 : else if (pagetype == LH_BUCKET_PAGE ||
1116 : pagetype == LH_OVERFLOW_PAGE)
1117 : {
1118 : /*
1119 : * In hash bucket and overflow pages, it is possible to modify the
1120 : * LP_FLAGS without emitting any WAL record. Hence, mask the line
1121 : * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1122 : */
1123 242468 : mask_lp_flags(page);
1124 : }
1125 :
1126 : /*
1127 : * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1128 : * unlogged. So, mask it. See _hash_kill_items() for details.
1129 : */
1130 485930 : opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1131 485930 : }
|