Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginxlog.c
4 : * WAL replay logic for inverted index.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginxlog.c
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/bufmask.h"
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xlogutils.h"
20 : #include "utils/memutils.h"
21 :
/*
 * Working memory for redo operations.  Created by gin_xlog_startup(),
 * made current for the duration of each gin_redo() call and reset right
 * after it, and destroyed by gin_xlog_cleanup().
 */
static MemoryContext opCtx;
23 :
24 : static void
25 278 : ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
26 : {
27 278 : XLogRecPtr lsn = record->EndRecPtr;
28 : Buffer buffer;
29 : Page page;
30 :
31 278 : if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
32 : {
33 278 : page = (Page) BufferGetPage(buffer);
34 278 : GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
35 :
36 278 : PageSetLSN(page, lsn);
37 278 : MarkBufferDirty(buffer);
38 : }
39 278 : if (BufferIsValid(buffer))
40 278 : UnlockReleaseBuffer(buffer);
41 278 : }
42 :
43 : static void
44 12 : ginRedoCreatePTree(XLogReaderState *record)
45 : {
46 12 : XLogRecPtr lsn = record->EndRecPtr;
47 12 : ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
48 : char *ptr;
49 : Buffer buffer;
50 : Page page;
51 :
52 12 : buffer = XLogInitBufferForRedo(record, 0);
53 12 : page = (Page) BufferGetPage(buffer);
54 :
55 12 : GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
56 :
57 12 : ptr = XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree);
58 :
59 : /* Place page data */
60 12 : memcpy(GinDataLeafPageGetPostingList(page), ptr, data->size);
61 :
62 12 : GinDataPageSetDataSize(page, data->size);
63 :
64 12 : PageSetLSN(page, lsn);
65 :
66 12 : MarkBufferDirty(buffer);
67 12 : UnlockReleaseBuffer(buffer);
68 12 : }
69 :
70 : static void
71 47226 : ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata)
72 : {
73 47226 : Page page = BufferGetPage(buffer);
74 47226 : ginxlogInsertEntry *data = (ginxlogInsertEntry *) rdata;
75 47226 : OffsetNumber offset = data->offset;
76 : IndexTuple itup;
77 :
78 47226 : if (rightblkno != InvalidBlockNumber)
79 : {
80 : /* update link to right page after split */
81 : Assert(!GinPageIsLeaf(page));
82 : Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
83 262 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset));
84 262 : GinSetDownlink(itup, rightblkno);
85 : }
86 :
87 47226 : if (data->isDelete)
88 : {
89 : Assert(GinPageIsLeaf(page));
90 : Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
91 7250 : PageIndexTupleDelete(page, offset);
92 : }
93 :
94 47226 : itup = &data->tuple;
95 :
96 47226 : if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber)
97 : {
98 : RelFileLocator locator;
99 : ForkNumber forknum;
100 : BlockNumber blknum;
101 :
102 0 : BufferGetTag(buffer, &locator, &forknum, &blknum);
103 0 : elog(ERROR, "failed to add item to index page in %u/%u/%u",
104 : locator.spcOid, locator.dbOid, locator.relNumber);
105 : }
106 47226 : }
107 :
/*
 * Redo recompression of posting list.  Doing all the changes in-place is not
 * always possible, because it might require more space than we have on the
 * page.  Instead, once a modification is required we copy the unprocessed
 * tail of the page into a separately allocated chunk of memory, and read the
 * original versions of the segments from there.  Thanks to that we don't
 * have to bother about moving page data in-place.
 *
 * 'page' is the data leaf page to rewrite.  'data' is the
 * ginxlogRecompressDataLeaf header, immediately followed by data->nactions
 * actions.  As parsed below, each action consists of:
 *		1 byte	segment number the action applies to
 *		1 byte	action code
 *		for GIN_SEGMENT_INSERT / GIN_SEGMENT_REPLACE: a posting list segment
 *			(the read position advances by its SHORTALIGN'd size)
 *		for GIN_SEGMENT_ADDITEMS: a uint16 item count followed by that many
 *			ItemPointerData
 *
 * On return, the page's data size has been updated to cover the rewritten
 * segments.  The tail copy is palloc'd and not freed here.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
	int			actionno;
	int			segno;			/* number of the segment 'oldseg' points to */
	GinPostingList *oldseg;		/* read cursor over the original segments */
	Pointer		segmentend;		/* end of the area 'oldseg' is read from */
	char	   *walbuf;			/* read cursor over the action stream */
	int			totalsize;
	Pointer		tailCopy = NULL;	/* copy of page tail, NULL until needed */
	Pointer		writePtr;		/* where the next segment goes on the page */
	Pointer		segptr;

	/*
	 * If the page is in pre-9.4 format, convert to new format first.
	 */
	if (!GinPageIsCompressed(page))
	{
		ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
		int			nuncompressed = GinPageGetOpaque(page)->maxoff;
		int			npacked;

		/*
		 * Empty leaf pages are deleted as part of vacuum, but leftmost and
		 * rightmost pages are never deleted.  So, pg_upgrade'd from pre-9.4
		 * instances might contain empty leaf pages, and we need to handle
		 * them correctly.
		 */
		if (nuncompressed > 0)
		{
			GinPostingList *plist;

			plist = ginCompressPostingList(uncompressed, nuncompressed,
										   BLCKSZ, &npacked);
			totalsize = SizeOfGinPostingList(plist);

			Assert(npacked == nuncompressed);

			memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
		}
		else
		{
			totalsize = 0;
		}

		GinDataPageSetDataSize(page, totalsize);
		GinPageSetCompressed(page);
		GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
	}

	/* Both the read and the write cursor start at the first segment */
	oldseg = GinDataLeafPageGetPostingList(page);
	writePtr = (Pointer) oldseg;
	segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
	segno = 0;

	/* The actions start right after the fixed-size header */
	walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
	for (actionno = 0; actionno < data->nactions; actionno++)
	{
		uint8		a_segno = *((uint8 *) (walbuf++));
		uint8		a_action = *((uint8 *) (walbuf++));
		GinPostingList *newseg = NULL;
		int			newsegsize = 0;
		ItemPointerData *items = NULL;
		uint16		nitems = 0;
		ItemPointerData *olditems;
		int			nolditems;
		ItemPointerData *newitems;
		int			nnewitems;
		int			segsize;

		/* Extract all the information we need from the WAL record */
		if (a_action == GIN_SEGMENT_INSERT ||
			a_action == GIN_SEGMENT_REPLACE)
		{
			newseg = (GinPostingList *) walbuf;
			newsegsize = SizeOfGinPostingList(newseg);
			walbuf += SHORTALIGN(newsegsize);
		}

		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			/*
			 * NOTE(review): the count is fetched with memcpy rather than a
			 * direct load, presumably because walbuf is not guaranteed to
			 * be suitably aligned here -- confirm.
			 */
			memcpy(&nitems, walbuf, sizeof(uint16));
			walbuf += sizeof(uint16);
			items = (ItemPointerData *) walbuf;
			walbuf += nitems * sizeof(ItemPointerData);
		}

		/* Skip to the segment that this action concerns */
		Assert(segno <= a_segno);
		while (segno < a_segno)
		{
			/*
			 * Once modification is started and page tail is copied, we have
			 * to copy unmodified segments.  Before that, the segment is left
			 * where it is and only the cursors advance.
			 */
			segsize = SizeOfGinPostingList(oldseg);
			if (tailCopy)
			{
				/*
				 * NOTE(review): this bound uses '<' while the other bounds
				 * checks in this function use '<=' -- confirm that is
				 * intended.
				 */
				Assert(writePtr + segsize < PageGetSpecialPointer(page));
				memcpy(writePtr, (Pointer) oldseg, segsize);
			}
			writePtr += segsize;
			oldseg = GinNextPostingListSegment(oldseg);
			segno++;
		}

		/*
		 * ADDITEMS action is handled like REPLACE, but the new segment to
		 * replace the old one is reconstructed using the old segment from
		 * disk and the new items from the WAL record.
		 */
		if (a_action == GIN_SEGMENT_ADDITEMS)
		{
			int			npacked;

			olditems = ginPostingListDecode(oldseg, &nolditems);

			newitems = ginMergeItemPointers(items, nitems,
											olditems, nolditems,
											&nnewitems);
			Assert(nnewitems == nolditems + nitems);

			newseg = ginCompressPostingList(newitems, nnewitems,
											BLCKSZ, &npacked);
			Assert(npacked == nnewitems);

			newsegsize = SizeOfGinPostingList(newseg);
			a_action = GIN_SEGMENT_REPLACE;
		}

		/* Size of the existing segment the action applies to, if any */
		segptr = (Pointer) oldseg;
		if (segptr != segmentend)
			segsize = SizeOfGinPostingList(oldseg);
		else
		{
			/*
			 * Positioned after the last existing segment. Only INSERTs
			 * expected here.
			 */
			Assert(a_action == GIN_SEGMENT_INSERT);
			segsize = 0;
		}

		/*
		 * We're about to start modification of the page.  So, copy tail of
		 * the page if it's not done already.  From here on the original
		 * segments are read from the copy, while writePtr keeps pointing
		 * into the page.
		 */
		if (!tailCopy && segptr != segmentend)
		{
			int			tailSize = segmentend - segptr;

			tailCopy = (Pointer) palloc(tailSize);
			memcpy(tailCopy, segptr, tailSize);
			segptr = tailCopy;
			oldseg = (GinPostingList *) segptr;
			segmentend = segptr + tailSize;
		}

		switch (a_action)
		{
			case GIN_SEGMENT_DELETE:
				/* consume the old segment without writing anything */
				segptr += segsize;
				segno++;
				break;

			case GIN_SEGMENT_INSERT:
				/* copy the new segment in place */
				Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
				memcpy(writePtr, newseg, newsegsize);
				writePtr += newsegsize;
				break;

			case GIN_SEGMENT_REPLACE:
				/* copy the new version of segment in place */
				Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
				memcpy(writePtr, newseg, newsegsize);
				writePtr += newsegsize;
				segptr += segsize;
				segno++;
				break;

			default:
				elog(ERROR, "unexpected GIN leaf action: %u", a_action);
		}
		oldseg = (GinPostingList *) segptr;
	}

	/* Copy the rest of unmodified segments if any. */
	segptr = (Pointer) oldseg;
	if (segptr != segmentend && tailCopy)
	{
		int			restSize = segmentend - segptr;

		Assert(writePtr + restSize <= PageGetSpecialPointer(page));
		memcpy(writePtr, segptr, restSize);
		writePtr += restSize;
	}

	/* The data now ends where the write cursor stopped */
	totalsize = writePtr - (Pointer) GinDataLeafPageGetPostingList(page);
	GinDataPageSetDataSize(page, totalsize);
}
317 :
318 : static void
319 6676 : ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata)
320 : {
321 6676 : Page page = BufferGetPage(buffer);
322 :
323 6676 : if (isLeaf)
324 : {
325 6672 : ginxlogRecompressDataLeaf *data = (ginxlogRecompressDataLeaf *) rdata;
326 :
327 : Assert(GinPageIsLeaf(page));
328 :
329 6672 : ginRedoRecompress(page, data);
330 : }
331 : else
332 : {
333 4 : ginxlogInsertDataInternal *data = (ginxlogInsertDataInternal *) rdata;
334 : PostingItem *oldpitem;
335 :
336 : Assert(!GinPageIsLeaf(page));
337 :
338 : /* update link to right page after split */
339 4 : oldpitem = GinDataPageGetPostingItem(page, data->offset);
340 4 : PostingItemSetBlockNumber(oldpitem, rightblkno);
341 :
342 4 : GinDataPageAddPostingItem(page, &data->newitem, data->offset);
343 : }
344 6676 : }
345 :
346 : static void
347 54014 : ginRedoInsert(XLogReaderState *record)
348 : {
349 54014 : XLogRecPtr lsn = record->EndRecPtr;
350 54014 : ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
351 : Buffer buffer;
352 : #ifdef NOT_USED
353 : BlockNumber leftChildBlkno = InvalidBlockNumber;
354 : #endif
355 54014 : BlockNumber rightChildBlkno = InvalidBlockNumber;
356 54014 : bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
357 :
358 : /*
359 : * First clear incomplete-split flag on child page if this finishes a
360 : * split.
361 : */
362 54014 : if (!isLeaf)
363 : {
364 278 : char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
365 :
366 : #ifdef NOT_USED
367 : leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
368 : #endif
369 278 : payload += sizeof(BlockIdData);
370 278 : rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
371 278 : payload += sizeof(BlockIdData);
372 :
373 278 : ginRedoClearIncompleteSplit(record, 1);
374 : }
375 :
376 54014 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
377 : {
378 53902 : Page page = BufferGetPage(buffer);
379 : Size len;
380 53902 : char *payload = XLogRecGetBlockData(record, 0, &len);
381 :
382 : /* How to insert the payload is tree-type specific */
383 53902 : if (data->flags & GIN_INSERT_ISDATA)
384 : {
385 : Assert(GinPageIsData(page));
386 6676 : ginRedoInsertData(buffer, isLeaf, rightChildBlkno, payload);
387 : }
388 : else
389 : {
390 : Assert(!GinPageIsData(page));
391 47226 : ginRedoInsertEntry(buffer, isLeaf, rightChildBlkno, payload);
392 : }
393 :
394 53902 : PageSetLSN(page, lsn);
395 53902 : MarkBufferDirty(buffer);
396 : }
397 54014 : if (BufferIsValid(buffer))
398 54014 : UnlockReleaseBuffer(buffer);
399 54014 : }
400 :
401 : static void
402 284 : ginRedoSplit(XLogReaderState *record)
403 : {
404 284 : ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
405 : Buffer lbuffer,
406 : rbuffer,
407 : rootbuf;
408 284 : bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
409 284 : bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
410 :
411 : /*
412 : * First clear incomplete-split flag on child page if this finishes a
413 : * split
414 : */
415 284 : if (!isLeaf)
416 0 : ginRedoClearIncompleteSplit(record, 3);
417 :
418 284 : if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
419 0 : elog(ERROR, "GIN split record did not contain a full-page image of left page");
420 :
421 284 : if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED)
422 0 : elog(ERROR, "GIN split record did not contain a full-page image of right page");
423 :
424 284 : if (isRoot)
425 : {
426 6 : if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED)
427 0 : elog(ERROR, "GIN split record did not contain a full-page image of root page");
428 6 : UnlockReleaseBuffer(rootbuf);
429 : }
430 :
431 284 : UnlockReleaseBuffer(rbuffer);
432 284 : UnlockReleaseBuffer(lbuffer);
433 284 : }
434 :
435 : /*
436 : * VACUUM_PAGE record contains simply a full image of the page, similar to
437 : * an XLOG_FPI record.
438 : */
439 : static void
440 0 : ginRedoVacuumPage(XLogReaderState *record)
441 : {
442 : Buffer buffer;
443 :
444 0 : if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
445 : {
446 0 : elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
447 : }
448 0 : UnlockReleaseBuffer(buffer);
449 0 : }
450 :
451 : static void
452 6 : ginRedoVacuumDataLeafPage(XLogReaderState *record)
453 : {
454 6 : XLogRecPtr lsn = record->EndRecPtr;
455 : Buffer buffer;
456 :
457 6 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
458 : {
459 6 : Page page = BufferGetPage(buffer);
460 : Size len;
461 : ginxlogVacuumDataLeafPage *xlrec;
462 :
463 6 : xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
464 :
465 : Assert(GinPageIsLeaf(page));
466 : Assert(GinPageIsData(page));
467 :
468 6 : ginRedoRecompress(page, &xlrec->data);
469 6 : PageSetLSN(page, lsn);
470 6 : MarkBufferDirty(buffer);
471 : }
472 6 : if (BufferIsValid(buffer))
473 6 : UnlockReleaseBuffer(buffer);
474 6 : }
475 :
476 : static void
477 0 : ginRedoDeletePage(XLogReaderState *record)
478 : {
479 0 : XLogRecPtr lsn = record->EndRecPtr;
480 0 : ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
481 : Buffer dbuffer;
482 : Buffer pbuffer;
483 : Buffer lbuffer;
484 : Page page;
485 :
486 : /*
487 : * Lock left page first in order to prevent possible deadlock with
488 : * ginStepRight().
489 : */
490 0 : if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO)
491 : {
492 0 : page = BufferGetPage(lbuffer);
493 : Assert(GinPageIsData(page));
494 0 : GinPageGetOpaque(page)->rightlink = data->rightLink;
495 0 : PageSetLSN(page, lsn);
496 0 : MarkBufferDirty(lbuffer);
497 : }
498 :
499 0 : if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
500 : {
501 0 : page = BufferGetPage(dbuffer);
502 : Assert(GinPageIsData(page));
503 0 : GinPageSetDeleted(page);
504 0 : GinPageSetDeleteXid(page, data->deleteXid);
505 0 : PageSetLSN(page, lsn);
506 0 : MarkBufferDirty(dbuffer);
507 : }
508 :
509 0 : if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO)
510 : {
511 0 : page = BufferGetPage(pbuffer);
512 : Assert(GinPageIsData(page));
513 : Assert(!GinPageIsLeaf(page));
514 0 : GinPageDeletePostingItem(page, data->parentOffset);
515 0 : PageSetLSN(page, lsn);
516 0 : MarkBufferDirty(pbuffer);
517 : }
518 :
519 0 : if (BufferIsValid(lbuffer))
520 0 : UnlockReleaseBuffer(lbuffer);
521 0 : if (BufferIsValid(pbuffer))
522 0 : UnlockReleaseBuffer(pbuffer);
523 0 : if (BufferIsValid(dbuffer))
524 0 : UnlockReleaseBuffer(dbuffer);
525 0 : }
526 :
527 : static void
528 47966 : ginRedoUpdateMetapage(XLogReaderState *record)
529 : {
530 47966 : XLogRecPtr lsn = record->EndRecPtr;
531 47966 : ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
532 : Buffer metabuffer;
533 : Page metapage;
534 : Buffer buffer;
535 :
536 : /*
537 : * Restore the metapage. This is essentially the same as a full-page
538 : * image, so restore the metapage unconditionally without looking at the
539 : * LSN, to avoid torn page hazards.
540 : */
541 47966 : metabuffer = XLogInitBufferForRedo(record, 0);
542 : Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
543 47966 : metapage = BufferGetPage(metabuffer);
544 :
545 47966 : GinInitMetabuffer(metabuffer);
546 47966 : memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
547 47966 : PageSetLSN(metapage, lsn);
548 47966 : MarkBufferDirty(metabuffer);
549 :
550 47966 : if (data->ntuples > 0)
551 : {
552 : /*
553 : * insert into tail page
554 : */
555 47590 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
556 : {
557 47554 : Page page = BufferGetPage(buffer);
558 : OffsetNumber off;
559 : int i;
560 : Size tupsize;
561 : char *payload;
562 : IndexTuple tuples;
563 : Size totaltupsize;
564 :
565 47554 : payload = XLogRecGetBlockData(record, 1, &totaltupsize);
566 47554 : tuples = (IndexTuple) payload;
567 :
568 47554 : if (PageIsEmpty(page))
569 0 : off = FirstOffsetNumber;
570 : else
571 47554 : off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
572 :
573 190200 : for (i = 0; i < data->ntuples; i++)
574 : {
575 142646 : tupsize = IndexTupleSize(tuples);
576 :
577 142646 : if (PageAddItem(page, (Item) tuples, tupsize, off,
578 : false, false) == InvalidOffsetNumber)
579 0 : elog(ERROR, "failed to add item to index page");
580 :
581 142646 : tuples = (IndexTuple) (((char *) tuples) + tupsize);
582 :
583 142646 : off++;
584 : }
585 : Assert(payload + totaltupsize == (char *) tuples);
586 :
587 : /*
588 : * Increase counter of heap tuples
589 : */
590 47554 : GinPageGetOpaque(page)->maxoff++;
591 :
592 47554 : PageSetLSN(page, lsn);
593 47554 : MarkBufferDirty(buffer);
594 : }
595 47590 : if (BufferIsValid(buffer))
596 47590 : UnlockReleaseBuffer(buffer);
597 : }
598 376 : else if (data->prevTail != InvalidBlockNumber)
599 : {
600 : /*
601 : * New tail
602 : */
603 350 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
604 : {
605 350 : Page page = BufferGetPage(buffer);
606 :
607 350 : GinPageGetOpaque(page)->rightlink = data->newRightlink;
608 :
609 350 : PageSetLSN(page, lsn);
610 350 : MarkBufferDirty(buffer);
611 : }
612 350 : if (BufferIsValid(buffer))
613 350 : UnlockReleaseBuffer(buffer);
614 : }
615 :
616 47966 : UnlockReleaseBuffer(metabuffer);
617 47966 : }
618 :
619 : static void
620 360 : ginRedoInsertListPage(XLogReaderState *record)
621 : {
622 360 : XLogRecPtr lsn = record->EndRecPtr;
623 360 : ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
624 : Buffer buffer;
625 : Page page;
626 : OffsetNumber l,
627 360 : off = FirstOffsetNumber;
628 : int i,
629 : tupsize;
630 : char *payload;
631 : IndexTuple tuples;
632 : Size totaltupsize;
633 :
634 : /* We always re-initialize the page. */
635 360 : buffer = XLogInitBufferForRedo(record, 0);
636 360 : page = BufferGetPage(buffer);
637 :
638 360 : GinInitBuffer(buffer, GIN_LIST);
639 360 : GinPageGetOpaque(page)->rightlink = data->rightlink;
640 360 : if (data->rightlink == InvalidBlockNumber)
641 : {
642 : /* tail of sublist */
643 360 : GinPageSetFullRow(page);
644 360 : GinPageGetOpaque(page)->maxoff = 1;
645 : }
646 : else
647 : {
648 0 : GinPageGetOpaque(page)->maxoff = 0;
649 : }
650 :
651 360 : payload = XLogRecGetBlockData(record, 0, &totaltupsize);
652 :
653 360 : tuples = (IndexTuple) payload;
654 1434 : for (i = 0; i < data->ntuples; i++)
655 : {
656 1074 : tupsize = IndexTupleSize(tuples);
657 :
658 1074 : l = PageAddItem(page, (Item) tuples, tupsize, off, false, false);
659 :
660 1074 : if (l == InvalidOffsetNumber)
661 0 : elog(ERROR, "failed to add item to index page");
662 :
663 1074 : tuples = (IndexTuple) (((char *) tuples) + tupsize);
664 1074 : off++;
665 : }
666 : Assert((char *) tuples == payload + totaltupsize);
667 :
668 360 : PageSetLSN(page, lsn);
669 360 : MarkBufferDirty(buffer);
670 :
671 360 : UnlockReleaseBuffer(buffer);
672 360 : }
673 :
674 : static void
675 26 : ginRedoDeleteListPages(XLogReaderState *record)
676 : {
677 26 : XLogRecPtr lsn = record->EndRecPtr;
678 26 : ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
679 : Buffer metabuffer;
680 : Page metapage;
681 : int i;
682 :
683 26 : metabuffer = XLogInitBufferForRedo(record, 0);
684 : Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
685 26 : metapage = BufferGetPage(metabuffer);
686 :
687 26 : GinInitMetabuffer(metabuffer);
688 :
689 26 : memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
690 26 : PageSetLSN(metapage, lsn);
691 26 : MarkBufferDirty(metabuffer);
692 :
693 : /*
694 : * In normal operation, shiftList() takes exclusive lock on all the
695 : * pages-to-be-deleted simultaneously. During replay, however, it should
696 : * be all right to lock them one at a time. This is dependent on the fact
697 : * that we are deleting pages from the head of the list, and that readers
698 : * share-lock the next page before releasing the one they are on. So we
699 : * cannot get past a reader that is on, or due to visit, any page we are
700 : * going to delete. New incoming readers will block behind our metapage
701 : * lock and then see a fully updated page list.
702 : *
703 : * No full-page images are taken of the deleted pages. Instead, they are
704 : * re-initialized as empty, deleted pages. Their right-links don't need to
705 : * be preserved, because no new readers can see the pages, as explained
706 : * above.
707 : */
708 384 : for (i = 0; i < data->ndeleted; i++)
709 : {
710 : Buffer buffer;
711 : Page page;
712 :
713 358 : buffer = XLogInitBufferForRedo(record, i + 1);
714 358 : page = BufferGetPage(buffer);
715 358 : GinInitBuffer(buffer, GIN_DELETED);
716 :
717 358 : PageSetLSN(page, lsn);
718 358 : MarkBufferDirty(buffer);
719 :
720 358 : UnlockReleaseBuffer(buffer);
721 : }
722 26 : UnlockReleaseBuffer(metabuffer);
723 26 : }
724 :
725 : void
726 102668 : gin_redo(XLogReaderState *record)
727 : {
728 102668 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
729 : MemoryContext oldCtx;
730 :
731 : /*
732 : * GIN indexes do not require any conflict processing. NB: If we ever
733 : * implement a similar optimization as we have in b-tree, and remove
734 : * killed tuples outside VACUUM, we'll need to handle that here.
735 : */
736 :
737 102668 : oldCtx = MemoryContextSwitchTo(opCtx);
738 102668 : switch (info)
739 : {
740 12 : case XLOG_GIN_CREATE_PTREE:
741 12 : ginRedoCreatePTree(record);
742 12 : break;
743 54014 : case XLOG_GIN_INSERT:
744 54014 : ginRedoInsert(record);
745 54014 : break;
746 284 : case XLOG_GIN_SPLIT:
747 284 : ginRedoSplit(record);
748 284 : break;
749 0 : case XLOG_GIN_VACUUM_PAGE:
750 0 : ginRedoVacuumPage(record);
751 0 : break;
752 6 : case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
753 6 : ginRedoVacuumDataLeafPage(record);
754 6 : break;
755 0 : case XLOG_GIN_DELETE_PAGE:
756 0 : ginRedoDeletePage(record);
757 0 : break;
758 47966 : case XLOG_GIN_UPDATE_META_PAGE:
759 47966 : ginRedoUpdateMetapage(record);
760 47966 : break;
761 360 : case XLOG_GIN_INSERT_LISTPAGE:
762 360 : ginRedoInsertListPage(record);
763 360 : break;
764 26 : case XLOG_GIN_DELETE_LISTPAGE:
765 26 : ginRedoDeleteListPages(record);
766 26 : break;
767 0 : default:
768 0 : elog(PANIC, "gin_redo: unknown op code %u", info);
769 : }
770 102668 : MemoryContextSwitchTo(oldCtx);
771 102668 : MemoryContextReset(opCtx);
772 102668 : }
773 :
774 : void
775 392 : gin_xlog_startup(void)
776 : {
777 392 : opCtx = AllocSetContextCreate(CurrentMemoryContext,
778 : "GIN recovery temporary context",
779 : ALLOCSET_DEFAULT_SIZES);
780 392 : }
781 :
782 : void
783 288 : gin_xlog_cleanup(void)
784 : {
785 288 : MemoryContextDelete(opCtx);
786 288 : opCtx = NULL;
787 288 : }
788 :
789 : /*
790 : * Mask a GIN page before running consistency checks on it.
791 : */
792 : void
793 301624 : gin_mask(char *pagedata, BlockNumber blkno)
794 : {
795 301624 : Page page = (Page) pagedata;
796 301624 : PageHeader pagehdr = (PageHeader) page;
797 : GinPageOpaque opaque;
798 :
799 301624 : mask_page_lsn_and_checksum(page);
800 301624 : opaque = GinPageGetOpaque(page);
801 :
802 301624 : mask_page_hint_bits(page);
803 :
804 : /*
805 : * For a GIN_DELETED page, the page is initialized to empty. Hence, mask
806 : * the whole page content. For other pages, mask the hole if pd_lower
807 : * appears to have been set correctly.
808 : */
809 301624 : if (opaque->flags & GIN_DELETED)
810 716 : mask_page_content(page);
811 300908 : else if (pagehdr->pd_lower > SizeOfPageHeaderData)
812 300908 : mask_unused_space(page);
813 301624 : }
|