Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * verify_gin.c
4 : * Verifies the integrity of GIN indexes based on invariants.
5 : *
6 : *
7 : * GIN index verification checks a number of invariants:
8 : *
9 : * - consistency: Paths in GIN graph have to contain consistent keys: tuples
10 : * on parent pages consistently include tuples from children pages.
11 : *
12 : * - graph invariants: Each internal page must have at least one downlink, and
13 : * can reference either only leaf pages or only internal pages.
14 : *
15 : *
16 : * Copyright (c) 2016-2025, PostgreSQL Global Development Group
17 : *
18 : * IDENTIFICATION
19 : * contrib/amcheck/verify_gin.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "access/gin_private.h"
26 : #include "access/nbtree.h"
27 : #include "catalog/pg_am.h"
28 : #include "utils/memutils.h"
29 : #include "utils/rel.h"
30 : #include "verify_common.h"
31 : #include "string.h"
32 :
33 : /*
34 : * GinScanItem represents one item of depth-first scan of the index.
35 : */
36 : typedef struct GinScanItem
37 : {
38 : int depth;
39 : IndexTuple parenttup;
40 : BlockNumber parentblk;
41 : BlockNumber blkno;
42 : struct GinScanItem *next;
43 : } GinScanItem;
44 :
45 : /*
46 : * GinPostingTreeScanItem represents one item of a depth-first posting tree scan.
47 : */
48 : typedef struct GinPostingTreeScanItem
49 : {
50 : int depth;
51 : ItemPointerData parentkey;
52 : BlockNumber parentblk;
53 : BlockNumber blkno;
54 : struct GinPostingTreeScanItem *next;
55 : } GinPostingTreeScanItem;
56 :
57 :
58 56 : PG_FUNCTION_INFO_V1(gin_index_check);
59 :
60 : static void gin_check_parent_keys_consistency(Relation rel,
61 : Relation heaprel,
62 : void *callback_state, bool readonly);
63 : static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
64 : static IndexTuple gin_refind_parent(Relation rel,
65 : BlockNumber parentblkno,
66 : BlockNumber childblkno,
67 : BufferAccessStrategy strategy);
68 : static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
69 : OffsetNumber offset);
70 :
71 : /*
72 : * gin_index_check(index regclass)
73 : *
74 : * Verify integrity of GIN index.
75 : *
76 : * Acquires AccessShareLock on heap & index relations.
77 : */
78 : Datum
79 98 : gin_index_check(PG_FUNCTION_ARGS)
80 : {
81 98 : Oid indrelid = PG_GETARG_OID(0);
82 :
83 98 : amcheck_lock_relation_and_check(indrelid,
84 : GIN_AM_OID,
85 : gin_check_parent_keys_consistency,
86 : AccessShareLock,
87 : NULL);
88 :
89 88 : PG_RETURN_VOID();
90 : }
91 :
92 : /*
93 : * Read item pointers from leaf entry tuple.
94 : *
95 : * Returns a palloc'd array of ItemPointers. The number of items is returned
96 : * in *nitems.
97 : */
98 : static ItemPointer
99 3504 : ginReadTupleWithoutState(IndexTuple itup, int *nitems)
100 : {
101 3504 : Pointer ptr = GinGetPosting(itup);
102 3504 : int nipd = GinGetNPosting(itup);
103 : ItemPointer ipd;
104 : int ndecoded;
105 :
106 3504 : if (GinItupIsCompressed(itup))
107 : {
108 3504 : if (nipd > 0)
109 : {
110 3504 : ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded);
111 3504 : if (nipd != ndecoded)
112 0 : elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
113 : nipd, ndecoded);
114 : }
115 : else
116 0 : ipd = palloc(0);
117 : }
118 : else
119 : {
120 0 : ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd);
121 0 : memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd);
122 : }
123 3504 : *nitems = nipd;
124 3504 : return ipd;
125 : }
126 :
127 : /*
128 : * Scans through a posting tree (given by the root), and verifies that the keys
129 : * on a child keys are consistent with the parent.
130 : *
131 : * Allocates a separate memory context and scans through posting tree graph.
132 : */
133 : static void
134 0 : gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root)
135 : {
136 0 : BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
137 : GinPostingTreeScanItem *stack;
138 : MemoryContext mctx;
139 : MemoryContext oldcontext;
140 :
141 : int leafdepth;
142 :
143 0 : mctx = AllocSetContextCreate(CurrentMemoryContext,
144 : "posting tree check context",
145 : ALLOCSET_DEFAULT_SIZES);
146 0 : oldcontext = MemoryContextSwitchTo(mctx);
147 :
148 : /*
149 : * We don't know the height of the tree yet, but as soon as we encounter a
150 : * leaf page, we will set 'leafdepth' to its depth.
151 : */
152 0 : leafdepth = -1;
153 :
154 : /* Start the scan at the root page */
155 0 : stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem));
156 0 : stack->depth = 0;
157 0 : ItemPointerSetInvalid(&stack->parentkey);
158 0 : stack->parentblk = InvalidBlockNumber;
159 0 : stack->blkno = posting_tree_root;
160 :
161 0 : elog(DEBUG3, "processing posting tree at blk %u", posting_tree_root);
162 :
163 0 : while (stack)
164 : {
165 : GinPostingTreeScanItem *stack_next;
166 : Buffer buffer;
167 : Page page;
168 : OffsetNumber i,
169 : maxoff;
170 : BlockNumber rightlink;
171 :
172 0 : CHECK_FOR_INTERRUPTS();
173 :
174 0 : buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
175 : RBM_NORMAL, strategy);
176 0 : LockBuffer(buffer, GIN_SHARE);
177 0 : page = (Page) BufferGetPage(buffer);
178 :
179 : Assert(GinPageIsData(page));
180 :
181 : /* Check that the tree has the same height in all branches */
182 0 : if (GinPageIsLeaf(page))
183 : {
184 : ItemPointerData minItem;
185 : int nlist;
186 : ItemPointerData *list;
187 : char tidrange_buf[MAXPGPATH];
188 :
189 0 : ItemPointerSetMin(&minItem);
190 :
191 0 : elog(DEBUG1, "page blk: %u, type leaf", stack->blkno);
192 :
193 0 : if (leafdepth == -1)
194 0 : leafdepth = stack->depth;
195 0 : else if (stack->depth != leafdepth)
196 0 : ereport(ERROR,
197 : (errcode(ERRCODE_INDEX_CORRUPTED),
198 : errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
199 : RelationGetRelationName(rel), stack->blkno)));
200 0 : list = GinDataLeafPageGetItems(page, &nlist, minItem);
201 :
202 0 : if (nlist > 0)
203 0 : snprintf(tidrange_buf, sizeof(tidrange_buf),
204 : "%d tids (%u, %u) - (%u, %u)",
205 : nlist,
206 : ItemPointerGetBlockNumberNoCheck(&list[0]),
207 0 : ItemPointerGetOffsetNumberNoCheck(&list[0]),
208 0 : ItemPointerGetBlockNumberNoCheck(&list[nlist - 1]),
209 0 : ItemPointerGetOffsetNumberNoCheck(&list[nlist - 1]));
210 : else
211 0 : snprintf(tidrange_buf, sizeof(tidrange_buf), "0 tids");
212 :
213 0 : if (stack->parentblk != InvalidBlockNumber)
214 0 : elog(DEBUG3, "blk %u: parent %u highkey (%u, %u), %s",
215 : stack->blkno,
216 : stack->parentblk,
217 : ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
218 : ItemPointerGetOffsetNumberNoCheck(&stack->parentkey),
219 : tidrange_buf);
220 : else
221 0 : elog(DEBUG3, "blk %u: root leaf, %s",
222 : stack->blkno,
223 : tidrange_buf);
224 :
225 0 : if (stack->parentblk != InvalidBlockNumber &&
226 0 : ItemPointerGetOffsetNumberNoCheck(&stack->parentkey) != InvalidOffsetNumber &&
227 0 : nlist > 0 && ItemPointerCompare(&stack->parentkey, &list[nlist - 1]) < 0)
228 0 : ereport(ERROR,
229 : (errcode(ERRCODE_INDEX_CORRUPTED),
230 : errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u",
231 : RelationGetRelationName(rel), stack->blkno)));
232 : }
233 : else
234 : {
235 : LocationIndex pd_lower;
236 : ItemPointerData bound;
237 : int lowersize;
238 :
239 : /*
240 : * Check that tuples in each page are properly ordered and
241 : * consistent with parent high key
242 : */
243 0 : maxoff = GinPageGetOpaque(page)->maxoff;
244 0 : rightlink = GinPageGetOpaque(page)->rightlink;
245 :
246 0 : elog(DEBUG1, "page blk: %u, type data, maxoff %d", stack->blkno, maxoff);
247 :
248 0 : if (stack->parentblk != InvalidBlockNumber)
249 0 : elog(DEBUG3, "blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)",
250 : stack->blkno, maxoff, stack->parentblk,
251 : ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
252 : ItemPointerGetOffsetNumberNoCheck(&stack->parentkey));
253 : else
254 0 : elog(DEBUG3, "blk %u: root internal posting tree page with %u items",
255 : stack->blkno, maxoff);
256 :
257 : /*
258 : * A GIN posting tree internal page stores PostingItems in the
259 : * 'lower' part of the page. The 'upper' part is unused. The
260 : * number of elements is stored in the opaque area (maxoff). Make
261 : * sure the size of the 'lower' part agrees with 'maxoff'
262 : *
263 : * We didn't set pd_lower until PostgreSQL version 9.4, so if this
264 : * check fails, it could also be because the index was
265 : * binary-upgraded from an earlier version. That was a long time
266 : * ago, though, so let's warn if it doesn't match.
267 : */
268 0 : pd_lower = ((PageHeader) page)->pd_lower;
269 0 : lowersize = pd_lower - MAXALIGN(SizeOfPageHeaderData);
270 0 : if ((lowersize - MAXALIGN(sizeof(ItemPointerData))) / sizeof(PostingItem) != maxoff)
271 0 : ereport(ERROR,
272 : (errcode(ERRCODE_INDEX_CORRUPTED),
273 : errmsg("index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u)",
274 : RelationGetRelationName(rel), pd_lower, stack->blkno, maxoff)));
275 :
276 : /*
277 : * Before the PostingItems, there's one ItemPointerData in the
278 : * 'lower' part that stores the page's high key.
279 : */
280 0 : bound = *GinDataPageGetRightBound(page);
281 :
282 : /*
283 : * Gin page right bound has a sane value only when not a highkey
284 : * on the rightmost page (at a given level). For the rightmost
285 : * page does not store the highkey explicitly, and the value is
286 : * infinity.
287 : */
288 0 : if (ItemPointerIsValid(&stack->parentkey) &&
289 0 : rightlink != InvalidBlockNumber &&
290 0 : !ItemPointerEquals(&stack->parentkey, &bound))
291 0 : ereport(ERROR,
292 : (errcode(ERRCODE_INDEX_CORRUPTED),
293 : errmsg("index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))",
294 : RelationGetRelationName(rel),
295 : ItemPointerGetBlockNumberNoCheck(&bound),
296 : ItemPointerGetOffsetNumberNoCheck(&bound),
297 : stack->blkno, stack->parentblk,
298 : ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
299 : ItemPointerGetOffsetNumberNoCheck(&stack->parentkey))));
300 :
301 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
302 : {
303 : GinPostingTreeScanItem *ptr;
304 0 : PostingItem *posting_item = GinDataPageGetPostingItem(page, i);
305 :
306 : /* ItemPointerGetOffsetNumber expects a valid pointer */
307 0 : if (!(i == maxoff &&
308 : rightlink == InvalidBlockNumber))
309 0 : elog(DEBUG3, "key (%u, %u) -> %u",
310 : ItemPointerGetBlockNumber(&posting_item->key),
311 : ItemPointerGetOffsetNumber(&posting_item->key),
312 : BlockIdGetBlockNumber(&posting_item->child_blkno));
313 : else
314 0 : elog(DEBUG3, "key (%u, %u) -> %u",
315 : 0, 0, BlockIdGetBlockNumber(&posting_item->child_blkno));
316 :
317 0 : if (i == maxoff && rightlink == InvalidBlockNumber)
318 : {
319 : /*
320 : * The rightmost item in the tree level has (0, 0) as the
321 : * key
322 : */
323 0 : if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) != 0 ||
324 0 : ItemPointerGetOffsetNumberNoCheck(&posting_item->key) != 0)
325 0 : ereport(ERROR,
326 : (errcode(ERRCODE_INDEX_CORRUPTED),
327 : errmsg("index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)",
328 : RelationGetRelationName(rel),
329 : stack->blkno,
330 : ItemPointerGetBlockNumberNoCheck(&posting_item->key),
331 : ItemPointerGetOffsetNumberNoCheck(&posting_item->key))));
332 : }
333 0 : else if (i != FirstOffsetNumber)
334 : {
335 0 : PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1);
336 :
337 0 : if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0)
338 0 : ereport(ERROR,
339 : (errcode(ERRCODE_INDEX_CORRUPTED),
340 : errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
341 : RelationGetRelationName(rel), stack->blkno, i)));
342 : }
343 :
344 : /*
345 : * Check if this tuple is consistent with the downlink in the
346 : * parent.
347 : */
348 0 : if (i == maxoff && ItemPointerIsValid(&stack->parentkey) &&
349 0 : ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0)
350 0 : ereport(ERROR,
351 : (errcode(ERRCODE_INDEX_CORRUPTED),
352 : errmsg("index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u",
353 : RelationGetRelationName(rel),
354 : stack->blkno, i)));
355 :
356 : /* This is an internal page, recurse into the child. */
357 0 : ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem));
358 0 : ptr->depth = stack->depth + 1;
359 :
360 : /*
361 : * The rightmost parent key is always invalid item pointer.
362 : * Its value is 'Infinity' and not explicitly stored.
363 : */
364 0 : ptr->parentkey = posting_item->key;
365 0 : ptr->parentblk = stack->blkno;
366 0 : ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno);
367 0 : ptr->next = stack->next;
368 0 : stack->next = ptr;
369 : }
370 : }
371 0 : LockBuffer(buffer, GIN_UNLOCK);
372 0 : ReleaseBuffer(buffer);
373 :
374 : /* Step to next item in the queue */
375 0 : stack_next = stack->next;
376 0 : pfree(stack);
377 0 : stack = stack_next;
378 : }
379 :
380 0 : MemoryContextSwitchTo(oldcontext);
381 0 : MemoryContextDelete(mctx);
382 0 : }
383 :
384 : /*
385 : * Main entry point for GIN checks.
386 : *
387 : * Allocates memory context and scans through the whole GIN graph.
388 : */
389 : static void
390 98 : gin_check_parent_keys_consistency(Relation rel,
391 : Relation heaprel,
392 : void *callback_state,
393 : bool readonly)
394 : {
395 98 : BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
396 : GinScanItem *stack;
397 : MemoryContext mctx;
398 : MemoryContext oldcontext;
399 : GinState state;
400 : int leafdepth;
401 :
402 98 : mctx = AllocSetContextCreate(CurrentMemoryContext,
403 : "amcheck consistency check context",
404 : ALLOCSET_DEFAULT_SIZES);
405 98 : oldcontext = MemoryContextSwitchTo(mctx);
406 98 : initGinState(&state, rel);
407 :
408 : /*
409 : * We don't know the height of the tree yet, but as soon as we encounter a
410 : * leaf page, we will set 'leafdepth' to its depth.
411 : */
412 98 : leafdepth = -1;
413 :
414 : /* Start the scan at the root page */
415 98 : stack = (GinScanItem *) palloc0(sizeof(GinScanItem));
416 98 : stack->depth = 0;
417 98 : stack->parenttup = NULL;
418 98 : stack->parentblk = InvalidBlockNumber;
419 98 : stack->blkno = GIN_ROOT_BLKNO;
420 :
421 424 : while (stack)
422 : {
423 : GinScanItem *stack_next;
424 : Buffer buffer;
425 : Page page;
426 : OffsetNumber i,
427 : maxoff,
428 : prev_attnum;
429 : IndexTuple prev_tuple;
430 : BlockNumber rightlink;
431 :
432 336 : CHECK_FOR_INTERRUPTS();
433 :
434 336 : buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
435 : RBM_NORMAL, strategy);
436 336 : LockBuffer(buffer, GIN_SHARE);
437 336 : page = (Page) BufferGetPage(buffer);
438 336 : maxoff = PageGetMaxOffsetNumber(page);
439 336 : rightlink = GinPageGetOpaque(page)->rightlink;
440 :
441 : /* Do basic sanity checks on the page headers */
442 336 : check_index_page(rel, buffer, stack->blkno);
443 :
444 336 : elog(DEBUG3, "processing entry tree page at blk %u, maxoff: %u", stack->blkno, maxoff);
445 :
446 : /*
447 : * It's possible that the page was split since we looked at the
448 : * parent, so that we didn't missed the downlink of the right sibling
449 : * when we scanned the parent. If so, add the right sibling to the
450 : * stack now.
451 : */
452 336 : if (stack->parenttup != NULL)
453 : {
454 : GinNullCategory parent_key_category;
455 228 : Datum parent_key = gintuple_get_key(&state,
456 : stack->parenttup,
457 : &parent_key_category);
458 228 : OffsetNumber parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup);
459 228 : ItemId iid = PageGetItemIdCareful(rel, stack->blkno,
460 : page, maxoff);
461 228 : IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
462 228 : OffsetNumber page_max_key_attnum = gintuple_get_attrnum(&state, idxtuple);
463 : GinNullCategory page_max_key_category;
464 228 : Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category);
465 :
466 456 : if (rightlink != InvalidBlockNumber &&
467 228 : ginCompareAttEntries(&state, page_max_key_attnum, page_max_key,
468 : page_max_key_category, parent_key_attnum,
469 : parent_key, parent_key_category) < 0)
470 : {
471 : /* split page detected, install right link to the stack */
472 : GinScanItem *ptr;
473 :
474 0 : elog(DEBUG3, "split detected for blk: %u, parent blk: %u", stack->blkno, stack->parentblk);
475 :
476 0 : ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
477 0 : ptr->depth = stack->depth;
478 0 : ptr->parenttup = CopyIndexTuple(stack->parenttup);
479 0 : ptr->parentblk = stack->parentblk;
480 0 : ptr->blkno = rightlink;
481 0 : ptr->next = stack->next;
482 0 : stack->next = ptr;
483 : }
484 : }
485 :
486 : /* Check that the tree has the same height in all branches */
487 336 : if (GinPageIsLeaf(page))
488 : {
489 324 : if (leafdepth == -1)
490 96 : leafdepth = stack->depth;
491 228 : else if (stack->depth != leafdepth)
492 0 : ereport(ERROR,
493 : (errcode(ERRCODE_INDEX_CORRUPTED),
494 : errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
495 : RelationGetRelationName(rel), stack->blkno)));
496 : }
497 :
498 : /*
499 : * Check that tuples in each page are properly ordered and consistent
500 : * with parent high key
501 : */
502 336 : prev_tuple = NULL;
503 336 : prev_attnum = InvalidAttrNumber;
504 4080 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
505 : {
506 3754 : ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
507 3754 : IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
508 3754 : OffsetNumber current_attnum = gintuple_get_attrnum(&state, idxtuple);
509 : GinNullCategory current_key_category;
510 : Datum current_key;
511 :
512 3754 : if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
513 0 : ereport(ERROR,
514 : (errcode(ERRCODE_INDEX_CORRUPTED),
515 : errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
516 : RelationGetRelationName(rel), stack->blkno, i)));
517 :
518 3754 : current_key = gintuple_get_key(&state, idxtuple, ¤t_key_category);
519 :
520 : /*
521 : * Compare the entry to the preceding one.
522 : *
523 : * Don't check for high key on the rightmost inner page, as this
524 : * key is not really stored explicitly.
525 : *
526 : * The entries may be for different attributes, so make sure to
527 : * use ginCompareAttEntries for comparison.
528 : */
529 3754 : if ((i != FirstOffsetNumber) &&
530 334 : !(i == maxoff && rightlink == InvalidBlockNumber && !GinPageIsLeaf(page)))
531 : {
532 : Datum prev_key;
533 : GinNullCategory prev_key_category;
534 :
535 3408 : prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
536 3408 : if (ginCompareAttEntries(&state, prev_attnum, prev_key,
537 : prev_key_category, current_attnum,
538 : current_key, current_key_category) >= 0)
539 6 : ereport(ERROR,
540 : (errcode(ERRCODE_INDEX_CORRUPTED),
541 : errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u",
542 : RelationGetRelationName(rel), stack->blkno, i, rightlink)));
543 : }
544 :
545 : /*
546 : * Check if this tuple is consistent with the downlink in the
547 : * parent.
548 : */
549 3748 : if (stack->parenttup &&
550 : i == maxoff)
551 : {
552 : GinNullCategory parent_key_category;
553 228 : OffsetNumber parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup);
554 228 : Datum parent_key = gintuple_get_key(&state,
555 : stack->parenttup,
556 : &parent_key_category);
557 :
558 228 : if (ginCompareAttEntries(&state, current_attnum, current_key,
559 : current_key_category, parent_key_attnum,
560 : parent_key, parent_key_category) > 0)
561 : {
562 : /*
563 : * There was a discrepancy between parent and child
564 : * tuples. We need to verify it is not a result of
565 : * concurrent call of gistplacetopage(). So, lock parent
566 : * and try to find downlink for current page. It may be
567 : * missing due to concurrent page split, this is OK.
568 : */
569 4 : pfree(stack->parenttup);
570 4 : stack->parenttup = gin_refind_parent(rel, stack->parentblk,
571 : stack->blkno, strategy);
572 :
573 : /* We found it - make a final check before failing */
574 4 : if (!stack->parenttup)
575 0 : elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
576 : stack->blkno, stack->parentblk);
577 : else
578 : {
579 4 : parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup);
580 4 : parent_key = gintuple_get_key(&state,
581 : stack->parenttup,
582 : &parent_key_category);
583 :
584 : /*
585 : * Check if it is properly adjusted. If succeed,
586 : * proceed to the next key.
587 : */
588 4 : if (ginCompareAttEntries(&state, current_attnum, current_key,
589 : current_key_category, parent_key_attnum,
590 : parent_key, parent_key_category) > 0)
591 4 : ereport(ERROR,
592 : (errcode(ERRCODE_INDEX_CORRUPTED),
593 : errmsg("index \"%s\" has inconsistent records on page %u offset %u",
594 : RelationGetRelationName(rel), stack->blkno, i)));
595 : }
596 : }
597 : }
598 :
599 : /* If this is an internal page, recurse into the child */
600 3744 : if (!GinPageIsLeaf(page))
601 : {
602 : GinScanItem *ptr;
603 :
604 240 : ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
605 240 : ptr->depth = stack->depth + 1;
606 : /* last tuple in layer has no high key */
607 240 : if (i == maxoff && rightlink == InvalidBlockNumber)
608 10 : ptr->parenttup = NULL;
609 : else
610 230 : ptr->parenttup = CopyIndexTuple(idxtuple);
611 240 : ptr->parentblk = stack->blkno;
612 240 : ptr->blkno = GinGetDownlink(idxtuple);
613 240 : ptr->next = stack->next;
614 240 : stack->next = ptr;
615 : }
616 : /* If this item is a pointer to a posting tree, recurse into it */
617 3504 : else if (GinIsPostingTree(idxtuple))
618 : {
619 0 : BlockNumber rootPostingTree = GinGetPostingTree(idxtuple);
620 :
621 0 : gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree);
622 : }
623 : else
624 : {
625 : ItemPointer ipd;
626 : int nipd;
627 :
628 3504 : ipd = ginReadTupleWithoutState(idxtuple, &nipd);
629 :
630 668074 : for (int j = 0; j < nipd; j++)
631 : {
632 664570 : if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[j])))
633 0 : ereport(ERROR,
634 : (errcode(ERRCODE_INDEX_CORRUPTED),
635 : errmsg("index \"%s\": posting list contains invalid heap pointer on block %u",
636 : RelationGetRelationName(rel), stack->blkno)));
637 : }
638 3504 : pfree(ipd);
639 : }
640 :
641 3744 : prev_tuple = CopyIndexTuple(idxtuple);
642 3744 : prev_attnum = current_attnum;
643 : }
644 :
645 326 : LockBuffer(buffer, GIN_UNLOCK);
646 326 : ReleaseBuffer(buffer);
647 :
648 : /* Step to next item in the queue */
649 326 : stack_next = stack->next;
650 326 : if (stack->parenttup)
651 224 : pfree(stack->parenttup);
652 326 : pfree(stack);
653 326 : stack = stack_next;
654 : }
655 :
656 88 : MemoryContextSwitchTo(oldcontext);
657 88 : MemoryContextDelete(mctx);
658 88 : }
659 :
660 : /*
661 : * Verify that a freshly-read page looks sane.
662 : */
663 : static void
664 336 : check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
665 : {
666 336 : Page page = BufferGetPage(buffer);
667 :
668 : /*
669 : * ReadBuffer verifies that every newly-read page passes
670 : * PageHeaderIsValid, which means it either contains a reasonably sane
671 : * page header or is all-zero. We have to defend against the all-zero
672 : * case, however.
673 : */
674 336 : if (PageIsNew(page))
675 0 : ereport(ERROR,
676 : (errcode(ERRCODE_INDEX_CORRUPTED),
677 : errmsg("index \"%s\" contains unexpected zero page at block %u",
678 : RelationGetRelationName(rel),
679 : BufferGetBlockNumber(buffer)),
680 : errhint("Please REINDEX it.")));
681 :
682 : /*
683 : * Additionally check that the special area looks sane.
684 : */
685 336 : if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
686 0 : ereport(ERROR,
687 : (errcode(ERRCODE_INDEX_CORRUPTED),
688 : errmsg("index \"%s\" contains corrupted page at block %u",
689 : RelationGetRelationName(rel),
690 : BufferGetBlockNumber(buffer)),
691 : errhint("Please REINDEX it.")));
692 :
693 336 : if (GinPageIsDeleted(page))
694 : {
695 0 : if (!GinPageIsLeaf(page))
696 0 : ereport(ERROR,
697 : (errcode(ERRCODE_INDEX_CORRUPTED),
698 : errmsg("index \"%s\" has deleted internal page %u",
699 : RelationGetRelationName(rel), blockNo)));
700 0 : if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
701 0 : ereport(ERROR,
702 : (errcode(ERRCODE_INDEX_CORRUPTED),
703 : errmsg("index \"%s\" has deleted page %u with tuples",
704 : RelationGetRelationName(rel), blockNo)));
705 : }
706 336 : else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
707 0 : ereport(ERROR,
708 : (errcode(ERRCODE_INDEX_CORRUPTED),
709 : errmsg("index \"%s\" has page %u with exceeding count of tuples",
710 : RelationGetRelationName(rel), blockNo)));
711 336 : }
712 :
713 : /*
714 : * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
715 : *
716 : * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
717 : * returns NULL.
718 : */
719 : static IndexTuple
720 4 : gin_refind_parent(Relation rel, BlockNumber parentblkno,
721 : BlockNumber childblkno, BufferAccessStrategy strategy)
722 : {
723 : Buffer parentbuf;
724 : Page parentpage;
725 : OffsetNumber o,
726 : parent_maxoff;
727 4 : IndexTuple result = NULL;
728 :
729 4 : parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
730 : strategy);
731 :
732 4 : LockBuffer(parentbuf, GIN_SHARE);
733 4 : parentpage = BufferGetPage(parentbuf);
734 :
735 4 : if (GinPageIsLeaf(parentpage))
736 : {
737 0 : UnlockReleaseBuffer(parentbuf);
738 0 : return result;
739 : }
740 :
741 4 : parent_maxoff = PageGetMaxOffsetNumber(parentpage);
742 4 : for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
743 : {
744 4 : ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
745 4 : IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
746 :
747 4 : if (GinGetDownlink(itup) == childblkno)
748 : {
749 : /* Found it! Make copy and return it */
750 4 : result = CopyIndexTuple(itup);
751 4 : break;
752 : }
753 : }
754 :
755 4 : UnlockReleaseBuffer(parentbuf);
756 :
757 4 : return result;
758 : }
759 :
760 : static ItemId
761 3986 : PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
762 : OffsetNumber offset)
763 : {
764 3986 : ItemId itemid = PageGetItemId(page, offset);
765 :
766 3986 : if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
767 : BLCKSZ - MAXALIGN(sizeof(GinPageOpaqueData)))
768 0 : ereport(ERROR,
769 : (errcode(ERRCODE_INDEX_CORRUPTED),
770 : errmsg("line pointer points past end of tuple space in index \"%s\"",
771 : RelationGetRelationName(rel)),
772 : errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
773 : block, offset, ItemIdGetOffset(itemid),
774 : ItemIdGetLength(itemid),
775 : ItemIdGetFlags(itemid))));
776 :
777 : /*
778 : * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED or LP_DEAD,
779 : * since GIN never uses all three. Verify that line pointer has storage,
780 : * too.
781 : */
782 3986 : if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
783 3986 : ItemIdIsDead(itemid) || ItemIdGetLength(itemid) == 0)
784 0 : ereport(ERROR,
785 : (errcode(ERRCODE_INDEX_CORRUPTED),
786 : errmsg("invalid line pointer storage in index \"%s\"",
787 : RelationGetRelationName(rel)),
788 : errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
789 : block, offset, ItemIdGetOffset(itemid),
790 : ItemIdGetLength(itemid),
791 : ItemIdGetFlags(itemid))));
792 :
793 3986 : return itemid;
794 : }
|