Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginget.c
4 : * fetch tuples from a GIN scan.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginget.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/relscan.h"
19 : #include "common/pg_prng.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/datum.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : /* GUC parameter */
27 : int GinFuzzySearchLimit = 0;
28 :
29 : typedef struct pendingPosition
30 : {
31 : Buffer pendingBuffer;
32 : OffsetNumber firstOffset;
33 : OffsetNumber lastOffset;
34 : ItemPointerData item;
35 : bool *hasMatchKey;
36 : } pendingPosition;
37 :
38 :
39 : /*
40 : * Goes to the next page if current offset is outside of bounds
41 : */
42 : static bool
43 407620 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
44 : {
45 407620 : Page page = BufferGetPage(stack->buffer);
46 :
47 407620 : if (stack->off > PageGetMaxOffsetNumber(page))
48 : {
49 : /*
50 : * We scanned the whole page, so we should take right page
51 : */
52 3790 : if (GinPageRightMost(page))
53 366 : return false; /* no more pages */
54 :
55 3424 : stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
56 3424 : stack->blkno = BufferGetBlockNumber(stack->buffer);
57 3424 : stack->off = FirstOffsetNumber;
58 3424 : PredicateLockPage(btree->index, stack->blkno, snapshot);
59 : }
60 :
61 407254 : return true;
62 : }
63 :
64 : /*
65 : * Scan all pages of a posting tree and save all its heap ItemPointers
66 : * in scanEntry->matchBitmap
67 : */
68 : static void
69 12 : scanPostingTree(Relation index, GinScanEntry scanEntry,
70 : BlockNumber rootPostingTree)
71 : {
72 : GinBtreeData btree;
73 : GinBtreeStack *stack;
74 : Buffer buffer;
75 : Page page;
76 :
77 : /* Descend to the leftmost leaf page */
78 12 : stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
79 12 : buffer = stack->buffer;
80 :
81 12 : IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
82 :
83 12 : freeGinBtreeStack(stack);
84 :
85 : /*
86 : * Loop iterates through all leaf pages of posting tree
87 : */
88 : for (;;)
89 : {
90 30 : page = BufferGetPage(buffer);
91 30 : if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
92 : {
93 30 : int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
94 :
95 30 : scanEntry->predictNumberResult += n;
96 : }
97 :
98 30 : if (GinPageRightMost(page))
99 12 : break; /* no more pages */
100 :
101 18 : buffer = ginStepRight(buffer, index, GIN_SHARE);
102 : }
103 :
104 12 : UnlockReleaseBuffer(buffer);
105 12 : }
106 :
107 : /*
108 : * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
109 : * match the search entry. This supports three different match modes:
110 : *
111 : * 1. Partial-match support: scan from current point until the
112 : * comparePartialFn says we're done.
113 : * 2. SEARCH_MODE_ALL: scan from current point (which should be first
114 : * key for the current attnum) until we hit null items or end of attnum
115 : * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
116 : * key for the current attnum) until we hit end of attnum
117 : *
118 : * Returns true if done, false if it's necessary to restart scan from scratch
119 : */
120 : static bool
121 534 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
122 : GinScanEntry scanEntry, Snapshot snapshot)
123 : {
124 : OffsetNumber attnum;
125 : Form_pg_attribute attr;
126 :
127 : /* Initialize empty bitmap result */
128 534 : scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
129 :
130 : /* Null query cannot partial-match anything */
131 534 : if (scanEntry->isPartialMatch &&
132 252 : scanEntry->queryCategory != GIN_CAT_NORM_KEY)
133 0 : return true;
134 :
135 : /* Locate tupdesc entry for key column (for attbyval/attlen data) */
136 534 : attnum = scanEntry->attnum;
137 534 : attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
138 :
139 : /*
140 : * Predicate lock entry leaf page, following pages will be locked by
141 : * moveRightIfItNeeded()
142 : */
143 534 : PredicateLockPage(btree->index,
144 : BufferGetBlockNumber(stack->buffer),
145 : snapshot);
146 :
147 : for (;;)
148 407074 : {
149 : Page page;
150 : IndexTuple itup;
151 : Datum idatum;
152 : GinNullCategory icategory;
153 :
154 : /*
155 : * stack->off points to the interested entry, buffer is already locked
156 : */
157 407608 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
158 534 : return true;
159 :
160 407242 : page = BufferGetPage(stack->buffer);
161 407242 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
162 :
163 : /*
164 : * If tuple stores another attribute then stop scan
165 : */
166 407242 : if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
167 0 : return true;
168 :
169 : /* Safe to fetch attribute value */
170 407242 : idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
171 :
172 : /*
173 : * Check for appropriate scan stop conditions
174 : */
175 407242 : if (scanEntry->isPartialMatch)
176 : {
177 : int32 cmp;
178 :
179 : /*
180 : * In partial match, stop scan at any null (including
181 : * placeholders); partial matches never match nulls
182 : */
183 1332 : if (icategory != GIN_CAT_NORM_KEY)
184 10 : return true;
185 :
186 : /*----------
187 : * Check of partial match.
188 : * case cmp == 0 => match
189 : * case cmp > 0 => not match and finish scan
190 : * case cmp < 0 => not match and continue scan
191 : *----------
192 : */
193 1322 : cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
194 1322 : btree->ginstate->supportCollation[attnum - 1],
195 : scanEntry->queryKey,
196 : idatum,
197 1322 : UInt16GetDatum(scanEntry->strategy),
198 1322 : PointerGetDatum(scanEntry->extra_data)));
199 :
200 1322 : if (cmp > 0)
201 130 : return true;
202 1192 : else if (cmp < 0)
203 : {
204 60 : stack->off++;
205 60 : continue;
206 : }
207 : }
208 405910 : else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
209 : {
210 : /*
211 : * In ALL mode, we are not interested in null items, so we can
212 : * stop if we get to a null-item placeholder (which will be the
213 : * last entry for a given attnum). We do want to include NULL_KEY
214 : * and EMPTY_ITEM entries, though.
215 : */
216 405910 : if (icategory == GIN_CAT_NULL_ITEM)
217 28 : return true;
218 : }
219 :
220 : /*
221 : * OK, we want to return the TIDs listed in this entry.
222 : */
223 407014 : if (GinIsPostingTree(itup))
224 : {
225 12 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
226 :
227 : /*
228 : * We should unlock current page (but not unpin) during tree scan
229 : * to prevent deadlock with vacuum processes.
230 : *
231 : * We save current entry value (idatum) to be able to re-find our
232 : * tuple after re-locking
233 : */
234 12 : if (icategory == GIN_CAT_NORM_KEY)
235 12 : idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
236 :
237 12 : LockBuffer(stack->buffer, GIN_UNLOCK);
238 :
239 : /*
240 : * Acquire predicate lock on the posting tree. We already hold a
241 : * lock on the entry page, but insertions to the posting tree
242 : * don't check for conflicts on that level.
243 : */
244 12 : PredicateLockPage(btree->index, rootPostingTree, snapshot);
245 :
246 : /* Collect all the TIDs in this entry's posting tree */
247 12 : scanPostingTree(btree->index, scanEntry, rootPostingTree);
248 :
249 : /*
250 : * We lock again the entry page and while it was unlocked insert
251 : * might have occurred, so we need to re-find our position.
252 : */
253 12 : LockBuffer(stack->buffer, GIN_SHARE);
254 12 : page = BufferGetPage(stack->buffer);
255 12 : if (!GinPageIsLeaf(page))
256 : {
257 : /*
258 : * Root page becomes non-leaf while we unlock it. We will
259 : * start again, this situation doesn't occur often - root can
260 : * became a non-leaf only once per life of index.
261 : */
262 0 : return false;
263 : }
264 :
265 : /* Search forward to re-find idatum */
266 : for (;;)
267 : {
268 12 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
269 0 : ereport(ERROR,
270 : (errcode(ERRCODE_INTERNAL_ERROR),
271 : errmsg("failed to re-find tuple within index \"%s\"",
272 : RelationGetRelationName(btree->index))));
273 :
274 12 : page = BufferGetPage(stack->buffer);
275 12 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
276 :
277 12 : if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
278 : {
279 : Datum newDatum;
280 : GinNullCategory newCategory;
281 :
282 12 : newDatum = gintuple_get_key(btree->ginstate, itup,
283 : &newCategory);
284 :
285 12 : if (ginCompareEntries(btree->ginstate, attnum,
286 : newDatum, newCategory,
287 : idatum, icategory) == 0)
288 12 : break; /* Found! */
289 : }
290 :
291 0 : stack->off++;
292 : }
293 :
294 12 : if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
295 0 : pfree(DatumGetPointer(idatum));
296 : }
297 : else
298 : {
299 : ItemPointer ipd;
300 : int nipd;
301 :
302 407002 : ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
303 407002 : tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
304 407002 : scanEntry->predictNumberResult += GinGetNPosting(itup);
305 407002 : pfree(ipd);
306 : }
307 :
308 : /*
309 : * Done with this entry, go to the next
310 : */
311 407014 : stack->off++;
312 : }
313 : }
314 :
315 : /*
316 : * Start* functions setup beginning state of searches: finds correct buffer and pins it.
317 : */
318 : static void
319 6692 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
320 : {
321 : GinBtreeData btreeEntry;
322 : GinBtreeStack *stackEntry;
323 : Page page;
324 : bool needUnlock;
325 :
326 6692 : restartScanEntry:
327 6692 : entry->buffer = InvalidBuffer;
328 6692 : ItemPointerSetMin(&entry->curItem);
329 6692 : entry->offset = InvalidOffsetNumber;
330 6692 : if (entry->list)
331 0 : pfree(entry->list);
332 6692 : entry->list = NULL;
333 6692 : entry->nlist = 0;
334 6692 : entry->matchBitmap = NULL;
335 6692 : entry->matchResult = NULL;
336 6692 : entry->reduceResult = false;
337 6692 : entry->predictNumberResult = 0;
338 :
339 : /*
340 : * we should find entry, and begin scan of posting tree or just store
341 : * posting list in memory
342 : */
343 6692 : ginPrepareEntryScan(&btreeEntry, entry->attnum,
344 6692 : entry->queryKey, entry->queryCategory,
345 : ginstate);
346 6692 : stackEntry = ginFindLeafPage(&btreeEntry, true, false);
347 6692 : page = BufferGetPage(stackEntry->buffer);
348 :
349 : /* ginFindLeafPage() will have already checked snapshot age. */
350 6692 : needUnlock = true;
351 :
352 6692 : entry->isFinished = true;
353 :
354 6692 : if (entry->isPartialMatch ||
355 6440 : entry->queryCategory == GIN_CAT_EMPTY_QUERY)
356 : {
357 : /*
358 : * btreeEntry.findItem locates the first item >= given search key.
359 : * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
360 : * because of the way the GIN_CAT_EMPTY_QUERY category code is
361 : * assigned.) We scan forward from there and collect all TIDs needed
362 : * for the entry type.
363 : */
364 534 : btreeEntry.findItem(&btreeEntry, stackEntry);
365 534 : if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
366 534 : == false)
367 : {
368 : /*
369 : * GIN tree was seriously restructured, so we will cleanup all
370 : * found data and rescan. See comments near 'return false' in
371 : * collectMatchBitmap()
372 : */
373 0 : if (entry->matchBitmap)
374 : {
375 0 : if (entry->matchIterator)
376 0 : tbm_end_iterate(entry->matchIterator);
377 0 : entry->matchIterator = NULL;
378 0 : tbm_free(entry->matchBitmap);
379 0 : entry->matchBitmap = NULL;
380 : }
381 0 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
382 0 : freeGinBtreeStack(stackEntry);
383 0 : goto restartScanEntry;
384 : }
385 :
386 534 : if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
387 : {
388 420 : entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
389 420 : entry->isFinished = false;
390 : }
391 : }
392 6158 : else if (btreeEntry.findItem(&btreeEntry, stackEntry))
393 : {
394 4448 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
395 :
396 4448 : if (GinIsPostingTree(itup))
397 : {
398 64 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
399 : GinBtreeStack *stack;
400 : Page entrypage;
401 : ItemPointerData minItem;
402 :
403 : /*
404 : * This is an equality scan, so lock the root of the posting tree.
405 : * It represents a lock on the exact key value, and covers all the
406 : * items in the posting tree.
407 : */
408 64 : PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
409 :
410 : /*
411 : * We should unlock entry page before touching posting tree to
412 : * prevent deadlocks with vacuum processes. Because entry is never
413 : * deleted from page and posting tree is never reduced to the
414 : * posting list, we can unlock page after getting BlockNumber of
415 : * root of posting tree.
416 : */
417 64 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
418 64 : needUnlock = false;
419 :
420 64 : stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
421 : rootPostingTree);
422 64 : entry->buffer = stack->buffer;
423 :
424 : /*
425 : * We keep buffer pinned because we need to prevent deletion of
426 : * page during scan. See GIN's vacuum implementation. RefCount is
427 : * increased to keep buffer pinned after freeGinBtreeStack() call.
428 : */
429 64 : IncrBufferRefCount(entry->buffer);
430 :
431 64 : entrypage = BufferGetPage(entry->buffer);
432 :
433 : /*
434 : * Load the first page into memory.
435 : */
436 64 : ItemPointerSetMin(&minItem);
437 64 : entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
438 :
439 64 : entry->predictNumberResult = stack->predictNumber * entry->nlist;
440 :
441 64 : LockBuffer(entry->buffer, GIN_UNLOCK);
442 64 : freeGinBtreeStack(stack);
443 64 : entry->isFinished = false;
444 : }
445 : else
446 : {
447 : /*
448 : * Lock the entry leaf page. This is more coarse-grained than
449 : * necessary, because it will conflict with any insertions that
450 : * land on the same leaf page, not only the exact key we searched
451 : * for. But locking an individual tuple would require updating
452 : * that lock whenever it moves because of insertions or vacuums,
453 : * which seems too complicated.
454 : */
455 4384 : PredicateLockPage(ginstate->index,
456 : BufferGetBlockNumber(stackEntry->buffer),
457 : snapshot);
458 4384 : if (GinGetNPosting(itup) > 0)
459 : {
460 4378 : entry->list = ginReadTuple(ginstate, entry->attnum, itup,
461 : &entry->nlist);
462 4378 : entry->predictNumberResult = entry->nlist;
463 :
464 4378 : entry->isFinished = false;
465 : }
466 : }
467 : }
468 : else
469 : {
470 : /*
471 : * No entry found. Predicate lock the leaf page, to lock the place
472 : * where the entry would've been, had there been one.
473 : */
474 1710 : PredicateLockPage(ginstate->index,
475 : BufferGetBlockNumber(stackEntry->buffer), snapshot);
476 : }
477 :
478 6692 : if (needUnlock)
479 6628 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
480 6692 : freeGinBtreeStack(stackEntry);
481 6692 : }
482 :
483 : /*
484 : * Comparison function for scan entry indexes. Sorts by predictNumberResult,
485 : * least frequent items first.
486 : */
487 : static int
488 9440 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
489 : {
490 9440 : const GinScanKey key = (const GinScanKey) arg;
491 9440 : int i1 = *(const int *) a1;
492 9440 : int i2 = *(const int *) a2;
493 9440 : uint32 n1 = key->scanEntry[i1]->predictNumberResult;
494 9440 : uint32 n2 = key->scanEntry[i2]->predictNumberResult;
495 :
496 9440 : if (n1 < n2)
497 1368 : return -1;
498 8072 : else if (n1 == n2)
499 6302 : return 0;
500 : else
501 1770 : return 1;
502 : }
503 :
504 : static void
505 1714 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
506 : {
507 1714 : MemoryContext oldCtx = CurrentMemoryContext;
508 : int i;
509 : int j;
510 : int *entryIndexes;
511 :
512 1714 : ItemPointerSetMin(&key->curItem);
513 1714 : key->curItemMatches = false;
514 1714 : key->recheckCurItem = false;
515 1714 : key->isFinished = false;
516 :
517 : /*
518 : * Divide the entries into two distinct sets: required and additional.
519 : * Additional entries are not enough for a match alone, without any items
520 : * from the required set, but are needed by the consistent function to
521 : * decide if an item matches. When scanning, we can skip over items from
522 : * additional entries that have no corresponding matches in any of the
523 : * required entries. That speeds up queries like "frequent & rare"
524 : * considerably, if the frequent term can be put in the additional set.
525 : *
526 : * There can be many legal ways to divide them entries into these two
527 : * sets. A conservative division is to just put everything in the required
528 : * set, but the more you can put in the additional set, the more you can
529 : * skip during the scan. To maximize skipping, we try to put as many
530 : * frequent items as possible into additional, and less frequent ones into
531 : * required. To do that, sort the entries by frequency
532 : * (predictNumberResult), and put entries into the required set in that
533 : * order, until the consistent function says that none of the remaining
534 : * entries can form a match, without any items from the required set. The
535 : * rest go to the additional set.
536 : *
537 : * Exclude-only scan keys are known to have no required entries.
538 : */
539 1714 : if (key->excludeOnly)
540 : {
541 38 : MemoryContextSwitchTo(so->keyCtx);
542 :
543 38 : key->nrequired = 0;
544 38 : key->nadditional = key->nentries;
545 38 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
546 44 : for (i = 0; i < key->nadditional; i++)
547 6 : key->additionalEntries[i] = key->scanEntry[i];
548 : }
549 1676 : else if (key->nentries > 1)
550 : {
551 560 : MemoryContextSwitchTo(so->tempCtx);
552 :
553 560 : entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
554 6130 : for (i = 0; i < key->nentries; i++)
555 5570 : entryIndexes[i] = i;
556 560 : qsort_arg(entryIndexes, key->nentries, sizeof(int),
557 : entryIndexByFrequencyCmp, key);
558 :
559 4834 : for (i = 0; i < key->nentries - 1; i++)
560 : {
561 : /* Pass all entries <= i as FALSE, and the rest as MAYBE */
562 521048 : for (j = 0; j <= i; j++)
563 516356 : key->entryRes[entryIndexes[j]] = GIN_FALSE;
564 521856 : for (j = i + 1; j < key->nentries; j++)
565 517164 : key->entryRes[entryIndexes[j]] = GIN_MAYBE;
566 :
567 4692 : if (key->triConsistentFn(key) == GIN_FALSE)
568 418 : break;
569 : }
570 : /* i is now the last required entry. */
571 :
572 560 : MemoryContextSwitchTo(so->keyCtx);
573 :
574 560 : key->nrequired = i + 1;
575 560 : key->nadditional = key->nentries - key->nrequired;
576 560 : key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
577 560 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
578 :
579 560 : j = 0;
580 5394 : for (i = 0; i < key->nrequired; i++)
581 4834 : key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
582 1296 : for (i = 0; i < key->nadditional; i++)
583 736 : key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
584 :
585 : /* clean up after consistentFn calls (also frees entryIndexes) */
586 560 : MemoryContextReset(so->tempCtx);
587 : }
588 : else
589 : {
590 1116 : MemoryContextSwitchTo(so->keyCtx);
591 :
592 1116 : key->nrequired = 1;
593 1116 : key->nadditional = 0;
594 1116 : key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
595 1116 : key->requiredEntries[0] = key->scanEntry[0];
596 : }
597 1714 : MemoryContextSwitchTo(oldCtx);
598 1714 : }
599 :
600 : static void
601 1586 : startScan(IndexScanDesc scan)
602 : {
603 1586 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
604 1586 : GinState *ginstate = &so->ginstate;
605 : uint32 i;
606 :
607 8278 : for (i = 0; i < so->totalentries; i++)
608 6692 : startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
609 :
610 1586 : if (GinFuzzySearchLimit > 0)
611 : {
612 : /*
613 : * If all of keys more than threshold we will try to reduce result, we
614 : * hope (and only hope, for intersection operation of array our
615 : * supposition isn't true), that total result will not more than
616 : * minimal predictNumberResult.
617 : */
618 6 : bool reduce = true;
619 :
620 12 : for (i = 0; i < so->totalentries; i++)
621 : {
622 6 : if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
623 : {
624 0 : reduce = false;
625 0 : break;
626 : }
627 : }
628 6 : if (reduce)
629 : {
630 12 : for (i = 0; i < so->totalentries; i++)
631 : {
632 6 : so->entries[i]->predictNumberResult /= so->totalentries;
633 6 : so->entries[i]->reduceResult = true;
634 : }
635 : }
636 : }
637 :
638 : /*
639 : * Now that we have the estimates for the entry frequencies, finish
640 : * initializing the scan keys.
641 : */
642 3300 : for (i = 0; i < so->nkeys; i++)
643 1714 : startScanKey(ginstate, so, so->keys + i);
644 1586 : }
645 :
646 : /*
647 : * Load the next batch of item pointers from a posting tree.
648 : *
649 : * Note that we copy the page into GinScanEntry->list array and unlock it, but
650 : * keep it pinned to prevent interference with vacuum.
651 : */
652 : static void
653 100 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
654 : ItemPointerData advancePast)
655 : {
656 : Page page;
657 : int i;
658 : bool stepright;
659 :
660 100 : if (!BufferIsValid(entry->buffer))
661 : {
662 0 : entry->isFinished = true;
663 0 : return;
664 : }
665 :
666 : /*
667 : * We have two strategies for finding the correct page: step right from
668 : * the current page, or descend the tree again from the root. If
669 : * advancePast equals the current item, the next matching item should be
670 : * on the next page, so we step right. Otherwise, descend from root.
671 : */
672 100 : if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
673 : {
674 94 : stepright = true;
675 94 : LockBuffer(entry->buffer, GIN_SHARE);
676 : }
677 : else
678 : {
679 : GinBtreeStack *stack;
680 :
681 6 : ReleaseBuffer(entry->buffer);
682 :
683 : /*
684 : * Set the search key, and find the correct leaf page.
685 : */
686 6 : if (ItemPointerIsLossyPage(&advancePast))
687 : {
688 0 : ItemPointerSet(&entry->btree.itemptr,
689 0 : GinItemPointerGetBlockNumber(&advancePast) + 1,
690 : FirstOffsetNumber);
691 : }
692 : else
693 : {
694 6 : ItemPointerSet(&entry->btree.itemptr,
695 : GinItemPointerGetBlockNumber(&advancePast),
696 6 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
697 : }
698 6 : entry->btree.fullScan = false;
699 6 : stack = ginFindLeafPage(&entry->btree, true, false);
700 :
701 : /* we don't need the stack, just the buffer. */
702 6 : entry->buffer = stack->buffer;
703 6 : IncrBufferRefCount(entry->buffer);
704 6 : freeGinBtreeStack(stack);
705 6 : stepright = false;
706 : }
707 :
708 100 : elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
709 : GinItemPointerGetBlockNumber(&advancePast),
710 : GinItemPointerGetOffsetNumber(&advancePast),
711 : !stepright);
712 :
713 100 : page = BufferGetPage(entry->buffer);
714 : for (;;)
715 : {
716 106 : entry->offset = InvalidOffsetNumber;
717 106 : if (entry->list)
718 : {
719 94 : pfree(entry->list);
720 94 : entry->list = NULL;
721 94 : entry->nlist = 0;
722 : }
723 :
724 106 : if (stepright)
725 : {
726 : /*
727 : * We've processed all the entries on this page. If it was the
728 : * last page in the tree, we're done.
729 : */
730 100 : if (GinPageRightMost(page))
731 : {
732 6 : UnlockReleaseBuffer(entry->buffer);
733 6 : entry->buffer = InvalidBuffer;
734 6 : entry->isFinished = true;
735 6 : return;
736 : }
737 :
738 : /*
739 : * Step to next page, following the right link. then find the
740 : * first ItemPointer greater than advancePast.
741 : */
742 94 : entry->buffer = ginStepRight(entry->buffer,
743 : ginstate->index,
744 : GIN_SHARE);
745 94 : page = BufferGetPage(entry->buffer);
746 : }
747 100 : stepright = true;
748 :
749 100 : if (GinPageGetOpaque(page)->flags & GIN_DELETED)
750 0 : continue; /* page was deleted by concurrent vacuum */
751 :
752 : /*
753 : * The first item > advancePast might not be on this page, but
754 : * somewhere to the right, if the page was split, or a non-match from
755 : * another key in the query allowed us to skip some items from this
756 : * entry. Keep following the right-links until we re-find the correct
757 : * page.
758 : */
759 136 : if (!GinPageRightMost(page) &&
760 36 : ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
761 : {
762 : /*
763 : * the item we're looking is > the right bound of the page, so it
764 : * can't be on this page.
765 : */
766 0 : continue;
767 : }
768 :
769 100 : entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
770 :
771 1924 : for (i = 0; i < entry->nlist; i++)
772 : {
773 1918 : if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
774 : {
775 94 : entry->offset = i;
776 :
777 94 : if (GinPageRightMost(page))
778 : {
779 : /* after processing the copied items, we're done. */
780 58 : UnlockReleaseBuffer(entry->buffer);
781 58 : entry->buffer = InvalidBuffer;
782 : }
783 : else
784 36 : LockBuffer(entry->buffer, GIN_UNLOCK);
785 94 : return;
786 : }
787 : }
788 : }
789 : }
790 :
791 : #define gin_rand() pg_prng_double(&pg_global_prng_state)
792 : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
793 :
794 : /*
795 : * Sets entry->curItem to next heap item pointer > advancePast, for one entry
796 : * of one scan key, or sets entry->isFinished to true if there are no more.
797 : *
798 : * Item pointers are returned in ascending order.
799 : *
800 : * Note: this can return a "lossy page" item pointer, indicating that the
801 : * entry potentially matches all items on that heap page. However, it is
802 : * not allowed to return both a lossy page pointer and exact (regular)
803 : * item pointers for the same page. (Doing so would break the key-combination
804 : * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
805 : * current implementation this is guaranteed by the behavior of tidbitmaps.
806 : */
807 : static void
808 1088038 : entryGetItem(GinState *ginstate, GinScanEntry entry,
809 : ItemPointerData advancePast)
810 : {
811 : Assert(!entry->isFinished);
812 :
813 : Assert(!ItemPointerIsValid(&entry->curItem) ||
814 : ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
815 :
816 1088038 : if (entry->matchBitmap)
817 : {
818 : /* A bitmap result */
819 268052 : BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
820 268052 : OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
821 :
822 : for (;;)
823 : {
824 : /*
825 : * If we've exhausted all items on this block, move to next block
826 : * in the bitmap.
827 : */
828 272878 : while (entry->matchResult == NULL ||
829 272458 : (entry->matchResult->ntuples >= 0 &&
830 272458 : entry->offset >= entry->matchResult->ntuples) ||
831 535264 : entry->matchResult->blockno < advancePastBlk ||
832 267632 : (ItemPointerIsLossyPage(&advancePast) &&
833 0 : entry->matchResult->blockno == advancePastBlk))
834 : {
835 5246 : entry->matchResult = tbm_iterate(entry->matchIterator);
836 :
837 5246 : if (entry->matchResult == NULL)
838 : {
839 420 : ItemPointerSetInvalid(&entry->curItem);
840 420 : tbm_end_iterate(entry->matchIterator);
841 420 : entry->matchIterator = NULL;
842 420 : entry->isFinished = true;
843 420 : break;
844 : }
845 :
846 : /*
847 : * Reset counter to the beginning of entry->matchResult. Note:
848 : * entry->offset is still greater than matchResult->ntuples if
849 : * matchResult is lossy. So, on next call we will get next
850 : * result from TIDBitmap.
851 : */
852 4826 : entry->offset = 0;
853 : }
854 268052 : if (entry->isFinished)
855 420 : break;
856 :
857 : /*
858 : * We're now on the first page after advancePast which has any
859 : * items on it. If it's a lossy result, return that.
860 : */
861 267632 : if (entry->matchResult->ntuples < 0)
862 : {
863 0 : ItemPointerSetLossyPage(&entry->curItem,
864 : entry->matchResult->blockno);
865 :
866 : /*
867 : * We might as well fall out of the loop; we could not
868 : * estimate number of results on this page to support correct
869 : * reducing of result even if it's enabled.
870 : */
871 0 : break;
872 : }
873 :
874 : /*
875 : * Not a lossy page. Skip over any offsets <= advancePast, and
876 : * return that.
877 : */
878 267632 : if (entry->matchResult->blockno == advancePastBlk)
879 : {
880 : /*
881 : * First, do a quick check against the last offset on the
882 : * page. If that's > advancePast, so are all the other
883 : * offsets, so just go back to the top to get the next page.
884 : */
885 263226 : if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
886 : {
887 0 : entry->offset = entry->matchResult->ntuples;
888 0 : continue;
889 : }
890 :
891 : /* Otherwise scan to find the first item > advancePast */
892 263226 : while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
893 0 : entry->offset++;
894 : }
895 :
896 267632 : ItemPointerSet(&entry->curItem,
897 267632 : entry->matchResult->blockno,
898 267632 : entry->matchResult->offsets[entry->offset]);
899 267632 : entry->offset++;
900 :
901 : /* Done unless we need to reduce the result */
902 267632 : if (!entry->reduceResult || !dropItem(entry))
903 : break;
904 : }
905 : }
906 819986 : else if (!BufferIsValid(entry->buffer))
907 : {
908 : /*
909 : * A posting list from an entry tuple, or the last page of a posting
910 : * tree.
911 : */
912 : for (;;)
913 : {
914 393930 : if (entry->offset >= entry->nlist)
915 : {
916 3890 : ItemPointerSetInvalid(&entry->curItem);
917 3890 : entry->isFinished = true;
918 3890 : break;
919 : }
920 :
921 390040 : entry->curItem = entry->list[entry->offset++];
922 :
923 : /* If we're not past advancePast, keep scanning */
924 390040 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
925 66634 : continue;
926 :
927 : /* Done unless we need to reduce the result */
928 323406 : if (!entry->reduceResult || !dropItem(entry))
929 : break;
930 : }
931 : }
932 : else
933 : {
934 : /* A posting tree */
935 : for (;;)
936 : {
937 : /* If we've processed the current batch, load more items */
938 679178 : while (entry->offset >= entry->nlist)
939 : {
940 100 : entryLoadMoreItems(ginstate, entry, advancePast);
941 :
942 100 : if (entry->isFinished)
943 : {
944 6 : ItemPointerSetInvalid(&entry->curItem);
945 6 : return;
946 : }
947 : }
948 :
949 679078 : entry->curItem = entry->list[entry->offset++];
950 :
951 : /* If we're not past advancePast, keep scanning */
952 679078 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
953 47106 : continue;
954 :
955 : /* Done unless we need to reduce the result */
956 631972 : if (!entry->reduceResult || !dropItem(entry))
957 : break;
958 :
959 : /*
960 : * Advance advancePast (so that entryLoadMoreItems will load the
961 : * right data), and keep scanning
962 : */
963 119000 : advancePast = entry->curItem;
964 : }
965 : }
966 : }
967 :
968 : /*
969 : * Identify the "current" item among the input entry streams for this scan key
970 : * that is greater than advancePast, and test whether it passes the scan key
971 : * qual condition.
972 : *
973 : * The current item is the smallest curItem among the inputs. key->curItem
974 : * is set to that value. key->curItemMatches is set to indicate whether that
975 : * TID passes the consistentFn test. If so, key->recheckCurItem is set true
976 : * iff recheck is needed for this item pointer (including the case where the
977 : * item pointer is a lossy page pointer).
978 : *
979 : * If all entry streams are exhausted, sets key->isFinished to true.
980 : *
981 : * Item pointers must be returned in ascending order.
982 : *
983 : * Note: this can return a "lossy page" item pointer, indicating that the
984 : * key potentially matches all items on that heap page. However, it is
985 : * not allowed to return both a lossy page pointer and exact (regular)
986 : * item pointers for the same page. (Doing so would break the key-combination
987 : * logic in scanGetItem.)
988 : */
989 : static void
990 1002512 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
991 : ItemPointerData advancePast)
992 : {
993 : ItemPointerData minItem;
994 : ItemPointerData curPageLossy;
995 : uint32 i;
996 : bool haveLossyEntry;
997 : GinScanEntry entry;
998 : GinTernaryValue res;
999 : MemoryContext oldCtx;
1000 : bool allFinished;
1001 :
1002 : Assert(!key->isFinished);
1003 :
1004 : /*
1005 : * We might have already tested this item; if so, no need to repeat work.
1006 : * (Note: the ">" case can happen, if advancePast is exact but we
1007 : * previously had to set curItem to a lossy-page pointer.)
1008 : */
1009 1002512 : if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
1010 1608 : return;
1011 :
1012 : /*
1013 : * Find the minimum item > advancePast among the active entry streams.
1014 : *
1015 : * Note: a lossy-page entry is encoded by a ItemPointer with max value for
1016 : * offset (0xffff), so that it will sort after any exact entries for the
1017 : * same page. So we'll prefer to return exact pointers not lossy
1018 : * pointers, which is good.
1019 : */
1020 1002490 : ItemPointerSetMax(&minItem);
1021 1002490 : allFinished = true;
1022 2757792 : for (i = 0; i < key->nrequired; i++)
1023 : {
1024 1755302 : entry = key->requiredEntries[i];
1025 :
1026 1755302 : if (entry->isFinished)
1027 560142 : continue;
1028 :
1029 : /*
1030 : * Advance this stream if necessary.
1031 : *
1032 : * In particular, since entry->curItem was initialized with
1033 : * ItemPointerSetMin, this ensures we fetch the first item for each
1034 : * entry on the first call.
1035 : */
1036 1195160 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1037 : {
1038 1034282 : entryGetItem(ginstate, entry, advancePast);
1039 1034282 : if (entry->isFinished)
1040 4138 : continue;
1041 : }
1042 :
1043 1191022 : allFinished = false;
1044 1191022 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1045 1097672 : minItem = entry->curItem;
1046 : }
1047 :
1048 1002490 : if (allFinished && !key->excludeOnly)
1049 : {
1050 : /* all entries are finished */
1051 1586 : key->isFinished = true;
1052 1586 : return;
1053 : }
1054 :
1055 1000904 : if (!key->excludeOnly)
1056 : {
1057 : /*
1058 : * For a normal scan key, we now know there are no matches < minItem.
1059 : *
1060 : * If minItem is lossy, it means that there were no exact items on the
1061 : * page among requiredEntries, because lossy pointers sort after exact
1062 : * items. However, there might be exact items for the same page among
1063 : * additionalEntries, so we mustn't advance past them.
1064 : */
1065 996428 : if (ItemPointerIsLossyPage(&minItem))
1066 : {
1067 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1068 0 : GinItemPointerGetBlockNumber(&minItem))
1069 : {
1070 0 : ItemPointerSet(&advancePast,
1071 : GinItemPointerGetBlockNumber(&minItem),
1072 : InvalidOffsetNumber);
1073 : }
1074 : }
1075 : else
1076 : {
1077 : Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
1078 996428 : ItemPointerSet(&advancePast,
1079 : GinItemPointerGetBlockNumber(&minItem),
1080 996428 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
1081 : }
1082 : }
1083 : else
1084 : {
1085 : /*
1086 : * excludeOnly scan keys don't have any entries that are necessarily
1087 : * present in matching items. So, we consider the item just after
1088 : * advancePast.
1089 : */
1090 : Assert(key->nrequired == 0);
1091 4476 : ItemPointerSet(&minItem,
1092 : GinItemPointerGetBlockNumber(&advancePast),
1093 4476 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
1094 : }
1095 :
1096 : /*
1097 : * We might not have loaded all the entry streams for this TID yet. We
1098 : * could call the consistent function, passing MAYBE for those entries, to
1099 : * see if it can decide if this TID matches based on the information we
1100 : * have. But if the consistent-function is expensive, and cannot in fact
1101 : * decide with partial information, that could be a big loss. So, load all
1102 : * the additional entries, before calling the consistent function.
1103 : */
1104 1067564 : for (i = 0; i < key->nadditional; i++)
1105 : {
1106 66660 : entry = key->additionalEntries[i];
1107 :
1108 66660 : if (entry->isFinished)
1109 412 : continue;
1110 :
1111 66248 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1112 : {
1113 53756 : entryGetItem(ginstate, entry, advancePast);
1114 53756 : if (entry->isFinished)
1115 178 : continue;
1116 : }
1117 :
1118 : /*
1119 : * Normally, none of the items in additionalEntries can have a curItem
1120 : * larger than minItem. But if minItem is a lossy page, then there
1121 : * might be exact items on the same page among additionalEntries.
1122 : */
1123 66070 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1124 : {
1125 : Assert(ItemPointerIsLossyPage(&minItem));
1126 0 : minItem = entry->curItem;
1127 : }
1128 : }
1129 :
1130 : /*
1131 : * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1132 : * and perform consistentFn test.
1133 : *
1134 : * Lossy-page entries pose a problem, since we don't know the correct
1135 : * entryRes state to pass to the consistentFn, and we also don't know what
1136 : * its combining logic will be (could be AND, OR, or even NOT). If the
1137 : * logic is OR then the consistentFn might succeed for all items in the
1138 : * lossy page even when none of the other entries match.
1139 : *
1140 : * Our strategy is to call the tri-state consistent function, with the
1141 : * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1142 : * returns FALSE, none of the lossy items alone are enough for a match, so
1143 : * we don't need to return a lossy-page pointer. Otherwise, return a
1144 : * lossy-page pointer to indicate that the whole heap page must be
1145 : * checked. (On subsequent calls, we'll do nothing until minItem is past
1146 : * the page altogether, thus ensuring that we never return both regular
1147 : * and lossy pointers for the same page.)
1148 : *
1149 : * An exception is that it doesn't matter what we pass for lossy pointers
1150 : * in "hidden" entries, because the consistentFn's result can't depend on
1151 : * them. We could pass them as MAYBE as well, but if we're using the
1152 : * "shim" implementation of a tri-state consistent function (see
1153 : * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1154 : * them as true.
1155 : *
1156 : * Note that only lossy-page entries pointing to the current item's page
1157 : * should trigger this processing; we might have future lossy pages in the
1158 : * entry array, but they aren't relevant yet.
1159 : */
1160 1000904 : key->curItem = minItem;
1161 1000904 : ItemPointerSetLossyPage(&curPageLossy,
1162 : GinItemPointerGetBlockNumber(&key->curItem));
1163 1000904 : haveLossyEntry = false;
1164 2817006 : for (i = 0; i < key->nentries; i++)
1165 : {
1166 1816102 : entry = key->scanEntry[i];
1167 3073194 : if (entry->isFinished == false &&
1168 1257092 : ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1169 : {
1170 0 : if (i < key->nuserentries)
1171 0 : key->entryRes[i] = GIN_MAYBE;
1172 : else
1173 0 : key->entryRes[i] = GIN_TRUE;
1174 0 : haveLossyEntry = true;
1175 : }
1176 : else
1177 1816102 : key->entryRes[i] = GIN_FALSE;
1178 : }
1179 :
1180 : /* prepare for calling consistentFn in temp context */
1181 1000904 : oldCtx = MemoryContextSwitchTo(tempCtx);
1182 :
1183 1000904 : if (haveLossyEntry)
1184 : {
1185 : /* Have lossy-page entries, so see if whole page matches */
1186 0 : res = key->triConsistentFn(key);
1187 :
1188 0 : if (res == GIN_TRUE || res == GIN_MAYBE)
1189 : {
1190 : /* Yes, so clean up ... */
1191 0 : MemoryContextSwitchTo(oldCtx);
1192 0 : MemoryContextReset(tempCtx);
1193 :
1194 : /* and return lossy pointer for whole page */
1195 0 : key->curItem = curPageLossy;
1196 0 : key->curItemMatches = true;
1197 0 : key->recheckCurItem = true;
1198 0 : return;
1199 : }
1200 : }
1201 :
1202 : /*
1203 : * At this point we know that we don't need to return a lossy whole-page
1204 : * pointer, but we might have matches for individual exact item pointers,
1205 : * possibly in combination with a lossy pointer. Pass lossy pointers as
1206 : * MAYBE to the ternary consistent function, to let it decide if this
1207 : * tuple satisfies the overall key, even though we don't know if the lossy
1208 : * entries match.
1209 : *
1210 : * Prepare entryRes array to be passed to consistentFn.
1211 : */
1212 2817006 : for (i = 0; i < key->nentries; i++)
1213 : {
1214 1816102 : entry = key->scanEntry[i];
1215 1816102 : if (entry->isFinished)
1216 559010 : key->entryRes[i] = GIN_FALSE;
1217 : #if 0
1218 :
1219 : /*
1220 : * This case can't currently happen, because we loaded all the entries
1221 : * for this item earlier.
1222 : */
1223 : else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1224 : key->entryRes[i] = GIN_MAYBE;
1225 : #endif
1226 1257092 : else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1227 0 : key->entryRes[i] = GIN_MAYBE;
1228 1257092 : else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1229 1073446 : key->entryRes[i] = GIN_TRUE;
1230 : else
1231 183646 : key->entryRes[i] = GIN_FALSE;
1232 : }
1233 :
1234 1000904 : res = key->triConsistentFn(key);
1235 :
1236 1000904 : switch (res)
1237 : {
1238 865256 : case GIN_TRUE:
1239 865256 : key->curItemMatches = true;
1240 : /* triConsistentFn set recheckCurItem */
1241 865256 : break;
1242 :
1243 18026 : case GIN_FALSE:
1244 18026 : key->curItemMatches = false;
1245 18026 : break;
1246 :
1247 117622 : case GIN_MAYBE:
1248 117622 : key->curItemMatches = true;
1249 117622 : key->recheckCurItem = true;
1250 117622 : break;
1251 :
1252 0 : default:
1253 :
1254 : /*
1255 : * the 'default' case shouldn't happen, but if the consistent
1256 : * function returns something bogus, this is the safe result
1257 : */
1258 0 : key->curItemMatches = true;
1259 0 : key->recheckCurItem = true;
1260 0 : break;
1261 : }
1262 :
1263 : /*
1264 : * We have a tuple, and we know if it matches or not. If it's a non-match,
1265 : * we could continue to find the next matching tuple, but let's break out
1266 : * and give scanGetItem a chance to advance the other keys. They might be
1267 : * able to skip past to a much higher TID, allowing us to save work.
1268 : */
1269 :
1270 : /* clean up after consistentFn calls */
1271 1000904 : MemoryContextSwitchTo(oldCtx);
1272 1000904 : MemoryContextReset(tempCtx);
1273 : }
1274 :
1275 : /*
1276 : * Get next heap item pointer (after advancePast) from scan.
1277 : * Returns true if anything found.
1278 : * On success, *item and *recheck are set.
1279 : *
1280 : * Note: this is very nearly the same logic as in keyGetItem(), except
1281 : * that we know the keys are to be combined with AND logic, whereas in
1282 : * keyGetItem() the combination logic is known only to the consistentFn.
1283 : */
1284 : static bool
1285 979892 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
1286 : ItemPointerData *item, bool *recheck)
1287 : {
1288 979892 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1289 : uint32 i;
1290 : bool match;
1291 :
1292 : /*----------
1293 : * Advance the scan keys in lock-step, until we find an item that matches
1294 : * all the keys. If any key reports isFinished, meaning its subset of the
1295 : * entries is exhausted, we can stop. Otherwise, set *item to the next
1296 : * matching item.
1297 : *
1298 : * This logic works only if a keyGetItem stream can never contain both
1299 : * exact and lossy pointers for the same page. Else we could have a
1300 : * case like
1301 : *
1302 : * stream 1 stream 2
1303 : * ... ...
1304 : * 42/6 42/7
1305 : * 50/1 42/0xffff
1306 : * ... ...
1307 : *
1308 : * We would conclude that 42/6 is not a match and advance stream 1,
1309 : * thus never detecting the match to the lossy pointer in stream 2.
1310 : * (keyGetItem has a similar problem versus entryGetItem.)
1311 : *----------
1312 : */
1313 : do
1314 : {
1315 997976 : ItemPointerSetMin(item);
1316 997976 : match = true;
1317 1980876 : for (i = 0; i < so->nkeys && match; i++)
1318 : {
1319 1002512 : GinScanKey key = so->keys + i;
1320 :
1321 : /*
1322 : * If we're considering a lossy page, skip excludeOnly keys. They
1323 : * can't exclude the whole page anyway.
1324 : */
1325 1002512 : if (ItemPointerIsLossyPage(item) && key->excludeOnly)
1326 : {
1327 : /*
1328 : * ginNewScanKey() should never mark the first key as
1329 : * excludeOnly.
1330 : */
1331 : Assert(i > 0);
1332 0 : continue;
1333 : }
1334 :
1335 : /* Fetch the next item for this key that is > advancePast. */
1336 1002512 : keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
1337 :
1338 1002512 : if (key->isFinished)
1339 1586 : return false;
1340 :
1341 : /*
1342 : * If it's not a match, we can immediately conclude that nothing
1343 : * <= this item matches, without checking the rest of the keys.
1344 : */
1345 1000926 : if (!key->curItemMatches)
1346 : {
1347 18026 : advancePast = key->curItem;
1348 18026 : match = false;
1349 18026 : break;
1350 : }
1351 :
1352 : /*
1353 : * It's a match. We can conclude that nothing < matches, so the
1354 : * other key streams can skip to this item.
1355 : *
1356 : * Beware of lossy pointers, though; from a lossy pointer, we can
1357 : * only conclude that nothing smaller than this *block* matches.
1358 : */
1359 982900 : if (ItemPointerIsLossyPage(&key->curItem))
1360 : {
1361 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1362 0 : GinItemPointerGetBlockNumber(&key->curItem))
1363 : {
1364 0 : ItemPointerSet(&advancePast,
1365 0 : GinItemPointerGetBlockNumber(&key->curItem),
1366 : InvalidOffsetNumber);
1367 : }
1368 : }
1369 : else
1370 : {
1371 : Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
1372 982900 : ItemPointerSet(&advancePast,
1373 982900 : GinItemPointerGetBlockNumber(&key->curItem),
1374 982900 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
1375 : }
1376 :
1377 : /*
1378 : * If this is the first key, remember this location as a potential
1379 : * match, and proceed to check the rest of the keys.
1380 : *
1381 : * Otherwise, check if this is the same item that we checked the
1382 : * previous keys for (or a lossy pointer for the same page). If
1383 : * not, loop back to check the previous keys for this item (we
1384 : * will check this key again too, but keyGetItem returns quickly
1385 : * for that)
1386 : */
1387 982900 : if (i == 0)
1388 : {
1389 978470 : *item = key->curItem;
1390 : }
1391 : else
1392 : {
1393 8860 : if (ItemPointerIsLossyPage(&key->curItem) ||
1394 4430 : ItemPointerIsLossyPage(item))
1395 : {
1396 : Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
1397 0 : match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1398 0 : GinItemPointerGetBlockNumber(item));
1399 : }
1400 : else
1401 : {
1402 : Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1403 4430 : match = (ginCompareItemPointers(&key->curItem, item) == 0);
1404 : }
1405 : }
1406 : }
1407 996390 : } while (!match);
1408 :
1409 : Assert(!ItemPointerIsMin(item));
1410 :
1411 : /*
1412 : * Now *item contains the first ItemPointer after previous result that
1413 : * satisfied all the keys for that exact TID, or a lossy reference to the
1414 : * same page.
1415 : *
1416 : * We must return recheck = true if any of the keys are marked recheck.
1417 : */
1418 978306 : *recheck = false;
1419 1826316 : for (i = 0; i < so->nkeys; i++)
1420 : {
1421 978678 : GinScanKey key = so->keys + i;
1422 :
1423 978678 : if (key->recheckCurItem)
1424 : {
1425 130668 : *recheck = true;
1426 130668 : break;
1427 : }
1428 : }
1429 :
1430 978306 : return true;
1431 : }
1432 :
1433 :
1434 : /*
1435 : * Functions for scanning the pending list
1436 : */
1437 :
1438 :
1439 : /*
1440 : * Get ItemPointer of next heap row to be checked from pending list.
1441 : * Returns false if there are no more. On pages with several heap rows
1442 : * it returns each row separately, on page with part of heap row returns
1443 : * per page data. pos->firstOffset and pos->lastOffset are set to identify
1444 : * the range of pending-list tuples belonging to this heap row.
1445 : *
1446 : * The pendingBuffer is presumed pinned and share-locked on entry, and is
1447 : * pinned and share-locked on success exit. On failure exit it's released.
1448 : */
1449 : static bool
1450 1614 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1451 : {
1452 : OffsetNumber maxoff;
1453 : Page page;
1454 : IndexTuple itup;
1455 :
1456 1614 : ItemPointerSetInvalid(&pos->item);
1457 : for (;;)
1458 : {
1459 1614 : page = BufferGetPage(pos->pendingBuffer);
1460 :
1461 1614 : maxoff = PageGetMaxOffsetNumber(page);
1462 1614 : if (pos->firstOffset > maxoff)
1463 : {
1464 166 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1465 :
1466 166 : if (blkno == InvalidBlockNumber)
1467 : {
1468 166 : UnlockReleaseBuffer(pos->pendingBuffer);
1469 166 : pos->pendingBuffer = InvalidBuffer;
1470 :
1471 166 : return false;
1472 : }
1473 : else
1474 : {
1475 : /*
1476 : * Here we must prevent deletion of next page by insertcleanup
1477 : * process, which may be trying to obtain exclusive lock on
1478 : * current page. So, we lock next page before releasing the
1479 : * current one
1480 : */
1481 0 : Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1482 :
1483 0 : LockBuffer(tmpbuf, GIN_SHARE);
1484 0 : UnlockReleaseBuffer(pos->pendingBuffer);
1485 :
1486 0 : pos->pendingBuffer = tmpbuf;
1487 0 : pos->firstOffset = FirstOffsetNumber;
1488 : }
1489 : }
1490 : else
1491 : {
1492 1448 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1493 1448 : pos->item = itup->t_tid;
1494 1448 : if (GinPageHasFullRow(page))
1495 : {
1496 : /*
1497 : * find itempointer to the next row
1498 : */
1499 3354 : for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1500 : {
1501 3188 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1502 3188 : if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1503 1282 : break;
1504 : }
1505 : }
1506 : else
1507 : {
1508 : /*
1509 : * All itempointers are the same on this page
1510 : */
1511 0 : pos->lastOffset = maxoff + 1;
1512 : }
1513 :
1514 : /*
1515 : * Now pos->firstOffset points to the first tuple of current heap
1516 : * row, pos->lastOffset points to the first tuple of next heap row
1517 : * (or to the end of page)
1518 : */
1519 1448 : break;
1520 : }
1521 : }
1522 :
1523 1448 : return true;
1524 : }
1525 :
1526 : /*
1527 : * Scan pending-list page from current tuple (off) up till the first of:
1528 : * - match is found (then returns true)
1529 : * - no later match is possible
1530 : * - tuple's attribute number is not equal to entry's attrnum
1531 : * - reach end of page
1532 : *
1533 : * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1534 : * of gintuple_get_key() on the current page.
1535 : */
1536 : static bool
1537 0 : matchPartialInPendingList(GinState *ginstate, Page page,
1538 : OffsetNumber off, OffsetNumber maxoff,
1539 : GinScanEntry entry,
1540 : Datum *datum, GinNullCategory *category,
1541 : bool *datumExtracted)
1542 : {
1543 : IndexTuple itup;
1544 : int32 cmp;
1545 :
1546 : /* Partial match to a null is not possible */
1547 0 : if (entry->queryCategory != GIN_CAT_NORM_KEY)
1548 0 : return false;
1549 :
1550 0 : while (off < maxoff)
1551 : {
1552 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1553 :
1554 0 : if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1555 0 : return false;
1556 :
1557 0 : if (datumExtracted[off - 1] == false)
1558 : {
1559 0 : datum[off - 1] = gintuple_get_key(ginstate, itup,
1560 0 : &category[off - 1]);
1561 0 : datumExtracted[off - 1] = true;
1562 : }
1563 :
1564 : /* Once we hit nulls, no further match is possible */
1565 0 : if (category[off - 1] != GIN_CAT_NORM_KEY)
1566 0 : return false;
1567 :
1568 : /*----------
1569 : * Check partial match.
1570 : * case cmp == 0 => match
1571 : * case cmp > 0 => not match and end scan (no later match possible)
1572 : * case cmp < 0 => not match and continue scan
1573 : *----------
1574 : */
1575 0 : cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1576 0 : ginstate->supportCollation[entry->attnum - 1],
1577 : entry->queryKey,
1578 0 : datum[off - 1],
1579 0 : UInt16GetDatum(entry->strategy),
1580 0 : PointerGetDatum(entry->extra_data)));
1581 0 : if (cmp == 0)
1582 0 : return true;
1583 0 : else if (cmp > 0)
1584 0 : return false;
1585 :
1586 0 : off++;
1587 : }
1588 :
1589 0 : return false;
1590 : }
1591 :
1592 : /*
1593 : * Set up the entryRes array for each key by looking at
1594 : * every entry for current heap row in pending list.
1595 : *
1596 : * Returns true if each scan key has at least one entryRes match.
1597 : * This corresponds to the situations where the normal index search will
1598 : * try to apply the key's consistentFn. (A tuple not meeting that requirement
1599 : * cannot be returned by the normal search since no entry stream will
1600 : * source its TID.)
1601 : *
1602 : * The pendingBuffer is presumed pinned and share-locked on entry.
1603 : */
1604 : static bool
1605 1448 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1606 : {
1607 1448 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1608 : OffsetNumber attrnum;
1609 : Page page;
1610 : IndexTuple itup;
1611 : int i,
1612 : j;
1613 :
1614 : /*
1615 : * Reset all entryRes and hasMatchKey flags
1616 : */
1617 3924 : for (i = 0; i < so->nkeys; i++)
1618 : {
1619 2476 : GinScanKey key = so->keys + i;
1620 :
1621 2476 : memset(key->entryRes, GIN_FALSE, key->nentries);
1622 : }
1623 1448 : memset(pos->hasMatchKey, false, so->nkeys);
1624 :
1625 : /*
1626 : * Outer loop iterates over multiple pending-list pages when a single heap
1627 : * row has entries spanning those pages.
1628 : */
1629 : for (;;)
1630 0 : {
1631 : Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1632 : GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1633 : bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1634 :
1635 : Assert(pos->lastOffset > pos->firstOffset);
1636 1448 : memset(datumExtracted + pos->firstOffset - 1, 0,
1637 1448 : sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1638 :
1639 1448 : page = BufferGetPage(pos->pendingBuffer);
1640 :
1641 3924 : for (i = 0; i < so->nkeys; i++)
1642 : {
1643 2476 : GinScanKey key = so->keys + i;
1644 :
1645 4776 : for (j = 0; j < key->nentries; j++)
1646 : {
1647 2300 : GinScanEntry entry = key->scanEntry[j];
1648 2300 : OffsetNumber StopLow = pos->firstOffset,
1649 2300 : StopHigh = pos->lastOffset,
1650 : StopMiddle;
1651 :
1652 : /* If already matched on earlier page, do no extra work */
1653 2300 : if (key->entryRes[j])
1654 0 : continue;
1655 :
1656 : /*
1657 : * Interesting tuples are from pos->firstOffset to
1658 : * pos->lastOffset and they are ordered by (attnum, Datum) as
1659 : * it's done in entry tree. So we can use binary search to
1660 : * avoid linear scanning.
1661 : */
1662 5108 : while (StopLow < StopHigh)
1663 : {
1664 : int res;
1665 :
1666 4010 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1667 :
1668 4010 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1669 :
1670 4010 : attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1671 :
1672 4010 : if (key->attnum < attrnum)
1673 : {
1674 798 : StopHigh = StopMiddle;
1675 798 : continue;
1676 : }
1677 3212 : if (key->attnum > attrnum)
1678 : {
1679 660 : StopLow = StopMiddle + 1;
1680 660 : continue;
1681 : }
1682 :
1683 2552 : if (datumExtracted[StopMiddle - 1] == false)
1684 : {
1685 4952 : datum[StopMiddle - 1] =
1686 2476 : gintuple_get_key(&so->ginstate, itup,
1687 2476 : &category[StopMiddle - 1]);
1688 2476 : datumExtracted[StopMiddle - 1] = true;
1689 : }
1690 :
1691 2552 : if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1692 : {
1693 : /* special behavior depending on searchMode */
1694 1084 : if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1695 : {
1696 : /* match anything except NULL_ITEM */
1697 1084 : if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1698 378 : res = -1;
1699 : else
1700 706 : res = 0;
1701 : }
1702 : else
1703 : {
1704 : /* match everything */
1705 0 : res = 0;
1706 : }
1707 : }
1708 : else
1709 : {
1710 1468 : res = ginCompareEntries(&so->ginstate,
1711 1468 : entry->attnum,
1712 : entry->queryKey,
1713 1468 : entry->queryCategory,
1714 1468 : datum[StopMiddle - 1],
1715 1468 : category[StopMiddle - 1]);
1716 : }
1717 :
1718 2552 : if (res == 0)
1719 : {
1720 : /*
1721 : * Found exact match (there can be only one, except in
1722 : * EMPTY_QUERY mode).
1723 : *
1724 : * If doing partial match, scan forward from here to
1725 : * end of page to check for matches.
1726 : *
1727 : * See comment above about tuple's ordering.
1728 : */
1729 1202 : if (entry->isPartialMatch)
1730 0 : key->entryRes[j] =
1731 0 : matchPartialInPendingList(&so->ginstate,
1732 : page,
1733 : StopMiddle,
1734 0 : pos->lastOffset,
1735 : entry,
1736 : datum,
1737 : category,
1738 : datumExtracted);
1739 : else
1740 1202 : key->entryRes[j] = true;
1741 :
1742 : /* done with binary search */
1743 1202 : break;
1744 : }
1745 1350 : else if (res < 0)
1746 1314 : StopHigh = StopMiddle;
1747 : else
1748 36 : StopLow = StopMiddle + 1;
1749 : }
1750 :
1751 2300 : if (StopLow >= StopHigh && entry->isPartialMatch)
1752 : {
1753 : /*
1754 : * No exact match on this page. If doing partial match,
1755 : * scan from the first tuple greater than target value to
1756 : * end of page. Note that since we don't remember whether
1757 : * the comparePartialFn told us to stop early on a
1758 : * previous page, we will uselessly apply comparePartialFn
1759 : * to the first tuple on each subsequent page.
1760 : */
1761 0 : key->entryRes[j] =
1762 0 : matchPartialInPendingList(&so->ginstate,
1763 : page,
1764 : StopHigh,
1765 0 : pos->lastOffset,
1766 : entry,
1767 : datum,
1768 : category,
1769 : datumExtracted);
1770 : }
1771 :
1772 2300 : pos->hasMatchKey[i] |= key->entryRes[j];
1773 : }
1774 : }
1775 :
1776 : /* Advance firstOffset over the scanned tuples */
1777 1448 : pos->firstOffset = pos->lastOffset;
1778 :
1779 1448 : if (GinPageHasFullRow(page))
1780 : {
1781 : /*
1782 : * We have examined all pending entries for the current heap row.
1783 : * Break out of loop over pages.
1784 : */
1785 1448 : break;
1786 : }
1787 : else
1788 : {
1789 : /*
1790 : * Advance to next page of pending entries for the current heap
1791 : * row. Complain if there isn't one.
1792 : */
1793 0 : ItemPointerData item = pos->item;
1794 :
1795 0 : if (scanGetCandidate(scan, pos) == false ||
1796 0 : !ItemPointerEquals(&pos->item, &item))
1797 0 : elog(ERROR, "could not find additional pending pages for same heap tuple");
1798 : }
1799 : }
1800 :
1801 : /*
1802 : * All scan keys except excludeOnly require at least one entry to match.
1803 : * excludeOnly keys are an exception, because their implied
1804 : * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all
1805 : * non-excludeOnly scan keys have at least one match.
1806 : */
1807 2538 : for (i = 0; i < so->nkeys; i++)
1808 : {
1809 1984 : if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
1810 894 : return false;
1811 : }
1812 :
1813 554 : return true;
1814 : }
1815 :
1816 : /*
1817 : * Collect all matched rows from pending list into bitmap.
1818 : */
1819 : static void
1820 1586 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1821 : {
1822 1586 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1823 : MemoryContext oldCtx;
1824 : bool recheck,
1825 : match;
1826 : int i;
1827 : pendingPosition pos;
1828 1586 : Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1829 : Page page;
1830 : BlockNumber blkno;
1831 :
1832 1586 : *ntids = 0;
1833 :
1834 : /*
1835 : * Acquire predicate lock on the metapage, to conflict with any fastupdate
1836 : * insertions.
1837 : */
1838 1586 : PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
1839 :
1840 1586 : LockBuffer(metabuffer, GIN_SHARE);
1841 1586 : page = BufferGetPage(metabuffer);
1842 1586 : blkno = GinPageGetMeta(page)->head;
1843 :
1844 : /*
1845 : * fetch head of list before unlocking metapage. head page must be pinned
1846 : * to prevent deletion by vacuum process
1847 : */
1848 1586 : if (blkno == InvalidBlockNumber)
1849 : {
1850 : /* No pending list, so proceed with normal scan */
1851 1420 : UnlockReleaseBuffer(metabuffer);
1852 1420 : return;
1853 : }
1854 :
1855 166 : pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1856 166 : LockBuffer(pos.pendingBuffer, GIN_SHARE);
1857 166 : pos.firstOffset = FirstOffsetNumber;
1858 166 : UnlockReleaseBuffer(metabuffer);
1859 166 : pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1860 :
1861 : /*
1862 : * loop for each heap row. scanGetCandidate returns full row or row's
1863 : * tuples from first page.
1864 : */
1865 1614 : while (scanGetCandidate(scan, &pos))
1866 : {
1867 : /*
1868 : * Check entries in tuple and set up entryRes array.
1869 : *
1870 : * If pending tuples belonging to the current heap row are spread
1871 : * across several pages, collectMatchesForHeapRow will read all of
1872 : * those pages.
1873 : */
1874 1448 : if (!collectMatchesForHeapRow(scan, &pos))
1875 894 : continue;
1876 :
1877 : /*
1878 : * Matching of entries of one row is finished, so check row using
1879 : * consistent functions.
1880 : */
1881 554 : oldCtx = MemoryContextSwitchTo(so->tempCtx);
1882 554 : recheck = false;
1883 554 : match = true;
1884 :
1885 1404 : for (i = 0; i < so->nkeys; i++)
1886 : {
1887 850 : GinScanKey key = so->keys + i;
1888 :
1889 850 : if (!key->boolConsistentFn(key))
1890 : {
1891 0 : match = false;
1892 0 : break;
1893 : }
1894 850 : recheck |= key->recheckCurItem;
1895 : }
1896 :
1897 554 : MemoryContextSwitchTo(oldCtx);
1898 554 : MemoryContextReset(so->tempCtx);
1899 :
1900 554 : if (match)
1901 : {
1902 554 : tbm_add_tuples(tbm, &pos.item, 1, recheck);
1903 554 : (*ntids)++;
1904 : }
1905 : }
1906 :
1907 166 : pfree(pos.hasMatchKey);
1908 : }
1909 :
1910 :
1911 : #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1912 :
1913 : int64
1914 1598 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
1915 : {
1916 1598 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1917 : int64 ntids;
1918 : ItemPointerData iptr;
1919 : bool recheck;
1920 :
1921 : /*
1922 : * Set up the scan keys, and check for unsatisfiable query.
1923 : */
1924 1598 : ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1925 : * sure */
1926 1598 : ginNewScanKey(scan);
1927 :
1928 1598 : if (GinIsVoidRes(scan))
1929 12 : return 0;
1930 :
1931 1586 : ntids = 0;
1932 :
1933 : /*
1934 : * First, scan the pending list and collect any matching entries into the
1935 : * bitmap. After we scan a pending item, some other backend could post it
1936 : * into the main index, and so we might visit it a second time during the
1937 : * main scan. This is okay because we'll just re-set the same bit in the
1938 : * bitmap. (The possibility of duplicate visits is a major reason why GIN
1939 : * can't support the amgettuple API, however.) Note that it would not do
1940 : * to scan the main index before the pending list, since concurrent
1941 : * cleanup could then make us miss entries entirely.
1942 : */
1943 1586 : scanPendingInsert(scan, tbm, &ntids);
1944 :
1945 : /*
1946 : * Now scan the main index.
1947 : */
1948 1586 : startScan(scan);
1949 :
1950 1586 : ItemPointerSetMin(&iptr);
1951 :
1952 : for (;;)
1953 : {
1954 979892 : CHECK_FOR_INTERRUPTS();
1955 :
1956 979892 : if (!scanGetItem(scan, iptr, &iptr, &recheck))
1957 1586 : break;
1958 :
1959 978306 : if (ItemPointerIsLossyPage(&iptr))
1960 0 : tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1961 : else
1962 978306 : tbm_add_tuples(tbm, &iptr, 1, recheck);
1963 978306 : ntids++;
1964 : }
1965 :
1966 1586 : return ntids;
1967 : }
|