Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginget.c
4 : * fetch tuples from a GIN scan.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginget.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/relscan.h"
19 : #include "common/pg_prng.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/datum.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : /* GUC parameter */
27 : int GinFuzzySearchLimit = 0;
28 :
29 : typedef struct pendingPosition
30 : {
31 : Buffer pendingBuffer;
32 : OffsetNumber firstOffset;
33 : OffsetNumber lastOffset;
34 : ItemPointerData item;
35 : bool *hasMatchKey;
36 : } pendingPosition;
37 :
38 :
39 : /*
40 : * Goes to the next page if current offset is outside of bounds
41 : */
42 : static bool
43 407620 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
44 : {
45 407620 : Page page = BufferGetPage(stack->buffer);
46 :
47 407620 : if (stack->off > PageGetMaxOffsetNumber(page))
48 : {
49 : /*
50 : * We scanned the whole page, so we should take right page
51 : */
52 3790 : if (GinPageRightMost(page))
53 366 : return false; /* no more pages */
54 :
55 3424 : stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
56 3424 : stack->blkno = BufferGetBlockNumber(stack->buffer);
57 3424 : stack->off = FirstOffsetNumber;
58 3424 : PredicateLockPage(btree->index, stack->blkno, snapshot);
59 : }
60 :
61 407254 : return true;
62 : }
63 :
64 : /*
65 : * Scan all pages of a posting tree and save all its heap ItemPointers
66 : * in scanEntry->matchBitmap
67 : */
68 : static void
69 12 : scanPostingTree(Relation index, GinScanEntry scanEntry,
70 : BlockNumber rootPostingTree)
71 : {
72 : GinBtreeData btree;
73 : GinBtreeStack *stack;
74 : Buffer buffer;
75 : Page page;
76 :
77 : /* Descend to the leftmost leaf page */
78 12 : stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
79 12 : buffer = stack->buffer;
80 :
81 12 : IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
82 :
83 12 : freeGinBtreeStack(stack);
84 :
85 : /*
86 : * Loop iterates through all leaf pages of posting tree
87 : */
88 : for (;;)
89 : {
90 30 : page = BufferGetPage(buffer);
91 30 : if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
92 : {
93 30 : int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
94 :
95 30 : scanEntry->predictNumberResult += n;
96 : }
97 :
98 30 : if (GinPageRightMost(page))
99 12 : break; /* no more pages */
100 :
101 18 : buffer = ginStepRight(buffer, index, GIN_SHARE);
102 : }
103 :
104 12 : UnlockReleaseBuffer(buffer);
105 12 : }
106 :
107 : /*
108 : * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
109 : * match the search entry. This supports three different match modes:
110 : *
111 : * 1. Partial-match support: scan from current point until the
112 : * comparePartialFn says we're done.
113 : * 2. SEARCH_MODE_ALL: scan from current point (which should be first
114 : * key for the current attnum) until we hit null items or end of attnum
115 : * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
116 : * key for the current attnum) until we hit end of attnum
117 : *
118 : * Returns true if done, false if it's necessary to restart scan from scratch
119 : */
120 : static bool
121 534 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
122 : GinScanEntry scanEntry, Snapshot snapshot)
123 : {
124 : OffsetNumber attnum;
125 : CompactAttribute *attr;
126 :
127 : /* Initialize empty bitmap result */
128 534 : scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
129 :
130 : /* Null query cannot partial-match anything */
131 534 : if (scanEntry->isPartialMatch &&
132 252 : scanEntry->queryCategory != GIN_CAT_NORM_KEY)
133 0 : return true;
134 :
135 : /* Locate tupdesc entry for key column (for attbyval/attlen data) */
136 534 : attnum = scanEntry->attnum;
137 534 : attr = TupleDescCompactAttr(btree->ginstate->origTupdesc, attnum - 1);
138 :
139 : /*
140 : * Predicate lock entry leaf page, following pages will be locked by
141 : * moveRightIfItNeeded()
142 : */
143 534 : PredicateLockPage(btree->index,
144 : BufferGetBlockNumber(stack->buffer),
145 : snapshot);
146 :
147 : for (;;)
148 407074 : {
149 : Page page;
150 : IndexTuple itup;
151 : Datum idatum;
152 : GinNullCategory icategory;
153 :
154 : /*
155 : * stack->off points to the interested entry, buffer is already locked
156 : */
157 407608 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
158 534 : return true;
159 :
160 407242 : page = BufferGetPage(stack->buffer);
161 407242 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
162 :
163 : /*
164 : * If tuple stores another attribute then stop scan
165 : */
166 407242 : if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
167 0 : return true;
168 :
169 : /* Safe to fetch attribute value */
170 407242 : idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
171 :
172 : /*
173 : * Check for appropriate scan stop conditions
174 : */
175 407242 : if (scanEntry->isPartialMatch)
176 : {
177 : int32 cmp;
178 :
179 : /*
180 : * In partial match, stop scan at any null (including
181 : * placeholders); partial matches never match nulls
182 : */
183 1332 : if (icategory != GIN_CAT_NORM_KEY)
184 10 : return true;
185 :
186 : /*----------
187 : * Check of partial match.
188 : * case cmp == 0 => match
189 : * case cmp > 0 => not match and finish scan
190 : * case cmp < 0 => not match and continue scan
191 : *----------
192 : */
193 1322 : cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
194 1322 : btree->ginstate->supportCollation[attnum - 1],
195 : scanEntry->queryKey,
196 : idatum,
197 1322 : UInt16GetDatum(scanEntry->strategy),
198 1322 : PointerGetDatum(scanEntry->extra_data)));
199 :
200 1322 : if (cmp > 0)
201 130 : return true;
202 1192 : else if (cmp < 0)
203 : {
204 60 : stack->off++;
205 60 : continue;
206 : }
207 : }
208 405910 : else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
209 : {
210 : /*
211 : * In ALL mode, we are not interested in null items, so we can
212 : * stop if we get to a null-item placeholder (which will be the
213 : * last entry for a given attnum). We do want to include NULL_KEY
214 : * and EMPTY_ITEM entries, though.
215 : */
216 405910 : if (icategory == GIN_CAT_NULL_ITEM)
217 28 : return true;
218 : }
219 :
220 : /*
221 : * OK, we want to return the TIDs listed in this entry.
222 : */
223 407014 : if (GinIsPostingTree(itup))
224 : {
225 12 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
226 :
227 : /*
228 : * We should unlock current page (but not unpin) during tree scan
229 : * to prevent deadlock with vacuum processes.
230 : *
231 : * We save current entry value (idatum) to be able to re-find our
232 : * tuple after re-locking
233 : */
234 12 : if (icategory == GIN_CAT_NORM_KEY)
235 12 : idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
236 :
237 12 : LockBuffer(stack->buffer, GIN_UNLOCK);
238 :
239 : /*
240 : * Acquire predicate lock on the posting tree. We already hold a
241 : * lock on the entry page, but insertions to the posting tree
242 : * don't check for conflicts on that level.
243 : */
244 12 : PredicateLockPage(btree->index, rootPostingTree, snapshot);
245 :
246 : /* Collect all the TIDs in this entry's posting tree */
247 12 : scanPostingTree(btree->index, scanEntry, rootPostingTree);
248 :
249 : /*
250 : * We lock again the entry page and while it was unlocked insert
251 : * might have occurred, so we need to re-find our position.
252 : */
253 12 : LockBuffer(stack->buffer, GIN_SHARE);
254 12 : page = BufferGetPage(stack->buffer);
255 12 : if (!GinPageIsLeaf(page))
256 : {
257 : /*
258 : * Root page becomes non-leaf while we unlock it. We will
259 : * start again, this situation doesn't occur often - root can
260 : * became a non-leaf only once per life of index.
261 : */
262 0 : return false;
263 : }
264 :
265 : /* Search forward to re-find idatum */
266 : for (;;)
267 : {
268 12 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
269 0 : ereport(ERROR,
270 : (errcode(ERRCODE_INTERNAL_ERROR),
271 : errmsg("failed to re-find tuple within index \"%s\"",
272 : RelationGetRelationName(btree->index))));
273 :
274 12 : page = BufferGetPage(stack->buffer);
275 12 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
276 :
277 12 : if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
278 : {
279 : Datum newDatum;
280 : GinNullCategory newCategory;
281 :
282 12 : newDatum = gintuple_get_key(btree->ginstate, itup,
283 : &newCategory);
284 :
285 12 : if (ginCompareEntries(btree->ginstate, attnum,
286 : newDatum, newCategory,
287 : idatum, icategory) == 0)
288 12 : break; /* Found! */
289 : }
290 :
291 0 : stack->off++;
292 : }
293 :
294 12 : if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
295 0 : pfree(DatumGetPointer(idatum));
296 : }
297 : else
298 : {
299 : ItemPointer ipd;
300 : int nipd;
301 :
302 407002 : ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
303 407002 : tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
304 407002 : scanEntry->predictNumberResult += GinGetNPosting(itup);
305 407002 : pfree(ipd);
306 : }
307 :
308 : /*
309 : * Done with this entry, go to the next
310 : */
311 407014 : stack->off++;
312 : }
313 : }
314 :
315 : /*
316 : * Start* functions setup beginning state of searches: finds correct buffer and pins it.
317 : */
318 : static void
319 6692 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
320 : {
321 : GinBtreeData btreeEntry;
322 : GinBtreeStack *stackEntry;
323 : Page page;
324 : bool needUnlock;
325 :
326 6692 : restartScanEntry:
327 6692 : entry->buffer = InvalidBuffer;
328 6692 : ItemPointerSetMin(&entry->curItem);
329 6692 : entry->offset = InvalidOffsetNumber;
330 6692 : if (entry->list)
331 0 : pfree(entry->list);
332 6692 : entry->list = NULL;
333 6692 : entry->nlist = 0;
334 6692 : entry->matchBitmap = NULL;
335 6692 : entry->matchResult = NULL;
336 6692 : entry->reduceResult = false;
337 6692 : entry->predictNumberResult = 0;
338 :
339 : /*
340 : * we should find entry, and begin scan of posting tree or just store
341 : * posting list in memory
342 : */
343 6692 : ginPrepareEntryScan(&btreeEntry, entry->attnum,
344 6692 : entry->queryKey, entry->queryCategory,
345 : ginstate);
346 6692 : stackEntry = ginFindLeafPage(&btreeEntry, true, false);
347 6692 : page = BufferGetPage(stackEntry->buffer);
348 :
349 : /* ginFindLeafPage() will have already checked snapshot age. */
350 6692 : needUnlock = true;
351 :
352 6692 : entry->isFinished = true;
353 :
354 6692 : if (entry->isPartialMatch ||
355 6440 : entry->queryCategory == GIN_CAT_EMPTY_QUERY)
356 : {
357 : /*
358 : * btreeEntry.findItem locates the first item >= given search key.
359 : * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
360 : * because of the way the GIN_CAT_EMPTY_QUERY category code is
361 : * assigned.) We scan forward from there and collect all TIDs needed
362 : * for the entry type.
363 : */
364 534 : btreeEntry.findItem(&btreeEntry, stackEntry);
365 534 : if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
366 534 : == false)
367 : {
368 : /*
369 : * GIN tree was seriously restructured, so we will cleanup all
370 : * found data and rescan. See comments near 'return false' in
371 : * collectMatchBitmap()
372 : */
373 0 : if (entry->matchBitmap)
374 : {
375 0 : if (entry->matchIterator)
376 0 : tbm_end_private_iterate(entry->matchIterator);
377 0 : entry->matchIterator = NULL;
378 0 : tbm_free(entry->matchBitmap);
379 0 : entry->matchBitmap = NULL;
380 : }
381 0 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
382 0 : freeGinBtreeStack(stackEntry);
383 0 : goto restartScanEntry;
384 : }
385 :
386 534 : if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
387 : {
388 420 : entry->matchIterator =
389 420 : tbm_begin_private_iterate(entry->matchBitmap);
390 420 : entry->isFinished = false;
391 : }
392 : }
393 6158 : else if (btreeEntry.findItem(&btreeEntry, stackEntry))
394 : {
395 4448 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
396 :
397 4448 : if (GinIsPostingTree(itup))
398 : {
399 64 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
400 : GinBtreeStack *stack;
401 : Page entrypage;
402 : ItemPointerData minItem;
403 :
404 : /*
405 : * This is an equality scan, so lock the root of the posting tree.
406 : * It represents a lock on the exact key value, and covers all the
407 : * items in the posting tree.
408 : */
409 64 : PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
410 :
411 : /*
412 : * We should unlock entry page before touching posting tree to
413 : * prevent deadlocks with vacuum processes. Because entry is never
414 : * deleted from page and posting tree is never reduced to the
415 : * posting list, we can unlock page after getting BlockNumber of
416 : * root of posting tree.
417 : */
418 64 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
419 64 : needUnlock = false;
420 :
421 64 : stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
422 : rootPostingTree);
423 64 : entry->buffer = stack->buffer;
424 :
425 : /*
426 : * We keep buffer pinned because we need to prevent deletion of
427 : * page during scan. See GIN's vacuum implementation. RefCount is
428 : * increased to keep buffer pinned after freeGinBtreeStack() call.
429 : */
430 64 : IncrBufferRefCount(entry->buffer);
431 :
432 64 : entrypage = BufferGetPage(entry->buffer);
433 :
434 : /*
435 : * Load the first page into memory.
436 : */
437 64 : ItemPointerSetMin(&minItem);
438 64 : entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
439 :
440 64 : entry->predictNumberResult = stack->predictNumber * entry->nlist;
441 :
442 64 : LockBuffer(entry->buffer, GIN_UNLOCK);
443 64 : freeGinBtreeStack(stack);
444 64 : entry->isFinished = false;
445 : }
446 : else
447 : {
448 : /*
449 : * Lock the entry leaf page. This is more coarse-grained than
450 : * necessary, because it will conflict with any insertions that
451 : * land on the same leaf page, not only the exact key we searched
452 : * for. But locking an individual tuple would require updating
453 : * that lock whenever it moves because of insertions or vacuums,
454 : * which seems too complicated.
455 : */
456 4384 : PredicateLockPage(ginstate->index,
457 : BufferGetBlockNumber(stackEntry->buffer),
458 : snapshot);
459 4384 : if (GinGetNPosting(itup) > 0)
460 : {
461 4378 : entry->list = ginReadTuple(ginstate, entry->attnum, itup,
462 : &entry->nlist);
463 4378 : entry->predictNumberResult = entry->nlist;
464 :
465 4378 : entry->isFinished = false;
466 : }
467 : }
468 : }
469 : else
470 : {
471 : /*
472 : * No entry found. Predicate lock the leaf page, to lock the place
473 : * where the entry would've been, had there been one.
474 : */
475 1710 : PredicateLockPage(ginstate->index,
476 : BufferGetBlockNumber(stackEntry->buffer), snapshot);
477 : }
478 :
479 6692 : if (needUnlock)
480 6628 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
481 6692 : freeGinBtreeStack(stackEntry);
482 6692 : }
483 :
484 : /*
485 : * Comparison function for scan entry indexes. Sorts by predictNumberResult,
486 : * least frequent items first.
487 : */
488 : static int
489 9440 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
490 : {
491 9440 : const GinScanKey key = (const GinScanKey) arg;
492 9440 : int i1 = *(const int *) a1;
493 9440 : int i2 = *(const int *) a2;
494 9440 : uint32 n1 = key->scanEntry[i1]->predictNumberResult;
495 9440 : uint32 n2 = key->scanEntry[i2]->predictNumberResult;
496 :
497 9440 : if (n1 < n2)
498 1368 : return -1;
499 8072 : else if (n1 == n2)
500 6302 : return 0;
501 : else
502 1770 : return 1;
503 : }
504 :
505 : static void
506 1714 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
507 : {
508 1714 : MemoryContext oldCtx = CurrentMemoryContext;
509 : int i;
510 : int j;
511 : int *entryIndexes;
512 :
513 1714 : ItemPointerSetMin(&key->curItem);
514 1714 : key->curItemMatches = false;
515 1714 : key->recheckCurItem = false;
516 1714 : key->isFinished = false;
517 :
518 : /*
519 : * Divide the entries into two distinct sets: required and additional.
520 : * Additional entries are not enough for a match alone, without any items
521 : * from the required set, but are needed by the consistent function to
522 : * decide if an item matches. When scanning, we can skip over items from
523 : * additional entries that have no corresponding matches in any of the
524 : * required entries. That speeds up queries like "frequent & rare"
525 : * considerably, if the frequent term can be put in the additional set.
526 : *
527 : * There can be many legal ways to divide them entries into these two
528 : * sets. A conservative division is to just put everything in the required
529 : * set, but the more you can put in the additional set, the more you can
530 : * skip during the scan. To maximize skipping, we try to put as many
531 : * frequent items as possible into additional, and less frequent ones into
532 : * required. To do that, sort the entries by frequency
533 : * (predictNumberResult), and put entries into the required set in that
534 : * order, until the consistent function says that none of the remaining
535 : * entries can form a match, without any items from the required set. The
536 : * rest go to the additional set.
537 : *
538 : * Exclude-only scan keys are known to have no required entries.
539 : */
540 1714 : if (key->excludeOnly)
541 : {
542 38 : MemoryContextSwitchTo(so->keyCtx);
543 :
544 38 : key->nrequired = 0;
545 38 : key->nadditional = key->nentries;
546 38 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
547 44 : for (i = 0; i < key->nadditional; i++)
548 6 : key->additionalEntries[i] = key->scanEntry[i];
549 : }
550 1676 : else if (key->nentries > 1)
551 : {
552 560 : MemoryContextSwitchTo(so->tempCtx);
553 :
554 560 : entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
555 6130 : for (i = 0; i < key->nentries; i++)
556 5570 : entryIndexes[i] = i;
557 560 : qsort_arg(entryIndexes, key->nentries, sizeof(int),
558 : entryIndexByFrequencyCmp, key);
559 :
560 4834 : for (i = 0; i < key->nentries - 1; i++)
561 : {
562 : /* Pass all entries <= i as FALSE, and the rest as MAYBE */
563 521048 : for (j = 0; j <= i; j++)
564 516356 : key->entryRes[entryIndexes[j]] = GIN_FALSE;
565 521856 : for (j = i + 1; j < key->nentries; j++)
566 517164 : key->entryRes[entryIndexes[j]] = GIN_MAYBE;
567 :
568 4692 : if (key->triConsistentFn(key) == GIN_FALSE)
569 418 : break;
570 : }
571 : /* i is now the last required entry. */
572 :
573 560 : MemoryContextSwitchTo(so->keyCtx);
574 :
575 560 : key->nrequired = i + 1;
576 560 : key->nadditional = key->nentries - key->nrequired;
577 560 : key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
578 560 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
579 :
580 560 : j = 0;
581 5394 : for (i = 0; i < key->nrequired; i++)
582 4834 : key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
583 1296 : for (i = 0; i < key->nadditional; i++)
584 736 : key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
585 :
586 : /* clean up after consistentFn calls (also frees entryIndexes) */
587 560 : MemoryContextReset(so->tempCtx);
588 : }
589 : else
590 : {
591 1116 : MemoryContextSwitchTo(so->keyCtx);
592 :
593 1116 : key->nrequired = 1;
594 1116 : key->nadditional = 0;
595 1116 : key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
596 1116 : key->requiredEntries[0] = key->scanEntry[0];
597 : }
598 1714 : MemoryContextSwitchTo(oldCtx);
599 1714 : }
600 :
601 : static void
602 1586 : startScan(IndexScanDesc scan)
603 : {
604 1586 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
605 1586 : GinState *ginstate = &so->ginstate;
606 : uint32 i;
607 :
608 8278 : for (i = 0; i < so->totalentries; i++)
609 6692 : startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
610 :
611 1586 : if (GinFuzzySearchLimit > 0)
612 : {
613 : /*
614 : * If all of keys more than threshold we will try to reduce result, we
615 : * hope (and only hope, for intersection operation of array our
616 : * supposition isn't true), that total result will not more than
617 : * minimal predictNumberResult.
618 : */
619 6 : bool reduce = true;
620 :
621 12 : for (i = 0; i < so->totalentries; i++)
622 : {
623 6 : if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
624 : {
625 0 : reduce = false;
626 0 : break;
627 : }
628 : }
629 6 : if (reduce)
630 : {
631 12 : for (i = 0; i < so->totalentries; i++)
632 : {
633 6 : so->entries[i]->predictNumberResult /= so->totalentries;
634 6 : so->entries[i]->reduceResult = true;
635 : }
636 : }
637 : }
638 :
639 : /*
640 : * Now that we have the estimates for the entry frequencies, finish
641 : * initializing the scan keys.
642 : */
643 3300 : for (i = 0; i < so->nkeys; i++)
644 1714 : startScanKey(ginstate, so, so->keys + i);
645 1586 : }
646 :
647 : /*
648 : * Load the next batch of item pointers from a posting tree.
649 : *
650 : * Note that we copy the page into GinScanEntry->list array and unlock it, but
651 : * keep it pinned to prevent interference with vacuum.
652 : */
653 : static void
654 100 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
655 : ItemPointerData advancePast)
656 : {
657 : Page page;
658 : int i;
659 : bool stepright;
660 :
661 100 : if (!BufferIsValid(entry->buffer))
662 : {
663 0 : entry->isFinished = true;
664 0 : return;
665 : }
666 :
667 : /*
668 : * We have two strategies for finding the correct page: step right from
669 : * the current page, or descend the tree again from the root. If
670 : * advancePast equals the current item, the next matching item should be
671 : * on the next page, so we step right. Otherwise, descend from root.
672 : */
673 100 : if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
674 : {
675 94 : stepright = true;
676 94 : LockBuffer(entry->buffer, GIN_SHARE);
677 : }
678 : else
679 : {
680 : GinBtreeStack *stack;
681 :
682 6 : ReleaseBuffer(entry->buffer);
683 :
684 : /*
685 : * Set the search key, and find the correct leaf page.
686 : */
687 6 : if (ItemPointerIsLossyPage(&advancePast))
688 : {
689 0 : ItemPointerSet(&entry->btree.itemptr,
690 0 : GinItemPointerGetBlockNumber(&advancePast) + 1,
691 : FirstOffsetNumber);
692 : }
693 : else
694 : {
695 6 : ItemPointerSet(&entry->btree.itemptr,
696 : GinItemPointerGetBlockNumber(&advancePast),
697 6 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
698 : }
699 6 : entry->btree.fullScan = false;
700 6 : stack = ginFindLeafPage(&entry->btree, true, false);
701 :
702 : /* we don't need the stack, just the buffer. */
703 6 : entry->buffer = stack->buffer;
704 6 : IncrBufferRefCount(entry->buffer);
705 6 : freeGinBtreeStack(stack);
706 6 : stepright = false;
707 : }
708 :
709 100 : elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
710 : GinItemPointerGetBlockNumber(&advancePast),
711 : GinItemPointerGetOffsetNumber(&advancePast),
712 : !stepright);
713 :
714 100 : page = BufferGetPage(entry->buffer);
715 : for (;;)
716 : {
717 106 : entry->offset = InvalidOffsetNumber;
718 106 : if (entry->list)
719 : {
720 94 : pfree(entry->list);
721 94 : entry->list = NULL;
722 94 : entry->nlist = 0;
723 : }
724 :
725 106 : if (stepright)
726 : {
727 : /*
728 : * We've processed all the entries on this page. If it was the
729 : * last page in the tree, we're done.
730 : */
731 100 : if (GinPageRightMost(page))
732 : {
733 6 : UnlockReleaseBuffer(entry->buffer);
734 6 : entry->buffer = InvalidBuffer;
735 6 : entry->isFinished = true;
736 6 : return;
737 : }
738 :
739 : /*
740 : * Step to next page, following the right link. then find the
741 : * first ItemPointer greater than advancePast.
742 : */
743 94 : entry->buffer = ginStepRight(entry->buffer,
744 : ginstate->index,
745 : GIN_SHARE);
746 94 : page = BufferGetPage(entry->buffer);
747 : }
748 100 : stepright = true;
749 :
750 100 : if (GinPageGetOpaque(page)->flags & GIN_DELETED)
751 0 : continue; /* page was deleted by concurrent vacuum */
752 :
753 : /*
754 : * The first item > advancePast might not be on this page, but
755 : * somewhere to the right, if the page was split, or a non-match from
756 : * another key in the query allowed us to skip some items from this
757 : * entry. Keep following the right-links until we re-find the correct
758 : * page.
759 : */
760 136 : if (!GinPageRightMost(page) &&
761 36 : ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
762 : {
763 : /*
764 : * the item we're looking is > the right bound of the page, so it
765 : * can't be on this page.
766 : */
767 0 : continue;
768 : }
769 :
770 100 : entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
771 :
772 1924 : for (i = 0; i < entry->nlist; i++)
773 : {
774 1918 : if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
775 : {
776 94 : entry->offset = i;
777 :
778 94 : if (GinPageRightMost(page))
779 : {
780 : /* after processing the copied items, we're done. */
781 58 : UnlockReleaseBuffer(entry->buffer);
782 58 : entry->buffer = InvalidBuffer;
783 : }
784 : else
785 36 : LockBuffer(entry->buffer, GIN_UNLOCK);
786 94 : return;
787 : }
788 : }
789 : }
790 : }
791 :
792 : #define gin_rand() pg_prng_double(&pg_global_prng_state)
793 : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
794 :
795 : /*
796 : * Sets entry->curItem to next heap item pointer > advancePast, for one entry
797 : * of one scan key, or sets entry->isFinished to true if there are no more.
798 : *
799 : * Item pointers are returned in ascending order.
800 : *
801 : * Note: this can return a "lossy page" item pointer, indicating that the
802 : * entry potentially matches all items on that heap page. However, it is
803 : * not allowed to return both a lossy page pointer and exact (regular)
804 : * item pointers for the same page. (Doing so would break the key-combination
805 : * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
806 : * current implementation this is guaranteed by the behavior of tidbitmaps.
807 : */
808 : static void
809 1087938 : entryGetItem(GinState *ginstate, GinScanEntry entry,
810 : ItemPointerData advancePast)
811 : {
812 : Assert(!entry->isFinished);
813 :
814 : Assert(!ItemPointerIsValid(&entry->curItem) ||
815 : ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
816 :
817 1087938 : if (entry->matchBitmap)
818 : {
819 : /* A bitmap result */
820 268052 : BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
821 268052 : OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
822 :
823 : for (;;)
824 : {
825 : /*
826 : * If we've exhausted all items on this block, move to next block
827 : * in the bitmap.
828 : */
829 272878 : while (entry->matchResult == NULL ||
830 272458 : (entry->matchResult->ntuples >= 0 &&
831 272458 : entry->offset >= entry->matchResult->ntuples) ||
832 535264 : entry->matchResult->blockno < advancePastBlk ||
833 267632 : (ItemPointerIsLossyPage(&advancePast) &&
834 0 : entry->matchResult->blockno == advancePastBlk))
835 : {
836 5246 : entry->matchResult =
837 5246 : tbm_private_iterate(entry->matchIterator);
838 :
839 5246 : if (entry->matchResult == NULL)
840 : {
841 420 : ItemPointerSetInvalid(&entry->curItem);
842 420 : tbm_end_private_iterate(entry->matchIterator);
843 420 : entry->matchIterator = NULL;
844 420 : entry->isFinished = true;
845 420 : break;
846 : }
847 :
848 : /*
849 : * Reset counter to the beginning of entry->matchResult. Note:
850 : * entry->offset is still greater than matchResult->ntuples if
851 : * matchResult is lossy. So, on next call we will get next
852 : * result from TIDBitmap.
853 : */
854 4826 : entry->offset = 0;
855 : }
856 268052 : if (entry->isFinished)
857 420 : break;
858 :
859 : /*
860 : * We're now on the first page after advancePast which has any
861 : * items on it. If it's a lossy result, return that.
862 : */
863 267632 : if (entry->matchResult->ntuples < 0)
864 : {
865 0 : ItemPointerSetLossyPage(&entry->curItem,
866 : entry->matchResult->blockno);
867 :
868 : /*
869 : * We might as well fall out of the loop; we could not
870 : * estimate number of results on this page to support correct
871 : * reducing of result even if it's enabled.
872 : */
873 0 : break;
874 : }
875 :
876 : /*
877 : * Not a lossy page. Skip over any offsets <= advancePast, and
878 : * return that.
879 : */
880 267632 : if (entry->matchResult->blockno == advancePastBlk)
881 : {
882 : /*
883 : * First, do a quick check against the last offset on the
884 : * page. If that's > advancePast, so are all the other
885 : * offsets, so just go back to the top to get the next page.
886 : */
887 263226 : if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
888 : {
889 0 : entry->offset = entry->matchResult->ntuples;
890 0 : continue;
891 : }
892 :
893 : /* Otherwise scan to find the first item > advancePast */
894 263226 : while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
895 0 : entry->offset++;
896 : }
897 :
898 267632 : ItemPointerSet(&entry->curItem,
899 267632 : entry->matchResult->blockno,
900 267632 : entry->matchResult->offsets[entry->offset]);
901 267632 : entry->offset++;
902 :
903 : /* Done unless we need to reduce the result */
904 267632 : if (!entry->reduceResult || !dropItem(entry))
905 : break;
906 : }
907 : }
908 819886 : else if (!BufferIsValid(entry->buffer))
909 : {
910 : /*
911 : * A posting list from an entry tuple, or the last page of a posting
912 : * tree.
913 : */
914 : for (;;)
915 : {
916 394150 : if (entry->offset >= entry->nlist)
917 : {
918 3890 : ItemPointerSetInvalid(&entry->curItem);
919 3890 : entry->isFinished = true;
920 3890 : break;
921 : }
922 :
923 390260 : entry->curItem = entry->list[entry->offset++];
924 :
925 : /* If we're not past advancePast, keep scanning */
926 390260 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
927 66634 : continue;
928 :
929 : /* Done unless we need to reduce the result */
930 323626 : if (!entry->reduceResult || !dropItem(entry))
931 : break;
932 : }
933 : }
934 : else
935 : {
936 : /* A posting tree */
937 : for (;;)
938 : {
939 : /* If we've processed the current batch, load more items */
940 678958 : while (entry->offset >= entry->nlist)
941 : {
942 100 : entryLoadMoreItems(ginstate, entry, advancePast);
943 :
944 100 : if (entry->isFinished)
945 : {
946 6 : ItemPointerSetInvalid(&entry->curItem);
947 6 : return;
948 : }
949 : }
950 :
951 678858 : entry->curItem = entry->list[entry->offset++];
952 :
953 : /* If we're not past advancePast, keep scanning */
954 678858 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
955 47106 : continue;
956 :
957 : /* Done unless we need to reduce the result */
958 631752 : if (!entry->reduceResult || !dropItem(entry))
959 : break;
960 :
961 : /*
962 : * Advance advancePast (so that entryLoadMoreItems will load the
963 : * right data), and keep scanning
964 : */
965 118910 : advancePast = entry->curItem;
966 : }
967 : }
968 : }
969 :
970 : /*
971 : * Identify the "current" item among the input entry streams for this scan key
972 : * that is greater than advancePast, and test whether it passes the scan key
973 : * qual condition.
974 : *
975 : * The current item is the smallest curItem among the inputs. key->curItem
976 : * is set to that value. key->curItemMatches is set to indicate whether that
977 : * TID passes the consistentFn test. If so, key->recheckCurItem is set true
978 : * iff recheck is needed for this item pointer (including the case where the
979 : * item pointer is a lossy page pointer).
980 : *
981 : * If all entry streams are exhausted, sets key->isFinished to true.
982 : *
983 : * Item pointers must be returned in ascending order.
984 : *
985 : * Note: this can return a "lossy page" item pointer, indicating that the
986 : * key potentially matches all items on that heap page. However, it is
987 : * not allowed to return both a lossy page pointer and exact (regular)
988 : * item pointers for the same page. (Doing so would break the key-combination
989 : * logic in scanGetItem.)
990 : */
991 : static void
992 1002412 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
993 : ItemPointerData advancePast)
994 : {
995 : ItemPointerData minItem;
996 : ItemPointerData curPageLossy;
997 : uint32 i;
998 : bool haveLossyEntry;
999 : GinScanEntry entry;
1000 : GinTernaryValue res;
1001 : MemoryContext oldCtx;
1002 : bool allFinished;
1003 :
1004 : Assert(!key->isFinished);
1005 :
1006 : /*
1007 : * We might have already tested this item; if so, no need to repeat work.
1008 : * (Note: the ">" case can happen, if advancePast is exact but we
1009 : * previously had to set curItem to a lossy-page pointer.)
1010 : */
1011 1002412 : if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
1012 1608 : return;
1013 :
1014 : /*
1015 : * Find the minimum item > advancePast among the active entry streams.
1016 : *
1017 : * Note: a lossy-page entry is encoded by a ItemPointer with max value for
1018 : * offset (0xffff), so that it will sort after any exact entries for the
1019 : * same page. So we'll prefer to return exact pointers not lossy
1020 : * pointers, which is good.
1021 : */
1022 1002390 : ItemPointerSetMax(&minItem);
1023 1002390 : allFinished = true;
1024 2757592 : for (i = 0; i < key->nrequired; i++)
1025 : {
1026 1755202 : entry = key->requiredEntries[i];
1027 :
1028 1755202 : if (entry->isFinished)
1029 560142 : continue;
1030 :
1031 : /*
1032 : * Advance this stream if necessary.
1033 : *
1034 : * In particular, since entry->curItem was initialized with
1035 : * ItemPointerSetMin, this ensures we fetch the first item for each
1036 : * entry on the first call.
1037 : */
1038 1195060 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1039 : {
1040 1034182 : entryGetItem(ginstate, entry, advancePast);
1041 1034182 : if (entry->isFinished)
1042 4138 : continue;
1043 : }
1044 :
1045 1190922 : allFinished = false;
1046 1190922 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1047 1097572 : minItem = entry->curItem;
1048 : }
1049 :
1050 1002390 : if (allFinished && !key->excludeOnly)
1051 : {
1052 : /* all entries are finished */
1053 1586 : key->isFinished = true;
1054 1586 : return;
1055 : }
1056 :
1057 1000804 : if (!key->excludeOnly)
1058 : {
1059 : /*
1060 : * For a normal scan key, we now know there are no matches < minItem.
1061 : *
1062 : * If minItem is lossy, it means that there were no exact items on the
1063 : * page among requiredEntries, because lossy pointers sort after exact
1064 : * items. However, there might be exact items for the same page among
1065 : * additionalEntries, so we mustn't advance past them.
1066 : */
1067 996328 : if (ItemPointerIsLossyPage(&minItem))
1068 : {
1069 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1070 0 : GinItemPointerGetBlockNumber(&minItem))
1071 : {
1072 0 : ItemPointerSet(&advancePast,
1073 : GinItemPointerGetBlockNumber(&minItem),
1074 : InvalidOffsetNumber);
1075 : }
1076 : }
1077 : else
1078 : {
1079 : Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
1080 996328 : ItemPointerSet(&advancePast,
1081 : GinItemPointerGetBlockNumber(&minItem),
1082 996328 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
1083 : }
1084 : }
1085 : else
1086 : {
1087 : /*
1088 : * excludeOnly scan keys don't have any entries that are necessarily
1089 : * present in matching items. So, we consider the item just after
1090 : * advancePast.
1091 : */
1092 : Assert(key->nrequired == 0);
1093 4476 : ItemPointerSet(&minItem,
1094 : GinItemPointerGetBlockNumber(&advancePast),
1095 4476 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
1096 : }
1097 :
1098 : /*
1099 : * We might not have loaded all the entry streams for this TID yet. We
1100 : * could call the consistent function, passing MAYBE for those entries, to
1101 : * see if it can decide if this TID matches based on the information we
1102 : * have. But if the consistent-function is expensive, and cannot in fact
1103 : * decide with partial information, that could be a big loss. So, load all
1104 : * the additional entries, before calling the consistent function.
1105 : */
1106 1067464 : for (i = 0; i < key->nadditional; i++)
1107 : {
1108 66660 : entry = key->additionalEntries[i];
1109 :
1110 66660 : if (entry->isFinished)
1111 412 : continue;
1112 :
1113 66248 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1114 : {
1115 53756 : entryGetItem(ginstate, entry, advancePast);
1116 53756 : if (entry->isFinished)
1117 178 : continue;
1118 : }
1119 :
1120 : /*
1121 : * Normally, none of the items in additionalEntries can have a curItem
1122 : * larger than minItem. But if minItem is a lossy page, then there
1123 : * might be exact items on the same page among additionalEntries.
1124 : */
1125 66070 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1126 : {
1127 : Assert(ItemPointerIsLossyPage(&minItem));
1128 0 : minItem = entry->curItem;
1129 : }
1130 : }
1131 :
1132 : /*
1133 : * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1134 : * and perform consistentFn test.
1135 : *
1136 : * Lossy-page entries pose a problem, since we don't know the correct
1137 : * entryRes state to pass to the consistentFn, and we also don't know what
1138 : * its combining logic will be (could be AND, OR, or even NOT). If the
1139 : * logic is OR then the consistentFn might succeed for all items in the
1140 : * lossy page even when none of the other entries match.
1141 : *
1142 : * Our strategy is to call the tri-state consistent function, with the
1143 : * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1144 : * returns FALSE, none of the lossy items alone are enough for a match, so
1145 : * we don't need to return a lossy-page pointer. Otherwise, return a
1146 : * lossy-page pointer to indicate that the whole heap page must be
1147 : * checked. (On subsequent calls, we'll do nothing until minItem is past
1148 : * the page altogether, thus ensuring that we never return both regular
1149 : * and lossy pointers for the same page.)
1150 : *
1151 : * An exception is that it doesn't matter what we pass for lossy pointers
1152 : * in "hidden" entries, because the consistentFn's result can't depend on
1153 : * them. We could pass them as MAYBE as well, but if we're using the
1154 : * "shim" implementation of a tri-state consistent function (see
1155 : * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1156 : * them as true.
1157 : *
1158 : * Note that only lossy-page entries pointing to the current item's page
1159 : * should trigger this processing; we might have future lossy pages in the
1160 : * entry array, but they aren't relevant yet.
1161 : */
1162 1000804 : key->curItem = minItem;
1163 1000804 : ItemPointerSetLossyPage(&curPageLossy,
1164 : GinItemPointerGetBlockNumber(&key->curItem));
1165 1000804 : haveLossyEntry = false;
1166 2816806 : for (i = 0; i < key->nentries; i++)
1167 : {
1168 1816002 : entry = key->scanEntry[i];
1169 3072994 : if (entry->isFinished == false &&
1170 1256992 : ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1171 : {
1172 0 : if (i < key->nuserentries)
1173 0 : key->entryRes[i] = GIN_MAYBE;
1174 : else
1175 0 : key->entryRes[i] = GIN_TRUE;
1176 0 : haveLossyEntry = true;
1177 : }
1178 : else
1179 1816002 : key->entryRes[i] = GIN_FALSE;
1180 : }
1181 :
1182 : /* prepare for calling consistentFn in temp context */
1183 1000804 : oldCtx = MemoryContextSwitchTo(tempCtx);
1184 :
1185 1000804 : if (haveLossyEntry)
1186 : {
1187 : /* Have lossy-page entries, so see if whole page matches */
1188 0 : res = key->triConsistentFn(key);
1189 :
1190 0 : if (res == GIN_TRUE || res == GIN_MAYBE)
1191 : {
1192 : /* Yes, so clean up ... */
1193 0 : MemoryContextSwitchTo(oldCtx);
1194 0 : MemoryContextReset(tempCtx);
1195 :
1196 : /* and return lossy pointer for whole page */
1197 0 : key->curItem = curPageLossy;
1198 0 : key->curItemMatches = true;
1199 0 : key->recheckCurItem = true;
1200 0 : return;
1201 : }
1202 : }
1203 :
1204 : /*
1205 : * At this point we know that we don't need to return a lossy whole-page
1206 : * pointer, but we might have matches for individual exact item pointers,
1207 : * possibly in combination with a lossy pointer. Pass lossy pointers as
1208 : * MAYBE to the ternary consistent function, to let it decide if this
1209 : * tuple satisfies the overall key, even though we don't know if the lossy
1210 : * entries match.
1211 : *
1212 : * Prepare entryRes array to be passed to consistentFn.
1213 : */
1214 2816806 : for (i = 0; i < key->nentries; i++)
1215 : {
1216 1816002 : entry = key->scanEntry[i];
1217 1816002 : if (entry->isFinished)
1218 559010 : key->entryRes[i] = GIN_FALSE;
1219 : #if 0
1220 :
1221 : /*
1222 : * This case can't currently happen, because we loaded all the entries
1223 : * for this item earlier.
1224 : */
1225 : else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1226 : key->entryRes[i] = GIN_MAYBE;
1227 : #endif
1228 1256992 : else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1229 0 : key->entryRes[i] = GIN_MAYBE;
1230 1256992 : else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1231 1073346 : key->entryRes[i] = GIN_TRUE;
1232 : else
1233 183646 : key->entryRes[i] = GIN_FALSE;
1234 : }
1235 :
1236 1000804 : res = key->triConsistentFn(key);
1237 :
1238 1000804 : switch (res)
1239 : {
1240 865156 : case GIN_TRUE:
1241 865156 : key->curItemMatches = true;
1242 : /* triConsistentFn set recheckCurItem */
1243 865156 : break;
1244 :
1245 18026 : case GIN_FALSE:
1246 18026 : key->curItemMatches = false;
1247 18026 : break;
1248 :
1249 117622 : case GIN_MAYBE:
1250 117622 : key->curItemMatches = true;
1251 117622 : key->recheckCurItem = true;
1252 117622 : break;
1253 :
1254 0 : default:
1255 :
1256 : /*
1257 : * the 'default' case shouldn't happen, but if the consistent
1258 : * function returns something bogus, this is the safe result
1259 : */
1260 0 : key->curItemMatches = true;
1261 0 : key->recheckCurItem = true;
1262 0 : break;
1263 : }
1264 :
1265 : /*
1266 : * We have a tuple, and we know if it matches or not. If it's a non-match,
1267 : * we could continue to find the next matching tuple, but let's break out
1268 : * and give scanGetItem a chance to advance the other keys. They might be
1269 : * able to skip past to a much higher TID, allowing us to save work.
1270 : */
1271 :
1272 : /* clean up after consistentFn calls */
1273 1000804 : MemoryContextSwitchTo(oldCtx);
1274 1000804 : MemoryContextReset(tempCtx);
1275 : }
1276 :
1277 : /*
1278 : * Get next heap item pointer (after advancePast) from scan.
1279 : * Returns true if anything found.
1280 : * On success, *item and *recheck are set.
1281 : *
1282 : * Note: this is very nearly the same logic as in keyGetItem(), except
1283 : * that we know the keys are to be combined with AND logic, whereas in
1284 : * keyGetItem() the combination logic is known only to the consistentFn.
1285 : */
1286 : static bool
1287 979792 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
1288 : ItemPointerData *item, bool *recheck)
1289 : {
1290 979792 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1291 : uint32 i;
1292 : bool match;
1293 :
1294 : /*----------
1295 : * Advance the scan keys in lock-step, until we find an item that matches
1296 : * all the keys. If any key reports isFinished, meaning its subset of the
1297 : * entries is exhausted, we can stop. Otherwise, set *item to the next
1298 : * matching item.
1299 : *
1300 : * This logic works only if a keyGetItem stream can never contain both
1301 : * exact and lossy pointers for the same page. Else we could have a
1302 : * case like
1303 : *
1304 : * stream 1 stream 2
1305 : * ... ...
1306 : * 42/6 42/7
1307 : * 50/1 42/0xffff
1308 : * ... ...
1309 : *
1310 : * We would conclude that 42/6 is not a match and advance stream 1,
1311 : * thus never detecting the match to the lossy pointer in stream 2.
1312 : * (keyGetItem has a similar problem versus entryGetItem.)
1313 : *----------
1314 : */
1315 : do
1316 : {
1317 997876 : ItemPointerSetMin(item);
1318 997876 : match = true;
1319 1980676 : for (i = 0; i < so->nkeys && match; i++)
1320 : {
1321 1002412 : GinScanKey key = so->keys + i;
1322 :
1323 : /*
1324 : * If we're considering a lossy page, skip excludeOnly keys. They
1325 : * can't exclude the whole page anyway.
1326 : */
1327 1002412 : if (ItemPointerIsLossyPage(item) && key->excludeOnly)
1328 : {
1329 : /*
1330 : * ginNewScanKey() should never mark the first key as
1331 : * excludeOnly.
1332 : */
1333 : Assert(i > 0);
1334 0 : continue;
1335 : }
1336 :
1337 : /* Fetch the next item for this key that is > advancePast. */
1338 1002412 : keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
1339 :
1340 1002412 : if (key->isFinished)
1341 1586 : return false;
1342 :
1343 : /*
1344 : * If it's not a match, we can immediately conclude that nothing
1345 : * <= this item matches, without checking the rest of the keys.
1346 : */
1347 1000826 : if (!key->curItemMatches)
1348 : {
1349 18026 : advancePast = key->curItem;
1350 18026 : match = false;
1351 18026 : break;
1352 : }
1353 :
1354 : /*
1355 : * It's a match. We can conclude that nothing < matches, so the
1356 : * other key streams can skip to this item.
1357 : *
1358 : * Beware of lossy pointers, though; from a lossy pointer, we can
1359 : * only conclude that nothing smaller than this *block* matches.
1360 : */
1361 982800 : if (ItemPointerIsLossyPage(&key->curItem))
1362 : {
1363 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1364 0 : GinItemPointerGetBlockNumber(&key->curItem))
1365 : {
1366 0 : ItemPointerSet(&advancePast,
1367 0 : GinItemPointerGetBlockNumber(&key->curItem),
1368 : InvalidOffsetNumber);
1369 : }
1370 : }
1371 : else
1372 : {
1373 : Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
1374 982800 : ItemPointerSet(&advancePast,
1375 982800 : GinItemPointerGetBlockNumber(&key->curItem),
1376 982800 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
1377 : }
1378 :
1379 : /*
1380 : * If this is the first key, remember this location as a potential
1381 : * match, and proceed to check the rest of the keys.
1382 : *
1383 : * Otherwise, check if this is the same item that we checked the
1384 : * previous keys for (or a lossy pointer for the same page). If
1385 : * not, loop back to check the previous keys for this item (we
1386 : * will check this key again too, but keyGetItem returns quickly
1387 : * for that)
1388 : */
1389 982800 : if (i == 0)
1390 : {
1391 978370 : *item = key->curItem;
1392 : }
1393 : else
1394 : {
1395 8860 : if (ItemPointerIsLossyPage(&key->curItem) ||
1396 4430 : ItemPointerIsLossyPage(item))
1397 : {
1398 : Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
1399 0 : match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1400 0 : GinItemPointerGetBlockNumber(item));
1401 : }
1402 : else
1403 : {
1404 : Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1405 4430 : match = (ginCompareItemPointers(&key->curItem, item) == 0);
1406 : }
1407 : }
1408 : }
1409 996290 : } while (!match);
1410 :
1411 : Assert(!ItemPointerIsMin(item));
1412 :
1413 : /*
1414 : * Now *item contains the first ItemPointer after previous result that
1415 : * satisfied all the keys for that exact TID, or a lossy reference to the
1416 : * same page.
1417 : *
1418 : * We must return recheck = true if any of the keys are marked recheck.
1419 : */
1420 978206 : *recheck = false;
1421 1826116 : for (i = 0; i < so->nkeys; i++)
1422 : {
1423 978578 : GinScanKey key = so->keys + i;
1424 :
1425 978578 : if (key->recheckCurItem)
1426 : {
1427 130668 : *recheck = true;
1428 130668 : break;
1429 : }
1430 : }
1431 :
1432 978206 : return true;
1433 : }
1434 :
1435 :
1436 : /*
1437 : * Functions for scanning the pending list
1438 : */
1439 :
1440 :
1441 : /*
1442 : * Get ItemPointer of next heap row to be checked from pending list.
1443 : * Returns false if there are no more. On pages with several heap rows
1444 : * it returns each row separately, on page with part of heap row returns
1445 : * per page data. pos->firstOffset and pos->lastOffset are set to identify
1446 : * the range of pending-list tuples belonging to this heap row.
1447 : *
1448 : * The pendingBuffer is presumed pinned and share-locked on entry, and is
1449 : * pinned and share-locked on success exit. On failure exit it's released.
1450 : */
1451 : static bool
1452 1614 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1453 : {
1454 : OffsetNumber maxoff;
1455 : Page page;
1456 : IndexTuple itup;
1457 :
1458 1614 : ItemPointerSetInvalid(&pos->item);
1459 : for (;;)
1460 : {
1461 1614 : page = BufferGetPage(pos->pendingBuffer);
1462 :
1463 1614 : maxoff = PageGetMaxOffsetNumber(page);
1464 1614 : if (pos->firstOffset > maxoff)
1465 : {
1466 166 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1467 :
1468 166 : if (blkno == InvalidBlockNumber)
1469 : {
1470 166 : UnlockReleaseBuffer(pos->pendingBuffer);
1471 166 : pos->pendingBuffer = InvalidBuffer;
1472 :
1473 166 : return false;
1474 : }
1475 : else
1476 : {
1477 : /*
1478 : * Here we must prevent deletion of next page by insertcleanup
1479 : * process, which may be trying to obtain exclusive lock on
1480 : * current page. So, we lock next page before releasing the
1481 : * current one
1482 : */
1483 0 : Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1484 :
1485 0 : LockBuffer(tmpbuf, GIN_SHARE);
1486 0 : UnlockReleaseBuffer(pos->pendingBuffer);
1487 :
1488 0 : pos->pendingBuffer = tmpbuf;
1489 0 : pos->firstOffset = FirstOffsetNumber;
1490 : }
1491 : }
1492 : else
1493 : {
1494 1448 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1495 1448 : pos->item = itup->t_tid;
1496 1448 : if (GinPageHasFullRow(page))
1497 : {
1498 : /*
1499 : * find itempointer to the next row
1500 : */
1501 3354 : for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1502 : {
1503 3188 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1504 3188 : if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1505 1282 : break;
1506 : }
1507 : }
1508 : else
1509 : {
1510 : /*
1511 : * All itempointers are the same on this page
1512 : */
1513 0 : pos->lastOffset = maxoff + 1;
1514 : }
1515 :
1516 : /*
1517 : * Now pos->firstOffset points to the first tuple of current heap
1518 : * row, pos->lastOffset points to the first tuple of next heap row
1519 : * (or to the end of page)
1520 : */
1521 1448 : break;
1522 : }
1523 : }
1524 :
1525 1448 : return true;
1526 : }
1527 :
1528 : /*
1529 : * Scan pending-list page from current tuple (off) up till the first of:
1530 : * - match is found (then returns true)
1531 : * - no later match is possible
1532 : * - tuple's attribute number is not equal to entry's attrnum
1533 : * - reach end of page
1534 : *
1535 : * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1536 : * of gintuple_get_key() on the current page.
1537 : */
1538 : static bool
1539 0 : matchPartialInPendingList(GinState *ginstate, Page page,
1540 : OffsetNumber off, OffsetNumber maxoff,
1541 : GinScanEntry entry,
1542 : Datum *datum, GinNullCategory *category,
1543 : bool *datumExtracted)
1544 : {
1545 : IndexTuple itup;
1546 : int32 cmp;
1547 :
1548 : /* Partial match to a null is not possible */
1549 0 : if (entry->queryCategory != GIN_CAT_NORM_KEY)
1550 0 : return false;
1551 :
1552 0 : while (off < maxoff)
1553 : {
1554 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1555 :
1556 0 : if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1557 0 : return false;
1558 :
1559 0 : if (datumExtracted[off - 1] == false)
1560 : {
1561 0 : datum[off - 1] = gintuple_get_key(ginstate, itup,
1562 0 : &category[off - 1]);
1563 0 : datumExtracted[off - 1] = true;
1564 : }
1565 :
1566 : /* Once we hit nulls, no further match is possible */
1567 0 : if (category[off - 1] != GIN_CAT_NORM_KEY)
1568 0 : return false;
1569 :
1570 : /*----------
1571 : * Check partial match.
1572 : * case cmp == 0 => match
1573 : * case cmp > 0 => not match and end scan (no later match possible)
1574 : * case cmp < 0 => not match and continue scan
1575 : *----------
1576 : */
1577 0 : cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1578 0 : ginstate->supportCollation[entry->attnum - 1],
1579 : entry->queryKey,
1580 0 : datum[off - 1],
1581 0 : UInt16GetDatum(entry->strategy),
1582 0 : PointerGetDatum(entry->extra_data)));
1583 0 : if (cmp == 0)
1584 0 : return true;
1585 0 : else if (cmp > 0)
1586 0 : return false;
1587 :
1588 0 : off++;
1589 : }
1590 :
1591 0 : return false;
1592 : }
1593 :
1594 : /*
1595 : * Set up the entryRes array for each key by looking at
1596 : * every entry for current heap row in pending list.
1597 : *
1598 : * Returns true if each scan key has at least one entryRes match.
1599 : * This corresponds to the situations where the normal index search will
1600 : * try to apply the key's consistentFn. (A tuple not meeting that requirement
1601 : * cannot be returned by the normal search since no entry stream will
1602 : * source its TID.)
1603 : *
1604 : * The pendingBuffer is presumed pinned and share-locked on entry.
1605 : */
1606 : static bool
1607 1448 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1608 : {
1609 1448 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1610 : OffsetNumber attrnum;
1611 : Page page;
1612 : IndexTuple itup;
1613 : int i,
1614 : j;
1615 :
1616 : /*
1617 : * Reset all entryRes and hasMatchKey flags
1618 : */
1619 3924 : for (i = 0; i < so->nkeys; i++)
1620 : {
1621 2476 : GinScanKey key = so->keys + i;
1622 :
1623 2476 : memset(key->entryRes, GIN_FALSE, key->nentries);
1624 : }
1625 1448 : memset(pos->hasMatchKey, false, so->nkeys);
1626 :
1627 : /*
1628 : * Outer loop iterates over multiple pending-list pages when a single heap
1629 : * row has entries spanning those pages.
1630 : */
1631 : for (;;)
1632 0 : {
1633 : Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1634 : GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1635 : bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1636 :
1637 : Assert(pos->lastOffset > pos->firstOffset);
1638 1448 : memset(datumExtracted + pos->firstOffset - 1, 0,
1639 1448 : sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1640 :
1641 1448 : page = BufferGetPage(pos->pendingBuffer);
1642 :
1643 3924 : for (i = 0; i < so->nkeys; i++)
1644 : {
1645 2476 : GinScanKey key = so->keys + i;
1646 :
1647 4776 : for (j = 0; j < key->nentries; j++)
1648 : {
1649 2300 : GinScanEntry entry = key->scanEntry[j];
1650 2300 : OffsetNumber StopLow = pos->firstOffset,
1651 2300 : StopHigh = pos->lastOffset,
1652 : StopMiddle;
1653 :
1654 : /* If already matched on earlier page, do no extra work */
1655 2300 : if (key->entryRes[j])
1656 0 : continue;
1657 :
1658 : /*
1659 : * Interesting tuples are from pos->firstOffset to
1660 : * pos->lastOffset and they are ordered by (attnum, Datum) as
1661 : * it's done in entry tree. So we can use binary search to
1662 : * avoid linear scanning.
1663 : */
1664 5108 : while (StopLow < StopHigh)
1665 : {
1666 : int res;
1667 :
1668 4010 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1669 :
1670 4010 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1671 :
1672 4010 : attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1673 :
1674 4010 : if (key->attnum < attrnum)
1675 : {
1676 798 : StopHigh = StopMiddle;
1677 798 : continue;
1678 : }
1679 3212 : if (key->attnum > attrnum)
1680 : {
1681 660 : StopLow = StopMiddle + 1;
1682 660 : continue;
1683 : }
1684 :
1685 2552 : if (datumExtracted[StopMiddle - 1] == false)
1686 : {
1687 4952 : datum[StopMiddle - 1] =
1688 2476 : gintuple_get_key(&so->ginstate, itup,
1689 2476 : &category[StopMiddle - 1]);
1690 2476 : datumExtracted[StopMiddle - 1] = true;
1691 : }
1692 :
1693 2552 : if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1694 : {
1695 : /* special behavior depending on searchMode */
1696 1084 : if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1697 : {
1698 : /* match anything except NULL_ITEM */
1699 1084 : if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1700 378 : res = -1;
1701 : else
1702 706 : res = 0;
1703 : }
1704 : else
1705 : {
1706 : /* match everything */
1707 0 : res = 0;
1708 : }
1709 : }
1710 : else
1711 : {
1712 1468 : res = ginCompareEntries(&so->ginstate,
1713 1468 : entry->attnum,
1714 : entry->queryKey,
1715 1468 : entry->queryCategory,
1716 1468 : datum[StopMiddle - 1],
1717 1468 : category[StopMiddle - 1]);
1718 : }
1719 :
1720 2552 : if (res == 0)
1721 : {
1722 : /*
1723 : * Found exact match (there can be only one, except in
1724 : * EMPTY_QUERY mode).
1725 : *
1726 : * If doing partial match, scan forward from here to
1727 : * end of page to check for matches.
1728 : *
1729 : * See comment above about tuple's ordering.
1730 : */
1731 1202 : if (entry->isPartialMatch)
1732 0 : key->entryRes[j] =
1733 0 : matchPartialInPendingList(&so->ginstate,
1734 : page,
1735 : StopMiddle,
1736 0 : pos->lastOffset,
1737 : entry,
1738 : datum,
1739 : category,
1740 : datumExtracted);
1741 : else
1742 1202 : key->entryRes[j] = true;
1743 :
1744 : /* done with binary search */
1745 1202 : break;
1746 : }
1747 1350 : else if (res < 0)
1748 1314 : StopHigh = StopMiddle;
1749 : else
1750 36 : StopLow = StopMiddle + 1;
1751 : }
1752 :
1753 2300 : if (StopLow >= StopHigh && entry->isPartialMatch)
1754 : {
1755 : /*
1756 : * No exact match on this page. If doing partial match,
1757 : * scan from the first tuple greater than target value to
1758 : * end of page. Note that since we don't remember whether
1759 : * the comparePartialFn told us to stop early on a
1760 : * previous page, we will uselessly apply comparePartialFn
1761 : * to the first tuple on each subsequent page.
1762 : */
1763 0 : key->entryRes[j] =
1764 0 : matchPartialInPendingList(&so->ginstate,
1765 : page,
1766 : StopHigh,
1767 0 : pos->lastOffset,
1768 : entry,
1769 : datum,
1770 : category,
1771 : datumExtracted);
1772 : }
1773 :
1774 2300 : pos->hasMatchKey[i] |= key->entryRes[j];
1775 : }
1776 : }
1777 :
1778 : /* Advance firstOffset over the scanned tuples */
1779 1448 : pos->firstOffset = pos->lastOffset;
1780 :
1781 1448 : if (GinPageHasFullRow(page))
1782 : {
1783 : /*
1784 : * We have examined all pending entries for the current heap row.
1785 : * Break out of loop over pages.
1786 : */
1787 1448 : break;
1788 : }
1789 : else
1790 : {
1791 : /*
1792 : * Advance to next page of pending entries for the current heap
1793 : * row. Complain if there isn't one.
1794 : */
1795 0 : ItemPointerData item = pos->item;
1796 :
1797 0 : if (scanGetCandidate(scan, pos) == false ||
1798 0 : !ItemPointerEquals(&pos->item, &item))
1799 0 : elog(ERROR, "could not find additional pending pages for same heap tuple");
1800 : }
1801 : }
1802 :
1803 : /*
1804 : * All scan keys except excludeOnly require at least one entry to match.
1805 : * excludeOnly keys are an exception, because their implied
1806 : * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all
1807 : * non-excludeOnly scan keys have at least one match.
1808 : */
1809 2538 : for (i = 0; i < so->nkeys; i++)
1810 : {
1811 1984 : if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
1812 894 : return false;
1813 : }
1814 :
1815 554 : return true;
1816 : }
1817 :
1818 : /*
1819 : * Collect all matched rows from pending list into bitmap.
1820 : */
1821 : static void
1822 1586 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1823 : {
1824 1586 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1825 : MemoryContext oldCtx;
1826 : bool recheck,
1827 : match;
1828 : int i;
1829 : pendingPosition pos;
1830 1586 : Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1831 : Page page;
1832 : BlockNumber blkno;
1833 :
1834 1586 : *ntids = 0;
1835 :
1836 : /*
1837 : * Acquire predicate lock on the metapage, to conflict with any fastupdate
1838 : * insertions.
1839 : */
1840 1586 : PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
1841 :
1842 1586 : LockBuffer(metabuffer, GIN_SHARE);
1843 1586 : page = BufferGetPage(metabuffer);
1844 1586 : blkno = GinPageGetMeta(page)->head;
1845 :
1846 : /*
1847 : * fetch head of list before unlocking metapage. head page must be pinned
1848 : * to prevent deletion by vacuum process
1849 : */
1850 1586 : if (blkno == InvalidBlockNumber)
1851 : {
1852 : /* No pending list, so proceed with normal scan */
1853 1420 : UnlockReleaseBuffer(metabuffer);
1854 1420 : return;
1855 : }
1856 :
1857 166 : pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1858 166 : LockBuffer(pos.pendingBuffer, GIN_SHARE);
1859 166 : pos.firstOffset = FirstOffsetNumber;
1860 166 : UnlockReleaseBuffer(metabuffer);
1861 166 : pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1862 :
1863 : /*
1864 : * loop for each heap row. scanGetCandidate returns full row or row's
1865 : * tuples from first page.
1866 : */
1867 1614 : while (scanGetCandidate(scan, &pos))
1868 : {
1869 : /*
1870 : * Check entries in tuple and set up entryRes array.
1871 : *
1872 : * If pending tuples belonging to the current heap row are spread
1873 : * across several pages, collectMatchesForHeapRow will read all of
1874 : * those pages.
1875 : */
1876 1448 : if (!collectMatchesForHeapRow(scan, &pos))
1877 894 : continue;
1878 :
1879 : /*
1880 : * Matching of entries of one row is finished, so check row using
1881 : * consistent functions.
1882 : */
1883 554 : oldCtx = MemoryContextSwitchTo(so->tempCtx);
1884 554 : recheck = false;
1885 554 : match = true;
1886 :
1887 1404 : for (i = 0; i < so->nkeys; i++)
1888 : {
1889 850 : GinScanKey key = so->keys + i;
1890 :
1891 850 : if (!key->boolConsistentFn(key))
1892 : {
1893 0 : match = false;
1894 0 : break;
1895 : }
1896 850 : recheck |= key->recheckCurItem;
1897 : }
1898 :
1899 554 : MemoryContextSwitchTo(oldCtx);
1900 554 : MemoryContextReset(so->tempCtx);
1901 :
1902 554 : if (match)
1903 : {
1904 554 : tbm_add_tuples(tbm, &pos.item, 1, recheck);
1905 554 : (*ntids)++;
1906 : }
1907 : }
1908 :
1909 166 : pfree(pos.hasMatchKey);
1910 : }
1911 :
1912 :
1913 : #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1914 :
1915 : int64
1916 1598 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
1917 : {
1918 1598 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1919 : int64 ntids;
1920 : ItemPointerData iptr;
1921 : bool recheck;
1922 :
1923 : /*
1924 : * Set up the scan keys, and check for unsatisfiable query.
1925 : */
1926 1598 : ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1927 : * sure */
1928 1598 : ginNewScanKey(scan);
1929 :
1930 1598 : if (GinIsVoidRes(scan))
1931 12 : return 0;
1932 :
1933 1586 : ntids = 0;
1934 :
1935 : /*
1936 : * First, scan the pending list and collect any matching entries into the
1937 : * bitmap. After we scan a pending item, some other backend could post it
1938 : * into the main index, and so we might visit it a second time during the
1939 : * main scan. This is okay because we'll just re-set the same bit in the
1940 : * bitmap. (The possibility of duplicate visits is a major reason why GIN
1941 : * can't support the amgettuple API, however.) Note that it would not do
1942 : * to scan the main index before the pending list, since concurrent
1943 : * cleanup could then make us miss entries entirely.
1944 : */
1945 1586 : scanPendingInsert(scan, tbm, &ntids);
1946 :
1947 : /*
1948 : * Now scan the main index.
1949 : */
1950 1586 : startScan(scan);
1951 :
1952 1586 : ItemPointerSetMin(&iptr);
1953 :
1954 : for (;;)
1955 : {
1956 979792 : CHECK_FOR_INTERRUPTS();
1957 :
1958 979792 : if (!scanGetItem(scan, iptr, &iptr, &recheck))
1959 1586 : break;
1960 :
1961 978206 : if (ItemPointerIsLossyPage(&iptr))
1962 0 : tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1963 : else
1964 978206 : tbm_add_tuples(tbm, &iptr, 1, recheck);
1965 978206 : ntids++;
1966 : }
1967 :
1968 1586 : return ntids;
1969 : }
|