Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginget.c
4 : * fetch tuples from a GIN scan.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginget.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/relscan.h"
19 : #include "common/pg_prng.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/datum.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : /* GUC parameter */
27 : int GinFuzzySearchLimit = 0;
28 :
29 : typedef struct pendingPosition
30 : {
31 : Buffer pendingBuffer;
32 : OffsetNumber firstOffset;
33 : OffsetNumber lastOffset;
34 : ItemPointerData item;
35 : bool *hasMatchKey;
36 : } pendingPosition;
37 :
38 :
39 : /*
40 : * Goes to the next page if current offset is outside of bounds
41 : */
42 : static bool
43 407620 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
44 : {
45 407620 : Page page = BufferGetPage(stack->buffer);
46 :
47 407620 : if (stack->off > PageGetMaxOffsetNumber(page))
48 : {
49 : /*
50 : * We scanned the whole page, so we should take right page
51 : */
52 3790 : if (GinPageRightMost(page))
53 366 : return false; /* no more pages */
54 :
55 3424 : stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
56 3424 : stack->blkno = BufferGetBlockNumber(stack->buffer);
57 3424 : stack->off = FirstOffsetNumber;
58 3424 : PredicateLockPage(btree->index, stack->blkno, snapshot);
59 : }
60 :
61 407254 : return true;
62 : }
63 :
64 : /*
65 : * Scan all pages of a posting tree and save all its heap ItemPointers
66 : * in scanEntry->matchBitmap
67 : */
68 : static void
69 12 : scanPostingTree(Relation index, GinScanEntry scanEntry,
70 : BlockNumber rootPostingTree)
71 : {
72 : GinBtreeData btree;
73 : GinBtreeStack *stack;
74 : Buffer buffer;
75 : Page page;
76 :
77 : /* Descend to the leftmost leaf page */
78 12 : stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
79 12 : buffer = stack->buffer;
80 :
81 12 : IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
82 :
83 12 : freeGinBtreeStack(stack);
84 :
85 : /*
86 : * Loop iterates through all leaf pages of posting tree
87 : */
88 : for (;;)
89 : {
90 30 : page = BufferGetPage(buffer);
91 30 : if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
92 : {
93 30 : int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
94 :
95 30 : scanEntry->predictNumberResult += n;
96 : }
97 :
98 30 : if (GinPageRightMost(page))
99 12 : break; /* no more pages */
100 :
101 18 : buffer = ginStepRight(buffer, index, GIN_SHARE);
102 : }
103 :
104 12 : UnlockReleaseBuffer(buffer);
105 12 : }
106 :
107 : /*
108 : * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
109 : * match the search entry. This supports three different match modes:
110 : *
111 : * 1. Partial-match support: scan from current point until the
112 : * comparePartialFn says we're done.
113 : * 2. SEARCH_MODE_ALL: scan from current point (which should be first
114 : * key for the current attnum) until we hit null items or end of attnum
115 : * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
116 : * key for the current attnum) until we hit end of attnum
117 : *
118 : * Returns true if done, false if it's necessary to restart scan from scratch
119 : */
120 : static bool
121 534 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
122 : GinScanEntry scanEntry, Snapshot snapshot)
123 : {
124 : OffsetNumber attnum;
125 : CompactAttribute *attr;
126 :
127 : /* Initialize empty bitmap result */
128 534 : scanEntry->matchBitmap = tbm_create(work_mem * (Size) 1024, NULL);
129 :
130 : /* Null query cannot partial-match anything */
131 534 : if (scanEntry->isPartialMatch &&
132 252 : scanEntry->queryCategory != GIN_CAT_NORM_KEY)
133 0 : return true;
134 :
135 : /* Locate tupdesc entry for key column (for attbyval/attlen data) */
136 534 : attnum = scanEntry->attnum;
137 534 : attr = TupleDescCompactAttr(btree->ginstate->origTupdesc, attnum - 1);
138 :
139 : /*
140 : * Predicate lock entry leaf page, following pages will be locked by
141 : * moveRightIfItNeeded()
142 : */
143 534 : PredicateLockPage(btree->index,
144 : BufferGetBlockNumber(stack->buffer),
145 : snapshot);
146 :
147 : for (;;)
148 407074 : {
149 : Page page;
150 : IndexTuple itup;
151 : Datum idatum;
152 : GinNullCategory icategory;
153 :
154 : /*
155 : * stack->off points to the interested entry, buffer is already locked
156 : */
157 407608 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
158 534 : return true;
159 :
160 407242 : page = BufferGetPage(stack->buffer);
161 407242 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
162 :
163 : /*
164 : * If tuple stores another attribute then stop scan
165 : */
166 407242 : if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
167 0 : return true;
168 :
169 : /* Safe to fetch attribute value */
170 407242 : idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
171 :
172 : /*
173 : * Check for appropriate scan stop conditions
174 : */
175 407242 : if (scanEntry->isPartialMatch)
176 : {
177 : int32 cmp;
178 :
179 : /*
180 : * In partial match, stop scan at any null (including
181 : * placeholders); partial matches never match nulls
182 : */
183 1332 : if (icategory != GIN_CAT_NORM_KEY)
184 10 : return true;
185 :
186 : /*----------
187 : * Check of partial match.
188 : * case cmp == 0 => match
189 : * case cmp > 0 => not match and finish scan
190 : * case cmp < 0 => not match and continue scan
191 : *----------
192 : */
193 1322 : cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
194 1322 : btree->ginstate->supportCollation[attnum - 1],
195 : scanEntry->queryKey,
196 : idatum,
197 1322 : UInt16GetDatum(scanEntry->strategy),
198 1322 : PointerGetDatum(scanEntry->extra_data)));
199 :
200 1322 : if (cmp > 0)
201 130 : return true;
202 1192 : else if (cmp < 0)
203 : {
204 60 : stack->off++;
205 60 : continue;
206 : }
207 : }
208 405910 : else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
209 : {
210 : /*
211 : * In ALL mode, we are not interested in null items, so we can
212 : * stop if we get to a null-item placeholder (which will be the
213 : * last entry for a given attnum). We do want to include NULL_KEY
214 : * and EMPTY_ITEM entries, though.
215 : */
216 405910 : if (icategory == GIN_CAT_NULL_ITEM)
217 28 : return true;
218 : }
219 :
220 : /*
221 : * OK, we want to return the TIDs listed in this entry.
222 : */
223 407014 : if (GinIsPostingTree(itup))
224 : {
225 12 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
226 :
227 : /*
228 : * We should unlock current page (but not unpin) during tree scan
229 : * to prevent deadlock with vacuum processes.
230 : *
231 : * We save current entry value (idatum) to be able to re-find our
232 : * tuple after re-locking
233 : */
234 12 : if (icategory == GIN_CAT_NORM_KEY)
235 12 : idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
236 :
237 12 : LockBuffer(stack->buffer, GIN_UNLOCK);
238 :
239 : /*
240 : * Acquire predicate lock on the posting tree. We already hold a
241 : * lock on the entry page, but insertions to the posting tree
242 : * don't check for conflicts on that level.
243 : */
244 12 : PredicateLockPage(btree->index, rootPostingTree, snapshot);
245 :
246 : /* Collect all the TIDs in this entry's posting tree */
247 12 : scanPostingTree(btree->index, scanEntry, rootPostingTree);
248 :
249 : /*
250 : * We lock again the entry page and while it was unlocked insert
251 : * might have occurred, so we need to re-find our position.
252 : */
253 12 : LockBuffer(stack->buffer, GIN_SHARE);
254 12 : page = BufferGetPage(stack->buffer);
255 12 : if (!GinPageIsLeaf(page))
256 : {
257 : /*
258 : * Root page becomes non-leaf while we unlock it. We will
259 : * start again, this situation doesn't occur often - root can
260 : * became a non-leaf only once per life of index.
261 : */
262 0 : return false;
263 : }
264 :
265 : /* Search forward to re-find idatum */
266 : for (;;)
267 : {
268 12 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
269 0 : ereport(ERROR,
270 : (errcode(ERRCODE_INTERNAL_ERROR),
271 : errmsg("failed to re-find tuple within index \"%s\"",
272 : RelationGetRelationName(btree->index))));
273 :
274 12 : page = BufferGetPage(stack->buffer);
275 12 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
276 :
277 12 : if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
278 : {
279 : Datum newDatum;
280 : GinNullCategory newCategory;
281 :
282 12 : newDatum = gintuple_get_key(btree->ginstate, itup,
283 : &newCategory);
284 :
285 12 : if (ginCompareEntries(btree->ginstate, attnum,
286 : newDatum, newCategory,
287 : idatum, icategory) == 0)
288 12 : break; /* Found! */
289 : }
290 :
291 0 : stack->off++;
292 : }
293 :
294 12 : if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
295 0 : pfree(DatumGetPointer(idatum));
296 : }
297 : else
298 : {
299 : ItemPointer ipd;
300 : int nipd;
301 :
302 407002 : ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
303 407002 : tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
304 407002 : scanEntry->predictNumberResult += GinGetNPosting(itup);
305 407002 : pfree(ipd);
306 : }
307 :
308 : /*
309 : * Done with this entry, go to the next
310 : */
311 407014 : stack->off++;
312 : }
313 : }
314 :
315 : /*
316 : * Start* functions setup beginning state of searches: finds correct buffer and pins it.
317 : */
318 : static void
319 6692 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
320 : {
321 : GinBtreeData btreeEntry;
322 : GinBtreeStack *stackEntry;
323 : Page page;
324 : bool needUnlock;
325 :
326 6692 : restartScanEntry:
327 6692 : entry->buffer = InvalidBuffer;
328 6692 : ItemPointerSetMin(&entry->curItem);
329 6692 : entry->offset = InvalidOffsetNumber;
330 6692 : if (entry->list)
331 0 : pfree(entry->list);
332 6692 : entry->list = NULL;
333 6692 : entry->nlist = 0;
334 6692 : entry->matchBitmap = NULL;
335 6692 : entry->matchNtuples = -1;
336 6692 : entry->matchResult.blockno = InvalidBlockNumber;
337 6692 : entry->reduceResult = false;
338 6692 : entry->predictNumberResult = 0;
339 :
340 : /*
341 : * we should find entry, and begin scan of posting tree or just store
342 : * posting list in memory
343 : */
344 6692 : ginPrepareEntryScan(&btreeEntry, entry->attnum,
345 6692 : entry->queryKey, entry->queryCategory,
346 : ginstate);
347 6692 : stackEntry = ginFindLeafPage(&btreeEntry, true, false);
348 6692 : page = BufferGetPage(stackEntry->buffer);
349 :
350 : /* ginFindLeafPage() will have already checked snapshot age. */
351 6692 : needUnlock = true;
352 :
353 6692 : entry->isFinished = true;
354 :
355 6692 : if (entry->isPartialMatch ||
356 6440 : entry->queryCategory == GIN_CAT_EMPTY_QUERY)
357 : {
358 : /*
359 : * btreeEntry.findItem locates the first item >= given search key.
360 : * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
361 : * because of the way the GIN_CAT_EMPTY_QUERY category code is
362 : * assigned.) We scan forward from there and collect all TIDs needed
363 : * for the entry type.
364 : */
365 534 : btreeEntry.findItem(&btreeEntry, stackEntry);
366 534 : if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
367 534 : == false)
368 : {
369 : /*
370 : * GIN tree was seriously restructured, so we will cleanup all
371 : * found data and rescan. See comments near 'return false' in
372 : * collectMatchBitmap()
373 : */
374 0 : if (entry->matchBitmap)
375 : {
376 0 : if (entry->matchIterator)
377 0 : tbm_end_private_iterate(entry->matchIterator);
378 0 : entry->matchIterator = NULL;
379 0 : tbm_free(entry->matchBitmap);
380 0 : entry->matchBitmap = NULL;
381 : }
382 0 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
383 0 : freeGinBtreeStack(stackEntry);
384 0 : goto restartScanEntry;
385 : }
386 :
387 534 : if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
388 : {
389 420 : entry->matchIterator =
390 420 : tbm_begin_private_iterate(entry->matchBitmap);
391 420 : entry->isFinished = false;
392 : }
393 : }
394 6158 : else if (btreeEntry.findItem(&btreeEntry, stackEntry))
395 : {
396 4448 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
397 :
398 4448 : if (GinIsPostingTree(itup))
399 : {
400 64 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
401 : GinBtreeStack *stack;
402 : Page entrypage;
403 : ItemPointerData minItem;
404 :
405 : /*
406 : * This is an equality scan, so lock the root of the posting tree.
407 : * It represents a lock on the exact key value, and covers all the
408 : * items in the posting tree.
409 : */
410 64 : PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
411 :
412 : /*
413 : * We should unlock entry page before touching posting tree to
414 : * prevent deadlocks with vacuum processes. Because entry is never
415 : * deleted from page and posting tree is never reduced to the
416 : * posting list, we can unlock page after getting BlockNumber of
417 : * root of posting tree.
418 : */
419 64 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
420 64 : needUnlock = false;
421 :
422 64 : stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
423 : rootPostingTree);
424 64 : entry->buffer = stack->buffer;
425 :
426 : /*
427 : * We keep buffer pinned because we need to prevent deletion of
428 : * page during scan. See GIN's vacuum implementation. RefCount is
429 : * increased to keep buffer pinned after freeGinBtreeStack() call.
430 : */
431 64 : IncrBufferRefCount(entry->buffer);
432 :
433 64 : entrypage = BufferGetPage(entry->buffer);
434 :
435 : /*
436 : * Load the first page into memory.
437 : */
438 64 : ItemPointerSetMin(&minItem);
439 64 : entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
440 :
441 64 : entry->predictNumberResult = stack->predictNumber * entry->nlist;
442 :
443 64 : LockBuffer(entry->buffer, GIN_UNLOCK);
444 64 : freeGinBtreeStack(stack);
445 64 : entry->isFinished = false;
446 : }
447 : else
448 : {
449 : /*
450 : * Lock the entry leaf page. This is more coarse-grained than
451 : * necessary, because it will conflict with any insertions that
452 : * land on the same leaf page, not only the exact key we searched
453 : * for. But locking an individual tuple would require updating
454 : * that lock whenever it moves because of insertions or vacuums,
455 : * which seems too complicated.
456 : */
457 4384 : PredicateLockPage(ginstate->index,
458 : BufferGetBlockNumber(stackEntry->buffer),
459 : snapshot);
460 4384 : if (GinGetNPosting(itup) > 0)
461 : {
462 4378 : entry->list = ginReadTuple(ginstate, entry->attnum, itup,
463 : &entry->nlist);
464 4378 : entry->predictNumberResult = entry->nlist;
465 :
466 4378 : entry->isFinished = false;
467 : }
468 : }
469 : }
470 : else
471 : {
472 : /*
473 : * No entry found. Predicate lock the leaf page, to lock the place
474 : * where the entry would've been, had there been one.
475 : */
476 1710 : PredicateLockPage(ginstate->index,
477 : BufferGetBlockNumber(stackEntry->buffer), snapshot);
478 : }
479 :
480 6692 : if (needUnlock)
481 6628 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
482 6692 : freeGinBtreeStack(stackEntry);
483 6692 : }
484 :
485 : /*
486 : * Comparison function for scan entry indexes. Sorts by predictNumberResult,
487 : * least frequent items first.
488 : */
489 : static int
490 9440 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
491 : {
492 9440 : const GinScanKey key = (const GinScanKey) arg;
493 9440 : int i1 = *(const int *) a1;
494 9440 : int i2 = *(const int *) a2;
495 9440 : uint32 n1 = key->scanEntry[i1]->predictNumberResult;
496 9440 : uint32 n2 = key->scanEntry[i2]->predictNumberResult;
497 :
498 9440 : if (n1 < n2)
499 1368 : return -1;
500 8072 : else if (n1 == n2)
501 6302 : return 0;
502 : else
503 1770 : return 1;
504 : }
505 :
506 : static void
507 1714 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
508 : {
509 1714 : MemoryContext oldCtx = CurrentMemoryContext;
510 : int i;
511 : int j;
512 : int *entryIndexes;
513 :
514 1714 : ItemPointerSetMin(&key->curItem);
515 1714 : key->curItemMatches = false;
516 1714 : key->recheckCurItem = false;
517 1714 : key->isFinished = false;
518 :
519 : /*
520 : * Divide the entries into two distinct sets: required and additional.
521 : * Additional entries are not enough for a match alone, without any items
522 : * from the required set, but are needed by the consistent function to
523 : * decide if an item matches. When scanning, we can skip over items from
524 : * additional entries that have no corresponding matches in any of the
525 : * required entries. That speeds up queries like "frequent & rare"
526 : * considerably, if the frequent term can be put in the additional set.
527 : *
528 : * There can be many legal ways to divide them entries into these two
529 : * sets. A conservative division is to just put everything in the required
530 : * set, but the more you can put in the additional set, the more you can
531 : * skip during the scan. To maximize skipping, we try to put as many
532 : * frequent items as possible into additional, and less frequent ones into
533 : * required. To do that, sort the entries by frequency
534 : * (predictNumberResult), and put entries into the required set in that
535 : * order, until the consistent function says that none of the remaining
536 : * entries can form a match, without any items from the required set. The
537 : * rest go to the additional set.
538 : *
539 : * Exclude-only scan keys are known to have no required entries.
540 : */
541 1714 : if (key->excludeOnly)
542 : {
543 38 : MemoryContextSwitchTo(so->keyCtx);
544 :
545 38 : key->nrequired = 0;
546 38 : key->nadditional = key->nentries;
547 38 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
548 44 : for (i = 0; i < key->nadditional; i++)
549 6 : key->additionalEntries[i] = key->scanEntry[i];
550 : }
551 1676 : else if (key->nentries > 1)
552 : {
553 560 : MemoryContextSwitchTo(so->tempCtx);
554 :
555 560 : entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
556 6130 : for (i = 0; i < key->nentries; i++)
557 5570 : entryIndexes[i] = i;
558 560 : qsort_arg(entryIndexes, key->nentries, sizeof(int),
559 : entryIndexByFrequencyCmp, key);
560 :
561 5570 : for (i = 1; i < key->nentries; i++)
562 5010 : key->entryRes[entryIndexes[i]] = GIN_MAYBE;
563 4834 : for (i = 0; i < key->nentries - 1; i++)
564 : {
565 : /* Pass all entries <= i as FALSE, and the rest as MAYBE */
566 4692 : key->entryRes[entryIndexes[i]] = GIN_FALSE;
567 :
568 4692 : if (key->triConsistentFn(key) == GIN_FALSE)
569 418 : break;
570 :
571 : /* Make this loop interruptible in case there are many keys */
572 4274 : CHECK_FOR_INTERRUPTS();
573 : }
574 : /* i is now the last required entry. */
575 :
576 560 : MemoryContextSwitchTo(so->keyCtx);
577 :
578 560 : key->nrequired = i + 1;
579 560 : key->nadditional = key->nentries - key->nrequired;
580 560 : key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
581 560 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
582 :
583 560 : j = 0;
584 5394 : for (i = 0; i < key->nrequired; i++)
585 4834 : key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
586 1296 : for (i = 0; i < key->nadditional; i++)
587 736 : key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
588 :
589 : /* clean up after consistentFn calls (also frees entryIndexes) */
590 560 : MemoryContextReset(so->tempCtx);
591 : }
592 : else
593 : {
594 1116 : MemoryContextSwitchTo(so->keyCtx);
595 :
596 1116 : key->nrequired = 1;
597 1116 : key->nadditional = 0;
598 1116 : key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
599 1116 : key->requiredEntries[0] = key->scanEntry[0];
600 : }
601 1714 : MemoryContextSwitchTo(oldCtx);
602 1714 : }
603 :
604 : static void
605 1586 : startScan(IndexScanDesc scan)
606 : {
607 1586 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
608 1586 : GinState *ginstate = &so->ginstate;
609 : uint32 i;
610 :
611 8278 : for (i = 0; i < so->totalentries; i++)
612 6692 : startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
613 :
614 1586 : if (GinFuzzySearchLimit > 0)
615 : {
616 : /*
617 : * If all of keys more than threshold we will try to reduce result, we
618 : * hope (and only hope, for intersection operation of array our
619 : * supposition isn't true), that total result will not more than
620 : * minimal predictNumberResult.
621 : */
622 6 : bool reduce = true;
623 :
624 12 : for (i = 0; i < so->totalentries; i++)
625 : {
626 6 : if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
627 : {
628 0 : reduce = false;
629 0 : break;
630 : }
631 : }
632 6 : if (reduce)
633 : {
634 12 : for (i = 0; i < so->totalentries; i++)
635 : {
636 6 : so->entries[i]->predictNumberResult /= so->totalentries;
637 6 : so->entries[i]->reduceResult = true;
638 : }
639 : }
640 : }
641 :
642 : /*
643 : * Now that we have the estimates for the entry frequencies, finish
644 : * initializing the scan keys.
645 : */
646 3300 : for (i = 0; i < so->nkeys; i++)
647 1714 : startScanKey(ginstate, so, so->keys + i);
648 1586 : }
649 :
650 : /*
651 : * Load the next batch of item pointers from a posting tree.
652 : *
653 : * Note that we copy the page into GinScanEntry->list array and unlock it, but
654 : * keep it pinned to prevent interference with vacuum.
655 : */
656 : static void
657 100 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
658 : ItemPointerData advancePast)
659 : {
660 : Page page;
661 : int i;
662 : bool stepright;
663 :
664 100 : if (!BufferIsValid(entry->buffer))
665 : {
666 0 : entry->isFinished = true;
667 0 : return;
668 : }
669 :
670 : /*
671 : * We have two strategies for finding the correct page: step right from
672 : * the current page, or descend the tree again from the root. If
673 : * advancePast equals the current item, the next matching item should be
674 : * on the next page, so we step right. Otherwise, descend from root.
675 : */
676 100 : if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
677 : {
678 94 : stepright = true;
679 94 : LockBuffer(entry->buffer, GIN_SHARE);
680 : }
681 : else
682 : {
683 : GinBtreeStack *stack;
684 :
685 6 : ReleaseBuffer(entry->buffer);
686 :
687 : /*
688 : * Set the search key, and find the correct leaf page.
689 : */
690 6 : if (ItemPointerIsLossyPage(&advancePast))
691 : {
692 0 : ItemPointerSet(&entry->btree.itemptr,
693 0 : GinItemPointerGetBlockNumber(&advancePast) + 1,
694 : FirstOffsetNumber);
695 : }
696 : else
697 : {
698 6 : ItemPointerSet(&entry->btree.itemptr,
699 : GinItemPointerGetBlockNumber(&advancePast),
700 6 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
701 : }
702 6 : entry->btree.fullScan = false;
703 6 : stack = ginFindLeafPage(&entry->btree, true, false);
704 :
705 : /* we don't need the stack, just the buffer. */
706 6 : entry->buffer = stack->buffer;
707 6 : IncrBufferRefCount(entry->buffer);
708 6 : freeGinBtreeStack(stack);
709 6 : stepright = false;
710 : }
711 :
712 100 : elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
713 : GinItemPointerGetBlockNumber(&advancePast),
714 : GinItemPointerGetOffsetNumber(&advancePast),
715 : !stepright);
716 :
717 100 : page = BufferGetPage(entry->buffer);
718 : for (;;)
719 : {
720 106 : entry->offset = InvalidOffsetNumber;
721 106 : if (entry->list)
722 : {
723 94 : pfree(entry->list);
724 94 : entry->list = NULL;
725 94 : entry->nlist = 0;
726 : }
727 :
728 106 : if (stepright)
729 : {
730 : /*
731 : * We've processed all the entries on this page. If it was the
732 : * last page in the tree, we're done.
733 : */
734 100 : if (GinPageRightMost(page))
735 : {
736 6 : UnlockReleaseBuffer(entry->buffer);
737 6 : entry->buffer = InvalidBuffer;
738 6 : entry->isFinished = true;
739 6 : return;
740 : }
741 :
742 : /*
743 : * Step to next page, following the right link. then find the
744 : * first ItemPointer greater than advancePast.
745 : */
746 94 : entry->buffer = ginStepRight(entry->buffer,
747 : ginstate->index,
748 : GIN_SHARE);
749 94 : page = BufferGetPage(entry->buffer);
750 : }
751 100 : stepright = true;
752 :
753 100 : if (GinPageGetOpaque(page)->flags & GIN_DELETED)
754 0 : continue; /* page was deleted by concurrent vacuum */
755 :
756 : /*
757 : * The first item > advancePast might not be on this page, but
758 : * somewhere to the right, if the page was split, or a non-match from
759 : * another key in the query allowed us to skip some items from this
760 : * entry. Keep following the right-links until we re-find the correct
761 : * page.
762 : */
763 136 : if (!GinPageRightMost(page) &&
764 36 : ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
765 : {
766 : /*
767 : * the item we're looking is > the right bound of the page, so it
768 : * can't be on this page.
769 : */
770 0 : continue;
771 : }
772 :
773 100 : entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
774 :
775 1924 : for (i = 0; i < entry->nlist; i++)
776 : {
777 1918 : if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
778 : {
779 94 : entry->offset = i;
780 :
781 94 : if (GinPageRightMost(page))
782 : {
783 : /* after processing the copied items, we're done. */
784 58 : UnlockReleaseBuffer(entry->buffer);
785 58 : entry->buffer = InvalidBuffer;
786 : }
787 : else
788 36 : LockBuffer(entry->buffer, GIN_UNLOCK);
789 94 : return;
790 : }
791 : }
792 : }
793 : }
794 :
795 : #define gin_rand() pg_prng_double(&pg_global_prng_state)
796 : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
797 :
798 : /*
799 : * Sets entry->curItem to next heap item pointer > advancePast, for one entry
800 : * of one scan key, or sets entry->isFinished to true if there are no more.
801 : *
802 : * Item pointers are returned in ascending order.
803 : *
804 : * Note: this can return a "lossy page" item pointer, indicating that the
805 : * entry potentially matches all items on that heap page. However, it is
806 : * not allowed to return both a lossy page pointer and exact (regular)
807 : * item pointers for the same page. (Doing so would break the key-combination
808 : * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
809 : * current implementation this is guaranteed by the behavior of tidbitmaps.
810 : */
811 : static void
812 1088038 : entryGetItem(GinState *ginstate, GinScanEntry entry,
813 : ItemPointerData advancePast)
814 : {
815 : Assert(!entry->isFinished);
816 :
817 : Assert(!ItemPointerIsValid(&entry->curItem) ||
818 : ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
819 :
820 1088038 : if (entry->matchBitmap)
821 : {
822 : /* A bitmap result */
823 268052 : BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
824 268052 : OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
825 :
826 : for (;;)
827 : {
828 : /*
829 : * If we've exhausted all items on this block, move to next block
830 : * in the bitmap. tbm_private_iterate() sets matchResult.blockno
831 : * to InvalidBlockNumber when the bitmap is exhausted.
832 : */
833 272878 : while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
834 272458 : (!entry->matchResult.lossy &&
835 272458 : entry->offset >= entry->matchNtuples) ||
836 535264 : entry->matchResult.blockno < advancePastBlk ||
837 267632 : (ItemPointerIsLossyPage(&advancePast) &&
838 0 : entry->matchResult.blockno == advancePastBlk))
839 : {
840 5246 : if (!tbm_private_iterate(entry->matchIterator, &entry->matchResult))
841 : {
842 : Assert(!BlockNumberIsValid(entry->matchResult.blockno));
843 420 : ItemPointerSetInvalid(&entry->curItem);
844 420 : tbm_end_private_iterate(entry->matchIterator);
845 420 : entry->matchIterator = NULL;
846 420 : entry->isFinished = true;
847 420 : break;
848 : }
849 :
850 : /* Exact pages need their tuple offsets extracted. */
851 4826 : if (!entry->matchResult.lossy)
852 4826 : entry->matchNtuples = tbm_extract_page_tuple(&entry->matchResult,
853 4826 : entry->matchOffsets,
854 : TBM_MAX_TUPLES_PER_PAGE);
855 :
856 : /*
857 : * Reset counter to the beginning of entry->matchResult. Note:
858 : * entry->offset is still greater than matchResult.ntuples if
859 : * matchResult is lossy. So, on next call we will get next
860 : * result from TIDBitmap.
861 : */
862 4826 : entry->offset = 0;
863 : }
864 268052 : if (entry->isFinished)
865 420 : break;
866 :
867 : /*
868 : * We're now on the first page after advancePast which has any
869 : * items on it. If it's a lossy result, return that.
870 : */
871 267632 : if (entry->matchResult.lossy)
872 : {
873 0 : ItemPointerSetLossyPage(&entry->curItem,
874 : entry->matchResult.blockno);
875 :
876 : /*
877 : * We might as well fall out of the loop; we could not
878 : * estimate number of results on this page to support correct
879 : * reducing of result even if it's enabled.
880 : */
881 0 : break;
882 : }
883 :
884 : /*
885 : * Not a lossy page. If tuple offsets were extracted,
886 : * entry->matchNtuples must be > -1
887 : */
888 : Assert(entry->matchNtuples > -1);
889 :
890 : /* Skip over any offsets <= advancePast, and return that. */
891 267632 : if (entry->matchResult.blockno == advancePastBlk)
892 : {
893 : Assert(entry->matchNtuples > 0);
894 :
895 : /*
896 : * First, do a quick check against the last offset on the
897 : * page. If that's > advancePast, so are all the other
898 : * offsets, so just go back to the top to get the next page.
899 : */
900 263226 : if (entry->matchOffsets[entry->matchNtuples - 1] <= advancePastOff)
901 : {
902 0 : entry->offset = entry->matchNtuples;
903 0 : continue;
904 : }
905 :
906 : /* Otherwise scan to find the first item > advancePast */
907 263226 : while (entry->matchOffsets[entry->offset] <= advancePastOff)
908 0 : entry->offset++;
909 : }
910 :
911 267632 : ItemPointerSet(&entry->curItem,
912 : entry->matchResult.blockno,
913 267632 : entry->matchOffsets[entry->offset]);
914 267632 : entry->offset++;
915 :
916 : /* Done unless we need to reduce the result */
917 267632 : if (!entry->reduceResult || !dropItem(entry))
918 : break;
919 : }
920 : }
921 819986 : else if (!BufferIsValid(entry->buffer))
922 : {
923 : /*
924 : * A posting list from an entry tuple, or the last page of a posting
925 : * tree.
926 : */
927 : for (;;)
928 : {
929 394214 : if (entry->offset >= entry->nlist)
930 : {
931 3890 : ItemPointerSetInvalid(&entry->curItem);
932 3890 : entry->isFinished = true;
933 3890 : break;
934 : }
935 :
936 390324 : entry->curItem = entry->list[entry->offset++];
937 :
938 : /* If we're not past advancePast, keep scanning */
939 390324 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
940 66634 : continue;
941 :
942 : /* Done unless we need to reduce the result */
943 323690 : if (!entry->reduceResult || !dropItem(entry))
944 : break;
945 : }
946 : }
947 : else
948 : {
949 : /* A posting tree */
950 : for (;;)
951 : {
952 : /* If we've processed the current batch, load more items */
953 678894 : while (entry->offset >= entry->nlist)
954 : {
955 100 : entryLoadMoreItems(ginstate, entry, advancePast);
956 :
957 100 : if (entry->isFinished)
958 : {
959 6 : ItemPointerSetInvalid(&entry->curItem);
960 6 : return;
961 : }
962 : }
963 :
964 678794 : entry->curItem = entry->list[entry->offset++];
965 :
966 : /* If we're not past advancePast, keep scanning */
967 678794 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
968 47106 : continue;
969 :
970 : /* Done unless we need to reduce the result */
971 631688 : if (!entry->reduceResult || !dropItem(entry))
972 : break;
973 :
974 : /*
975 : * Advance advancePast (so that entryLoadMoreItems will load the
976 : * right data), and keep scanning
977 : */
978 118744 : advancePast = entry->curItem;
979 : }
980 : }
981 : }
982 :
983 : /*
984 : * Identify the "current" item among the input entry streams for this scan key
985 : * that is greater than advancePast, and test whether it passes the scan key
986 : * qual condition.
987 : *
988 : * The current item is the smallest curItem among the inputs. key->curItem
989 : * is set to that value. key->curItemMatches is set to indicate whether that
990 : * TID passes the consistentFn test. If so, key->recheckCurItem is set true
991 : * iff recheck is needed for this item pointer (including the case where the
992 : * item pointer is a lossy page pointer).
993 : *
994 : * If all entry streams are exhausted, sets key->isFinished to true.
995 : *
996 : * Item pointers must be returned in ascending order.
997 : *
998 : * Note: this can return a "lossy page" item pointer, indicating that the
999 : * key potentially matches all items on that heap page. However, it is
1000 : * not allowed to return both a lossy page pointer and exact (regular)
1001 : * item pointers for the same page. (Doing so would break the key-combination
1002 : * logic in scanGetItem.)
1003 : */
1004 : static void
1005 1002512 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
1006 : ItemPointerData advancePast)
1007 : {
1008 : ItemPointerData minItem;
1009 : ItemPointerData curPageLossy;
1010 : uint32 i;
1011 : bool haveLossyEntry;
1012 : GinScanEntry entry;
1013 : GinTernaryValue res;
1014 : MemoryContext oldCtx;
1015 : bool allFinished;
1016 :
1017 : Assert(!key->isFinished);
1018 :
1019 : /*
1020 : * We might have already tested this item; if so, no need to repeat work.
1021 : * (Note: the ">" case can happen, if advancePast is exact but we
1022 : * previously had to set curItem to a lossy-page pointer.)
1023 : */
1024 1002512 : if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
1025 1608 : return;
1026 :
1027 : /*
1028 : * Find the minimum item > advancePast among the active entry streams.
1029 : *
1030 : * Note: a lossy-page entry is encoded by a ItemPointer with max value for
1031 : * offset (0xffff), so that it will sort after any exact entries for the
1032 : * same page. So we'll prefer to return exact pointers not lossy
1033 : * pointers, which is good.
1034 : */
1035 1002490 : ItemPointerSetMax(&minItem);
1036 1002490 : allFinished = true;
1037 2757792 : for (i = 0; i < key->nrequired; i++)
1038 : {
1039 1755302 : entry = key->requiredEntries[i];
1040 :
1041 1755302 : if (entry->isFinished)
1042 560142 : continue;
1043 :
1044 : /*
1045 : * Advance this stream if necessary.
1046 : *
1047 : * In particular, since entry->curItem was initialized with
1048 : * ItemPointerSetMin, this ensures we fetch the first item for each
1049 : * entry on the first call.
1050 : */
1051 1195160 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1052 : {
1053 1034282 : entryGetItem(ginstate, entry, advancePast);
1054 1034282 : if (entry->isFinished)
1055 4138 : continue;
1056 : }
1057 :
1058 1191022 : allFinished = false;
1059 1191022 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1060 1097672 : minItem = entry->curItem;
1061 : }
1062 :
1063 1002490 : if (allFinished && !key->excludeOnly)
1064 : {
1065 : /* all entries are finished */
1066 1586 : key->isFinished = true;
1067 1586 : return;
1068 : }
1069 :
1070 1000904 : if (!key->excludeOnly)
1071 : {
1072 : /*
1073 : * For a normal scan key, we now know there are no matches < minItem.
1074 : *
1075 : * If minItem is lossy, it means that there were no exact items on the
1076 : * page among requiredEntries, because lossy pointers sort after exact
1077 : * items. However, there might be exact items for the same page among
1078 : * additionalEntries, so we mustn't advance past them.
1079 : */
1080 996428 : if (ItemPointerIsLossyPage(&minItem))
1081 : {
1082 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1083 0 : GinItemPointerGetBlockNumber(&minItem))
1084 : {
1085 0 : ItemPointerSet(&advancePast,
1086 : GinItemPointerGetBlockNumber(&minItem),
1087 : InvalidOffsetNumber);
1088 : }
1089 : }
1090 : else
1091 : {
1092 : Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
1093 996428 : ItemPointerSet(&advancePast,
1094 : GinItemPointerGetBlockNumber(&minItem),
1095 996428 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
1096 : }
1097 : }
1098 : else
1099 : {
1100 : /*
1101 : * excludeOnly scan keys don't have any entries that are necessarily
1102 : * present in matching items. So, we consider the item just after
1103 : * advancePast.
1104 : */
1105 : Assert(key->nrequired == 0);
1106 4476 : ItemPointerSet(&minItem,
1107 : GinItemPointerGetBlockNumber(&advancePast),
1108 4476 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
1109 : }
1110 :
1111 : /*
1112 : * We might not have loaded all the entry streams for this TID yet. We
1113 : * could call the consistent function, passing MAYBE for those entries, to
1114 : * see if it can decide if this TID matches based on the information we
1115 : * have. But if the consistent-function is expensive, and cannot in fact
1116 : * decide with partial information, that could be a big loss. So, load all
1117 : * the additional entries, before calling the consistent function.
1118 : */
1119 1067564 : for (i = 0; i < key->nadditional; i++)
1120 : {
1121 66660 : entry = key->additionalEntries[i];
1122 :
1123 66660 : if (entry->isFinished)
1124 412 : continue;
1125 :
1126 66248 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1127 : {
1128 53756 : entryGetItem(ginstate, entry, advancePast);
1129 53756 : if (entry->isFinished)
1130 178 : continue;
1131 : }
1132 :
1133 : /*
1134 : * Normally, none of the items in additionalEntries can have a curItem
1135 : * larger than minItem. But if minItem is a lossy page, then there
1136 : * might be exact items on the same page among additionalEntries.
1137 : */
1138 66070 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1139 : {
1140 : Assert(ItemPointerIsLossyPage(&minItem));
1141 0 : minItem = entry->curItem;
1142 : }
1143 : }
1144 :
1145 : /*
1146 : * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1147 : * and perform consistentFn test.
1148 : *
1149 : * Lossy-page entries pose a problem, since we don't know the correct
1150 : * entryRes state to pass to the consistentFn, and we also don't know what
1151 : * its combining logic will be (could be AND, OR, or even NOT). If the
1152 : * logic is OR then the consistentFn might succeed for all items in the
1153 : * lossy page even when none of the other entries match.
1154 : *
1155 : * Our strategy is to call the tri-state consistent function, with the
1156 : * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1157 : * returns FALSE, none of the lossy items alone are enough for a match, so
1158 : * we don't need to return a lossy-page pointer. Otherwise, return a
1159 : * lossy-page pointer to indicate that the whole heap page must be
1160 : * checked. (On subsequent calls, we'll do nothing until minItem is past
1161 : * the page altogether, thus ensuring that we never return both regular
1162 : * and lossy pointers for the same page.)
1163 : *
1164 : * An exception is that it doesn't matter what we pass for lossy pointers
1165 : * in "hidden" entries, because the consistentFn's result can't depend on
1166 : * them. We could pass them as MAYBE as well, but if we're using the
1167 : * "shim" implementation of a tri-state consistent function (see
1168 : * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1169 : * them as true.
1170 : *
1171 : * Note that only lossy-page entries pointing to the current item's page
1172 : * should trigger this processing; we might have future lossy pages in the
1173 : * entry array, but they aren't relevant yet.
1174 : */
1175 1000904 : key->curItem = minItem;
1176 1000904 : ItemPointerSetLossyPage(&curPageLossy,
1177 : GinItemPointerGetBlockNumber(&key->curItem));
1178 1000904 : haveLossyEntry = false;
1179 2817006 : for (i = 0; i < key->nentries; i++)
1180 : {
1181 1816102 : entry = key->scanEntry[i];
1182 3073194 : if (entry->isFinished == false &&
1183 1257092 : ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1184 : {
1185 0 : if (i < key->nuserentries)
1186 0 : key->entryRes[i] = GIN_MAYBE;
1187 : else
1188 0 : key->entryRes[i] = GIN_TRUE;
1189 0 : haveLossyEntry = true;
1190 : }
1191 : else
1192 1816102 : key->entryRes[i] = GIN_FALSE;
1193 : }
1194 :
1195 : /* prepare for calling consistentFn in temp context */
1196 1000904 : oldCtx = MemoryContextSwitchTo(tempCtx);
1197 :
1198 1000904 : if (haveLossyEntry)
1199 : {
1200 : /* Have lossy-page entries, so see if whole page matches */
1201 0 : res = key->triConsistentFn(key);
1202 :
1203 0 : if (res == GIN_TRUE || res == GIN_MAYBE)
1204 : {
1205 : /* Yes, so clean up ... */
1206 0 : MemoryContextSwitchTo(oldCtx);
1207 0 : MemoryContextReset(tempCtx);
1208 :
1209 : /* and return lossy pointer for whole page */
1210 0 : key->curItem = curPageLossy;
1211 0 : key->curItemMatches = true;
1212 0 : key->recheckCurItem = true;
1213 0 : return;
1214 : }
1215 : }
1216 :
1217 : /*
1218 : * At this point we know that we don't need to return a lossy whole-page
1219 : * pointer, but we might have matches for individual exact item pointers,
1220 : * possibly in combination with a lossy pointer. Pass lossy pointers as
1221 : * MAYBE to the ternary consistent function, to let it decide if this
1222 : * tuple satisfies the overall key, even though we don't know if the lossy
1223 : * entries match.
1224 : *
1225 : * Prepare entryRes array to be passed to consistentFn.
1226 : */
1227 2817006 : for (i = 0; i < key->nentries; i++)
1228 : {
1229 1816102 : entry = key->scanEntry[i];
1230 1816102 : if (entry->isFinished)
1231 559010 : key->entryRes[i] = GIN_FALSE;
1232 : #if 0
1233 :
1234 : /*
1235 : * This case can't currently happen, because we loaded all the entries
1236 : * for this item earlier.
1237 : */
1238 : else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1239 : key->entryRes[i] = GIN_MAYBE;
1240 : #endif
1241 1257092 : else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1242 0 : key->entryRes[i] = GIN_MAYBE;
1243 1257092 : else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1244 1073446 : key->entryRes[i] = GIN_TRUE;
1245 : else
1246 183646 : key->entryRes[i] = GIN_FALSE;
1247 : }
1248 :
1249 1000904 : res = key->triConsistentFn(key);
1250 :
1251 1000904 : switch (res)
1252 : {
1253 865256 : case GIN_TRUE:
1254 865256 : key->curItemMatches = true;
1255 : /* triConsistentFn set recheckCurItem */
1256 865256 : break;
1257 :
1258 18026 : case GIN_FALSE:
1259 18026 : key->curItemMatches = false;
1260 18026 : break;
1261 :
1262 117622 : case GIN_MAYBE:
1263 117622 : key->curItemMatches = true;
1264 117622 : key->recheckCurItem = true;
1265 117622 : break;
1266 :
1267 0 : default:
1268 :
1269 : /*
1270 : * the 'default' case shouldn't happen, but if the consistent
1271 : * function returns something bogus, this is the safe result
1272 : */
1273 0 : key->curItemMatches = true;
1274 0 : key->recheckCurItem = true;
1275 0 : break;
1276 : }
1277 :
1278 : /*
1279 : * We have a tuple, and we know if it matches or not. If it's a non-match,
1280 : * we could continue to find the next matching tuple, but let's break out
1281 : * and give scanGetItem a chance to advance the other keys. They might be
1282 : * able to skip past to a much higher TID, allowing us to save work.
1283 : */
1284 :
1285 : /* clean up after consistentFn calls */
1286 1000904 : MemoryContextSwitchTo(oldCtx);
1287 1000904 : MemoryContextReset(tempCtx);
1288 : }
1289 :
1290 : /*
1291 : * Get next heap item pointer (after advancePast) from scan.
1292 : * Returns true if anything found.
1293 : * On success, *item and *recheck are set.
1294 : *
1295 : * Note: this is very nearly the same logic as in keyGetItem(), except
1296 : * that we know the keys are to be combined with AND logic, whereas in
1297 : * keyGetItem() the combination logic is known only to the consistentFn.
1298 : */
1299 : static bool
1300 979892 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
1301 : ItemPointerData *item, bool *recheck)
1302 : {
1303 979892 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1304 : uint32 i;
1305 : bool match;
1306 :
1307 : /*----------
1308 : * Advance the scan keys in lock-step, until we find an item that matches
1309 : * all the keys. If any key reports isFinished, meaning its subset of the
1310 : * entries is exhausted, we can stop. Otherwise, set *item to the next
1311 : * matching item.
1312 : *
1313 : * This logic works only if a keyGetItem stream can never contain both
1314 : * exact and lossy pointers for the same page. Else we could have a
1315 : * case like
1316 : *
1317 : * stream 1 stream 2
1318 : * ... ...
1319 : * 42/6 42/7
1320 : * 50/1 42/0xffff
1321 : * ... ...
1322 : *
1323 : * We would conclude that 42/6 is not a match and advance stream 1,
1324 : * thus never detecting the match to the lossy pointer in stream 2.
1325 : * (keyGetItem has a similar problem versus entryGetItem.)
1326 : *----------
1327 : */
1328 : do
1329 : {
1330 997976 : ItemPointerSetMin(item);
1331 997976 : match = true;
1332 1980876 : for (i = 0; i < so->nkeys && match; i++)
1333 : {
1334 1002512 : GinScanKey key = so->keys + i;
1335 :
1336 : /*
1337 : * If we're considering a lossy page, skip excludeOnly keys. They
1338 : * can't exclude the whole page anyway.
1339 : */
1340 1002512 : if (ItemPointerIsLossyPage(item) && key->excludeOnly)
1341 : {
1342 : /*
1343 : * ginNewScanKey() should never mark the first key as
1344 : * excludeOnly.
1345 : */
1346 : Assert(i > 0);
1347 0 : continue;
1348 : }
1349 :
1350 : /* Fetch the next item for this key that is > advancePast. */
1351 1002512 : keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
1352 :
1353 1002512 : if (key->isFinished)
1354 1586 : return false;
1355 :
1356 : /*
1357 : * If it's not a match, we can immediately conclude that nothing
1358 : * <= this item matches, without checking the rest of the keys.
1359 : */
1360 1000926 : if (!key->curItemMatches)
1361 : {
1362 18026 : advancePast = key->curItem;
1363 18026 : match = false;
1364 18026 : break;
1365 : }
1366 :
1367 : /*
1368 : * It's a match. We can conclude that nothing < matches, so the
1369 : * other key streams can skip to this item.
1370 : *
1371 : * Beware of lossy pointers, though; from a lossy pointer, we can
1372 : * only conclude that nothing smaller than this *block* matches.
1373 : */
1374 982900 : if (ItemPointerIsLossyPage(&key->curItem))
1375 : {
1376 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1377 0 : GinItemPointerGetBlockNumber(&key->curItem))
1378 : {
1379 0 : ItemPointerSet(&advancePast,
1380 0 : GinItemPointerGetBlockNumber(&key->curItem),
1381 : InvalidOffsetNumber);
1382 : }
1383 : }
1384 : else
1385 : {
1386 : Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
1387 982900 : ItemPointerSet(&advancePast,
1388 982900 : GinItemPointerGetBlockNumber(&key->curItem),
1389 982900 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
1390 : }
1391 :
1392 : /*
1393 : * If this is the first key, remember this location as a potential
1394 : * match, and proceed to check the rest of the keys.
1395 : *
1396 : * Otherwise, check if this is the same item that we checked the
1397 : * previous keys for (or a lossy pointer for the same page). If
1398 : * not, loop back to check the previous keys for this item (we
1399 : * will check this key again too, but keyGetItem returns quickly
1400 : * for that)
1401 : */
1402 982900 : if (i == 0)
1403 : {
1404 978470 : *item = key->curItem;
1405 : }
1406 : else
1407 : {
1408 8860 : if (ItemPointerIsLossyPage(&key->curItem) ||
1409 4430 : ItemPointerIsLossyPage(item))
1410 : {
1411 : Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
1412 0 : match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1413 0 : GinItemPointerGetBlockNumber(item));
1414 : }
1415 : else
1416 : {
1417 : Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1418 4430 : match = (ginCompareItemPointers(&key->curItem, item) == 0);
1419 : }
1420 : }
1421 : }
1422 996390 : } while (!match);
1423 :
1424 : Assert(!ItemPointerIsMin(item));
1425 :
1426 : /*
1427 : * Now *item contains the first ItemPointer after previous result that
1428 : * satisfied all the keys for that exact TID, or a lossy reference to the
1429 : * same page.
1430 : *
1431 : * We must return recheck = true if any of the keys are marked recheck.
1432 : */
1433 978306 : *recheck = false;
1434 1826316 : for (i = 0; i < so->nkeys; i++)
1435 : {
1436 978678 : GinScanKey key = so->keys + i;
1437 :
1438 978678 : if (key->recheckCurItem)
1439 : {
1440 130668 : *recheck = true;
1441 130668 : break;
1442 : }
1443 : }
1444 :
1445 978306 : return true;
1446 : }
1447 :
1448 :
1449 : /*
1450 : * Functions for scanning the pending list
1451 : */
1452 :
1453 :
1454 : /*
1455 : * Get ItemPointer of next heap row to be checked from pending list.
1456 : * Returns false if there are no more. On pages with several heap rows
1457 : * it returns each row separately, on page with part of heap row returns
1458 : * per page data. pos->firstOffset and pos->lastOffset are set to identify
1459 : * the range of pending-list tuples belonging to this heap row.
1460 : *
1461 : * The pendingBuffer is presumed pinned and share-locked on entry, and is
1462 : * pinned and share-locked on success exit. On failure exit it's released.
1463 : */
1464 : static bool
1465 1614 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1466 : {
1467 : OffsetNumber maxoff;
1468 : Page page;
1469 : IndexTuple itup;
1470 :
1471 1614 : ItemPointerSetInvalid(&pos->item);
1472 : for (;;)
1473 : {
1474 1614 : page = BufferGetPage(pos->pendingBuffer);
1475 :
1476 1614 : maxoff = PageGetMaxOffsetNumber(page);
1477 1614 : if (pos->firstOffset > maxoff)
1478 : {
1479 166 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1480 :
1481 166 : if (blkno == InvalidBlockNumber)
1482 : {
1483 166 : UnlockReleaseBuffer(pos->pendingBuffer);
1484 166 : pos->pendingBuffer = InvalidBuffer;
1485 :
1486 166 : return false;
1487 : }
1488 : else
1489 : {
1490 : /*
1491 : * Here we must prevent deletion of next page by insertcleanup
1492 : * process, which may be trying to obtain exclusive lock on
1493 : * current page. So, we lock next page before releasing the
1494 : * current one
1495 : */
1496 0 : Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1497 :
1498 0 : LockBuffer(tmpbuf, GIN_SHARE);
1499 0 : UnlockReleaseBuffer(pos->pendingBuffer);
1500 :
1501 0 : pos->pendingBuffer = tmpbuf;
1502 0 : pos->firstOffset = FirstOffsetNumber;
1503 : }
1504 : }
1505 : else
1506 : {
1507 1448 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1508 1448 : pos->item = itup->t_tid;
1509 1448 : if (GinPageHasFullRow(page))
1510 : {
1511 : /*
1512 : * find itempointer to the next row
1513 : */
1514 3354 : for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1515 : {
1516 3188 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1517 3188 : if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1518 1282 : break;
1519 : }
1520 : }
1521 : else
1522 : {
1523 : /*
1524 : * All itempointers are the same on this page
1525 : */
1526 0 : pos->lastOffset = maxoff + 1;
1527 : }
1528 :
1529 : /*
1530 : * Now pos->firstOffset points to the first tuple of current heap
1531 : * row, pos->lastOffset points to the first tuple of next heap row
1532 : * (or to the end of page)
1533 : */
1534 1448 : break;
1535 : }
1536 : }
1537 :
1538 1448 : return true;
1539 : }
1540 :
1541 : /*
1542 : * Scan pending-list page from current tuple (off) up till the first of:
1543 : * - match is found (then returns true)
1544 : * - no later match is possible
1545 : * - tuple's attribute number is not equal to entry's attrnum
1546 : * - reach end of page
1547 : *
1548 : * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1549 : * of gintuple_get_key() on the current page.
1550 : */
1551 : static bool
1552 0 : matchPartialInPendingList(GinState *ginstate, Page page,
1553 : OffsetNumber off, OffsetNumber maxoff,
1554 : GinScanEntry entry,
1555 : Datum *datum, GinNullCategory *category,
1556 : bool *datumExtracted)
1557 : {
1558 : IndexTuple itup;
1559 : int32 cmp;
1560 :
1561 : /* Partial match to a null is not possible */
1562 0 : if (entry->queryCategory != GIN_CAT_NORM_KEY)
1563 0 : return false;
1564 :
1565 0 : while (off < maxoff)
1566 : {
1567 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1568 :
1569 0 : if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1570 0 : return false;
1571 :
1572 0 : if (datumExtracted[off - 1] == false)
1573 : {
1574 0 : datum[off - 1] = gintuple_get_key(ginstate, itup,
1575 0 : &category[off - 1]);
1576 0 : datumExtracted[off - 1] = true;
1577 : }
1578 :
1579 : /* Once we hit nulls, no further match is possible */
1580 0 : if (category[off - 1] != GIN_CAT_NORM_KEY)
1581 0 : return false;
1582 :
1583 : /*----------
1584 : * Check partial match.
1585 : * case cmp == 0 => match
1586 : * case cmp > 0 => not match and end scan (no later match possible)
1587 : * case cmp < 0 => not match and continue scan
1588 : *----------
1589 : */
1590 0 : cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1591 0 : ginstate->supportCollation[entry->attnum - 1],
1592 : entry->queryKey,
1593 0 : datum[off - 1],
1594 0 : UInt16GetDatum(entry->strategy),
1595 0 : PointerGetDatum(entry->extra_data)));
1596 0 : if (cmp == 0)
1597 0 : return true;
1598 0 : else if (cmp > 0)
1599 0 : return false;
1600 :
1601 0 : off++;
1602 : }
1603 :
1604 0 : return false;
1605 : }
1606 :
1607 : /*
1608 : * Set up the entryRes array for each key by looking at
1609 : * every entry for current heap row in pending list.
1610 : *
1611 : * Returns true if each scan key has at least one entryRes match.
1612 : * This corresponds to the situations where the normal index search will
1613 : * try to apply the key's consistentFn. (A tuple not meeting that requirement
1614 : * cannot be returned by the normal search since no entry stream will
1615 : * source its TID.)
1616 : *
1617 : * The pendingBuffer is presumed pinned and share-locked on entry.
1618 : */
1619 : static bool
1620 1448 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1621 : {
1622 1448 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1623 : OffsetNumber attrnum;
1624 : Page page;
1625 : IndexTuple itup;
1626 : int i,
1627 : j;
1628 :
1629 : /*
1630 : * Reset all entryRes and hasMatchKey flags
1631 : */
1632 3924 : for (i = 0; i < so->nkeys; i++)
1633 : {
1634 2476 : GinScanKey key = so->keys + i;
1635 :
1636 2476 : memset(key->entryRes, GIN_FALSE, key->nentries);
1637 : }
1638 1448 : memset(pos->hasMatchKey, false, so->nkeys);
1639 :
1640 : /*
1641 : * Outer loop iterates over multiple pending-list pages when a single heap
1642 : * row has entries spanning those pages.
1643 : */
1644 : for (;;)
1645 0 : {
1646 : Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1647 : GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1648 : bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1649 :
1650 : Assert(pos->lastOffset > pos->firstOffset);
1651 1448 : memset(datumExtracted + pos->firstOffset - 1, 0,
1652 1448 : sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1653 :
1654 1448 : page = BufferGetPage(pos->pendingBuffer);
1655 :
1656 3924 : for (i = 0; i < so->nkeys; i++)
1657 : {
1658 2476 : GinScanKey key = so->keys + i;
1659 :
1660 4776 : for (j = 0; j < key->nentries; j++)
1661 : {
1662 2300 : GinScanEntry entry = key->scanEntry[j];
1663 2300 : OffsetNumber StopLow = pos->firstOffset,
1664 2300 : StopHigh = pos->lastOffset,
1665 : StopMiddle;
1666 :
1667 : /* If already matched on earlier page, do no extra work */
1668 2300 : if (key->entryRes[j])
1669 0 : continue;
1670 :
1671 : /*
1672 : * Interesting tuples are from pos->firstOffset to
1673 : * pos->lastOffset and they are ordered by (attnum, Datum) as
1674 : * it's done in entry tree. So we can use binary search to
1675 : * avoid linear scanning.
1676 : */
1677 5108 : while (StopLow < StopHigh)
1678 : {
1679 : int res;
1680 :
1681 4010 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1682 :
1683 4010 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1684 :
1685 4010 : attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1686 :
1687 4010 : if (key->attnum < attrnum)
1688 : {
1689 798 : StopHigh = StopMiddle;
1690 798 : continue;
1691 : }
1692 3212 : if (key->attnum > attrnum)
1693 : {
1694 660 : StopLow = StopMiddle + 1;
1695 660 : continue;
1696 : }
1697 :
1698 2552 : if (datumExtracted[StopMiddle - 1] == false)
1699 : {
1700 4952 : datum[StopMiddle - 1] =
1701 2476 : gintuple_get_key(&so->ginstate, itup,
1702 2476 : &category[StopMiddle - 1]);
1703 2476 : datumExtracted[StopMiddle - 1] = true;
1704 : }
1705 :
1706 2552 : if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1707 : {
1708 : /* special behavior depending on searchMode */
1709 1084 : if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1710 : {
1711 : /* match anything except NULL_ITEM */
1712 1084 : if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1713 378 : res = -1;
1714 : else
1715 706 : res = 0;
1716 : }
1717 : else
1718 : {
1719 : /* match everything */
1720 0 : res = 0;
1721 : }
1722 : }
1723 : else
1724 : {
1725 1468 : res = ginCompareEntries(&so->ginstate,
1726 1468 : entry->attnum,
1727 : entry->queryKey,
1728 1468 : entry->queryCategory,
1729 1468 : datum[StopMiddle - 1],
1730 1468 : category[StopMiddle - 1]);
1731 : }
1732 :
1733 2552 : if (res == 0)
1734 : {
1735 : /*
1736 : * Found exact match (there can be only one, except in
1737 : * EMPTY_QUERY mode).
1738 : *
1739 : * If doing partial match, scan forward from here to
1740 : * end of page to check for matches.
1741 : *
1742 : * See comment above about tuple's ordering.
1743 : */
1744 1202 : if (entry->isPartialMatch)
1745 0 : key->entryRes[j] =
1746 0 : matchPartialInPendingList(&so->ginstate,
1747 : page,
1748 : StopMiddle,
1749 0 : pos->lastOffset,
1750 : entry,
1751 : datum,
1752 : category,
1753 : datumExtracted);
1754 : else
1755 1202 : key->entryRes[j] = true;
1756 :
1757 : /* done with binary search */
1758 1202 : break;
1759 : }
1760 1350 : else if (res < 0)
1761 1314 : StopHigh = StopMiddle;
1762 : else
1763 36 : StopLow = StopMiddle + 1;
1764 : }
1765 :
1766 2300 : if (StopLow >= StopHigh && entry->isPartialMatch)
1767 : {
1768 : /*
1769 : * No exact match on this page. If doing partial match,
1770 : * scan from the first tuple greater than target value to
1771 : * end of page. Note that since we don't remember whether
1772 : * the comparePartialFn told us to stop early on a
1773 : * previous page, we will uselessly apply comparePartialFn
1774 : * to the first tuple on each subsequent page.
1775 : */
1776 0 : key->entryRes[j] =
1777 0 : matchPartialInPendingList(&so->ginstate,
1778 : page,
1779 : StopHigh,
1780 0 : pos->lastOffset,
1781 : entry,
1782 : datum,
1783 : category,
1784 : datumExtracted);
1785 : }
1786 :
1787 2300 : pos->hasMatchKey[i] |= key->entryRes[j];
1788 : }
1789 : }
1790 :
1791 : /* Advance firstOffset over the scanned tuples */
1792 1448 : pos->firstOffset = pos->lastOffset;
1793 :
1794 1448 : if (GinPageHasFullRow(page))
1795 : {
1796 : /*
1797 : * We have examined all pending entries for the current heap row.
1798 : * Break out of loop over pages.
1799 : */
1800 1448 : break;
1801 : }
1802 : else
1803 : {
1804 : /*
1805 : * Advance to next page of pending entries for the current heap
1806 : * row. Complain if there isn't one.
1807 : */
1808 0 : ItemPointerData item = pos->item;
1809 :
1810 0 : if (scanGetCandidate(scan, pos) == false ||
1811 0 : !ItemPointerEquals(&pos->item, &item))
1812 0 : elog(ERROR, "could not find additional pending pages for same heap tuple");
1813 : }
1814 : }
1815 :
1816 : /*
1817 : * All scan keys except excludeOnly require at least one entry to match.
1818 : * excludeOnly keys are an exception, because their implied
1819 : * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all
1820 : * non-excludeOnly scan keys have at least one match.
1821 : */
1822 2538 : for (i = 0; i < so->nkeys; i++)
1823 : {
1824 1984 : if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
1825 894 : return false;
1826 : }
1827 :
1828 554 : return true;
1829 : }
1830 :
1831 : /*
1832 : * Collect all matched rows from pending list into bitmap.
1833 : */
1834 : static void
1835 1586 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1836 : {
1837 1586 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1838 : MemoryContext oldCtx;
1839 : bool recheck,
1840 : match;
1841 : int i;
1842 : pendingPosition pos;
1843 1586 : Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1844 : Page page;
1845 : BlockNumber blkno;
1846 :
1847 1586 : *ntids = 0;
1848 :
1849 : /*
1850 : * Acquire predicate lock on the metapage, to conflict with any fastupdate
1851 : * insertions.
1852 : */
1853 1586 : PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
1854 :
1855 1586 : LockBuffer(metabuffer, GIN_SHARE);
1856 1586 : page = BufferGetPage(metabuffer);
1857 1586 : blkno = GinPageGetMeta(page)->head;
1858 :
1859 : /*
1860 : * fetch head of list before unlocking metapage. head page must be pinned
1861 : * to prevent deletion by vacuum process
1862 : */
1863 1586 : if (blkno == InvalidBlockNumber)
1864 : {
1865 : /* No pending list, so proceed with normal scan */
1866 1420 : UnlockReleaseBuffer(metabuffer);
1867 1420 : return;
1868 : }
1869 :
1870 166 : pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1871 166 : LockBuffer(pos.pendingBuffer, GIN_SHARE);
1872 166 : pos.firstOffset = FirstOffsetNumber;
1873 166 : UnlockReleaseBuffer(metabuffer);
1874 166 : pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1875 :
1876 : /*
1877 : * loop for each heap row. scanGetCandidate returns full row or row's
1878 : * tuples from first page.
1879 : */
1880 1614 : while (scanGetCandidate(scan, &pos))
1881 : {
1882 : /*
1883 : * Check entries in tuple and set up entryRes array.
1884 : *
1885 : * If pending tuples belonging to the current heap row are spread
1886 : * across several pages, collectMatchesForHeapRow will read all of
1887 : * those pages.
1888 : */
1889 1448 : if (!collectMatchesForHeapRow(scan, &pos))
1890 894 : continue;
1891 :
1892 : /*
1893 : * Matching of entries of one row is finished, so check row using
1894 : * consistent functions.
1895 : */
1896 554 : oldCtx = MemoryContextSwitchTo(so->tempCtx);
1897 554 : recheck = false;
1898 554 : match = true;
1899 :
1900 1404 : for (i = 0; i < so->nkeys; i++)
1901 : {
1902 850 : GinScanKey key = so->keys + i;
1903 :
1904 850 : if (!key->boolConsistentFn(key))
1905 : {
1906 0 : match = false;
1907 0 : break;
1908 : }
1909 850 : recheck |= key->recheckCurItem;
1910 : }
1911 :
1912 554 : MemoryContextSwitchTo(oldCtx);
1913 554 : MemoryContextReset(so->tempCtx);
1914 :
1915 554 : if (match)
1916 : {
1917 554 : tbm_add_tuples(tbm, &pos.item, 1, recheck);
1918 554 : (*ntids)++;
1919 : }
1920 : }
1921 :
1922 166 : pfree(pos.hasMatchKey);
1923 : }
1924 :
1925 :
1926 : #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1927 :
1928 : int64
1929 1598 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
1930 : {
1931 1598 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1932 : int64 ntids;
1933 : ItemPointerData iptr;
1934 : bool recheck;
1935 :
1936 : /*
1937 : * Set up the scan keys, and check for unsatisfiable query.
1938 : */
1939 1598 : ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1940 : * sure */
1941 1598 : ginNewScanKey(scan);
1942 :
1943 1598 : if (GinIsVoidRes(scan))
1944 12 : return 0;
1945 :
1946 1586 : ntids = 0;
1947 :
1948 : /*
1949 : * First, scan the pending list and collect any matching entries into the
1950 : * bitmap. After we scan a pending item, some other backend could post it
1951 : * into the main index, and so we might visit it a second time during the
1952 : * main scan. This is okay because we'll just re-set the same bit in the
1953 : * bitmap. (The possibility of duplicate visits is a major reason why GIN
1954 : * can't support the amgettuple API, however.) Note that it would not do
1955 : * to scan the main index before the pending list, since concurrent
1956 : * cleanup could then make us miss entries entirely.
1957 : */
1958 1586 : scanPendingInsert(scan, tbm, &ntids);
1959 :
1960 : /*
1961 : * Now scan the main index.
1962 : */
1963 1586 : startScan(scan);
1964 :
1965 1586 : ItemPointerSetMin(&iptr);
1966 :
1967 : for (;;)
1968 : {
1969 979892 : CHECK_FOR_INTERRUPTS();
1970 :
1971 979892 : if (!scanGetItem(scan, iptr, &iptr, &recheck))
1972 1586 : break;
1973 :
1974 978306 : if (ItemPointerIsLossyPage(&iptr))
1975 0 : tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1976 : else
1977 978306 : tbm_add_tuples(tbm, &iptr, 1, recheck);
1978 978306 : ntids++;
1979 : }
1980 :
1981 1586 : return ntids;
1982 : }
|