Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginget.c
4 : * fetch tuples from a GIN scan.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginget.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/relscan.h"
19 : #include "common/pg_prng.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/datum.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : /* GUC parameter */
27 : int GinFuzzySearchLimit = 0;
28 :
29 : typedef struct pendingPosition
30 : {
31 : Buffer pendingBuffer;
32 : OffsetNumber firstOffset;
33 : OffsetNumber lastOffset;
34 : ItemPointerData item;
35 : bool *hasMatchKey;
36 : } pendingPosition;
37 :
38 :
39 : /*
40 : * Goes to the next page if current offset is outside of bounds
41 : */
42 : static bool
43 407608 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
44 : {
45 407608 : Page page = BufferGetPage(stack->buffer);
46 :
47 407608 : if (stack->off > PageGetMaxOffsetNumber(page))
48 : {
49 : /*
50 : * We scanned the whole page, so we should take right page
51 : */
52 3790 : if (GinPageRightMost(page))
53 366 : return false; /* no more pages */
54 :
55 3424 : stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
56 3424 : stack->blkno = BufferGetBlockNumber(stack->buffer);
57 3424 : stack->off = FirstOffsetNumber;
58 3424 : PredicateLockPage(btree->index, stack->blkno, snapshot);
59 : }
60 :
61 407242 : return true;
62 : }
63 :
64 : /*
65 : * Scan all pages of a posting tree and save all its heap ItemPointers
66 : * in scanEntry->matchBitmap
67 : */
68 : static void
69 12 : scanPostingTree(Relation index, GinScanEntry scanEntry,
70 : BlockNumber rootPostingTree, Snapshot snapshot)
71 : {
72 : GinBtreeData btree;
73 : GinBtreeStack *stack;
74 : Buffer buffer;
75 : Page page;
76 :
77 : /* Descend to the leftmost leaf page */
78 12 : stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
79 12 : buffer = stack->buffer;
80 :
81 12 : IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
82 :
83 12 : freeGinBtreeStack(stack);
84 :
85 : /*
86 : * Loop iterates through all leaf pages of posting tree
87 : */
88 : for (;;)
89 : {
90 30 : page = BufferGetPage(buffer);
91 30 : if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
92 : {
93 30 : int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
94 :
95 30 : scanEntry->predictNumberResult += n;
96 : }
97 :
98 30 : if (GinPageRightMost(page))
99 12 : break; /* no more pages */
100 :
101 18 : buffer = ginStepRight(buffer, index, GIN_SHARE);
102 : }
103 :
104 12 : UnlockReleaseBuffer(buffer);
105 12 : }
106 :
107 : /*
108 : * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
109 : * match the search entry. This supports three different match modes:
110 : *
111 : * 1. Partial-match support: scan from current point until the
112 : * comparePartialFn says we're done.
113 : * 2. SEARCH_MODE_ALL: scan from current point (which should be first
114 : * key for the current attnum) until we hit null items or end of attnum
115 : * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
116 : * key for the current attnum) until we hit end of attnum
117 : *
118 : * Returns true if done, false if it's necessary to restart scan from scratch
119 : */
120 : static bool
121 534 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
122 : GinScanEntry scanEntry, Snapshot snapshot)
123 : {
124 : OffsetNumber attnum;
125 : Form_pg_attribute attr;
126 :
127 : /* Initialize empty bitmap result */
128 534 : scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
129 :
130 : /* Null query cannot partial-match anything */
131 534 : if (scanEntry->isPartialMatch &&
132 252 : scanEntry->queryCategory != GIN_CAT_NORM_KEY)
133 0 : return true;
134 :
135 : /* Locate tupdesc entry for key column (for attbyval/attlen data) */
136 534 : attnum = scanEntry->attnum;
137 534 : attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
138 :
139 : /*
140 : * Predicate lock entry leaf page, following pages will be locked by
141 : * moveRightIfItNeeded()
142 : */
143 534 : PredicateLockPage(btree->index, stack->buffer, snapshot);
144 :
145 : for (;;)
146 407062 : {
147 : Page page;
148 : IndexTuple itup;
149 : Datum idatum;
150 : GinNullCategory icategory;
151 :
152 : /*
153 : * stack->off points to the interested entry, buffer is already locked
154 : */
155 407596 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
156 534 : return true;
157 :
158 407230 : page = BufferGetPage(stack->buffer);
159 407230 : TestForOldSnapshot(snapshot, btree->index, page);
160 407230 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
161 :
162 : /*
163 : * If tuple stores another attribute then stop scan
164 : */
165 407230 : if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
166 0 : return true;
167 :
168 : /* Safe to fetch attribute value */
169 407230 : idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
170 :
171 : /*
172 : * Check for appropriate scan stop conditions
173 : */
174 407230 : if (scanEntry->isPartialMatch)
175 : {
176 : int32 cmp;
177 :
178 : /*
179 : * In partial match, stop scan at any null (including
180 : * placeholders); partial matches never match nulls
181 : */
182 1324 : if (icategory != GIN_CAT_NORM_KEY)
183 10 : return true;
184 :
185 : /*----------
186 : * Check of partial match.
187 : * case cmp == 0 => match
188 : * case cmp > 0 => not match and finish scan
189 : * case cmp < 0 => not match and continue scan
190 : *----------
191 : */
192 1314 : cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
193 1314 : btree->ginstate->supportCollation[attnum - 1],
194 : scanEntry->queryKey,
195 : idatum,
196 1314 : UInt16GetDatum(scanEntry->strategy),
197 1314 : PointerGetDatum(scanEntry->extra_data)));
198 :
199 1314 : if (cmp > 0)
200 130 : return true;
201 1184 : else if (cmp < 0)
202 : {
203 60 : stack->off++;
204 60 : continue;
205 : }
206 : }
207 405906 : else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
208 : {
209 : /*
210 : * In ALL mode, we are not interested in null items, so we can
211 : * stop if we get to a null-item placeholder (which will be the
212 : * last entry for a given attnum). We do want to include NULL_KEY
213 : * and EMPTY_ITEM entries, though.
214 : */
215 405906 : if (icategory == GIN_CAT_NULL_ITEM)
216 28 : return true;
217 : }
218 :
219 : /*
220 : * OK, we want to return the TIDs listed in this entry.
221 : */
222 407002 : if (GinIsPostingTree(itup))
223 : {
224 12 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
225 :
226 : /*
227 : * We should unlock current page (but not unpin) during tree scan
228 : * to prevent deadlock with vacuum processes.
229 : *
230 : * We save current entry value (idatum) to be able to re-find our
231 : * tuple after re-locking
232 : */
233 12 : if (icategory == GIN_CAT_NORM_KEY)
234 12 : idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
235 :
236 12 : LockBuffer(stack->buffer, GIN_UNLOCK);
237 :
238 : /*
239 : * Acquire predicate lock on the posting tree. We already hold a
240 : * lock on the entry page, but insertions to the posting tree
241 : * don't check for conflicts on that level.
242 : */
243 12 : PredicateLockPage(btree->index, rootPostingTree, snapshot);
244 :
245 : /* Collect all the TIDs in this entry's posting tree */
246 12 : scanPostingTree(btree->index, scanEntry, rootPostingTree,
247 : snapshot);
248 :
249 : /*
250 : * We lock again the entry page and while it was unlocked insert
251 : * might have occurred, so we need to re-find our position.
252 : */
253 12 : LockBuffer(stack->buffer, GIN_SHARE);
254 12 : page = BufferGetPage(stack->buffer);
255 12 : if (!GinPageIsLeaf(page))
256 : {
257 : /*
258 : * Root page becomes non-leaf while we unlock it. We will
259 : * start again, this situation doesn't occur often - root can
260 : * became a non-leaf only once per life of index.
261 : */
262 0 : return false;
263 : }
264 :
265 : /* Search forward to re-find idatum */
266 : for (;;)
267 : {
268 12 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
269 0 : ereport(ERROR,
270 : (errcode(ERRCODE_INTERNAL_ERROR),
271 : errmsg("failed to re-find tuple within index \"%s\"",
272 : RelationGetRelationName(btree->index))));
273 :
274 12 : page = BufferGetPage(stack->buffer);
275 12 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
276 :
277 12 : if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
278 : {
279 : Datum newDatum;
280 : GinNullCategory newCategory;
281 :
282 12 : newDatum = gintuple_get_key(btree->ginstate, itup,
283 : &newCategory);
284 :
285 12 : if (ginCompareEntries(btree->ginstate, attnum,
286 : newDatum, newCategory,
287 : idatum, icategory) == 0)
288 12 : break; /* Found! */
289 : }
290 :
291 0 : stack->off++;
292 : }
293 :
294 12 : if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
295 0 : pfree(DatumGetPointer(idatum));
296 : }
297 : else
298 : {
299 : ItemPointer ipd;
300 : int nipd;
301 :
302 406990 : ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
303 406990 : tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
304 406990 : scanEntry->predictNumberResult += GinGetNPosting(itup);
305 406990 : pfree(ipd);
306 : }
307 :
308 : /*
309 : * Done with this entry, go to the next
310 : */
311 407002 : stack->off++;
312 : }
313 : }
314 :
315 : /*
316 : * Start* functions setup beginning state of searches: finds correct buffer and pins it.
317 : */
318 : static void
319 3692 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
320 : {
321 : GinBtreeData btreeEntry;
322 : GinBtreeStack *stackEntry;
323 : Page page;
324 : bool needUnlock;
325 :
326 3692 : restartScanEntry:
327 3692 : entry->buffer = InvalidBuffer;
328 3692 : ItemPointerSetMin(&entry->curItem);
329 3692 : entry->offset = InvalidOffsetNumber;
330 3692 : if (entry->list)
331 0 : pfree(entry->list);
332 3692 : entry->list = NULL;
333 3692 : entry->nlist = 0;
334 3692 : entry->matchBitmap = NULL;
335 3692 : entry->matchResult = NULL;
336 3692 : entry->reduceResult = false;
337 3692 : entry->predictNumberResult = 0;
338 :
339 : /*
340 : * we should find entry, and begin scan of posting tree or just store
341 : * posting list in memory
342 : */
343 3692 : ginPrepareEntryScan(&btreeEntry, entry->attnum,
344 3692 : entry->queryKey, entry->queryCategory,
345 : ginstate);
346 3692 : stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot);
347 3692 : page = BufferGetPage(stackEntry->buffer);
348 :
349 : /* ginFindLeafPage() will have already checked snapshot age. */
350 3692 : needUnlock = true;
351 :
352 3692 : entry->isFinished = true;
353 :
354 3692 : if (entry->isPartialMatch ||
355 3440 : entry->queryCategory == GIN_CAT_EMPTY_QUERY)
356 : {
357 : /*
358 : * btreeEntry.findItem locates the first item >= given search key.
359 : * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
360 : * because of the way the GIN_CAT_EMPTY_QUERY category code is
361 : * assigned.) We scan forward from there and collect all TIDs needed
362 : * for the entry type.
363 : */
364 534 : btreeEntry.findItem(&btreeEntry, stackEntry);
365 534 : if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
366 534 : == false)
367 : {
368 : /*
369 : * GIN tree was seriously restructured, so we will cleanup all
370 : * found data and rescan. See comments near 'return false' in
371 : * collectMatchBitmap()
372 : */
373 0 : if (entry->matchBitmap)
374 : {
375 0 : if (entry->matchIterator)
376 0 : tbm_end_iterate(entry->matchIterator);
377 0 : entry->matchIterator = NULL;
378 0 : tbm_free(entry->matchBitmap);
379 0 : entry->matchBitmap = NULL;
380 : }
381 0 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
382 0 : freeGinBtreeStack(stackEntry);
383 0 : goto restartScanEntry;
384 : }
385 :
386 534 : if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
387 : {
388 420 : entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
389 420 : entry->isFinished = false;
390 : }
391 : }
392 3158 : else if (btreeEntry.findItem(&btreeEntry, stackEntry))
393 : {
394 2178 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
395 :
396 2178 : if (GinIsPostingTree(itup))
397 : {
398 64 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
399 : GinBtreeStack *stack;
400 : Page entrypage;
401 : ItemPointerData minItem;
402 :
403 : /*
404 : * This is an equality scan, so lock the root of the posting tree.
405 : * It represents a lock on the exact key value, and covers all the
406 : * items in the posting tree.
407 : */
408 64 : PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
409 :
410 : /*
411 : * We should unlock entry page before touching posting tree to
412 : * prevent deadlocks with vacuum processes. Because entry is never
413 : * deleted from page and posting tree is never reduced to the
414 : * posting list, we can unlock page after getting BlockNumber of
415 : * root of posting tree.
416 : */
417 64 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
418 64 : needUnlock = false;
419 :
420 64 : stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
421 : rootPostingTree, snapshot);
422 64 : entry->buffer = stack->buffer;
423 :
424 : /*
425 : * We keep buffer pinned because we need to prevent deletion of
426 : * page during scan. See GIN's vacuum implementation. RefCount is
427 : * increased to keep buffer pinned after freeGinBtreeStack() call.
428 : */
429 64 : IncrBufferRefCount(entry->buffer);
430 :
431 64 : entrypage = BufferGetPage(entry->buffer);
432 :
433 : /*
434 : * Load the first page into memory.
435 : */
436 64 : ItemPointerSetMin(&minItem);
437 64 : entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
438 :
439 64 : entry->predictNumberResult = stack->predictNumber * entry->nlist;
440 :
441 64 : LockBuffer(entry->buffer, GIN_UNLOCK);
442 64 : freeGinBtreeStack(stack);
443 64 : entry->isFinished = false;
444 : }
445 : else
446 : {
447 : /*
448 : * Lock the entry leaf page. This is more coarse-grained than
449 : * necessary, because it will conflict with any insertions that
450 : * land on the same leaf page, not only the exact key we searched
451 : * for. But locking an individual tuple would require updating
452 : * that lock whenever it moves because of insertions or vacuums,
453 : * which seems too complicated.
454 : */
455 2114 : PredicateLockPage(ginstate->index,
456 : BufferGetBlockNumber(stackEntry->buffer),
457 : snapshot);
458 2114 : if (GinGetNPosting(itup) > 0)
459 : {
460 2108 : entry->list = ginReadTuple(ginstate, entry->attnum, itup,
461 : &entry->nlist);
462 2108 : entry->predictNumberResult = entry->nlist;
463 :
464 2108 : entry->isFinished = false;
465 : }
466 : }
467 : }
468 : else
469 : {
470 : /*
471 : * No entry found. Predicate lock the leaf page, to lock the place
472 : * where the entry would've been, had there been one.
473 : */
474 980 : PredicateLockPage(ginstate->index,
475 : BufferGetBlockNumber(stackEntry->buffer), snapshot);
476 : }
477 :
478 3692 : if (needUnlock)
479 3628 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
480 3692 : freeGinBtreeStack(stackEntry);
481 3692 : }
482 :
483 : /*
484 : * Comparison function for scan entry indexes. Sorts by predictNumberResult,
485 : * least frequent items first.
486 : */
487 : static int
488 3474 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
489 : {
490 3474 : const GinScanKey key = (const GinScanKey) arg;
491 3474 : int i1 = *(const int *) a1;
492 3474 : int i2 = *(const int *) a2;
493 3474 : uint32 n1 = key->scanEntry[i1]->predictNumberResult;
494 3474 : uint32 n2 = key->scanEntry[i2]->predictNumberResult;
495 :
496 3474 : if (n1 < n2)
497 634 : return -1;
498 2840 : else if (n1 == n2)
499 1116 : return 0;
500 : else
501 1724 : return 1;
502 : }
503 :
504 : static void
505 1704 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
506 : {
507 1704 : MemoryContext oldCtx = CurrentMemoryContext;
508 : int i;
509 : int j;
510 : int *entryIndexes;
511 :
512 1704 : ItemPointerSetMin(&key->curItem);
513 1704 : key->curItemMatches = false;
514 1704 : key->recheckCurItem = false;
515 1704 : key->isFinished = false;
516 :
517 : /*
518 : * Divide the entries into two distinct sets: required and additional.
519 : * Additional entries are not enough for a match alone, without any items
520 : * from the required set, but are needed by the consistent function to
521 : * decide if an item matches. When scanning, we can skip over items from
522 : * additional entries that have no corresponding matches in any of the
523 : * required entries. That speeds up queries like "frequent & rare"
524 : * considerably, if the frequent term can be put in the additional set.
525 : *
526 : * There can be many legal ways to divide them entries into these two
527 : * sets. A conservative division is to just put everything in the required
528 : * set, but the more you can put in the additional set, the more you can
529 : * skip during the scan. To maximize skipping, we try to put as many
530 : * frequent items as possible into additional, and less frequent ones into
531 : * required. To do that, sort the entries by frequency
532 : * (predictNumberResult), and put entries into the required set in that
533 : * order, until the consistent function says that none of the remaining
534 : * entries can form a match, without any items from the required set. The
535 : * rest go to the additional set.
536 : *
537 : * Exclude-only scan keys are known to have no required entries.
538 : */
539 1704 : if (key->excludeOnly)
540 : {
541 38 : MemoryContextSwitchTo(so->keyCtx);
542 :
543 38 : key->nrequired = 0;
544 38 : key->nadditional = key->nentries;
545 38 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
546 44 : for (i = 0; i < key->nadditional; i++)
547 6 : key->additionalEntries[i] = key->scanEntry[i];
548 : }
549 1666 : else if (key->nentries > 1)
550 : {
551 550 : MemoryContextSwitchTo(so->tempCtx);
552 :
553 550 : entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
554 3120 : for (i = 0; i < key->nentries; i++)
555 2570 : entryIndexes[i] = i;
556 550 : qsort_arg(entryIndexes, key->nentries, sizeof(int),
557 : entryIndexByFrequencyCmp, key);
558 :
559 1834 : for (i = 0; i < key->nentries - 1; i++)
560 : {
561 : /* Pass all entries <= i as FALSE, and the rest as MAYBE */
562 69558 : for (j = 0; j <= i; j++)
563 67856 : key->entryRes[entryIndexes[j]] = GIN_FALSE;
564 70366 : for (j = i + 1; j < key->nentries; j++)
565 68664 : key->entryRes[entryIndexes[j]] = GIN_MAYBE;
566 :
567 1702 : if (key->triConsistentFn(key) == GIN_FALSE)
568 418 : break;
569 : }
570 : /* i is now the last required entry. */
571 :
572 550 : MemoryContextSwitchTo(so->keyCtx);
573 :
574 550 : key->nrequired = i + 1;
575 550 : key->nadditional = key->nentries - key->nrequired;
576 550 : key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
577 550 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
578 :
579 550 : j = 0;
580 2384 : for (i = 0; i < key->nrequired; i++)
581 1834 : key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
582 1286 : for (i = 0; i < key->nadditional; i++)
583 736 : key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
584 :
585 : /* clean up after consistentFn calls (also frees entryIndexes) */
586 550 : MemoryContextReset(so->tempCtx);
587 : }
588 : else
589 : {
590 1116 : MemoryContextSwitchTo(so->keyCtx);
591 :
592 1116 : key->nrequired = 1;
593 1116 : key->nadditional = 0;
594 1116 : key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
595 1116 : key->requiredEntries[0] = key->scanEntry[0];
596 : }
597 1704 : MemoryContextSwitchTo(oldCtx);
598 1704 : }
599 :
600 : static void
601 1576 : startScan(IndexScanDesc scan)
602 : {
603 1576 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
604 1576 : GinState *ginstate = &so->ginstate;
605 : uint32 i;
606 :
607 5268 : for (i = 0; i < so->totalentries; i++)
608 3692 : startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
609 :
610 1576 : if (GinFuzzySearchLimit > 0)
611 : {
612 : /*
613 : * If all of keys more than threshold we will try to reduce result, we
614 : * hope (and only hope, for intersection operation of array our
615 : * supposition isn't true), that total result will not more than
616 : * minimal predictNumberResult.
617 : */
618 6 : bool reduce = true;
619 :
620 12 : for (i = 0; i < so->totalentries; i++)
621 : {
622 6 : if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
623 : {
624 0 : reduce = false;
625 0 : break;
626 : }
627 : }
628 6 : if (reduce)
629 : {
630 12 : for (i = 0; i < so->totalentries; i++)
631 : {
632 6 : so->entries[i]->predictNumberResult /= so->totalentries;
633 6 : so->entries[i]->reduceResult = true;
634 : }
635 : }
636 : }
637 :
638 : /*
639 : * Now that we have the estimates for the entry frequencies, finish
640 : * initializing the scan keys.
641 : */
642 3280 : for (i = 0; i < so->nkeys; i++)
643 1704 : startScanKey(ginstate, so, so->keys + i);
644 1576 : }
645 :
646 : /*
647 : * Load the next batch of item pointers from a posting tree.
648 : *
649 : * Note that we copy the page into GinScanEntry->list array and unlock it, but
650 : * keep it pinned to prevent interference with vacuum.
651 : */
652 : static void
653 100 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
654 : ItemPointerData advancePast, Snapshot snapshot)
655 : {
656 : Page page;
657 : int i;
658 : bool stepright;
659 :
660 100 : if (!BufferIsValid(entry->buffer))
661 : {
662 0 : entry->isFinished = true;
663 0 : return;
664 : }
665 :
666 : /*
667 : * We have two strategies for finding the correct page: step right from
668 : * the current page, or descend the tree again from the root. If
669 : * advancePast equals the current item, the next matching item should be
670 : * on the next page, so we step right. Otherwise, descend from root.
671 : */
672 100 : if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
673 : {
674 94 : stepright = true;
675 94 : LockBuffer(entry->buffer, GIN_SHARE);
676 : }
677 : else
678 : {
679 : GinBtreeStack *stack;
680 :
681 6 : ReleaseBuffer(entry->buffer);
682 :
683 : /*
684 : * Set the search key, and find the correct leaf page.
685 : */
686 6 : if (ItemPointerIsLossyPage(&advancePast))
687 : {
688 0 : ItemPointerSet(&entry->btree.itemptr,
689 0 : GinItemPointerGetBlockNumber(&advancePast) + 1,
690 : FirstOffsetNumber);
691 : }
692 : else
693 : {
694 6 : ItemPointerSet(&entry->btree.itemptr,
695 : GinItemPointerGetBlockNumber(&advancePast),
696 6 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
697 : }
698 6 : entry->btree.fullScan = false;
699 6 : stack = ginFindLeafPage(&entry->btree, true, false, snapshot);
700 :
701 : /* we don't need the stack, just the buffer. */
702 6 : entry->buffer = stack->buffer;
703 6 : IncrBufferRefCount(entry->buffer);
704 6 : freeGinBtreeStack(stack);
705 6 : stepright = false;
706 : }
707 :
708 100 : elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
709 : GinItemPointerGetBlockNumber(&advancePast),
710 : GinItemPointerGetOffsetNumber(&advancePast),
711 : !stepright);
712 :
713 100 : page = BufferGetPage(entry->buffer);
714 : for (;;)
715 : {
716 106 : entry->offset = InvalidOffsetNumber;
717 106 : if (entry->list)
718 : {
719 94 : pfree(entry->list);
720 94 : entry->list = NULL;
721 94 : entry->nlist = 0;
722 : }
723 :
724 106 : if (stepright)
725 : {
726 : /*
727 : * We've processed all the entries on this page. If it was the
728 : * last page in the tree, we're done.
729 : */
730 100 : if (GinPageRightMost(page))
731 : {
732 6 : UnlockReleaseBuffer(entry->buffer);
733 6 : entry->buffer = InvalidBuffer;
734 6 : entry->isFinished = true;
735 6 : return;
736 : }
737 :
738 : /*
739 : * Step to next page, following the right link. then find the
740 : * first ItemPointer greater than advancePast.
741 : */
742 94 : entry->buffer = ginStepRight(entry->buffer,
743 : ginstate->index,
744 : GIN_SHARE);
745 94 : page = BufferGetPage(entry->buffer);
746 : }
747 100 : stepright = true;
748 :
749 100 : if (GinPageGetOpaque(page)->flags & GIN_DELETED)
750 0 : continue; /* page was deleted by concurrent vacuum */
751 :
752 : /*
753 : * The first item > advancePast might not be on this page, but
754 : * somewhere to the right, if the page was split, or a non-match from
755 : * another key in the query allowed us to skip some items from this
756 : * entry. Keep following the right-links until we re-find the correct
757 : * page.
758 : */
759 136 : if (!GinPageRightMost(page) &&
760 36 : ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
761 : {
762 : /*
763 : * the item we're looking is > the right bound of the page, so it
764 : * can't be on this page.
765 : */
766 0 : continue;
767 : }
768 :
769 100 : entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
770 :
771 1924 : for (i = 0; i < entry->nlist; i++)
772 : {
773 1918 : if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
774 : {
775 94 : entry->offset = i;
776 :
777 94 : if (GinPageRightMost(page))
778 : {
779 : /* after processing the copied items, we're done. */
780 58 : UnlockReleaseBuffer(entry->buffer);
781 58 : entry->buffer = InvalidBuffer;
782 : }
783 : else
784 36 : LockBuffer(entry->buffer, GIN_UNLOCK);
785 94 : return;
786 : }
787 : }
788 : }
789 : }
790 :
791 : #define gin_rand() pg_prng_double(&pg_global_prng_state)
792 : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
793 :
794 : /*
795 : * Sets entry->curItem to next heap item pointer > advancePast, for one entry
796 : * of one scan key, or sets entry->isFinished to true if there are no more.
797 : *
798 : * Item pointers are returned in ascending order.
799 : *
800 : * Note: this can return a "lossy page" item pointer, indicating that the
801 : * entry potentially matches all items on that heap page. However, it is
802 : * not allowed to return both a lossy page pointer and exact (regular)
803 : * item pointers for the same page. (Doing so would break the key-combination
804 : * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
805 : * current implementation this is guaranteed by the behavior of tidbitmaps.
806 : */
807 : static void
808 1083352 : entryGetItem(GinState *ginstate, GinScanEntry entry,
809 : ItemPointerData advancePast, Snapshot snapshot)
810 : {
811 : Assert(!entry->isFinished);
812 :
813 : Assert(!ItemPointerIsValid(&entry->curItem) ||
814 : ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
815 :
816 1083352 : if (entry->matchBitmap)
817 : {
818 : /* A bitmap result */
819 268040 : BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
820 268040 : OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
821 :
822 : for (;;)
823 : {
824 : /*
825 : * If we've exhausted all items on this block, move to next block
826 : * in the bitmap.
827 : */
828 272866 : while (entry->matchResult == NULL ||
829 272446 : (entry->matchResult->ntuples >= 0 &&
830 272446 : entry->offset >= entry->matchResult->ntuples) ||
831 535240 : entry->matchResult->blockno < advancePastBlk ||
832 267620 : (ItemPointerIsLossyPage(&advancePast) &&
833 0 : entry->matchResult->blockno == advancePastBlk))
834 : {
835 5246 : entry->matchResult = tbm_iterate(entry->matchIterator);
836 :
837 5246 : if (entry->matchResult == NULL)
838 : {
839 420 : ItemPointerSetInvalid(&entry->curItem);
840 420 : tbm_end_iterate(entry->matchIterator);
841 420 : entry->matchIterator = NULL;
842 420 : entry->isFinished = true;
843 420 : break;
844 : }
845 :
846 : /*
847 : * Reset counter to the beginning of entry->matchResult. Note:
848 : * entry->offset is still greater than matchResult->ntuples if
849 : * matchResult is lossy. So, on next call we will get next
850 : * result from TIDBitmap.
851 : */
852 4826 : entry->offset = 0;
853 : }
854 268040 : if (entry->isFinished)
855 420 : break;
856 :
857 : /*
858 : * We're now on the first page after advancePast which has any
859 : * items on it. If it's a lossy result, return that.
860 : */
861 267620 : if (entry->matchResult->ntuples < 0)
862 : {
863 0 : ItemPointerSetLossyPage(&entry->curItem,
864 : entry->matchResult->blockno);
865 :
866 : /*
867 : * We might as well fall out of the loop; we could not
868 : * estimate number of results on this page to support correct
869 : * reducing of result even if it's enabled.
870 : */
871 0 : break;
872 : }
873 :
874 : /*
875 : * Not a lossy page. Skip over any offsets <= advancePast, and
876 : * return that.
877 : */
878 267620 : if (entry->matchResult->blockno == advancePastBlk)
879 : {
880 : /*
881 : * First, do a quick check against the last offset on the
882 : * page. If that's > advancePast, so are all the other
883 : * offsets, so just go back to the top to get the next page.
884 : */
885 263214 : if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
886 : {
887 0 : entry->offset = entry->matchResult->ntuples;
888 0 : continue;
889 : }
890 :
891 : /* Otherwise scan to find the first item > advancePast */
892 263214 : while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
893 0 : entry->offset++;
894 : }
895 :
896 267620 : ItemPointerSet(&entry->curItem,
897 267620 : entry->matchResult->blockno,
898 267620 : entry->matchResult->offsets[entry->offset]);
899 267620 : entry->offset++;
900 :
901 : /* Done unless we need to reduce the result */
902 267620 : if (!entry->reduceResult || !dropItem(entry))
903 : break;
904 : }
905 : }
906 815312 : else if (!BufferIsValid(entry->buffer))
907 : {
908 : /*
909 : * A posting list from an entry tuple, or the last page of a posting
910 : * tree.
911 : */
912 : for (;;)
913 : {
914 389548 : if (entry->offset >= entry->nlist)
915 : {
916 1620 : ItemPointerSetInvalid(&entry->curItem);
917 1620 : entry->isFinished = true;
918 1620 : break;
919 : }
920 :
921 387928 : entry->curItem = entry->list[entry->offset++];
922 :
923 : /* If we're not past advancePast, keep scanning */
924 387928 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
925 66634 : continue;
926 :
927 : /* Done unless we need to reduce the result */
928 321294 : if (!entry->reduceResult || !dropItem(entry))
929 : break;
930 : }
931 : }
932 : else
933 : {
934 : /* A posting tree */
935 : for (;;)
936 : {
937 : /* If we've processed the current batch, load more items */
938 678998 : while (entry->offset >= entry->nlist)
939 : {
940 100 : entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
941 :
942 100 : if (entry->isFinished)
943 : {
944 6 : ItemPointerSetInvalid(&entry->curItem);
945 6 : return;
946 : }
947 : }
948 :
949 678898 : entry->curItem = entry->list[entry->offset++];
950 :
951 : /* If we're not past advancePast, keep scanning */
952 678898 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
953 47106 : continue;
954 :
955 : /* Done unless we need to reduce the result */
956 631792 : if (!entry->reduceResult || !dropItem(entry))
957 : break;
958 :
959 : /*
960 : * Advance advancePast (so that entryLoadMoreItems will load the
961 : * right data), and keep scanning
962 : */
963 118970 : advancePast = entry->curItem;
964 : }
965 : }
966 : }
967 :
968 : /*
969 : * Identify the "current" item among the input entry streams for this scan key
970 : * that is greater than advancePast, and test whether it passes the scan key
971 : * qual condition.
972 : *
973 : * The current item is the smallest curItem among the inputs. key->curItem
974 : * is set to that value. key->curItemMatches is set to indicate whether that
975 : * TID passes the consistentFn test. If so, key->recheckCurItem is set true
976 : * iff recheck is needed for this item pointer (including the case where the
977 : * item pointer is a lossy page pointer).
978 : *
979 : * If all entry streams are exhausted, sets key->isFinished to true.
980 : *
981 : * Item pointers must be returned in ascending order.
982 : *
983 : * Note: this can return a "lossy page" item pointer, indicating that the
984 : * key potentially matches all items on that heap page. However, it is
985 : * not allowed to return both a lossy page pointer and exact (regular)
986 : * item pointers for the same page. (Doing so would break the key-combination
987 : * logic in scanGetItem.)
988 : */
989 : static void
990 1002248 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
991 : ItemPointerData advancePast, Snapshot snapshot)
992 : {
993 : ItemPointerData minItem;
994 : ItemPointerData curPageLossy;
995 : uint32 i;
996 : bool haveLossyEntry;
997 : GinScanEntry entry;
998 : GinTernaryValue res;
999 : MemoryContext oldCtx;
1000 : bool allFinished;
1001 :
1002 : Assert(!key->isFinished);
1003 :
1004 : /*
1005 : * We might have already tested this item; if so, no need to repeat work.
1006 : * (Note: the ">" case can happen, if advancePast is exact but we
1007 : * previously had to set curItem to a lossy-page pointer.)
1008 : */
1009 1002248 : if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
1010 1598 : return;
1011 :
1012 : /*
1013 : * Find the minimum item > advancePast among the active entry streams.
1014 : *
1015 : * Note: a lossy-page entry is encoded by a ItemPointer with max value for
1016 : * offset (0xffff), so that it will sort after any exact entries for the
1017 : * same page. So we'll prefer to return exact pointers not lossy
1018 : * pointers, which is good.
1019 : */
1020 1002226 : ItemPointerSetMax(&minItem);
1021 1002226 : allFinished = true;
1022 2715396 : for (i = 0; i < key->nrequired; i++)
1023 : {
1024 1713170 : entry = key->requiredEntries[i];
1025 :
1026 1713170 : if (entry->isFinished)
1027 525688 : continue;
1028 :
1029 : /*
1030 : * Advance this stream if necessary.
1031 : *
1032 : * In particular, since entry->curItem was initialized with
1033 : * ItemPointerSetMin, this ensures we fetch the first item for each
1034 : * entry on the first call.
1035 : */
1036 1187482 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1037 : {
1038 1029596 : entryGetItem(ginstate, entry, advancePast, snapshot);
1039 1029596 : if (entry->isFinished)
1040 1868 : continue;
1041 : }
1042 :
1043 1185614 : allFinished = false;
1044 1185614 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1045 1097412 : minItem = entry->curItem;
1046 : }
1047 :
1048 1002226 : if (allFinished && !key->excludeOnly)
1049 : {
1050 : /* all entries are finished */
1051 1576 : key->isFinished = true;
1052 1576 : return;
1053 : }
1054 :
1055 1000650 : if (!key->excludeOnly)
1056 : {
1057 : /*
1058 : * For a normal scan key, we now know there are no matches < minItem.
1059 : *
1060 : * If minItem is lossy, it means that there were no exact items on the
1061 : * page among requiredEntries, because lossy pointers sort after exact
1062 : * items. However, there might be exact items for the same page among
1063 : * additionalEntries, so we mustn't advance past them.
1064 : */
1065 996174 : if (ItemPointerIsLossyPage(&minItem))
1066 : {
1067 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1068 0 : GinItemPointerGetBlockNumber(&minItem))
1069 : {
1070 0 : ItemPointerSet(&advancePast,
1071 : GinItemPointerGetBlockNumber(&minItem),
1072 : InvalidOffsetNumber);
1073 : }
1074 : }
1075 : else
1076 : {
1077 : Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
1078 996174 : ItemPointerSet(&advancePast,
1079 : GinItemPointerGetBlockNumber(&minItem),
1080 996174 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
1081 : }
1082 : }
1083 : else
1084 : {
1085 : /*
1086 : * excludeOnly scan keys don't have any entries that are necessarily
1087 : * present in matching items. So, we consider the item just after
1088 : * advancePast.
1089 : */
1090 : Assert(key->nrequired == 0);
1091 4476 : ItemPointerSet(&minItem,
1092 : GinItemPointerGetBlockNumber(&advancePast),
1093 4476 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
1094 : }
1095 :
1096 : /*
1097 : * We might not have loaded all the entry streams for this TID yet. We
1098 : * could call the consistent function, passing MAYBE for those entries, to
1099 : * see if it can decide if this TID matches based on the information we
1100 : * have. But if the consistent-function is expensive, and cannot in fact
1101 : * decide with partial information, that could be a big loss. So, load all
1102 : * the additional entries, before calling the consistent function.
1103 : */
1104 1067310 : for (i = 0; i < key->nadditional; i++)
1105 : {
1106 66660 : entry = key->additionalEntries[i];
1107 :
1108 66660 : if (entry->isFinished)
1109 412 : continue;
1110 :
1111 66248 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1112 : {
1113 53756 : entryGetItem(ginstate, entry, advancePast, snapshot);
1114 53756 : if (entry->isFinished)
1115 178 : continue;
1116 : }
1117 :
1118 : /*
1119 : * Normally, none of the items in additionalEntries can have a curItem
1120 : * larger than minItem. But if minItem is a lossy page, then there
1121 : * might be exact items on the same page among additionalEntries.
1122 : */
1123 66070 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1124 : {
1125 : Assert(ItemPointerIsLossyPage(&minItem));
1126 0 : minItem = entry->curItem;
1127 : }
1128 : }
1129 :
1130 : /*
1131 : * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1132 : * and perform consistentFn test.
1133 : *
1134 : * Lossy-page entries pose a problem, since we don't know the correct
1135 : * entryRes state to pass to the consistentFn, and we also don't know what
1136 : * its combining logic will be (could be AND, OR, or even NOT). If the
1137 : * logic is OR then the consistentFn might succeed for all items in the
1138 : * lossy page even when none of the other entries match.
1139 : *
1140 : * Our strategy is to call the tri-state consistent function, with the
1141 : * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1142 : * returns FALSE, none of the lossy items alone are enough for a match, so
1143 : * we don't need to return a lossy-page pointer. Otherwise, return a
1144 : * lossy-page pointer to indicate that the whole heap page must be
1145 : * checked. (On subsequent calls, we'll do nothing until minItem is past
1146 : * the page altogether, thus ensuring that we never return both regular
1147 : * and lossy pointers for the same page.)
1148 : *
1149 : * An exception is that it doesn't matter what we pass for lossy pointers
1150 : * in "hidden" entries, because the consistentFn's result can't depend on
1151 : * them. We could pass them as MAYBE as well, but if we're using the
1152 : * "shim" implementation of a tri-state consistent function (see
1153 : * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1154 : * them as true.
1155 : *
1156 : * Note that only lossy-page entries pointing to the current item's page
1157 : * should trigger this processing; we might have future lossy pages in the
1158 : * entry array, but they aren't relevant yet.
1159 : */
1160 1000650 : key->curItem = minItem;
1161 1000650 : ItemPointerSetLossyPage(&curPageLossy,
1162 : GinItemPointerGetBlockNumber(&key->curItem));
1163 1000650 : haveLossyEntry = false;
1164 2777620 : for (i = 0; i < key->nentries; i++)
1165 : {
1166 1776970 : entry = key->scanEntry[i];
1167 3028654 : if (entry->isFinished == false &&
1168 1251684 : ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1169 : {
1170 0 : if (i < key->nuserentries)
1171 0 : key->entryRes[i] = GIN_MAYBE;
1172 : else
1173 0 : key->entryRes[i] = GIN_TRUE;
1174 0 : haveLossyEntry = true;
1175 : }
1176 : else
1177 1776970 : key->entryRes[i] = GIN_FALSE;
1178 : }
1179 :
1180 : /* prepare for calling consistentFn in temp context */
1181 1000650 : oldCtx = MemoryContextSwitchTo(tempCtx);
1182 :
1183 1000650 : if (haveLossyEntry)
1184 : {
1185 : /* Have lossy-page entries, so see if whole page matches */
1186 0 : res = key->triConsistentFn(key);
1187 :
1188 0 : if (res == GIN_TRUE || res == GIN_MAYBE)
1189 : {
1190 : /* Yes, so clean up ... */
1191 0 : MemoryContextSwitchTo(oldCtx);
1192 0 : MemoryContextReset(tempCtx);
1193 :
1194 : /* and return lossy pointer for whole page */
1195 0 : key->curItem = curPageLossy;
1196 0 : key->curItemMatches = true;
1197 0 : key->recheckCurItem = true;
1198 0 : return;
1199 : }
1200 : }
1201 :
1202 : /*
1203 : * At this point we know that we don't need to return a lossy whole-page
1204 : * pointer, but we might have matches for individual exact item pointers,
1205 : * possibly in combination with a lossy pointer. Pass lossy pointers as
1206 : * MAYBE to the ternary consistent function, to let it decide if this
1207 : * tuple satisfies the overall key, even though we don't know if the lossy
1208 : * entries match.
1209 : *
1210 : * Prepare entryRes array to be passed to consistentFn.
1211 : */
1212 2777620 : for (i = 0; i < key->nentries; i++)
1213 : {
1214 1776970 : entry = key->scanEntry[i];
1215 1776970 : if (entry->isFinished)
1216 525286 : key->entryRes[i] = GIN_FALSE;
1217 : #if 0
1218 :
1219 : /*
1220 : * This case can't currently happen, because we loaded all the entries
1221 : * for this item earlier.
1222 : */
1223 : else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1224 : key->entryRes[i] = GIN_MAYBE;
1225 : #endif
1226 1251684 : else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1227 0 : key->entryRes[i] = GIN_MAYBE;
1228 1251684 : else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1229 1071030 : key->entryRes[i] = GIN_TRUE;
1230 : else
1231 180654 : key->entryRes[i] = GIN_FALSE;
1232 : }
1233 :
1234 1000650 : res = key->triConsistentFn(key);
1235 :
1236 1000650 : switch (res)
1237 : {
1238 865002 : case GIN_TRUE:
1239 865002 : key->curItemMatches = true;
1240 : /* triConsistentFn set recheckCurItem */
1241 865002 : break;
1242 :
1243 18026 : case GIN_FALSE:
1244 18026 : key->curItemMatches = false;
1245 18026 : break;
1246 :
1247 117622 : case GIN_MAYBE:
1248 117622 : key->curItemMatches = true;
1249 117622 : key->recheckCurItem = true;
1250 117622 : break;
1251 :
1252 0 : default:
1253 :
1254 : /*
1255 : * the 'default' case shouldn't happen, but if the consistent
1256 : * function returns something bogus, this is the safe result
1257 : */
1258 0 : key->curItemMatches = true;
1259 0 : key->recheckCurItem = true;
1260 0 : break;
1261 : }
1262 :
1263 : /*
1264 : * We have a tuple, and we know if it matches or not. If it's a non-match,
1265 : * we could continue to find the next matching tuple, but let's break out
1266 : * and give scanGetItem a chance to advance the other keys. They might be
1267 : * able to skip past to a much higher TID, allowing us to save work.
1268 : */
1269 :
1270 : /* clean up after consistentFn calls */
1271 1000650 : MemoryContextSwitchTo(oldCtx);
1272 1000650 : MemoryContextReset(tempCtx);
1273 : }
1274 :
1275 : /*
1276 : * Get next heap item pointer (after advancePast) from scan.
1277 : * Returns true if anything found.
1278 : * On success, *item and *recheck are set.
1279 : *
1280 : * Note: this is very nearly the same logic as in keyGetItem(), except
1281 : * that we know the keys are to be combined with AND logic, whereas in
1282 : * keyGetItem() the combination logic is known only to the consistentFn.
1283 : */
1284 : static bool
1285 979628 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
1286 : ItemPointerData *item, bool *recheck)
1287 : {
1288 979628 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1289 : uint32 i;
1290 : bool match;
1291 :
1292 : /*----------
1293 : * Advance the scan keys in lock-step, until we find an item that matches
1294 : * all the keys. If any key reports isFinished, meaning its subset of the
1295 : * entries is exhausted, we can stop. Otherwise, set *item to the next
1296 : * matching item.
1297 : *
1298 : * This logic works only if a keyGetItem stream can never contain both
1299 : * exact and lossy pointers for the same page. Else we could have a
1300 : * case like
1301 : *
1302 : * stream 1 stream 2
1303 : * ... ...
1304 : * 42/6 42/7
1305 : * 50/1 42/0xffff
1306 : * ... ...
1307 : *
1308 : * We would conclude that 42/6 is not a match and advance stream 1,
1309 : * thus never detecting the match to the lossy pointer in stream 2.
1310 : * (keyGetItem has a similar problem versus entryGetItem.)
1311 : *----------
1312 : */
1313 : do
1314 : {
1315 997712 : ItemPointerSetMin(item);
1316 997712 : match = true;
1317 1980358 : for (i = 0; i < so->nkeys && match; i++)
1318 : {
1319 1002248 : GinScanKey key = so->keys + i;
1320 :
1321 : /*
1322 : * If we're considering a lossy page, skip excludeOnly keys. They
1323 : * can't exclude the whole page anyway.
1324 : */
1325 1002248 : if (ItemPointerIsLossyPage(item) && key->excludeOnly)
1326 : {
1327 : /*
1328 : * ginNewScanKey() should never mark the first key as
1329 : * excludeOnly.
1330 : */
1331 : Assert(i > 0);
1332 0 : continue;
1333 : }
1334 :
1335 : /* Fetch the next item for this key that is > advancePast. */
1336 1002248 : keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
1337 : scan->xs_snapshot);
1338 :
1339 1002248 : if (key->isFinished)
1340 1576 : return false;
1341 :
1342 : /*
1343 : * If it's not a match, we can immediately conclude that nothing
1344 : * <= this item matches, without checking the rest of the keys.
1345 : */
1346 1000672 : if (!key->curItemMatches)
1347 : {
1348 18026 : advancePast = key->curItem;
1349 18026 : match = false;
1350 18026 : break;
1351 : }
1352 :
1353 : /*
1354 : * It's a match. We can conclude that nothing < matches, so the
1355 : * other key streams can skip to this item.
1356 : *
1357 : * Beware of lossy pointers, though; from a lossy pointer, we can
1358 : * only conclude that nothing smaller than this *block* matches.
1359 : */
1360 982646 : if (ItemPointerIsLossyPage(&key->curItem))
1361 : {
1362 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1363 0 : GinItemPointerGetBlockNumber(&key->curItem))
1364 : {
1365 0 : ItemPointerSet(&advancePast,
1366 0 : GinItemPointerGetBlockNumber(&key->curItem),
1367 : InvalidOffsetNumber);
1368 : }
1369 : }
1370 : else
1371 : {
1372 : Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
1373 982646 : ItemPointerSet(&advancePast,
1374 982646 : GinItemPointerGetBlockNumber(&key->curItem),
1375 982646 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
1376 : }
1377 :
1378 : /*
1379 : * If this is the first key, remember this location as a potential
1380 : * match, and proceed to check the rest of the keys.
1381 : *
1382 : * Otherwise, check if this is the same item that we checked the
1383 : * previous keys for (or a lossy pointer for the same page). If
1384 : * not, loop back to check the previous keys for this item (we
1385 : * will check this key again too, but keyGetItem returns quickly
1386 : * for that)
1387 : */
1388 982646 : if (i == 0)
1389 : {
1390 978216 : *item = key->curItem;
1391 : }
1392 : else
1393 : {
1394 8860 : if (ItemPointerIsLossyPage(&key->curItem) ||
1395 4430 : ItemPointerIsLossyPage(item))
1396 : {
1397 : Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
1398 0 : match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1399 0 : GinItemPointerGetBlockNumber(item));
1400 : }
1401 : else
1402 : {
1403 : Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1404 4430 : match = (ginCompareItemPointers(&key->curItem, item) == 0);
1405 : }
1406 : }
1407 : }
1408 996136 : } while (!match);
1409 :
1410 : Assert(!ItemPointerIsMin(item));
1411 :
1412 : /*
1413 : * Now *item contains the first ItemPointer after previous result that
1414 : * satisfied all the keys for that exact TID, or a lossy reference to the
1415 : * same page.
1416 : *
1417 : * We must return recheck = true if any of the keys are marked recheck.
1418 : */
1419 978052 : *recheck = false;
1420 1825808 : for (i = 0; i < so->nkeys; i++)
1421 : {
1422 978424 : GinScanKey key = so->keys + i;
1423 :
1424 978424 : if (key->recheckCurItem)
1425 : {
1426 130668 : *recheck = true;
1427 130668 : break;
1428 : }
1429 : }
1430 :
1431 978052 : return true;
1432 : }
1433 :
1434 :
1435 : /*
1436 : * Functions for scanning the pending list
1437 : */
1438 :
1439 :
1440 : /*
1441 : * Get ItemPointer of next heap row to be checked from pending list.
1442 : * Returns false if there are no more. On pages with several heap rows
1443 : * it returns each row separately, on page with part of heap row returns
1444 : * per page data. pos->firstOffset and pos->lastOffset are set to identify
1445 : * the range of pending-list tuples belonging to this heap row.
1446 : *
1447 : * The pendingBuffer is presumed pinned and share-locked on entry, and is
1448 : * pinned and share-locked on success exit. On failure exit it's released.
1449 : */
1450 : static bool
1451 1614 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1452 : {
1453 : OffsetNumber maxoff;
1454 : Page page;
1455 : IndexTuple itup;
1456 :
1457 1614 : ItemPointerSetInvalid(&pos->item);
1458 : for (;;)
1459 : {
1460 1614 : page = BufferGetPage(pos->pendingBuffer);
1461 1614 : TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1462 :
1463 1614 : maxoff = PageGetMaxOffsetNumber(page);
1464 1614 : if (pos->firstOffset > maxoff)
1465 : {
1466 166 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1467 :
1468 166 : if (blkno == InvalidBlockNumber)
1469 : {
1470 166 : UnlockReleaseBuffer(pos->pendingBuffer);
1471 166 : pos->pendingBuffer = InvalidBuffer;
1472 :
1473 166 : return false;
1474 : }
1475 : else
1476 : {
1477 : /*
1478 : * Here we must prevent deletion of next page by insertcleanup
1479 : * process, which may be trying to obtain exclusive lock on
1480 : * current page. So, we lock next page before releasing the
1481 : * current one
1482 : */
1483 0 : Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1484 :
1485 0 : LockBuffer(tmpbuf, GIN_SHARE);
1486 0 : UnlockReleaseBuffer(pos->pendingBuffer);
1487 :
1488 0 : pos->pendingBuffer = tmpbuf;
1489 0 : pos->firstOffset = FirstOffsetNumber;
1490 : }
1491 : }
1492 : else
1493 : {
1494 1448 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1495 1448 : pos->item = itup->t_tid;
1496 1448 : if (GinPageHasFullRow(page))
1497 : {
1498 : /*
1499 : * find itempointer to the next row
1500 : */
1501 3354 : for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1502 : {
1503 3188 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1504 3188 : if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1505 1282 : break;
1506 : }
1507 : }
1508 : else
1509 : {
1510 : /*
1511 : * All itempointers are the same on this page
1512 : */
1513 0 : pos->lastOffset = maxoff + 1;
1514 : }
1515 :
1516 : /*
1517 : * Now pos->firstOffset points to the first tuple of current heap
1518 : * row, pos->lastOffset points to the first tuple of next heap row
1519 : * (or to the end of page)
1520 : */
1521 1448 : break;
1522 : }
1523 : }
1524 :
1525 1448 : return true;
1526 : }
1527 :
1528 : /*
1529 : * Scan pending-list page from current tuple (off) up till the first of:
1530 : * - match is found (then returns true)
1531 : * - no later match is possible
1532 : * - tuple's attribute number is not equal to entry's attrnum
1533 : * - reach end of page
1534 : *
1535 : * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1536 : * of gintuple_get_key() on the current page.
1537 : */
1538 : static bool
1539 0 : matchPartialInPendingList(GinState *ginstate, Page page,
1540 : OffsetNumber off, OffsetNumber maxoff,
1541 : GinScanEntry entry,
1542 : Datum *datum, GinNullCategory *category,
1543 : bool *datumExtracted)
1544 : {
1545 : IndexTuple itup;
1546 : int32 cmp;
1547 :
1548 : /* Partial match to a null is not possible */
1549 0 : if (entry->queryCategory != GIN_CAT_NORM_KEY)
1550 0 : return false;
1551 :
1552 0 : while (off < maxoff)
1553 : {
1554 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1555 :
1556 0 : if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1557 0 : return false;
1558 :
1559 0 : if (datumExtracted[off - 1] == false)
1560 : {
1561 0 : datum[off - 1] = gintuple_get_key(ginstate, itup,
1562 0 : &category[off - 1]);
1563 0 : datumExtracted[off - 1] = true;
1564 : }
1565 :
1566 : /* Once we hit nulls, no further match is possible */
1567 0 : if (category[off - 1] != GIN_CAT_NORM_KEY)
1568 0 : return false;
1569 :
1570 : /*----------
1571 : * Check partial match.
1572 : * case cmp == 0 => match
1573 : * case cmp > 0 => not match and end scan (no later match possible)
1574 : * case cmp < 0 => not match and continue scan
1575 : *----------
1576 : */
1577 0 : cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1578 0 : ginstate->supportCollation[entry->attnum - 1],
1579 : entry->queryKey,
1580 0 : datum[off - 1],
1581 0 : UInt16GetDatum(entry->strategy),
1582 0 : PointerGetDatum(entry->extra_data)));
1583 0 : if (cmp == 0)
1584 0 : return true;
1585 0 : else if (cmp > 0)
1586 0 : return false;
1587 :
1588 0 : off++;
1589 : }
1590 :
1591 0 : return false;
1592 : }
1593 :
1594 : /*
1595 : * Set up the entryRes array for each key by looking at
1596 : * every entry for current heap row in pending list.
1597 : *
1598 : * Returns true if each scan key has at least one entryRes match.
1599 : * This corresponds to the situations where the normal index search will
1600 : * try to apply the key's consistentFn. (A tuple not meeting that requirement
1601 : * cannot be returned by the normal search since no entry stream will
1602 : * source its TID.)
1603 : *
1604 : * The pendingBuffer is presumed pinned and share-locked on entry.
1605 : */
1606 : static bool
1607 1448 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1608 : {
1609 1448 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1610 : OffsetNumber attrnum;
1611 : Page page;
1612 : IndexTuple itup;
1613 : int i,
1614 : j;
1615 :
1616 : /*
1617 : * Reset all entryRes and hasMatchKey flags
1618 : */
1619 3924 : for (i = 0; i < so->nkeys; i++)
1620 : {
1621 2476 : GinScanKey key = so->keys + i;
1622 :
1623 2476 : memset(key->entryRes, GIN_FALSE, key->nentries);
1624 : }
1625 1448 : memset(pos->hasMatchKey, false, so->nkeys);
1626 :
1627 : /*
1628 : * Outer loop iterates over multiple pending-list pages when a single heap
1629 : * row has entries spanning those pages.
1630 : */
1631 : for (;;)
1632 0 : {
1633 : Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1634 : GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1635 : bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1636 :
1637 : Assert(pos->lastOffset > pos->firstOffset);
1638 1448 : memset(datumExtracted + pos->firstOffset - 1, 0,
1639 1448 : sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1640 :
1641 1448 : page = BufferGetPage(pos->pendingBuffer);
1642 1448 : TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1643 :
1644 3924 : for (i = 0; i < so->nkeys; i++)
1645 : {
1646 2476 : GinScanKey key = so->keys + i;
1647 :
1648 4776 : for (j = 0; j < key->nentries; j++)
1649 : {
1650 2300 : GinScanEntry entry = key->scanEntry[j];
1651 2300 : OffsetNumber StopLow = pos->firstOffset,
1652 2300 : StopHigh = pos->lastOffset,
1653 : StopMiddle;
1654 :
1655 : /* If already matched on earlier page, do no extra work */
1656 2300 : if (key->entryRes[j])
1657 0 : continue;
1658 :
1659 : /*
1660 : * Interesting tuples are from pos->firstOffset to
1661 : * pos->lastOffset and they are ordered by (attnum, Datum) as
1662 : * it's done in entry tree. So we can use binary search to
1663 : * avoid linear scanning.
1664 : */
1665 5108 : while (StopLow < StopHigh)
1666 : {
1667 : int res;
1668 :
1669 4010 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1670 :
1671 4010 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1672 :
1673 4010 : attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1674 :
1675 4010 : if (key->attnum < attrnum)
1676 : {
1677 798 : StopHigh = StopMiddle;
1678 798 : continue;
1679 : }
1680 3212 : if (key->attnum > attrnum)
1681 : {
1682 660 : StopLow = StopMiddle + 1;
1683 660 : continue;
1684 : }
1685 :
1686 2552 : if (datumExtracted[StopMiddle - 1] == false)
1687 : {
1688 4952 : datum[StopMiddle - 1] =
1689 2476 : gintuple_get_key(&so->ginstate, itup,
1690 2476 : &category[StopMiddle - 1]);
1691 2476 : datumExtracted[StopMiddle - 1] = true;
1692 : }
1693 :
1694 2552 : if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1695 : {
1696 : /* special behavior depending on searchMode */
1697 1084 : if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1698 : {
1699 : /* match anything except NULL_ITEM */
1700 1084 : if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1701 378 : res = -1;
1702 : else
1703 706 : res = 0;
1704 : }
1705 : else
1706 : {
1707 : /* match everything */
1708 0 : res = 0;
1709 : }
1710 : }
1711 : else
1712 : {
1713 1468 : res = ginCompareEntries(&so->ginstate,
1714 1468 : entry->attnum,
1715 : entry->queryKey,
1716 1468 : entry->queryCategory,
1717 1468 : datum[StopMiddle - 1],
1718 1468 : category[StopMiddle - 1]);
1719 : }
1720 :
1721 2552 : if (res == 0)
1722 : {
1723 : /*
1724 : * Found exact match (there can be only one, except in
1725 : * EMPTY_QUERY mode).
1726 : *
1727 : * If doing partial match, scan forward from here to
1728 : * end of page to check for matches.
1729 : *
1730 : * See comment above about tuple's ordering.
1731 : */
1732 1202 : if (entry->isPartialMatch)
1733 0 : key->entryRes[j] =
1734 0 : matchPartialInPendingList(&so->ginstate,
1735 : page,
1736 : StopMiddle,
1737 0 : pos->lastOffset,
1738 : entry,
1739 : datum,
1740 : category,
1741 : datumExtracted);
1742 : else
1743 1202 : key->entryRes[j] = true;
1744 :
1745 : /* done with binary search */
1746 1202 : break;
1747 : }
1748 1350 : else if (res < 0)
1749 1314 : StopHigh = StopMiddle;
1750 : else
1751 36 : StopLow = StopMiddle + 1;
1752 : }
1753 :
1754 2300 : if (StopLow >= StopHigh && entry->isPartialMatch)
1755 : {
1756 : /*
1757 : * No exact match on this page. If doing partial match,
1758 : * scan from the first tuple greater than target value to
1759 : * end of page. Note that since we don't remember whether
1760 : * the comparePartialFn told us to stop early on a
1761 : * previous page, we will uselessly apply comparePartialFn
1762 : * to the first tuple on each subsequent page.
1763 : */
1764 0 : key->entryRes[j] =
1765 0 : matchPartialInPendingList(&so->ginstate,
1766 : page,
1767 : StopHigh,
1768 0 : pos->lastOffset,
1769 : entry,
1770 : datum,
1771 : category,
1772 : datumExtracted);
1773 : }
1774 :
1775 2300 : pos->hasMatchKey[i] |= key->entryRes[j];
1776 : }
1777 : }
1778 :
1779 : /* Advance firstOffset over the scanned tuples */
1780 1448 : pos->firstOffset = pos->lastOffset;
1781 :
1782 1448 : if (GinPageHasFullRow(page))
1783 : {
1784 : /*
1785 : * We have examined all pending entries for the current heap row.
1786 : * Break out of loop over pages.
1787 : */
1788 1448 : break;
1789 : }
1790 : else
1791 : {
1792 : /*
1793 : * Advance to next page of pending entries for the current heap
1794 : * row. Complain if there isn't one.
1795 : */
1796 0 : ItemPointerData item = pos->item;
1797 :
1798 0 : if (scanGetCandidate(scan, pos) == false ||
1799 0 : !ItemPointerEquals(&pos->item, &item))
1800 0 : elog(ERROR, "could not find additional pending pages for same heap tuple");
1801 : }
1802 : }
1803 :
1804 : /*
1805 : * All scan keys except excludeOnly require at least one entry to match.
1806 : * excludeOnly keys are an exception, because their implied
1807 : * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all
1808 : * non-excludeOnly scan keys have at least one match.
1809 : */
1810 2538 : for (i = 0; i < so->nkeys; i++)
1811 : {
1812 1984 : if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
1813 894 : return false;
1814 : }
1815 :
1816 554 : return true;
1817 : }
1818 :
1819 : /*
1820 : * Collect all matched rows from pending list into bitmap.
1821 : */
1822 : static void
1823 1576 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1824 : {
1825 1576 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1826 : MemoryContext oldCtx;
1827 : bool recheck,
1828 : match;
1829 : int i;
1830 : pendingPosition pos;
1831 1576 : Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1832 : Page page;
1833 : BlockNumber blkno;
1834 :
1835 1576 : *ntids = 0;
1836 :
1837 : /*
1838 : * Acquire predicate lock on the metapage, to conflict with any fastupdate
1839 : * insertions.
1840 : */
1841 1576 : PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
1842 :
1843 1576 : LockBuffer(metabuffer, GIN_SHARE);
1844 1576 : page = BufferGetPage(metabuffer);
1845 1576 : TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1846 1576 : blkno = GinPageGetMeta(page)->head;
1847 :
1848 : /*
1849 : * fetch head of list before unlocking metapage. head page must be pinned
1850 : * to prevent deletion by vacuum process
1851 : */
1852 1576 : if (blkno == InvalidBlockNumber)
1853 : {
1854 : /* No pending list, so proceed with normal scan */
1855 1410 : UnlockReleaseBuffer(metabuffer);
1856 1410 : return;
1857 : }
1858 :
1859 166 : pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1860 166 : LockBuffer(pos.pendingBuffer, GIN_SHARE);
1861 166 : pos.firstOffset = FirstOffsetNumber;
1862 166 : UnlockReleaseBuffer(metabuffer);
1863 166 : pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1864 :
1865 : /*
1866 : * loop for each heap row. scanGetCandidate returns full row or row's
1867 : * tuples from first page.
1868 : */
1869 1614 : while (scanGetCandidate(scan, &pos))
1870 : {
1871 : /*
1872 : * Check entries in tuple and set up entryRes array.
1873 : *
1874 : * If pending tuples belonging to the current heap row are spread
1875 : * across several pages, collectMatchesForHeapRow will read all of
1876 : * those pages.
1877 : */
1878 1448 : if (!collectMatchesForHeapRow(scan, &pos))
1879 894 : continue;
1880 :
1881 : /*
1882 : * Matching of entries of one row is finished, so check row using
1883 : * consistent functions.
1884 : */
1885 554 : oldCtx = MemoryContextSwitchTo(so->tempCtx);
1886 554 : recheck = false;
1887 554 : match = true;
1888 :
1889 1404 : for (i = 0; i < so->nkeys; i++)
1890 : {
1891 850 : GinScanKey key = so->keys + i;
1892 :
1893 850 : if (!key->boolConsistentFn(key))
1894 : {
1895 0 : match = false;
1896 0 : break;
1897 : }
1898 850 : recheck |= key->recheckCurItem;
1899 : }
1900 :
1901 554 : MemoryContextSwitchTo(oldCtx);
1902 554 : MemoryContextReset(so->tempCtx);
1903 :
1904 554 : if (match)
1905 : {
1906 554 : tbm_add_tuples(tbm, &pos.item, 1, recheck);
1907 554 : (*ntids)++;
1908 : }
1909 : }
1910 :
1911 166 : pfree(pos.hasMatchKey);
1912 : }
1913 :
1914 :
1915 : #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1916 :
1917 : int64
1918 1588 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
1919 : {
1920 1588 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1921 : int64 ntids;
1922 : ItemPointerData iptr;
1923 : bool recheck;
1924 :
1925 : /*
1926 : * Set up the scan keys, and check for unsatisfiable query.
1927 : */
1928 1588 : ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1929 : * sure */
1930 1588 : ginNewScanKey(scan);
1931 :
1932 1588 : if (GinIsVoidRes(scan))
1933 12 : return 0;
1934 :
1935 1576 : ntids = 0;
1936 :
1937 : /*
1938 : * First, scan the pending list and collect any matching entries into the
1939 : * bitmap. After we scan a pending item, some other backend could post it
1940 : * into the main index, and so we might visit it a second time during the
1941 : * main scan. This is okay because we'll just re-set the same bit in the
1942 : * bitmap. (The possibility of duplicate visits is a major reason why GIN
1943 : * can't support the amgettuple API, however.) Note that it would not do
1944 : * to scan the main index before the pending list, since concurrent
1945 : * cleanup could then make us miss entries entirely.
1946 : */
1947 1576 : scanPendingInsert(scan, tbm, &ntids);
1948 :
1949 : /*
1950 : * Now scan the main index.
1951 : */
1952 1576 : startScan(scan);
1953 :
1954 1576 : ItemPointerSetMin(&iptr);
1955 :
1956 : for (;;)
1957 : {
1958 979628 : CHECK_FOR_INTERRUPTS();
1959 :
1960 979628 : if (!scanGetItem(scan, iptr, &iptr, &recheck))
1961 1576 : break;
1962 :
1963 978052 : if (ItemPointerIsLossyPage(&iptr))
1964 0 : tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1965 : else
1966 978052 : tbm_add_tuples(tbm, &iptr, 1, recheck);
1967 978052 : ntids++;
1968 : }
1969 :
1970 1576 : return ntids;
1971 : }
|