LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginget.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 515 619 83.2 %
Date: 2025-04-01 14:15:22 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginget.c
       4             :  *    fetch tuples from a GIN scan.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginget.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/relscan.h"
      19             : #include "common/pg_prng.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/predicate.h"
      22             : #include "utils/datum.h"
      23             : #include "utils/memutils.h"
      24             : #include "utils/rel.h"
      25             : 
      26             : /* GUC parameter */
      27             : int         GinFuzzySearchLimit = 0;
      28             : 
      29             : typedef struct pendingPosition
      30             : {
      31             :     Buffer      pendingBuffer;
      32             :     OffsetNumber firstOffset;
      33             :     OffsetNumber lastOffset;
      34             :     ItemPointerData item;
      35             :     bool       *hasMatchKey;
      36             : } pendingPosition;
      37             : 
      38             : 
      39             : /*
      40             :  * Goes to the next page if current offset is outside of bounds
      41             :  */
      42             : static bool
      43      407620 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
      44             : {
      45      407620 :     Page        page = BufferGetPage(stack->buffer);
      46             : 
      47      407620 :     if (stack->off > PageGetMaxOffsetNumber(page))
      48             :     {
      49             :         /*
      50             :          * We scanned the whole page, so we should take right page
      51             :          */
      52        3790 :         if (GinPageRightMost(page))
      53         366 :             return false;       /* no more pages */
      54             : 
      55        3424 :         stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
      56        3424 :         stack->blkno = BufferGetBlockNumber(stack->buffer);
      57        3424 :         stack->off = FirstOffsetNumber;
      58        3424 :         PredicateLockPage(btree->index, stack->blkno, snapshot);
      59             :     }
      60             : 
      61      407254 :     return true;
      62             : }
      63             : 
      64             : /*
      65             :  * Scan all pages of a posting tree and save all its heap ItemPointers
      66             :  * in scanEntry->matchBitmap
      67             :  */
      68             : static void
      69          12 : scanPostingTree(Relation index, GinScanEntry scanEntry,
      70             :                 BlockNumber rootPostingTree)
      71             : {
      72             :     GinBtreeData btree;
      73             :     GinBtreeStack *stack;
      74             :     Buffer      buffer;
      75             :     Page        page;
      76             : 
      77             :     /* Descend to the leftmost leaf page */
      78          12 :     stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
      79          12 :     buffer = stack->buffer;
      80             : 
      81          12 :     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
      82             : 
      83          12 :     freeGinBtreeStack(stack);
      84             : 
      85             :     /*
      86             :      * Loop iterates through all leaf pages of posting tree
      87             :      */
      88             :     for (;;)
      89             :     {
      90          30 :         page = BufferGetPage(buffer);
      91          30 :         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
      92             :         {
      93          30 :             int         n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
      94             : 
      95          30 :             scanEntry->predictNumberResult += n;
      96             :         }
      97             : 
      98          30 :         if (GinPageRightMost(page))
      99          12 :             break;              /* no more pages */
     100             : 
     101          18 :         buffer = ginStepRight(buffer, index, GIN_SHARE);
     102             :     }
     103             : 
     104          12 :     UnlockReleaseBuffer(buffer);
     105          12 : }
     106             : 
     107             : /*
     108             :  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
     109             :  * match the search entry.  This supports three different match modes:
     110             :  *
     111             :  * 1. Partial-match support: scan from current point until the
     112             :  *    comparePartialFn says we're done.
     113             :  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
     114             :  *    key for the current attnum) until we hit null items or end of attnum
     115             :  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
     116             :  *    key for the current attnum) until we hit end of attnum
     117             :  *
     118             :  * Returns true if done, false if it's necessary to restart scan from scratch
     119             :  */
     120             : static bool
     121         534 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
     122             :                    GinScanEntry scanEntry, Snapshot snapshot)
     123             : {
     124             :     OffsetNumber attnum;
     125             :     CompactAttribute *attr;
     126             : 
     127             :     /* Initialize empty bitmap result */
     128         534 :     scanEntry->matchBitmap = tbm_create(work_mem * (Size) 1024, NULL);
     129             : 
     130             :     /* Null query cannot partial-match anything */
     131         534 :     if (scanEntry->isPartialMatch &&
     132         252 :         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
     133           0 :         return true;
     134             : 
     135             :     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
     136         534 :     attnum = scanEntry->attnum;
     137         534 :     attr = TupleDescCompactAttr(btree->ginstate->origTupdesc, attnum - 1);
     138             : 
     139             :     /*
     140             :      * Predicate lock entry leaf page, following pages will be locked by
     141             :      * moveRightIfItNeeded()
     142             :      */
     143         534 :     PredicateLockPage(btree->index,
     144             :                       BufferGetBlockNumber(stack->buffer),
     145             :                       snapshot);
     146             : 
     147             :     for (;;)
     148      407074 :     {
     149             :         Page        page;
     150             :         IndexTuple  itup;
     151             :         Datum       idatum;
     152             :         GinNullCategory icategory;
     153             : 
     154             :         /*
     155             :          * stack->off points to the interested entry, buffer is already locked
     156             :          */
     157      407608 :         if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     158         534 :             return true;
     159             : 
     160      407242 :         page = BufferGetPage(stack->buffer);
     161      407242 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     162             : 
     163             :         /*
     164             :          * If tuple stores another attribute then stop scan
     165             :          */
     166      407242 :         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     167           0 :             return true;
     168             : 
     169             :         /* Safe to fetch attribute value */
     170      407242 :         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
     171             : 
     172             :         /*
     173             :          * Check for appropriate scan stop conditions
     174             :          */
     175      407242 :         if (scanEntry->isPartialMatch)
     176             :         {
     177             :             int32       cmp;
     178             : 
     179             :             /*
     180             :              * In partial match, stop scan at any null (including
     181             :              * placeholders); partial matches never match nulls
     182             :              */
     183        1332 :             if (icategory != GIN_CAT_NORM_KEY)
     184          10 :                 return true;
     185             : 
     186             :             /*----------
     187             :              * Check of partial match.
     188             :              * case cmp == 0 => match
     189             :              * case cmp > 0 => not match and finish scan
     190             :              * case cmp < 0 => not match and continue scan
     191             :              *----------
     192             :              */
     193        1322 :             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
     194        1322 :                                                   btree->ginstate->supportCollation[attnum - 1],
     195             :                                                   scanEntry->queryKey,
     196             :                                                   idatum,
     197        1322 :                                                   UInt16GetDatum(scanEntry->strategy),
     198        1322 :                                                   PointerGetDatum(scanEntry->extra_data)));
     199             : 
     200        1322 :             if (cmp > 0)
     201         130 :                 return true;
     202        1192 :             else if (cmp < 0)
     203             :             {
     204          60 :                 stack->off++;
     205          60 :                 continue;
     206             :             }
     207             :         }
     208      405910 :         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
     209             :         {
     210             :             /*
     211             :              * In ALL mode, we are not interested in null items, so we can
     212             :              * stop if we get to a null-item placeholder (which will be the
     213             :              * last entry for a given attnum).  We do want to include NULL_KEY
     214             :              * and EMPTY_ITEM entries, though.
     215             :              */
     216      405910 :             if (icategory == GIN_CAT_NULL_ITEM)
     217          28 :                 return true;
     218             :         }
     219             : 
     220             :         /*
     221             :          * OK, we want to return the TIDs listed in this entry.
     222             :          */
     223      407014 :         if (GinIsPostingTree(itup))
     224             :         {
     225          12 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     226             : 
     227             :             /*
     228             :              * We should unlock current page (but not unpin) during tree scan
     229             :              * to prevent deadlock with vacuum processes.
     230             :              *
     231             :              * We save current entry value (idatum) to be able to re-find our
     232             :              * tuple after re-locking
     233             :              */
     234          12 :             if (icategory == GIN_CAT_NORM_KEY)
     235          12 :                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
     236             : 
     237          12 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     238             : 
     239             :             /*
     240             :              * Acquire predicate lock on the posting tree.  We already hold a
     241             :              * lock on the entry page, but insertions to the posting tree
     242             :              * don't check for conflicts on that level.
     243             :              */
     244          12 :             PredicateLockPage(btree->index, rootPostingTree, snapshot);
     245             : 
     246             :             /* Collect all the TIDs in this entry's posting tree */
     247          12 :             scanPostingTree(btree->index, scanEntry, rootPostingTree);
     248             : 
     249             :             /*
     250             :              * We lock again the entry page and while it was unlocked insert
     251             :              * might have occurred, so we need to re-find our position.
     252             :              */
     253          12 :             LockBuffer(stack->buffer, GIN_SHARE);
     254          12 :             page = BufferGetPage(stack->buffer);
     255          12 :             if (!GinPageIsLeaf(page))
     256             :             {
     257             :                 /*
     258             :                  * Root page becomes non-leaf while we unlock it. We will
     259             :                  * start again, this situation doesn't occur often - root can
     260             :                  * became a non-leaf only once per life of index.
     261             :                  */
     262           0 :                 return false;
     263             :             }
     264             : 
     265             :             /* Search forward to re-find idatum */
     266             :             for (;;)
     267             :             {
     268          12 :                 if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     269           0 :                     ereport(ERROR,
     270             :                             (errcode(ERRCODE_INTERNAL_ERROR),
     271             :                              errmsg("failed to re-find tuple within index \"%s\"",
     272             :                                     RelationGetRelationName(btree->index))));
     273             : 
     274          12 :                 page = BufferGetPage(stack->buffer);
     275          12 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     276             : 
     277          12 :                 if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
     278             :                 {
     279             :                     Datum       newDatum;
     280             :                     GinNullCategory newCategory;
     281             : 
     282          12 :                     newDatum = gintuple_get_key(btree->ginstate, itup,
     283             :                                                 &newCategory);
     284             : 
     285          12 :                     if (ginCompareEntries(btree->ginstate, attnum,
     286             :                                           newDatum, newCategory,
     287             :                                           idatum, icategory) == 0)
     288          12 :                         break;  /* Found! */
     289             :                 }
     290             : 
     291           0 :                 stack->off++;
     292             :             }
     293             : 
     294          12 :             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
     295           0 :                 pfree(DatumGetPointer(idatum));
     296             :         }
     297             :         else
     298             :         {
     299             :             ItemPointer ipd;
     300             :             int         nipd;
     301             : 
     302      407002 :             ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
     303      407002 :             tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
     304      407002 :             scanEntry->predictNumberResult += GinGetNPosting(itup);
     305      407002 :             pfree(ipd);
     306             :         }
     307             : 
     308             :         /*
     309             :          * Done with this entry, go to the next
     310             :          */
     311      407014 :         stack->off++;
     312             :     }
     313             : }
     314             : 
     315             : /*
     316             :  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
     317             :  */
     318             : static void
     319        6692 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
     320             : {
     321             :     GinBtreeData btreeEntry;
     322             :     GinBtreeStack *stackEntry;
     323             :     Page        page;
     324             :     bool        needUnlock;
     325             : 
     326        6692 : restartScanEntry:
     327        6692 :     entry->buffer = InvalidBuffer;
     328        6692 :     ItemPointerSetMin(&entry->curItem);
     329        6692 :     entry->offset = InvalidOffsetNumber;
     330        6692 :     if (entry->list)
     331           0 :         pfree(entry->list);
     332        6692 :     entry->list = NULL;
     333        6692 :     entry->nlist = 0;
     334        6692 :     entry->matchBitmap = NULL;
     335        6692 :     entry->matchNtuples = -1;
     336        6692 :     entry->matchResult.blockno = InvalidBlockNumber;
     337        6692 :     entry->reduceResult = false;
     338        6692 :     entry->predictNumberResult = 0;
     339             : 
     340             :     /*
     341             :      * we should find entry, and begin scan of posting tree or just store
     342             :      * posting list in memory
     343             :      */
     344        6692 :     ginPrepareEntryScan(&btreeEntry, entry->attnum,
     345        6692 :                         entry->queryKey, entry->queryCategory,
     346             :                         ginstate);
     347        6692 :     stackEntry = ginFindLeafPage(&btreeEntry, true, false);
     348        6692 :     page = BufferGetPage(stackEntry->buffer);
     349             : 
     350             :     /* ginFindLeafPage() will have already checked snapshot age. */
     351        6692 :     needUnlock = true;
     352             : 
     353        6692 :     entry->isFinished = true;
     354             : 
     355        6692 :     if (entry->isPartialMatch ||
     356        6440 :         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
     357             :     {
     358             :         /*
     359             :          * btreeEntry.findItem locates the first item >= given search key.
     360             :          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
     361             :          * because of the way the GIN_CAT_EMPTY_QUERY category code is
     362             :          * assigned.)  We scan forward from there and collect all TIDs needed
     363             :          * for the entry type.
     364             :          */
     365         534 :         btreeEntry.findItem(&btreeEntry, stackEntry);
     366         534 :         if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
     367         534 :             == false)
     368             :         {
     369             :             /*
     370             :              * GIN tree was seriously restructured, so we will cleanup all
     371             :              * found data and rescan. See comments near 'return false' in
     372             :              * collectMatchBitmap()
     373             :              */
     374           0 :             if (entry->matchBitmap)
     375             :             {
     376           0 :                 if (entry->matchIterator)
     377           0 :                     tbm_end_private_iterate(entry->matchIterator);
     378           0 :                 entry->matchIterator = NULL;
     379           0 :                 tbm_free(entry->matchBitmap);
     380           0 :                 entry->matchBitmap = NULL;
     381             :             }
     382           0 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     383           0 :             freeGinBtreeStack(stackEntry);
     384           0 :             goto restartScanEntry;
     385             :         }
     386             : 
     387         534 :         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
     388             :         {
     389         420 :             entry->matchIterator =
     390         420 :                 tbm_begin_private_iterate(entry->matchBitmap);
     391         420 :             entry->isFinished = false;
     392             :         }
     393             :     }
     394        6158 :     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     395             :     {
     396        4448 :         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
     397             : 
     398        4448 :         if (GinIsPostingTree(itup))
     399             :         {
     400          64 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     401             :             GinBtreeStack *stack;
     402             :             Page        entrypage;
     403             :             ItemPointerData minItem;
     404             : 
     405             :             /*
     406             :              * This is an equality scan, so lock the root of the posting tree.
     407             :              * It represents a lock on the exact key value, and covers all the
     408             :              * items in the posting tree.
     409             :              */
     410          64 :             PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
     411             : 
     412             :             /*
     413             :              * We should unlock entry page before touching posting tree to
     414             :              * prevent deadlocks with vacuum processes. Because entry is never
     415             :              * deleted from page and posting tree is never reduced to the
     416             :              * posting list, we can unlock page after getting BlockNumber of
     417             :              * root of posting tree.
     418             :              */
     419          64 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     420          64 :             needUnlock = false;
     421             : 
     422          64 :             stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
     423             :                                             rootPostingTree);
     424          64 :             entry->buffer = stack->buffer;
     425             : 
     426             :             /*
     427             :              * We keep buffer pinned because we need to prevent deletion of
     428             :              * page during scan. See GIN's vacuum implementation. RefCount is
     429             :              * increased to keep buffer pinned after freeGinBtreeStack() call.
     430             :              */
     431          64 :             IncrBufferRefCount(entry->buffer);
     432             : 
     433          64 :             entrypage = BufferGetPage(entry->buffer);
     434             : 
     435             :             /*
     436             :              * Load the first page into memory.
     437             :              */
     438          64 :             ItemPointerSetMin(&minItem);
     439          64 :             entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
     440             : 
     441          64 :             entry->predictNumberResult = stack->predictNumber * entry->nlist;
     442             : 
     443          64 :             LockBuffer(entry->buffer, GIN_UNLOCK);
     444          64 :             freeGinBtreeStack(stack);
     445          64 :             entry->isFinished = false;
     446             :         }
     447             :         else
     448             :         {
     449             :             /*
     450             :              * Lock the entry leaf page.  This is more coarse-grained than
     451             :              * necessary, because it will conflict with any insertions that
     452             :              * land on the same leaf page, not only the exact key we searched
     453             :              * for.  But locking an individual tuple would require updating
     454             :              * that lock whenever it moves because of insertions or vacuums,
     455             :              * which seems too complicated.
     456             :              */
     457        4384 :             PredicateLockPage(ginstate->index,
     458             :                               BufferGetBlockNumber(stackEntry->buffer),
     459             :                               snapshot);
     460        4384 :             if (GinGetNPosting(itup) > 0)
     461             :             {
     462        4378 :                 entry->list = ginReadTuple(ginstate, entry->attnum, itup,
     463             :                                            &entry->nlist);
     464        4378 :                 entry->predictNumberResult = entry->nlist;
     465             : 
     466        4378 :                 entry->isFinished = false;
     467             :             }
     468             :         }
     469             :     }
     470             :     else
     471             :     {
     472             :         /*
     473             :          * No entry found.  Predicate lock the leaf page, to lock the place
     474             :          * where the entry would've been, had there been one.
     475             :          */
     476        1710 :         PredicateLockPage(ginstate->index,
     477             :                           BufferGetBlockNumber(stackEntry->buffer), snapshot);
     478             :     }
     479             : 
     480        6692 :     if (needUnlock)
     481        6628 :         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     482        6692 :     freeGinBtreeStack(stackEntry);
     483        6692 : }
     484             : 
     485             : /*
     486             :  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
     487             :  * least frequent items first.
     488             :  */
     489             : static int
     490        9440 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
     491             : {
     492        9440 :     const GinScanKey key = (const GinScanKey) arg;
     493        9440 :     int         i1 = *(const int *) a1;
     494        9440 :     int         i2 = *(const int *) a2;
     495        9440 :     uint32      n1 = key->scanEntry[i1]->predictNumberResult;
     496        9440 :     uint32      n2 = key->scanEntry[i2]->predictNumberResult;
     497             : 
     498        9440 :     if (n1 < n2)
     499        1368 :         return -1;
     500        8072 :     else if (n1 == n2)
     501        6302 :         return 0;
     502             :     else
     503        1770 :         return 1;
     504             : }
     505             : 
     506             : static void
     507        1714 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
     508             : {
     509        1714 :     MemoryContext oldCtx = CurrentMemoryContext;
     510             :     int         i;
     511             :     int         j;
     512             :     int        *entryIndexes;
     513             : 
     514        1714 :     ItemPointerSetMin(&key->curItem);
     515        1714 :     key->curItemMatches = false;
     516        1714 :     key->recheckCurItem = false;
     517        1714 :     key->isFinished = false;
     518             : 
     519             :     /*
     520             :      * Divide the entries into two distinct sets: required and additional.
     521             :      * Additional entries are not enough for a match alone, without any items
     522             :      * from the required set, but are needed by the consistent function to
     523             :      * decide if an item matches. When scanning, we can skip over items from
     524             :      * additional entries that have no corresponding matches in any of the
     525             :      * required entries. That speeds up queries like "frequent & rare"
     526             :      * considerably, if the frequent term can be put in the additional set.
     527             :      *
     528             :      * There can be many legal ways to divide them entries into these two
     529             :      * sets. A conservative division is to just put everything in the required
     530             :      * set, but the more you can put in the additional set, the more you can
     531             :      * skip during the scan. To maximize skipping, we try to put as many
     532             :      * frequent items as possible into additional, and less frequent ones into
     533             :      * required. To do that, sort the entries by frequency
     534             :      * (predictNumberResult), and put entries into the required set in that
     535             :      * order, until the consistent function says that none of the remaining
     536             :      * entries can form a match, without any items from the required set. The
     537             :      * rest go to the additional set.
     538             :      *
     539             :      * Exclude-only scan keys are known to have no required entries.
     540             :      */
     541        1714 :     if (key->excludeOnly)
     542             :     {
     543          38 :         MemoryContextSwitchTo(so->keyCtx);
     544             : 
     545          38 :         key->nrequired = 0;
     546          38 :         key->nadditional = key->nentries;
     547          38 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     548          44 :         for (i = 0; i < key->nadditional; i++)
     549           6 :             key->additionalEntries[i] = key->scanEntry[i];
     550             :     }
     551        1676 :     else if (key->nentries > 1)
     552             :     {
     553         560 :         MemoryContextSwitchTo(so->tempCtx);
     554             : 
     555         560 :         entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
     556        6130 :         for (i = 0; i < key->nentries; i++)
     557        5570 :             entryIndexes[i] = i;
     558         560 :         qsort_arg(entryIndexes, key->nentries, sizeof(int),
     559             :                   entryIndexByFrequencyCmp, key);
     560             : 
     561        5570 :         for (i = 1; i < key->nentries; i++)
     562        5010 :             key->entryRes[entryIndexes[i]] = GIN_MAYBE;
     563        4834 :         for (i = 0; i < key->nentries - 1; i++)
     564             :         {
     565             :             /* Pass all entries <= i as FALSE, and the rest as MAYBE */
     566        4692 :             key->entryRes[entryIndexes[i]] = GIN_FALSE;
     567             : 
     568        4692 :             if (key->triConsistentFn(key) == GIN_FALSE)
     569         418 :                 break;
     570             : 
     571             :             /* Make this loop interruptible in case there are many keys */
     572        4274 :             CHECK_FOR_INTERRUPTS();
     573             :         }
     574             :         /* i is now the last required entry. */
     575             : 
     576         560 :         MemoryContextSwitchTo(so->keyCtx);
     577             : 
     578         560 :         key->nrequired = i + 1;
     579         560 :         key->nadditional = key->nentries - key->nrequired;
     580         560 :         key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
     581         560 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     582             : 
     583         560 :         j = 0;
     584        5394 :         for (i = 0; i < key->nrequired; i++)
     585        4834 :             key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
     586        1296 :         for (i = 0; i < key->nadditional; i++)
     587         736 :             key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
     588             : 
     589             :         /* clean up after consistentFn calls (also frees entryIndexes) */
     590         560 :         MemoryContextReset(so->tempCtx);
     591             :     }
     592             :     else
     593             :     {
     594        1116 :         MemoryContextSwitchTo(so->keyCtx);
     595             : 
     596        1116 :         key->nrequired = 1;
     597        1116 :         key->nadditional = 0;
     598        1116 :         key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
     599        1116 :         key->requiredEntries[0] = key->scanEntry[0];
     600             :     }
     601        1714 :     MemoryContextSwitchTo(oldCtx);
     602        1714 : }
     603             : 
     604             : static void
     605        1586 : startScan(IndexScanDesc scan)
     606             : {
     607        1586 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
     608        1586 :     GinState   *ginstate = &so->ginstate;
     609             :     uint32      i;
     610             : 
     611        8278 :     for (i = 0; i < so->totalentries; i++)
     612        6692 :         startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
     613             : 
     614        1586 :     if (GinFuzzySearchLimit > 0)
     615             :     {
     616             :         /*
     617             :          * If all of keys more than threshold we will try to reduce result, we
     618             :          * hope (and only hope, for intersection operation of array our
     619             :          * supposition isn't true), that total result will not more than
     620             :          * minimal predictNumberResult.
     621             :          */
     622           6 :         bool        reduce = true;
     623             : 
     624          12 :         for (i = 0; i < so->totalentries; i++)
     625             :         {
     626           6 :             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
     627             :             {
     628           0 :                 reduce = false;
     629           0 :                 break;
     630             :             }
     631             :         }
     632           6 :         if (reduce)
     633             :         {
     634          12 :             for (i = 0; i < so->totalentries; i++)
     635             :             {
     636           6 :                 so->entries[i]->predictNumberResult /= so->totalentries;
     637           6 :                 so->entries[i]->reduceResult = true;
     638             :             }
     639             :         }
     640             :     }
     641             : 
     642             :     /*
     643             :      * Now that we have the estimates for the entry frequencies, finish
     644             :      * initializing the scan keys.
     645             :      */
     646        3300 :     for (i = 0; i < so->nkeys; i++)
     647        1714 :         startScanKey(ginstate, so, so->keys + i);
     648        1586 : }
     649             : 
     650             : /*
     651             :  * Load the next batch of item pointers from a posting tree.
     652             :  *
     653             :  * Note that we copy the page into GinScanEntry->list array and unlock it, but
     654             :  * keep it pinned to prevent interference with vacuum.
     655             :  */
     656             : static void
     657         100 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
     658             :                    ItemPointerData advancePast)
     659             : {
     660             :     Page        page;
     661             :     int         i;
     662             :     bool        stepright;
     663             : 
     664         100 :     if (!BufferIsValid(entry->buffer))
     665             :     {
     666           0 :         entry->isFinished = true;
     667           0 :         return;
     668             :     }
     669             : 
     670             :     /*
     671             :      * We have two strategies for finding the correct page: step right from
     672             :      * the current page, or descend the tree again from the root. If
     673             :      * advancePast equals the current item, the next matching item should be
     674             :      * on the next page, so we step right. Otherwise, descend from root.
     675             :      */
     676         100 :     if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
     677             :     {
     678          94 :         stepright = true;
     679          94 :         LockBuffer(entry->buffer, GIN_SHARE);
     680             :     }
     681             :     else
     682             :     {
     683             :         GinBtreeStack *stack;
     684             : 
     685           6 :         ReleaseBuffer(entry->buffer);
     686             : 
     687             :         /*
     688             :          * Set the search key, and find the correct leaf page.
     689             :          */
     690           6 :         if (ItemPointerIsLossyPage(&advancePast))
     691             :         {
     692           0 :             ItemPointerSet(&entry->btree.itemptr,
     693           0 :                            GinItemPointerGetBlockNumber(&advancePast) + 1,
     694             :                            FirstOffsetNumber);
     695             :         }
     696             :         else
     697             :         {
     698           6 :             ItemPointerSet(&entry->btree.itemptr,
     699             :                            GinItemPointerGetBlockNumber(&advancePast),
     700           6 :                            OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
     701             :         }
     702           6 :         entry->btree.fullScan = false;
     703           6 :         stack = ginFindLeafPage(&entry->btree, true, false);
     704             : 
     705             :         /* we don't need the stack, just the buffer. */
     706           6 :         entry->buffer = stack->buffer;
     707           6 :         IncrBufferRefCount(entry->buffer);
     708           6 :         freeGinBtreeStack(stack);
     709           6 :         stepright = false;
     710             :     }
     711             : 
     712         100 :     elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
     713             :          GinItemPointerGetBlockNumber(&advancePast),
     714             :          GinItemPointerGetOffsetNumber(&advancePast),
     715             :          !stepright);
     716             : 
     717         100 :     page = BufferGetPage(entry->buffer);
     718             :     for (;;)
     719             :     {
     720         106 :         entry->offset = InvalidOffsetNumber;
     721         106 :         if (entry->list)
     722             :         {
     723          94 :             pfree(entry->list);
     724          94 :             entry->list = NULL;
     725          94 :             entry->nlist = 0;
     726             :         }
     727             : 
     728         106 :         if (stepright)
     729             :         {
     730             :             /*
     731             :              * We've processed all the entries on this page. If it was the
     732             :              * last page in the tree, we're done.
     733             :              */
     734         100 :             if (GinPageRightMost(page))
     735             :             {
     736           6 :                 UnlockReleaseBuffer(entry->buffer);
     737           6 :                 entry->buffer = InvalidBuffer;
     738           6 :                 entry->isFinished = true;
     739           6 :                 return;
     740             :             }
     741             : 
     742             :             /*
     743             :              * Step to next page, following the right link. then find the
     744             :              * first ItemPointer greater than advancePast.
     745             :              */
     746          94 :             entry->buffer = ginStepRight(entry->buffer,
     747             :                                          ginstate->index,
     748             :                                          GIN_SHARE);
     749          94 :             page = BufferGetPage(entry->buffer);
     750             :         }
     751         100 :         stepright = true;
     752             : 
     753         100 :         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
     754           0 :             continue;           /* page was deleted by concurrent vacuum */
     755             : 
     756             :         /*
     757             :          * The first item > advancePast might not be on this page, but
     758             :          * somewhere to the right, if the page was split, or a non-match from
     759             :          * another key in the query allowed us to skip some items from this
     760             :          * entry. Keep following the right-links until we re-find the correct
     761             :          * page.
     762             :          */
     763         136 :         if (!GinPageRightMost(page) &&
     764          36 :             ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
     765             :         {
     766             :             /*
     767             :              * the item we're looking is > the right bound of the page, so it
     768             :              * can't be on this page.
     769             :              */
     770           0 :             continue;
     771             :         }
     772             : 
     773         100 :         entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
     774             : 
     775        1924 :         for (i = 0; i < entry->nlist; i++)
     776             :         {
     777        1918 :             if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
     778             :             {
     779          94 :                 entry->offset = i;
     780             : 
     781          94 :                 if (GinPageRightMost(page))
     782             :                 {
     783             :                     /* after processing the copied items, we're done. */
     784          58 :                     UnlockReleaseBuffer(entry->buffer);
     785          58 :                     entry->buffer = InvalidBuffer;
     786             :                 }
     787             :                 else
     788          36 :                     LockBuffer(entry->buffer, GIN_UNLOCK);
     789          94 :                 return;
     790             :             }
     791             :         }
     792             :     }
     793             : }
     794             : 
     795             : #define gin_rand() pg_prng_double(&pg_global_prng_state)
     796             : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
     797             : 
     798             : /*
     799             :  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
     800             :  * of one scan key, or sets entry->isFinished to true if there are no more.
     801             :  *
     802             :  * Item pointers are returned in ascending order.
     803             :  *
     804             :  * Note: this can return a "lossy page" item pointer, indicating that the
     805             :  * entry potentially matches all items on that heap page.  However, it is
     806             :  * not allowed to return both a lossy page pointer and exact (regular)
     807             :  * item pointers for the same page.  (Doing so would break the key-combination
     808             :  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
     809             :  * current implementation this is guaranteed by the behavior of tidbitmaps.
     810             :  */
     811             : static void
     812     1088038 : entryGetItem(GinState *ginstate, GinScanEntry entry,
     813             :              ItemPointerData advancePast)
     814             : {
     815             :     Assert(!entry->isFinished);
     816             : 
     817             :     Assert(!ItemPointerIsValid(&entry->curItem) ||
     818             :            ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
     819             : 
     820     1088038 :     if (entry->matchBitmap)
     821             :     {
     822             :         /* A bitmap result */
     823      268052 :         BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
     824      268052 :         OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
     825             : 
     826             :         for (;;)
     827             :         {
     828             :             /*
     829             :              * If we've exhausted all items on this block, move to next block
     830             :              * in the bitmap. tbm_private_iterate() sets matchResult.blockno
     831             :              * to InvalidBlockNumber when the bitmap is exhausted.
     832             :              */
     833      272878 :             while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
     834      272458 :                    (!entry->matchResult.lossy &&
     835      272458 :                     entry->offset >= entry->matchNtuples) ||
     836      535264 :                    entry->matchResult.blockno < advancePastBlk ||
     837      267632 :                    (ItemPointerIsLossyPage(&advancePast) &&
     838           0 :                     entry->matchResult.blockno == advancePastBlk))
     839             :             {
     840        5246 :                 if (!tbm_private_iterate(entry->matchIterator, &entry->matchResult))
     841             :                 {
     842             :                     Assert(!BlockNumberIsValid(entry->matchResult.blockno));
     843         420 :                     ItemPointerSetInvalid(&entry->curItem);
     844         420 :                     tbm_end_private_iterate(entry->matchIterator);
     845         420 :                     entry->matchIterator = NULL;
     846         420 :                     entry->isFinished = true;
     847         420 :                     break;
     848             :                 }
     849             : 
     850             :                 /* Exact pages need their tuple offsets extracted. */
     851        4826 :                 if (!entry->matchResult.lossy)
     852        4826 :                     entry->matchNtuples = tbm_extract_page_tuple(&entry->matchResult,
     853        4826 :                                                                  entry->matchOffsets,
     854             :                                                                  TBM_MAX_TUPLES_PER_PAGE);
     855             : 
     856             :                 /*
     857             :                  * Reset counter to the beginning of entry->matchResult. Note:
     858             :                  * entry->offset is still greater than matchResult.ntuples if
     859             :                  * matchResult is lossy.  So, on next call we will get next
     860             :                  * result from TIDBitmap.
     861             :                  */
     862        4826 :                 entry->offset = 0;
     863             :             }
     864      268052 :             if (entry->isFinished)
     865         420 :                 break;
     866             : 
     867             :             /*
     868             :              * We're now on the first page after advancePast which has any
     869             :              * items on it. If it's a lossy result, return that.
     870             :              */
     871      267632 :             if (entry->matchResult.lossy)
     872             :             {
     873           0 :                 ItemPointerSetLossyPage(&entry->curItem,
     874             :                                         entry->matchResult.blockno);
     875             : 
     876             :                 /*
     877             :                  * We might as well fall out of the loop; we could not
     878             :                  * estimate number of results on this page to support correct
     879             :                  * reducing of result even if it's enabled.
     880             :                  */
     881           0 :                 break;
     882             :             }
     883             : 
     884             :             /*
     885             :              * Not a lossy page. If tuple offsets were extracted,
     886             :              * entry->matchNtuples must be > -1
     887             :              */
     888             :             Assert(entry->matchNtuples > -1);
     889             : 
     890             :             /* Skip over any offsets <= advancePast, and return that. */
     891      267632 :             if (entry->matchResult.blockno == advancePastBlk)
     892             :             {
     893             :                 Assert(entry->matchNtuples > 0);
     894             : 
     895             :                 /*
     896             :                  * First, do a quick check against the last offset on the
     897             :                  * page. If that's > advancePast, so are all the other
     898             :                  * offsets, so just go back to the top to get the next page.
     899             :                  */
     900      263226 :                 if (entry->matchOffsets[entry->matchNtuples - 1] <= advancePastOff)
     901             :                 {
     902           0 :                     entry->offset = entry->matchNtuples;
     903           0 :                     continue;
     904             :                 }
     905             : 
     906             :                 /* Otherwise scan to find the first item > advancePast */
     907      263226 :                 while (entry->matchOffsets[entry->offset] <= advancePastOff)
     908           0 :                     entry->offset++;
     909             :             }
     910             : 
     911      267632 :             ItemPointerSet(&entry->curItem,
     912             :                            entry->matchResult.blockno,
     913      267632 :                            entry->matchOffsets[entry->offset]);
     914      267632 :             entry->offset++;
     915             : 
     916             :             /* Done unless we need to reduce the result */
     917      267632 :             if (!entry->reduceResult || !dropItem(entry))
     918             :                 break;
     919             :         }
     920             :     }
     921      819986 :     else if (!BufferIsValid(entry->buffer))
     922             :     {
     923             :         /*
     924             :          * A posting list from an entry tuple, or the last page of a posting
     925             :          * tree.
     926             :          */
     927             :         for (;;)
     928             :         {
     929      394214 :             if (entry->offset >= entry->nlist)
     930             :             {
     931        3890 :                 ItemPointerSetInvalid(&entry->curItem);
     932        3890 :                 entry->isFinished = true;
     933        3890 :                 break;
     934             :             }
     935             : 
     936      390324 :             entry->curItem = entry->list[entry->offset++];
     937             : 
     938             :             /* If we're not past advancePast, keep scanning */
     939      390324 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     940       66634 :                 continue;
     941             : 
     942             :             /* Done unless we need to reduce the result */
     943      323690 :             if (!entry->reduceResult || !dropItem(entry))
     944             :                 break;
     945             :         }
     946             :     }
     947             :     else
     948             :     {
     949             :         /* A posting tree */
     950             :         for (;;)
     951             :         {
     952             :             /* If we've processed the current batch, load more items */
     953      678894 :             while (entry->offset >= entry->nlist)
     954             :             {
     955         100 :                 entryLoadMoreItems(ginstate, entry, advancePast);
     956             : 
     957         100 :                 if (entry->isFinished)
     958             :                 {
     959           6 :                     ItemPointerSetInvalid(&entry->curItem);
     960           6 :                     return;
     961             :                 }
     962             :             }
     963             : 
     964      678794 :             entry->curItem = entry->list[entry->offset++];
     965             : 
     966             :             /* If we're not past advancePast, keep scanning */
     967      678794 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     968       47106 :                 continue;
     969             : 
     970             :             /* Done unless we need to reduce the result */
     971      631688 :             if (!entry->reduceResult || !dropItem(entry))
     972             :                 break;
     973             : 
     974             :             /*
     975             :              * Advance advancePast (so that entryLoadMoreItems will load the
     976             :              * right data), and keep scanning
     977             :              */
     978      118744 :             advancePast = entry->curItem;
     979             :         }
     980             :     }
     981             : }
     982             : 
     983             : /*
     984             :  * Identify the "current" item among the input entry streams for this scan key
     985             :  * that is greater than advancePast, and test whether it passes the scan key
     986             :  * qual condition.
     987             :  *
     988             :  * The current item is the smallest curItem among the inputs.  key->curItem
     989             :  * is set to that value.  key->curItemMatches is set to indicate whether that
     990             :  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
     991             :  * iff recheck is needed for this item pointer (including the case where the
     992             :  * item pointer is a lossy page pointer).
     993             :  *
     994             :  * If all entry streams are exhausted, sets key->isFinished to true.
     995             :  *
     996             :  * Item pointers must be returned in ascending order.
     997             :  *
     998             :  * Note: this can return a "lossy page" item pointer, indicating that the
     999             :  * key potentially matches all items on that heap page.  However, it is
    1000             :  * not allowed to return both a lossy page pointer and exact (regular)
    1001             :  * item pointers for the same page.  (Doing so would break the key-combination
    1002             :  * logic in scanGetItem.)
    1003             :  */
    1004             : static void
    1005     1002512 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
    1006             :            ItemPointerData advancePast)
    1007             : {
    1008             :     ItemPointerData minItem;
    1009             :     ItemPointerData curPageLossy;
    1010             :     uint32      i;
    1011             :     bool        haveLossyEntry;
    1012             :     GinScanEntry entry;
    1013             :     GinTernaryValue res;
    1014             :     MemoryContext oldCtx;
    1015             :     bool        allFinished;
    1016             : 
    1017             :     Assert(!key->isFinished);
    1018             : 
    1019             :     /*
    1020             :      * We might have already tested this item; if so, no need to repeat work.
    1021             :      * (Note: the ">" case can happen, if advancePast is exact but we
    1022             :      * previously had to set curItem to a lossy-page pointer.)
    1023             :      */
    1024     1002512 :     if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
    1025        1608 :         return;
    1026             : 
    1027             :     /*
    1028             :      * Find the minimum item > advancePast among the active entry streams.
    1029             :      *
    1030             :      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
    1031             :      * offset (0xffff), so that it will sort after any exact entries for the
    1032             :      * same page.  So we'll prefer to return exact pointers not lossy
    1033             :      * pointers, which is good.
    1034             :      */
    1035     1002490 :     ItemPointerSetMax(&minItem);
    1036     1002490 :     allFinished = true;
    1037     2757792 :     for (i = 0; i < key->nrequired; i++)
    1038             :     {
    1039     1755302 :         entry = key->requiredEntries[i];
    1040             : 
    1041     1755302 :         if (entry->isFinished)
    1042      560142 :             continue;
    1043             : 
    1044             :         /*
    1045             :          * Advance this stream if necessary.
    1046             :          *
    1047             :          * In particular, since entry->curItem was initialized with
    1048             :          * ItemPointerSetMin, this ensures we fetch the first item for each
    1049             :          * entry on the first call.
    1050             :          */
    1051     1195160 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1052             :         {
    1053     1034282 :             entryGetItem(ginstate, entry, advancePast);
    1054     1034282 :             if (entry->isFinished)
    1055        4138 :                 continue;
    1056             :         }
    1057             : 
    1058     1191022 :         allFinished = false;
    1059     1191022 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1060     1097672 :             minItem = entry->curItem;
    1061             :     }
    1062             : 
    1063     1002490 :     if (allFinished && !key->excludeOnly)
    1064             :     {
    1065             :         /* all entries are finished */
    1066        1586 :         key->isFinished = true;
    1067        1586 :         return;
    1068             :     }
    1069             : 
    1070     1000904 :     if (!key->excludeOnly)
    1071             :     {
    1072             :         /*
    1073             :          * For a normal scan key, we now know there are no matches < minItem.
    1074             :          *
    1075             :          * If minItem is lossy, it means that there were no exact items on the
    1076             :          * page among requiredEntries, because lossy pointers sort after exact
    1077             :          * items. However, there might be exact items for the same page among
    1078             :          * additionalEntries, so we mustn't advance past them.
    1079             :          */
    1080      996428 :         if (ItemPointerIsLossyPage(&minItem))
    1081             :         {
    1082           0 :             if (GinItemPointerGetBlockNumber(&advancePast) <
    1083           0 :                 GinItemPointerGetBlockNumber(&minItem))
    1084             :             {
    1085           0 :                 ItemPointerSet(&advancePast,
    1086             :                                GinItemPointerGetBlockNumber(&minItem),
    1087             :                                InvalidOffsetNumber);
    1088             :             }
    1089             :         }
    1090             :         else
    1091             :         {
    1092             :             Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
    1093      996428 :             ItemPointerSet(&advancePast,
    1094             :                            GinItemPointerGetBlockNumber(&minItem),
    1095      996428 :                            OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
    1096             :         }
    1097             :     }
    1098             :     else
    1099             :     {
    1100             :         /*
    1101             :          * excludeOnly scan keys don't have any entries that are necessarily
    1102             :          * present in matching items.  So, we consider the item just after
    1103             :          * advancePast.
    1104             :          */
    1105             :         Assert(key->nrequired == 0);
    1106        4476 :         ItemPointerSet(&minItem,
    1107             :                        GinItemPointerGetBlockNumber(&advancePast),
    1108        4476 :                        OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
    1109             :     }
    1110             : 
    1111             :     /*
    1112             :      * We might not have loaded all the entry streams for this TID yet. We
    1113             :      * could call the consistent function, passing MAYBE for those entries, to
    1114             :      * see if it can decide if this TID matches based on the information we
    1115             :      * have. But if the consistent-function is expensive, and cannot in fact
    1116             :      * decide with partial information, that could be a big loss. So, load all
    1117             :      * the additional entries, before calling the consistent function.
    1118             :      */
    1119     1067564 :     for (i = 0; i < key->nadditional; i++)
    1120             :     {
    1121       66660 :         entry = key->additionalEntries[i];
    1122             : 
    1123       66660 :         if (entry->isFinished)
    1124         412 :             continue;
    1125             : 
    1126       66248 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1127             :         {
    1128       53756 :             entryGetItem(ginstate, entry, advancePast);
    1129       53756 :             if (entry->isFinished)
    1130         178 :                 continue;
    1131             :         }
    1132             : 
    1133             :         /*
    1134             :          * Normally, none of the items in additionalEntries can have a curItem
    1135             :          * larger than minItem. But if minItem is a lossy page, then there
    1136             :          * might be exact items on the same page among additionalEntries.
    1137             :          */
    1138       66070 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1139             :         {
    1140             :             Assert(ItemPointerIsLossyPage(&minItem));
    1141           0 :             minItem = entry->curItem;
    1142             :         }
    1143             :     }
    1144             : 
    1145             :     /*
    1146             :      * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
    1147             :      * and perform consistentFn test.
    1148             :      *
    1149             :      * Lossy-page entries pose a problem, since we don't know the correct
    1150             :      * entryRes state to pass to the consistentFn, and we also don't know what
    1151             :      * its combining logic will be (could be AND, OR, or even NOT). If the
    1152             :      * logic is OR then the consistentFn might succeed for all items in the
    1153             :      * lossy page even when none of the other entries match.
    1154             :      *
    1155             :      * Our strategy is to call the tri-state consistent function, with the
    1156             :      * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
    1157             :      * returns FALSE, none of the lossy items alone are enough for a match, so
    1158             :      * we don't need to return a lossy-page pointer. Otherwise, return a
    1159             :      * lossy-page pointer to indicate that the whole heap page must be
    1160             :      * checked.  (On subsequent calls, we'll do nothing until minItem is past
    1161             :      * the page altogether, thus ensuring that we never return both regular
    1162             :      * and lossy pointers for the same page.)
    1163             :      *
    1164             :      * An exception is that it doesn't matter what we pass for lossy pointers
    1165             :      * in "hidden" entries, because the consistentFn's result can't depend on
    1166             :      * them. We could pass them as MAYBE as well, but if we're using the
    1167             :      * "shim" implementation of a tri-state consistent function (see
    1168             :      * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
    1169             :      * them as true.
    1170             :      *
    1171             :      * Note that only lossy-page entries pointing to the current item's page
    1172             :      * should trigger this processing; we might have future lossy pages in the
    1173             :      * entry array, but they aren't relevant yet.
    1174             :      */
    1175     1000904 :     key->curItem = minItem;
    1176     1000904 :     ItemPointerSetLossyPage(&curPageLossy,
    1177             :                             GinItemPointerGetBlockNumber(&key->curItem));
    1178     1000904 :     haveLossyEntry = false;
    1179     2817006 :     for (i = 0; i < key->nentries; i++)
    1180             :     {
    1181     1816102 :         entry = key->scanEntry[i];
    1182     3073194 :         if (entry->isFinished == false &&
    1183     1257092 :             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1184             :         {
    1185           0 :             if (i < key->nuserentries)
    1186           0 :                 key->entryRes[i] = GIN_MAYBE;
    1187             :             else
    1188           0 :                 key->entryRes[i] = GIN_TRUE;
    1189           0 :             haveLossyEntry = true;
    1190             :         }
    1191             :         else
    1192     1816102 :             key->entryRes[i] = GIN_FALSE;
    1193             :     }
    1194             : 
    1195             :     /* prepare for calling consistentFn in temp context */
    1196     1000904 :     oldCtx = MemoryContextSwitchTo(tempCtx);
    1197             : 
    1198     1000904 :     if (haveLossyEntry)
    1199             :     {
    1200             :         /* Have lossy-page entries, so see if whole page matches */
    1201           0 :         res = key->triConsistentFn(key);
    1202             : 
    1203           0 :         if (res == GIN_TRUE || res == GIN_MAYBE)
    1204             :         {
    1205             :             /* Yes, so clean up ... */
    1206           0 :             MemoryContextSwitchTo(oldCtx);
    1207           0 :             MemoryContextReset(tempCtx);
    1208             : 
    1209             :             /* and return lossy pointer for whole page */
    1210           0 :             key->curItem = curPageLossy;
    1211           0 :             key->curItemMatches = true;
    1212           0 :             key->recheckCurItem = true;
    1213           0 :             return;
    1214             :         }
    1215             :     }
    1216             : 
    1217             :     /*
    1218             :      * At this point we know that we don't need to return a lossy whole-page
    1219             :      * pointer, but we might have matches for individual exact item pointers,
    1220             :      * possibly in combination with a lossy pointer. Pass lossy pointers as
    1221             :      * MAYBE to the ternary consistent function, to let it decide if this
    1222             :      * tuple satisfies the overall key, even though we don't know if the lossy
    1223             :      * entries match.
    1224             :      *
    1225             :      * Prepare entryRes array to be passed to consistentFn.
    1226             :      */
    1227     2817006 :     for (i = 0; i < key->nentries; i++)
    1228             :     {
    1229     1816102 :         entry = key->scanEntry[i];
    1230     1816102 :         if (entry->isFinished)
    1231      559010 :             key->entryRes[i] = GIN_FALSE;
    1232             : #if 0
    1233             : 
    1234             :         /*
    1235             :          * This case can't currently happen, because we loaded all the entries
    1236             :          * for this item earlier.
    1237             :          */
    1238             :         else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1239             :             key->entryRes[i] = GIN_MAYBE;
    1240             : #endif
    1241     1257092 :         else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1242           0 :             key->entryRes[i] = GIN_MAYBE;
    1243     1257092 :         else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
    1244     1073446 :             key->entryRes[i] = GIN_TRUE;
    1245             :         else
    1246      183646 :             key->entryRes[i] = GIN_FALSE;
    1247             :     }
    1248             : 
    1249     1000904 :     res = key->triConsistentFn(key);
    1250             : 
    1251     1000904 :     switch (res)
    1252             :     {
    1253      865256 :         case GIN_TRUE:
    1254      865256 :             key->curItemMatches = true;
    1255             :             /* triConsistentFn set recheckCurItem */
    1256      865256 :             break;
    1257             : 
    1258       18026 :         case GIN_FALSE:
    1259       18026 :             key->curItemMatches = false;
    1260       18026 :             break;
    1261             : 
    1262      117622 :         case GIN_MAYBE:
    1263      117622 :             key->curItemMatches = true;
    1264      117622 :             key->recheckCurItem = true;
    1265      117622 :             break;
    1266             : 
    1267           0 :         default:
    1268             : 
    1269             :             /*
    1270             :              * the 'default' case shouldn't happen, but if the consistent
    1271             :              * function returns something bogus, this is the safe result
    1272             :              */
    1273           0 :             key->curItemMatches = true;
    1274           0 :             key->recheckCurItem = true;
    1275           0 :             break;
    1276             :     }
    1277             : 
    1278             :     /*
    1279             :      * We have a tuple, and we know if it matches or not. If it's a non-match,
    1280             :      * we could continue to find the next matching tuple, but let's break out
    1281             :      * and give scanGetItem a chance to advance the other keys. They might be
    1282             :      * able to skip past to a much higher TID, allowing us to save work.
    1283             :      */
    1284             : 
    1285             :     /* clean up after consistentFn calls */
    1286     1000904 :     MemoryContextSwitchTo(oldCtx);
    1287     1000904 :     MemoryContextReset(tempCtx);
    1288             : }
    1289             : 
    1290             : /*
    1291             :  * Get next heap item pointer (after advancePast) from scan.
    1292             :  * Returns true if anything found.
    1293             :  * On success, *item and *recheck are set.
    1294             :  *
    1295             :  * Note: this is very nearly the same logic as in keyGetItem(), except
    1296             :  * that we know the keys are to be combined with AND logic, whereas in
    1297             :  * keyGetItem() the combination logic is known only to the consistentFn.
    1298             :  */
    1299             : static bool
    1300      979892 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
    1301             :             ItemPointerData *item, bool *recheck)
    1302             : {
    1303      979892 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1304             :     uint32      i;
    1305             :     bool        match;
    1306             : 
    1307             :     /*----------
    1308             :      * Advance the scan keys in lock-step, until we find an item that matches
    1309             :      * all the keys. If any key reports isFinished, meaning its subset of the
    1310             :      * entries is exhausted, we can stop.  Otherwise, set *item to the next
    1311             :      * matching item.
    1312             :      *
    1313             :      * This logic works only if a keyGetItem stream can never contain both
    1314             :      * exact and lossy pointers for the same page.  Else we could have a
    1315             :      * case like
    1316             :      *
    1317             :      *      stream 1        stream 2
    1318             :      *      ...             ...
    1319             :      *      42/6            42/7
    1320             :      *      50/1            42/0xffff
    1321             :      *      ...             ...
    1322             :      *
    1323             :      * We would conclude that 42/6 is not a match and advance stream 1,
    1324             :      * thus never detecting the match to the lossy pointer in stream 2.
    1325             :      * (keyGetItem has a similar problem versus entryGetItem.)
    1326             :      *----------
    1327             :      */
    1328             :     do
    1329             :     {
    1330      997976 :         ItemPointerSetMin(item);
    1331      997976 :         match = true;
    1332     1980876 :         for (i = 0; i < so->nkeys && match; i++)
    1333             :         {
    1334     1002512 :             GinScanKey  key = so->keys + i;
    1335             : 
    1336             :             /*
    1337             :              * If we're considering a lossy page, skip excludeOnly keys. They
    1338             :              * can't exclude the whole page anyway.
    1339             :              */
    1340     1002512 :             if (ItemPointerIsLossyPage(item) && key->excludeOnly)
    1341             :             {
    1342             :                 /*
    1343             :                  * ginNewScanKey() should never mark the first key as
    1344             :                  * excludeOnly.
    1345             :                  */
    1346             :                 Assert(i > 0);
    1347           0 :                 continue;
    1348             :             }
    1349             : 
    1350             :             /* Fetch the next item for this key that is > advancePast. */
    1351     1002512 :             keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
    1352             : 
    1353     1002512 :             if (key->isFinished)
    1354        1586 :                 return false;
    1355             : 
    1356             :             /*
    1357             :              * If it's not a match, we can immediately conclude that nothing
    1358             :              * <= this item matches, without checking the rest of the keys.
    1359             :              */
    1360     1000926 :             if (!key->curItemMatches)
    1361             :             {
    1362       18026 :                 advancePast = key->curItem;
    1363       18026 :                 match = false;
    1364       18026 :                 break;
    1365             :             }
    1366             : 
    1367             :             /*
    1368             :              * It's a match. We can conclude that nothing < matches, so the
    1369             :              * other key streams can skip to this item.
    1370             :              *
    1371             :              * Beware of lossy pointers, though; from a lossy pointer, we can
    1372             :              * only conclude that nothing smaller than this *block* matches.
    1373             :              */
    1374      982900 :             if (ItemPointerIsLossyPage(&key->curItem))
    1375             :             {
    1376           0 :                 if (GinItemPointerGetBlockNumber(&advancePast) <
    1377           0 :                     GinItemPointerGetBlockNumber(&key->curItem))
    1378             :                 {
    1379           0 :                     ItemPointerSet(&advancePast,
    1380           0 :                                    GinItemPointerGetBlockNumber(&key->curItem),
    1381             :                                    InvalidOffsetNumber);
    1382             :                 }
    1383             :             }
    1384             :             else
    1385             :             {
    1386             :                 Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
    1387      982900 :                 ItemPointerSet(&advancePast,
    1388      982900 :                                GinItemPointerGetBlockNumber(&key->curItem),
    1389      982900 :                                OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
    1390             :             }
    1391             : 
    1392             :             /*
    1393             :              * If this is the first key, remember this location as a potential
    1394             :              * match, and proceed to check the rest of the keys.
    1395             :              *
    1396             :              * Otherwise, check if this is the same item that we checked the
    1397             :              * previous keys for (or a lossy pointer for the same page). If
    1398             :              * not, loop back to check the previous keys for this item (we
    1399             :              * will check this key again too, but keyGetItem returns quickly
    1400             :              * for that)
    1401             :              */
    1402      982900 :             if (i == 0)
    1403             :             {
    1404      978470 :                 *item = key->curItem;
    1405             :             }
    1406             :             else
    1407             :             {
    1408        8860 :                 if (ItemPointerIsLossyPage(&key->curItem) ||
    1409        4430 :                     ItemPointerIsLossyPage(item))
    1410             :                 {
    1411             :                     Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
    1412           0 :                     match = (GinItemPointerGetBlockNumber(&key->curItem) ==
    1413           0 :                              GinItemPointerGetBlockNumber(item));
    1414             :                 }
    1415             :                 else
    1416             :                 {
    1417             :                     Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
    1418        4430 :                     match = (ginCompareItemPointers(&key->curItem, item) == 0);
    1419             :                 }
    1420             :             }
    1421             :         }
    1422      996390 :     } while (!match);
    1423             : 
    1424             :     Assert(!ItemPointerIsMin(item));
    1425             : 
    1426             :     /*
    1427             :      * Now *item contains the first ItemPointer after previous result that
    1428             :      * satisfied all the keys for that exact TID, or a lossy reference to the
    1429             :      * same page.
    1430             :      *
    1431             :      * We must return recheck = true if any of the keys are marked recheck.
    1432             :      */
    1433      978306 :     *recheck = false;
    1434     1826316 :     for (i = 0; i < so->nkeys; i++)
    1435             :     {
    1436      978678 :         GinScanKey  key = so->keys + i;
    1437             : 
    1438      978678 :         if (key->recheckCurItem)
    1439             :         {
    1440      130668 :             *recheck = true;
    1441      130668 :             break;
    1442             :         }
    1443             :     }
    1444             : 
    1445      978306 :     return true;
    1446             : }
    1447             : 
    1448             : 
    1449             : /*
    1450             :  * Functions for scanning the pending list
    1451             :  */
    1452             : 
    1453             : 
    1454             : /*
    1455             :  * Get ItemPointer of next heap row to be checked from pending list.
    1456             :  * Returns false if there are no more. On pages with several heap rows
    1457             :  * it returns each row separately, on page with part of heap row returns
    1458             :  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
    1459             :  * the range of pending-list tuples belonging to this heap row.
    1460             :  *
    1461             :  * The pendingBuffer is presumed pinned and share-locked on entry, and is
    1462             :  * pinned and share-locked on success exit.  On failure exit it's released.
    1463             :  */
    1464             : static bool
    1465        1614 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
    1466             : {
    1467             :     OffsetNumber maxoff;
    1468             :     Page        page;
    1469             :     IndexTuple  itup;
    1470             : 
    1471        1614 :     ItemPointerSetInvalid(&pos->item);
    1472             :     for (;;)
    1473             :     {
    1474        1614 :         page = BufferGetPage(pos->pendingBuffer);
    1475             : 
    1476        1614 :         maxoff = PageGetMaxOffsetNumber(page);
    1477        1614 :         if (pos->firstOffset > maxoff)
    1478             :         {
    1479         166 :             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
    1480             : 
    1481         166 :             if (blkno == InvalidBlockNumber)
    1482             :             {
    1483         166 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1484         166 :                 pos->pendingBuffer = InvalidBuffer;
    1485             : 
    1486         166 :                 return false;
    1487             :             }
    1488             :             else
    1489             :             {
    1490             :                 /*
    1491             :                  * Here we must prevent deletion of next page by insertcleanup
    1492             :                  * process, which may be trying to obtain exclusive lock on
    1493             :                  * current page.  So, we lock next page before releasing the
    1494             :                  * current one
    1495             :                  */
    1496           0 :                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
    1497             : 
    1498           0 :                 LockBuffer(tmpbuf, GIN_SHARE);
    1499           0 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1500             : 
    1501           0 :                 pos->pendingBuffer = tmpbuf;
    1502           0 :                 pos->firstOffset = FirstOffsetNumber;
    1503             :             }
    1504             :         }
    1505             :         else
    1506             :         {
    1507        1448 :             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
    1508        1448 :             pos->item = itup->t_tid;
    1509        1448 :             if (GinPageHasFullRow(page))
    1510             :             {
    1511             :                 /*
    1512             :                  * find itempointer to the next row
    1513             :                  */
    1514        3354 :                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
    1515             :                 {
    1516        3188 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
    1517        3188 :                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
    1518        1282 :                         break;
    1519             :                 }
    1520             :             }
    1521             :             else
    1522             :             {
    1523             :                 /*
    1524             :                  * All itempointers are the same on this page
    1525             :                  */
    1526           0 :                 pos->lastOffset = maxoff + 1;
    1527             :             }
    1528             : 
    1529             :             /*
    1530             :              * Now pos->firstOffset points to the first tuple of current heap
    1531             :              * row, pos->lastOffset points to the first tuple of next heap row
    1532             :              * (or to the end of page)
    1533             :              */
    1534        1448 :             break;
    1535             :         }
    1536             :     }
    1537             : 
    1538        1448 :     return true;
    1539             : }
    1540             : 
    1541             : /*
    1542             :  * Scan pending-list page from current tuple (off) up till the first of:
    1543             :  * - match is found (then returns true)
    1544             :  * - no later match is possible
    1545             :  * - tuple's attribute number is not equal to entry's attrnum
    1546             :  * - reach end of page
    1547             :  *
    1548             :  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
    1549             :  * of gintuple_get_key() on the current page.
    1550             :  */
    1551             : static bool
    1552           0 : matchPartialInPendingList(GinState *ginstate, Page page,
    1553             :                           OffsetNumber off, OffsetNumber maxoff,
    1554             :                           GinScanEntry entry,
    1555             :                           Datum *datum, GinNullCategory *category,
    1556             :                           bool *datumExtracted)
    1557             : {
    1558             :     IndexTuple  itup;
    1559             :     int32       cmp;
    1560             : 
    1561             :     /* Partial match to a null is not possible */
    1562           0 :     if (entry->queryCategory != GIN_CAT_NORM_KEY)
    1563           0 :         return false;
    1564             : 
    1565           0 :     while (off < maxoff)
    1566             :     {
    1567           0 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
    1568             : 
    1569           0 :         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
    1570           0 :             return false;
    1571             : 
    1572           0 :         if (datumExtracted[off - 1] == false)
    1573             :         {
    1574           0 :             datum[off - 1] = gintuple_get_key(ginstate, itup,
    1575           0 :                                               &category[off - 1]);
    1576           0 :             datumExtracted[off - 1] = true;
    1577             :         }
    1578             : 
    1579             :         /* Once we hit nulls, no further match is possible */
    1580           0 :         if (category[off - 1] != GIN_CAT_NORM_KEY)
    1581           0 :             return false;
    1582             : 
    1583             :         /*----------
    1584             :          * Check partial match.
    1585             :          * case cmp == 0 => match
    1586             :          * case cmp > 0 => not match and end scan (no later match possible)
    1587             :          * case cmp < 0 => not match and continue scan
    1588             :          *----------
    1589             :          */
    1590           0 :         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
    1591           0 :                                               ginstate->supportCollation[entry->attnum - 1],
    1592             :                                               entry->queryKey,
    1593           0 :                                               datum[off - 1],
    1594           0 :                                               UInt16GetDatum(entry->strategy),
    1595           0 :                                               PointerGetDatum(entry->extra_data)));
    1596           0 :         if (cmp == 0)
    1597           0 :             return true;
    1598           0 :         else if (cmp > 0)
    1599           0 :             return false;
    1600             : 
    1601           0 :         off++;
    1602             :     }
    1603             : 
    1604           0 :     return false;
    1605             : }
    1606             : 
    1607             : /*
    1608             :  * Set up the entryRes array for each key by looking at
    1609             :  * every entry for current heap row in pending list.
    1610             :  *
    1611             :  * Returns true if each scan key has at least one entryRes match.
    1612             :  * This corresponds to the situations where the normal index search will
    1613             :  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
    1614             :  * cannot be returned by the normal search since no entry stream will
    1615             :  * source its TID.)
    1616             :  *
    1617             :  * The pendingBuffer is presumed pinned and share-locked on entry.
    1618             :  */
    1619             : static bool
    1620        1448 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
    1621             : {
    1622        1448 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1623             :     OffsetNumber attrnum;
    1624             :     Page        page;
    1625             :     IndexTuple  itup;
    1626             :     int         i,
    1627             :                 j;
    1628             : 
    1629             :     /*
    1630             :      * Reset all entryRes and hasMatchKey flags
    1631             :      */
    1632        3924 :     for (i = 0; i < so->nkeys; i++)
    1633             :     {
    1634        2476 :         GinScanKey  key = so->keys + i;
    1635             : 
    1636        2476 :         memset(key->entryRes, GIN_FALSE, key->nentries);
    1637             :     }
    1638        1448 :     memset(pos->hasMatchKey, false, so->nkeys);
    1639             : 
    1640             :     /*
    1641             :      * Outer loop iterates over multiple pending-list pages when a single heap
    1642             :      * row has entries spanning those pages.
    1643             :      */
    1644             :     for (;;)
    1645           0 :     {
    1646             :         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
    1647             :         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
    1648             :         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
    1649             : 
    1650             :         Assert(pos->lastOffset > pos->firstOffset);
    1651        1448 :         memset(datumExtracted + pos->firstOffset - 1, 0,
    1652        1448 :                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
    1653             : 
    1654        1448 :         page = BufferGetPage(pos->pendingBuffer);
    1655             : 
    1656        3924 :         for (i = 0; i < so->nkeys; i++)
    1657             :         {
    1658        2476 :             GinScanKey  key = so->keys + i;
    1659             : 
    1660        4776 :             for (j = 0; j < key->nentries; j++)
    1661             :             {
    1662        2300 :                 GinScanEntry entry = key->scanEntry[j];
    1663        2300 :                 OffsetNumber StopLow = pos->firstOffset,
    1664        2300 :                             StopHigh = pos->lastOffset,
    1665             :                             StopMiddle;
    1666             : 
    1667             :                 /* If already matched on earlier page, do no extra work */
    1668        2300 :                 if (key->entryRes[j])
    1669           0 :                     continue;
    1670             : 
    1671             :                 /*
    1672             :                  * Interesting tuples are from pos->firstOffset to
    1673             :                  * pos->lastOffset and they are ordered by (attnum, Datum) as
    1674             :                  * it's done in entry tree.  So we can use binary search to
    1675             :                  * avoid linear scanning.
    1676             :                  */
    1677        5108 :                 while (StopLow < StopHigh)
    1678             :                 {
    1679             :                     int         res;
    1680             : 
    1681        4010 :                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    1682             : 
    1683        4010 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
    1684             : 
    1685        4010 :                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
    1686             : 
    1687        4010 :                     if (key->attnum < attrnum)
    1688             :                     {
    1689         798 :                         StopHigh = StopMiddle;
    1690         798 :                         continue;
    1691             :                     }
    1692        3212 :                     if (key->attnum > attrnum)
    1693             :                     {
    1694         660 :                         StopLow = StopMiddle + 1;
    1695         660 :                         continue;
    1696             :                     }
    1697             : 
    1698        2552 :                     if (datumExtracted[StopMiddle - 1] == false)
    1699             :                     {
    1700        4952 :                         datum[StopMiddle - 1] =
    1701        2476 :                             gintuple_get_key(&so->ginstate, itup,
    1702        2476 :                                              &category[StopMiddle - 1]);
    1703        2476 :                         datumExtracted[StopMiddle - 1] = true;
    1704             :                     }
    1705             : 
    1706        2552 :                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
    1707             :                     {
    1708             :                         /* special behavior depending on searchMode */
    1709        1084 :                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
    1710             :                         {
    1711             :                             /* match anything except NULL_ITEM */
    1712        1084 :                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
    1713         378 :                                 res = -1;
    1714             :                             else
    1715         706 :                                 res = 0;
    1716             :                         }
    1717             :                         else
    1718             :                         {
    1719             :                             /* match everything */
    1720           0 :                             res = 0;
    1721             :                         }
    1722             :                     }
    1723             :                     else
    1724             :                     {
    1725        1468 :                         res = ginCompareEntries(&so->ginstate,
    1726        1468 :                                                 entry->attnum,
    1727             :                                                 entry->queryKey,
    1728        1468 :                                                 entry->queryCategory,
    1729        1468 :                                                 datum[StopMiddle - 1],
    1730        1468 :                                                 category[StopMiddle - 1]);
    1731             :                     }
    1732             : 
    1733        2552 :                     if (res == 0)
    1734             :                     {
    1735             :                         /*
    1736             :                          * Found exact match (there can be only one, except in
    1737             :                          * EMPTY_QUERY mode).
    1738             :                          *
    1739             :                          * If doing partial match, scan forward from here to
    1740             :                          * end of page to check for matches.
    1741             :                          *
    1742             :                          * See comment above about tuple's ordering.
    1743             :                          */
    1744        1202 :                         if (entry->isPartialMatch)
    1745           0 :                             key->entryRes[j] =
    1746           0 :                                 matchPartialInPendingList(&so->ginstate,
    1747             :                                                           page,
    1748             :                                                           StopMiddle,
    1749           0 :                                                           pos->lastOffset,
    1750             :                                                           entry,
    1751             :                                                           datum,
    1752             :                                                           category,
    1753             :                                                           datumExtracted);
    1754             :                         else
    1755        1202 :                             key->entryRes[j] = true;
    1756             : 
    1757             :                         /* done with binary search */
    1758        1202 :                         break;
    1759             :                     }
    1760        1350 :                     else if (res < 0)
    1761        1314 :                         StopHigh = StopMiddle;
    1762             :                     else
    1763          36 :                         StopLow = StopMiddle + 1;
    1764             :                 }
    1765             : 
    1766        2300 :                 if (StopLow >= StopHigh && entry->isPartialMatch)
    1767             :                 {
    1768             :                     /*
    1769             :                      * No exact match on this page.  If doing partial match,
    1770             :                      * scan from the first tuple greater than target value to
    1771             :                      * end of page.  Note that since we don't remember whether
    1772             :                      * the comparePartialFn told us to stop early on a
    1773             :                      * previous page, we will uselessly apply comparePartialFn
    1774             :                      * to the first tuple on each subsequent page.
    1775             :                      */
    1776           0 :                     key->entryRes[j] =
    1777           0 :                         matchPartialInPendingList(&so->ginstate,
    1778             :                                                   page,
    1779             :                                                   StopHigh,
    1780           0 :                                                   pos->lastOffset,
    1781             :                                                   entry,
    1782             :                                                   datum,
    1783             :                                                   category,
    1784             :                                                   datumExtracted);
    1785             :                 }
    1786             : 
    1787        2300 :                 pos->hasMatchKey[i] |= key->entryRes[j];
    1788             :             }
    1789             :         }
    1790             : 
    1791             :         /* Advance firstOffset over the scanned tuples */
    1792        1448 :         pos->firstOffset = pos->lastOffset;
    1793             : 
    1794        1448 :         if (GinPageHasFullRow(page))
    1795             :         {
    1796             :             /*
    1797             :              * We have examined all pending entries for the current heap row.
    1798             :              * Break out of loop over pages.
    1799             :              */
    1800        1448 :             break;
    1801             :         }
    1802             :         else
    1803             :         {
    1804             :             /*
    1805             :              * Advance to next page of pending entries for the current heap
    1806             :              * row.  Complain if there isn't one.
    1807             :              */
    1808           0 :             ItemPointerData item = pos->item;
    1809             : 
    1810           0 :             if (scanGetCandidate(scan, pos) == false ||
    1811           0 :                 !ItemPointerEquals(&pos->item, &item))
    1812           0 :                 elog(ERROR, "could not find additional pending pages for same heap tuple");
    1813             :         }
    1814             :     }
    1815             : 
    1816             :     /*
    1817             :      * All scan keys except excludeOnly require at least one entry to match.
    1818             :      * excludeOnly keys are an exception, because their implied
    1819             :      * GIN_CAT_EMPTY_QUERY scanEntry always matches.  So return "true" if all
    1820             :      * non-excludeOnly scan keys have at least one match.
    1821             :      */
    1822        2538 :     for (i = 0; i < so->nkeys; i++)
    1823             :     {
    1824        1984 :         if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
    1825         894 :             return false;
    1826             :     }
    1827             : 
    1828         554 :     return true;
    1829             : }
    1830             : 
    1831             : /*
    1832             :  * Collect all matched rows from pending list into bitmap.
    1833             :  */
    1834             : static void
    1835        1586 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
    1836             : {
    1837        1586 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1838             :     MemoryContext oldCtx;
    1839             :     bool        recheck,
    1840             :                 match;
    1841             :     int         i;
    1842             :     pendingPosition pos;
    1843        1586 :     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    1844             :     Page        page;
    1845             :     BlockNumber blkno;
    1846             : 
    1847        1586 :     *ntids = 0;
    1848             : 
    1849             :     /*
    1850             :      * Acquire predicate lock on the metapage, to conflict with any fastupdate
    1851             :      * insertions.
    1852             :      */
    1853        1586 :     PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
    1854             : 
    1855        1586 :     LockBuffer(metabuffer, GIN_SHARE);
    1856        1586 :     page = BufferGetPage(metabuffer);
    1857        1586 :     blkno = GinPageGetMeta(page)->head;
    1858             : 
    1859             :     /*
    1860             :      * fetch head of list before unlocking metapage. head page must be pinned
    1861             :      * to prevent deletion by vacuum process
    1862             :      */
    1863        1586 :     if (blkno == InvalidBlockNumber)
    1864             :     {
    1865             :         /* No pending list, so proceed with normal scan */
    1866        1420 :         UnlockReleaseBuffer(metabuffer);
    1867        1420 :         return;
    1868             :     }
    1869             : 
    1870         166 :     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    1871         166 :     LockBuffer(pos.pendingBuffer, GIN_SHARE);
    1872         166 :     pos.firstOffset = FirstOffsetNumber;
    1873         166 :     UnlockReleaseBuffer(metabuffer);
    1874         166 :     pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
    1875             : 
    1876             :     /*
    1877             :      * loop for each heap row. scanGetCandidate returns full row or row's
    1878             :      * tuples from first page.
    1879             :      */
    1880        1614 :     while (scanGetCandidate(scan, &pos))
    1881             :     {
    1882             :         /*
    1883             :          * Check entries in tuple and set up entryRes array.
    1884             :          *
    1885             :          * If pending tuples belonging to the current heap row are spread
    1886             :          * across several pages, collectMatchesForHeapRow will read all of
    1887             :          * those pages.
    1888             :          */
    1889        1448 :         if (!collectMatchesForHeapRow(scan, &pos))
    1890         894 :             continue;
    1891             : 
    1892             :         /*
    1893             :          * Matching of entries of one row is finished, so check row using
    1894             :          * consistent functions.
    1895             :          */
    1896         554 :         oldCtx = MemoryContextSwitchTo(so->tempCtx);
    1897         554 :         recheck = false;
    1898         554 :         match = true;
    1899             : 
    1900        1404 :         for (i = 0; i < so->nkeys; i++)
    1901             :         {
    1902         850 :             GinScanKey  key = so->keys + i;
    1903             : 
    1904         850 :             if (!key->boolConsistentFn(key))
    1905             :             {
    1906           0 :                 match = false;
    1907           0 :                 break;
    1908             :             }
    1909         850 :             recheck |= key->recheckCurItem;
    1910             :         }
    1911             : 
    1912         554 :         MemoryContextSwitchTo(oldCtx);
    1913         554 :         MemoryContextReset(so->tempCtx);
    1914             : 
    1915         554 :         if (match)
    1916             :         {
    1917         554 :             tbm_add_tuples(tbm, &pos.item, 1, recheck);
    1918         554 :             (*ntids)++;
    1919             :         }
    1920             :     }
    1921             : 
    1922         166 :     pfree(pos.hasMatchKey);
    1923             : }
    1924             : 
    1925             : 
    1926             : #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
    1927             : 
    1928             : int64
    1929        1598 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    1930             : {
    1931        1598 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1932             :     int64       ntids;
    1933             :     ItemPointerData iptr;
    1934             :     bool        recheck;
    1935             : 
    1936             :     /*
    1937             :      * Set up the scan keys, and check for unsatisfiable query.
    1938             :      */
    1939        1598 :     ginFreeScanKeys(so);        /* there should be no keys yet, but just to be
    1940             :                                  * sure */
    1941        1598 :     ginNewScanKey(scan);
    1942             : 
    1943        1598 :     if (GinIsVoidRes(scan))
    1944          12 :         return 0;
    1945             : 
    1946        1586 :     ntids = 0;
    1947             : 
    1948             :     /*
    1949             :      * First, scan the pending list and collect any matching entries into the
    1950             :      * bitmap.  After we scan a pending item, some other backend could post it
    1951             :      * into the main index, and so we might visit it a second time during the
    1952             :      * main scan.  This is okay because we'll just re-set the same bit in the
    1953             :      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
    1954             :      * can't support the amgettuple API, however.) Note that it would not do
    1955             :      * to scan the main index before the pending list, since concurrent
    1956             :      * cleanup could then make us miss entries entirely.
    1957             :      */
    1958        1586 :     scanPendingInsert(scan, tbm, &ntids);
    1959             : 
    1960             :     /*
    1961             :      * Now scan the main index.
    1962             :      */
    1963        1586 :     startScan(scan);
    1964             : 
    1965        1586 :     ItemPointerSetMin(&iptr);
    1966             : 
    1967             :     for (;;)
    1968             :     {
    1969      979892 :         CHECK_FOR_INTERRUPTS();
    1970             : 
    1971      979892 :         if (!scanGetItem(scan, iptr, &iptr, &recheck))
    1972        1586 :             break;
    1973             : 
    1974      978306 :         if (ItemPointerIsLossyPage(&iptr))
    1975           0 :             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
    1976             :         else
    1977      978306 :             tbm_add_tuples(tbm, &iptr, 1, recheck);
    1978      978306 :         ntids++;
    1979             :     }
    1980             : 
    1981        1586 :     return ntids;
    1982             : }

Generated by: LCOV version 1.14