LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginget.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 512 616 83.1 %
Date: 2024-11-21 08:14:44 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginget.c
       4             :  *    fetch tuples from a GIN scan.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginget.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/relscan.h"
      19             : #include "common/pg_prng.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/predicate.h"
      22             : #include "utils/datum.h"
      23             : #include "utils/memutils.h"
      24             : #include "utils/rel.h"
      25             : 
      26             : /* GUC parameter */
      27             : int         GinFuzzySearchLimit = 0;
      28             : 
      29             : typedef struct pendingPosition
      30             : {
      31             :     Buffer      pendingBuffer;
      32             :     OffsetNumber firstOffset;
      33             :     OffsetNumber lastOffset;
      34             :     ItemPointerData item;
      35             :     bool       *hasMatchKey;
      36             : } pendingPosition;
      37             : 
      38             : 
      39             : /*
      40             :  * Goes to the next page if current offset is outside of bounds
      41             :  */
      42             : static bool
      43      407620 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
      44             : {
      45      407620 :     Page        page = BufferGetPage(stack->buffer);
      46             : 
      47      407620 :     if (stack->off > PageGetMaxOffsetNumber(page))
      48             :     {
      49             :         /*
      50             :          * We scanned the whole page, so we should take right page
      51             :          */
      52        3790 :         if (GinPageRightMost(page))
      53         366 :             return false;       /* no more pages */
      54             : 
      55        3424 :         stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
      56        3424 :         stack->blkno = BufferGetBlockNumber(stack->buffer);
      57        3424 :         stack->off = FirstOffsetNumber;
      58        3424 :         PredicateLockPage(btree->index, stack->blkno, snapshot);
      59             :     }
      60             : 
      61      407254 :     return true;
      62             : }
      63             : 
      64             : /*
      65             :  * Scan all pages of a posting tree and save all its heap ItemPointers
      66             :  * in scanEntry->matchBitmap
      67             :  */
      68             : static void
      69          12 : scanPostingTree(Relation index, GinScanEntry scanEntry,
      70             :                 BlockNumber rootPostingTree)
      71             : {
      72             :     GinBtreeData btree;
      73             :     GinBtreeStack *stack;
      74             :     Buffer      buffer;
      75             :     Page        page;
      76             : 
      77             :     /* Descend to the leftmost leaf page */
      78          12 :     stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
      79          12 :     buffer = stack->buffer;
      80             : 
      81          12 :     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
      82             : 
      83          12 :     freeGinBtreeStack(stack);
      84             : 
      85             :     /*
      86             :      * Loop iterates through all leaf pages of posting tree
      87             :      */
      88             :     for (;;)
      89             :     {
      90          30 :         page = BufferGetPage(buffer);
      91          30 :         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
      92             :         {
      93          30 :             int         n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
      94             : 
      95          30 :             scanEntry->predictNumberResult += n;
      96             :         }
      97             : 
      98          30 :         if (GinPageRightMost(page))
      99          12 :             break;              /* no more pages */
     100             : 
     101          18 :         buffer = ginStepRight(buffer, index, GIN_SHARE);
     102             :     }
     103             : 
     104          12 :     UnlockReleaseBuffer(buffer);
     105          12 : }
     106             : 
     107             : /*
     108             :  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
     109             :  * match the search entry.  This supports three different match modes:
     110             :  *
     111             :  * 1. Partial-match support: scan from current point until the
     112             :  *    comparePartialFn says we're done.
     113             :  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
     114             :  *    key for the current attnum) until we hit null items or end of attnum
     115             :  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
     116             :  *    key for the current attnum) until we hit end of attnum
     117             :  *
     118             :  * Returns true if done, false if it's necessary to restart scan from scratch
     119             :  */
     120             : static bool
     121         534 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
     122             :                    GinScanEntry scanEntry, Snapshot snapshot)
     123             : {
     124             :     OffsetNumber attnum;
     125             :     Form_pg_attribute attr;
     126             : 
     127             :     /* Initialize empty bitmap result */
     128         534 :     scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
     129             : 
     130             :     /* Null query cannot partial-match anything */
     131         534 :     if (scanEntry->isPartialMatch &&
     132         252 :         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
     133           0 :         return true;
     134             : 
     135             :     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
     136         534 :     attnum = scanEntry->attnum;
     137         534 :     attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
     138             : 
     139             :     /*
     140             :      * Predicate lock entry leaf page, following pages will be locked by
     141             :      * moveRightIfItNeeded()
     142             :      */
     143         534 :     PredicateLockPage(btree->index,
     144             :                       BufferGetBlockNumber(stack->buffer),
     145             :                       snapshot);
     146             : 
     147             :     for (;;)
     148      407074 :     {
     149             :         Page        page;
     150             :         IndexTuple  itup;
     151             :         Datum       idatum;
     152             :         GinNullCategory icategory;
     153             : 
     154             :         /*
     155             :          * stack->off points to the interested entry, buffer is already locked
     156             :          */
     157      407608 :         if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     158         534 :             return true;
     159             : 
     160      407242 :         page = BufferGetPage(stack->buffer);
     161      407242 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     162             : 
     163             :         /*
     164             :          * If tuple stores another attribute then stop scan
     165             :          */
     166      407242 :         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     167           0 :             return true;
     168             : 
     169             :         /* Safe to fetch attribute value */
     170      407242 :         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
     171             : 
     172             :         /*
     173             :          * Check for appropriate scan stop conditions
     174             :          */
     175      407242 :         if (scanEntry->isPartialMatch)
     176             :         {
     177             :             int32       cmp;
     178             : 
     179             :             /*
     180             :              * In partial match, stop scan at any null (including
     181             :              * placeholders); partial matches never match nulls
     182             :              */
     183        1332 :             if (icategory != GIN_CAT_NORM_KEY)
     184          10 :                 return true;
     185             : 
     186             :             /*----------
     187             :              * Check of partial match.
     188             :              * case cmp == 0 => match
     189             :              * case cmp > 0 => not match and finish scan
     190             :              * case cmp < 0 => not match and continue scan
     191             :              *----------
     192             :              */
     193        1322 :             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
     194        1322 :                                                   btree->ginstate->supportCollation[attnum - 1],
     195             :                                                   scanEntry->queryKey,
     196             :                                                   idatum,
     197        1322 :                                                   UInt16GetDatum(scanEntry->strategy),
     198        1322 :                                                   PointerGetDatum(scanEntry->extra_data)));
     199             : 
     200        1322 :             if (cmp > 0)
     201         130 :                 return true;
     202        1192 :             else if (cmp < 0)
     203             :             {
     204          60 :                 stack->off++;
     205          60 :                 continue;
     206             :             }
     207             :         }
     208      405910 :         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
     209             :         {
     210             :             /*
     211             :              * In ALL mode, we are not interested in null items, so we can
     212             :              * stop if we get to a null-item placeholder (which will be the
     213             :              * last entry for a given attnum).  We do want to include NULL_KEY
     214             :              * and EMPTY_ITEM entries, though.
     215             :              */
     216      405910 :             if (icategory == GIN_CAT_NULL_ITEM)
     217          28 :                 return true;
     218             :         }
     219             : 
     220             :         /*
     221             :          * OK, we want to return the TIDs listed in this entry.
     222             :          */
     223      407014 :         if (GinIsPostingTree(itup))
     224             :         {
     225          12 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     226             : 
     227             :             /*
     228             :              * We should unlock current page (but not unpin) during tree scan
     229             :              * to prevent deadlock with vacuum processes.
     230             :              *
     231             :              * We save current entry value (idatum) to be able to re-find our
     232             :              * tuple after re-locking
     233             :              */
     234          12 :             if (icategory == GIN_CAT_NORM_KEY)
     235          12 :                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
     236             : 
     237          12 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     238             : 
     239             :             /*
     240             :              * Acquire predicate lock on the posting tree.  We already hold a
     241             :              * lock on the entry page, but insertions to the posting tree
     242             :              * don't check for conflicts on that level.
     243             :              */
     244          12 :             PredicateLockPage(btree->index, rootPostingTree, snapshot);
     245             : 
     246             :             /* Collect all the TIDs in this entry's posting tree */
     247          12 :             scanPostingTree(btree->index, scanEntry, rootPostingTree);
     248             : 
     249             :             /*
     250             :              * We lock again the entry page and while it was unlocked insert
     251             :              * might have occurred, so we need to re-find our position.
     252             :              */
     253          12 :             LockBuffer(stack->buffer, GIN_SHARE);
     254          12 :             page = BufferGetPage(stack->buffer);
     255          12 :             if (!GinPageIsLeaf(page))
     256             :             {
     257             :                 /*
     258             :                  * Root page becomes non-leaf while we unlock it. We will
     259             :                  * start again, this situation doesn't occur often - root can
     260             :                  * became a non-leaf only once per life of index.
     261             :                  */
     262           0 :                 return false;
     263             :             }
     264             : 
     265             :             /* Search forward to re-find idatum */
     266             :             for (;;)
     267             :             {
     268          12 :                 if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     269           0 :                     ereport(ERROR,
     270             :                             (errcode(ERRCODE_INTERNAL_ERROR),
     271             :                              errmsg("failed to re-find tuple within index \"%s\"",
     272             :                                     RelationGetRelationName(btree->index))));
     273             : 
     274          12 :                 page = BufferGetPage(stack->buffer);
     275          12 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     276             : 
     277          12 :                 if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
     278             :                 {
     279             :                     Datum       newDatum;
     280             :                     GinNullCategory newCategory;
     281             : 
     282          12 :                     newDatum = gintuple_get_key(btree->ginstate, itup,
     283             :                                                 &newCategory);
     284             : 
     285          12 :                     if (ginCompareEntries(btree->ginstate, attnum,
     286             :                                           newDatum, newCategory,
     287             :                                           idatum, icategory) == 0)
     288          12 :                         break;  /* Found! */
     289             :                 }
     290             : 
     291           0 :                 stack->off++;
     292             :             }
     293             : 
     294          12 :             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
     295           0 :                 pfree(DatumGetPointer(idatum));
     296             :         }
     297             :         else
     298             :         {
     299             :             ItemPointer ipd;
     300             :             int         nipd;
     301             : 
     302      407002 :             ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
     303      407002 :             tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
     304      407002 :             scanEntry->predictNumberResult += GinGetNPosting(itup);
     305      407002 :             pfree(ipd);
     306             :         }
     307             : 
     308             :         /*
     309             :          * Done with this entry, go to the next
     310             :          */
     311      407014 :         stack->off++;
     312             :     }
     313             : }
     314             : 
     315             : /*
     316             :  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
     317             :  */
     318             : static void
     319        6692 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
     320             : {
     321             :     GinBtreeData btreeEntry;
     322             :     GinBtreeStack *stackEntry;
     323             :     Page        page;
     324             :     bool        needUnlock;
     325             : 
     326        6692 : restartScanEntry:
     327        6692 :     entry->buffer = InvalidBuffer;
     328        6692 :     ItemPointerSetMin(&entry->curItem);
     329        6692 :     entry->offset = InvalidOffsetNumber;
     330        6692 :     if (entry->list)
     331           0 :         pfree(entry->list);
     332        6692 :     entry->list = NULL;
     333        6692 :     entry->nlist = 0;
     334        6692 :     entry->matchBitmap = NULL;
     335        6692 :     entry->matchResult = NULL;
     336        6692 :     entry->reduceResult = false;
     337        6692 :     entry->predictNumberResult = 0;
     338             : 
     339             :     /*
     340             :      * we should find entry, and begin scan of posting tree or just store
     341             :      * posting list in memory
     342             :      */
     343        6692 :     ginPrepareEntryScan(&btreeEntry, entry->attnum,
     344        6692 :                         entry->queryKey, entry->queryCategory,
     345             :                         ginstate);
     346        6692 :     stackEntry = ginFindLeafPage(&btreeEntry, true, false);
     347        6692 :     page = BufferGetPage(stackEntry->buffer);
     348             : 
     349             :     /* ginFindLeafPage() will have already checked snapshot age. */
     350        6692 :     needUnlock = true;
     351             : 
     352        6692 :     entry->isFinished = true;
     353             : 
     354        6692 :     if (entry->isPartialMatch ||
     355        6440 :         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
     356             :     {
     357             :         /*
     358             :          * btreeEntry.findItem locates the first item >= given search key.
     359             :          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
     360             :          * because of the way the GIN_CAT_EMPTY_QUERY category code is
     361             :          * assigned.)  We scan forward from there and collect all TIDs needed
     362             :          * for the entry type.
     363             :          */
     364         534 :         btreeEntry.findItem(&btreeEntry, stackEntry);
     365         534 :         if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
     366         534 :             == false)
     367             :         {
     368             :             /*
     369             :              * GIN tree was seriously restructured, so we will cleanup all
     370             :              * found data and rescan. See comments near 'return false' in
     371             :              * collectMatchBitmap()
     372             :              */
     373           0 :             if (entry->matchBitmap)
     374             :             {
     375           0 :                 if (entry->matchIterator)
     376           0 :                     tbm_end_iterate(entry->matchIterator);
     377           0 :                 entry->matchIterator = NULL;
     378           0 :                 tbm_free(entry->matchBitmap);
     379           0 :                 entry->matchBitmap = NULL;
     380             :             }
     381           0 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     382           0 :             freeGinBtreeStack(stackEntry);
     383           0 :             goto restartScanEntry;
     384             :         }
     385             : 
     386         534 :         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
     387             :         {
     388         420 :             entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
     389         420 :             entry->isFinished = false;
     390             :         }
     391             :     }
     392        6158 :     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     393             :     {
     394        4448 :         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
     395             : 
     396        4448 :         if (GinIsPostingTree(itup))
     397             :         {
     398          64 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     399             :             GinBtreeStack *stack;
     400             :             Page        entrypage;
     401             :             ItemPointerData minItem;
     402             : 
     403             :             /*
     404             :              * This is an equality scan, so lock the root of the posting tree.
     405             :              * It represents a lock on the exact key value, and covers all the
     406             :              * items in the posting tree.
     407             :              */
     408          64 :             PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
     409             : 
     410             :             /*
     411             :              * We should unlock entry page before touching posting tree to
     412             :              * prevent deadlocks with vacuum processes. Because entry is never
     413             :              * deleted from page and posting tree is never reduced to the
     414             :              * posting list, we can unlock page after getting BlockNumber of
     415             :              * root of posting tree.
     416             :              */
     417          64 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     418          64 :             needUnlock = false;
     419             : 
     420          64 :             stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
     421             :                                             rootPostingTree);
     422          64 :             entry->buffer = stack->buffer;
     423             : 
     424             :             /*
     425             :              * We keep buffer pinned because we need to prevent deletion of
     426             :              * page during scan. See GIN's vacuum implementation. RefCount is
     427             :              * increased to keep buffer pinned after freeGinBtreeStack() call.
     428             :              */
     429          64 :             IncrBufferRefCount(entry->buffer);
     430             : 
     431          64 :             entrypage = BufferGetPage(entry->buffer);
     432             : 
     433             :             /*
     434             :              * Load the first page into memory.
     435             :              */
     436          64 :             ItemPointerSetMin(&minItem);
     437          64 :             entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
     438             : 
     439          64 :             entry->predictNumberResult = stack->predictNumber * entry->nlist;
     440             : 
     441          64 :             LockBuffer(entry->buffer, GIN_UNLOCK);
     442          64 :             freeGinBtreeStack(stack);
     443          64 :             entry->isFinished = false;
     444             :         }
     445             :         else
     446             :         {
     447             :             /*
     448             :              * Lock the entry leaf page.  This is more coarse-grained than
     449             :              * necessary, because it will conflict with any insertions that
     450             :              * land on the same leaf page, not only the exact key we searched
     451             :              * for.  But locking an individual tuple would require updating
     452             :              * that lock whenever it moves because of insertions or vacuums,
     453             :              * which seems too complicated.
     454             :              */
     455        4384 :             PredicateLockPage(ginstate->index,
     456             :                               BufferGetBlockNumber(stackEntry->buffer),
     457             :                               snapshot);
     458        4384 :             if (GinGetNPosting(itup) > 0)
     459             :             {
     460        4378 :                 entry->list = ginReadTuple(ginstate, entry->attnum, itup,
     461             :                                            &entry->nlist);
     462        4378 :                 entry->predictNumberResult = entry->nlist;
     463             : 
     464        4378 :                 entry->isFinished = false;
     465             :             }
     466             :         }
     467             :     }
     468             :     else
     469             :     {
     470             :         /*
     471             :          * No entry found.  Predicate lock the leaf page, to lock the place
     472             :          * where the entry would've been, had there been one.
     473             :          */
     474        1710 :         PredicateLockPage(ginstate->index,
     475             :                           BufferGetBlockNumber(stackEntry->buffer), snapshot);
     476             :     }
     477             : 
     478        6692 :     if (needUnlock)
     479        6628 :         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     480        6692 :     freeGinBtreeStack(stackEntry);
     481        6692 : }
     482             : 
     483             : /*
     484             :  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
     485             :  * least frequent items first.
     486             :  */
     487             : static int
     488        9440 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
     489             : {
     490        9440 :     const GinScanKey key = (const GinScanKey) arg;
     491        9440 :     int         i1 = *(const int *) a1;
     492        9440 :     int         i2 = *(const int *) a2;
     493        9440 :     uint32      n1 = key->scanEntry[i1]->predictNumberResult;
     494        9440 :     uint32      n2 = key->scanEntry[i2]->predictNumberResult;
     495             : 
     496        9440 :     if (n1 < n2)
     497        1368 :         return -1;
     498        8072 :     else if (n1 == n2)
     499        6302 :         return 0;
     500             :     else
     501        1770 :         return 1;
     502             : }
     503             : 
     504             : static void
     505        1714 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
     506             : {
     507        1714 :     MemoryContext oldCtx = CurrentMemoryContext;
     508             :     int         i;
     509             :     int         j;
     510             :     int        *entryIndexes;
     511             : 
     512        1714 :     ItemPointerSetMin(&key->curItem);
     513        1714 :     key->curItemMatches = false;
     514        1714 :     key->recheckCurItem = false;
     515        1714 :     key->isFinished = false;
     516             : 
     517             :     /*
     518             :      * Divide the entries into two distinct sets: required and additional.
     519             :      * Additional entries are not enough for a match alone, without any items
     520             :      * from the required set, but are needed by the consistent function to
     521             :      * decide if an item matches. When scanning, we can skip over items from
     522             :      * additional entries that have no corresponding matches in any of the
     523             :      * required entries. That speeds up queries like "frequent & rare"
     524             :      * considerably, if the frequent term can be put in the additional set.
     525             :      *
     526             :      * There can be many legal ways to divide them entries into these two
     527             :      * sets. A conservative division is to just put everything in the required
     528             :      * set, but the more you can put in the additional set, the more you can
     529             :      * skip during the scan. To maximize skipping, we try to put as many
     530             :      * frequent items as possible into additional, and less frequent ones into
     531             :      * required. To do that, sort the entries by frequency
     532             :      * (predictNumberResult), and put entries into the required set in that
     533             :      * order, until the consistent function says that none of the remaining
     534             :      * entries can form a match, without any items from the required set. The
     535             :      * rest go to the additional set.
     536             :      *
     537             :      * Exclude-only scan keys are known to have no required entries.
     538             :      */
     539        1714 :     if (key->excludeOnly)
     540             :     {
     541          38 :         MemoryContextSwitchTo(so->keyCtx);
     542             : 
     543          38 :         key->nrequired = 0;
     544          38 :         key->nadditional = key->nentries;
     545          38 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     546          44 :         for (i = 0; i < key->nadditional; i++)
     547           6 :             key->additionalEntries[i] = key->scanEntry[i];
     548             :     }
     549        1676 :     else if (key->nentries > 1)
     550             :     {
     551         560 :         MemoryContextSwitchTo(so->tempCtx);
     552             : 
     553         560 :         entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
     554        6130 :         for (i = 0; i < key->nentries; i++)
     555        5570 :             entryIndexes[i] = i;
     556         560 :         qsort_arg(entryIndexes, key->nentries, sizeof(int),
     557             :                   entryIndexByFrequencyCmp, key);
     558             : 
     559        4834 :         for (i = 0; i < key->nentries - 1; i++)
     560             :         {
     561             :             /* Pass all entries <= i as FALSE, and the rest as MAYBE */
     562      521048 :             for (j = 0; j <= i; j++)
     563      516356 :                 key->entryRes[entryIndexes[j]] = GIN_FALSE;
     564      521856 :             for (j = i + 1; j < key->nentries; j++)
     565      517164 :                 key->entryRes[entryIndexes[j]] = GIN_MAYBE;
     566             : 
     567        4692 :             if (key->triConsistentFn(key) == GIN_FALSE)
     568         418 :                 break;
     569             :         }
     570             :         /* i is now the last required entry. */
     571             : 
     572         560 :         MemoryContextSwitchTo(so->keyCtx);
     573             : 
     574         560 :         key->nrequired = i + 1;
     575         560 :         key->nadditional = key->nentries - key->nrequired;
     576         560 :         key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
     577         560 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     578             : 
     579         560 :         j = 0;
     580        5394 :         for (i = 0; i < key->nrequired; i++)
     581        4834 :             key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
     582        1296 :         for (i = 0; i < key->nadditional; i++)
     583         736 :             key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
     584             : 
     585             :         /* clean up after consistentFn calls (also frees entryIndexes) */
     586         560 :         MemoryContextReset(so->tempCtx);
     587             :     }
     588             :     else
     589             :     {
     590        1116 :         MemoryContextSwitchTo(so->keyCtx);
     591             : 
     592        1116 :         key->nrequired = 1;
     593        1116 :         key->nadditional = 0;
     594        1116 :         key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
     595        1116 :         key->requiredEntries[0] = key->scanEntry[0];
     596             :     }
     597        1714 :     MemoryContextSwitchTo(oldCtx);
     598        1714 : }
     599             : 
     600             : static void
     601        1586 : startScan(IndexScanDesc scan)
     602             : {
     603        1586 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
     604        1586 :     GinState   *ginstate = &so->ginstate;
     605             :     uint32      i;
     606             : 
     607        8278 :     for (i = 0; i < so->totalentries; i++)
     608        6692 :         startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
     609             : 
     610        1586 :     if (GinFuzzySearchLimit > 0)
     611             :     {
     612             :         /*
     613             :          * If all of keys more than threshold we will try to reduce result, we
     614             :          * hope (and only hope, for intersection operation of array our
     615             :          * supposition isn't true), that total result will not more than
     616             :          * minimal predictNumberResult.
     617             :          */
     618           6 :         bool        reduce = true;
     619             : 
     620          12 :         for (i = 0; i < so->totalentries; i++)
     621             :         {
     622           6 :             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
     623             :             {
     624           0 :                 reduce = false;
     625           0 :                 break;
     626             :             }
     627             :         }
     628           6 :         if (reduce)
     629             :         {
     630          12 :             for (i = 0; i < so->totalentries; i++)
     631             :             {
     632           6 :                 so->entries[i]->predictNumberResult /= so->totalentries;
     633           6 :                 so->entries[i]->reduceResult = true;
     634             :             }
     635             :         }
     636             :     }
     637             : 
     638             :     /*
     639             :      * Now that we have the estimates for the entry frequencies, finish
     640             :      * initializing the scan keys.
     641             :      */
     642        3300 :     for (i = 0; i < so->nkeys; i++)
     643        1714 :         startScanKey(ginstate, so, so->keys + i);
     644        1586 : }
     645             : 
     646             : /*
     647             :  * Load the next batch of item pointers from a posting tree.
     648             :  *
     649             :  * Note that we copy the page into GinScanEntry->list array and unlock it, but
     650             :  * keep it pinned to prevent interference with vacuum.
     651             :  */
     652             : static void
     653         100 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
     654             :                    ItemPointerData advancePast)
     655             : {
     656             :     Page        page;
     657             :     int         i;
     658             :     bool        stepright;
     659             : 
     660         100 :     if (!BufferIsValid(entry->buffer))
     661             :     {
     662           0 :         entry->isFinished = true;
     663           0 :         return;
     664             :     }
     665             : 
     666             :     /*
     667             :      * We have two strategies for finding the correct page: step right from
     668             :      * the current page, or descend the tree again from the root. If
     669             :      * advancePast equals the current item, the next matching item should be
     670             :      * on the next page, so we step right. Otherwise, descend from root.
     671             :      */
     672         100 :     if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
     673             :     {
     674          94 :         stepright = true;
     675          94 :         LockBuffer(entry->buffer, GIN_SHARE);
     676             :     }
     677             :     else
     678             :     {
     679             :         GinBtreeStack *stack;
     680             : 
     681           6 :         ReleaseBuffer(entry->buffer);
     682             : 
     683             :         /*
     684             :          * Set the search key, and find the correct leaf page.
     685             :          */
     686           6 :         if (ItemPointerIsLossyPage(&advancePast))
     687             :         {
     688           0 :             ItemPointerSet(&entry->btree.itemptr,
     689           0 :                            GinItemPointerGetBlockNumber(&advancePast) + 1,
     690             :                            FirstOffsetNumber);
     691             :         }
     692             :         else
     693             :         {
     694           6 :             ItemPointerSet(&entry->btree.itemptr,
     695             :                            GinItemPointerGetBlockNumber(&advancePast),
     696           6 :                            OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
     697             :         }
     698           6 :         entry->btree.fullScan = false;
     699           6 :         stack = ginFindLeafPage(&entry->btree, true, false);
     700             : 
     701             :         /* we don't need the stack, just the buffer. */
     702           6 :         entry->buffer = stack->buffer;
     703           6 :         IncrBufferRefCount(entry->buffer);
     704           6 :         freeGinBtreeStack(stack);
     705           6 :         stepright = false;
     706             :     }
     707             : 
     708         100 :     elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
     709             :          GinItemPointerGetBlockNumber(&advancePast),
     710             :          GinItemPointerGetOffsetNumber(&advancePast),
     711             :          !stepright);
     712             : 
     713         100 :     page = BufferGetPage(entry->buffer);
     714             :     for (;;)
     715             :     {
     716         106 :         entry->offset = InvalidOffsetNumber;
     717         106 :         if (entry->list)
     718             :         {
     719          94 :             pfree(entry->list);
     720          94 :             entry->list = NULL;
     721          94 :             entry->nlist = 0;
     722             :         }
     723             : 
     724         106 :         if (stepright)
     725             :         {
     726             :             /*
     727             :              * We've processed all the entries on this page. If it was the
     728             :              * last page in the tree, we're done.
     729             :              */
     730         100 :             if (GinPageRightMost(page))
     731             :             {
     732           6 :                 UnlockReleaseBuffer(entry->buffer);
     733           6 :                 entry->buffer = InvalidBuffer;
     734           6 :                 entry->isFinished = true;
     735           6 :                 return;
     736             :             }
     737             : 
     738             :             /*
     739             :              * Step to next page, following the right link. then find the
     740             :              * first ItemPointer greater than advancePast.
     741             :              */
     742          94 :             entry->buffer = ginStepRight(entry->buffer,
     743             :                                          ginstate->index,
     744             :                                          GIN_SHARE);
     745          94 :             page = BufferGetPage(entry->buffer);
     746             :         }
     747         100 :         stepright = true;
     748             : 
     749         100 :         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
     750           0 :             continue;           /* page was deleted by concurrent vacuum */
     751             : 
     752             :         /*
     753             :          * The first item > advancePast might not be on this page, but
     754             :          * somewhere to the right, if the page was split, or a non-match from
     755             :          * another key in the query allowed us to skip some items from this
     756             :          * entry. Keep following the right-links until we re-find the correct
     757             :          * page.
     758             :          */
     759         136 :         if (!GinPageRightMost(page) &&
     760          36 :             ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
     761             :         {
     762             :             /*
     763             :              * the item we're looking is > the right bound of the page, so it
     764             :              * can't be on this page.
     765             :              */
     766           0 :             continue;
     767             :         }
     768             : 
     769         100 :         entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
     770             : 
     771        1924 :         for (i = 0; i < entry->nlist; i++)
     772             :         {
     773        1918 :             if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
     774             :             {
     775          94 :                 entry->offset = i;
     776             : 
     777          94 :                 if (GinPageRightMost(page))
     778             :                 {
     779             :                     /* after processing the copied items, we're done. */
     780          58 :                     UnlockReleaseBuffer(entry->buffer);
     781          58 :                     entry->buffer = InvalidBuffer;
     782             :                 }
     783             :                 else
     784          36 :                     LockBuffer(entry->buffer, GIN_UNLOCK);
     785          94 :                 return;
     786             :             }
     787             :         }
     788             :     }
     789             : }
     790             : 
     791             : #define gin_rand() pg_prng_double(&pg_global_prng_state)
     792             : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
     793             : 
     794             : /*
     795             :  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
     796             :  * of one scan key, or sets entry->isFinished to true if there are no more.
     797             :  *
     798             :  * Item pointers are returned in ascending order.
     799             :  *
     800             :  * Note: this can return a "lossy page" item pointer, indicating that the
     801             :  * entry potentially matches all items on that heap page.  However, it is
     802             :  * not allowed to return both a lossy page pointer and exact (regular)
     803             :  * item pointers for the same page.  (Doing so would break the key-combination
     804             :  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
     805             :  * current implementation this is guaranteed by the behavior of tidbitmaps.
     806             :  */
     807             : static void
     808     1088038 : entryGetItem(GinState *ginstate, GinScanEntry entry,
     809             :              ItemPointerData advancePast)
     810             : {
     811             :     Assert(!entry->isFinished);
     812             : 
     813             :     Assert(!ItemPointerIsValid(&entry->curItem) ||
     814             :            ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
     815             : 
     816     1088038 :     if (entry->matchBitmap)
     817             :     {
     818             :         /* A bitmap result */
     819      268052 :         BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
     820      268052 :         OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
     821             : 
     822             :         for (;;)
     823             :         {
     824             :             /*
     825             :              * If we've exhausted all items on this block, move to next block
     826             :              * in the bitmap.
     827             :              */
     828      272878 :             while (entry->matchResult == NULL ||
     829      272458 :                    (entry->matchResult->ntuples >= 0 &&
     830      272458 :                     entry->offset >= entry->matchResult->ntuples) ||
     831      535264 :                    entry->matchResult->blockno < advancePastBlk ||
     832      267632 :                    (ItemPointerIsLossyPage(&advancePast) &&
     833           0 :                     entry->matchResult->blockno == advancePastBlk))
     834             :             {
     835        5246 :                 entry->matchResult = tbm_iterate(entry->matchIterator);
     836             : 
     837        5246 :                 if (entry->matchResult == NULL)
     838             :                 {
     839         420 :                     ItemPointerSetInvalid(&entry->curItem);
     840         420 :                     tbm_end_iterate(entry->matchIterator);
     841         420 :                     entry->matchIterator = NULL;
     842         420 :                     entry->isFinished = true;
     843         420 :                     break;
     844             :                 }
     845             : 
     846             :                 /*
     847             :                  * Reset counter to the beginning of entry->matchResult. Note:
     848             :                  * entry->offset is still greater than matchResult->ntuples if
     849             :                  * matchResult is lossy.  So, on next call we will get next
     850             :                  * result from TIDBitmap.
     851             :                  */
     852        4826 :                 entry->offset = 0;
     853             :             }
     854      268052 :             if (entry->isFinished)
     855         420 :                 break;
     856             : 
     857             :             /*
     858             :              * We're now on the first page after advancePast which has any
     859             :              * items on it. If it's a lossy result, return that.
     860             :              */
     861      267632 :             if (entry->matchResult->ntuples < 0)
     862             :             {
     863           0 :                 ItemPointerSetLossyPage(&entry->curItem,
     864             :                                         entry->matchResult->blockno);
     865             : 
     866             :                 /*
     867             :                  * We might as well fall out of the loop; we could not
     868             :                  * estimate number of results on this page to support correct
     869             :                  * reducing of result even if it's enabled.
     870             :                  */
     871           0 :                 break;
     872             :             }
     873             : 
     874             :             /*
     875             :              * Not a lossy page. Skip over any offsets <= advancePast, and
     876             :              * return that.
     877             :              */
     878      267632 :             if (entry->matchResult->blockno == advancePastBlk)
     879             :             {
     880             :                 /*
     881             :                  * First, do a quick check against the last offset on the
     882             :                  * page. If that's > advancePast, so are all the other
     883             :                  * offsets, so just go back to the top to get the next page.
     884             :                  */
     885      263226 :                 if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
     886             :                 {
     887           0 :                     entry->offset = entry->matchResult->ntuples;
     888           0 :                     continue;
     889             :                 }
     890             : 
     891             :                 /* Otherwise scan to find the first item > advancePast */
     892      263226 :                 while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
     893           0 :                     entry->offset++;
     894             :             }
     895             : 
     896      267632 :             ItemPointerSet(&entry->curItem,
     897      267632 :                            entry->matchResult->blockno,
     898      267632 :                            entry->matchResult->offsets[entry->offset]);
     899      267632 :             entry->offset++;
     900             : 
     901             :             /* Done unless we need to reduce the result */
     902      267632 :             if (!entry->reduceResult || !dropItem(entry))
     903             :                 break;
     904             :         }
     905             :     }
     906      819986 :     else if (!BufferIsValid(entry->buffer))
     907             :     {
     908             :         /*
     909             :          * A posting list from an entry tuple, or the last page of a posting
     910             :          * tree.
     911             :          */
     912             :         for (;;)
     913             :         {
     914      393930 :             if (entry->offset >= entry->nlist)
     915             :             {
     916        3890 :                 ItemPointerSetInvalid(&entry->curItem);
     917        3890 :                 entry->isFinished = true;
     918        3890 :                 break;
     919             :             }
     920             : 
     921      390040 :             entry->curItem = entry->list[entry->offset++];
     922             : 
     923             :             /* If we're not past advancePast, keep scanning */
     924      390040 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     925       66634 :                 continue;
     926             : 
     927             :             /* Done unless we need to reduce the result */
     928      323406 :             if (!entry->reduceResult || !dropItem(entry))
     929             :                 break;
     930             :         }
     931             :     }
     932             :     else
     933             :     {
     934             :         /* A posting tree */
     935             :         for (;;)
     936             :         {
     937             :             /* If we've processed the current batch, load more items */
     938      679178 :             while (entry->offset >= entry->nlist)
     939             :             {
     940         100 :                 entryLoadMoreItems(ginstate, entry, advancePast);
     941             : 
     942         100 :                 if (entry->isFinished)
     943             :                 {
     944           6 :                     ItemPointerSetInvalid(&entry->curItem);
     945           6 :                     return;
     946             :                 }
     947             :             }
     948             : 
     949      679078 :             entry->curItem = entry->list[entry->offset++];
     950             : 
     951             :             /* If we're not past advancePast, keep scanning */
     952      679078 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     953       47106 :                 continue;
     954             : 
     955             :             /* Done unless we need to reduce the result */
     956      631972 :             if (!entry->reduceResult || !dropItem(entry))
     957             :                 break;
     958             : 
     959             :             /*
     960             :              * Advance advancePast (so that entryLoadMoreItems will load the
     961             :              * right data), and keep scanning
     962             :              */
     963      119000 :             advancePast = entry->curItem;
     964             :         }
     965             :     }
     966             : }
     967             : 
     968             : /*
     969             :  * Identify the "current" item among the input entry streams for this scan key
     970             :  * that is greater than advancePast, and test whether it passes the scan key
     971             :  * qual condition.
     972             :  *
     973             :  * The current item is the smallest curItem among the inputs.  key->curItem
     974             :  * is set to that value.  key->curItemMatches is set to indicate whether that
     975             :  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
     976             :  * iff recheck is needed for this item pointer (including the case where the
     977             :  * item pointer is a lossy page pointer).
     978             :  *
     979             :  * If all entry streams are exhausted, sets key->isFinished to true.
     980             :  *
     981             :  * Item pointers must be returned in ascending order.
     982             :  *
     983             :  * Note: this can return a "lossy page" item pointer, indicating that the
     984             :  * key potentially matches all items on that heap page.  However, it is
     985             :  * not allowed to return both a lossy page pointer and exact (regular)
     986             :  * item pointers for the same page.  (Doing so would break the key-combination
     987             :  * logic in scanGetItem.)
     988             :  */
     989             : static void
     990     1002512 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
     991             :            ItemPointerData advancePast)
     992             : {
     993             :     ItemPointerData minItem;
     994             :     ItemPointerData curPageLossy;
     995             :     uint32      i;
     996             :     bool        haveLossyEntry;
     997             :     GinScanEntry entry;
     998             :     GinTernaryValue res;
     999             :     MemoryContext oldCtx;
    1000             :     bool        allFinished;
    1001             : 
    1002             :     Assert(!key->isFinished);
    1003             : 
    1004             :     /*
    1005             :      * We might have already tested this item; if so, no need to repeat work.
    1006             :      * (Note: the ">" case can happen, if advancePast is exact but we
    1007             :      * previously had to set curItem to a lossy-page pointer.)
    1008             :      */
    1009     1002512 :     if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
    1010        1608 :         return;
    1011             : 
    1012             :     /*
    1013             :      * Find the minimum item > advancePast among the active entry streams.
    1014             :      *
    1015             :      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
    1016             :      * offset (0xffff), so that it will sort after any exact entries for the
    1017             :      * same page.  So we'll prefer to return exact pointers not lossy
    1018             :      * pointers, which is good.
    1019             :      */
    1020     1002490 :     ItemPointerSetMax(&minItem);
    1021     1002490 :     allFinished = true;
    1022     2757792 :     for (i = 0; i < key->nrequired; i++)
    1023             :     {
    1024     1755302 :         entry = key->requiredEntries[i];
    1025             : 
    1026     1755302 :         if (entry->isFinished)
    1027      560142 :             continue;
    1028             : 
    1029             :         /*
    1030             :          * Advance this stream if necessary.
    1031             :          *
    1032             :          * In particular, since entry->curItem was initialized with
    1033             :          * ItemPointerSetMin, this ensures we fetch the first item for each
    1034             :          * entry on the first call.
    1035             :          */
    1036     1195160 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1037             :         {
    1038     1034282 :             entryGetItem(ginstate, entry, advancePast);
    1039     1034282 :             if (entry->isFinished)
    1040        4138 :                 continue;
    1041             :         }
    1042             : 
    1043     1191022 :         allFinished = false;
    1044     1191022 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1045     1097672 :             minItem = entry->curItem;
    1046             :     }
    1047             : 
    1048     1002490 :     if (allFinished && !key->excludeOnly)
    1049             :     {
    1050             :         /* all entries are finished */
    1051        1586 :         key->isFinished = true;
    1052        1586 :         return;
    1053             :     }
    1054             : 
    1055     1000904 :     if (!key->excludeOnly)
    1056             :     {
    1057             :         /*
    1058             :          * For a normal scan key, we now know there are no matches < minItem.
    1059             :          *
    1060             :          * If minItem is lossy, it means that there were no exact items on the
    1061             :          * page among requiredEntries, because lossy pointers sort after exact
    1062             :          * items. However, there might be exact items for the same page among
    1063             :          * additionalEntries, so we mustn't advance past them.
    1064             :          */
    1065      996428 :         if (ItemPointerIsLossyPage(&minItem))
    1066             :         {
    1067           0 :             if (GinItemPointerGetBlockNumber(&advancePast) <
    1068           0 :                 GinItemPointerGetBlockNumber(&minItem))
    1069             :             {
    1070           0 :                 ItemPointerSet(&advancePast,
    1071             :                                GinItemPointerGetBlockNumber(&minItem),
    1072             :                                InvalidOffsetNumber);
    1073             :             }
    1074             :         }
    1075             :         else
    1076             :         {
    1077             :             Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
    1078      996428 :             ItemPointerSet(&advancePast,
    1079             :                            GinItemPointerGetBlockNumber(&minItem),
    1080      996428 :                            OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
    1081             :         }
    1082             :     }
    1083             :     else
    1084             :     {
    1085             :         /*
    1086             :          * excludeOnly scan keys don't have any entries that are necessarily
    1087             :          * present in matching items.  So, we consider the item just after
    1088             :          * advancePast.
    1089             :          */
    1090             :         Assert(key->nrequired == 0);
    1091        4476 :         ItemPointerSet(&minItem,
    1092             :                        GinItemPointerGetBlockNumber(&advancePast),
    1093        4476 :                        OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
    1094             :     }
    1095             : 
    1096             :     /*
    1097             :      * We might not have loaded all the entry streams for this TID yet. We
    1098             :      * could call the consistent function, passing MAYBE for those entries, to
    1099             :      * see if it can decide if this TID matches based on the information we
    1100             :      * have. But if the consistent-function is expensive, and cannot in fact
    1101             :      * decide with partial information, that could be a big loss. So, load all
    1102             :      * the additional entries, before calling the consistent function.
    1103             :      */
    1104     1067564 :     for (i = 0; i < key->nadditional; i++)
    1105             :     {
    1106       66660 :         entry = key->additionalEntries[i];
    1107             : 
    1108       66660 :         if (entry->isFinished)
    1109         412 :             continue;
    1110             : 
    1111       66248 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1112             :         {
    1113       53756 :             entryGetItem(ginstate, entry, advancePast);
    1114       53756 :             if (entry->isFinished)
    1115         178 :                 continue;
    1116             :         }
    1117             : 
    1118             :         /*
    1119             :          * Normally, none of the items in additionalEntries can have a curItem
    1120             :          * larger than minItem. But if minItem is a lossy page, then there
    1121             :          * might be exact items on the same page among additionalEntries.
    1122             :          */
    1123       66070 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1124             :         {
    1125             :             Assert(ItemPointerIsLossyPage(&minItem));
    1126           0 :             minItem = entry->curItem;
    1127             :         }
    1128             :     }
    1129             : 
    1130             :     /*
    1131             :      * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
    1132             :      * and perform consistentFn test.
    1133             :      *
    1134             :      * Lossy-page entries pose a problem, since we don't know the correct
    1135             :      * entryRes state to pass to the consistentFn, and we also don't know what
    1136             :      * its combining logic will be (could be AND, OR, or even NOT). If the
    1137             :      * logic is OR then the consistentFn might succeed for all items in the
    1138             :      * lossy page even when none of the other entries match.
    1139             :      *
    1140             :      * Our strategy is to call the tri-state consistent function, with the
    1141             :      * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
    1142             :      * returns FALSE, none of the lossy items alone are enough for a match, so
    1143             :      * we don't need to return a lossy-page pointer. Otherwise, return a
    1144             :      * lossy-page pointer to indicate that the whole heap page must be
    1145             :      * checked.  (On subsequent calls, we'll do nothing until minItem is past
    1146             :      * the page altogether, thus ensuring that we never return both regular
    1147             :      * and lossy pointers for the same page.)
    1148             :      *
    1149             :      * An exception is that it doesn't matter what we pass for lossy pointers
    1150             :      * in "hidden" entries, because the consistentFn's result can't depend on
    1151             :      * them. We could pass them as MAYBE as well, but if we're using the
    1152             :      * "shim" implementation of a tri-state consistent function (see
    1153             :      * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
    1154             :      * them as true.
    1155             :      *
    1156             :      * Note that only lossy-page entries pointing to the current item's page
    1157             :      * should trigger this processing; we might have future lossy pages in the
    1158             :      * entry array, but they aren't relevant yet.
    1159             :      */
    1160     1000904 :     key->curItem = minItem;
    1161     1000904 :     ItemPointerSetLossyPage(&curPageLossy,
    1162             :                             GinItemPointerGetBlockNumber(&key->curItem));
    1163     1000904 :     haveLossyEntry = false;
    1164     2817006 :     for (i = 0; i < key->nentries; i++)
    1165             :     {
    1166     1816102 :         entry = key->scanEntry[i];
    1167     3073194 :         if (entry->isFinished == false &&
    1168     1257092 :             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1169             :         {
    1170           0 :             if (i < key->nuserentries)
    1171           0 :                 key->entryRes[i] = GIN_MAYBE;
    1172             :             else
    1173           0 :                 key->entryRes[i] = GIN_TRUE;
    1174           0 :             haveLossyEntry = true;
    1175             :         }
    1176             :         else
    1177     1816102 :             key->entryRes[i] = GIN_FALSE;
    1178             :     }
    1179             : 
    1180             :     /* prepare for calling consistentFn in temp context */
    1181     1000904 :     oldCtx = MemoryContextSwitchTo(tempCtx);
    1182             : 
    1183     1000904 :     if (haveLossyEntry)
    1184             :     {
    1185             :         /* Have lossy-page entries, so see if whole page matches */
    1186           0 :         res = key->triConsistentFn(key);
    1187             : 
    1188           0 :         if (res == GIN_TRUE || res == GIN_MAYBE)
    1189             :         {
    1190             :             /* Yes, so clean up ... */
    1191           0 :             MemoryContextSwitchTo(oldCtx);
    1192           0 :             MemoryContextReset(tempCtx);
    1193             : 
    1194             :             /* and return lossy pointer for whole page */
    1195           0 :             key->curItem = curPageLossy;
    1196           0 :             key->curItemMatches = true;
    1197           0 :             key->recheckCurItem = true;
    1198           0 :             return;
    1199             :         }
    1200             :     }
    1201             : 
    1202             :     /*
    1203             :      * At this point we know that we don't need to return a lossy whole-page
    1204             :      * pointer, but we might have matches for individual exact item pointers,
    1205             :      * possibly in combination with a lossy pointer. Pass lossy pointers as
    1206             :      * MAYBE to the ternary consistent function, to let it decide if this
    1207             :      * tuple satisfies the overall key, even though we don't know if the lossy
    1208             :      * entries match.
    1209             :      *
    1210             :      * Prepare entryRes array to be passed to consistentFn.
    1211             :      */
    1212     2817006 :     for (i = 0; i < key->nentries; i++)
    1213             :     {
    1214     1816102 :         entry = key->scanEntry[i];
    1215     1816102 :         if (entry->isFinished)
    1216      559010 :             key->entryRes[i] = GIN_FALSE;
    1217             : #if 0
    1218             : 
    1219             :         /*
    1220             :          * This case can't currently happen, because we loaded all the entries
    1221             :          * for this item earlier.
    1222             :          */
    1223             :         else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1224             :             key->entryRes[i] = GIN_MAYBE;
    1225             : #endif
    1226     1257092 :         else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1227           0 :             key->entryRes[i] = GIN_MAYBE;
    1228     1257092 :         else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
    1229     1073446 :             key->entryRes[i] = GIN_TRUE;
    1230             :         else
    1231      183646 :             key->entryRes[i] = GIN_FALSE;
    1232             :     }
    1233             : 
    1234     1000904 :     res = key->triConsistentFn(key);
    1235             : 
    1236     1000904 :     switch (res)
    1237             :     {
    1238      865256 :         case GIN_TRUE:
    1239      865256 :             key->curItemMatches = true;
    1240             :             /* triConsistentFn set recheckCurItem */
    1241      865256 :             break;
    1242             : 
    1243       18026 :         case GIN_FALSE:
    1244       18026 :             key->curItemMatches = false;
    1245       18026 :             break;
    1246             : 
    1247      117622 :         case GIN_MAYBE:
    1248      117622 :             key->curItemMatches = true;
    1249      117622 :             key->recheckCurItem = true;
    1250      117622 :             break;
    1251             : 
    1252           0 :         default:
    1253             : 
    1254             :             /*
    1255             :              * the 'default' case shouldn't happen, but if the consistent
    1256             :              * function returns something bogus, this is the safe result
    1257             :              */
    1258           0 :             key->curItemMatches = true;
    1259           0 :             key->recheckCurItem = true;
    1260           0 :             break;
    1261             :     }
    1262             : 
    1263             :     /*
    1264             :      * We have a tuple, and we know if it matches or not. If it's a non-match,
    1265             :      * we could continue to find the next matching tuple, but let's break out
    1266             :      * and give scanGetItem a chance to advance the other keys. They might be
    1267             :      * able to skip past to a much higher TID, allowing us to save work.
    1268             :      */
    1269             : 
    1270             :     /* clean up after consistentFn calls */
    1271     1000904 :     MemoryContextSwitchTo(oldCtx);
    1272     1000904 :     MemoryContextReset(tempCtx);
    1273             : }
    1274             : 
    1275             : /*
    1276             :  * Get next heap item pointer (after advancePast) from scan.
    1277             :  * Returns true if anything found.
    1278             :  * On success, *item and *recheck are set.
    1279             :  *
    1280             :  * Note: this is very nearly the same logic as in keyGetItem(), except
    1281             :  * that we know the keys are to be combined with AND logic, whereas in
    1282             :  * keyGetItem() the combination logic is known only to the consistentFn.
    1283             :  */
    1284             : static bool
    1285      979892 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
    1286             :             ItemPointerData *item, bool *recheck)
    1287             : {
    1288      979892 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1289             :     uint32      i;
    1290             :     bool        match;
    1291             : 
    1292             :     /*----------
    1293             :      * Advance the scan keys in lock-step, until we find an item that matches
    1294             :      * all the keys. If any key reports isFinished, meaning its subset of the
    1295             :      * entries is exhausted, we can stop.  Otherwise, set *item to the next
    1296             :      * matching item.
    1297             :      *
    1298             :      * This logic works only if a keyGetItem stream can never contain both
    1299             :      * exact and lossy pointers for the same page.  Else we could have a
    1300             :      * case like
    1301             :      *
    1302             :      *      stream 1        stream 2
    1303             :      *      ...             ...
    1304             :      *      42/6            42/7
    1305             :      *      50/1            42/0xffff
    1306             :      *      ...             ...
    1307             :      *
    1308             :      * We would conclude that 42/6 is not a match and advance stream 1,
    1309             :      * thus never detecting the match to the lossy pointer in stream 2.
    1310             :      * (keyGetItem has a similar problem versus entryGetItem.)
    1311             :      *----------
    1312             :      */
    1313             :     do
    1314             :     {
    1315      997976 :         ItemPointerSetMin(item);
    1316      997976 :         match = true;
    1317     1980876 :         for (i = 0; i < so->nkeys && match; i++)
    1318             :         {
    1319     1002512 :             GinScanKey  key = so->keys + i;
    1320             : 
    1321             :             /*
    1322             :              * If we're considering a lossy page, skip excludeOnly keys. They
    1323             :              * can't exclude the whole page anyway.
    1324             :              */
    1325     1002512 :             if (ItemPointerIsLossyPage(item) && key->excludeOnly)
    1326             :             {
    1327             :                 /*
    1328             :                  * ginNewScanKey() should never mark the first key as
    1329             :                  * excludeOnly.
    1330             :                  */
    1331             :                 Assert(i > 0);
    1332           0 :                 continue;
    1333             :             }
    1334             : 
    1335             :             /* Fetch the next item for this key that is > advancePast. */
    1336     1002512 :             keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
    1337             : 
    1338     1002512 :             if (key->isFinished)
    1339        1586 :                 return false;
    1340             : 
    1341             :             /*
    1342             :              * If it's not a match, we can immediately conclude that nothing
    1343             :              * <= this item matches, without checking the rest of the keys.
    1344             :              */
    1345     1000926 :             if (!key->curItemMatches)
    1346             :             {
    1347       18026 :                 advancePast = key->curItem;
    1348       18026 :                 match = false;
    1349       18026 :                 break;
    1350             :             }
    1351             : 
    1352             :             /*
    1353             :              * It's a match. We can conclude that nothing < matches, so the
    1354             :              * other key streams can skip to this item.
    1355             :              *
    1356             :              * Beware of lossy pointers, though; from a lossy pointer, we can
    1357             :              * only conclude that nothing smaller than this *block* matches.
    1358             :              */
    1359      982900 :             if (ItemPointerIsLossyPage(&key->curItem))
    1360             :             {
    1361           0 :                 if (GinItemPointerGetBlockNumber(&advancePast) <
    1362           0 :                     GinItemPointerGetBlockNumber(&key->curItem))
    1363             :                 {
    1364           0 :                     ItemPointerSet(&advancePast,
    1365           0 :                                    GinItemPointerGetBlockNumber(&key->curItem),
    1366             :                                    InvalidOffsetNumber);
    1367             :                 }
    1368             :             }
    1369             :             else
    1370             :             {
    1371             :                 Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
    1372      982900 :                 ItemPointerSet(&advancePast,
    1373      982900 :                                GinItemPointerGetBlockNumber(&key->curItem),
    1374      982900 :                                OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
    1375             :             }
    1376             : 
    1377             :             /*
    1378             :              * If this is the first key, remember this location as a potential
    1379             :              * match, and proceed to check the rest of the keys.
    1380             :              *
    1381             :              * Otherwise, check if this is the same item that we checked the
    1382             :              * previous keys for (or a lossy pointer for the same page). If
    1383             :              * not, loop back to check the previous keys for this item (we
    1384             :              * will check this key again too, but keyGetItem returns quickly
    1385             :              * for that)
    1386             :              */
    1387      982900 :             if (i == 0)
    1388             :             {
    1389      978470 :                 *item = key->curItem;
    1390             :             }
    1391             :             else
    1392             :             {
    1393        8860 :                 if (ItemPointerIsLossyPage(&key->curItem) ||
    1394        4430 :                     ItemPointerIsLossyPage(item))
    1395             :                 {
    1396             :                     Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
    1397           0 :                     match = (GinItemPointerGetBlockNumber(&key->curItem) ==
    1398           0 :                              GinItemPointerGetBlockNumber(item));
    1399             :                 }
    1400             :                 else
    1401             :                 {
    1402             :                     Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
    1403        4430 :                     match = (ginCompareItemPointers(&key->curItem, item) == 0);
    1404             :                 }
    1405             :             }
    1406             :         }
    1407      996390 :     } while (!match);
    1408             : 
    1409             :     Assert(!ItemPointerIsMin(item));
    1410             : 
    1411             :     /*
    1412             :      * Now *item contains the first ItemPointer after previous result that
    1413             :      * satisfied all the keys for that exact TID, or a lossy reference to the
    1414             :      * same page.
    1415             :      *
    1416             :      * We must return recheck = true if any of the keys are marked recheck.
    1417             :      */
    1418      978306 :     *recheck = false;
    1419     1826316 :     for (i = 0; i < so->nkeys; i++)
    1420             :     {
    1421      978678 :         GinScanKey  key = so->keys + i;
    1422             : 
    1423      978678 :         if (key->recheckCurItem)
    1424             :         {
    1425      130668 :             *recheck = true;
    1426      130668 :             break;
    1427             :         }
    1428             :     }
    1429             : 
    1430      978306 :     return true;
    1431             : }
    1432             : 
    1433             : 
    1434             : /*
    1435             :  * Functions for scanning the pending list
    1436             :  */
    1437             : 
    1438             : 
    1439             : /*
    1440             :  * Get ItemPointer of next heap row to be checked from pending list.
    1441             :  * Returns false if there are no more. On pages with several heap rows
    1442             :  * it returns each row separately, on page with part of heap row returns
    1443             :  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
    1444             :  * the range of pending-list tuples belonging to this heap row.
    1445             :  *
    1446             :  * The pendingBuffer is presumed pinned and share-locked on entry, and is
    1447             :  * pinned and share-locked on success exit.  On failure exit it's released.
    1448             :  */
    1449             : static bool
    1450        1614 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
    1451             : {
    1452             :     OffsetNumber maxoff;
    1453             :     Page        page;
    1454             :     IndexTuple  itup;
    1455             : 
    1456        1614 :     ItemPointerSetInvalid(&pos->item);
    1457             :     for (;;)
    1458             :     {
    1459        1614 :         page = BufferGetPage(pos->pendingBuffer);
    1460             : 
    1461        1614 :         maxoff = PageGetMaxOffsetNumber(page);
    1462        1614 :         if (pos->firstOffset > maxoff)
    1463             :         {
    1464         166 :             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
    1465             : 
    1466         166 :             if (blkno == InvalidBlockNumber)
    1467             :             {
    1468         166 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1469         166 :                 pos->pendingBuffer = InvalidBuffer;
    1470             : 
    1471         166 :                 return false;
    1472             :             }
    1473             :             else
    1474             :             {
    1475             :                 /*
    1476             :                  * Here we must prevent deletion of next page by insertcleanup
    1477             :                  * process, which may be trying to obtain exclusive lock on
    1478             :                  * current page.  So, we lock next page before releasing the
    1479             :                  * current one
    1480             :                  */
    1481           0 :                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
    1482             : 
    1483           0 :                 LockBuffer(tmpbuf, GIN_SHARE);
    1484           0 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1485             : 
    1486           0 :                 pos->pendingBuffer = tmpbuf;
    1487           0 :                 pos->firstOffset = FirstOffsetNumber;
    1488             :             }
    1489             :         }
    1490             :         else
    1491             :         {
    1492        1448 :             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
    1493        1448 :             pos->item = itup->t_tid;
    1494        1448 :             if (GinPageHasFullRow(page))
    1495             :             {
    1496             :                 /*
    1497             :                  * find itempointer to the next row
    1498             :                  */
    1499        3354 :                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
    1500             :                 {
    1501        3188 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
    1502        3188 :                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
    1503        1282 :                         break;
    1504             :                 }
    1505             :             }
    1506             :             else
    1507             :             {
    1508             :                 /*
    1509             :                  * All itempointers are the same on this page
    1510             :                  */
    1511           0 :                 pos->lastOffset = maxoff + 1;
    1512             :             }
    1513             : 
    1514             :             /*
    1515             :              * Now pos->firstOffset points to the first tuple of current heap
    1516             :              * row, pos->lastOffset points to the first tuple of next heap row
    1517             :              * (or to the end of page)
    1518             :              */
    1519        1448 :             break;
    1520             :         }
    1521             :     }
    1522             : 
    1523        1448 :     return true;
    1524             : }
    1525             : 
    1526             : /*
    1527             :  * Scan pending-list page from current tuple (off) up till the first of:
    1528             :  * - match is found (then returns true)
    1529             :  * - no later match is possible
    1530             :  * - tuple's attribute number is not equal to entry's attrnum
    1531             :  * - reach end of page
    1532             :  *
    1533             :  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
    1534             :  * of gintuple_get_key() on the current page.
    1535             :  */
    1536             : static bool
    1537           0 : matchPartialInPendingList(GinState *ginstate, Page page,
    1538             :                           OffsetNumber off, OffsetNumber maxoff,
    1539             :                           GinScanEntry entry,
    1540             :                           Datum *datum, GinNullCategory *category,
    1541             :                           bool *datumExtracted)
    1542             : {
    1543             :     IndexTuple  itup;
    1544             :     int32       cmp;
    1545             : 
    1546             :     /* Partial match to a null is not possible */
    1547           0 :     if (entry->queryCategory != GIN_CAT_NORM_KEY)
    1548           0 :         return false;
    1549             : 
    1550           0 :     while (off < maxoff)
    1551             :     {
    1552           0 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
    1553             : 
    1554           0 :         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
    1555           0 :             return false;
    1556             : 
    1557           0 :         if (datumExtracted[off - 1] == false)
    1558             :         {
    1559           0 :             datum[off - 1] = gintuple_get_key(ginstate, itup,
    1560           0 :                                               &category[off - 1]);
    1561           0 :             datumExtracted[off - 1] = true;
    1562             :         }
    1563             : 
    1564             :         /* Once we hit nulls, no further match is possible */
    1565           0 :         if (category[off - 1] != GIN_CAT_NORM_KEY)
    1566           0 :             return false;
    1567             : 
    1568             :         /*----------
    1569             :          * Check partial match.
    1570             :          * case cmp == 0 => match
    1571             :          * case cmp > 0 => not match and end scan (no later match possible)
    1572             :          * case cmp < 0 => not match and continue scan
    1573             :          *----------
    1574             :          */
    1575           0 :         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
    1576           0 :                                               ginstate->supportCollation[entry->attnum - 1],
    1577             :                                               entry->queryKey,
    1578           0 :                                               datum[off - 1],
    1579           0 :                                               UInt16GetDatum(entry->strategy),
    1580           0 :                                               PointerGetDatum(entry->extra_data)));
    1581           0 :         if (cmp == 0)
    1582           0 :             return true;
    1583           0 :         else if (cmp > 0)
    1584           0 :             return false;
    1585             : 
    1586           0 :         off++;
    1587             :     }
    1588             : 
    1589           0 :     return false;
    1590             : }
    1591             : 
    1592             : /*
    1593             :  * Set up the entryRes array for each key by looking at
    1594             :  * every entry for current heap row in pending list.
    1595             :  *
    1596             :  * Returns true if each scan key has at least one entryRes match.
    1597             :  * This corresponds to the situations where the normal index search will
    1598             :  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
    1599             :  * cannot be returned by the normal search since no entry stream will
    1600             :  * source its TID.)
    1601             :  *
    1602             :  * The pendingBuffer is presumed pinned and share-locked on entry.
    1603             :  */
    1604             : static bool
    1605        1448 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
    1606             : {
    1607        1448 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1608             :     OffsetNumber attrnum;
    1609             :     Page        page;
    1610             :     IndexTuple  itup;
    1611             :     int         i,
    1612             :                 j;
    1613             : 
    1614             :     /*
    1615             :      * Reset all entryRes and hasMatchKey flags
    1616             :      */
    1617        3924 :     for (i = 0; i < so->nkeys; i++)
    1618             :     {
    1619        2476 :         GinScanKey  key = so->keys + i;
    1620             : 
    1621        2476 :         memset(key->entryRes, GIN_FALSE, key->nentries);
    1622             :     }
    1623        1448 :     memset(pos->hasMatchKey, false, so->nkeys);
    1624             : 
    1625             :     /*
    1626             :      * Outer loop iterates over multiple pending-list pages when a single heap
    1627             :      * row has entries spanning those pages.
    1628             :      */
    1629             :     for (;;)
    1630           0 :     {
    1631             :         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
    1632             :         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
    1633             :         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
    1634             : 
    1635             :         Assert(pos->lastOffset > pos->firstOffset);
    1636        1448 :         memset(datumExtracted + pos->firstOffset - 1, 0,
    1637        1448 :                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
    1638             : 
    1639        1448 :         page = BufferGetPage(pos->pendingBuffer);
    1640             : 
    1641        3924 :         for (i = 0; i < so->nkeys; i++)
    1642             :         {
    1643        2476 :             GinScanKey  key = so->keys + i;
    1644             : 
    1645        4776 :             for (j = 0; j < key->nentries; j++)
    1646             :             {
    1647        2300 :                 GinScanEntry entry = key->scanEntry[j];
    1648        2300 :                 OffsetNumber StopLow = pos->firstOffset,
    1649        2300 :                             StopHigh = pos->lastOffset,
    1650             :                             StopMiddle;
    1651             : 
    1652             :                 /* If already matched on earlier page, do no extra work */
    1653        2300 :                 if (key->entryRes[j])
    1654           0 :                     continue;
    1655             : 
    1656             :                 /*
    1657             :                  * Interesting tuples are from pos->firstOffset to
    1658             :                  * pos->lastOffset and they are ordered by (attnum, Datum) as
    1659             :                  * it's done in entry tree.  So we can use binary search to
    1660             :                  * avoid linear scanning.
    1661             :                  */
    1662        5108 :                 while (StopLow < StopHigh)
    1663             :                 {
    1664             :                     int         res;
    1665             : 
    1666        4010 :                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    1667             : 
    1668        4010 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
    1669             : 
    1670        4010 :                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
    1671             : 
    1672        4010 :                     if (key->attnum < attrnum)
    1673             :                     {
    1674         798 :                         StopHigh = StopMiddle;
    1675         798 :                         continue;
    1676             :                     }
    1677        3212 :                     if (key->attnum > attrnum)
    1678             :                     {
    1679         660 :                         StopLow = StopMiddle + 1;
    1680         660 :                         continue;
    1681             :                     }
    1682             : 
    1683        2552 :                     if (datumExtracted[StopMiddle - 1] == false)
    1684             :                     {
    1685        4952 :                         datum[StopMiddle - 1] =
    1686        2476 :                             gintuple_get_key(&so->ginstate, itup,
    1687        2476 :                                              &category[StopMiddle - 1]);
    1688        2476 :                         datumExtracted[StopMiddle - 1] = true;
    1689             :                     }
    1690             : 
    1691        2552 :                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
    1692             :                     {
    1693             :                         /* special behavior depending on searchMode */
    1694        1084 :                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
    1695             :                         {
    1696             :                             /* match anything except NULL_ITEM */
    1697        1084 :                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
    1698         378 :                                 res = -1;
    1699             :                             else
    1700         706 :                                 res = 0;
    1701             :                         }
    1702             :                         else
    1703             :                         {
    1704             :                             /* match everything */
    1705           0 :                             res = 0;
    1706             :                         }
    1707             :                     }
    1708             :                     else
    1709             :                     {
    1710        1468 :                         res = ginCompareEntries(&so->ginstate,
    1711        1468 :                                                 entry->attnum,
    1712             :                                                 entry->queryKey,
    1713        1468 :                                                 entry->queryCategory,
    1714        1468 :                                                 datum[StopMiddle - 1],
    1715        1468 :                                                 category[StopMiddle - 1]);
    1716             :                     }
    1717             : 
    1718        2552 :                     if (res == 0)
    1719             :                     {
    1720             :                         /*
    1721             :                          * Found exact match (there can be only one, except in
    1722             :                          * EMPTY_QUERY mode).
    1723             :                          *
    1724             :                          * If doing partial match, scan forward from here to
    1725             :                          * end of page to check for matches.
    1726             :                          *
    1727             :                          * See comment above about tuple's ordering.
    1728             :                          */
    1729        1202 :                         if (entry->isPartialMatch)
    1730           0 :                             key->entryRes[j] =
    1731           0 :                                 matchPartialInPendingList(&so->ginstate,
    1732             :                                                           page,
    1733             :                                                           StopMiddle,
    1734           0 :                                                           pos->lastOffset,
    1735             :                                                           entry,
    1736             :                                                           datum,
    1737             :                                                           category,
    1738             :                                                           datumExtracted);
    1739             :                         else
    1740        1202 :                             key->entryRes[j] = true;
    1741             : 
    1742             :                         /* done with binary search */
    1743        1202 :                         break;
    1744             :                     }
    1745        1350 :                     else if (res < 0)
    1746        1314 :                         StopHigh = StopMiddle;
    1747             :                     else
    1748          36 :                         StopLow = StopMiddle + 1;
    1749             :                 }
    1750             : 
    1751        2300 :                 if (StopLow >= StopHigh && entry->isPartialMatch)
    1752             :                 {
    1753             :                     /*
    1754             :                      * No exact match on this page.  If doing partial match,
    1755             :                      * scan from the first tuple greater than target value to
    1756             :                      * end of page.  Note that since we don't remember whether
    1757             :                      * the comparePartialFn told us to stop early on a
    1758             :                      * previous page, we will uselessly apply comparePartialFn
    1759             :                      * to the first tuple on each subsequent page.
    1760             :                      */
    1761           0 :                     key->entryRes[j] =
    1762           0 :                         matchPartialInPendingList(&so->ginstate,
    1763             :                                                   page,
    1764             :                                                   StopHigh,
    1765           0 :                                                   pos->lastOffset,
    1766             :                                                   entry,
    1767             :                                                   datum,
    1768             :                                                   category,
    1769             :                                                   datumExtracted);
    1770             :                 }
    1771             : 
    1772        2300 :                 pos->hasMatchKey[i] |= key->entryRes[j];
    1773             :             }
    1774             :         }
    1775             : 
    1776             :         /* Advance firstOffset over the scanned tuples */
    1777        1448 :         pos->firstOffset = pos->lastOffset;
    1778             : 
    1779        1448 :         if (GinPageHasFullRow(page))
    1780             :         {
    1781             :             /*
    1782             :              * We have examined all pending entries for the current heap row.
    1783             :              * Break out of loop over pages.
    1784             :              */
    1785        1448 :             break;
    1786             :         }
    1787             :         else
    1788             :         {
    1789             :             /*
    1790             :              * Advance to next page of pending entries for the current heap
    1791             :              * row.  Complain if there isn't one.
    1792             :              */
    1793           0 :             ItemPointerData item = pos->item;
    1794             : 
    1795           0 :             if (scanGetCandidate(scan, pos) == false ||
    1796           0 :                 !ItemPointerEquals(&pos->item, &item))
    1797           0 :                 elog(ERROR, "could not find additional pending pages for same heap tuple");
    1798             :         }
    1799             :     }
    1800             : 
    1801             :     /*
    1802             :      * All scan keys except excludeOnly require at least one entry to match.
    1803             :      * excludeOnly keys are an exception, because their implied
    1804             :      * GIN_CAT_EMPTY_QUERY scanEntry always matches.  So return "true" if all
    1805             :      * non-excludeOnly scan keys have at least one match.
    1806             :      */
    1807        2538 :     for (i = 0; i < so->nkeys; i++)
    1808             :     {
    1809        1984 :         if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
    1810         894 :             return false;
    1811             :     }
    1812             : 
    1813         554 :     return true;
    1814             : }
    1815             : 
    1816             : /*
    1817             :  * Collect all matched rows from pending list into bitmap.
    1818             :  */
    1819             : static void
    1820        1586 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
    1821             : {
    1822        1586 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1823             :     MemoryContext oldCtx;
    1824             :     bool        recheck,
    1825             :                 match;
    1826             :     int         i;
    1827             :     pendingPosition pos;
    1828        1586 :     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    1829             :     Page        page;
    1830             :     BlockNumber blkno;
    1831             : 
    1832        1586 :     *ntids = 0;
    1833             : 
    1834             :     /*
    1835             :      * Acquire predicate lock on the metapage, to conflict with any fastupdate
    1836             :      * insertions.
    1837             :      */
    1838        1586 :     PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
    1839             : 
    1840        1586 :     LockBuffer(metabuffer, GIN_SHARE);
    1841        1586 :     page = BufferGetPage(metabuffer);
    1842        1586 :     blkno = GinPageGetMeta(page)->head;
    1843             : 
    1844             :     /*
    1845             :      * fetch head of list before unlocking metapage. head page must be pinned
    1846             :      * to prevent deletion by vacuum process
    1847             :      */
    1848        1586 :     if (blkno == InvalidBlockNumber)
    1849             :     {
    1850             :         /* No pending list, so proceed with normal scan */
    1851        1420 :         UnlockReleaseBuffer(metabuffer);
    1852        1420 :         return;
    1853             :     }
    1854             : 
    1855         166 :     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    1856         166 :     LockBuffer(pos.pendingBuffer, GIN_SHARE);
    1857         166 :     pos.firstOffset = FirstOffsetNumber;
    1858         166 :     UnlockReleaseBuffer(metabuffer);
    1859         166 :     pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
    1860             : 
    1861             :     /*
    1862             :      * loop for each heap row. scanGetCandidate returns full row or row's
    1863             :      * tuples from first page.
    1864             :      */
    1865        1614 :     while (scanGetCandidate(scan, &pos))
    1866             :     {
    1867             :         /*
    1868             :          * Check entries in tuple and set up entryRes array.
    1869             :          *
    1870             :          * If pending tuples belonging to the current heap row are spread
    1871             :          * across several pages, collectMatchesForHeapRow will read all of
    1872             :          * those pages.
    1873             :          */
    1874        1448 :         if (!collectMatchesForHeapRow(scan, &pos))
    1875         894 :             continue;
    1876             : 
    1877             :         /*
    1878             :          * Matching of entries of one row is finished, so check row using
    1879             :          * consistent functions.
    1880             :          */
    1881         554 :         oldCtx = MemoryContextSwitchTo(so->tempCtx);
    1882         554 :         recheck = false;
    1883         554 :         match = true;
    1884             : 
    1885        1404 :         for (i = 0; i < so->nkeys; i++)
    1886             :         {
    1887         850 :             GinScanKey  key = so->keys + i;
    1888             : 
    1889         850 :             if (!key->boolConsistentFn(key))
    1890             :             {
    1891           0 :                 match = false;
    1892           0 :                 break;
    1893             :             }
    1894         850 :             recheck |= key->recheckCurItem;
    1895             :         }
    1896             : 
    1897         554 :         MemoryContextSwitchTo(oldCtx);
    1898         554 :         MemoryContextReset(so->tempCtx);
    1899             : 
    1900         554 :         if (match)
    1901             :         {
    1902         554 :             tbm_add_tuples(tbm, &pos.item, 1, recheck);
    1903         554 :             (*ntids)++;
    1904             :         }
    1905             :     }
    1906             : 
    1907         166 :     pfree(pos.hasMatchKey);
    1908             : }
    1909             : 
    1910             : 
    1911             : #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
    1912             : 
    1913             : int64
    1914        1598 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    1915             : {
    1916        1598 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1917             :     int64       ntids;
    1918             :     ItemPointerData iptr;
    1919             :     bool        recheck;
    1920             : 
    1921             :     /*
    1922             :      * Set up the scan keys, and check for unsatisfiable query.
    1923             :      */
    1924        1598 :     ginFreeScanKeys(so);        /* there should be no keys yet, but just to be
    1925             :                                  * sure */
    1926        1598 :     ginNewScanKey(scan);
    1927             : 
    1928        1598 :     if (GinIsVoidRes(scan))
    1929          12 :         return 0;
    1930             : 
    1931        1586 :     ntids = 0;
    1932             : 
    1933             :     /*
    1934             :      * First, scan the pending list and collect any matching entries into the
    1935             :      * bitmap.  After we scan a pending item, some other backend could post it
    1936             :      * into the main index, and so we might visit it a second time during the
    1937             :      * main scan.  This is okay because we'll just re-set the same bit in the
    1938             :      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
    1939             :      * can't support the amgettuple API, however.) Note that it would not do
    1940             :      * to scan the main index before the pending list, since concurrent
    1941             :      * cleanup could then make us miss entries entirely.
    1942             :      */
    1943        1586 :     scanPendingInsert(scan, tbm, &ntids);
    1944             : 
    1945             :     /*
    1946             :      * Now scan the main index.
    1947             :      */
    1948        1586 :     startScan(scan);
    1949             : 
    1950        1586 :     ItemPointerSetMin(&iptr);
    1951             : 
    1952             :     for (;;)
    1953             :     {
    1954      979892 :         CHECK_FOR_INTERRUPTS();
    1955             : 
    1956      979892 :         if (!scanGetItem(scan, iptr, &iptr, &recheck))
    1957        1586 :             break;
    1958             : 
    1959      978306 :         if (ItemPointerIsLossyPage(&iptr))
    1960           0 :             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
    1961             :         else
    1962      978306 :             tbm_add_tuples(tbm, &iptr, 1, recheck);
    1963      978306 :         ntids++;
    1964             :     }
    1965             : 
    1966        1586 :     return ntids;
    1967             : }

Generated by: LCOV version 1.14