LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginget.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 468 606 77.2 %
Date: 2020-05-25 03:07:04 Functions: 14 16 87.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginget.c
       4             :  *    fetch tuples from a GIN scan.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginget.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/relscan.h"
      19             : #include "miscadmin.h"
      20             : #include "storage/predicate.h"
      21             : #include "utils/datum.h"
      22             : #include "utils/memutils.h"
      23             : #include "utils/rel.h"
      24             : 
      25             : /* GUC parameter */
      26             : int         GinFuzzySearchLimit = 0;
      27             : 
      28             : typedef struct pendingPosition
      29             : {
      30             :     Buffer      pendingBuffer;
      31             :     OffsetNumber firstOffset;
      32             :     OffsetNumber lastOffset;
      33             :     ItemPointerData item;
      34             :     bool       *hasMatchKey;
      35             : } pendingPosition;
      36             : 
      37             : 
      38             : /*
      39             :  * Goes to the next page if current offset is outside of bounds
      40             :  */
      41             : static bool
      42      101698 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
      43             : {
      44      101698 :     Page        page = BufferGetPage(stack->buffer);
      45             : 
      46      101698 :     if (stack->off > PageGetMaxOffsetNumber(page))
      47             :     {
      48             :         /*
      49             :          * We scanned the whole page, so we should take right page
      50             :          */
      51        1282 :         if (GinPageRightMost(page))
      52         280 :             return false;       /* no more pages */
      53             : 
      54        1002 :         stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
      55        1002 :         stack->blkno = BufferGetBlockNumber(stack->buffer);
      56        1002 :         stack->off = FirstOffsetNumber;
      57        1002 :         PredicateLockPage(btree->index, stack->blkno, snapshot);
      58             :     }
      59             : 
      60      101418 :     return true;
      61             : }
      62             : 
      63             : /*
      64             :  * Scan all pages of a posting tree and save all its heap ItemPointers
      65             :  * in scanEntry->matchBitmap
      66             :  */
      67             : static void
      68           0 : scanPostingTree(Relation index, GinScanEntry scanEntry,
      69             :                 BlockNumber rootPostingTree, Snapshot snapshot)
      70             : {
      71             :     GinBtreeData btree;
      72             :     GinBtreeStack *stack;
      73             :     Buffer      buffer;
      74             :     Page        page;
      75             : 
      76             :     /* Descend to the leftmost leaf page */
      77           0 :     stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
      78           0 :     buffer = stack->buffer;
      79             : 
      80           0 :     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
      81             : 
      82           0 :     freeGinBtreeStack(stack);
      83             : 
      84             :     /*
      85             :      * Loop iterates through all leaf pages of posting tree
      86             :      */
      87             :     for (;;)
      88             :     {
      89           0 :         page = BufferGetPage(buffer);
      90           0 :         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
      91             :         {
      92           0 :             int         n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
      93             : 
      94           0 :             scanEntry->predictNumberResult += n;
      95             :         }
      96             : 
      97           0 :         if (GinPageRightMost(page))
      98           0 :             break;              /* no more pages */
      99             : 
     100           0 :         buffer = ginStepRight(buffer, index, GIN_SHARE);
     101             :     }
     102             : 
     103           0 :     UnlockReleaseBuffer(buffer);
     104           0 : }
     105             : 
     106             : /*
     107             :  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
     108             :  * match the search entry.  This supports three different match modes:
     109             :  *
     110             :  * 1. Partial-match support: scan from current point until the
     111             :  *    comparePartialFn says we're done.
     112             :  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
     113             :  *    key for the current attnum) until we hit null items or end of attnum
     114             :  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
     115             :  *    key for the current attnum) until we hit end of attnum
     116             :  *
     117             :  * Returns true if done, false if it's necessary to restart scan from scratch
     118             :  */
     119             : static bool
     120         424 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
     121             :                    GinScanEntry scanEntry, Snapshot snapshot)
     122             : {
     123             :     OffsetNumber attnum;
     124             :     Form_pg_attribute attr;
     125             : 
     126             :     /* Initialize empty bitmap result */
     127         424 :     scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
     128             : 
     129             :     /* Null query cannot partial-match anything */
     130         424 :     if (scanEntry->isPartialMatch &&
     131         248 :         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
     132           0 :         return true;
     133             : 
     134             :     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
     135         424 :     attnum = scanEntry->attnum;
     136         424 :     attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
     137             : 
     138             :     /*
     139             :      * Predicate lock entry leaf page, following pages will be locked by
     140             :      * moveRightIfItNeeded()
     141             :      */
     142         424 :     PredicateLockPage(btree->index, stack->buffer, snapshot);
     143             : 
     144             :     for (;;)
     145      101274 :     {
     146             :         Page        page;
     147             :         IndexTuple  itup;
     148             :         Datum       idatum;
     149             :         GinNullCategory icategory;
     150             : 
     151             :         /*
     152             :          * stack->off points to the interested entry, buffer is already locked
     153             :          */
     154      101698 :         if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     155         424 :             return true;
     156             : 
     157      101418 :         page = BufferGetPage(stack->buffer);
     158      101418 :         TestForOldSnapshot(snapshot, btree->index, page);
     159      101418 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     160             : 
     161             :         /*
     162             :          * If tuple stores another attribute then stop scan
     163             :          */
     164      101418 :         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     165           0 :             return true;
     166             : 
     167             :         /* Safe to fetch attribute value */
     168      101418 :         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
     169             : 
     170             :         /*
     171             :          * Check for appropriate scan stop conditions
     172             :          */
     173      101418 :         if (scanEntry->isPartialMatch)
     174             :         {
     175             :             int32       cmp;
     176             : 
     177             :             /*
     178             :              * In partial match, stop scan at any null (including
     179             :              * placeholders); partial matches never match nulls
     180             :              */
     181        1158 :             if (icategory != GIN_CAT_NORM_KEY)
     182          10 :                 return true;
     183             : 
     184             :             /*----------
     185             :              * Check of partial match.
     186             :              * case cmp == 0 => match
     187             :              * case cmp > 0 => not match and finish scan
     188             :              * case cmp < 0 => not match and continue scan
     189             :              *----------
     190             :              */
     191        1148 :             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
     192             :                                                   btree->ginstate->supportCollation[attnum - 1],
     193             :                                                   scanEntry->queryKey,
     194             :                                                   idatum,
     195             :                                                   UInt16GetDatum(scanEntry->strategy),
     196             :                                                   PointerGetDatum(scanEntry->extra_data)));
     197             : 
     198        1148 :             if (cmp > 0)
     199         122 :                 return true;
     200        1026 :             else if (cmp < 0)
     201             :             {
     202          60 :                 stack->off++;
     203          60 :                 continue;
     204             :             }
     205             :         }
     206      100260 :         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
     207             :         {
     208             :             /*
     209             :              * In ALL mode, we are not interested in null items, so we can
     210             :              * stop if we get to a null-item placeholder (which will be the
     211             :              * last entry for a given attnum).  We do want to include NULL_KEY
     212             :              * and EMPTY_ITEM entries, though.
     213             :              */
     214      100260 :             if (icategory == GIN_CAT_NULL_ITEM)
     215          12 :                 return true;
     216             :         }
     217             : 
     218             :         /*
     219             :          * OK, we want to return the TIDs listed in this entry.
     220             :          */
     221      101214 :         if (GinIsPostingTree(itup))
     222             :         {
     223           0 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     224             : 
     225             :             /*
     226             :              * We should unlock current page (but not unpin) during tree scan
     227             :              * to prevent deadlock with vacuum processes.
     228             :              *
     229             :              * We save current entry value (idatum) to be able to re-find our
     230             :              * tuple after re-locking
     231             :              */
     232           0 :             if (icategory == GIN_CAT_NORM_KEY)
     233           0 :                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
     234             : 
     235           0 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     236             : 
     237             :             /*
     238             :              * Acquire predicate lock on the posting tree.  We already hold a
     239             :              * lock on the entry page, but insertions to the posting tree
     240             :              * don't check for conflicts on that level.
     241             :              */
     242           0 :             PredicateLockPage(btree->index, rootPostingTree, snapshot);
     243             : 
     244             :             /* Collect all the TIDs in this entry's posting tree */
     245           0 :             scanPostingTree(btree->index, scanEntry, rootPostingTree,
     246             :                             snapshot);
     247             : 
     248             :             /*
     249             :              * We lock again the entry page and while it was unlocked insert
     250             :              * might have occurred, so we need to re-find our position.
     251             :              */
     252           0 :             LockBuffer(stack->buffer, GIN_SHARE);
     253           0 :             page = BufferGetPage(stack->buffer);
     254           0 :             if (!GinPageIsLeaf(page))
     255             :             {
     256             :                 /*
     257             :                  * Root page becomes non-leaf while we unlock it. We will
     258             :                  * start again, this situation doesn't occur often - root can
     259             :                  * became a non-leaf only once per life of index.
     260             :                  */
     261           0 :                 return false;
     262             :             }
     263             : 
     264             :             /* Search forward to re-find idatum */
     265             :             for (;;)
     266           0 :             {
     267             :                 Datum       newDatum;
     268             :                 GinNullCategory newCategory;
     269             : 
     270           0 :                 if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     271           0 :                     elog(ERROR, "lost saved point in index"); /* must not happen !!! */
     272             : 
     273           0 :                 page = BufferGetPage(stack->buffer);
     274           0 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     275             : 
     276           0 :                 if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     277           0 :                     elog(ERROR, "lost saved point in index"); /* must not happen !!! */
     278           0 :                 newDatum = gintuple_get_key(btree->ginstate, itup,
     279             :                                             &newCategory);
     280             : 
     281           0 :                 if (ginCompareEntries(btree->ginstate, attnum,
     282             :                                       newDatum, newCategory,
     283             :                                       idatum, icategory) == 0)
     284           0 :                     break;      /* Found! */
     285             : 
     286           0 :                 stack->off++;
     287             :             }
     288             : 
     289           0 :             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
     290           0 :                 pfree(DatumGetPointer(idatum));
     291             :         }
     292             :         else
     293             :         {
     294             :             ItemPointer ipd;
     295             :             int         nipd;
     296             : 
     297      101214 :             ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
     298      101214 :             tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
     299      101214 :             scanEntry->predictNumberResult += GinGetNPosting(itup);
     300      101214 :             pfree(ipd);
     301             :         }
     302             : 
     303             :         /*
     304             :          * Done with this entry, go to the next
     305             :          */
     306      101214 :         stack->off++;
     307             :     }
     308             : }
     309             : 
     310             : /*
     311             :  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
     312             :  */
     313             : static void
     314        2888 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
     315             : {
     316             :     GinBtreeData btreeEntry;
     317             :     GinBtreeStack *stackEntry;
     318             :     Page        page;
     319             :     bool        needUnlock;
     320             : 
     321        2888 : restartScanEntry:
     322        2888 :     entry->buffer = InvalidBuffer;
     323        2888 :     ItemPointerSetMin(&entry->curItem);
     324        2888 :     entry->offset = InvalidOffsetNumber;
     325        2888 :     if (entry->list)
     326           0 :         pfree(entry->list);
     327        2888 :     entry->list = NULL;
     328        2888 :     entry->nlist = 0;
     329        2888 :     entry->matchBitmap = NULL;
     330        2888 :     entry->matchResult = NULL;
     331        2888 :     entry->reduceResult = false;
     332        2888 :     entry->predictNumberResult = 0;
     333             : 
     334             :     /*
     335             :      * we should find entry, and begin scan of posting tree or just store
     336             :      * posting list in memory
     337             :      */
     338        2888 :     ginPrepareEntryScan(&btreeEntry, entry->attnum,
     339        2888 :                         entry->queryKey, entry->queryCategory,
     340             :                         ginstate);
     341        2888 :     stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot);
     342        2888 :     page = BufferGetPage(stackEntry->buffer);
     343             : 
     344             :     /* ginFindLeafPage() will have already checked snapshot age. */
     345        2888 :     needUnlock = true;
     346             : 
     347        2888 :     entry->isFinished = true;
     348             : 
     349        2888 :     if (entry->isPartialMatch ||
     350        2640 :         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
     351             :     {
     352             :         /*
     353             :          * btreeEntry.findItem locates the first item >= given search key.
     354             :          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
     355             :          * because of the way the GIN_CAT_EMPTY_QUERY category code is
     356             :          * assigned.)  We scan forward from there and collect all TIDs needed
     357             :          * for the entry type.
     358             :          */
     359         424 :         btreeEntry.findItem(&btreeEntry, stackEntry);
     360         424 :         if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
     361         424 :             == false)
     362             :         {
     363             :             /*
     364             :              * GIN tree was seriously restructured, so we will cleanup all
     365             :              * found data and rescan. See comments near 'return false' in
     366             :              * collectMatchBitmap()
     367             :              */
     368           0 :             if (entry->matchBitmap)
     369             :             {
     370           0 :                 if (entry->matchIterator)
     371           0 :                     tbm_end_iterate(entry->matchIterator);
     372           0 :                 entry->matchIterator = NULL;
     373           0 :                 tbm_free(entry->matchBitmap);
     374           0 :                 entry->matchBitmap = NULL;
     375             :             }
     376           0 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     377           0 :             freeGinBtreeStack(stackEntry);
     378           0 :             goto restartScanEntry;
     379             :         }
     380             : 
     381         766 :         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
     382             :         {
     383         342 :             entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
     384         342 :             entry->isFinished = false;
     385             :         }
     386             :     }
     387        2464 :     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     388             :     {
     389        1558 :         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
     390             : 
     391        1558 :         if (GinIsPostingTree(itup))
     392             :         {
     393          38 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     394             :             GinBtreeStack *stack;
     395             :             Page        page;
     396             :             ItemPointerData minItem;
     397             : 
     398             :             /*
     399             :              * This is an equality scan, so lock the root of the posting tree.
     400             :              * It represents a lock on the exact key value, and covers all the
     401             :              * items in the posting tree.
     402             :              */
     403          38 :             PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
     404             : 
     405             :             /*
     406             :              * We should unlock entry page before touching posting tree to
     407             :              * prevent deadlocks with vacuum processes. Because entry is never
     408             :              * deleted from page and posting tree is never reduced to the
     409             :              * posting list, we can unlock page after getting BlockNumber of
     410             :              * root of posting tree.
     411             :              */
     412          38 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     413          38 :             needUnlock = false;
     414             : 
     415          38 :             stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
     416             :                                             rootPostingTree, snapshot);
     417          38 :             entry->buffer = stack->buffer;
     418             : 
     419             :             /*
     420             :              * We keep buffer pinned because we need to prevent deletion of
     421             :              * page during scan. See GIN's vacuum implementation. RefCount is
     422             :              * increased to keep buffer pinned after freeGinBtreeStack() call.
     423             :              */
     424          38 :             IncrBufferRefCount(entry->buffer);
     425             : 
     426          38 :             page = BufferGetPage(entry->buffer);
     427             : 
     428             :             /*
     429             :              * Load the first page into memory.
     430             :              */
     431          38 :             ItemPointerSetMin(&minItem);
     432          38 :             entry->list = GinDataLeafPageGetItems(page, &entry->nlist, minItem);
     433             : 
     434          38 :             entry->predictNumberResult = stack->predictNumber * entry->nlist;
     435             : 
     436          38 :             LockBuffer(entry->buffer, GIN_UNLOCK);
     437          38 :             freeGinBtreeStack(stack);
     438          38 :             entry->isFinished = false;
     439             :         }
     440             :         else
     441             :         {
     442             :             /*
     443             :              * Lock the entry leaf page.  This is more coarse-grained than
     444             :              * necessary, because it will conflict with any insertions that
     445             :              * land on the same leaf page, not only the exact key we searched
     446             :              * for.  But locking an individual tuple would require updating
     447             :              * that lock whenever it moves because of insertions or vacuums,
     448             :              * which seems too complicated.
     449             :              */
     450        1520 :             PredicateLockPage(ginstate->index,
     451             :                               BufferGetBlockNumber(stackEntry->buffer),
     452             :                               snapshot);
     453        1520 :             if (GinGetNPosting(itup) > 0)
     454             :             {
     455        1520 :                 entry->list = ginReadTuple(ginstate, entry->attnum, itup,
     456             :                                            &entry->nlist);
     457        1520 :                 entry->predictNumberResult = entry->nlist;
     458             : 
     459        1520 :                 entry->isFinished = false;
     460             :             }
     461             :         }
     462             :     }
     463             :     else
     464             :     {
     465             :         /*
     466             :          * No entry found.  Predicate lock the leaf page, to lock the place
     467             :          * where the entry would've been, had there been one.
     468             :          */
     469         906 :         PredicateLockPage(ginstate->index,
     470             :                           BufferGetBlockNumber(stackEntry->buffer), snapshot);
     471             :     }
     472             : 
     473        2888 :     if (needUnlock)
     474        2850 :         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     475        2888 :     freeGinBtreeStack(stackEntry);
     476        2888 : }
     477             : 
     478             : /*
     479             :  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
     480             :  * least frequent items first.
     481             :  */
     482             : static int
     483        2848 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
     484             : {
     485        2848 :     const GinScanKey key = (const GinScanKey) arg;
     486        2848 :     int         i1 = *(const int *) a1;
     487        2848 :     int         i2 = *(const int *) a2;
     488        2848 :     uint32      n1 = key->scanEntry[i1]->predictNumberResult;
     489        2848 :     uint32      n2 = key->scanEntry[i2]->predictNumberResult;
     490             : 
     491        2848 :     if (n1 < n2)
     492         410 :         return -1;
     493        2438 :     else if (n1 == n2)
     494        1002 :         return 0;
     495             :     else
     496        1436 :         return 1;
     497             : }
     498             : 
     499             : static void
     500        1246 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
     501             : {
     502        1246 :     MemoryContext oldCtx = CurrentMemoryContext;
     503             :     int         i;
     504             :     int         j;
     505             :     int        *entryIndexes;
     506             : 
     507        1246 :     ItemPointerSetMin(&key->curItem);
     508        1246 :     key->curItemMatches = false;
     509        1246 :     key->recheckCurItem = false;
     510        1246 :     key->isFinished = false;
     511             : 
     512             :     /*
     513             :      * Divide the entries into two distinct sets: required and additional.
     514             :      * Additional entries are not enough for a match alone, without any items
     515             :      * from the required set, but are needed by the consistent function to
     516             :      * decide if an item matches. When scanning, we can skip over items from
     517             :      * additional entries that have no corresponding matches in any of the
     518             :      * required entries. That speeds up queries like "frequent & rare"
     519             :      * considerably, if the frequent term can be put in the additional set.
     520             :      *
     521             :      * There can be many legal ways to divide them entries into these two
     522             :      * sets. A conservative division is to just put everything in the required
     523             :      * set, but the more you can put in the additional set, the more you can
     524             :      * skip during the scan. To maximize skipping, we try to put as many
     525             :      * frequent items as possible into additional, and less frequent ones into
     526             :      * required. To do that, sort the entries by frequency
     527             :      * (predictNumberResult), and put entries into the required set in that
     528             :      * order, until the consistent function says that none of the remaining
     529             :      * entries can form a match, without any items from the required set. The
     530             :      * rest go to the additional set.
     531             :      *
     532             :      * Exclude-only scan keys are known to have no required entries.
     533             :      */
     534        1246 :     if (key->excludeOnly)
     535             :     {
     536          28 :         MemoryContextSwitchTo(so->keyCtx);
     537             : 
     538          28 :         key->nrequired = 0;
     539          28 :         key->nadditional = key->nentries;
     540          28 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     541          32 :         for (i = 0; i < key->nadditional; i++)
     542           4 :             key->additionalEntries[i] = key->scanEntry[i];
     543             :     }
     544        1218 :     else if (key->nentries > 1)
     545             :     {
     546         384 :         MemoryContextSwitchTo(so->tempCtx);
     547             : 
     548         384 :         entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
     549        2434 :         for (i = 0; i < key->nentries; i++)
     550        2050 :             entryIndexes[i] = i;
     551         384 :         qsort_arg(entryIndexes, key->nentries, sizeof(int),
     552             :                   entryIndexByFrequencyCmp, key);
     553             : 
     554        1570 :         for (i = 0; i < key->nentries - 1; i++)
     555             :         {
     556             :             /* Pass all entries <= i as FALSE, and the rest as MAYBE */
     557       69036 :             for (j = 0; j <= i; j++)
     558       67554 :                 key->entryRes[entryIndexes[j]] = GIN_FALSE;
     559       69700 :             for (j = i + 1; j < key->nentries; j++)
     560       68218 :                 key->entryRes[entryIndexes[j]] = GIN_MAYBE;
     561             : 
     562        1482 :             if (key->triConsistentFn(key) == GIN_FALSE)
     563         296 :                 break;
     564             :         }
     565             :         /* i is now the last required entry. */
     566             : 
     567         384 :         MemoryContextSwitchTo(so->keyCtx);
     568             : 
     569         384 :         key->nrequired = i + 1;
     570         384 :         key->nadditional = key->nentries - key->nrequired;
     571         384 :         key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
     572         384 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     573             : 
     574         384 :         j = 0;
     575        1954 :         for (i = 0; i < key->nrequired; i++)
     576        1570 :             key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
     577         864 :         for (i = 0; i < key->nadditional; i++)
     578         480 :             key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
     579             : 
     580             :         /* clean up after consistentFn calls (also frees entryIndexes) */
     581         384 :         MemoryContextReset(so->tempCtx);
     582             :     }
     583             :     else
     584             :     {
     585         834 :         MemoryContextSwitchTo(so->keyCtx);
     586             : 
     587         834 :         key->nrequired = 1;
     588         834 :         key->nadditional = 0;
     589         834 :         key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
     590         834 :         key->requiredEntries[0] = key->scanEntry[0];
     591             :     }
     592        1246 :     MemoryContextSwitchTo(oldCtx);
     593        1246 : }
     594             : 
     595             : static void
     596        1158 : startScan(IndexScanDesc scan)
     597             : {
     598        1158 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
     599        1158 :     GinState   *ginstate = &so->ginstate;
     600             :     uint32      i;
     601             : 
     602        4046 :     for (i = 0; i < so->totalentries; i++)
     603        2888 :         startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
     604             : 
     605        1158 :     if (GinFuzzySearchLimit > 0)
     606             :     {
     607             :         /*
     608             :          * If all of keys more than threshold we will try to reduce result, we
     609             :          * hope (and only hope, for intersection operation of array our
     610             :          * supposition isn't true), that total result will not more than
     611             :          * minimal predictNumberResult.
     612             :          */
     613           4 :         bool        reduce = true;
     614             : 
     615           8 :         for (i = 0; i < so->totalentries; i++)
     616             :         {
     617           4 :             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
     618             :             {
     619           0 :                 reduce = false;
     620           0 :                 break;
     621             :             }
     622             :         }
     623           4 :         if (reduce)
     624             :         {
     625           8 :             for (i = 0; i < so->totalentries; i++)
     626             :             {
     627           4 :                 so->entries[i]->predictNumberResult /= so->totalentries;
     628           4 :                 so->entries[i]->reduceResult = true;
     629             :             }
     630             :         }
     631             :     }
     632             : 
     633             :     /*
     634             :      * Now that we have the estimates for the entry frequencies, finish
     635             :      * initializing the scan keys.
     636             :      */
     637        2404 :     for (i = 0; i < so->nkeys; i++)
     638        1246 :         startScanKey(ginstate, so, so->keys + i);
     639        1158 : }
     640             : 
     641             : /*
     642             :  * Load the next batch of item pointers from a posting tree.
     643             :  *
     644             :  * Note that we copy the page into GinScanEntry->list array and unlock it, but
     645             :  * keep it pinned to prevent interference with vacuum.
     646             :  */
     647             : static void
     648          54 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
     649             :                    ItemPointerData advancePast, Snapshot snapshot)
     650             : {
     651             :     Page        page;
     652             :     int         i;
     653             :     bool        stepright;
     654             : 
     655          54 :     if (!BufferIsValid(entry->buffer))
     656             :     {
     657           0 :         entry->isFinished = true;
     658           0 :         return;
     659             :     }
     660             : 
     661             :     /*
     662             :      * We have two strategies for finding the correct page: step right from
     663             :      * the current page, or descend the tree again from the root. If
     664             :      * advancePast equals the current item, the next matching item should be
     665             :      * on the next page, so we step right. Otherwise, descend from root.
     666             :      */
     667          54 :     if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
     668             :     {
     669          50 :         stepright = true;
     670          50 :         LockBuffer(entry->buffer, GIN_SHARE);
     671             :     }
     672             :     else
     673             :     {
     674             :         GinBtreeStack *stack;
     675             : 
     676           4 :         ReleaseBuffer(entry->buffer);
     677             : 
     678             :         /*
     679             :          * Set the search key, and find the correct leaf page.
     680             :          */
     681           4 :         if (ItemPointerIsLossyPage(&advancePast))
     682             :         {
     683           0 :             ItemPointerSet(&entry->btree.itemptr,
     684             :                            GinItemPointerGetBlockNumber(&advancePast) + 1,
     685             :                            FirstOffsetNumber);
     686             :         }
     687             :         else
     688             :         {
     689           4 :             ItemPointerSet(&entry->btree.itemptr,
     690             :                            GinItemPointerGetBlockNumber(&advancePast),
     691             :                            OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
     692             :         }
     693           4 :         entry->btree.fullScan = false;
     694           4 :         stack = ginFindLeafPage(&entry->btree, true, false, snapshot);
     695             : 
     696             :         /* we don't need the stack, just the buffer. */
     697           4 :         entry->buffer = stack->buffer;
     698           4 :         IncrBufferRefCount(entry->buffer);
     699           4 :         freeGinBtreeStack(stack);
     700           4 :         stepright = false;
     701             :     }
     702             : 
     703          54 :     elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
     704             :          GinItemPointerGetBlockNumber(&advancePast),
     705             :          GinItemPointerGetOffsetNumber(&advancePast),
     706             :          !stepright);
     707             : 
     708          54 :     page = BufferGetPage(entry->buffer);
     709             :     for (;;)
     710             :     {
     711           0 :         entry->offset = InvalidOffsetNumber;
     712          54 :         if (entry->list)
     713             :         {
     714          54 :             pfree(entry->list);
     715          54 :             entry->list = NULL;
     716          54 :             entry->nlist = 0;
     717             :         }
     718             : 
     719          54 :         if (stepright)
     720             :         {
     721             :             /*
     722             :              * We've processed all the entries on this page. If it was the
     723             :              * last page in the tree, we're done.
     724             :              */
     725          50 :             if (GinPageRightMost(page))
     726             :             {
     727           0 :                 UnlockReleaseBuffer(entry->buffer);
     728           0 :                 entry->buffer = InvalidBuffer;
     729           0 :                 entry->isFinished = true;
     730           0 :                 return;
     731             :             }
     732             : 
     733             :             /*
     734             :              * Step to next page, following the right link. then find the
     735             :              * first ItemPointer greater than advancePast.
     736             :              */
     737          50 :             entry->buffer = ginStepRight(entry->buffer,
     738             :                                          ginstate->index,
     739             :                                          GIN_SHARE);
     740          50 :             page = BufferGetPage(entry->buffer);
     741             :         }
     742          54 :         stepright = true;
     743             : 
     744          54 :         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
     745           0 :             continue;           /* page was deleted by concurrent vacuum */
     746             : 
     747             :         /*
     748             :          * The first item > advancePast might not be on this page, but
     749             :          * somewhere to the right, if the page was split, or a non-match from
     750             :          * another key in the query allowed us to skip some items from this
     751             :          * entry. Keep following the right-links until we re-find the correct
     752             :          * page.
     753             :          */
     754          70 :         if (!GinPageRightMost(page) &&
     755          16 :             ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
     756             :         {
     757             :             /*
     758             :              * the item we're looking is > the right bound of the page, so it
     759             :              * can't be on this page.
     760             :              */
     761           0 :             continue;
     762             :         }
     763             : 
     764          54 :         entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
     765             : 
     766        1270 :         for (i = 0; i < entry->nlist; i++)
     767             :         {
     768        1270 :             if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
     769             :             {
     770          54 :                 entry->offset = i;
     771             : 
     772          54 :                 if (GinPageRightMost(page))
     773             :                 {
     774             :                     /* after processing the copied items, we're done. */
     775          38 :                     UnlockReleaseBuffer(entry->buffer);
     776          38 :                     entry->buffer = InvalidBuffer;
     777             :                 }
     778             :                 else
     779          16 :                     LockBuffer(entry->buffer, GIN_UNLOCK);
     780          54 :                 return;
     781             :             }
     782             :         }
     783             :     }
     784             : }
     785             : 
     786             : #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
     787             : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
     788             : 
     789             : /*
     790             :  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
     791             :  * of one scan key, or sets entry->isFinished to true if there are no more.
     792             :  *
     793             :  * Item pointers are returned in ascending order.
     794             :  *
     795             :  * Note: this can return a "lossy page" item pointer, indicating that the
     796             :  * entry potentially matches all items on that heap page.  However, it is
     797             :  * not allowed to return both a lossy page pointer and exact (regular)
     798             :  * item pointers for the same page.  (Doing so would break the key-combination
     799             :  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
     800             :  * current implementation this is guaranteed by the behavior of tidbitmaps.
     801             :  */
     802             : static void
     803      578458 : entryGetItem(GinState *ginstate, GinScanEntry entry,
     804             :              ItemPointerData advancePast, Snapshot snapshot)
     805             : {
     806             :     Assert(!entry->isFinished);
     807             : 
     808             :     Assert(!ItemPointerIsValid(&entry->curItem) ||
     809             :            ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
     810             : 
     811      578458 :     if (entry->matchBitmap)
     812             :     {
     813             :         /* A bitmap result */
     814      103588 :         BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
     815      103588 :         OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
     816             : 
     817             :         for (;;)
     818             :         {
     819             :             /*
     820             :              * If we've exhausted all items on this block, move to next block
     821             :              * in the bitmap.
     822             :              */
     823      105740 :             while (entry->matchResult == NULL ||
     824      105398 :                    (entry->matchResult->ntuples >= 0 &&
     825      105398 :                     entry->offset >= entry->matchResult->ntuples) ||
     826      103246 :                    entry->matchResult->blockno < advancePastBlk ||
     827      103246 :                    (ItemPointerIsLossyPage(&advancePast) &&
     828           0 :                     entry->matchResult->blockno == advancePastBlk))
     829             :             {
     830        2494 :                 entry->matchResult = tbm_iterate(entry->matchIterator);
     831             : 
     832        2494 :                 if (entry->matchResult == NULL)
     833             :                 {
     834         342 :                     ItemPointerSetInvalid(&entry->curItem);
     835         342 :                     tbm_end_iterate(entry->matchIterator);
     836         342 :                     entry->matchIterator = NULL;
     837         342 :                     entry->isFinished = true;
     838         342 :                     break;
     839             :                 }
     840             : 
     841             :                 /*
     842             :                  * Reset counter to the beginning of entry->matchResult. Note:
     843             :                  * entry->offset is still greater than matchResult->ntuples if
     844             :                  * matchResult is lossy.  So, on next call we will get next
     845             :                  * result from TIDBitmap.
     846             :                  */
     847        2152 :                 entry->offset = 0;
     848             :             }
     849      103588 :             if (entry->isFinished)
     850         342 :                 break;
     851             : 
     852             :             /*
     853             :              * We're now on the first page after advancePast which has any
     854             :              * items on it. If it's a lossy result, return that.
     855             :              */
     856      103246 :             if (entry->matchResult->ntuples < 0)
     857             :             {
     858           0 :                 ItemPointerSetLossyPage(&entry->curItem,
     859             :                                         entry->matchResult->blockno);
     860             : 
     861             :                 /*
     862             :                  * We might as well fall out of the loop; we could not
     863             :                  * estimate number of results on this page to support correct
     864             :                  * reducing of result even if it's enabled.
     865             :                  */
     866           0 :                 break;
     867             :             }
     868             : 
     869             :             /*
     870             :              * Not a lossy page. Skip over any offsets <= advancePast, and
     871             :              * return that.
     872             :              */
     873      103246 :             if (entry->matchResult->blockno == advancePastBlk)
     874             :             {
     875             :                 /*
     876             :                  * First, do a quick check against the last offset on the
     877             :                  * page. If that's > advancePast, so are all the other
     878             :                  * offsets, so just go back to the top to get the next page.
     879             :                  */
     880      101436 :                 if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
     881             :                 {
     882           0 :                     entry->offset = entry->matchResult->ntuples;
     883           0 :                     continue;
     884             :                 }
     885             : 
     886             :                 /* Otherwise scan to find the first item > advancePast */
     887      101436 :                 while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
     888           0 :                     entry->offset++;
     889             :             }
     890             : 
     891      103246 :             ItemPointerSet(&entry->curItem,
     892             :                            entry->matchResult->blockno,
     893             :                            entry->matchResult->offsets[entry->offset]);
     894      103246 :             entry->offset++;
     895             : 
     896             :             /* Done unless we need to reduce the result */
     897      103246 :             if (!entry->reduceResult || !dropItem(entry))
     898             :                 break;
     899             :         }
     900             :     }
     901      474870 :     else if (!BufferIsValid(entry->buffer))
     902             :     {
     903             :         /*
     904             :          * A posting list from an entry tuple, or the last page of a posting
     905             :          * tree.
     906             :          */
     907             :         for (;;)
     908             :         {
     909      254198 :             if (entry->offset >= entry->nlist)
     910             :             {
     911        1252 :                 ItemPointerSetInvalid(&entry->curItem);
     912        1252 :                 entry->isFinished = true;
     913        1252 :                 break;
     914             :             }
     915             : 
     916      252946 :             entry->curItem = entry->list[entry->offset++];
     917             : 
     918             :             /* If we're not past advancePast, keep scanning */
     919      252946 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     920       53454 :                 continue;
     921             : 
     922             :             /* Done unless we need to reduce the result */
     923      199492 :             if (!entry->reduceResult || !dropItem(entry))
     924             :                 break;
     925             :         }
     926             :     }
     927             :     else
     928             :     {
     929             :         /* A posting tree */
     930             :         for (;;)
     931             :         {
     932             :             /* If we've processed the current batch, load more items */
     933      398446 :             while (entry->offset >= entry->nlist)
     934             :             {
     935          54 :                 entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
     936             : 
     937          54 :                 if (entry->isFinished)
     938             :                 {
     939           0 :                     ItemPointerSetInvalid(&entry->curItem);
     940           0 :                     return;
     941             :                 }
     942             :             }
     943             : 
     944      398392 :             entry->curItem = entry->list[entry->offset++];
     945             : 
     946             :             /* If we're not past advancePast, keep scanning */
     947      398392 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     948       31404 :                 continue;
     949             : 
     950             :             /* Done unless we need to reduce the result */
     951      366988 :             if (!entry->reduceResult || !dropItem(entry))
     952             :                 break;
     953             : 
     954             :             /*
     955             :              * Advance advancePast (so that entryLoadMoreItems will load the
     956             :              * right data), and keep scanning
     957             :              */
     958       79156 :             advancePast = entry->curItem;
     959             :         }
     960             :     }
     961             : }
     962             : 
     963             : /*
     964             :  * Identify the "current" item among the input entry streams for this scan key
     965             :  * that is greater than advancePast, and test whether it passes the scan key
     966             :  * qual condition.
     967             :  *
     968             :  * The current item is the smallest curItem among the inputs.  key->curItem
     969             :  * is set to that value.  key->curItemMatches is set to indicate whether that
     970             :  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
     971             :  * iff recheck is needed for this item pointer (including the case where the
     972             :  * item pointer is a lossy page pointer).
     973             :  *
     974             :  * If all entry streams are exhausted, sets key->isFinished to true.
     975             :  *
     976             :  * Item pointers must be returned in ascending order.
     977             :  *
     978             :  * Note: this can return a "lossy page" item pointer, indicating that the
     979             :  * key potentially matches all items on that heap page.  However, it is
     980             :  * not allowed to return both a lossy page pointer and exact (regular)
     981             :  * item pointers for the same page.  (Doing so would break the key-combination
     982             :  * logic in scanGetItem.)
     983             :  */
     984             : static void
     985      503672 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
     986             :            ItemPointerData advancePast, Snapshot snapshot)
     987             : {
     988             :     ItemPointerData minItem;
     989             :     ItemPointerData curPageLossy;
     990             :     uint32      i;
     991             :     bool        haveLossyEntry;
     992             :     GinScanEntry entry;
     993             :     GinTernaryValue res;
     994             :     MemoryContext oldCtx;
     995             :     bool        allFinished;
     996             : 
     997             :     Assert(!key->isFinished);
     998             : 
     999             :     /*
    1000             :      * We might have already tested this item; if so, no need to repeat work.
    1001             :      * (Note: the ">" case can happen, if advancePast is exact but we
    1002             :      * previously had to set curItem to a lossy-page pointer.)
    1003             :      */
    1004      503672 :     if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
    1005        1176 :         return;
    1006             : 
    1007             :     /*
    1008             :      * Find the minimum item > advancePast among the active entry streams.
    1009             :      *
    1010             :      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
    1011             :      * offset (0xffff), so that it will sort after any exact entries for the
    1012             :      * same page.  So we'll prefer to return exact pointers not lossy
    1013             :      * pointers, which is good.
    1014             :      */
    1015      503654 :     ItemPointerSetMax(&minItem);
    1016      503654 :     allFinished = true;
    1017     1696994 :     for (i = 0; i < key->nrequired; i++)
    1018             :     {
    1019     1193340 :         entry = key->requiredEntries[i];
    1020             : 
    1021     1193340 :         if (entry->isFinished)
    1022      523430 :             continue;
    1023             : 
    1024             :         /*
    1025             :          * Advance this stream if necessary.
    1026             :          *
    1027             :          * In particular, since entry->curItem was initialized with
    1028             :          * ItemPointerSetMin, this ensures we fetch the first item for each
    1029             :          * entry on the first call.
    1030             :          */
    1031      669910 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1032             :         {
    1033      528130 :             entryGetItem(ginstate, entry, advancePast, snapshot);
    1034      528130 :             if (entry->isFinished)
    1035        1430 :                 continue;
    1036             :         }
    1037             : 
    1038      668480 :         allFinished = false;
    1039      668480 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1040      587046 :             minItem = entry->curItem;
    1041             :     }
    1042             : 
    1043      503654 :     if (allFinished && !key->excludeOnly)
    1044             :     {
    1045             :         /* all entries are finished */
    1046        1158 :         key->isFinished = true;
    1047        1158 :         return;
    1048             :     }
    1049             : 
    1050      502496 :     if (!key->excludeOnly)
    1051             :     {
    1052             :         /*
    1053             :          * For a normal scan key, we now know there are no matches < minItem.
    1054             :          *
    1055             :          * If minItem is lossy, it means that there were no exact items on the
    1056             :          * page among requiredEntries, because lossy pointers sort after exact
    1057             :          * items. However, there might be exact items for the same page among
    1058             :          * additionalEntries, so we mustn't advance past them.
    1059             :          */
    1060      498174 :         if (ItemPointerIsLossyPage(&minItem))
    1061             :         {
    1062           0 :             if (GinItemPointerGetBlockNumber(&advancePast) <
    1063           0 :                 GinItemPointerGetBlockNumber(&minItem))
    1064             :             {
    1065           0 :                 ItemPointerSet(&advancePast,
    1066             :                                GinItemPointerGetBlockNumber(&minItem),
    1067             :                                InvalidOffsetNumber);
    1068             :             }
    1069             :         }
    1070             :         else
    1071             :         {
    1072             :             Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
    1073      498174 :             ItemPointerSet(&advancePast,
    1074             :                            GinItemPointerGetBlockNumber(&minItem),
    1075             :                            OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
    1076             :         }
    1077             :     }
    1078             :     else
    1079             :     {
    1080             :         /*
    1081             :          * excludeOnly scan keys don't have any entries that are necessarily
    1082             :          * present in matching items.  So, we consider the item just after
    1083             :          * advancePast.
    1084             :          */
    1085             :         Assert(key->nrequired == 0);
    1086        4322 :         ItemPointerSet(&minItem,
    1087             :                        GinItemPointerGetBlockNumber(&advancePast),
    1088             :                        OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
    1089             :     }
    1090             : 
    1091             :     /*
    1092             :      * We might not have loaded all the entry streams for this TID yet. We
    1093             :      * could call the consistent function, passing MAYBE for those entries, to
    1094             :      * see if it can decide if this TID matches based on the information we
    1095             :      * have. But if the consistent-function is expensive, and cannot in fact
    1096             :      * decide with partial information, that could be a big loss. So, load all
    1097             :      * the additional entries, before calling the consistent function.
    1098             :      */
    1099      564126 :     for (i = 0; i < key->nadditional; i++)
    1100             :     {
    1101       61630 :         entry = key->additionalEntries[i];
    1102             : 
    1103       61630 :         if (entry->isFinished)
    1104         416 :             continue;
    1105             : 
    1106       61214 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1107             :         {
    1108       50328 :             entryGetItem(ginstate, entry, advancePast, snapshot);
    1109       50328 :             if (entry->isFinished)
    1110         164 :                 continue;
    1111             :         }
    1112             : 
    1113             :         /*
    1114             :          * Normally, none of the items in additionalEntries can have a curItem
    1115             :          * larger than minItem. But if minItem is a lossy page, then there
    1116             :          * might be exact items on the same page among additionalEntries.
    1117             :          */
    1118       61050 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1119             :         {
    1120             :             Assert(ItemPointerIsLossyPage(&minItem));
    1121           0 :             minItem = entry->curItem;
    1122             :         }
    1123             :     }
    1124             : 
    1125             :     /*
    1126             :      * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
    1127             :      * and perform consistentFn test.
    1128             :      *
    1129             :      * Lossy-page entries pose a problem, since we don't know the correct
    1130             :      * entryRes state to pass to the consistentFn, and we also don't know what
    1131             :      * its combining logic will be (could be AND, OR, or even NOT). If the
    1132             :      * logic is OR then the consistentFn might succeed for all items in the
    1133             :      * lossy page even when none of the other entries match.
    1134             :      *
    1135             :      * Our strategy is to call the tri-state consistent function, with the
    1136             :      * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
    1137             :      * returns FALSE, none of the lossy items alone are enough for a match, so
    1138             :      * we don't need to return a lossy-page pointer. Otherwise, return a
    1139             :      * lossy-page pointer to indicate that the whole heap page must be
    1140             :      * checked.  (On subsequent calls, we'll do nothing until minItem is past
    1141             :      * the page altogether, thus ensuring that we never return both regular
    1142             :      * and lossy pointers for the same page.)
    1143             :      *
    1144             :      * An exception is that it doesn't matter what we pass for lossy pointers
    1145             :      * in "hidden" entries, because the consistentFn's result can't depend on
    1146             :      * them. We could pass them as MAYBE as well, but if we're using the
    1147             :      * "shim" implementation of a tri-state consistent function (see
    1148             :      * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
    1149             :      * them as true.
    1150             :      *
    1151             :      * Note that only lossy-page entries pointing to the current item's page
    1152             :      * should trigger this processing; we might have future lossy pages in the
    1153             :      * entry array, but they aren't relevant yet.
    1154             :      */
    1155      502496 :     key->curItem = minItem;
    1156      502496 :     ItemPointerSetLossyPage(&curPageLossy,
    1157             :                             GinItemPointerGetBlockNumber(&key->curItem));
    1158      502496 :     haveLossyEntry = false;
    1159     1755122 :     for (i = 0; i < key->nentries; i++)
    1160             :     {
    1161     1252626 :         entry = key->scanEntry[i];
    1162     1982156 :         if (entry->isFinished == false &&
    1163      729530 :             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1164             :         {
    1165           0 :             if (i < key->nuserentries)
    1166           0 :                 key->entryRes[i] = GIN_MAYBE;
    1167             :             else
    1168           0 :                 key->entryRes[i] = GIN_TRUE;
    1169           0 :             haveLossyEntry = true;
    1170             :         }
    1171             :         else
    1172     1252626 :             key->entryRes[i] = GIN_FALSE;
    1173             :     }
    1174             : 
    1175             :     /* prepare for calling consistentFn in temp context */
    1176      502496 :     oldCtx = MemoryContextSwitchTo(tempCtx);
    1177             : 
    1178      502496 :     if (haveLossyEntry)
    1179             :     {
    1180             :         /* Have lossy-page entries, so see if whole page matches */
    1181           0 :         res = key->triConsistentFn(key);
    1182             : 
    1183           0 :         if (res == GIN_TRUE || res == GIN_MAYBE)
    1184             :         {
    1185             :             /* Yes, so clean up ... */
    1186           0 :             MemoryContextSwitchTo(oldCtx);
    1187           0 :             MemoryContextReset(tempCtx);
    1188             : 
    1189             :             /* and return lossy pointer for whole page */
    1190           0 :             key->curItem = curPageLossy;
    1191           0 :             key->curItemMatches = true;
    1192           0 :             key->recheckCurItem = true;
    1193           0 :             return;
    1194             :         }
    1195             :     }
    1196             : 
    1197             :     /*
    1198             :      * At this point we know that we don't need to return a lossy whole-page
    1199             :      * pointer, but we might have matches for individual exact item pointers,
    1200             :      * possibly in combination with a lossy pointer. Pass lossy pointers as
    1201             :      * MAYBE to the ternary consistent function, to let it decide if this
    1202             :      * tuple satisfies the overall key, even though we don't know if the lossy
    1203             :      * entries match.
    1204             :      *
    1205             :      * Prepare entryRes array to be passed to consistentFn.
    1206             :      */
    1207     1755122 :     for (i = 0; i < key->nentries; i++)
    1208             :     {
    1209     1252626 :         entry = key->scanEntry[i];
    1210     1252626 :         if (entry->isFinished)
    1211      523096 :             key->entryRes[i] = GIN_FALSE;
    1212             : #if 0
    1213             : 
    1214             :         /*
    1215             :          * This case can't currently happen, because we loaded all the entries
    1216             :          * for this item earlier.
    1217             :          */
    1218             :         else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1219             :             key->entryRes[i] = GIN_MAYBE;
    1220             : #endif
    1221      729530 :         else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1222           0 :             key->entryRes[i] = GIN_MAYBE;
    1223      729530 :         else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
    1224      568518 :             key->entryRes[i] = GIN_TRUE;
    1225             :         else
    1226      161012 :             key->entryRes[i] = GIN_FALSE;
    1227             :     }
    1228             : 
    1229      502496 :     res = key->triConsistentFn(key);
    1230             : 
    1231      502496 :     switch (res)
    1232             :     {
    1233      395746 :         case GIN_TRUE:
    1234      395746 :             key->curItemMatches = true;
    1235             :             /* triConsistentFn set recheckCurItem */
    1236      395746 :             break;
    1237             : 
    1238       15158 :         case GIN_FALSE:
    1239       15158 :             key->curItemMatches = false;
    1240       15158 :             break;
    1241             : 
    1242       91592 :         case GIN_MAYBE:
    1243       91592 :             key->curItemMatches = true;
    1244       91592 :             key->recheckCurItem = true;
    1245       91592 :             break;
    1246             : 
    1247           0 :         default:
    1248             : 
    1249             :             /*
    1250             :              * the 'default' case shouldn't happen, but if the consistent
    1251             :              * function returns something bogus, this is the safe result
    1252             :              */
    1253           0 :             key->curItemMatches = true;
    1254           0 :             key->recheckCurItem = true;
    1255           0 :             break;
    1256             :     }
    1257             : 
    1258             :     /*
    1259             :      * We have a tuple, and we know if it matches or not. If it's a non-match,
    1260             :      * we could continue to find the next matching tuple, but let's break out
    1261             :      * and give scanGetItem a chance to advance the other keys. They might be
    1262             :      * able to skip past to a much higher TID, allowing us to save work.
    1263             :      */
    1264             : 
    1265             :     /* clean up after consistentFn calls */
    1266      502496 :     MemoryContextSwitchTo(oldCtx);
    1267      502496 :     MemoryContextReset(tempCtx);
    1268             : }
    1269             : 
    1270             : /*
    1271             :  * Get next heap item pointer (after advancePast) from scan.
    1272             :  * Returns true if anything found.
    1273             :  * On success, *item and *recheck are set.
    1274             :  *
    1275             :  * Note: this is very nearly the same logic as in keyGetItem(), except
    1276             :  * that we know the keys are to be combined with AND logic, whereas in
    1277             :  * keyGetItem() the combination logic is known only to the consistentFn.
    1278             :  */
    1279             : static bool
    1280      484110 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
    1281             :             ItemPointerData *item, bool *recheck)
    1282             : {
    1283      484110 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1284             :     uint32      i;
    1285             :     bool        match;
    1286             : 
    1287             :     /*----------
    1288             :      * Advance the scan keys in lock-step, until we find an item that matches
    1289             :      * all the keys. If any key reports isFinished, meaning its subset of the
    1290             :      * entries is exhausted, we can stop.  Otherwise, set *item to the next
    1291             :      * matching item.
    1292             :      *
    1293             :      * This logic works only if a keyGetItem stream can never contain both
    1294             :      * exact and lossy pointers for the same page.  Else we could have a
    1295             :      * case like
    1296             :      *
    1297             :      *      stream 1        stream 2
    1298             :      *      ...             ...
    1299             :      *      42/6            42/7
    1300             :      *      50/1            42/0xffff
    1301             :      *      ...             ...
    1302             :      *
    1303             :      * We would conclude that 42/6 is not a match and advance stream 1,
    1304             :      * thus never detecting the match to the lossy pointer in stream 2.
    1305             :      * (keyGetItem has a similar problem versus entryGetItem.)
    1306             :      *----------
    1307             :      */
    1308             :     do
    1309             :     {
    1310      499310 :         ItemPointerSetMin(item);
    1311      499310 :         match = true;
    1312      986666 :         for (i = 0; i < so->nkeys && match; i++)
    1313             :         {
    1314      503672 :             GinScanKey  key = so->keys + i;
    1315             : 
    1316             :             /*
    1317             :              * If we're considering a lossy page, skip excludeOnly keys,  They
    1318             :              * can't exclude the whole page anyway.
    1319             :              */
    1320      503672 :             if (ItemPointerIsLossyPage(item) && key->excludeOnly)
    1321             :             {
    1322             :                 /*
    1323             :                  * ginNewScanKey() should never mark the first key as
    1324             :                  * excludeOnly.
    1325             :                  */
    1326             :                 Assert(i > 0);
    1327           0 :                 continue;
    1328             :             }
    1329             : 
    1330             :             /* Fetch the next item for this key that is > advancePast. */
    1331      503672 :             keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
    1332             :                        scan->xs_snapshot);
    1333             : 
    1334      503672 :             if (key->isFinished)
    1335        1158 :                 return false;
    1336             : 
    1337             :             /*
    1338             :              * If it's not a match, we can immediately conclude that nothing
    1339             :              * <= this item matches, without checking the rest of the keys.
    1340             :              */
    1341      502514 :             if (!key->curItemMatches)
    1342             :             {
    1343       15158 :                 advancePast = key->curItem;
    1344       15158 :                 match = false;
    1345       15158 :                 break;
    1346             :             }
    1347             : 
    1348             :             /*
    1349             :              * It's a match. We can conclude that nothing < matches, so the
    1350             :              * other key streams can skip to this item.
    1351             :              *
    1352             :              * Beware of lossy pointers, though; from a lossy pointer, we can
    1353             :              * only conclude that nothing smaller than this *block* matches.
    1354             :              */
    1355      487356 :             if (ItemPointerIsLossyPage(&key->curItem))
    1356             :             {
    1357           0 :                 if (GinItemPointerGetBlockNumber(&advancePast) <
    1358           0 :                     GinItemPointerGetBlockNumber(&key->curItem))
    1359             :                 {
    1360           0 :                     ItemPointerSet(&advancePast,
    1361             :                                    GinItemPointerGetBlockNumber(&key->curItem),
    1362             :                                    InvalidOffsetNumber);
    1363             :                 }
    1364             :             }
    1365             :             else
    1366             :             {
    1367             :                 Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
    1368      487356 :                 ItemPointerSet(&advancePast,
    1369             :                                GinItemPointerGetBlockNumber(&key->curItem),
    1370             :                                OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
    1371             :             }
    1372             : 
    1373             :             /*
    1374             :              * If this is the first key, remember this location as a potential
    1375             :              * match, and proceed to check the rest of the keys.
    1376             :              *
    1377             :              * Otherwise, check if this is the same item that we checked the
    1378             :              * previous keys for (or a lossy pointer for the same page). If
    1379             :              * not, loop back to check the previous keys for this item (we
    1380             :              * will check this key again too, but keyGetItem returns quickly
    1381             :              * for that)
    1382             :              */
    1383      487356 :             if (i == 0)
    1384             :             {
    1385      483066 :                 *item = key->curItem;
    1386             :             }
    1387             :             else
    1388             :             {
    1389        4290 :                 if (ItemPointerIsLossyPage(&key->curItem) ||
    1390        4290 :                     ItemPointerIsLossyPage(item))
    1391             :                 {
    1392             :                     Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
    1393           0 :                     match = (GinItemPointerGetBlockNumber(&key->curItem) ==
    1394           0 :                              GinItemPointerGetBlockNumber(item));
    1395             :                 }
    1396             :                 else
    1397             :                 {
    1398             :                     Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
    1399        4290 :                     match = (ginCompareItemPointers(&key->curItem, item) == 0);
    1400             :                 }
    1401             :             }
    1402             :         }
    1403      498152 :     } while (!match);
    1404             : 
    1405             :     Assert(!ItemPointerIsMin(item));
    1406             : 
    1407             :     /*
    1408             :      * Now *item contains the first ItemPointer after previous result that
    1409             :      * satisfied all the keys for that exact TID, or a lossy reference to the
    1410             :      * same page.
    1411             :      *
    1412             :      * We must return recheck = true if any of the keys are marked recheck.
    1413             :      */
    1414      482952 :     *recheck = false;
    1415      877264 :     for (i = 0; i < so->nkeys; i++)
    1416             :     {
    1417      483200 :         GinScanKey  key = so->keys + i;
    1418             : 
    1419      483200 :         if (key->recheckCurItem)
    1420             :         {
    1421       88888 :             *recheck = true;
    1422       88888 :             break;
    1423             :         }
    1424             :     }
    1425             : 
    1426      482952 :     return true;
    1427             : }
    1428             : 
    1429             : 
    1430             : /*
    1431             :  * Functions for scanning the pending list
    1432             :  */
    1433             : 
    1434             : 
    1435             : /*
    1436             :  * Get ItemPointer of next heap row to be checked from pending list.
    1437             :  * Returns false if there are no more. On pages with several heap rows
    1438             :  * it returns each row separately, on page with part of heap row returns
    1439             :  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
    1440             :  * the range of pending-list tuples belonging to this heap row.
    1441             :  *
    1442             :  * The pendingBuffer is presumed pinned and share-locked on entry, and is
    1443             :  * pinned and share-locked on success exit.  On failure exit it's released.
    1444             :  */
    1445             : static bool
    1446        1080 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
    1447             : {
    1448             :     OffsetNumber maxoff;
    1449             :     Page        page;
    1450             :     IndexTuple  itup;
    1451             : 
    1452        1080 :     ItemPointerSetInvalid(&pos->item);
    1453             :     for (;;)
    1454             :     {
    1455        1080 :         page = BufferGetPage(pos->pendingBuffer);
    1456        1080 :         TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1457             : 
    1458        1080 :         maxoff = PageGetMaxOffsetNumber(page);
    1459        1080 :         if (pos->firstOffset > maxoff)
    1460             :         {
    1461         112 :             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
    1462             : 
    1463         112 :             if (blkno == InvalidBlockNumber)
    1464             :             {
    1465         112 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1466         112 :                 pos->pendingBuffer = InvalidBuffer;
    1467             : 
    1468         112 :                 return false;
    1469             :             }
    1470             :             else
    1471             :             {
    1472             :                 /*
    1473             :                  * Here we must prevent deletion of next page by insertcleanup
    1474             :                  * process, which may be trying to obtain exclusive lock on
    1475             :                  * current page.  So, we lock next page before releasing the
    1476             :                  * current one
    1477             :                  */
    1478           0 :                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
    1479             : 
    1480           0 :                 LockBuffer(tmpbuf, GIN_SHARE);
    1481           0 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1482             : 
    1483           0 :                 pos->pendingBuffer = tmpbuf;
    1484           0 :                 pos->firstOffset = FirstOffsetNumber;
    1485             :             }
    1486             :         }
    1487             :         else
    1488             :         {
    1489         968 :             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
    1490         968 :             pos->item = itup->t_tid;
    1491         968 :             if (GinPageHasFullRow(page))
    1492             :             {
    1493             :                 /*
    1494             :                  * find itempointer to the next row
    1495             :                  */
    1496        2260 :                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
    1497             :                 {
    1498        2148 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
    1499        2148 :                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
    1500         856 :                         break;
    1501             :                 }
    1502             :             }
    1503             :             else
    1504             :             {
    1505             :                 /*
    1506             :                  * All itempointers are the same on this page
    1507             :                  */
    1508           0 :                 pos->lastOffset = maxoff + 1;
    1509             :             }
    1510             : 
    1511             :             /*
    1512             :              * Now pos->firstOffset points to the first tuple of current heap
    1513             :              * row, pos->lastOffset points to the first tuple of next heap row
    1514             :              * (or to the end of page)
    1515             :              */
    1516         968 :             break;
    1517             :         }
    1518             :     }
    1519             : 
    1520         968 :     return true;
    1521             : }
    1522             : 
    1523             : /*
    1524             :  * Scan pending-list page from current tuple (off) up till the first of:
    1525             :  * - match is found (then returns true)
    1526             :  * - no later match is possible
    1527             :  * - tuple's attribute number is not equal to entry's attrnum
    1528             :  * - reach end of page
    1529             :  *
    1530             :  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
    1531             :  * of gintuple_get_key() on the current page.
    1532             :  */
    1533             : static bool
    1534           0 : matchPartialInPendingList(GinState *ginstate, Page page,
    1535             :                           OffsetNumber off, OffsetNumber maxoff,
    1536             :                           GinScanEntry entry,
    1537             :                           Datum *datum, GinNullCategory *category,
    1538             :                           bool *datumExtracted)
    1539             : {
    1540             :     IndexTuple  itup;
    1541             :     int32       cmp;
    1542             : 
    1543             :     /* Partial match to a null is not possible */
    1544           0 :     if (entry->queryCategory != GIN_CAT_NORM_KEY)
    1545           0 :         return false;
    1546             : 
    1547           0 :     while (off < maxoff)
    1548             :     {
    1549           0 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
    1550             : 
    1551           0 :         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
    1552           0 :             return false;
    1553             : 
    1554           0 :         if (datumExtracted[off - 1] == false)
    1555             :         {
    1556           0 :             datum[off - 1] = gintuple_get_key(ginstate, itup,
    1557           0 :                                               &category[off - 1]);
    1558           0 :             datumExtracted[off - 1] = true;
    1559             :         }
    1560             : 
    1561             :         /* Once we hit nulls, no further match is possible */
    1562           0 :         if (category[off - 1] != GIN_CAT_NORM_KEY)
    1563           0 :             return false;
    1564             : 
    1565             :         /*----------
    1566             :          * Check partial match.
    1567             :          * case cmp == 0 => match
    1568             :          * case cmp > 0 => not match and end scan (no later match possible)
    1569             :          * case cmp < 0 => not match and continue scan
    1570             :          *----------
    1571             :          */
    1572           0 :         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
    1573             :                                               ginstate->supportCollation[entry->attnum - 1],
    1574             :                                               entry->queryKey,
    1575             :                                               datum[off - 1],
    1576             :                                               UInt16GetDatum(entry->strategy),
    1577             :                                               PointerGetDatum(entry->extra_data)));
    1578           0 :         if (cmp == 0)
    1579           0 :             return true;
    1580           0 :         else if (cmp > 0)
    1581           0 :             return false;
    1582             : 
    1583           0 :         off++;
    1584             :     }
    1585             : 
    1586           0 :     return false;
    1587             : }
    1588             : 
    1589             : /*
    1590             :  * Set up the entryRes array for each key by looking at
    1591             :  * every entry for current heap row in pending list.
    1592             :  *
    1593             :  * Returns true if each scan key has at least one entryRes match.
    1594             :  * This corresponds to the situations where the normal index search will
    1595             :  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
    1596             :  * cannot be returned by the normal search since no entry stream will
    1597             :  * source its TID.)
    1598             :  *
    1599             :  * The pendingBuffer is presumed pinned and share-locked on entry.
    1600             :  */
    1601             : static bool
    1602         968 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
    1603             : {
    1604         968 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1605             :     OffsetNumber attrnum;
    1606             :     Page        page;
    1607             :     IndexTuple  itup;
    1608             :     int         i,
    1609             :                 j;
    1610             : 
    1611             :     /*
    1612             :      * Reset all entryRes and hasMatchKey flags
    1613             :      */
    1614        2624 :     for (i = 0; i < so->nkeys; i++)
    1615             :     {
    1616        1656 :         GinScanKey  key = so->keys + i;
    1617             : 
    1618        1656 :         memset(key->entryRes, GIN_FALSE, key->nentries);
    1619             :     }
    1620         968 :     memset(pos->hasMatchKey, false, so->nkeys);
    1621             : 
    1622             :     /*
    1623             :      * Outer loop iterates over multiple pending-list pages when a single heap
    1624             :      * row has entries spanning those pages.
    1625             :      */
    1626             :     for (;;)
    1627           0 :     {
    1628             :         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
    1629             :         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
    1630             :         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
    1631             : 
    1632             :         Assert(pos->lastOffset > pos->firstOffset);
    1633         968 :         memset(datumExtracted + pos->firstOffset - 1, 0,
    1634         968 :                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
    1635             : 
    1636         968 :         page = BufferGetPage(pos->pendingBuffer);
    1637         968 :         TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1638             : 
    1639        2624 :         for (i = 0; i < so->nkeys; i++)
    1640             :         {
    1641        1656 :             GinScanKey  key = so->keys + i;
    1642             : 
    1643        3196 :             for (j = 0; j < key->nentries; j++)
    1644             :             {
    1645        1540 :                 GinScanEntry entry = key->scanEntry[j];
    1646        1540 :                 OffsetNumber StopLow = pos->firstOffset,
    1647        1540 :                             StopHigh = pos->lastOffset,
    1648             :                             StopMiddle;
    1649             : 
    1650             :                 /* If already matched on earlier page, do no extra work */
    1651        1540 :                 if (key->entryRes[j])
    1652           0 :                     continue;
    1653             : 
    1654             :                 /*
    1655             :                  * Interesting tuples are from pos->firstOffset to
    1656             :                  * pos->lastOffset and they are ordered by (attnum, Datum) as
    1657             :                  * it's done in entry tree.  So we can use binary search to
    1658             :                  * avoid linear scanning.
    1659             :                  */
    1660        3420 :                 while (StopLow < StopHigh)
    1661             :                 {
    1662             :                     int         res;
    1663             : 
    1664        2688 :                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    1665             : 
    1666        2688 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
    1667             : 
    1668        2688 :                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
    1669             : 
    1670        2688 :                     if (key->attnum < attrnum)
    1671             :                     {
    1672         532 :                         StopHigh = StopMiddle;
    1673         532 :                         continue;
    1674             :                     }
    1675        2156 :                     if (key->attnum > attrnum)
    1676             :                     {
    1677         440 :                         StopLow = StopMiddle + 1;
    1678         440 :                         continue;
    1679             :                     }
    1680             : 
    1681        1716 :                     if (datumExtracted[StopMiddle - 1] == false)
    1682             :                     {
    1683        3320 :                         datum[StopMiddle - 1] =
    1684        3320 :                             gintuple_get_key(&so->ginstate, itup,
    1685        1660 :                                              &category[StopMiddle - 1]);
    1686        1660 :                         datumExtracted[StopMiddle - 1] = true;
    1687             :                     }
    1688             : 
    1689        1716 :                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
    1690             :                     {
    1691             :                         /* special behavior depending on searchMode */
    1692         724 :                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
    1693             :                         {
    1694             :                             /* match anything except NULL_ITEM */
    1695         724 :                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
    1696         252 :                                 res = -1;
    1697             :                             else
    1698         472 :                                 res = 0;
    1699             :                         }
    1700             :                         else
    1701             :                         {
    1702             :                             /* match everything */
    1703           0 :                             res = 0;
    1704             :                         }
    1705             :                     }
    1706             :                     else
    1707             :                     {
    1708        3968 :                         res = ginCompareEntries(&so->ginstate,
    1709         992 :                                                 entry->attnum,
    1710             :                                                 entry->queryKey,
    1711         992 :                                                 entry->queryCategory,
    1712         992 :                                                 datum[StopMiddle - 1],
    1713         992 :                                                 category[StopMiddle - 1]);
    1714             :                     }
    1715             : 
    1716        1716 :                     if (res == 0)
    1717             :                     {
    1718             :                         /*
    1719             :                          * Found exact match (there can be only one, except in
    1720             :                          * EMPTY_QUERY mode).
    1721             :                          *
    1722             :                          * If doing partial match, scan forward from here to
    1723             :                          * end of page to check for matches.
    1724             :                          *
    1725             :                          * See comment above about tuple's ordering.
    1726             :                          */
    1727         808 :                         if (entry->isPartialMatch)
    1728           0 :                             key->entryRes[j] =
    1729           0 :                                 matchPartialInPendingList(&so->ginstate,
    1730             :                                                           page,
    1731             :                                                           StopMiddle,
    1732           0 :                                                           pos->lastOffset,
    1733             :                                                           entry,
    1734             :                                                           datum,
    1735             :                                                           category,
    1736             :                                                           datumExtracted);
    1737             :                         else
    1738         808 :                             key->entryRes[j] = true;
    1739             : 
    1740             :                         /* done with binary search */
    1741         808 :                         break;
    1742             :                     }
    1743         908 :                     else if (res < 0)
    1744         880 :                         StopHigh = StopMiddle;
    1745             :                     else
    1746          28 :                         StopLow = StopMiddle + 1;
    1747             :                 }
    1748             : 
    1749        1540 :                 if (StopLow >= StopHigh && entry->isPartialMatch)
    1750             :                 {
    1751             :                     /*
    1752             :                      * No exact match on this page.  If doing partial match,
    1753             :                      * scan from the first tuple greater than target value to
    1754             :                      * end of page.  Note that since we don't remember whether
    1755             :                      * the comparePartialFn told us to stop early on a
    1756             :                      * previous page, we will uselessly apply comparePartialFn
    1757             :                      * to the first tuple on each subsequent page.
    1758             :                      */
    1759           0 :                     key->entryRes[j] =
    1760           0 :                         matchPartialInPendingList(&so->ginstate,
    1761             :                                                   page,
    1762             :                                                   StopHigh,
    1763           0 :                                                   pos->lastOffset,
    1764             :                                                   entry,
    1765             :                                                   datum,
    1766             :                                                   category,
    1767             :                                                   datumExtracted);
    1768             :                 }
    1769             : 
    1770        1540 :                 pos->hasMatchKey[i] |= key->entryRes[j];
    1771             :             }
    1772             :         }
    1773             : 
    1774             :         /* Advance firstOffset over the scanned tuples */
    1775         968 :         pos->firstOffset = pos->lastOffset;
    1776             : 
    1777         968 :         if (GinPageHasFullRow(page))
    1778             :         {
    1779             :             /*
    1780             :              * We have examined all pending entries for the current heap row.
    1781             :              * Break out of loop over pages.
    1782             :              */
    1783         968 :             break;
    1784             :         }
    1785             :         else
    1786             :         {
    1787             :             /*
    1788             :              * Advance to next page of pending entries for the current heap
    1789             :              * row.  Complain if there isn't one.
    1790             :              */
    1791           0 :             ItemPointerData item = pos->item;
    1792             : 
    1793           0 :             if (scanGetCandidate(scan, pos) == false ||
    1794           0 :                 !ItemPointerEquals(&pos->item, &item))
    1795           0 :                 elog(ERROR, "could not find additional pending pages for same heap tuple");
    1796             :         }
    1797             :     }
    1798             : 
    1799             :     /*
    1800             :      * All scan keys except excludeOnly require at least one entry to match.
    1801             :      * excludeOnly keys are an exception, because their implied
    1802             :      * GIN_CAT_EMPTY_QUERY scanEntry always matches.  So return "true" if all
    1803             :      * non-excludeOnly scan keys have at least one match.
    1804             :      */
    1805        1700 :     for (i = 0; i < so->nkeys; i++)
    1806             :     {
    1807        1328 :         if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
    1808         596 :             return false;
    1809             :     }
    1810             : 
    1811         372 :     return true;
    1812             : }
    1813             : 
    1814             : /*
    1815             :  * Collect all matched rows from pending list into bitmap.
    1816             :  */
    1817             : static void
    1818        1158 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
    1819             : {
    1820        1158 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1821             :     MemoryContext oldCtx;
    1822             :     bool        recheck,
    1823             :                 match;
    1824             :     int         i;
    1825             :     pendingPosition pos;
    1826        1158 :     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    1827             :     Page        page;
    1828             :     BlockNumber blkno;
    1829             : 
    1830        1158 :     *ntids = 0;
    1831             : 
    1832             :     /*
    1833             :      * Acquire predicate lock on the metapage, to conflict with any fastupdate
    1834             :      * insertions.
    1835             :      */
    1836        1158 :     PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
    1837             : 
    1838        1158 :     LockBuffer(metabuffer, GIN_SHARE);
    1839        1158 :     page = BufferGetPage(metabuffer);
    1840        1158 :     TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1841        1158 :     blkno = GinPageGetMeta(page)->head;
    1842             : 
    1843             :     /*
    1844             :      * fetch head of list before unlocking metapage. head page must be pinned
    1845             :      * to prevent deletion by vacuum process
    1846             :      */
    1847        1158 :     if (blkno == InvalidBlockNumber)
    1848             :     {
    1849             :         /* No pending list, so proceed with normal scan */
    1850        1046 :         UnlockReleaseBuffer(metabuffer);
    1851        1046 :         return;
    1852             :     }
    1853             : 
    1854         112 :     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    1855         112 :     LockBuffer(pos.pendingBuffer, GIN_SHARE);
    1856         112 :     pos.firstOffset = FirstOffsetNumber;
    1857         112 :     UnlockReleaseBuffer(metabuffer);
    1858         112 :     pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
    1859             : 
    1860             :     /*
    1861             :      * loop for each heap row. scanGetCandidate returns full row or row's
    1862             :      * tuples from first page.
    1863             :      */
    1864        1080 :     while (scanGetCandidate(scan, &pos))
    1865             :     {
    1866             :         /*
    1867             :          * Check entries in tuple and set up entryRes array.
    1868             :          *
    1869             :          * If pending tuples belonging to the current heap row are spread
    1870             :          * across several pages, collectMatchesForHeapRow will read all of
    1871             :          * those pages.
    1872             :          */
    1873         968 :         if (!collectMatchesForHeapRow(scan, &pos))
    1874         596 :             continue;
    1875             : 
    1876             :         /*
    1877             :          * Matching of entries of one row is finished, so check row using
    1878             :          * consistent functions.
    1879             :          */
    1880         372 :         oldCtx = MemoryContextSwitchTo(so->tempCtx);
    1881         372 :         recheck = false;
    1882         372 :         match = true;
    1883             : 
    1884         944 :         for (i = 0; i < so->nkeys; i++)
    1885             :         {
    1886         572 :             GinScanKey  key = so->keys + i;
    1887             : 
    1888         572 :             if (!key->boolConsistentFn(key))
    1889             :             {
    1890           0 :                 match = false;
    1891           0 :                 break;
    1892             :             }
    1893         572 :             recheck |= key->recheckCurItem;
    1894             :         }
    1895             : 
    1896         372 :         MemoryContextSwitchTo(oldCtx);
    1897         372 :         MemoryContextReset(so->tempCtx);
    1898             : 
    1899         372 :         if (match)
    1900             :         {
    1901         372 :             tbm_add_tuples(tbm, &pos.item, 1, recheck);
    1902         372 :             (*ntids)++;
    1903             :         }
    1904             :     }
    1905             : 
    1906         112 :     pfree(pos.hasMatchKey);
    1907             : }
    1908             : 
    1909             : 
    1910             : #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
    1911             : 
    1912             : int64
    1913        1166 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    1914             : {
    1915        1166 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1916             :     int64       ntids;
    1917             :     ItemPointerData iptr;
    1918             :     bool        recheck;
    1919             : 
    1920             :     /*
    1921             :      * Set up the scan keys, and check for unsatisfiable query.
    1922             :      */
    1923        1166 :     ginFreeScanKeys(so);        /* there should be no keys yet, but just to be
    1924             :                                  * sure */
    1925        1166 :     ginNewScanKey(scan);
    1926             : 
    1927        1166 :     if (GinIsVoidRes(scan))
    1928           8 :         return 0;
    1929             : 
    1930        1158 :     ntids = 0;
    1931             : 
    1932             :     /*
    1933             :      * First, scan the pending list and collect any matching entries into the
    1934             :      * bitmap.  After we scan a pending item, some other backend could post it
    1935             :      * into the main index, and so we might visit it a second time during the
    1936             :      * main scan.  This is okay because we'll just re-set the same bit in the
    1937             :      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
    1938             :      * can't support the amgettuple API, however.) Note that it would not do
    1939             :      * to scan the main index before the pending list, since concurrent
    1940             :      * cleanup could then make us miss entries entirely.
    1941             :      */
    1942        1158 :     scanPendingInsert(scan, tbm, &ntids);
    1943             : 
    1944             :     /*
    1945             :      * Now scan the main index.
    1946             :      */
    1947        1158 :     startScan(scan);
    1948             : 
    1949        1158 :     ItemPointerSetMin(&iptr);
    1950             : 
    1951             :     for (;;)
    1952             :     {
    1953      484110 :         CHECK_FOR_INTERRUPTS();
    1954             : 
    1955      484110 :         if (!scanGetItem(scan, iptr, &iptr, &recheck))
    1956        1158 :             break;
    1957             : 
    1958      482952 :         if (ItemPointerIsLossyPage(&iptr))
    1959           0 :             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
    1960             :         else
    1961      482952 :             tbm_add_tuples(tbm, &iptr, 1, recheck);
    1962      482952 :         ntids++;
    1963             :     }
    1964             : 
    1965        1158 :     return ntids;
    1966             : }

Generated by: LCOV version 1.13