LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginget.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 506 604 83.8 %
Date: 2021-12-09 03:08:47 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginget.c
       4             :  *    fetch tuples from a GIN scan.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginget.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/relscan.h"
      19             : #include "common/pg_prng.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/predicate.h"
      22             : #include "utils/datum.h"
      23             : #include "utils/memutils.h"
      24             : #include "utils/rel.h"
      25             : 
      26             : /* GUC parameter */
      27             : int         GinFuzzySearchLimit = 0;
      28             : 
      29             : typedef struct pendingPosition
      30             : {
      31             :     Buffer      pendingBuffer;
      32             :     OffsetNumber firstOffset;
      33             :     OffsetNumber lastOffset;
      34             :     ItemPointerData item;
      35             :     bool       *hasMatchKey;
      36             : } pendingPosition;
      37             : 
      38             : 
      39             : /*
      40             :  * Goes to the next page if current offset is outside of bounds
      41             :  */
      42             : static bool
      43      271048 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
      44             : {
      45      271048 :     Page        page = BufferGetPage(stack->buffer);
      46             : 
      47      271048 :     if (stack->off > PageGetMaxOffsetNumber(page))
      48             :     {
      49             :         /*
      50             :          * We scanned the whole page, so we should take right page
      51             :          */
      52        2566 :         if (GinPageRightMost(page))
      53         284 :             return false;       /* no more pages */
      54             : 
      55        2282 :         stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
      56        2282 :         stack->blkno = BufferGetBlockNumber(stack->buffer);
      57        2282 :         stack->off = FirstOffsetNumber;
      58        2282 :         PredicateLockPage(btree->index, stack->blkno, snapshot);
      59             :     }
      60             : 
      61      270764 :     return true;
      62             : }
      63             : 
      64             : /*
      65             :  * Scan all pages of a posting tree and save all its heap ItemPointers
      66             :  * in scanEntry->matchBitmap
      67             :  */
      68             : static void
      69           8 : scanPostingTree(Relation index, GinScanEntry scanEntry,
      70             :                 BlockNumber rootPostingTree, Snapshot snapshot)
      71             : {
      72             :     GinBtreeData btree;
      73             :     GinBtreeStack *stack;
      74             :     Buffer      buffer;
      75             :     Page        page;
      76             : 
      77             :     /* Descend to the leftmost leaf page */
      78           8 :     stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
      79           8 :     buffer = stack->buffer;
      80             : 
      81           8 :     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
      82             : 
      83           8 :     freeGinBtreeStack(stack);
      84             : 
      85             :     /*
      86             :      * Loop iterates through all leaf pages of posting tree
      87             :      */
      88             :     for (;;)
      89             :     {
      90          20 :         page = BufferGetPage(buffer);
      91          20 :         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
      92             :         {
      93          20 :             int         n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
      94             : 
      95          20 :             scanEntry->predictNumberResult += n;
      96             :         }
      97             : 
      98          20 :         if (GinPageRightMost(page))
      99           8 :             break;              /* no more pages */
     100             : 
     101          12 :         buffer = ginStepRight(buffer, index, GIN_SHARE);
     102             :     }
     103             : 
     104           8 :     UnlockReleaseBuffer(buffer);
     105           8 : }
     106             : 
     107             : /*
     108             :  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
     109             :  * match the search entry.  This supports three different match modes:
     110             :  *
     111             :  * 1. Partial-match support: scan from current point until the
     112             :  *    comparePartialFn says we're done.
     113             :  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
     114             :  *    key for the current attnum) until we hit null items or end of attnum
     115             :  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
     116             :  *    key for the current attnum) until we hit end of attnum
     117             :  *
     118             :  * Returns true if done, false if it's necessary to restart scan from scratch
     119             :  */
     120             : static bool
     121         440 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
     122             :                    GinScanEntry scanEntry, Snapshot snapshot)
     123             : {
     124             :     OffsetNumber attnum;
     125             :     Form_pg_attribute attr;
     126             : 
     127             :     /* Initialize empty bitmap result */
     128         440 :     scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
     129             : 
     130             :     /* Null query cannot partial-match anything */
     131         440 :     if (scanEntry->isPartialMatch &&
     132         248 :         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
     133           0 :         return true;
     134             : 
     135             :     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
     136         440 :     attnum = scanEntry->attnum;
     137         440 :     attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
     138             : 
     139             :     /*
     140             :      * Predicate lock entry leaf page, following pages will be locked by
     141             :      * moveRightIfItNeeded()
     142             :      */
     143         440 :     PredicateLockPage(btree->index, stack->buffer, snapshot);
     144             : 
     145             :     for (;;)
     146      270600 :     {
     147             :         Page        page;
     148             :         IndexTuple  itup;
     149             :         Datum       idatum;
     150             :         GinNullCategory icategory;
     151             : 
     152             :         /*
     153             :          * stack->off points to the interested entry, buffer is already locked
     154             :          */
     155      271040 :         if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     156         440 :             return true;
     157             : 
     158      270756 :         page = BufferGetPage(stack->buffer);
     159      270756 :         TestForOldSnapshot(snapshot, btree->index, page);
     160      270756 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     161             : 
     162             :         /*
     163             :          * If tuple stores another attribute then stop scan
     164             :          */
     165      270756 :         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     166           0 :             return true;
     167             : 
     168             :         /* Safe to fetch attribute value */
     169      270756 :         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
     170             : 
     171             :         /*
     172             :          * Check for appropriate scan stop conditions
     173             :          */
     174      270756 :         if (scanEntry->isPartialMatch)
     175             :         {
     176             :             int32       cmp;
     177             : 
     178             :             /*
     179             :              * In partial match, stop scan at any null (including
     180             :              * placeholders); partial matches never match nulls
     181             :              */
     182        1176 :             if (icategory != GIN_CAT_NORM_KEY)
     183          10 :                 return true;
     184             : 
     185             :             /*----------
     186             :              * Check of partial match.
     187             :              * case cmp == 0 => match
     188             :              * case cmp > 0 => not match and finish scan
     189             :              * case cmp < 0 => not match and continue scan
     190             :              *----------
     191             :              */
     192        1166 :             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
     193             :                                                   btree->ginstate->supportCollation[attnum - 1],
     194             :                                                   scanEntry->queryKey,
     195             :                                                   idatum,
     196             :                                                   UInt16GetDatum(scanEntry->strategy),
     197             :                                                   PointerGetDatum(scanEntry->extra_data)));
     198             : 
     199        1166 :             if (cmp > 0)
     200         126 :                 return true;
     201        1040 :             else if (cmp < 0)
     202             :             {
     203          60 :                 stack->off++;
     204          60 :                 continue;
     205             :             }
     206             :         }
     207      269580 :         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
     208             :         {
     209             :             /*
     210             :              * In ALL mode, we are not interested in null items, so we can
     211             :              * stop if we get to a null-item placeholder (which will be the
     212             :              * last entry for a given attnum).  We do want to include NULL_KEY
     213             :              * and EMPTY_ITEM entries, though.
     214             :              */
     215      269580 :             if (icategory == GIN_CAT_NULL_ITEM)
     216          20 :                 return true;
     217             :         }
     218             : 
     219             :         /*
     220             :          * OK, we want to return the TIDs listed in this entry.
     221             :          */
     222      270540 :         if (GinIsPostingTree(itup))
     223             :         {
     224           8 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     225             : 
     226             :             /*
     227             :              * We should unlock current page (but not unpin) during tree scan
     228             :              * to prevent deadlock with vacuum processes.
     229             :              *
     230             :              * We save current entry value (idatum) to be able to re-find our
     231             :              * tuple after re-locking
     232             :              */
     233           8 :             if (icategory == GIN_CAT_NORM_KEY)
     234           8 :                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
     235             : 
     236           8 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     237             : 
     238             :             /*
     239             :              * Acquire predicate lock on the posting tree.  We already hold a
     240             :              * lock on the entry page, but insertions to the posting tree
     241             :              * don't check for conflicts on that level.
     242             :              */
     243           8 :             PredicateLockPage(btree->index, rootPostingTree, snapshot);
     244             : 
     245             :             /* Collect all the TIDs in this entry's posting tree */
     246           8 :             scanPostingTree(btree->index, scanEntry, rootPostingTree,
     247             :                             snapshot);
     248             : 
     249             :             /*
     250             :              * We lock again the entry page and while it was unlocked insert
     251             :              * might have occurred, so we need to re-find our position.
     252             :              */
     253           8 :             LockBuffer(stack->buffer, GIN_SHARE);
     254           8 :             page = BufferGetPage(stack->buffer);
     255           8 :             if (!GinPageIsLeaf(page))
     256             :             {
     257             :                 /*
     258             :                  * Root page becomes non-leaf while we unlock it. We will
     259             :                  * start again, this situation doesn't occur often - root can
     260             :                  * became a non-leaf only once per life of index.
     261             :                  */
     262           0 :                 return false;
     263             :             }
     264             : 
     265             :             /* Search forward to re-find idatum */
     266             :             for (;;)
     267             :             {
     268           8 :                 if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     269           0 :                     ereport(ERROR,
     270             :                             (errcode(ERRCODE_INTERNAL_ERROR),
     271             :                              errmsg("failed to re-find tuple within index \"%s\"",
     272             :                                     RelationGetRelationName(btree->index))));
     273             : 
     274           8 :                 page = BufferGetPage(stack->buffer);
     275           8 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     276             : 
     277           8 :                 if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
     278             :                 {
     279             :                     Datum       newDatum;
     280             :                     GinNullCategory newCategory;
     281             : 
     282           8 :                     newDatum = gintuple_get_key(btree->ginstate, itup,
     283             :                                                 &newCategory);
     284             : 
     285           8 :                     if (ginCompareEntries(btree->ginstate, attnum,
     286             :                                           newDatum, newCategory,
     287             :                                           idatum, icategory) == 0)
     288           8 :                         break;  /* Found! */
     289             :                 }
     290             : 
     291           0 :                 stack->off++;
     292             :             }
     293             : 
     294           8 :             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
     295           0 :                 pfree(DatumGetPointer(idatum));
     296             :         }
     297             :         else
     298             :         {
     299             :             ItemPointer ipd;
     300             :             int         nipd;
     301             : 
     302      270532 :             ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
     303      270532 :             tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
     304      270532 :             scanEntry->predictNumberResult += GinGetNPosting(itup);
     305      270532 :             pfree(ipd);
     306             :         }
     307             : 
     308             :         /*
     309             :          * Done with this entry, go to the next
     310             :          */
     311      270540 :         stack->off++;
     312             :     }
     313             : }
     314             : 
     315             : /*
     316             :  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
     317             :  */
     318             : static void
     319        3108 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
     320             : {
     321             :     GinBtreeData btreeEntry;
     322             :     GinBtreeStack *stackEntry;
     323             :     Page        page;
     324             :     bool        needUnlock;
     325             : 
     326        3108 : restartScanEntry:
     327        3108 :     entry->buffer = InvalidBuffer;
     328        3108 :     ItemPointerSetMin(&entry->curItem);
     329        3108 :     entry->offset = InvalidOffsetNumber;
     330        3108 :     if (entry->list)
     331           0 :         pfree(entry->list);
     332        3108 :     entry->list = NULL;
     333        3108 :     entry->nlist = 0;
     334        3108 :     entry->matchBitmap = NULL;
     335        3108 :     entry->matchResult = NULL;
     336        3108 :     entry->reduceResult = false;
     337        3108 :     entry->predictNumberResult = 0;
     338             : 
     339             :     /*
     340             :      * we should find entry, and begin scan of posting tree or just store
     341             :      * posting list in memory
     342             :      */
     343        3108 :     ginPrepareEntryScan(&btreeEntry, entry->attnum,
     344        3108 :                         entry->queryKey, entry->queryCategory,
     345             :                         ginstate);
     346        3108 :     stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot);
     347        3108 :     page = BufferGetPage(stackEntry->buffer);
     348             : 
     349             :     /* ginFindLeafPage() will have already checked snapshot age. */
     350        3108 :     needUnlock = true;
     351             : 
     352        3108 :     entry->isFinished = true;
     353             : 
     354        3108 :     if (entry->isPartialMatch ||
     355        2860 :         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
     356             :     {
     357             :         /*
     358             :          * btreeEntry.findItem locates the first item >= given search key.
     359             :          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
     360             :          * because of the way the GIN_CAT_EMPTY_QUERY category code is
     361             :          * assigned.)  We scan forward from there and collect all TIDs needed
     362             :          * for the entry type.
     363             :          */
     364         440 :         btreeEntry.findItem(&btreeEntry, stackEntry);
     365         440 :         if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
     366         440 :             == false)
     367             :         {
     368             :             /*
     369             :              * GIN tree was seriously restructured, so we will cleanup all
     370             :              * found data and rescan. See comments near 'return false' in
     371             :              * collectMatchBitmap()
     372             :              */
     373           0 :             if (entry->matchBitmap)
     374             :             {
     375           0 :                 if (entry->matchIterator)
     376           0 :                     tbm_end_iterate(entry->matchIterator);
     377           0 :                 entry->matchIterator = NULL;
     378           0 :                 tbm_free(entry->matchBitmap);
     379           0 :                 entry->matchBitmap = NULL;
     380             :             }
     381           0 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     382           0 :             freeGinBtreeStack(stackEntry);
     383           0 :             goto restartScanEntry;
     384             :         }
     385             : 
     386         440 :         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
     387             :         {
     388         362 :             entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
     389         362 :             entry->isFinished = false;
     390             :         }
     391             :     }
     392        2668 :     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     393             :     {
     394        1770 :         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
     395             : 
     396        1770 :         if (GinIsPostingTree(itup))
     397             :         {
     398          50 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     399             :             GinBtreeStack *stack;
     400             :             Page        page;
     401             :             ItemPointerData minItem;
     402             : 
     403             :             /*
     404             :              * This is an equality scan, so lock the root of the posting tree.
     405             :              * It represents a lock on the exact key value, and covers all the
     406             :              * items in the posting tree.
     407             :              */
     408          50 :             PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
     409             : 
     410             :             /*
     411             :              * We should unlock entry page before touching posting tree to
     412             :              * prevent deadlocks with vacuum processes. Because entry is never
     413             :              * deleted from page and posting tree is never reduced to the
     414             :              * posting list, we can unlock page after getting BlockNumber of
     415             :              * root of posting tree.
     416             :              */
     417          50 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     418          50 :             needUnlock = false;
     419             : 
     420          50 :             stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
     421             :                                             rootPostingTree, snapshot);
     422          50 :             entry->buffer = stack->buffer;
     423             : 
     424             :             /*
     425             :              * We keep buffer pinned because we need to prevent deletion of
     426             :              * page during scan. See GIN's vacuum implementation. RefCount is
     427             :              * increased to keep buffer pinned after freeGinBtreeStack() call.
     428             :              */
     429          50 :             IncrBufferRefCount(entry->buffer);
     430             : 
     431          50 :             page = BufferGetPage(entry->buffer);
     432             : 
     433             :             /*
     434             :              * Load the first page into memory.
     435             :              */
     436          50 :             ItemPointerSetMin(&minItem);
     437          50 :             entry->list = GinDataLeafPageGetItems(page, &entry->nlist, minItem);
     438             : 
     439          50 :             entry->predictNumberResult = stack->predictNumber * entry->nlist;
     440             : 
     441          50 :             LockBuffer(entry->buffer, GIN_UNLOCK);
     442          50 :             freeGinBtreeStack(stack);
     443          50 :             entry->isFinished = false;
     444             :         }
     445             :         else
     446             :         {
     447             :             /*
     448             :              * Lock the entry leaf page.  This is more coarse-grained than
     449             :              * necessary, because it will conflict with any insertions that
     450             :              * land on the same leaf page, not only the exact key we searched
     451             :              * for.  But locking an individual tuple would require updating
     452             :              * that lock whenever it moves because of insertions or vacuums,
     453             :              * which seems too complicated.
     454             :              */
     455        1720 :             PredicateLockPage(ginstate->index,
     456             :                               BufferGetBlockNumber(stackEntry->buffer),
     457             :                               snapshot);
     458        1720 :             if (GinGetNPosting(itup) > 0)
     459             :             {
     460        1716 :                 entry->list = ginReadTuple(ginstate, entry->attnum, itup,
     461             :                                            &entry->nlist);
     462        1716 :                 entry->predictNumberResult = entry->nlist;
     463             : 
     464        1716 :                 entry->isFinished = false;
     465             :             }
     466             :         }
     467             :     }
     468             :     else
     469             :     {
     470             :         /*
     471             :          * No entry found.  Predicate lock the leaf page, to lock the place
     472             :          * where the entry would've been, had there been one.
     473             :          */
     474         898 :         PredicateLockPage(ginstate->index,
     475             :                           BufferGetBlockNumber(stackEntry->buffer), snapshot);
     476             :     }
     477             : 
     478        3108 :     if (needUnlock)
     479        3058 :         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     480        3108 :     freeGinBtreeStack(stackEntry);
     481        3108 : }
     482             : 
     483             : /*
     484             :  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
     485             :  * least frequent items first.
     486             :  */
     487             : static int
     488        3210 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
     489             : {
     490        3210 :     const GinScanKey key = (const GinScanKey) arg;
     491        3210 :     int         i1 = *(const int *) a1;
     492        3210 :     int         i2 = *(const int *) a2;
     493        3210 :     uint32      n1 = key->scanEntry[i1]->predictNumberResult;
     494        3210 :     uint32      n2 = key->scanEntry[i2]->predictNumberResult;
     495             : 
     496        3210 :     if (n1 < n2)
     497         550 :         return -1;
     498        2660 :     else if (n1 == n2)
     499        1080 :         return 0;
     500             :     else
     501        1580 :         return 1;
     502             : }
     503             : 
     504             : static void
     505        1314 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
     506             : {
     507        1314 :     MemoryContext oldCtx = CurrentMemoryContext;
     508             :     int         i;
     509             :     int         j;
     510             :     int        *entryIndexes;
     511             : 
     512        1314 :     ItemPointerSetMin(&key->curItem);
     513        1314 :     key->curItemMatches = false;
     514        1314 :     key->recheckCurItem = false;
     515        1314 :     key->isFinished = false;
     516             : 
     517             :     /*
     518             :      * Divide the entries into two distinct sets: required and additional.
     519             :      * Additional entries are not enough for a match alone, without any items
     520             :      * from the required set, but are needed by the consistent function to
     521             :      * decide if an item matches. When scanning, we can skip over items from
     522             :      * additional entries that have no corresponding matches in any of the
     523             :      * required entries. That speeds up queries like "frequent & rare"
     524             :      * considerably, if the frequent term can be put in the additional set.
     525             :      *
     526             :      * There can be many legal ways to divide them entries into these two
     527             :      * sets. A conservative division is to just put everything in the required
     528             :      * set, but the more you can put in the additional set, the more you can
     529             :      * skip during the scan. To maximize skipping, we try to put as many
     530             :      * frequent items as possible into additional, and less frequent ones into
     531             :      * required. To do that, sort the entries by frequency
     532             :      * (predictNumberResult), and put entries into the required set in that
     533             :      * order, until the consistent function says that none of the remaining
     534             :      * entries can form a match, without any items from the required set. The
     535             :      * rest go to the additional set.
     536             :      *
     537             :      * Exclude-only scan keys are known to have no required entries.
     538             :      */
     539        1314 :     if (key->excludeOnly)
     540             :     {
     541          28 :         MemoryContextSwitchTo(so->keyCtx);
     542             : 
     543          28 :         key->nrequired = 0;
     544          28 :         key->nadditional = key->nentries;
     545          28 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     546          32 :         for (i = 0; i < key->nadditional; i++)
     547           4 :             key->additionalEntries[i] = key->scanEntry[i];
     548             :     }
     549        1286 :     else if (key->nentries > 1)
     550             :     {
     551         416 :         MemoryContextSwitchTo(so->tempCtx);
     552             : 
     553         416 :         entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
     554        2650 :         for (i = 0; i < key->nentries; i++)
     555        2234 :             entryIndexes[i] = i;
     556         416 :         qsort_arg(entryIndexes, key->nentries, sizeof(int),
     557             :                   entryIndexByFrequencyCmp, key);
     558             : 
     559        1610 :         for (i = 0; i < key->nentries - 1; i++)
     560             :         {
     561             :             /* Pass all entries <= i as FALSE, and the rest as MAYBE */
     562       69100 :             for (j = 0; j <= i; j++)
     563       67586 :                 key->entryRes[entryIndexes[j]] = GIN_FALSE;
     564       69884 :             for (j = i + 1; j < key->nentries; j++)
     565       68370 :                 key->entryRes[entryIndexes[j]] = GIN_MAYBE;
     566             : 
     567        1514 :             if (key->triConsistentFn(key) == GIN_FALSE)
     568         320 :                 break;
     569             :         }
     570             :         /* i is now the last required entry. */
     571             : 
     572         416 :         MemoryContextSwitchTo(so->keyCtx);
     573             : 
     574         416 :         key->nrequired = i + 1;
     575         416 :         key->nadditional = key->nentries - key->nrequired;
     576         416 :         key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
     577         416 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     578             : 
     579         416 :         j = 0;
     580        2026 :         for (i = 0; i < key->nrequired; i++)
     581        1610 :             key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
     582        1040 :         for (i = 0; i < key->nadditional; i++)
     583         624 :             key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
     584             : 
     585             :         /* clean up after consistentFn calls (also frees entryIndexes) */
     586         416 :         MemoryContextReset(so->tempCtx);
     587             :     }
     588             :     else
     589             :     {
     590         870 :         MemoryContextSwitchTo(so->keyCtx);
     591             : 
     592         870 :         key->nrequired = 1;
     593         870 :         key->nadditional = 0;
     594         870 :         key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
     595         870 :         key->requiredEntries[0] = key->scanEntry[0];
     596             :     }
     597        1314 :     MemoryContextSwitchTo(oldCtx);
     598        1314 : }
     599             : 
     600             : static void
     601        1226 : startScan(IndexScanDesc scan)
     602             : {
     603        1226 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
     604        1226 :     GinState   *ginstate = &so->ginstate;
     605             :     uint32      i;
     606             : 
     607        4334 :     for (i = 0; i < so->totalentries; i++)
     608        3108 :         startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
     609             : 
     610        1226 :     if (GinFuzzySearchLimit > 0)
     611             :     {
     612             :         /*
     613             :          * If all of keys more than threshold we will try to reduce result, we
     614             :          * hope (and only hope, for intersection operation of array our
     615             :          * supposition isn't true), that total result will not more than
     616             :          * minimal predictNumberResult.
     617             :          */
     618           4 :         bool        reduce = true;
     619             : 
     620           8 :         for (i = 0; i < so->totalentries; i++)
     621             :         {
     622           4 :             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
     623             :             {
     624           0 :                 reduce = false;
     625           0 :                 break;
     626             :             }
     627             :         }
     628           4 :         if (reduce)
     629             :         {
     630           8 :             for (i = 0; i < so->totalentries; i++)
     631             :             {
     632           4 :                 so->entries[i]->predictNumberResult /= so->totalentries;
     633           4 :                 so->entries[i]->reduceResult = true;
     634             :             }
     635             :         }
     636             :     }
     637             : 
     638             :     /*
     639             :      * Now that we have the estimates for the entry frequencies, finish
     640             :      * initializing the scan keys.
     641             :      */
     642        2540 :     for (i = 0; i < so->nkeys; i++)
     643        1314 :         startScanKey(ginstate, so, so->keys + i);
     644        1226 : }
     645             : 
     646             : /*
     647             :  * Load the next batch of item pointers from a posting tree.
     648             :  *
     649             :  * Note that we copy the page into GinScanEntry->list array and unlock it, but
     650             :  * keep it pinned to prevent interference with vacuum.
     651             :  */
     652             : static void
     653          74 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
     654             :                    ItemPointerData advancePast, Snapshot snapshot)
     655             : {
     656             :     Page        page;
     657             :     int         i;
     658             :     bool        stepright;
     659             : 
     660          74 :     if (!BufferIsValid(entry->buffer))
     661             :     {
     662           0 :         entry->isFinished = true;
     663           0 :         return;
     664             :     }
     665             : 
     666             :     /*
     667             :      * We have two strategies for finding the correct page: step right from
     668             :      * the current page, or descend the tree again from the root. If
     669             :      * advancePast equals the current item, the next matching item should be
     670             :      * on the next page, so we step right. Otherwise, descend from root.
     671             :      */
     672          74 :     if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
     673             :     {
     674          70 :         stepright = true;
     675          70 :         LockBuffer(entry->buffer, GIN_SHARE);
     676             :     }
     677             :     else
     678             :     {
     679             :         GinBtreeStack *stack;
     680             : 
     681           4 :         ReleaseBuffer(entry->buffer);
     682             : 
     683             :         /*
     684             :          * Set the search key, and find the correct leaf page.
     685             :          */
     686           4 :         if (ItemPointerIsLossyPage(&advancePast))
     687             :         {
     688           0 :             ItemPointerSet(&entry->btree.itemptr,
     689             :                            GinItemPointerGetBlockNumber(&advancePast) + 1,
     690             :                            FirstOffsetNumber);
     691             :         }
     692             :         else
     693             :         {
     694           4 :             ItemPointerSet(&entry->btree.itemptr,
     695             :                            GinItemPointerGetBlockNumber(&advancePast),
     696             :                            OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
     697             :         }
     698           4 :         entry->btree.fullScan = false;
     699           4 :         stack = ginFindLeafPage(&entry->btree, true, false, snapshot);
     700             : 
     701             :         /* we don't need the stack, just the buffer. */
     702           4 :         entry->buffer = stack->buffer;
     703           4 :         IncrBufferRefCount(entry->buffer);
     704           4 :         freeGinBtreeStack(stack);
     705           4 :         stepright = false;
     706             :     }
     707             : 
     708          74 :     elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
     709             :          GinItemPointerGetBlockNumber(&advancePast),
     710             :          GinItemPointerGetOffsetNumber(&advancePast),
     711             :          !stepright);
     712             : 
     713          74 :     page = BufferGetPage(entry->buffer);
     714             :     for (;;)
     715             :     {
     716          78 :         entry->offset = InvalidOffsetNumber;
     717          78 :         if (entry->list)
     718             :         {
     719          70 :             pfree(entry->list);
     720          70 :             entry->list = NULL;
     721          70 :             entry->nlist = 0;
     722             :         }
     723             : 
     724          78 :         if (stepright)
     725             :         {
     726             :             /*
     727             :              * We've processed all the entries on this page. If it was the
     728             :              * last page in the tree, we're done.
     729             :              */
     730          74 :             if (GinPageRightMost(page))
     731             :             {
     732           4 :                 UnlockReleaseBuffer(entry->buffer);
     733           4 :                 entry->buffer = InvalidBuffer;
     734           4 :                 entry->isFinished = true;
     735           4 :                 return;
     736             :             }
     737             : 
     738             :             /*
     739             :              * Step to next page, following the right link. then find the
     740             :              * first ItemPointer greater than advancePast.
     741             :              */
     742          70 :             entry->buffer = ginStepRight(entry->buffer,
     743             :                                          ginstate->index,
     744             :                                          GIN_SHARE);
     745          70 :             page = BufferGetPage(entry->buffer);
     746             :         }
     747          74 :         stepright = true;
     748             : 
     749          74 :         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
     750           0 :             continue;           /* page was deleted by concurrent vacuum */
     751             : 
     752             :         /*
     753             :          * The first item > advancePast might not be on this page, but
     754             :          * somewhere to the right, if the page was split, or a non-match from
     755             :          * another key in the query allowed us to skip some items from this
     756             :          * entry. Keep following the right-links until we re-find the correct
     757             :          * page.
     758             :          */
     759          98 :         if (!GinPageRightMost(page) &&
     760          24 :             ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
     761             :         {
     762             :             /*
     763             :              * the item we're looking is > the right bound of the page, so it
     764             :              * can't be on this page.
     765             :              */
     766           0 :             continue;
     767             :         }
     768             : 
     769          74 :         entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
     770             : 
     771        1290 :         for (i = 0; i < entry->nlist; i++)
     772             :         {
     773        1286 :             if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
     774             :             {
     775          70 :                 entry->offset = i;
     776             : 
     777          70 :                 if (GinPageRightMost(page))
     778             :                 {
     779             :                     /* after processing the copied items, we're done. */
     780          46 :                     UnlockReleaseBuffer(entry->buffer);
     781          46 :                     entry->buffer = InvalidBuffer;
     782             :                 }
     783             :                 else
     784          24 :                     LockBuffer(entry->buffer, GIN_UNLOCK);
     785          70 :                 return;
     786             :             }
     787             :         }
     788             :     }
     789             : }
     790             : 
     791             : #define gin_rand() pg_prng_double(&pg_global_prng_state)
     792             : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
     793             : 
     794             : /*
     795             :  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
     796             :  * of one scan key, or sets entry->isFinished to true if there are no more.
     797             :  *
     798             :  * Item pointers are returned in ascending order.
     799             :  *
     800             :  * Note: this can return a "lossy page" item pointer, indicating that the
     801             :  * entry potentially matches all items on that heap page.  However, it is
     802             :  * not allowed to return both a lossy page pointer and exact (regular)
     803             :  * item pointers for the same page.  (Doing so would break the key-combination
     804             :  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
     805             :  * current implementation this is guaranteed by the behavior of tidbitmaps.
     806             :  */
     807             : static void
     808      823802 : entryGetItem(GinState *ginstate, GinScanEntry entry,
     809             :              ItemPointerData advancePast, Snapshot snapshot)
     810             : {
     811             :     Assert(!entry->isFinished);
     812             : 
     813             :     Assert(!ItemPointerIsValid(&entry->curItem) ||
     814             :            ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
     815             : 
     816      823802 :     if (entry->matchBitmap)
     817             :     {
     818             :         /* A bitmap result */
     819      187782 :         BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
     820      187782 :         OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
     821             : 
     822             :         for (;;)
     823             :         {
     824             :             /*
     825             :              * If we've exhausted all items on this block, move to next block
     826             :              * in the bitmap.
     827             :              */
     828      191130 :             while (entry->matchResult == NULL ||
     829      190768 :                    (entry->matchResult->ntuples >= 0 &&
     830      190768 :                     entry->offset >= entry->matchResult->ntuples) ||
     831      187420 :                    entry->matchResult->blockno < advancePastBlk ||
     832      187420 :                    (ItemPointerIsLossyPage(&advancePast) &&
     833           0 :                     entry->matchResult->blockno == advancePastBlk))
     834             :             {
     835        3710 :                 entry->matchResult = tbm_iterate(entry->matchIterator);
     836             : 
     837        3710 :                 if (entry->matchResult == NULL)
     838             :                 {
     839         362 :                     ItemPointerSetInvalid(&entry->curItem);
     840         362 :                     tbm_end_iterate(entry->matchIterator);
     841         362 :                     entry->matchIterator = NULL;
     842         362 :                     entry->isFinished = true;
     843         362 :                     break;
     844             :                 }
     845             : 
     846             :                 /*
     847             :                  * Reset counter to the beginning of entry->matchResult. Note:
     848             :                  * entry->offset is still greater than matchResult->ntuples if
     849             :                  * matchResult is lossy.  So, on next call we will get next
     850             :                  * result from TIDBitmap.
     851             :                  */
     852        3348 :                 entry->offset = 0;
     853             :             }
     854      187782 :             if (entry->isFinished)
     855         362 :                 break;
     856             : 
     857             :             /*
     858             :              * We're now on the first page after advancePast which has any
     859             :              * items on it. If it's a lossy result, return that.
     860             :              */
     861      187420 :             if (entry->matchResult->ntuples < 0)
     862             :             {
     863           0 :                 ItemPointerSetLossyPage(&entry->curItem,
     864             :                                         entry->matchResult->blockno);
     865             : 
     866             :                 /*
     867             :                  * We might as well fall out of the loop; we could not
     868             :                  * estimate number of results on this page to support correct
     869             :                  * reducing of result even if it's enabled.
     870             :                  */
     871           0 :                 break;
     872             :             }
     873             : 
     874             :             /*
     875             :              * Not a lossy page. Skip over any offsets <= advancePast, and
     876             :              * return that.
     877             :              */
     878      187420 :             if (entry->matchResult->blockno == advancePastBlk)
     879             :             {
     880             :                 /*
     881             :                  * First, do a quick check against the last offset on the
     882             :                  * page. If that's > advancePast, so are all the other
     883             :                  * offsets, so just go back to the top to get the next page.
     884             :                  */
     885      184434 :                 if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
     886             :                 {
     887           0 :                     entry->offset = entry->matchResult->ntuples;
     888           0 :                     continue;
     889             :                 }
     890             : 
     891             :                 /* Otherwise scan to find the first item > advancePast */
     892      184434 :                 while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
     893           0 :                     entry->offset++;
     894             :             }
     895             : 
     896      187420 :             ItemPointerSet(&entry->curItem,
     897             :                            entry->matchResult->blockno,
     898             :                            entry->matchResult->offsets[entry->offset]);
     899      187420 :             entry->offset++;
     900             : 
     901             :             /* Done unless we need to reduce the result */
     902      187420 :             if (!entry->reduceResult || !dropItem(entry))
     903             :                 break;
     904             :         }
     905             :     }
     906      636020 :     else if (!BufferIsValid(entry->buffer))
     907             :     {
     908             :         /*
     909             :          * A posting list from an entry tuple, or the last page of a posting
     910             :          * tree.
     911             :          */
     912             :         for (;;)
     913             :         {
     914      303818 :             if (entry->offset >= entry->nlist)
     915             :             {
     916        1322 :                 ItemPointerSetInvalid(&entry->curItem);
     917        1322 :                 entry->isFinished = true;
     918        1322 :                 break;
     919             :             }
     920             : 
     921      302496 :             entry->curItem = entry->list[entry->offset++];
     922             : 
     923             :             /* If we're not past advancePast, keep scanning */
     924      302496 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     925       53694 :                 continue;
     926             : 
     927             :             /* Done unless we need to reduce the result */
     928      248802 :             if (!entry->reduceResult || !dropItem(entry))
     929             :                 break;
     930             :         }
     931             :     }
     932             :     else
     933             :     {
     934             :         /* A posting tree */
     935             :         for (;;)
     936             :         {
     937             :             /* If we've processed the current batch, load more items */
     938      510328 :             while (entry->offset >= entry->nlist)
     939             :             {
     940          74 :                 entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
     941             : 
     942          74 :                 if (entry->isFinished)
     943             :                 {
     944           4 :                     ItemPointerSetInvalid(&entry->curItem);
     945           4 :                     return;
     946             :                 }
     947             :             }
     948             : 
     949      510254 :             entry->curItem = entry->list[entry->offset++];
     950             : 
     951             :             /* If we're not past advancePast, keep scanning */
     952      510254 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     953       31404 :                 continue;
     954             : 
     955             :             /* Done unless we need to reduce the result */
     956      478850 :             if (!entry->reduceResult || !dropItem(entry))
     957             :                 break;
     958             : 
     959             :             /*
     960             :              * Advance advancePast (so that entryLoadMoreItems will load the
     961             :              * right data), and keep scanning
     962             :              */
     963       79296 :             advancePast = entry->curItem;
     964             :         }
     965             :     }
     966             : }
     967             : 
     968             : /*
     969             :  * Identify the "current" item among the input entry streams for this scan key
     970             :  * that is greater than advancePast, and test whether it passes the scan key
     971             :  * qual condition.
     972             :  *
     973             :  * The current item is the smallest curItem among the inputs.  key->curItem
     974             :  * is set to that value.  key->curItemMatches is set to indicate whether that
     975             :  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
     976             :  * iff recheck is needed for this item pointer (including the case where the
     977             :  * item pointer is a lossy page pointer).
     978             :  *
     979             :  * If all entry streams are exhausted, sets key->isFinished to true.
     980             :  *
     981             :  * Item pointers must be returned in ascending order.
     982             :  *
     983             :  * Note: this can return a "lossy page" item pointer, indicating that the
     984             :  * key potentially matches all items on that heap page.  However, it is
     985             :  * not allowed to return both a lossy page pointer and exact (regular)
     986             :  * item pointers for the same page.  (Doing so would break the key-combination
     987             :  * logic in scanGetItem.)
     988             :  */
     989             : static void
     990      748368 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
     991             :            ItemPointerData advancePast, Snapshot snapshot)
     992             : {
     993             :     ItemPointerData minItem;
     994             :     ItemPointerData curPageLossy;
     995             :     uint32      i;
     996             :     bool        haveLossyEntry;
     997             :     GinScanEntry entry;
     998             :     GinTernaryValue res;
     999             :     MemoryContext oldCtx;
    1000             :     bool        allFinished;
    1001             : 
    1002             :     Assert(!key->isFinished);
    1003             : 
    1004             :     /*
    1005             :      * We might have already tested this item; if so, no need to repeat work.
    1006             :      * (Note: the ">" case can happen, if advancePast is exact but we
    1007             :      * previously had to set curItem to a lossy-page pointer.)
    1008             :      */
    1009      748368 :     if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
    1010        1244 :         return;
    1011             : 
    1012             :     /*
    1013             :      * Find the minimum item > advancePast among the active entry streams.
    1014             :      *
    1015             :      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
    1016             :      * offset (0xffff), so that it will sort after any exact entries for the
    1017             :      * same page.  So we'll prefer to return exact pointers not lossy
    1018             :      * pointers, which is good.
    1019             :      */
    1020      748350 :     ItemPointerSetMax(&minItem);
    1021      748350 :     allFinished = true;
    1022     2190566 :     for (i = 0; i < key->nrequired; i++)
    1023             :     {
    1024     1442216 :         entry = key->requiredEntries[i];
    1025             : 
    1026     1442216 :         if (entry->isFinished)
    1027      523492 :             continue;
    1028             : 
    1029             :         /*
    1030             :          * Advance this stream if necessary.
    1031             :          *
    1032             :          * In particular, since entry->curItem was initialized with
    1033             :          * ItemPointerSetMin, this ensures we fetch the first item for each
    1034             :          * entry on the first call.
    1035             :          */
    1036      918724 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1037             :         {
    1038      773306 :             entryGetItem(ginstate, entry, advancePast, snapshot);
    1039      773306 :             if (entry->isFinished)
    1040        1514 :                 continue;
    1041             :         }
    1042             : 
    1043      917210 :         allFinished = false;
    1044      917210 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1045      835288 :             minItem = entry->curItem;
    1046             :     }
    1047             : 
    1048      748350 :     if (allFinished && !key->excludeOnly)
    1049             :     {
    1050             :         /* all entries are finished */
    1051        1226 :         key->isFinished = true;
    1052        1226 :         return;
    1053             :     }
    1054             : 
    1055      747124 :     if (!key->excludeOnly)
    1056             :     {
    1057             :         /*
    1058             :          * For a normal scan key, we now know there are no matches < minItem.
    1059             :          *
    1060             :          * If minItem is lossy, it means that there were no exact items on the
    1061             :          * page among requiredEntries, because lossy pointers sort after exact
    1062             :          * items. However, there might be exact items for the same page among
    1063             :          * additionalEntries, so we mustn't advance past them.
    1064             :          */
    1065      742802 :         if (ItemPointerIsLossyPage(&minItem))
    1066             :         {
    1067           0 :             if (GinItemPointerGetBlockNumber(&advancePast) <
    1068           0 :                 GinItemPointerGetBlockNumber(&minItem))
    1069             :             {
    1070           0 :                 ItemPointerSet(&advancePast,
    1071             :                                GinItemPointerGetBlockNumber(&minItem),
    1072             :                                InvalidOffsetNumber);
    1073             :             }
    1074             :         }
    1075             :         else
    1076             :         {
    1077             :             Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
    1078      742802 :             ItemPointerSet(&advancePast,
    1079             :                            GinItemPointerGetBlockNumber(&minItem),
    1080             :                            OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
    1081             :         }
    1082             :     }
    1083             :     else
    1084             :     {
    1085             :         /*
    1086             :          * excludeOnly scan keys don't have any entries that are necessarily
    1087             :          * present in matching items.  So, we consider the item just after
    1088             :          * advancePast.
    1089             :          */
    1090             :         Assert(key->nrequired == 0);
    1091        4322 :         ItemPointerSet(&minItem,
    1092             :                        GinItemPointerGetBlockNumber(&advancePast),
    1093             :                        OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
    1094             :     }
    1095             : 
    1096             :     /*
    1097             :      * We might not have loaded all the entry streams for this TID yet. We
    1098             :      * could call the consistent function, passing MAYBE for those entries, to
    1099             :      * see if it can decide if this TID matches based on the information we
    1100             :      * have. But if the consistent-function is expensive, and cannot in fact
    1101             :      * decide with partial information, that could be a big loss. So, load all
    1102             :      * the additional entries, before calling the consistent function.
    1103             :      */
    1104      808922 :     for (i = 0; i < key->nadditional; i++)
    1105             :     {
    1106       61798 :         entry = key->additionalEntries[i];
    1107             : 
    1108       61798 :         if (entry->isFinished)
    1109         416 :             continue;
    1110             : 
    1111       61382 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1112             :         {
    1113       50496 :             entryGetItem(ginstate, entry, advancePast, snapshot);
    1114       50496 :             if (entry->isFinished)
    1115         174 :                 continue;
    1116             :         }
    1117             : 
    1118             :         /*
    1119             :          * Normally, none of the items in additionalEntries can have a curItem
    1120             :          * larger than minItem. But if minItem is a lossy page, then there
    1121             :          * might be exact items on the same page among additionalEntries.
    1122             :          */
    1123       61208 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1124             :         {
    1125             :             Assert(ItemPointerIsLossyPage(&minItem));
    1126           0 :             minItem = entry->curItem;
    1127             :         }
    1128             :     }
    1129             : 
    1130             :     /*
    1131             :      * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
    1132             :      * and perform consistentFn test.
    1133             :      *
    1134             :      * Lossy-page entries pose a problem, since we don't know the correct
    1135             :      * entryRes state to pass to the consistentFn, and we also don't know what
    1136             :      * its combining logic will be (could be AND, OR, or even NOT). If the
    1137             :      * logic is OR then the consistentFn might succeed for all items in the
    1138             :      * lossy page even when none of the other entries match.
    1139             :      *
    1140             :      * Our strategy is to call the tri-state consistent function, with the
    1141             :      * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
    1142             :      * returns FALSE, none of the lossy items alone are enough for a match, so
    1143             :      * we don't need to return a lossy-page pointer. Otherwise, return a
    1144             :      * lossy-page pointer to indicate that the whole heap page must be
    1145             :      * checked.  (On subsequent calls, we'll do nothing until minItem is past
    1146             :      * the page altogether, thus ensuring that we never return both regular
    1147             :      * and lossy pointers for the same page.)
    1148             :      *
    1149             :      * An exception is that it doesn't matter what we pass for lossy pointers
    1150             :      * in "hidden" entries, because the consistentFn's result can't depend on
    1151             :      * them. We could pass them as MAYBE as well, but if we're using the
    1152             :      * "shim" implementation of a tri-state consistent function (see
    1153             :      * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
    1154             :      * them as true.
    1155             :      *
    1156             :      * Note that only lossy-page entries pointing to the current item's page
    1157             :      * should trigger this processing; we might have future lossy pages in the
    1158             :      * entry array, but they aren't relevant yet.
    1159             :      */
    1160      747124 :     key->curItem = minItem;
    1161      747124 :     ItemPointerSetLossyPage(&curPageLossy,
    1162             :                             GinItemPointerGetBlockNumber(&key->curItem));
    1163      747124 :     haveLossyEntry = false;
    1164     2248718 :     for (i = 0; i < key->nentries; i++)
    1165             :     {
    1166     1501594 :         entry = key->scanEntry[i];
    1167     2480012 :         if (entry->isFinished == false &&
    1168      978418 :             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1169             :         {
    1170           0 :             if (i < key->nuserentries)
    1171           0 :                 key->entryRes[i] = GIN_MAYBE;
    1172             :             else
    1173           0 :                 key->entryRes[i] = GIN_TRUE;
    1174           0 :             haveLossyEntry = true;
    1175             :         }
    1176             :         else
    1177     1501594 :             key->entryRes[i] = GIN_FALSE;
    1178             :     }
    1179             : 
    1180             :     /* prepare for calling consistentFn in temp context */
    1181      747124 :     oldCtx = MemoryContextSwitchTo(tempCtx);
    1182             : 
    1183      747124 :     if (haveLossyEntry)
    1184             :     {
    1185             :         /* Have lossy-page entries, so see if whole page matches */
    1186           0 :         res = key->triConsistentFn(key);
    1187             : 
    1188           0 :         if (res == GIN_TRUE || res == GIN_MAYBE)
    1189             :         {
    1190             :             /* Yes, so clean up ... */
    1191           0 :             MemoryContextSwitchTo(oldCtx);
    1192           0 :             MemoryContextReset(tempCtx);
    1193             : 
    1194             :             /* and return lossy pointer for whole page */
    1195           0 :             key->curItem = curPageLossy;
    1196           0 :             key->curItemMatches = true;
    1197           0 :             key->recheckCurItem = true;
    1198           0 :             return;
    1199             :         }
    1200             :     }
    1201             : 
    1202             :     /*
    1203             :      * At this point we know that we don't need to return a lossy whole-page
    1204             :      * pointer, but we might have matches for individual exact item pointers,
    1205             :      * possibly in combination with a lossy pointer. Pass lossy pointers as
    1206             :      * MAYBE to the ternary consistent function, to let it decide if this
    1207             :      * tuple satisfies the overall key, even though we don't know if the lossy
    1208             :      * entries match.
    1209             :      *
    1210             :      * Prepare entryRes array to be passed to consistentFn.
    1211             :      */
    1212     2248718 :     for (i = 0; i < key->nentries; i++)
    1213             :     {
    1214     1501594 :         entry = key->scanEntry[i];
    1215     1501594 :         if (entry->isFinished)
    1216      523176 :             key->entryRes[i] = GIN_FALSE;
    1217             : #if 0
    1218             : 
    1219             :         /*
    1220             :          * This case can't currently happen, because we loaded all the entries
    1221             :          * for this item earlier.
    1222             :          */
    1223             :         else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1224             :             key->entryRes[i] = GIN_MAYBE;
    1225             : #endif
    1226      978418 :         else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1227           0 :             key->entryRes[i] = GIN_MAYBE;
    1228      978418 :         else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
    1229      813762 :             key->entryRes[i] = GIN_TRUE;
    1230             :         else
    1231      164656 :             key->entryRes[i] = GIN_FALSE;
    1232             :     }
    1233             : 
    1234      747124 :     res = key->triConsistentFn(key);
    1235             : 
    1236      747124 :     switch (res)
    1237             :     {
    1238      647000 :         case GIN_TRUE:
    1239      647000 :             key->curItemMatches = true;
    1240             :             /* triConsistentFn set recheckCurItem */
    1241      647000 :             break;
    1242             : 
    1243       15164 :         case GIN_FALSE:
    1244       15164 :             key->curItemMatches = false;
    1245       15164 :             break;
    1246             : 
    1247       84960 :         case GIN_MAYBE:
    1248       84960 :             key->curItemMatches = true;
    1249       84960 :             key->recheckCurItem = true;
    1250       84960 :             break;
    1251             : 
    1252           0 :         default:
    1253             : 
    1254             :             /*
    1255             :              * the 'default' case shouldn't happen, but if the consistent
    1256             :              * function returns something bogus, this is the safe result
    1257             :              */
    1258           0 :             key->curItemMatches = true;
    1259           0 :             key->recheckCurItem = true;
    1260           0 :             break;
    1261             :     }
    1262             : 
    1263             :     /*
    1264             :      * We have a tuple, and we know if it matches or not. If it's a non-match,
    1265             :      * we could continue to find the next matching tuple, but let's break out
    1266             :      * and give scanGetItem a chance to advance the other keys. They might be
    1267             :      * able to skip past to a much higher TID, allowing us to save work.
    1268             :      */
    1269             : 
    1270             :     /* clean up after consistentFn calls */
    1271      747124 :     MemoryContextSwitchTo(oldCtx);
    1272      747124 :     MemoryContextReset(tempCtx);
    1273             : }
    1274             : 
    1275             : /*
    1276             :  * Get next heap item pointer (after advancePast) from scan.
    1277             :  * Returns true if anything found.
    1278             :  * On success, *item and *recheck are set.
    1279             :  *
    1280             :  * Note: this is very nearly the same logic as in keyGetItem(), except
    1281             :  * that we know the keys are to be combined with AND logic, whereas in
    1282             :  * keyGetItem() the combination logic is known only to the consistentFn.
    1283             :  */
    1284             : static bool
    1285      728800 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
    1286             :             ItemPointerData *item, bool *recheck)
    1287             : {
    1288      728800 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1289             :     uint32      i;
    1290             :     bool        match;
    1291             : 
    1292             :     /*----------
    1293             :      * Advance the scan keys in lock-step, until we find an item that matches
    1294             :      * all the keys. If any key reports isFinished, meaning its subset of the
    1295             :      * entries is exhausted, we can stop.  Otherwise, set *item to the next
    1296             :      * matching item.
    1297             :      *
    1298             :      * This logic works only if a keyGetItem stream can never contain both
    1299             :      * exact and lossy pointers for the same page.  Else we could have a
    1300             :      * case like
    1301             :      *
    1302             :      *      stream 1        stream 2
    1303             :      *      ...             ...
    1304             :      *      42/6            42/7
    1305             :      *      50/1            42/0xffff
    1306             :      *      ...             ...
    1307             :      *
    1308             :      * We would conclude that 42/6 is not a match and advance stream 1,
    1309             :      * thus never detecting the match to the lossy pointer in stream 2.
    1310             :      * (keyGetItem has a similar problem versus entryGetItem.)
    1311             :      *----------
    1312             :      */
    1313             :     do
    1314             :     {
    1315      744006 :         ItemPointerSetMin(item);
    1316      744006 :         match = true;
    1317     1475984 :         for (i = 0; i < so->nkeys && match; i++)
    1318             :         {
    1319      748368 :             GinScanKey  key = so->keys + i;
    1320             : 
    1321             :             /*
    1322             :              * If we're considering a lossy page, skip excludeOnly keys,  They
    1323             :              * can't exclude the whole page anyway.
    1324             :              */
    1325      748368 :             if (ItemPointerIsLossyPage(item) && key->excludeOnly)
    1326             :             {
    1327             :                 /*
    1328             :                  * ginNewScanKey() should never mark the first key as
    1329             :                  * excludeOnly.
    1330             :                  */
    1331             :                 Assert(i > 0);
    1332           0 :                 continue;
    1333             :             }
    1334             : 
    1335             :             /* Fetch the next item for this key that is > advancePast. */
    1336      748368 :             keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
    1337             :                        scan->xs_snapshot);
    1338             : 
    1339      748368 :             if (key->isFinished)
    1340        1226 :                 return false;
    1341             : 
    1342             :             /*
    1343             :              * If it's not a match, we can immediately conclude that nothing
    1344             :              * <= this item matches, without checking the rest of the keys.
    1345             :              */
    1346      747142 :             if (!key->curItemMatches)
    1347             :             {
    1348       15164 :                 advancePast = key->curItem;
    1349       15164 :                 match = false;
    1350       15164 :                 break;
    1351             :             }
    1352             : 
    1353             :             /*
    1354             :              * It's a match. We can conclude that nothing < matches, so the
    1355             :              * other key streams can skip to this item.
    1356             :              *
    1357             :              * Beware of lossy pointers, though; from a lossy pointer, we can
    1358             :              * only conclude that nothing smaller than this *block* matches.
    1359             :              */
    1360      731978 :             if (ItemPointerIsLossyPage(&key->curItem))
    1361             :             {
    1362           0 :                 if (GinItemPointerGetBlockNumber(&advancePast) <
    1363           0 :                     GinItemPointerGetBlockNumber(&key->curItem))
    1364             :                 {
    1365           0 :                     ItemPointerSet(&advancePast,
    1366             :                                    GinItemPointerGetBlockNumber(&key->curItem),
    1367             :                                    InvalidOffsetNumber);
    1368             :                 }
    1369             :             }
    1370             :             else
    1371             :             {
    1372             :                 Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
    1373      731978 :                 ItemPointerSet(&advancePast,
    1374             :                                GinItemPointerGetBlockNumber(&key->curItem),
    1375             :                                OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
    1376             :             }
    1377             : 
    1378             :             /*
    1379             :              * If this is the first key, remember this location as a potential
    1380             :              * match, and proceed to check the rest of the keys.
    1381             :              *
    1382             :              * Otherwise, check if this is the same item that we checked the
    1383             :              * previous keys for (or a lossy pointer for the same page). If
    1384             :              * not, loop back to check the previous keys for this item (we
    1385             :              * will check this key again too, but keyGetItem returns quickly
    1386             :              * for that)
    1387             :              */
    1388      731978 :             if (i == 0)
    1389             :             {
    1390      727688 :                 *item = key->curItem;
    1391             :             }
    1392             :             else
    1393             :             {
    1394        4290 :                 if (ItemPointerIsLossyPage(&key->curItem) ||
    1395        4290 :                     ItemPointerIsLossyPage(item))
    1396             :                 {
    1397             :                     Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
    1398           0 :                     match = (GinItemPointerGetBlockNumber(&key->curItem) ==
    1399           0 :                              GinItemPointerGetBlockNumber(item));
    1400             :                 }
    1401             :                 else
    1402             :                 {
    1403             :                     Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
    1404        4290 :                     match = (ginCompareItemPointers(&key->curItem, item) == 0);
    1405             :                 }
    1406             :             }
    1407             :         }
    1408      742780 :     } while (!match);
    1409             : 
    1410             :     Assert(!ItemPointerIsMin(item));
    1411             : 
    1412             :     /*
    1413             :      * Now *item contains the first ItemPointer after previous result that
    1414             :      * satisfied all the keys for that exact TID, or a lossy reference to the
    1415             :      * same page.
    1416             :      *
    1417             :      * We must return recheck = true if any of the keys are marked recheck.
    1418             :      */
    1419      727574 :     *recheck = false;
    1420     1362640 :     for (i = 0; i < so->nkeys; i++)
    1421             :     {
    1422      727822 :         GinScanKey  key = so->keys + i;
    1423             : 
    1424      727822 :         if (key->recheckCurItem)
    1425             :         {
    1426       92756 :             *recheck = true;
    1427       92756 :             break;
    1428             :         }
    1429             :     }
    1430             : 
    1431      727574 :     return true;
    1432             : }
    1433             : 
    1434             : 
    1435             : /*
    1436             :  * Functions for scanning the pending list
    1437             :  */
    1438             : 
    1439             : 
    1440             : /*
    1441             :  * Get ItemPointer of next heap row to be checked from pending list.
    1442             :  * Returns false if there are no more. On pages with several heap rows
    1443             :  * it returns each row separately, on page with part of heap row returns
    1444             :  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
    1445             :  * the range of pending-list tuples belonging to this heap row.
    1446             :  *
    1447             :  * The pendingBuffer is presumed pinned and share-locked on entry, and is
    1448             :  * pinned and share-locked on success exit.  On failure exit it's released.
    1449             :  */
    1450             : static bool
    1451        1080 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
    1452             : {
    1453             :     OffsetNumber maxoff;
    1454             :     Page        page;
    1455             :     IndexTuple  itup;
    1456             : 
    1457        1080 :     ItemPointerSetInvalid(&pos->item);
    1458             :     for (;;)
    1459             :     {
    1460        1080 :         page = BufferGetPage(pos->pendingBuffer);
    1461        1080 :         TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1462             : 
    1463        1080 :         maxoff = PageGetMaxOffsetNumber(page);
    1464        1080 :         if (pos->firstOffset > maxoff)
    1465             :         {
    1466         112 :             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
    1467             : 
    1468         112 :             if (blkno == InvalidBlockNumber)
    1469             :             {
    1470         112 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1471         112 :                 pos->pendingBuffer = InvalidBuffer;
    1472             : 
    1473         112 :                 return false;
    1474             :             }
    1475             :             else
    1476             :             {
    1477             :                 /*
    1478             :                  * Here we must prevent deletion of next page by insertcleanup
    1479             :                  * process, which may be trying to obtain exclusive lock on
    1480             :                  * current page.  So, we lock next page before releasing the
    1481             :                  * current one
    1482             :                  */
    1483           0 :                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
    1484             : 
    1485           0 :                 LockBuffer(tmpbuf, GIN_SHARE);
    1486           0 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1487             : 
    1488           0 :                 pos->pendingBuffer = tmpbuf;
    1489           0 :                 pos->firstOffset = FirstOffsetNumber;
    1490             :             }
    1491             :         }
    1492             :         else
    1493             :         {
    1494         968 :             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
    1495         968 :             pos->item = itup->t_tid;
    1496         968 :             if (GinPageHasFullRow(page))
    1497             :             {
    1498             :                 /*
    1499             :                  * find itempointer to the next row
    1500             :                  */
    1501        2260 :                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
    1502             :                 {
    1503        2148 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
    1504        2148 :                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
    1505         856 :                         break;
    1506             :                 }
    1507             :             }
    1508             :             else
    1509             :             {
    1510             :                 /*
    1511             :                  * All itempointers are the same on this page
    1512             :                  */
    1513           0 :                 pos->lastOffset = maxoff + 1;
    1514             :             }
    1515             : 
    1516             :             /*
    1517             :              * Now pos->firstOffset points to the first tuple of current heap
    1518             :              * row, pos->lastOffset points to the first tuple of next heap row
    1519             :              * (or to the end of page)
    1520             :              */
    1521         968 :             break;
    1522             :         }
    1523             :     }
    1524             : 
    1525         968 :     return true;
    1526             : }
    1527             : 
    1528             : /*
    1529             :  * Scan pending-list page from current tuple (off) up till the first of:
    1530             :  * - match is found (then returns true)
    1531             :  * - no later match is possible
    1532             :  * - tuple's attribute number is not equal to entry's attrnum
    1533             :  * - reach end of page
    1534             :  *
    1535             :  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
    1536             :  * of gintuple_get_key() on the current page.
    1537             :  */
    1538             : static bool
    1539           0 : matchPartialInPendingList(GinState *ginstate, Page page,
    1540             :                           OffsetNumber off, OffsetNumber maxoff,
    1541             :                           GinScanEntry entry,
    1542             :                           Datum *datum, GinNullCategory *category,
    1543             :                           bool *datumExtracted)
    1544             : {
    1545             :     IndexTuple  itup;
    1546             :     int32       cmp;
    1547             : 
    1548             :     /* Partial match to a null is not possible */
    1549           0 :     if (entry->queryCategory != GIN_CAT_NORM_KEY)
    1550           0 :         return false;
    1551             : 
    1552           0 :     while (off < maxoff)
    1553             :     {
    1554           0 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
    1555             : 
    1556           0 :         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
    1557           0 :             return false;
    1558             : 
    1559           0 :         if (datumExtracted[off - 1] == false)
    1560             :         {
    1561           0 :             datum[off - 1] = gintuple_get_key(ginstate, itup,
    1562           0 :                                               &category[off - 1]);
    1563           0 :             datumExtracted[off - 1] = true;
    1564             :         }
    1565             : 
    1566             :         /* Once we hit nulls, no further match is possible */
    1567           0 :         if (category[off - 1] != GIN_CAT_NORM_KEY)
    1568           0 :             return false;
    1569             : 
    1570             :         /*----------
    1571             :          * Check partial match.
    1572             :          * case cmp == 0 => match
    1573             :          * case cmp > 0 => not match and end scan (no later match possible)
    1574             :          * case cmp < 0 => not match and continue scan
    1575             :          *----------
    1576             :          */
    1577           0 :         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
    1578             :                                               ginstate->supportCollation[entry->attnum - 1],
    1579             :                                               entry->queryKey,
    1580             :                                               datum[off - 1],
    1581             :                                               UInt16GetDatum(entry->strategy),
    1582             :                                               PointerGetDatum(entry->extra_data)));
    1583           0 :         if (cmp == 0)
    1584           0 :             return true;
    1585           0 :         else if (cmp > 0)
    1586           0 :             return false;
    1587             : 
    1588           0 :         off++;
    1589             :     }
    1590             : 
    1591           0 :     return false;
    1592             : }
    1593             : 
    1594             : /*
    1595             :  * Set up the entryRes array for each key by looking at
    1596             :  * every entry for current heap row in pending list.
    1597             :  *
    1598             :  * Returns true if each scan key has at least one entryRes match.
    1599             :  * This corresponds to the situations where the normal index search will
    1600             :  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
    1601             :  * cannot be returned by the normal search since no entry stream will
    1602             :  * source its TID.)
    1603             :  *
    1604             :  * The pendingBuffer is presumed pinned and share-locked on entry.
    1605             :  */
    1606             : static bool
    1607         968 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
    1608             : {
    1609         968 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1610             :     OffsetNumber attrnum;
    1611             :     Page        page;
    1612             :     IndexTuple  itup;
    1613             :     int         i,
    1614             :                 j;
    1615             : 
    1616             :     /*
    1617             :      * Reset all entryRes and hasMatchKey flags
    1618             :      */
    1619        2624 :     for (i = 0; i < so->nkeys; i++)
    1620             :     {
    1621        1656 :         GinScanKey  key = so->keys + i;
    1622             : 
    1623        1656 :         memset(key->entryRes, GIN_FALSE, key->nentries);
    1624             :     }
    1625         968 :     memset(pos->hasMatchKey, false, so->nkeys);
    1626             : 
    1627             :     /*
    1628             :      * Outer loop iterates over multiple pending-list pages when a single heap
    1629             :      * row has entries spanning those pages.
    1630             :      */
    1631             :     for (;;)
    1632           0 :     {
    1633             :         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
    1634             :         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
    1635             :         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
    1636             : 
    1637             :         Assert(pos->lastOffset > pos->firstOffset);
    1638         968 :         memset(datumExtracted + pos->firstOffset - 1, 0,
    1639         968 :                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
    1640             : 
    1641         968 :         page = BufferGetPage(pos->pendingBuffer);
    1642         968 :         TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1643             : 
    1644        2624 :         for (i = 0; i < so->nkeys; i++)
    1645             :         {
    1646        1656 :             GinScanKey  key = so->keys + i;
    1647             : 
    1648        3196 :             for (j = 0; j < key->nentries; j++)
    1649             :             {
    1650        1540 :                 GinScanEntry entry = key->scanEntry[j];
    1651        1540 :                 OffsetNumber StopLow = pos->firstOffset,
    1652        1540 :                             StopHigh = pos->lastOffset,
    1653             :                             StopMiddle;
    1654             : 
    1655             :                 /* If already matched on earlier page, do no extra work */
    1656        1540 :                 if (key->entryRes[j])
    1657           0 :                     continue;
    1658             : 
    1659             :                 /*
    1660             :                  * Interesting tuples are from pos->firstOffset to
    1661             :                  * pos->lastOffset and they are ordered by (attnum, Datum) as
    1662             :                  * it's done in entry tree.  So we can use binary search to
    1663             :                  * avoid linear scanning.
    1664             :                  */
    1665        3420 :                 while (StopLow < StopHigh)
    1666             :                 {
    1667             :                     int         res;
    1668             : 
    1669        2688 :                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    1670             : 
    1671        2688 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
    1672             : 
    1673        2688 :                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
    1674             : 
    1675        2688 :                     if (key->attnum < attrnum)
    1676             :                     {
    1677         532 :                         StopHigh = StopMiddle;
    1678         532 :                         continue;
    1679             :                     }
    1680        2156 :                     if (key->attnum > attrnum)
    1681             :                     {
    1682         440 :                         StopLow = StopMiddle + 1;
    1683         440 :                         continue;
    1684             :                     }
    1685             : 
    1686        1716 :                     if (datumExtracted[StopMiddle - 1] == false)
    1687             :                     {
    1688        3320 :                         datum[StopMiddle - 1] =
    1689        1660 :                             gintuple_get_key(&so->ginstate, itup,
    1690        1660 :                                              &category[StopMiddle - 1]);
    1691        1660 :                         datumExtracted[StopMiddle - 1] = true;
    1692             :                     }
    1693             : 
    1694        1716 :                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
    1695             :                     {
    1696             :                         /* special behavior depending on searchMode */
    1697         724 :                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
    1698             :                         {
    1699             :                             /* match anything except NULL_ITEM */
    1700         724 :                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
    1701         252 :                                 res = -1;
    1702             :                             else
    1703         472 :                                 res = 0;
    1704             :                         }
    1705             :                         else
    1706             :                         {
    1707             :                             /* match everything */
    1708           0 :                             res = 0;
    1709             :                         }
    1710             :                     }
    1711             :                     else
    1712             :                     {
    1713         992 :                         res = ginCompareEntries(&so->ginstate,
    1714         992 :                                                 entry->attnum,
    1715             :                                                 entry->queryKey,
    1716         992 :                                                 entry->queryCategory,
    1717         992 :                                                 datum[StopMiddle - 1],
    1718         992 :                                                 category[StopMiddle - 1]);
    1719             :                     }
    1720             : 
    1721        1716 :                     if (res == 0)
    1722             :                     {
    1723             :                         /*
    1724             :                          * Found exact match (there can be only one, except in
    1725             :                          * EMPTY_QUERY mode).
    1726             :                          *
    1727             :                          * If doing partial match, scan forward from here to
    1728             :                          * end of page to check for matches.
    1729             :                          *
    1730             :                          * See comment above about tuple's ordering.
    1731             :                          */
    1732         808 :                         if (entry->isPartialMatch)
    1733           0 :                             key->entryRes[j] =
    1734           0 :                                 matchPartialInPendingList(&so->ginstate,
    1735             :                                                           page,
    1736             :                                                           StopMiddle,
    1737           0 :                                                           pos->lastOffset,
    1738             :                                                           entry,
    1739             :                                                           datum,
    1740             :                                                           category,
    1741             :                                                           datumExtracted);
    1742             :                         else
    1743         808 :                             key->entryRes[j] = true;
    1744             : 
    1745             :                         /* done with binary search */
    1746         808 :                         break;
    1747             :                     }
    1748         908 :                     else if (res < 0)
    1749         880 :                         StopHigh = StopMiddle;
    1750             :                     else
    1751          28 :                         StopLow = StopMiddle + 1;
    1752             :                 }
    1753             : 
    1754        1540 :                 if (StopLow >= StopHigh && entry->isPartialMatch)
    1755             :                 {
    1756             :                     /*
    1757             :                      * No exact match on this page.  If doing partial match,
    1758             :                      * scan from the first tuple greater than target value to
    1759             :                      * end of page.  Note that since we don't remember whether
    1760             :                      * the comparePartialFn told us to stop early on a
    1761             :                      * previous page, we will uselessly apply comparePartialFn
    1762             :                      * to the first tuple on each subsequent page.
    1763             :                      */
    1764           0 :                     key->entryRes[j] =
    1765           0 :                         matchPartialInPendingList(&so->ginstate,
    1766             :                                                   page,
    1767             :                                                   StopHigh,
    1768           0 :                                                   pos->lastOffset,
    1769             :                                                   entry,
    1770             :                                                   datum,
    1771             :                                                   category,
    1772             :                                                   datumExtracted);
    1773             :                 }
    1774             : 
    1775        1540 :                 pos->hasMatchKey[i] |= key->entryRes[j];
    1776             :             }
    1777             :         }
    1778             : 
    1779             :         /* Advance firstOffset over the scanned tuples */
    1780         968 :         pos->firstOffset = pos->lastOffset;
    1781             : 
    1782         968 :         if (GinPageHasFullRow(page))
    1783             :         {
    1784             :             /*
    1785             :              * We have examined all pending entries for the current heap row.
    1786             :              * Break out of loop over pages.
    1787             :              */
    1788         968 :             break;
    1789             :         }
    1790             :         else
    1791             :         {
    1792             :             /*
    1793             :              * Advance to next page of pending entries for the current heap
    1794             :              * row.  Complain if there isn't one.
    1795             :              */
    1796           0 :             ItemPointerData item = pos->item;
    1797             : 
    1798           0 :             if (scanGetCandidate(scan, pos) == false ||
    1799           0 :                 !ItemPointerEquals(&pos->item, &item))
    1800           0 :                 elog(ERROR, "could not find additional pending pages for same heap tuple");
    1801             :         }
    1802             :     }
    1803             : 
    1804             :     /*
    1805             :      * All scan keys except excludeOnly require at least one entry to match.
    1806             :      * excludeOnly keys are an exception, because their implied
    1807             :      * GIN_CAT_EMPTY_QUERY scanEntry always matches.  So return "true" if all
    1808             :      * non-excludeOnly scan keys have at least one match.
    1809             :      */
    1810        1700 :     for (i = 0; i < so->nkeys; i++)
    1811             :     {
    1812        1328 :         if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
    1813         596 :             return false;
    1814             :     }
    1815             : 
    1816         372 :     return true;
    1817             : }
    1818             : 
    1819             : /*
    1820             :  * Collect all matched rows from pending list into bitmap.
    1821             :  */
    1822             : static void
    1823        1226 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
    1824             : {
    1825        1226 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1826             :     MemoryContext oldCtx;
    1827             :     bool        recheck,
    1828             :                 match;
    1829             :     int         i;
    1830             :     pendingPosition pos;
    1831        1226 :     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    1832             :     Page        page;
    1833             :     BlockNumber blkno;
    1834             : 
    1835        1226 :     *ntids = 0;
    1836             : 
    1837             :     /*
    1838             :      * Acquire predicate lock on the metapage, to conflict with any fastupdate
    1839             :      * insertions.
    1840             :      */
    1841        1226 :     PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
    1842             : 
    1843        1226 :     LockBuffer(metabuffer, GIN_SHARE);
    1844        1226 :     page = BufferGetPage(metabuffer);
    1845        1226 :     TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1846        1226 :     blkno = GinPageGetMeta(page)->head;
    1847             : 
    1848             :     /*
    1849             :      * fetch head of list before unlocking metapage. head page must be pinned
    1850             :      * to prevent deletion by vacuum process
    1851             :      */
    1852        1226 :     if (blkno == InvalidBlockNumber)
    1853             :     {
    1854             :         /* No pending list, so proceed with normal scan */
    1855        1114 :         UnlockReleaseBuffer(metabuffer);
    1856        1114 :         return;
    1857             :     }
    1858             : 
    1859         112 :     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    1860         112 :     LockBuffer(pos.pendingBuffer, GIN_SHARE);
    1861         112 :     pos.firstOffset = FirstOffsetNumber;
    1862         112 :     UnlockReleaseBuffer(metabuffer);
    1863         112 :     pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
    1864             : 
    1865             :     /*
    1866             :      * loop for each heap row. scanGetCandidate returns full row or row's
    1867             :      * tuples from first page.
    1868             :      */
    1869        1080 :     while (scanGetCandidate(scan, &pos))
    1870             :     {
    1871             :         /*
    1872             :          * Check entries in tuple and set up entryRes array.
    1873             :          *
    1874             :          * If pending tuples belonging to the current heap row are spread
    1875             :          * across several pages, collectMatchesForHeapRow will read all of
    1876             :          * those pages.
    1877             :          */
    1878         968 :         if (!collectMatchesForHeapRow(scan, &pos))
    1879         596 :             continue;
    1880             : 
    1881             :         /*
    1882             :          * Matching of entries of one row is finished, so check row using
    1883             :          * consistent functions.
    1884             :          */
    1885         372 :         oldCtx = MemoryContextSwitchTo(so->tempCtx);
    1886         372 :         recheck = false;
    1887         372 :         match = true;
    1888             : 
    1889         944 :         for (i = 0; i < so->nkeys; i++)
    1890             :         {
    1891         572 :             GinScanKey  key = so->keys + i;
    1892             : 
    1893         572 :             if (!key->boolConsistentFn(key))
    1894             :             {
    1895           0 :                 match = false;
    1896           0 :                 break;
    1897             :             }
    1898         572 :             recheck |= key->recheckCurItem;
    1899             :         }
    1900             : 
    1901         372 :         MemoryContextSwitchTo(oldCtx);
    1902         372 :         MemoryContextReset(so->tempCtx);
    1903             : 
    1904         372 :         if (match)
    1905             :         {
    1906         372 :             tbm_add_tuples(tbm, &pos.item, 1, recheck);
    1907         372 :             (*ntids)++;
    1908             :         }
    1909             :     }
    1910             : 
    1911         112 :     pfree(pos.hasMatchKey);
    1912             : }
    1913             : 
    1914             : 
    1915             : #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
    1916             : 
    1917             : int64
    1918        1234 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    1919             : {
    1920        1234 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1921             :     int64       ntids;
    1922             :     ItemPointerData iptr;
    1923             :     bool        recheck;
    1924             : 
    1925             :     /*
    1926             :      * Set up the scan keys, and check for unsatisfiable query.
    1927             :      */
    1928        1234 :     ginFreeScanKeys(so);        /* there should be no keys yet, but just to be
    1929             :                                  * sure */
    1930        1234 :     ginNewScanKey(scan);
    1931             : 
    1932        1234 :     if (GinIsVoidRes(scan))
    1933           8 :         return 0;
    1934             : 
    1935        1226 :     ntids = 0;
    1936             : 
    1937             :     /*
    1938             :      * First, scan the pending list and collect any matching entries into the
    1939             :      * bitmap.  After we scan a pending item, some other backend could post it
    1940             :      * into the main index, and so we might visit it a second time during the
    1941             :      * main scan.  This is okay because we'll just re-set the same bit in the
    1942             :      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
    1943             :      * can't support the amgettuple API, however.) Note that it would not do
    1944             :      * to scan the main index before the pending list, since concurrent
    1945             :      * cleanup could then make us miss entries entirely.
    1946             :      */
    1947        1226 :     scanPendingInsert(scan, tbm, &ntids);
    1948             : 
    1949             :     /*
    1950             :      * Now scan the main index.
    1951             :      */
    1952        1226 :     startScan(scan);
    1953             : 
    1954        1226 :     ItemPointerSetMin(&iptr);
    1955             : 
    1956             :     for (;;)
    1957             :     {
    1958      728800 :         CHECK_FOR_INTERRUPTS();
    1959             : 
    1960      728800 :         if (!scanGetItem(scan, iptr, &iptr, &recheck))
    1961        1226 :             break;
    1962             : 
    1963      727574 :         if (ItemPointerIsLossyPage(&iptr))
    1964           0 :             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
    1965             :         else
    1966      727574 :             tbm_add_tuples(tbm, &iptr, 1, recheck);
    1967      727574 :         ntids++;
    1968             :     }
    1969             : 
    1970        1226 :     return ntids;
    1971             : }

Generated by: LCOV version 1.14