LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginget.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 83.5 % 619 517
Test Date: 2026-02-17 17:20:33 Functions: 93.8 % 16 15
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * ginget.c
       4              :  *    fetch tuples from a GIN scan.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *          src/backend/access/gin/ginget.c
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/gin_private.h"
      18              : #include "access/relscan.h"
      19              : #include "common/pg_prng.h"
      20              : #include "miscadmin.h"
      21              : #include "storage/predicate.h"
      22              : #include "utils/datum.h"
      23              : #include "utils/memutils.h"
      24              : #include "utils/rel.h"
      25              : 
      26              : /* GUC parameter */
      27              : int         GinFuzzySearchLimit = 0;
      28              : 
      29              : typedef struct pendingPosition
      30              : {
      31              :     Buffer      pendingBuffer;
      32              :     OffsetNumber firstOffset;
      33              :     OffsetNumber lastOffset;
      34              :     ItemPointerData item;
      35              :     bool       *hasMatchKey;
      36              : } pendingPosition;
      37              : 
      38              : 
      39              : /*
      40              :  * Goes to the next page if current offset is outside of bounds
      41              :  */
      42              : static bool
      43       204546 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
      44              : {
      45       204546 :     Page        page = BufferGetPage(stack->buffer);
      46              : 
      47       204546 :     if (stack->off > PageGetMaxOffsetNumber(page))
      48              :     {
      49              :         /*
      50              :          * We scanned the whole page, so we should take right page
      51              :          */
      52         1976 :         if (GinPageRightMost(page))
      53          264 :             return false;       /* no more pages */
      54              : 
      55         1712 :         stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
      56         1712 :         stack->blkno = BufferGetBlockNumber(stack->buffer);
      57         1712 :         stack->off = FirstOffsetNumber;
      58         1712 :         PredicateLockPage(btree->index, stack->blkno, snapshot);
      59              :     }
      60              : 
      61       204282 :     return true;
      62              : }
      63              : 
      64              : /*
      65              :  * Scan all pages of a posting tree and save all its heap ItemPointers
      66              :  * in scanEntry->matchBitmap
      67              :  */
      68              : static void
      69            6 : scanPostingTree(Relation index, GinScanEntry scanEntry,
      70              :                 BlockNumber rootPostingTree)
      71              : {
      72              :     GinBtreeData btree;
      73              :     GinBtreeStack *stack;
      74              :     Buffer      buffer;
      75              :     Page        page;
      76              : 
      77              :     /* Descend to the leftmost leaf page */
      78            6 :     stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
      79            6 :     buffer = stack->buffer;
      80              : 
      81            6 :     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
      82              : 
      83            6 :     freeGinBtreeStack(stack);
      84              : 
      85              :     /*
      86              :      * Loop iterates through all leaf pages of posting tree
      87              :      */
      88              :     for (;;)
      89              :     {
      90           15 :         page = BufferGetPage(buffer);
      91           15 :         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
      92              :         {
      93           15 :             int         n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
      94              : 
      95           15 :             scanEntry->predictNumberResult += n;
      96              :         }
      97              : 
      98           15 :         if (GinPageRightMost(page))
      99            6 :             break;              /* no more pages */
     100              : 
     101            9 :         buffer = ginStepRight(buffer, index, GIN_SHARE);
     102              :     }
     103              : 
     104            6 :     UnlockReleaseBuffer(buffer);
     105            6 : }
     106              : 
     107              : /*
     108              :  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
     109              :  * match the search entry.  This supports three different match modes:
     110              :  *
     111              :  * 1. Partial-match support: scan from current point until the
     112              :  *    comparePartialFn says we're done.
     113              :  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
     114              :  *    key for the current attnum) until we hit null items or end of attnum
     115              :  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
     116              :  *    key for the current attnum) until we hit end of attnum
     117              :  *
     118              :  * Returns true if done, false if it's necessary to restart scan from scratch
     119              :  */
     120              : static bool
     121          440 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
     122              :                    GinScanEntry scanEntry, Snapshot snapshot)
     123              : {
     124              :     OffsetNumber attnum;
     125              :     CompactAttribute *attr;
     126              : 
     127              :     /* Initialize empty bitmap result */
     128          440 :     scanEntry->matchBitmap = tbm_create(work_mem * (Size) 1024, NULL);
     129              : 
     130              :     /* Null query cannot partial-match anything */
     131          440 :     if (scanEntry->isPartialMatch &&
     132          299 :         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
     133            0 :         return true;
     134              : 
     135              :     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
     136          440 :     attnum = scanEntry->attnum;
     137          440 :     attr = TupleDescCompactAttr(btree->ginstate->origTupdesc, attnum - 1);
     138              : 
     139              :     /*
     140              :      * Predicate lock entry leaf page, following pages will be locked by
     141              :      * moveRightIfItNeeded()
     142              :      */
     143          440 :     PredicateLockPage(btree->index,
     144              :                       BufferGetBlockNumber(stack->buffer),
     145              :                       snapshot);
     146              : 
     147              :     for (;;)
     148       204100 :     {
     149              :         Page        page;
     150              :         IndexTuple  itup;
     151              :         Datum       idatum;
     152              :         GinNullCategory icategory;
     153              : 
     154              :         /*
     155              :          * stack->off points to the interested entry, buffer is already locked
     156              :          */
     157       204540 :         if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     158          440 :             return true;
     159              : 
     160       204276 :         page = BufferGetPage(stack->buffer);
     161       204276 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     162              : 
     163              :         /*
     164              :          * If tuple stores another attribute then stop scan
     165              :          */
     166       204276 :         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     167            0 :             return true;
     168              : 
     169              :         /* Safe to fetch attribute value */
     170       204276 :         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
     171              : 
     172              :         /*
     173              :          * Check for appropriate scan stop conditions
     174              :          */
     175       204276 :         if (scanEntry->isPartialMatch)
     176              :         {
     177              :             int32       cmp;
     178              : 
     179              :             /*
     180              :              * In partial match, stop scan at any null (including
     181              :              * placeholders); partial matches never match nulls
     182              :              */
     183         1321 :             if (icategory != GIN_CAT_NORM_KEY)
     184            5 :                 return true;
     185              : 
     186              :             /*----------
     187              :              * Check of partial match.
     188              :              * case cmp == 0 => match
     189              :              * case cmp > 0 => not match and finish scan
     190              :              * case cmp < 0 => not match and continue scan
     191              :              *----------
     192              :              */
     193         1316 :             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
     194         1316 :                                                   btree->ginstate->supportCollation[attnum - 1],
     195              :                                                   scanEntry->queryKey,
     196              :                                                   idatum,
     197         1316 :                                                   UInt16GetDatum(scanEntry->strategy),
     198         1316 :                                                   PointerGetDatum(scanEntry->extra_data)));
     199              : 
     200         1316 :             if (cmp > 0)
     201          157 :                 return true;
     202         1159 :             else if (cmp < 0)
     203              :             {
     204           64 :                 stack->off++;
     205           64 :                 continue;
     206              :             }
     207              :         }
     208       202955 :         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
     209              :         {
     210              :             /*
     211              :              * In ALL mode, we are not interested in null items, so we can
     212              :              * stop if we get to a null-item placeholder (which will be the
     213              :              * last entry for a given attnum).  We do want to include NULL_KEY
     214              :              * and EMPTY_ITEM entries, though.
     215              :              */
     216       202955 :             if (icategory == GIN_CAT_NULL_ITEM)
     217           14 :                 return true;
     218              :         }
     219              : 
     220              :         /*
     221              :          * OK, we want to return the TIDs listed in this entry.
     222              :          */
     223       204036 :         if (GinIsPostingTree(itup))
     224              :         {
     225            6 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     226              : 
     227              :             /*
     228              :              * We should unlock current page (but not unpin) during tree scan
     229              :              * to prevent deadlock with vacuum processes.
     230              :              *
     231              :              * We save current entry value (idatum) to be able to re-find our
     232              :              * tuple after re-locking
     233              :              */
     234            6 :             if (icategory == GIN_CAT_NORM_KEY)
     235            6 :                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
     236              : 
     237            6 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     238              : 
     239              :             /*
     240              :              * Acquire predicate lock on the posting tree.  We already hold a
     241              :              * lock on the entry page, but insertions to the posting tree
     242              :              * don't check for conflicts on that level.
     243              :              */
     244            6 :             PredicateLockPage(btree->index, rootPostingTree, snapshot);
     245              : 
     246              :             /* Collect all the TIDs in this entry's posting tree */
     247            6 :             scanPostingTree(btree->index, scanEntry, rootPostingTree);
     248              : 
     249              :             /*
     250              :              * We lock again the entry page and while it was unlocked insert
     251              :              * might have occurred, so we need to re-find our position.
     252              :              */
     253            6 :             LockBuffer(stack->buffer, GIN_SHARE);
     254            6 :             page = BufferGetPage(stack->buffer);
     255            6 :             if (!GinPageIsLeaf(page))
     256              :             {
     257              :                 /*
     258              :                  * Root page becomes non-leaf while we unlock it. We will
     259              :                  * start again, this situation doesn't occur often - root can
     260              :                  * became a non-leaf only once per life of index.
     261              :                  */
     262            0 :                 return false;
     263              :             }
     264              : 
     265              :             /* Search forward to re-find idatum */
     266              :             for (;;)
     267              :             {
     268            6 :                 if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     269            0 :                     ereport(ERROR,
     270              :                             (errcode(ERRCODE_INTERNAL_ERROR),
     271              :                              errmsg("failed to re-find tuple within index \"%s\"",
     272              :                                     RelationGetRelationName(btree->index))));
     273              : 
     274            6 :                 page = BufferGetPage(stack->buffer);
     275            6 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     276              : 
     277            6 :                 if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
     278              :                 {
     279              :                     Datum       newDatum;
     280              :                     GinNullCategory newCategory;
     281              : 
     282            6 :                     newDatum = gintuple_get_key(btree->ginstate, itup,
     283              :                                                 &newCategory);
     284              : 
     285            6 :                     if (ginCompareEntries(btree->ginstate, attnum,
     286              :                                           newDatum, newCategory,
     287              :                                           idatum, icategory) == 0)
     288            6 :                         break;  /* Found! */
     289              :                 }
     290              : 
     291            0 :                 stack->off++;
     292              :             }
     293              : 
     294            6 :             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
     295            0 :                 pfree(DatumGetPointer(idatum));
     296              :         }
     297              :         else
     298              :         {
     299              :             ItemPointer ipd;
     300              :             int         nipd;
     301              : 
     302       204030 :             ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
     303       204030 :             tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
     304       204030 :             scanEntry->predictNumberResult += GinGetNPosting(itup);
     305       204030 :             pfree(ipd);
     306              :         }
     307              : 
     308              :         /*
     309              :          * Done with this entry, go to the next
     310              :          */
     311       204036 :         stack->off++;
     312              :     }
     313              : }
     314              : 
     315              : /*
     316              :  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
     317              :  */
     318              : static void
     319         3538 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
     320              : {
     321              :     GinBtreeData btreeEntry;
     322              :     GinBtreeStack *stackEntry;
     323              :     Page        page;
     324              :     bool        needUnlock;
     325              : 
     326         3538 : restartScanEntry:
     327         3538 :     entry->buffer = InvalidBuffer;
     328         3538 :     ItemPointerSetMin(&entry->curItem);
     329         3538 :     entry->offset = InvalidOffsetNumber;
     330         3538 :     if (entry->list)
     331            0 :         pfree(entry->list);
     332         3538 :     entry->list = NULL;
     333         3538 :     entry->nlist = 0;
     334         3538 :     entry->matchBitmap = NULL;
     335         3538 :     entry->matchNtuples = -1;
     336         3538 :     entry->matchResult.blockno = InvalidBlockNumber;
     337         3538 :     entry->reduceResult = false;
     338         3538 :     entry->predictNumberResult = 0;
     339              : 
     340              :     /*
     341              :      * we should find entry, and begin scan of posting tree or just store
     342              :      * posting list in memory
     343              :      */
     344         3538 :     ginPrepareEntryScan(&btreeEntry, entry->attnum,
     345         3538 :                         entry->queryKey, entry->queryCategory,
     346              :                         ginstate);
     347         3538 :     stackEntry = ginFindLeafPage(&btreeEntry, true, false);
     348         3538 :     page = BufferGetPage(stackEntry->buffer);
     349              : 
     350              :     /* ginFindLeafPage() will have already checked snapshot age. */
     351         3538 :     needUnlock = true;
     352              : 
     353         3538 :     entry->isFinished = true;
     354              : 
     355         3538 :     if (entry->isPartialMatch ||
     356         3239 :         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
     357              :     {
     358              :         /*
     359              :          * btreeEntry.findItem locates the first item >= given search key.
     360              :          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
     361              :          * because of the way the GIN_CAT_EMPTY_QUERY category code is
     362              :          * assigned.)  We scan forward from there and collect all TIDs needed
     363              :          * for the entry type.
     364              :          */
     365          440 :         btreeEntry.findItem(&btreeEntry, stackEntry);
     366          440 :         if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
     367          440 :             == false)
     368              :         {
     369              :             /*
     370              :              * GIN tree was seriously restructured, so we will cleanup all
     371              :              * found data and rescan. See comments near 'return false' in
     372              :              * collectMatchBitmap()
     373              :              */
     374            0 :             if (entry->matchBitmap)
     375              :             {
     376            0 :                 if (entry->matchIterator)
     377            0 :                     tbm_end_private_iterate(entry->matchIterator);
     378            0 :                 entry->matchIterator = NULL;
     379            0 :                 tbm_free(entry->matchBitmap);
     380            0 :                 entry->matchBitmap = NULL;
     381              :             }
     382            0 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     383            0 :             freeGinBtreeStack(stackEntry);
     384            0 :             goto restartScanEntry;
     385              :         }
     386              : 
     387          440 :         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
     388              :         {
     389          360 :             entry->matchIterator =
     390          360 :                 tbm_begin_private_iterate(entry->matchBitmap);
     391          360 :             entry->isFinished = false;
     392              :         }
     393              :     }
     394         3098 :     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     395              :     {
     396         2233 :         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
     397              : 
     398         2233 :         if (GinIsPostingTree(itup))
     399              :         {
     400           32 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     401              :             GinBtreeStack *stack;
     402              :             Page        entrypage;
     403              :             ItemPointerData minItem;
     404              : 
     405              :             /*
     406              :              * This is an equality scan, so lock the root of the posting tree.
     407              :              * It represents a lock on the exact key value, and covers all the
     408              :              * items in the posting tree.
     409              :              */
     410           32 :             PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
     411              : 
     412              :             /*
     413              :              * We should unlock entry page before touching posting tree to
     414              :              * prevent deadlocks with vacuum processes. Because entry is never
     415              :              * deleted from page and posting tree is never reduced to the
     416              :              * posting list, we can unlock page after getting BlockNumber of
     417              :              * root of posting tree.
     418              :              */
     419           32 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     420           32 :             needUnlock = false;
     421              : 
     422           32 :             stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
     423              :                                             rootPostingTree);
     424           32 :             entry->buffer = stack->buffer;
     425              : 
     426              :             /*
     427              :              * We keep buffer pinned because we need to prevent deletion of
     428              :              * page during scan. See GIN's vacuum implementation. RefCount is
     429              :              * increased to keep buffer pinned after freeGinBtreeStack() call.
     430              :              */
     431           32 :             IncrBufferRefCount(entry->buffer);
     432              : 
     433           32 :             entrypage = BufferGetPage(entry->buffer);
     434              : 
     435              :             /*
     436              :              * Load the first page into memory.
     437              :              */
     438           32 :             ItemPointerSetMin(&minItem);
     439           32 :             entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
     440              : 
     441           32 :             entry->predictNumberResult = stack->predictNumber * entry->nlist;
     442              : 
     443           32 :             LockBuffer(entry->buffer, GIN_UNLOCK);
     444           32 :             freeGinBtreeStack(stack);
     445           32 :             entry->isFinished = false;
     446              :         }
     447              :         else
     448              :         {
     449              :             /*
     450              :              * Lock the entry leaf page.  This is more coarse-grained than
     451              :              * necessary, because it will conflict with any insertions that
     452              :              * land on the same leaf page, not only the exact key we searched
     453              :              * for.  But locking an individual tuple would require updating
     454              :              * that lock whenever it moves because of insertions or vacuums,
     455              :              * which seems too complicated.
     456              :              */
     457         2201 :             PredicateLockPage(ginstate->index,
     458              :                               BufferGetBlockNumber(stackEntry->buffer),
     459              :                               snapshot);
     460         2201 :             if (GinGetNPosting(itup) > 0)
     461              :             {
     462         2198 :                 entry->list = ginReadTuple(ginstate, entry->attnum, itup,
     463              :                                            &entry->nlist);
     464         2198 :                 entry->predictNumberResult = entry->nlist;
     465              : 
     466         2198 :                 entry->isFinished = false;
     467              :             }
     468              :         }
     469              :     }
     470              :     else
     471              :     {
     472              :         /*
     473              :          * No entry found.  Predicate lock the leaf page, to lock the place
     474              :          * where the entry would've been, had there been one.
     475              :          */
     476          865 :         PredicateLockPage(ginstate->index,
     477              :                           BufferGetBlockNumber(stackEntry->buffer), snapshot);
     478              :     }
     479              : 
     480         3538 :     if (needUnlock)
     481         3506 :         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     482         3538 :     freeGinBtreeStack(stackEntry);
     483         3538 : }
     484              : 
     485              : /*
     486              :  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
     487              :  * least frequent items first.
     488              :  */
     489              : static int
     490         4739 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
     491              : {
     492         4739 :     const GinScanKeyData *key = arg;
     493         4739 :     int         i1 = *(const int *) a1;
     494         4739 :     int         i2 = *(const int *) a2;
     495         4739 :     uint32      n1 = key->scanEntry[i1]->predictNumberResult;
     496         4739 :     uint32      n2 = key->scanEntry[i2]->predictNumberResult;
     497              : 
     498         4739 :     if (n1 < n2)
     499          686 :         return -1;
     500         4053 :     else if (n1 == n2)
     501         3167 :         return 0;
     502              :     else
     503          886 :         return 1;
     504              : }
     505              : 
     506              : static void
     507         1037 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
     508              : {
     509         1037 :     MemoryContext oldCtx = CurrentMemoryContext;
     510              :     int         i;
     511              :     int         j;
     512              :     int        *entryIndexes;
     513              : 
     514         1037 :     ItemPointerSetMin(&key->curItem);
     515         1037 :     key->curItemMatches = false;
     516         1037 :     key->recheckCurItem = false;
     517         1037 :     key->isFinished = false;
     518              : 
     519              :     /*
     520              :      * Divide the entries into two distinct sets: required and additional.
     521              :      * Additional entries are not enough for a match alone, without any items
     522              :      * from the required set, but are needed by the consistent function to
     523              :      * decide if an item matches. When scanning, we can skip over items from
     524              :      * additional entries that have no corresponding matches in any of the
     525              :      * required entries. That speeds up queries like "frequent & rare"
     526              :      * considerably, if the frequent term can be put in the additional set.
     527              :      *
     528              :      * There can be many legal ways to divide them entries into these two
     529              :      * sets. A conservative division is to just put everything in the required
     530              :      * set, but the more you can put in the additional set, the more you can
     531              :      * skip during the scan. To maximize skipping, we try to put as many
     532              :      * frequent items as possible into additional, and less frequent ones into
     533              :      * required. To do that, sort the entries by frequency
     534              :      * (predictNumberResult), and put entries into the required set in that
     535              :      * order, until the consistent function says that none of the remaining
     536              :      * entries can form a match, without any items from the required set. The
     537              :      * rest go to the additional set.
     538              :      *
     539              :      * Exclude-only scan keys are known to have no required entries.
     540              :      */
     541         1037 :     if (key->excludeOnly)
     542              :     {
     543           21 :         MemoryContextSwitchTo(so->keyCtx);
     544              : 
     545           21 :         key->nrequired = 0;
     546           21 :         key->nadditional = key->nentries;
     547           21 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     548           24 :         for (i = 0; i < key->nadditional; i++)
     549            3 :             key->additionalEntries[i] = key->scanEntry[i];
     550              :     }
     551         1016 :     else if (key->nentries > 1)
     552              :     {
     553          283 :         MemoryContextSwitchTo(so->tempCtx);
     554              : 
     555          283 :         entryIndexes = palloc_array(int, key->nentries);
     556         3085 :         for (i = 0; i < key->nentries; i++)
     557         2802 :             entryIndexes[i] = i;
     558          283 :         qsort_arg(entryIndexes, key->nentries, sizeof(int),
     559              :                   entryIndexByFrequencyCmp, key);
     560              : 
     561         2802 :         for (i = 1; i < key->nentries; i++)
     562         2519 :             key->entryRes[entryIndexes[i]] = GIN_MAYBE;
     563         2426 :         for (i = 0; i < key->nentries - 1; i++)
     564              :         {
     565              :             /* Pass all entries <= i as FALSE, and the rest as MAYBE */
     566         2354 :             key->entryRes[entryIndexes[i]] = GIN_FALSE;
     567              : 
     568         2354 :             if (key->triConsistentFn(key) == GIN_FALSE)
     569          211 :                 break;
     570              : 
     571              :             /* Make this loop interruptible in case there are many keys */
     572         2143 :             CHECK_FOR_INTERRUPTS();
     573              :         }
     574              :         /* i is now the last required entry. */
     575              : 
     576          283 :         MemoryContextSwitchTo(so->keyCtx);
     577              : 
     578          283 :         key->nrequired = i + 1;
     579          283 :         key->nadditional = key->nentries - key->nrequired;
     580          283 :         key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
     581          283 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     582              : 
     583          283 :         j = 0;
     584         2709 :         for (i = 0; i < key->nrequired; i++)
     585         2426 :             key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
     586          659 :         for (i = 0; i < key->nadditional; i++)
     587          376 :             key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
     588              : 
     589              :         /* clean up after consistentFn calls (also frees entryIndexes) */
     590          283 :         MemoryContextReset(so->tempCtx);
     591              :     }
     592              :     else
     593              :     {
     594          733 :         MemoryContextSwitchTo(so->keyCtx);
     595              : 
     596          733 :         key->nrequired = 1;
     597          733 :         key->nadditional = 0;
     598          733 :         key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
     599          733 :         key->requiredEntries[0] = key->scanEntry[0];
     600              :     }
     601         1037 :     MemoryContextSwitchTo(oldCtx);
     602         1037 : }
     603              : 
     604              : static void
     605          971 : startScan(IndexScanDesc scan)
     606              : {
     607          971 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
     608          971 :     GinState   *ginstate = &so->ginstate;
     609              :     uint32      i;
     610              : 
     611         4509 :     for (i = 0; i < so->totalentries; i++)
     612         3538 :         startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
     613              : 
     614          971 :     if (GinFuzzySearchLimit > 0)
     615              :     {
     616              :         /*
     617              :          * If all of keys more than threshold we will try to reduce result, we
     618              :          * hope (and only hope, for intersection operation of array our
     619              :          * supposition isn't true), that total result will not more than
     620              :          * minimal predictNumberResult.
     621              :          */
     622            3 :         bool        reduce = true;
     623              : 
     624            6 :         for (i = 0; i < so->totalentries; i++)
     625              :         {
     626            3 :             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
     627              :             {
     628            0 :                 reduce = false;
     629            0 :                 break;
     630              :             }
     631              :         }
     632            3 :         if (reduce)
     633              :         {
     634            6 :             for (i = 0; i < so->totalentries; i++)
     635              :             {
     636            3 :                 so->entries[i]->predictNumberResult /= so->totalentries;
     637            3 :                 so->entries[i]->reduceResult = true;
     638              :             }
     639              :         }
     640              :     }
     641              : 
     642              :     /*
     643              :      * Now that we have the estimates for the entry frequencies, finish
     644              :      * initializing the scan keys.
     645              :      */
     646         2008 :     for (i = 0; i < so->nkeys; i++)
     647         1037 :         startScanKey(ginstate, so, so->keys + i);
     648          971 : }
     649              : 
     650              : /*
     651              :  * Load the next batch of item pointers from a posting tree.
     652              :  *
     653              :  * Note that we copy the page into GinScanEntry->list array and unlock it, but
     654              :  * keep it pinned to prevent interference with vacuum.
     655              :  */
     656              : static void
     657           50 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
     658              :                    ItemPointerData advancePast)
     659              : {
     660              :     Page        page;
     661              :     int         i;
     662              :     bool        stepright;
     663              : 
     664           50 :     if (!BufferIsValid(entry->buffer))
     665              :     {
     666            0 :         entry->isFinished = true;
     667            0 :         return;
     668              :     }
     669              : 
     670              :     /*
     671              :      * We have two strategies for finding the correct page: step right from
     672              :      * the current page, or descend the tree again from the root. If
     673              :      * advancePast equals the current item, the next matching item should be
     674              :      * on the next page, so we step right. Otherwise, descend from root.
     675              :      */
     676           50 :     if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
     677              :     {
     678           47 :         stepright = true;
     679           47 :         LockBuffer(entry->buffer, GIN_SHARE);
     680              :     }
     681              :     else
     682              :     {
     683              :         GinBtreeStack *stack;
     684              : 
     685            3 :         ReleaseBuffer(entry->buffer);
     686              : 
     687              :         /*
     688              :          * Set the search key, and find the correct leaf page.
     689              :          */
     690            3 :         if (ItemPointerIsLossyPage(&advancePast))
     691              :         {
     692            0 :             ItemPointerSet(&entry->btree.itemptr,
     693            0 :                            GinItemPointerGetBlockNumber(&advancePast) + 1,
     694              :                            FirstOffsetNumber);
     695              :         }
     696              :         else
     697              :         {
     698            3 :             ItemPointerSet(&entry->btree.itemptr,
     699              :                            GinItemPointerGetBlockNumber(&advancePast),
     700            3 :                            OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
     701              :         }
     702            3 :         entry->btree.fullScan = false;
     703            3 :         stack = ginFindLeafPage(&entry->btree, true, false);
     704              : 
     705              :         /* we don't need the stack, just the buffer. */
     706            3 :         entry->buffer = stack->buffer;
     707            3 :         IncrBufferRefCount(entry->buffer);
     708            3 :         freeGinBtreeStack(stack);
     709            3 :         stepright = false;
     710              :     }
     711              : 
     712           50 :     elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
     713              :          GinItemPointerGetBlockNumber(&advancePast),
     714              :          GinItemPointerGetOffsetNumber(&advancePast),
     715              :          !stepright);
     716              : 
     717           50 :     page = BufferGetPage(entry->buffer);
     718              :     for (;;)
     719              :     {
     720           53 :         entry->offset = InvalidOffsetNumber;
     721           53 :         if (entry->list)
     722              :         {
     723           47 :             pfree(entry->list);
     724           47 :             entry->list = NULL;
     725           47 :             entry->nlist = 0;
     726              :         }
     727              : 
     728           53 :         if (stepright)
     729              :         {
     730              :             /*
     731              :              * We've processed all the entries on this page. If it was the
     732              :              * last page in the tree, we're done.
     733              :              */
     734           50 :             if (GinPageRightMost(page))
     735              :             {
     736            3 :                 UnlockReleaseBuffer(entry->buffer);
     737            3 :                 entry->buffer = InvalidBuffer;
     738            3 :                 entry->isFinished = true;
     739            3 :                 return;
     740              :             }
     741              : 
     742              :             /*
     743              :              * Step to next page, following the right link. then find the
     744              :              * first ItemPointer greater than advancePast.
     745              :              */
     746           47 :             entry->buffer = ginStepRight(entry->buffer,
     747              :                                          ginstate->index,
     748              :                                          GIN_SHARE);
     749           47 :             page = BufferGetPage(entry->buffer);
     750              :         }
     751           50 :         stepright = true;
     752              : 
     753           50 :         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
     754            0 :             continue;           /* page was deleted by concurrent vacuum */
     755              : 
     756              :         /*
     757              :          * The first item > advancePast might not be on this page, but
     758              :          * somewhere to the right, if the page was split, or a non-match from
     759              :          * another key in the query allowed us to skip some items from this
     760              :          * entry. Keep following the right-links until we re-find the correct
     761              :          * page.
     762              :          */
     763           68 :         if (!GinPageRightMost(page) &&
     764           18 :             ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
     765              :         {
     766              :             /*
     767              :              * the item we're looking is > the right bound of the page, so it
     768              :              * can't be on this page.
     769              :              */
     770            0 :             continue;
     771              :         }
     772              : 
     773           50 :         entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
     774              : 
     775          962 :         for (i = 0; i < entry->nlist; i++)
     776              :         {
     777          959 :             if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
     778              :             {
     779           47 :                 entry->offset = i;
     780              : 
     781           47 :                 if (GinPageRightMost(page))
     782              :                 {
     783              :                     /* after processing the copied items, we're done. */
     784           29 :                     UnlockReleaseBuffer(entry->buffer);
     785           29 :                     entry->buffer = InvalidBuffer;
     786              :                 }
     787              :                 else
     788           18 :                     LockBuffer(entry->buffer, GIN_UNLOCK);
     789           47 :                 return;
     790              :             }
     791              :         }
     792              :     }
     793              : }
     794              : 
     795              : #define gin_rand() pg_prng_double(&pg_global_prng_state)
     796              : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
     797              : 
     798              : /*
     799              :  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
     800              :  * of one scan key, or sets entry->isFinished to true if there are no more.
     801              :  *
     802              :  * Item pointers are returned in ascending order.
     803              :  *
     804              :  * Note: this can return a "lossy page" item pointer, indicating that the
     805              :  * entry potentially matches all items on that heap page.  However, it is
     806              :  * not allowed to return both a lossy page pointer and exact (regular)
     807              :  * item pointers for the same page.  (Doing so would break the key-combination
     808              :  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
     809              :  * current implementation this is guaranteed by the behavior of tidbitmaps.
     810              :  */
     811              : static void
     812       550675 : entryGetItem(GinState *ginstate, GinScanEntry entry,
     813              :              ItemPointerData advancePast)
     814              : {
     815              :     Assert(!entry->isFinished);
     816              : 
     817              :     Assert(!ItemPointerIsValid(&entry->curItem) ||
     818              :            ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
     819              : 
     820       550675 :     if (entry->matchBitmap)
     821              :     {
     822              :         /* A bitmap result */
     823       134705 :         BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
     824       134705 :         OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
     825              : 
     826              :         for (;;)
     827              :         {
     828              :             /*
     829              :              * If we've exhausted all items on this block, move to next block
     830              :              * in the bitmap. tbm_private_iterate() sets matchResult.blockno
     831              :              * to InvalidBlockNumber when the bitmap is exhausted.
     832              :              */
     833       137268 :             while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
     834       136908 :                    (!entry->matchResult.lossy &&
     835       136908 :                     entry->offset >= entry->matchNtuples) ||
     836       405958 :                    entry->matchResult.blockno < advancePastBlk ||
     837       134345 :                    (ItemPointerIsLossyPage(&advancePast) &&
     838            0 :                     entry->matchResult.blockno == advancePastBlk))
     839              :             {
     840         2923 :                 if (!tbm_private_iterate(entry->matchIterator, &entry->matchResult))
     841              :                 {
     842              :                     Assert(!BlockNumberIsValid(entry->matchResult.blockno));
     843          360 :                     ItemPointerSetInvalid(&entry->curItem);
     844          360 :                     tbm_end_private_iterate(entry->matchIterator);
     845          360 :                     entry->matchIterator = NULL;
     846          360 :                     entry->isFinished = true;
     847          360 :                     break;
     848              :                 }
     849              : 
     850              :                 /* Exact pages need their tuple offsets extracted. */
     851         2563 :                 if (!entry->matchResult.lossy)
     852         2563 :                     entry->matchNtuples = tbm_extract_page_tuple(&entry->matchResult,
     853         2563 :                                                                  entry->matchOffsets,
     854              :                                                                  TBM_MAX_TUPLES_PER_PAGE);
     855              : 
     856              :                 /*
     857              :                  * Reset counter to the beginning of entry->matchResult. Note:
     858              :                  * entry->offset is still greater than matchResult.ntuples if
     859              :                  * matchResult is lossy.  So, on next call we will get next
     860              :                  * result from TIDBitmap.
     861              :                  */
     862         2563 :                 entry->offset = 0;
     863              :             }
     864       134705 :             if (entry->isFinished)
     865          360 :                 break;
     866              : 
     867              :             /*
     868              :              * We're now on the first page after advancePast which has any
     869              :              * items on it. If it's a lossy result, return that.
     870              :              */
     871       134345 :             if (entry->matchResult.lossy)
     872              :             {
     873            0 :                 ItemPointerSetLossyPage(&entry->curItem,
     874              :                                         entry->matchResult.blockno);
     875              : 
     876              :                 /*
     877              :                  * We might as well fall out of the loop; we could not
     878              :                  * estimate number of results on this page to support correct
     879              :                  * reducing of result even if it's enabled.
     880              :                  */
     881            0 :                 break;
     882              :             }
     883              : 
     884              :             /*
     885              :              * Not a lossy page. If tuple offsets were extracted,
     886              :              * entry->matchNtuples must be > -1
     887              :              */
     888              :             Assert(entry->matchNtuples > -1);
     889              : 
     890              :             /* Skip over any offsets <= advancePast, and return that. */
     891       134345 :             if (entry->matchResult.blockno == advancePastBlk)
     892              :             {
     893              :                 Assert(entry->matchNtuples > 0);
     894              : 
     895              :                 /*
     896              :                  * First, do a quick check against the last offset on the
     897              :                  * page. If that's > advancePast, so are all the other
     898              :                  * offsets, so just go back to the top to get the next page.
     899              :                  */
     900       132142 :                 if (entry->matchOffsets[entry->matchNtuples - 1] <= advancePastOff)
     901              :                 {
     902            0 :                     entry->offset = entry->matchNtuples;
     903            0 :                     continue;
     904              :                 }
     905              : 
     906              :                 /* Otherwise scan to find the first item > advancePast */
     907       132142 :                 while (entry->matchOffsets[entry->offset] <= advancePastOff)
     908            0 :                     entry->offset++;
     909              :             }
     910              : 
     911       134345 :             ItemPointerSet(&entry->curItem,
     912              :                            entry->matchResult.blockno,
     913       134345 :                            entry->matchOffsets[entry->offset]);
     914       134345 :             entry->offset++;
     915              : 
     916              :             /* Done unless we need to reduce the result */
     917       134345 :             if (!entry->reduceResult || !dropItem(entry))
     918              :                 break;
     919              :         }
     920              :     }
     921       415970 :     else if (!BufferIsValid(entry->buffer))
     922              :     {
     923              :         /*
     924              :          * A posting list from an entry tuple, or the last page of a posting
     925              :          * tree.
     926              :          */
     927              :         for (;;)
     928              :         {
     929       203064 :             if (entry->offset >= entry->nlist)
     930              :             {
     931         1950 :                 ItemPointerSetInvalid(&entry->curItem);
     932         1950 :                 entry->isFinished = true;
     933         1950 :                 break;
     934              :             }
     935              : 
     936       201114 :             entry->curItem = entry->list[entry->offset++];
     937              : 
     938              :             /* If we're not past advancePast, keep scanning */
     939       201114 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     940        33317 :                 continue;
     941              : 
     942              :             /* Done unless we need to reduce the result */
     943       167797 :             if (!entry->reduceResult || !dropItem(entry))
     944              :                 break;
     945              :         }
     946              :     }
     947              :     else
     948              :     {
     949              :         /* A posting tree */
     950              :         for (;;)
     951              :         {
     952              :             /* If we've processed the current batch, load more items */
     953       339509 :             while (entry->offset >= entry->nlist)
     954              :             {
     955           50 :                 entryLoadMoreItems(ginstate, entry, advancePast);
     956              : 
     957           50 :                 if (entry->isFinished)
     958              :                 {
     959            3 :                     ItemPointerSetInvalid(&entry->curItem);
     960            3 :                     return;
     961              :                 }
     962              :             }
     963              : 
     964       339459 :             entry->curItem = entry->list[entry->offset++];
     965              : 
     966              :             /* If we're not past advancePast, keep scanning */
     967       339459 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     968        23553 :                 continue;
     969              : 
     970              :             /* Done unless we need to reduce the result */
     971       315906 :             if (!entry->reduceResult || !dropItem(entry))
     972              :                 break;
     973              : 
     974              :             /*
     975              :              * Advance advancePast (so that entryLoadMoreItems will load the
     976              :              * right data), and keep scanning
     977              :              */
     978        59477 :             advancePast = entry->curItem;
     979              :         }
     980              :     }
     981              : }
     982              : 
     983              : /*
     984              :  * Identify the "current" item among the input entry streams for this scan key
     985              :  * that is greater than advancePast, and test whether it passes the scan key
     986              :  * qual condition.
     987              :  *
     988              :  * The current item is the smallest curItem among the inputs.  key->curItem
     989              :  * is set to that value.  key->curItemMatches is set to indicate whether that
     990              :  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
     991              :  * iff recheck is needed for this item pointer (including the case where the
     992              :  * item pointer is a lossy page pointer).
     993              :  *
     994              :  * If all entry streams are exhausted, sets key->isFinished to true.
     995              :  *
     996              :  * Item pointers must be returned in ascending order.
     997              :  *
     998              :  * Note: this can return a "lossy page" item pointer, indicating that the
     999              :  * key potentially matches all items on that heap page.  However, it is
    1000              :  * not allowed to return both a lossy page pointer and exact (regular)
    1001              :  * item pointers for the same page.  (Doing so would break the key-combination
    1002              :  * logic in scanGetItem.)
    1003              :  */
    1004              : static void
    1005       503923 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
    1006              :            ItemPointerData advancePast)
    1007              : {
    1008              :     ItemPointerData minItem;
    1009              :     ItemPointerData curPageLossy;
    1010              :     uint32      i;
    1011              :     bool        haveLossyEntry;
    1012              :     GinScanEntry entry;
    1013              :     GinTernaryValue res;
    1014              :     MemoryContext oldCtx;
    1015              :     bool        allFinished;
    1016              : 
    1017              :     Assert(!key->isFinished);
    1018              : 
    1019              :     /*
    1020              :      * We might have already tested this item; if so, no need to repeat work.
    1021              :      * (Note: the ">" case can happen, if advancePast is exact but we
    1022              :      * previously had to set curItem to a lossy-page pointer.)
    1023              :      */
    1024       503923 :     if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
    1025          977 :         return;
    1026              : 
    1027              :     /*
    1028              :      * Find the minimum item > advancePast among the active entry streams.
    1029              :      *
    1030              :      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
    1031              :      * offset (0xffff), so that it will sort after any exact entries for the
    1032              :      * same page.  So we'll prefer to return exact pointers not lossy
    1033              :      * pointers, which is good.
    1034              :      */
    1035       503917 :     ItemPointerSetMax(&minItem);
    1036       503917 :     allFinished = true;
    1037      1385277 :     for (i = 0; i < key->nrequired; i++)
    1038              :     {
    1039       881360 :         entry = key->requiredEntries[i];
    1040              : 
    1041       881360 :         if (entry->isFinished)
    1042       281124 :             continue;
    1043              : 
    1044              :         /*
    1045              :          * Advance this stream if necessary.
    1046              :          *
    1047              :          * In particular, since entry->curItem was initialized with
    1048              :          * ItemPointerSetMin, this ensures we fetch the first item for each
    1049              :          * entry on the first call.
    1050              :          */
    1051       600236 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1052              :         {
    1053       519797 :             entryGetItem(ginstate, entry, advancePast);
    1054       519797 :             if (entry->isFinished)
    1055         2224 :                 continue;
    1056              :         }
    1057              : 
    1058       598012 :         allFinished = false;
    1059       598012 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1060       550337 :             minItem = entry->curItem;
    1061              :     }
    1062              : 
    1063       503917 :     if (allFinished && !key->excludeOnly)
    1064              :     {
    1065              :         /* all entries are finished */
    1066          971 :         key->isFinished = true;
    1067          971 :         return;
    1068              :     }
    1069              : 
    1070       502946 :     if (!key->excludeOnly)
    1071              :     {
    1072              :         /*
    1073              :          * For a normal scan key, we now know there are no matches < minItem.
    1074              :          *
    1075              :          * If minItem is lossy, it means that there were no exact items on the
    1076              :          * page among requiredEntries, because lossy pointers sort after exact
    1077              :          * items. However, there might be exact items for the same page among
    1078              :          * additionalEntries, so we mustn't advance past them.
    1079              :          */
    1080       499715 :         if (ItemPointerIsLossyPage(&minItem))
    1081              :         {
    1082            0 :             if (GinItemPointerGetBlockNumber(&advancePast) <
    1083            0 :                 GinItemPointerGetBlockNumber(&minItem))
    1084              :             {
    1085            0 :                 ItemPointerSet(&advancePast,
    1086              :                                GinItemPointerGetBlockNumber(&minItem),
    1087              :                                InvalidOffsetNumber);
    1088              :             }
    1089              :         }
    1090              :         else
    1091              :         {
    1092              :             Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
    1093       499715 :             ItemPointerSet(&advancePast,
    1094              :                            GinItemPointerGetBlockNumber(&minItem),
    1095       499715 :                            OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
    1096              :         }
    1097              :     }
    1098              :     else
    1099              :     {
    1100              :         /*
    1101              :          * excludeOnly scan keys don't have any entries that are necessarily
    1102              :          * present in matching items.  So, we consider the item just after
    1103              :          * advancePast.
    1104              :          */
    1105              :         Assert(key->nrequired == 0);
    1106         3231 :         ItemPointerSet(&minItem,
    1107              :                        GinItemPointerGetBlockNumber(&advancePast),
    1108         3231 :                        OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
    1109              :     }
    1110              : 
    1111              :     /*
    1112              :      * We might not have loaded all the entry streams for this TID yet. We
    1113              :      * could call the consistent function, passing MAYBE for those entries, to
    1114              :      * see if it can decide if this TID matches based on the information we
    1115              :      * have. But if the consistent-function is expensive, and cannot in fact
    1116              :      * decide with partial information, that could be a big loss. So, load all
    1117              :      * the additional entries, before calling the consistent function.
    1118              :      */
    1119       540276 :     for (i = 0; i < key->nadditional; i++)
    1120              :     {
    1121        37330 :         entry = key->additionalEntries[i];
    1122              : 
    1123        37330 :         if (entry->isFinished)
    1124          206 :             continue;
    1125              : 
    1126        37124 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1127              :         {
    1128        30878 :             entryGetItem(ginstate, entry, advancePast);
    1129        30878 :             if (entry->isFinished)
    1130           89 :                 continue;
    1131              :         }
    1132              : 
    1133              :         /*
    1134              :          * Normally, none of the items in additionalEntries can have a curItem
    1135              :          * larger than minItem. But if minItem is a lossy page, then there
    1136              :          * might be exact items on the same page among additionalEntries.
    1137              :          */
    1138        37035 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1139              :         {
    1140              :             Assert(ItemPointerIsLossyPage(&minItem));
    1141            0 :             minItem = entry->curItem;
    1142              :         }
    1143              :     }
    1144              : 
    1145              :     /*
    1146              :      * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
    1147              :      * and perform consistentFn test.
    1148              :      *
    1149              :      * Lossy-page entries pose a problem, since we don't know the correct
    1150              :      * entryRes state to pass to the consistentFn, and we also don't know what
    1151              :      * its combining logic will be (could be AND, OR, or even NOT). If the
    1152              :      * logic is OR then the consistentFn might succeed for all items in the
    1153              :      * lossy page even when none of the other entries match.
    1154              :      *
    1155              :      * Our strategy is to call the tri-state consistent function, with the
    1156              :      * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
    1157              :      * returns FALSE, none of the lossy items alone are enough for a match, so
    1158              :      * we don't need to return a lossy-page pointer. Otherwise, return a
    1159              :      * lossy-page pointer to indicate that the whole heap page must be
    1160              :      * checked.  (On subsequent calls, we'll do nothing until minItem is past
    1161              :      * the page altogether, thus ensuring that we never return both regular
    1162              :      * and lossy pointers for the same page.)
    1163              :      *
    1164              :      * An exception is that it doesn't matter what we pass for lossy pointers
    1165              :      * in "hidden" entries, because the consistentFn's result can't depend on
    1166              :      * them. We could pass them as MAYBE as well, but if we're using the
    1167              :      * "shim" implementation of a tri-state consistent function (see
    1168              :      * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
    1169              :      * them as true.
    1170              :      *
    1171              :      * Note that only lossy-page entries pointing to the current item's page
    1172              :      * should trigger this processing; we might have future lossy pages in the
    1173              :      * entry array, but they aren't relevant yet.
    1174              :      */
    1175       502946 :     key->curItem = minItem;
    1176       502946 :     ItemPointerSetLossyPage(&curPageLossy,
    1177              :                             GinItemPointerGetBlockNumber(&key->curItem));
    1178       502946 :     haveLossyEntry = false;
    1179      1418522 :     for (i = 0; i < key->nentries; i++)
    1180              :     {
    1181       915576 :         entry = key->scanEntry[i];
    1182      1550623 :         if (entry->isFinished == false &&
    1183       635047 :             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1184              :         {
    1185            0 :             if (i < key->nuserentries)
    1186            0 :                 key->entryRes[i] = GIN_MAYBE;
    1187              :             else
    1188            0 :                 key->entryRes[i] = GIN_TRUE;
    1189            0 :             haveLossyEntry = true;
    1190              :         }
    1191              :         else
    1192       915576 :             key->entryRes[i] = GIN_FALSE;
    1193              :     }
    1194              : 
    1195              :     /* prepare for calling consistentFn in temp context */
    1196       502946 :     oldCtx = MemoryContextSwitchTo(tempCtx);
    1197              : 
    1198       502946 :     if (haveLossyEntry)
    1199              :     {
    1200              :         /* Have lossy-page entries, so see if whole page matches */
    1201            0 :         res = key->triConsistentFn(key);
    1202              : 
    1203            0 :         if (res == GIN_TRUE || res == GIN_MAYBE)
    1204              :         {
    1205              :             /* Yes, so clean up ... */
    1206            0 :             MemoryContextSwitchTo(oldCtx);
    1207            0 :             MemoryContextReset(tempCtx);
    1208              : 
    1209              :             /* and return lossy pointer for whole page */
    1210            0 :             key->curItem = curPageLossy;
    1211            0 :             key->curItemMatches = true;
    1212            0 :             key->recheckCurItem = true;
    1213            0 :             return;
    1214              :         }
    1215              :     }
    1216              : 
    1217              :     /*
    1218              :      * At this point we know that we don't need to return a lossy whole-page
    1219              :      * pointer, but we might have matches for individual exact item pointers,
    1220              :      * possibly in combination with a lossy pointer. Pass lossy pointers as
    1221              :      * MAYBE to the ternary consistent function, to let it decide if this
    1222              :      * tuple satisfies the overall key, even though we don't know if the lossy
    1223              :      * entries match.
    1224              :      *
    1225              :      * Prepare entryRes array to be passed to consistentFn.
    1226              :      */
    1227      1418522 :     for (i = 0; i < key->nentries; i++)
    1228              :     {
    1229       915576 :         entry = key->scanEntry[i];
    1230       915576 :         if (entry->isFinished)
    1231       280529 :             key->entryRes[i] = GIN_FALSE;
    1232              : #if 0
    1233              : 
    1234              :         /*
    1235              :          * This case can't currently happen, because we loaded all the entries
    1236              :          * for this item earlier.
    1237              :          */
    1238              :         else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1239              :             key->entryRes[i] = GIN_MAYBE;
    1240              : #endif
    1241       635047 :         else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1242            0 :             key->entryRes[i] = GIN_MAYBE;
    1243       635047 :         else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
    1244       543224 :             key->entryRes[i] = GIN_TRUE;
    1245              :         else
    1246        91823 :             key->entryRes[i] = GIN_FALSE;
    1247              :     }
    1248              : 
    1249       502946 :     res = key->triConsistentFn(key);
    1250              : 
    1251       502946 :     switch (res)
    1252              :     {
    1253       433129 :         case GIN_TRUE:
    1254       433129 :             key->curItemMatches = true;
    1255              :             /* triConsistentFn set recheckCurItem */
    1256       433129 :             break;
    1257              : 
    1258        10013 :         case GIN_FALSE:
    1259        10013 :             key->curItemMatches = false;
    1260        10013 :             break;
    1261              : 
    1262        59804 :         case GIN_MAYBE:
    1263        59804 :             key->curItemMatches = true;
    1264        59804 :             key->recheckCurItem = true;
    1265        59804 :             break;
    1266              : 
    1267            0 :         default:
    1268              : 
    1269              :             /*
    1270              :              * the 'default' case shouldn't happen, but if the consistent
    1271              :              * function returns something bogus, this is the safe result
    1272              :              */
    1273            0 :             key->curItemMatches = true;
    1274            0 :             key->recheckCurItem = true;
    1275            0 :             break;
    1276              :     }
    1277              : 
    1278              :     /*
    1279              :      * We have a tuple, and we know if it matches or not. If it's a non-match,
    1280              :      * we could continue to find the next matching tuple, but let's break out
    1281              :      * and give scanGetItem a chance to advance the other keys. They might be
    1282              :      * able to skip past to a much higher TID, allowing us to save work.
    1283              :      */
    1284              : 
    1285              :     /* clean up after consistentFn calls */
    1286       502946 :     MemoryContextSwitchTo(oldCtx);
    1287       502946 :     MemoryContextReset(tempCtx);
    1288              : }
    1289              : 
    1290              : /*
    1291              :  * Get next heap item pointer (after advancePast) from scan.
    1292              :  * Returns true if anything found.
    1293              :  * On success, *item and *recheck are set.
    1294              :  *
    1295              :  * Note: this is very nearly the same logic as in keyGetItem(), except
    1296              :  * that we know the keys are to be combined with AND logic, whereas in
    1297              :  * keyGetItem() the combination logic is known only to the consistentFn.
    1298              :  */
    1299              : static bool
    1300       490625 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
    1301              :             ItemPointerData *item, bool *recheck)
    1302              : {
    1303       490625 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1304              :     uint32      i;
    1305              :     bool        match;
    1306              : 
    1307              :     /*----------
    1308              :      * Advance the scan keys in lock-step, until we find an item that matches
    1309              :      * all the keys. If any key reports isFinished, meaning its subset of the
    1310              :      * entries is exhausted, we can stop.  Otherwise, set *item to the next
    1311              :      * matching item.
    1312              :      *
    1313              :      * This logic works only if a keyGetItem stream can never contain both
    1314              :      * exact and lossy pointers for the same page.  Else we could have a
    1315              :      * case like
    1316              :      *
    1317              :      *      stream 1        stream 2
    1318              :      *      ...             ...
    1319              :      *      42/6            42/7
    1320              :      *      50/1            42/0xffff
    1321              :      *      ...             ...
    1322              :      *
    1323              :      * We would conclude that 42/6 is not a match and advance stream 1,
    1324              :      * thus never detecting the match to the lossy pointer in stream 2.
    1325              :      * (keyGetItem has a similar problem versus entryGetItem.)
    1326              :      *----------
    1327              :      */
    1328              :     do
    1329              :     {
    1330       500662 :         CHECK_FOR_INTERRUPTS();
    1331              : 
    1332       500662 :         ItemPointerSetMin(item);
    1333       500662 :         match = true;
    1334       993601 :         for (i = 0; i < so->nkeys && match; i++)
    1335              :         {
    1336       503923 :             GinScanKey  key = so->keys + i;
    1337              : 
    1338              :             /*
    1339              :              * If we're considering a lossy page, skip excludeOnly keys. They
    1340              :              * can't exclude the whole page anyway.
    1341              :              */
    1342       503923 :             if (ItemPointerIsLossyPage(item) && key->excludeOnly)
    1343              :             {
    1344              :                 /*
    1345              :                  * ginNewScanKey() should never mark the first key as
    1346              :                  * excludeOnly.
    1347              :                  */
    1348              :                 Assert(i > 0);
    1349            0 :                 continue;
    1350              :             }
    1351              : 
    1352              :             /* Fetch the next item for this key that is > advancePast. */
    1353       503923 :             keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
    1354              : 
    1355       503923 :             if (key->isFinished)
    1356          971 :                 return false;
    1357              : 
    1358              :             /*
    1359              :              * If it's not a match, we can immediately conclude that nothing
    1360              :              * <= this item matches, without checking the rest of the keys.
    1361              :              */
    1362       502952 :             if (!key->curItemMatches)
    1363              :             {
    1364        10013 :                 advancePast = key->curItem;
    1365        10013 :                 match = false;
    1366        10013 :                 break;
    1367              :             }
    1368              : 
    1369              :             /*
    1370              :              * It's a match. We can conclude that nothing < matches, so the
    1371              :              * other key streams can skip to this item.
    1372              :              *
    1373              :              * Beware of lossy pointers, though; from a lossy pointer, we can
    1374              :              * only conclude that nothing smaller than this *block* matches.
    1375              :              */
    1376       492939 :             if (ItemPointerIsLossyPage(&key->curItem))
    1377              :             {
    1378            0 :                 if (GinItemPointerGetBlockNumber(&advancePast) <
    1379            0 :                     GinItemPointerGetBlockNumber(&key->curItem))
    1380              :                 {
    1381            0 :                     ItemPointerSet(&advancePast,
    1382            0 :                                    GinItemPointerGetBlockNumber(&key->curItem),
    1383              :                                    InvalidOffsetNumber);
    1384              :                 }
    1385              :             }
    1386              :             else
    1387              :             {
    1388              :                 Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
    1389       492939 :                 ItemPointerSet(&advancePast,
    1390       492939 :                                GinItemPointerGetBlockNumber(&key->curItem),
    1391       492939 :                                OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
    1392              :             }
    1393              : 
    1394              :             /*
    1395              :              * If this is the first key, remember this location as a potential
    1396              :              * match, and proceed to check the rest of the keys.
    1397              :              *
    1398              :              * Otherwise, check if this is the same item that we checked the
    1399              :              * previous keys for (or a lossy pointer for the same page). If
    1400              :              * not, loop back to check the previous keys for this item (we
    1401              :              * will check this key again too, but keyGetItem returns quickly
    1402              :              * for that)
    1403              :              */
    1404       492939 :             if (i == 0)
    1405              :             {
    1406       490729 :                 *item = key->curItem;
    1407              :             }
    1408              :             else
    1409              :             {
    1410         4420 :                 if (ItemPointerIsLossyPage(&key->curItem) ||
    1411         2210 :                     ItemPointerIsLossyPage(item))
    1412              :                 {
    1413              :                     Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
    1414            0 :                     match = (GinItemPointerGetBlockNumber(&key->curItem) ==
    1415            0 :                              GinItemPointerGetBlockNumber(item));
    1416              :                 }
    1417              :                 else
    1418              :                 {
    1419              :                     Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
    1420         2210 :                     match = (ginCompareItemPointers(&key->curItem, item) == 0);
    1421              :                 }
    1422              :             }
    1423              :         }
    1424       499691 :     } while (!match);
    1425              : 
    1426              :     Assert(!ItemPointerIsMin(item));
    1427              : 
    1428              :     /*
    1429              :      * Now *item contains the first ItemPointer after previous result that
    1430              :      * satisfied all the keys for that exact TID, or a lossy reference to the
    1431              :      * same page.
    1432              :      *
    1433              :      * We must return recheck = true if any of the keys are marked recheck.
    1434              :      */
    1435       489654 :     *recheck = false;
    1436       914160 :     for (i = 0; i < so->nkeys; i++)
    1437              :     {
    1438       489840 :         GinScanKey  key = so->keys + i;
    1439              : 
    1440       489840 :         if (key->recheckCurItem)
    1441              :         {
    1442        65334 :             *recheck = true;
    1443        65334 :             break;
    1444              :         }
    1445              :     }
    1446              : 
    1447       489654 :     return true;
    1448              : }
    1449              : 
    1450              : 
    1451              : /*
    1452              :  * Functions for scanning the pending list
    1453              :  */
    1454              : 
    1455              : 
    1456              : /*
    1457              :  * Get ItemPointer of next heap row to be checked from pending list.
    1458              :  * Returns false if there are no more. On pages with several heap rows
    1459              :  * it returns each row separately, on page with part of heap row returns
    1460              :  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
    1461              :  * the range of pending-list tuples belonging to this heap row.
    1462              :  *
    1463              :  * The pendingBuffer is presumed pinned and share-locked on entry, and is
    1464              :  * pinned and share-locked on success exit.  On failure exit it's released.
    1465              :  */
    1466              : static bool
    1467          810 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
    1468              : {
    1469              :     OffsetNumber maxoff;
    1470              :     Page        page;
    1471              :     IndexTuple  itup;
    1472              : 
    1473          810 :     ItemPointerSetInvalid(&pos->item);
    1474              :     for (;;)
    1475              :     {
    1476          810 :         page = BufferGetPage(pos->pendingBuffer);
    1477              : 
    1478          810 :         maxoff = PageGetMaxOffsetNumber(page);
    1479          810 :         if (pos->firstOffset > maxoff)
    1480              :         {
    1481           84 :             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
    1482              : 
    1483           84 :             if (blkno == InvalidBlockNumber)
    1484              :             {
    1485           84 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1486           84 :                 pos->pendingBuffer = InvalidBuffer;
    1487              : 
    1488           84 :                 return false;
    1489              :             }
    1490              :             else
    1491              :             {
    1492              :                 /*
    1493              :                  * Here we must prevent deletion of next page by insertcleanup
    1494              :                  * process, which may be trying to obtain exclusive lock on
    1495              :                  * current page.  So, we lock next page before releasing the
    1496              :                  * current one
    1497              :                  */
    1498            0 :                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
    1499              : 
    1500            0 :                 LockBuffer(tmpbuf, GIN_SHARE);
    1501            0 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1502              : 
    1503            0 :                 pos->pendingBuffer = tmpbuf;
    1504            0 :                 pos->firstOffset = FirstOffsetNumber;
    1505              :             }
    1506              :         }
    1507              :         else
    1508              :         {
    1509          726 :             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
    1510          726 :             pos->item = itup->t_tid;
    1511          726 :             if (GinPageHasFullRow(page))
    1512              :             {
    1513              :                 /*
    1514              :                  * find itempointer to the next row
    1515              :                  */
    1516         1695 :                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
    1517              :                 {
    1518         1611 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
    1519         1611 :                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
    1520          642 :                         break;
    1521              :                 }
    1522              :             }
    1523              :             else
    1524              :             {
    1525              :                 /*
    1526              :                  * All itempointers are the same on this page
    1527              :                  */
    1528            0 :                 pos->lastOffset = maxoff + 1;
    1529              :             }
    1530              : 
    1531              :             /*
    1532              :              * Now pos->firstOffset points to the first tuple of current heap
    1533              :              * row, pos->lastOffset points to the first tuple of next heap row
    1534              :              * (or to the end of page)
    1535              :              */
    1536          726 :             break;
    1537              :         }
    1538              :     }
    1539              : 
    1540          726 :     return true;
    1541              : }
    1542              : 
    1543              : /*
    1544              :  * Scan pending-list page from current tuple (off) up till the first of:
    1545              :  * - match is found (then returns true)
    1546              :  * - no later match is possible
    1547              :  * - tuple's attribute number is not equal to entry's attrnum
    1548              :  * - reach end of page
    1549              :  *
    1550              :  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
    1551              :  * of gintuple_get_key() on the current page.
    1552              :  */
    1553              : static bool
    1554            0 : matchPartialInPendingList(GinState *ginstate, Page page,
    1555              :                           OffsetNumber off, OffsetNumber maxoff,
    1556              :                           GinScanEntry entry,
    1557              :                           Datum *datum, GinNullCategory *category,
    1558              :                           bool *datumExtracted)
    1559              : {
    1560              :     IndexTuple  itup;
    1561              :     int32       cmp;
    1562              : 
    1563              :     /* Partial match to a null is not possible */
    1564            0 :     if (entry->queryCategory != GIN_CAT_NORM_KEY)
    1565            0 :         return false;
    1566              : 
    1567            0 :     while (off < maxoff)
    1568              :     {
    1569            0 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
    1570              : 
    1571            0 :         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
    1572            0 :             return false;
    1573              : 
    1574            0 :         if (datumExtracted[off - 1] == false)
    1575              :         {
    1576            0 :             datum[off - 1] = gintuple_get_key(ginstate, itup,
    1577            0 :                                               &category[off - 1]);
    1578            0 :             datumExtracted[off - 1] = true;
    1579              :         }
    1580              : 
    1581              :         /* Once we hit nulls, no further match is possible */
    1582            0 :         if (category[off - 1] != GIN_CAT_NORM_KEY)
    1583            0 :             return false;
    1584              : 
    1585              :         /*----------
    1586              :          * Check partial match.
    1587              :          * case cmp == 0 => match
    1588              :          * case cmp > 0 => not match and end scan (no later match possible)
    1589              :          * case cmp < 0 => not match and continue scan
    1590              :          *----------
    1591              :          */
    1592            0 :         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
    1593            0 :                                               ginstate->supportCollation[entry->attnum - 1],
    1594              :                                               entry->queryKey,
    1595            0 :                                               datum[off - 1],
    1596            0 :                                               UInt16GetDatum(entry->strategy),
    1597            0 :                                               PointerGetDatum(entry->extra_data)));
    1598            0 :         if (cmp == 0)
    1599            0 :             return true;
    1600            0 :         else if (cmp > 0)
    1601            0 :             return false;
    1602              : 
    1603            0 :         off++;
    1604              :     }
    1605              : 
    1606            0 :     return false;
    1607              : }
    1608              : 
    1609              : /*
    1610              :  * Set up the entryRes array for each key by looking at
    1611              :  * every entry for current heap row in pending list.
    1612              :  *
    1613              :  * Returns true if each scan key has at least one entryRes match.
    1614              :  * This corresponds to the situations where the normal index search will
    1615              :  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
    1616              :  * cannot be returned by the normal search since no entry stream will
    1617              :  * source its TID.)
    1618              :  *
    1619              :  * The pendingBuffer is presumed pinned and share-locked on entry.
    1620              :  */
    1621              : static bool
    1622          726 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
    1623              : {
    1624          726 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1625              :     OffsetNumber attrnum;
    1626              :     Page        page;
    1627              :     IndexTuple  itup;
    1628              :     int         i,
    1629              :                 j;
    1630              : 
    1631              :     /*
    1632              :      * Reset all entryRes and hasMatchKey flags
    1633              :      */
    1634         1968 :     for (i = 0; i < so->nkeys; i++)
    1635              :     {
    1636         1242 :         GinScanKey  key = so->keys + i;
    1637              : 
    1638         1242 :         memset(key->entryRes, GIN_FALSE, key->nentries);
    1639              :     }
    1640          726 :     memset(pos->hasMatchKey, false, so->nkeys);
    1641              : 
    1642              :     /*
    1643              :      * Outer loop iterates over multiple pending-list pages when a single heap
    1644              :      * row has entries spanning those pages.
    1645              :      */
    1646              :     for (;;)
    1647            0 :     {
    1648              :         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
    1649              :         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
    1650              :         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
    1651              : 
    1652              :         Assert(pos->lastOffset > pos->firstOffset);
    1653          726 :         memset(datumExtracted + pos->firstOffset - 1, 0,
    1654          726 :                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
    1655              : 
    1656          726 :         page = BufferGetPage(pos->pendingBuffer);
    1657              : 
    1658         1968 :         for (i = 0; i < so->nkeys; i++)
    1659              :         {
    1660         1242 :             GinScanKey  key = so->keys + i;
    1661              : 
    1662         2406 :             for (j = 0; j < key->nentries; j++)
    1663              :             {
    1664         1164 :                 GinScanEntry entry = key->scanEntry[j];
    1665         1164 :                 OffsetNumber StopLow = pos->firstOffset,
    1666         1164 :                             StopHigh = pos->lastOffset,
    1667              :                             StopMiddle;
    1668              : 
    1669              :                 /* If already matched on earlier page, do no extra work */
    1670         1164 :                 if (key->entryRes[j])
    1671            0 :                     continue;
    1672              : 
    1673              :                 /*
    1674              :                  * Interesting tuples are from pos->firstOffset to
    1675              :                  * pos->lastOffset and they are ordered by (attnum, Datum) as
    1676              :                  * it's done in entry tree.  So we can use binary search to
    1677              :                  * avoid linear scanning.
    1678              :                  */
    1679         2598 :                 while (StopLow < StopHigh)
    1680              :                 {
    1681              :                     int         res;
    1682              : 
    1683         2047 :                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    1684              : 
    1685         2047 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
    1686              : 
    1687         2047 :                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
    1688              : 
    1689         2047 :                     if (key->attnum < attrnum)
    1690              :                     {
    1691          399 :                         StopHigh = StopMiddle;
    1692          399 :                         continue;
    1693              :                     }
    1694         1648 :                     if (key->attnum > attrnum)
    1695              :                     {
    1696          330 :                         StopLow = StopMiddle + 1;
    1697          330 :                         continue;
    1698              :                     }
    1699              : 
    1700         1318 :                     if (datumExtracted[StopMiddle - 1] == false)
    1701              :                     {
    1702         2508 :                         datum[StopMiddle - 1] =
    1703         1254 :                             gintuple_get_key(&so->ginstate, itup,
    1704         1254 :                                              &category[StopMiddle - 1]);
    1705         1254 :                         datumExtracted[StopMiddle - 1] = true;
    1706              :                     }
    1707              : 
    1708         1318 :                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
    1709              :                     {
    1710              :                         /* special behavior depending on searchMode */
    1711          542 :                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
    1712              :                         {
    1713              :                             /* match anything except NULL_ITEM */
    1714          542 :                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
    1715          189 :                                 res = -1;
    1716              :                             else
    1717          353 :                                 res = 0;
    1718              :                         }
    1719              :                         else
    1720              :                         {
    1721              :                             /* match everything */
    1722            0 :                             res = 0;
    1723              :                         }
    1724              :                     }
    1725              :                     else
    1726              :                     {
    1727          776 :                         res = ginCompareEntries(&so->ginstate,
    1728          776 :                                                 entry->attnum,
    1729              :                                                 entry->queryKey,
    1730          776 :                                                 entry->queryCategory,
    1731          776 :                                                 datum[StopMiddle - 1],
    1732          776 :                                                 category[StopMiddle - 1]);
    1733              :                     }
    1734              : 
    1735         1318 :                     if (res == 0)
    1736              :                     {
    1737              :                         /*
    1738              :                          * Found exact match (there can be only one, except in
    1739              :                          * EMPTY_QUERY mode).
    1740              :                          *
    1741              :                          * If doing partial match, scan forward from here to
    1742              :                          * end of page to check for matches.
    1743              :                          *
    1744              :                          * See comment above about tuple's ordering.
    1745              :                          */
    1746          613 :                         if (entry->isPartialMatch)
    1747            0 :                             key->entryRes[j] =
    1748            0 :                                 matchPartialInPendingList(&so->ginstate,
    1749              :                                                           page,
    1750              :                                                           StopMiddle,
    1751            0 :                                                           pos->lastOffset,
    1752              :                                                           entry,
    1753              :                                                           datum,
    1754              :                                                           category,
    1755              :                                                           datumExtracted);
    1756              :                         else
    1757          613 :                             key->entryRes[j] = true;
    1758              : 
    1759              :                         /* done with binary search */
    1760          613 :                         break;
    1761              :                     }
    1762          705 :                     else if (res < 0)
    1763          677 :                         StopHigh = StopMiddle;
    1764              :                     else
    1765           28 :                         StopLow = StopMiddle + 1;
    1766              :                 }
    1767              : 
    1768         1164 :                 if (StopLow >= StopHigh && entry->isPartialMatch)
    1769              :                 {
    1770              :                     /*
    1771              :                      * No exact match on this page.  If doing partial match,
    1772              :                      * scan from the first tuple greater than target value to
    1773              :                      * end of page.  Note that since we don't remember whether
    1774              :                      * the comparePartialFn told us to stop early on a
    1775              :                      * previous page, we will uselessly apply comparePartialFn
    1776              :                      * to the first tuple on each subsequent page.
    1777              :                      */
    1778            0 :                     key->entryRes[j] =
    1779            0 :                         matchPartialInPendingList(&so->ginstate,
    1780              :                                                   page,
    1781              :                                                   StopHigh,
    1782            0 :                                                   pos->lastOffset,
    1783              :                                                   entry,
    1784              :                                                   datum,
    1785              :                                                   category,
    1786              :                                                   datumExtracted);
    1787              :                 }
    1788              : 
    1789         1164 :                 pos->hasMatchKey[i] |= key->entryRes[j];
    1790              :             }
    1791              :         }
    1792              : 
    1793              :         /* Advance firstOffset over the scanned tuples */
    1794          726 :         pos->firstOffset = pos->lastOffset;
    1795              : 
    1796          726 :         if (GinPageHasFullRow(page))
    1797              :         {
    1798              :             /*
    1799              :              * We have examined all pending entries for the current heap row.
    1800              :              * Break out of loop over pages.
    1801              :              */
    1802          726 :             break;
    1803              :         }
    1804              :         else
    1805              :         {
    1806              :             /*
    1807              :              * Advance to next page of pending entries for the current heap
    1808              :              * row.  Complain if there isn't one.
    1809              :              */
    1810            0 :             ItemPointerData item = pos->item;
    1811              : 
    1812            0 :             if (scanGetCandidate(scan, pos) == false ||
    1813            0 :                 !ItemPointerEquals(&pos->item, &item))
    1814            0 :                 elog(ERROR, "could not find additional pending pages for same heap tuple");
    1815              :         }
    1816              :     }
    1817              : 
    1818              :     /*
    1819              :      * All scan keys except excludeOnly require at least one entry to match.
    1820              :      * excludeOnly keys are an exception, because their implied
    1821              :      * GIN_CAT_EMPTY_QUERY scanEntry always matches.  So return "true" if all
    1822              :      * non-excludeOnly scan keys have at least one match.
    1823              :      */
    1824         1263 :     for (i = 0; i < so->nkeys; i++)
    1825              :     {
    1826          984 :         if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
    1827          447 :             return false;
    1828              :     }
    1829              : 
    1830          279 :     return true;
    1831              : }
    1832              : 
    1833              : /*
    1834              :  * Collect all matched rows from pending list into bitmap.
    1835              :  */
    1836              : static void
    1837          971 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
    1838              : {
    1839          971 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1840              :     MemoryContext oldCtx;
    1841              :     bool        recheck,
    1842              :                 match;
    1843              :     int         i;
    1844              :     pendingPosition pos;
    1845          971 :     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    1846              :     Page        page;
    1847              :     BlockNumber blkno;
    1848              : 
    1849          971 :     *ntids = 0;
    1850              : 
    1851              :     /*
    1852              :      * Acquire predicate lock on the metapage, to conflict with any fastupdate
    1853              :      * insertions.
    1854              :      */
    1855          971 :     PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
    1856              : 
    1857          971 :     LockBuffer(metabuffer, GIN_SHARE);
    1858          971 :     page = BufferGetPage(metabuffer);
    1859          971 :     blkno = GinPageGetMeta(page)->head;
    1860              : 
    1861              :     /*
    1862              :      * fetch head of list before unlocking metapage. head page must be pinned
    1863              :      * to prevent deletion by vacuum process
    1864              :      */
    1865          971 :     if (blkno == InvalidBlockNumber)
    1866              :     {
    1867              :         /* No pending list, so proceed with normal scan */
    1868          887 :         UnlockReleaseBuffer(metabuffer);
    1869          887 :         return;
    1870              :     }
    1871              : 
    1872           84 :     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    1873           84 :     LockBuffer(pos.pendingBuffer, GIN_SHARE);
    1874           84 :     pos.firstOffset = FirstOffsetNumber;
    1875           84 :     UnlockReleaseBuffer(metabuffer);
    1876           84 :     pos.hasMatchKey = palloc_array(bool, so->nkeys);
    1877              : 
    1878              :     /*
    1879              :      * loop for each heap row. scanGetCandidate returns full row or row's
    1880              :      * tuples from first page.
    1881              :      */
    1882          810 :     while (scanGetCandidate(scan, &pos))
    1883              :     {
    1884              :         /*
    1885              :          * Check entries in tuple and set up entryRes array.
    1886              :          *
    1887              :          * If pending tuples belonging to the current heap row are spread
    1888              :          * across several pages, collectMatchesForHeapRow will read all of
    1889              :          * those pages.
    1890              :          */
    1891          726 :         if (!collectMatchesForHeapRow(scan, &pos))
    1892          447 :             continue;
    1893              : 
    1894              :         /*
    1895              :          * Matching of entries of one row is finished, so check row using
    1896              :          * consistent functions.
    1897              :          */
    1898          279 :         oldCtx = MemoryContextSwitchTo(so->tempCtx);
    1899          279 :         recheck = false;
    1900          279 :         match = true;
    1901              : 
    1902          706 :         for (i = 0; i < so->nkeys; i++)
    1903              :         {
    1904          429 :             GinScanKey  key = so->keys + i;
    1905              : 
    1906          429 :             if (!key->boolConsistentFn(key))
    1907              :             {
    1908            2 :                 match = false;
    1909            2 :                 break;
    1910              :             }
    1911          427 :             recheck |= key->recheckCurItem;
    1912              :         }
    1913              : 
    1914          279 :         MemoryContextSwitchTo(oldCtx);
    1915          279 :         MemoryContextReset(so->tempCtx);
    1916              : 
    1917          279 :         if (match)
    1918              :         {
    1919          277 :             tbm_add_tuples(tbm, &pos.item, 1, recheck);
    1920          277 :             (*ntids)++;
    1921              :         }
    1922              :     }
    1923              : 
    1924           84 :     pfree(pos.hasMatchKey);
    1925              : }
    1926              : 
    1927              : 
    1928              : #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
    1929              : 
    1930              : int64
    1931          977 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    1932              : {
    1933          977 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1934              :     int64       ntids;
    1935              :     ItemPointerData iptr;
    1936              :     bool        recheck;
    1937              : 
    1938              :     /*
    1939              :      * Set up the scan keys, and check for unsatisfiable query.
    1940              :      */
    1941          977 :     ginFreeScanKeys(so);        /* there should be no keys yet, but just to be
    1942              :                                  * sure */
    1943          977 :     ginNewScanKey(scan);
    1944              : 
    1945          977 :     if (GinIsVoidRes(scan))
    1946            6 :         return 0;
    1947              : 
    1948          971 :     ntids = 0;
    1949              : 
    1950              :     /*
    1951              :      * First, scan the pending list and collect any matching entries into the
    1952              :      * bitmap.  After we scan a pending item, some other backend could post it
    1953              :      * into the main index, and so we might visit it a second time during the
    1954              :      * main scan.  This is okay because we'll just re-set the same bit in the
    1955              :      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
    1956              :      * can't support the amgettuple API, however.) Note that it would not do
    1957              :      * to scan the main index before the pending list, since concurrent
    1958              :      * cleanup could then make us miss entries entirely.
    1959              :      */
    1960          971 :     scanPendingInsert(scan, tbm, &ntids);
    1961              : 
    1962              :     /*
    1963              :      * Now scan the main index.
    1964              :      */
    1965          971 :     startScan(scan);
    1966              : 
    1967          971 :     ItemPointerSetMin(&iptr);
    1968              : 
    1969              :     for (;;)
    1970              :     {
    1971       490625 :         if (!scanGetItem(scan, iptr, &iptr, &recheck))
    1972          971 :             break;
    1973              : 
    1974       489654 :         if (ItemPointerIsLossyPage(&iptr))
    1975            0 :             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
    1976              :         else
    1977       489654 :             tbm_add_tuples(tbm, &iptr, 1, recheck);
    1978       489654 :         ntids++;
    1979              :     }
    1980              : 
    1981          971 :     return ntids;
    1982              : }
        

Generated by: LCOV version 2.0-1