Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nbtreadpage.c
4 : * Leaf page reading for btree index scans.
5 : *
6 : * NOTES
7 : * This file contains code to return items that satisfy the scan's
8 : * search-type scan keys within caller-supplied btree leaf page.
9 : *
10 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/access/nbtree/nbtreadpage.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 :
19 : #include "postgres.h"
20 :
21 : #include "access/nbtree.h"
22 : #include "access/relscan.h"
23 : #include "storage/predicate.h"
24 : #include "utils/datum.h"
25 : #include "utils/rel.h"
26 :
27 :
28 : /*
29 : * _bt_readpage state used across _bt_checkkeys calls for a page
30 : */
31 : typedef struct BTReadPageState
32 : {
33 : /* Input parameters, set by _bt_readpage for _bt_checkkeys */
34 : ScanDirection dir; /* current scan direction */
35 : OffsetNumber minoff; /* Lowest non-pivot tuple's offset */
36 : OffsetNumber maxoff; /* Highest non-pivot tuple's offset */
37 : IndexTuple finaltup; /* Needed by scans with array keys */
38 : Page page; /* Page being read */
39 : bool firstpage; /* page is first for primitive scan? */
40 : bool forcenonrequired; /* treat all keys as nonrequired? */
41 : int startikey; /* start comparisons from this scan key */
42 :
43 : /* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */
44 : OffsetNumber offnum; /* current tuple's page offset number */
45 :
46 : /* Output parameters, set by _bt_checkkeys for _bt_readpage */
47 : OffsetNumber skip; /* Array keys "look ahead" skip offnum */
48 : bool continuescan; /* Terminate ongoing (primitive) index scan? */
49 :
50 : /*
51 : * Private _bt_checkkeys state used to manage "look ahead" optimization
52 : * and primscan scheduling (only used during scans with array keys)
53 : */
54 : int16 rechecks;
55 : int16 targetdistance;
56 : int16 nskipadvances;
57 :
58 : } BTReadPageState;
59 :
60 :
61 : static void _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate);
62 : static bool _bt_scanbehind_checkkeys(IndexScanDesc scan, ScanDirection dir,
63 : IndexTuple finaltup);
64 : static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
65 : IndexTuple finaltup);
66 : static void _bt_saveitem(BTScanOpaque so, int itemIndex,
67 : OffsetNumber offnum, IndexTuple itup);
68 : static int _bt_setuppostingitems(BTScanOpaque so, int itemIndex,
69 : OffsetNumber offnum, const ItemPointerData *heapTid,
70 : IndexTuple itup);
71 : static inline void _bt_savepostingitem(BTScanOpaque so, int itemIndex,
72 : OffsetNumber offnum,
73 : ItemPointer heapTid, int tupleOffset);
74 : static bool _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
75 : IndexTuple tuple, int tupnatts);
76 : static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
77 : IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
78 : bool advancenonrequired, bool forcenonrequired,
79 : bool *continuescan, int *ikey);
80 : static bool _bt_check_rowcompare(ScanKey header,
81 : IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
82 : ScanDirection dir, bool forcenonrequired, bool *continuescan);
83 : static bool _bt_rowcompare_cmpresult(ScanKey subkey, int cmpresult);
84 : static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
85 : IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
86 : bool readpagetup, int sktrig, bool *scanBehind);
87 : static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
88 : int tupnatts, TupleDesc tupdesc);
89 : static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
90 : IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
91 : int sktrig, bool sktrig_required);
92 : static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
93 : bool *skip_array_set);
94 : static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
95 : static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
96 : static void _bt_array_set_low_or_high(Relation rel, ScanKey skey,
97 : BTArrayKeyInfo *array, bool low_not_high);
98 : static void _bt_skiparray_set_element(Relation rel, ScanKey skey, BTArrayKeyInfo *array,
99 : int32 set_elem_result, Datum tupdatum, bool tupnull);
100 : static void _bt_skiparray_set_isnull(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
101 : static inline int32 _bt_compare_array_skey(FmgrInfo *orderproc,
102 : Datum tupdatum, bool tupnull,
103 : Datum arrdatum, ScanKey cur);
104 : static void _bt_binsrch_skiparray_skey(bool cur_elem_trig, ScanDirection dir,
105 : Datum tupdatum, bool tupnull,
106 : BTArrayKeyInfo *array, ScanKey cur,
107 : int32 *set_elem_result);
108 : #ifdef USE_ASSERT_CHECKING
109 : static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
110 : #endif
111 :
112 :
113 : /*
114 : * _bt_readpage() -- Load data from current index page into so->currPos
115 : *
116 : * Caller must have pinned and read-locked so->currPos.buf; the buffer's state
117 : * is not changed here. Also, currPos.moreLeft and moreRight must be valid;
118 : * they are updated as appropriate. All other fields of so->currPos are
119 : * initialized from scratch here.
120 : *
121 : * We scan the current page starting at offnum and moving in the indicated
122 : * direction. All items matching the scan keys are loaded into currPos.items.
123 : * moreLeft or moreRight (as appropriate) is cleared if _bt_checkkeys reports
124 : * that there can be no more matching tuples in the current scan direction
125 : * (could just be for the current primitive index scan when scan has arrays).
126 : *
127 : * In the case of a parallel scan, caller must have called _bt_parallel_seize
128 : * prior to calling this function; this function will invoke
129 : * _bt_parallel_release before returning.
130 : *
131 : * Returns true if any matching items found on the page, false if none.
132 : */
133 : bool
134 16172132 : _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
135 : bool firstpage)
136 : {
137 16172132 : Relation rel = scan->indexRelation;
138 16172132 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
139 : Page page;
140 : BTPageOpaque opaque;
141 : OffsetNumber minoff;
142 : OffsetNumber maxoff;
143 : BTReadPageState pstate;
144 : bool arrayKeys,
145 16172132 : ignore_killed_tuples = scan->ignore_killed_tuples;
146 : int itemIndex,
147 : indnatts;
148 :
149 : /* save the page/buffer block number, along with its sibling links */
150 16172132 : page = BufferGetPage(so->currPos.buf);
151 16172132 : opaque = BTPageGetOpaque(page);
152 16172132 : so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
153 16172132 : so->currPos.prevPage = opaque->btpo_prev;
154 16172132 : so->currPos.nextPage = opaque->btpo_next;
155 : /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
156 16172132 : pstate.dir = so->currPos.dir = dir;
157 16172132 : so->currPos.nextTupleOffset = 0;
158 :
159 : /* either moreRight or moreLeft should be set now (may be unset later) */
160 : Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
161 : so->currPos.moreLeft);
162 : Assert(!P_IGNORE(opaque));
163 : Assert(BTScanPosIsPinned(so->currPos));
164 : Assert(!so->needPrimScan);
165 :
166 : /* initialize local variables */
167 16172132 : indnatts = IndexRelationGetNumberOfAttributes(rel);
168 16172132 : arrayKeys = so->numArrayKeys != 0;
169 16172132 : minoff = P_FIRSTDATAKEY(opaque);
170 16172132 : maxoff = PageGetMaxOffsetNumber(page);
171 :
172 : /* initialize page-level state that we'll pass to _bt_checkkeys */
173 16172132 : pstate.minoff = minoff;
174 16172132 : pstate.maxoff = maxoff;
175 16172132 : pstate.finaltup = NULL;
176 16172132 : pstate.page = page;
177 16172132 : pstate.firstpage = firstpage;
178 16172132 : pstate.forcenonrequired = false;
179 16172132 : pstate.startikey = 0;
180 16172132 : pstate.offnum = InvalidOffsetNumber;
181 16172132 : pstate.skip = InvalidOffsetNumber;
182 16172132 : pstate.continuescan = true; /* default assumption */
183 16172132 : pstate.rechecks = 0;
184 16172132 : pstate.targetdistance = 0;
185 16172132 : pstate.nskipadvances = 0;
186 :
187 16172132 : if (scan->parallel_scan)
188 : {
189 : /* allow next/prev page to be read by other worker without delay */
190 1336 : if (ScanDirectionIsForward(dir))
191 1336 : _bt_parallel_release(scan, so->currPos.nextPage,
192 : so->currPos.currPage);
193 : else
194 0 : _bt_parallel_release(scan, so->currPos.prevPage,
195 : so->currPos.currPage);
196 : }
197 :
198 16172132 : PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
199 :
200 16172132 : if (ScanDirectionIsForward(dir))
201 : {
202 : /* SK_SEARCHARRAY forward scans must provide high key up front */
203 16115148 : if (arrayKeys)
204 : {
205 91774 : if (!P_RIGHTMOST(opaque))
206 : {
207 29180 : ItemId iid = PageGetItemId(page, P_HIKEY);
208 :
209 29180 : pstate.finaltup = (IndexTuple) PageGetItem(page, iid);
210 :
211 29180 : if (unlikely(so->scanBehind) &&
212 2570 : !_bt_scanbehind_checkkeys(scan, dir, pstate.finaltup))
213 : {
214 : /* Schedule another primitive index scan after all */
215 406 : so->currPos.moreRight = false;
216 406 : so->needPrimScan = true;
217 406 : if (scan->parallel_scan)
218 0 : _bt_parallel_primscan_schedule(scan,
219 : so->currPos.currPage);
220 406 : return false;
221 : }
222 : }
223 :
224 91368 : so->scanBehind = so->oppositeDirCheck = false; /* reset */
225 : }
226 :
227 : /*
228 : * Consider pstate.startikey optimization once the ongoing primitive
229 : * index scan has already read at least one page
230 : */
231 16114742 : if (!pstate.firstpage && minoff < maxoff)
232 29124 : _bt_set_startikey(scan, &pstate);
233 :
234 : /* load items[] in ascending order */
235 16114742 : itemIndex = 0;
236 :
237 16114742 : offnum = Max(offnum, minoff);
238 :
239 61368636 : while (offnum <= maxoff)
240 : {
241 58021604 : ItemId iid = PageGetItemId(page, offnum);
242 : IndexTuple itup;
243 : bool passes_quals;
244 :
245 : /*
246 : * If the scan specifies not to return killed tuples, then we
247 : * treat a killed tuple as not passing the qual
248 : */
249 58021604 : if (ignore_killed_tuples && ItemIdIsDead(iid))
250 : {
251 4558534 : offnum = OffsetNumberNext(offnum);
252 4558534 : continue;
253 : }
254 :
255 53463070 : itup = (IndexTuple) PageGetItem(page, iid);
256 : Assert(!BTreeTupleIsPivot(itup));
257 :
258 53463070 : pstate.offnum = offnum;
259 53463070 : passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys,
260 : itup, indnatts);
261 :
262 : /*
263 : * Check if we need to skip ahead to a later tuple (only possible
264 : * when the scan uses array keys)
265 : */
266 53463070 : if (arrayKeys && OffsetNumberIsValid(pstate.skip))
267 : {
268 : Assert(!passes_quals && pstate.continuescan);
269 : Assert(offnum < pstate.skip);
270 : Assert(!pstate.forcenonrequired);
271 :
272 4572 : offnum = pstate.skip;
273 4572 : pstate.skip = InvalidOffsetNumber;
274 4572 : continue;
275 : }
276 :
277 53458498 : if (passes_quals)
278 : {
279 : /* tuple passes all scan key conditions */
280 40143346 : if (!BTreeTupleIsPosting(itup))
281 : {
282 : /* Remember it */
283 39608256 : _bt_saveitem(so, itemIndex, offnum, itup);
284 39608256 : itemIndex++;
285 : }
286 : else
287 : {
288 : int tupleOffset;
289 :
290 : /* Set up posting list state (and remember first TID) */
291 : tupleOffset =
292 535090 : _bt_setuppostingitems(so, itemIndex, offnum,
293 535090 : BTreeTupleGetPostingN(itup, 0),
294 : itup);
295 535090 : itemIndex++;
296 :
297 : /* Remember all later TIDs (must be at least one) */
298 2996346 : for (int i = 1; i < BTreeTupleGetNPosting(itup); i++)
299 : {
300 2461256 : _bt_savepostingitem(so, itemIndex, offnum,
301 : BTreeTupleGetPostingN(itup, i),
302 : tupleOffset);
303 2461256 : itemIndex++;
304 : }
305 : }
306 : }
307 : /* When !continuescan, there can't be any more matches, so stop */
308 53458498 : if (!pstate.continuescan)
309 12767710 : break;
310 :
311 40690788 : offnum = OffsetNumberNext(offnum);
312 : }
313 :
314 : /*
315 : * We don't need to visit page to the right when the high key
316 : * indicates that no more matches will be found there.
317 : *
318 : * Checking the high key like this works out more often than you might
319 : * think. Leaf page splits pick a split point between the two most
320 : * dissimilar tuples (this is weighed against the need to evenly share
321 : * free space). Leaf pages with high key attribute values that can
322 : * only appear on non-pivot tuples on the right sibling page are
323 : * common.
324 : */
325 16114742 : if (pstate.continuescan && !so->scanBehind && !P_RIGHTMOST(opaque))
326 : {
327 131718 : ItemId iid = PageGetItemId(page, P_HIKEY);
328 131718 : IndexTuple itup = (IndexTuple) PageGetItem(page, iid);
329 : int truncatt;
330 :
331 : /* Reset arrays, per _bt_set_startikey contract */
332 131718 : if (pstate.forcenonrequired)
333 2142 : _bt_start_array_keys(scan, dir);
334 131718 : pstate.forcenonrequired = false;
335 131718 : pstate.startikey = 0; /* _bt_set_startikey ignores P_HIKEY */
336 :
337 131718 : truncatt = BTreeTupleGetNAtts(itup, rel);
338 131718 : _bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt);
339 : }
340 :
341 16114742 : if (!pstate.continuescan)
342 12849436 : so->currPos.moreRight = false;
343 :
344 : Assert(itemIndex <= MaxTIDsPerBTreePage);
345 16114742 : so->currPos.firstItem = 0;
346 16114742 : so->currPos.lastItem = itemIndex - 1;
347 16114742 : so->currPos.itemIndex = 0;
348 : }
349 : else
350 : {
351 : /* SK_SEARCHARRAY backward scans must provide final tuple up front */
352 56984 : if (arrayKeys)
353 : {
354 78 : if (minoff <= maxoff && !P_LEFTMOST(opaque))
355 : {
356 60 : ItemId iid = PageGetItemId(page, minoff);
357 :
358 60 : pstate.finaltup = (IndexTuple) PageGetItem(page, iid);
359 :
360 60 : if (unlikely(so->scanBehind) &&
361 12 : !_bt_scanbehind_checkkeys(scan, dir, pstate.finaltup))
362 : {
363 : /* Schedule another primitive index scan after all */
364 6 : so->currPos.moreLeft = false;
365 6 : so->needPrimScan = true;
366 6 : if (scan->parallel_scan)
367 0 : _bt_parallel_primscan_schedule(scan,
368 : so->currPos.currPage);
369 6 : return false;
370 : }
371 : }
372 :
373 72 : so->scanBehind = so->oppositeDirCheck = false; /* reset */
374 : }
375 :
376 : /*
377 : * Consider pstate.startikey optimization once the ongoing primitive
378 : * index scan has already read at least one page
379 : */
380 56978 : if (!pstate.firstpage && minoff < maxoff)
381 166 : _bt_set_startikey(scan, &pstate);
382 :
383 : /* load items[] in descending order */
384 56978 : itemIndex = MaxTIDsPerBTreePage;
385 :
386 56978 : offnum = Min(offnum, maxoff);
387 :
388 9231266 : while (offnum >= minoff)
389 : {
390 9174436 : ItemId iid = PageGetItemId(page, offnum);
391 : IndexTuple itup;
392 : bool tuple_alive;
393 : bool passes_quals;
394 :
395 : /*
396 : * If the scan specifies not to return killed tuples, then we
397 : * treat a killed tuple as not passing the qual. Most of the
398 : * time, it's a win to not bother examining the tuple's index
399 : * keys, but just skip to the next tuple (previous, actually,
400 : * since we're scanning backwards). However, if this is the first
401 : * tuple on the page, we do check the index keys, to prevent
402 : * uselessly advancing to the page to the left. This is similar
403 : * to the high key optimization used by forward scans.
404 : */
405 9174436 : if (ignore_killed_tuples && ItemIdIsDead(iid))
406 : {
407 401838 : if (offnum > minoff)
408 : {
409 401100 : offnum = OffsetNumberPrev(offnum);
410 401100 : continue;
411 : }
412 :
413 738 : tuple_alive = false;
414 : }
415 : else
416 8772598 : tuple_alive = true;
417 :
418 8773336 : itup = (IndexTuple) PageGetItem(page, iid);
419 : Assert(!BTreeTupleIsPivot(itup));
420 :
421 8773336 : pstate.offnum = offnum;
422 8773336 : if (arrayKeys && offnum == minoff && pstate.forcenonrequired)
423 : {
424 : /* Reset arrays, per _bt_set_startikey contract */
425 6 : pstate.forcenonrequired = false;
426 6 : pstate.startikey = 0;
427 6 : _bt_start_array_keys(scan, dir);
428 : }
429 8773336 : passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys,
430 : itup, indnatts);
431 :
432 8773336 : if (arrayKeys && so->scanBehind)
433 : {
434 : /*
435 : * Done scanning this page, but not done with the current
436 : * primscan.
437 : *
438 : * Note: Forward scans don't check this explicitly, since they
439 : * prefer to reuse pstate.skip for this instead.
440 : */
441 : Assert(!passes_quals && pstate.continuescan);
442 : Assert(!pstate.forcenonrequired);
443 :
444 18 : break;
445 : }
446 :
447 : /*
448 : * Check if we need to skip ahead to a later tuple (only possible
449 : * when the scan uses array keys)
450 : */
451 8773318 : if (arrayKeys && OffsetNumberIsValid(pstate.skip))
452 : {
453 : Assert(!passes_quals && pstate.continuescan);
454 : Assert(offnum > pstate.skip);
455 : Assert(!pstate.forcenonrequired);
456 :
457 36 : offnum = pstate.skip;
458 36 : pstate.skip = InvalidOffsetNumber;
459 36 : continue;
460 : }
461 :
462 8773282 : if (passes_quals && tuple_alive)
463 : {
464 : /* tuple passes all scan key conditions */
465 8770560 : if (!BTreeTupleIsPosting(itup))
466 : {
467 : /* Remember it */
468 8727286 : itemIndex--;
469 8727286 : _bt_saveitem(so, itemIndex, offnum, itup);
470 : }
471 : else
472 : {
473 43274 : uint16 nitems = BTreeTupleGetNPosting(itup);
474 : int tupleOffset;
475 :
476 : /* Set up posting list state (and remember last TID) */
477 43274 : itemIndex--;
478 : tupleOffset =
479 43274 : _bt_setuppostingitems(so, itemIndex, offnum,
480 43274 : BTreeTupleGetPostingN(itup, nitems - 1),
481 : itup);
482 :
483 : /* Remember all prior TIDs (must be at least one) */
484 166476 : for (int i = nitems - 2; i >= 0; i--)
485 : {
486 123202 : itemIndex--;
487 123202 : _bt_savepostingitem(so, itemIndex, offnum,
488 : BTreeTupleGetPostingN(itup, i),
489 : tupleOffset);
490 : }
491 : }
492 : }
493 : /* When !continuescan, there can't be any more matches, so stop */
494 8773282 : if (!pstate.continuescan)
495 130 : break;
496 :
497 8773152 : offnum = OffsetNumberPrev(offnum);
498 : }
499 :
500 : /*
501 : * We don't need to visit page to the left when no more matches will
502 : * be found there
503 : */
504 56978 : if (!pstate.continuescan)
505 130 : so->currPos.moreLeft = false;
506 :
507 : Assert(itemIndex >= 0);
508 56978 : so->currPos.firstItem = itemIndex;
509 56978 : so->currPos.lastItem = MaxTIDsPerBTreePage - 1;
510 56978 : so->currPos.itemIndex = MaxTIDsPerBTreePage - 1;
511 : }
512 :
513 : /*
514 : * If _bt_set_startikey told us to temporarily treat the scan's keys as
515 : * nonrequired (possible only during scans with array keys), there must be
516 : * no lasting consequences for the scan's array keys. The scan's arrays
517 : * should now have exactly the same elements as they would have had if the
518 : * nonrequired behavior had never been used. (In general, a scan's arrays
519 : * are expected to track its progress through the index's key space.)
520 : *
521 : * We are required (by _bt_set_startikey) to call _bt_checkkeys against
522 : * pstate.finaltup with pstate.forcenonrequired=false to allow the scan's
523 : * arrays to recover. Assert that that step hasn't been missed.
524 : */
525 : Assert(!pstate.forcenonrequired);
526 :
527 16171720 : return (so->currPos.firstItem <= so->currPos.lastItem);
528 : }
529 :
530 : /*
531 : * _bt_start_array_keys() -- Initialize array keys at start of a scan
532 : *
533 : * Set up the cur_elem counters and fill in the first sk_argument value for
534 : * each array scankey.
535 : */
536 : void
537 81760 : _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
538 : {
539 81760 : Relation rel = scan->indexRelation;
540 81760 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
541 :
542 : Assert(so->numArrayKeys);
543 : Assert(so->qual_ok);
544 :
545 164140 : for (int i = 0; i < so->numArrayKeys; i++)
546 : {
547 82380 : BTArrayKeyInfo *array = &so->arrayKeys[i];
548 82380 : ScanKey skey = &so->keyData[array->scan_key];
549 :
550 : Assert(skey->sk_flags & SK_SEARCHARRAY);
551 :
552 82380 : _bt_array_set_low_or_high(rel, skey, array,
553 : ScanDirectionIsForward(dir));
554 : }
555 81760 : so->scanBehind = so->oppositeDirCheck = false; /* reset */
556 81760 : }
557 :
558 : /*
559 : * Determines an offset to the first scan key (an so->keyData[]-wise offset)
560 : * that is _not_ guaranteed to be satisfied by every tuple from pstate.page,
561 : * which is set in pstate.startikey for _bt_checkkeys calls for the page.
562 : * This allows caller to save cycles on comparisons of a prefix of keys while
563 : * reading pstate.page.
564 : *
565 : * Also determines if later calls to _bt_checkkeys (for pstate.page) should be
566 : * forced to treat all required scan keys >= pstate.startikey as nonrequired
567 : * (that is, if they're to be treated as if any SK_BT_REQFWD/SK_BT_REQBKWD
568 : * markings that were set by preprocessing were not set at all, for the
569 : * duration of _bt_checkkeys calls prior to the call for pstate.finaltup).
570 : * This is indicated to caller by setting pstate.forcenonrequired.
571 : *
572 : * Call here at the start of reading a leaf page beyond the first one for the
573 : * primitive index scan. We consider all non-pivot tuples, so it doesn't make
574 : * sense to call here when only a subset of those tuples can ever be read.
575 : * This is also a good idea on performance grounds; not calling here when on
576 : * the first page (first for the current primitive scan) avoids wasting cycles
577 : * during selective point queries. They typically don't stand to gain as much
578 : * when we can set pstate.startikey, and are likely to notice the overhead of
579 : * calling here. (Also, allowing pstate.forcenonrequired to be set on a
580 : * primscan's first page would mislead _bt_advance_array_keys, which expects
581 : * pstate.nskipadvances to be representative of every first page's key space.)
582 : *
583 : * Caller must call _bt_start_array_keys and reset startikey/forcenonrequired
584 : * ahead of the finaltup _bt_checkkeys call when we set forcenonrequired=true.
585 : * This will give _bt_checkkeys the opportunity to call _bt_advance_array_keys
586 : * with sktrig_required=true, restoring the invariant that the scan's required
587 : * arrays always track the scan's progress through the index's key space.
588 : * Caller won't need to do this on the rightmost/leftmost page in the index
589 : * (where pstate.finaltup isn't ever set), since forcenonrequired will never
590 : * be set here in the first place.
591 : */
592 : static void
593 29290 : _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
594 : {
595 29290 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
596 29290 : Relation rel = scan->indexRelation;
597 29290 : TupleDesc tupdesc = RelationGetDescr(rel);
598 : ItemId iid;
599 : IndexTuple firsttup,
600 : lasttup;
601 29290 : int startikey = 0,
602 29290 : arrayidx = 0,
603 : firstchangingattnum;
604 29290 : bool start_past_saop_eq = false;
605 :
606 : Assert(!so->scanBehind);
607 : Assert(pstate->minoff < pstate->maxoff);
608 : Assert(!pstate->firstpage);
609 : Assert(pstate->startikey == 0);
610 : Assert(!so->numArrayKeys || pstate->finaltup ||
611 : P_RIGHTMOST(BTPageGetOpaque(pstate->page)) ||
612 : P_LEFTMOST(BTPageGetOpaque(pstate->page)));
613 :
614 29290 : if (so->numberOfKeys == 0)
615 10794 : return;
616 :
617 : /* minoff is an offset to the lowest non-pivot tuple on the page */
618 18496 : iid = PageGetItemId(pstate->page, pstate->minoff);
619 18496 : firsttup = (IndexTuple) PageGetItem(pstate->page, iid);
620 :
621 : /* maxoff is an offset to the highest non-pivot tuple on the page */
622 18496 : iid = PageGetItemId(pstate->page, pstate->maxoff);
623 18496 : lasttup = (IndexTuple) PageGetItem(pstate->page, iid);
624 :
625 : /* Determine the first attribute whose values change on caller's page */
626 18496 : firstchangingattnum = _bt_keep_natts_fast(rel, firsttup, lasttup);
627 :
628 27734 : for (; startikey < so->numberOfKeys; startikey++)
629 : {
630 21310 : ScanKey key = so->keyData + startikey;
631 : BTArrayKeyInfo *array;
632 : Datum firstdatum,
633 : lastdatum;
634 : bool firstnull,
635 : lastnull;
636 : int32 result;
637 :
638 : /*
639 : * Determine if it's safe to set pstate.startikey to an offset to a
640 : * key that comes after this key, by examining this key
641 : */
642 21310 : if (key->sk_flags & SK_ROW_HEADER)
643 : {
644 : /* RowCompare inequality (header key) */
645 0 : ScanKey subkey = (ScanKey) DatumGetPointer(key->sk_argument);
646 0 : bool satisfied = false;
647 :
648 : for (;;)
649 0 : {
650 : int cmpresult;
651 0 : bool firstsatisfies = false;
652 :
653 0 : if (subkey->sk_attno > firstchangingattnum) /* >, not >= */
654 0 : break; /* unsafe, preceding attr has multiple
655 : * distinct values */
656 :
657 0 : if (subkey->sk_flags & SK_ISNULL)
658 0 : break; /* unsafe, unsatisfiable NULL subkey arg */
659 :
660 0 : firstdatum = index_getattr(firsttup, subkey->sk_attno,
661 : tupdesc, &firstnull);
662 0 : lastdatum = index_getattr(lasttup, subkey->sk_attno,
663 : tupdesc, &lastnull);
664 :
665 0 : if (firstnull || lastnull)
666 : break; /* unsafe, NULL value won't satisfy subkey */
667 :
668 : /*
669 : * Compare the first tuple's datum for this row compare member
670 : */
671 0 : cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
672 : subkey->sk_collation,
673 : firstdatum,
674 : subkey->sk_argument));
675 0 : if (subkey->sk_flags & SK_BT_DESC)
676 0 : INVERT_COMPARE_RESULT(cmpresult);
677 :
678 0 : if (cmpresult != 0 || (subkey->sk_flags & SK_ROW_END))
679 : {
680 0 : firstsatisfies = _bt_rowcompare_cmpresult(subkey,
681 : cmpresult);
682 0 : if (!firstsatisfies)
683 : {
684 : /* Unsafe, firstdatum does not satisfy subkey */
685 0 : break;
686 : }
687 : }
688 :
689 : /*
690 : * Compare the last tuple's datum for this row compare member
691 : */
692 0 : cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
693 : subkey->sk_collation,
694 : lastdatum,
695 : subkey->sk_argument));
696 0 : if (subkey->sk_flags & SK_BT_DESC)
697 0 : INVERT_COMPARE_RESULT(cmpresult);
698 :
699 0 : if (cmpresult != 0 || (subkey->sk_flags & SK_ROW_END))
700 : {
701 0 : if (!firstsatisfies)
702 : {
703 : /*
704 : * It's only safe to set startikey beyond the row
705 : * compare header key when both firsttup and lasttup
706 : * satisfy the key as a whole based on the same
707 : * deciding subkey/attribute. That can't happen now.
708 : */
709 0 : break; /* unsafe */
710 : }
711 :
712 0 : satisfied = _bt_rowcompare_cmpresult(subkey, cmpresult);
713 0 : break; /* safe iff 'satisfied' is true */
714 : }
715 :
716 : /* Move on to next row member/subkey */
717 0 : if (subkey->sk_flags & SK_ROW_END)
718 0 : break; /* defensive */
719 0 : subkey++;
720 :
721 : /*
722 : * We deliberately don't check if the next subkey has the same
723 : * strategy as this iteration's subkey (which happens when
724 : * subkeys for both ASC and DESC columns are used together),
725 : * nor if any subkey is marked required. This is safe because
726 : * in general all prior index attributes must have only one
727 : * distinct value (across all of the tuples on the page) in
728 : * order for us to even consider any subkey's attribute.
729 : */
730 : }
731 :
732 0 : if (satisfied)
733 : {
734 : /* Safe, row compare satisfied by every tuple on page */
735 8992 : continue;
736 : }
737 :
738 12072 : break; /* unsafe */
739 : }
740 21310 : if (key->sk_strategy != BTEqualStrategyNumber)
741 : {
742 : /*
743 : * Scalar inequality key.
744 : *
745 : * It's definitely safe for _bt_checkkeys to avoid assessing this
746 : * inequality when the page's first and last non-pivot tuples both
747 : * satisfy the inequality (since the same must also be true of all
748 : * the tuples in between these two).
749 : *
750 : * Unlike the "=" case, it doesn't matter if this attribute has
751 : * more than one distinct value (though it _is_ necessary for any
752 : * and all _prior_ attributes to contain no more than one distinct
753 : * value amongst all of the tuples from pstate.page).
754 : */
755 4730 : if (key->sk_attno > firstchangingattnum) /* >, not >= */
756 360 : break; /* unsafe, preceding attr has multiple
757 : * distinct values */
758 :
759 4370 : firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc, &firstnull);
760 4370 : lastdatum = index_getattr(lasttup, key->sk_attno, tupdesc, &lastnull);
761 :
762 4370 : if (key->sk_flags & SK_ISNULL)
763 : {
764 : /* IS NOT NULL key */
765 : Assert(key->sk_flags & SK_SEARCHNOTNULL);
766 :
767 142 : if (firstnull || lastnull)
768 : break; /* unsafe */
769 :
770 : /* Safe, IS NOT NULL key satisfied by every tuple */
771 142 : continue;
772 : }
773 :
774 : /* Test firsttup */
775 4228 : if (firstnull ||
776 4228 : !DatumGetBool(FunctionCall2Coll(&key->sk_func,
777 : key->sk_collation, firstdatum,
778 : key->sk_argument)))
779 : break; /* unsafe */
780 :
781 : /* Test lasttup */
782 4228 : if (lastnull ||
783 4228 : !DatumGetBool(FunctionCall2Coll(&key->sk_func,
784 : key->sk_collation, lastdatum,
785 : key->sk_argument)))
786 : break; /* unsafe */
787 :
788 : /* Safe, scalar inequality satisfied by every tuple */
789 4122 : continue;
790 : }
791 :
792 : /* Some = key (could be a scalar = key, could be an array = key) */
793 : Assert(key->sk_strategy == BTEqualStrategyNumber);
794 :
795 16580 : if (!(key->sk_flags & SK_SEARCHARRAY))
796 : {
797 : /*
798 : * Scalar = key (possibly an IS NULL key).
799 : *
800 : * It is unsafe to set pstate.startikey to an ikey beyond this
801 : * key, unless the = key is satisfied by every possible tuple on
802 : * the page (possible only when attribute has just one distinct
803 : * value among all tuples on the page).
804 : */
805 13088 : if (key->sk_attno >= firstchangingattnum)
806 10706 : break; /* unsafe, multiple distinct attr values */
807 :
808 2382 : firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc,
809 : &firstnull);
810 2382 : if (key->sk_flags & SK_ISNULL)
811 : {
812 : /* IS NULL key */
813 : Assert(key->sk_flags & SK_SEARCHNULL);
814 :
815 0 : if (!firstnull)
816 0 : break; /* unsafe */
817 :
818 : /* Safe, IS NULL key satisfied by every tuple */
819 0 : continue;
820 : }
821 2382 : if (firstnull ||
822 2382 : !DatumGetBool(FunctionCall2Coll(&key->sk_func,
823 : key->sk_collation, firstdatum,
824 : key->sk_argument)))
825 : break; /* unsafe */
826 :
827 : /* Safe, scalar = key satisfied by every tuple */
828 2382 : continue;
829 : }
830 :
831 : /* = array key (could be a SAOP array, could be a skip array) */
832 3492 : array = &so->arrayKeys[arrayidx++];
833 : Assert(array->scan_key == startikey);
834 3492 : if (array->num_elems != -1)
835 : {
836 : /*
837 : * SAOP array = key.
838 : *
839 : * Handle this like we handle scalar = keys (though binary search
840 : * for a matching element, to avoid relying on key's sk_argument).
841 : */
842 840 : if (key->sk_attno >= firstchangingattnum)
843 840 : break; /* unsafe, multiple distinct attr values */
844 :
845 0 : firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc,
846 : &firstnull);
847 0 : _bt_binsrch_array_skey(&so->orderProcs[startikey],
848 : false, NoMovementScanDirection,
849 : firstdatum, firstnull, array, key,
850 : &result);
851 0 : if (result != 0)
852 0 : break; /* unsafe */
853 :
854 : /* Safe, SAOP = key satisfied by every tuple */
855 0 : start_past_saop_eq = true;
856 0 : continue;
857 : }
858 :
859 : /*
860 : * Skip array = key
861 : */
862 : Assert(key->sk_flags & SK_BT_SKIP);
863 2652 : if (array->null_elem)
864 : {
865 : /*
866 : * Non-range skip array = key.
867 : *
868 : * Safe, non-range skip array "satisfied" by every tuple on page
869 : * (safe even when "key->sk_attno > firstchangingattnum").
870 : */
871 2346 : continue;
872 : }
873 :
874 : /*
875 : * Range skip array = key.
876 : *
877 : * Handle this like we handle scalar inequality keys (but avoid using
878 : * key's sk_argument directly, as in the SAOP array case).
879 : */
880 306 : if (key->sk_attno > firstchangingattnum) /* >, not >= */
881 48 : break; /* unsafe, preceding attr has multiple
882 : * distinct values */
883 :
884 258 : firstdatum = index_getattr(firsttup, key->sk_attno, tupdesc, &firstnull);
885 258 : lastdatum = index_getattr(lasttup, key->sk_attno, tupdesc, &lastnull);
886 :
887 : /* Test firsttup */
888 258 : _bt_binsrch_skiparray_skey(false, ForwardScanDirection,
889 : firstdatum, firstnull, array, key,
890 : &result);
891 258 : if (result != 0)
892 0 : break; /* unsafe */
893 :
894 : /* Test lasttup */
895 258 : _bt_binsrch_skiparray_skey(false, ForwardScanDirection,
896 : lastdatum, lastnull, array, key,
897 : &result);
898 258 : if (result != 0)
899 12 : break; /* unsafe */
900 :
901 : /* Safe, range skip array satisfied by every tuple on page */
902 : }
903 :
904 : /*
905 : * Use of forcenonrequired is typically undesirable, since it'll force
906 : * _bt_readpage caller to read every tuple on the page -- even though, in
907 : * general, it might well be possible to end the scan on an earlier tuple.
908 : * However, caller must use forcenonrequired when start_past_saop_eq=true,
909 : * since the usual required array behavior might fail to roll over to the
910 : * SAOP array.
911 : *
912 : * We always prefer forcenonrequired=true during scans with skip arrays
913 : * (except on the first page of each primitive index scan), though -- even
914 : * when "startikey == 0". That way, _bt_advance_array_keys's low-order
915 : * key precheck optimization can always be used (unless on the first page
916 : * of the scan). It seems slightly preferable to check more tuples when
917 : * that allows us to do significantly less skip array maintenance.
918 : */
919 18496 : pstate->forcenonrequired = (start_past_saop_eq || so->skipScan);
920 18496 : pstate->startikey = startikey;
921 :
922 : /*
923 : * _bt_readpage caller is required to call _bt_checkkeys against page's
924 : * finaltup with forcenonrequired=false whenever we initially set
925 : * forcenonrequired=true. That way the scan's arrays will reliably track
926 : * its progress through the index's key space.
927 : *
928 : * We don't expect this when _bt_readpage caller has no finaltup due to
929 : * its page being the rightmost (or the leftmost, during backwards scans).
930 : * When we see that _bt_readpage has no finaltup, back out of everything.
931 : */
932 : Assert(!pstate->forcenonrequired || so->numArrayKeys);
933 18496 : if (pstate->forcenonrequired && !pstate->finaltup)
934 : {
935 456 : pstate->forcenonrequired = false;
936 456 : pstate->startikey = 0;
937 : }
938 : }
939 :
940 : /*
941 : * Test whether caller's finaltup tuple is still before the start of matches
942 : * for the current array keys.
943 : *
944 : * Called at the start of reading a page during a scan with array keys, though
945 : * only when the so->scanBehind flag was set on the scan's prior page.
946 : *
947 : * Returns false if the tuple is still before the start of matches. When that
948 : * happens, caller should cut its losses and start a new primitive index scan.
949 : * Otherwise returns true.
950 : */
951 : static bool
952 2582 : _bt_scanbehind_checkkeys(IndexScanDesc scan, ScanDirection dir,
953 : IndexTuple finaltup)
954 : {
955 2582 : Relation rel = scan->indexRelation;
956 2582 : TupleDesc tupdesc = RelationGetDescr(rel);
957 2582 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
958 2582 : int nfinaltupatts = BTreeTupleGetNAtts(finaltup, rel);
959 : bool scanBehind;
960 :
961 : Assert(so->numArrayKeys);
962 :
963 2582 : if (_bt_tuple_before_array_skeys(scan, dir, finaltup, tupdesc,
964 : nfinaltupatts, false, 0, &scanBehind))
965 412 : return false;
966 :
967 : /*
968 : * If scanBehind was set, all of the untruncated attribute values from
969 : * finaltup that correspond to an array match the array's current element,
970 : * but there are other keys associated with truncated suffix attributes.
971 : * Array advancement must have incremented the scan's arrays on the
972 : * previous page, resulting in a set of array keys that happen to be an
973 : * exact match for the current page high key's untruncated prefix values.
974 : *
975 : * This page definitely doesn't contain tuples that the scan will need to
976 : * return. The next page may or may not contain relevant tuples. Handle
977 : * this by cutting our losses and starting a new primscan.
978 : */
979 2170 : if (scanBehind)
980 0 : return false;
981 :
982 2170 : if (!so->oppositeDirCheck)
983 2044 : return true;
984 :
985 126 : return _bt_oppodir_checkkeys(scan, dir, finaltup);
986 : }
987 :
988 : /*
989 : * Test whether an indextuple fails to satisfy an inequality required in the
990 : * opposite direction only.
991 : *
992 : * Caller's finaltup tuple is the page high key (for forwards scans), or the
993 : * first non-pivot tuple (for backwards scans). Called during scans with
994 : * required array keys and required opposite-direction inequalities.
995 : *
996 : * Returns false if an inequality scan key required in the opposite direction
997 : * only isn't satisfied (and any earlier required scan keys are satisfied).
998 : * Otherwise returns true.
999 : *
1000 : * An unsatisfied inequality required in the opposite direction only might
1001 : * well enable skipping over many leaf pages, provided another _bt_first call
1002 : * takes place. This type of unsatisfied inequality won't usually cause
1003 : * _bt_checkkeys to stop the scan to consider array advancement/starting a new
1004 : * primitive index scan.
1005 : */
1006 : static bool
1007 4418 : _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
1008 : IndexTuple finaltup)
1009 : {
1010 4418 : Relation rel = scan->indexRelation;
1011 4418 : TupleDesc tupdesc = RelationGetDescr(rel);
1012 4418 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
1013 4418 : int nfinaltupatts = BTreeTupleGetNAtts(finaltup, rel);
1014 : bool continuescan;
1015 4418 : ScanDirection flipped = -dir;
1016 4418 : int ikey = 0;
1017 :
1018 : Assert(so->numArrayKeys);
1019 :
1020 4418 : _bt_check_compare(scan, flipped, finaltup, nfinaltupatts, tupdesc, false,
1021 : false, &continuescan,
1022 : &ikey);
1023 :
1024 4418 : if (!continuescan && so->keyData[ikey].sk_strategy != BTEqualStrategyNumber)
1025 2 : return false;
1026 :
1027 4416 : return true;
1028 : }
1029 :
1030 : /* Save an index item into so->currPos.items[itemIndex] */
1031 : static void
1032 48335542 : _bt_saveitem(BTScanOpaque so, int itemIndex,
1033 : OffsetNumber offnum, IndexTuple itup)
1034 : {
1035 48335542 : BTScanPosItem *currItem = &so->currPos.items[itemIndex];
1036 :
1037 : Assert(!BTreeTupleIsPivot(itup) && !BTreeTupleIsPosting(itup));
1038 :
1039 48335542 : currItem->heapTid = itup->t_tid;
1040 48335542 : currItem->indexOffset = offnum;
1041 48335542 : if (so->currTuples)
1042 : {
1043 22340972 : Size itupsz = IndexTupleSize(itup);
1044 :
1045 22340972 : currItem->tupleOffset = so->currPos.nextTupleOffset;
1046 22340972 : memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz);
1047 22340972 : so->currPos.nextTupleOffset += MAXALIGN(itupsz);
1048 : }
1049 48335542 : }
1050 :
1051 : /*
1052 : * Setup state to save TIDs/items from a single posting list tuple.
1053 : *
1054 : * Saves an index item into so->currPos.items[itemIndex] for TID that is
1055 : * returned to scan first. Second or subsequent TIDs for posting list should
1056 : * be saved by calling _bt_savepostingitem().
1057 : *
1058 : * Returns an offset into tuple storage space that main tuple is stored at if
1059 : * needed.
1060 : */
1061 : static int
1062 578364 : _bt_setuppostingitems(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
1063 : const ItemPointerData *heapTid, IndexTuple itup)
1064 : {
1065 578364 : BTScanPosItem *currItem = &so->currPos.items[itemIndex];
1066 :
1067 : Assert(BTreeTupleIsPosting(itup));
1068 :
1069 578364 : currItem->heapTid = *heapTid;
1070 578364 : currItem->indexOffset = offnum;
1071 578364 : if (so->currTuples)
1072 : {
1073 : /* Save base IndexTuple (truncate posting list) */
1074 : IndexTuple base;
1075 166266 : Size itupsz = BTreeTupleGetPostingOffset(itup);
1076 :
1077 166266 : itupsz = MAXALIGN(itupsz);
1078 166266 : currItem->tupleOffset = so->currPos.nextTupleOffset;
1079 166266 : base = (IndexTuple) (so->currTuples + so->currPos.nextTupleOffset);
1080 166266 : memcpy(base, itup, itupsz);
1081 : /* Defensively reduce work area index tuple header size */
1082 166266 : base->t_info &= ~INDEX_SIZE_MASK;
1083 166266 : base->t_info |= itupsz;
1084 166266 : so->currPos.nextTupleOffset += itupsz;
1085 :
1086 166266 : return currItem->tupleOffset;
1087 : }
1088 :
1089 412098 : return 0;
1090 : }
1091 :
1092 : /*
1093 : * Save an index item into so->currPos.items[itemIndex] for current posting
1094 : * tuple.
1095 : *
1096 : * Assumes that _bt_setuppostingitems() has already been called for current
1097 : * posting list tuple. Caller passes its return value as tupleOffset.
1098 : */
1099 : static inline void
1100 2584458 : _bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
1101 : ItemPointer heapTid, int tupleOffset)
1102 : {
1103 2584458 : BTScanPosItem *currItem = &so->currPos.items[itemIndex];
1104 :
1105 2584458 : currItem->heapTid = *heapTid;
1106 2584458 : currItem->indexOffset = offnum;
1107 :
1108 : /*
1109 : * Have index-only scans return the same base IndexTuple for every TID
1110 : * that originates from the same posting list
1111 : */
1112 2584458 : if (so->currTuples)
1113 952074 : currItem->tupleOffset = tupleOffset;
1114 2584458 : }
1115 :
1116 : #define LOOK_AHEAD_REQUIRED_RECHECKS 3
1117 : #define LOOK_AHEAD_DEFAULT_DISTANCE 5
1118 : #define NSKIPADVANCES_THRESHOLD 3
1119 :
1120 : /*
1121 : * Test whether an indextuple satisfies all the scankey conditions.
1122 : *
1123 : * Return true if so, false if not. If the tuple fails to pass the qual,
1124 : * we also determine whether there's any need to continue the scan beyond
1125 : * this tuple, and set pstate.continuescan accordingly. See comments for
1126 : * _bt_preprocess_keys() about how this is done.
1127 : *
1128 : * Forward scan callers can pass a high key tuple in the hopes of having
1129 : * us set *continuescan to false, and avoiding an unnecessary visit to
1130 : * the page to the right.
1131 : *
1132 : * Advances the scan's array keys when necessary for arrayKeys=true callers.
1133 : * Scans without any array keys must always pass arrayKeys=false.
1134 : *
1135 : * Also stops and starts primitive index scans for arrayKeys=true callers.
1136 : * Scans with array keys are required to set up page state that helps us with
1137 : * this. The page's finaltup tuple (the page high key for a forward scan, or
1138 : * the page's first non-pivot tuple for a backward scan) must be set in
1139 : * pstate.finaltup ahead of the first call here for the page. Set this to
1140 : * NULL for rightmost page (or the leftmost page for backwards scans).
1141 : *
1142 : * scan: index scan descriptor (containing a search-type scankey)
1143 : * pstate: page level input and output parameters
1144 : * arrayKeys: should we advance the scan's array keys if necessary?
1145 : * tuple: index tuple to test
1146 : * tupnatts: number of attributes in tupnatts (high key may be truncated)
1147 : */
1148 : static bool
1149 62368124 : _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
1150 : IndexTuple tuple, int tupnatts)
1151 : {
1152 62368124 : TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
1153 62368124 : BTScanOpaque so PG_USED_FOR_ASSERTS_ONLY = (BTScanOpaque) scan->opaque;
1154 62368124 : ScanDirection dir = pstate->dir;
1155 62368124 : int ikey = pstate->startikey;
1156 : bool res;
1157 :
1158 : Assert(BTreeTupleGetNAtts(tuple, scan->indexRelation) == tupnatts);
1159 : Assert(!so->needPrimScan && !so->scanBehind && !so->oppositeDirCheck);
1160 : Assert(arrayKeys || so->numArrayKeys == 0);
1161 :
1162 62368124 : res = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, arrayKeys,
1163 62368124 : pstate->forcenonrequired, &pstate->continuescan,
1164 : &ikey);
1165 :
1166 : /*
1167 : * If _bt_check_compare relied on the pstate.startikey optimization, call
1168 : * again (in assert-enabled builds) to verify it didn't affect our answer.
1169 : *
1170 : * Note: we can't do this when !pstate.forcenonrequired, since any arrays
1171 : * before pstate.startikey won't have advanced on this page at all.
1172 : */
1173 : Assert(!pstate->forcenonrequired || arrayKeys);
1174 : #ifdef USE_ASSERT_CHECKING
1175 : if (pstate->startikey > 0 && !pstate->forcenonrequired)
1176 : {
1177 : bool dres,
1178 : dcontinuescan;
1179 : int dikey = 0;
1180 :
1181 : /* Pass arrayKeys=false to avoid array side-effects */
1182 : dres = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, false,
1183 : pstate->forcenonrequired, &dcontinuescan,
1184 : &dikey);
1185 : Assert(res == dres);
1186 : Assert(pstate->continuescan == dcontinuescan);
1187 :
1188 : /*
1189 : * Should also get the same ikey result. We need a slightly weaker
1190 : * assertion during arrayKeys calls, since they might be using an
1191 : * array that couldn't be marked required during preprocessing.
1192 : */
1193 : Assert(arrayKeys || ikey == dikey);
1194 : Assert(ikey <= dikey);
1195 : }
1196 : #endif
1197 :
1198 : /*
1199 : * Only one _bt_check_compare call is required in the common case where
1200 : * there are no equality strategy array scan keys. Otherwise we can only
1201 : * accept _bt_check_compare's answer unreservedly when it didn't set
1202 : * pstate.continuescan=false.
1203 : */
1204 62368124 : if (!arrayKeys || pstate->continuescan)
1205 62135048 : return res;
1206 :
1207 : /*
1208 : * _bt_check_compare call set continuescan=false in the presence of
1209 : * equality type array keys. This could mean that the tuple is just past
1210 : * the end of matches for the current array keys.
1211 : *
1212 : * It's also possible that the scan is still _before_ the _start_ of
1213 : * tuples matching the current set of array keys. Check for that first.
1214 : */
1215 : Assert(!pstate->forcenonrequired);
1216 233076 : if (_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, tupnatts, true,
1217 : ikey, NULL))
1218 : {
1219 : /* Override _bt_check_compare, continue primitive scan */
1220 41658 : pstate->continuescan = true;
1221 :
1222 : /*
1223 : * We will end up here repeatedly given a group of tuples > the
1224 : * previous array keys and < the now-current keys (for a backwards
1225 : * scan it's just the same, though the operators swap positions).
1226 : *
1227 : * We must avoid allowing this linear search process to scan very many
1228 : * tuples from well before the start of tuples matching the current
1229 : * array keys (or from well before the point where we'll once again
1230 : * have to advance the scan's array keys).
1231 : *
1232 : * We keep the overhead under control by speculatively "looking ahead"
1233 : * to later still-unscanned items from this same leaf page. We'll
1234 : * only attempt this once the number of tuples that the linear search
1235 : * process has examined starts to get out of hand.
1236 : */
1237 41658 : pstate->rechecks++;
1238 41658 : if (pstate->rechecks >= LOOK_AHEAD_REQUIRED_RECHECKS)
1239 : {
1240 : /* See if we should skip ahead within the current leaf page */
1241 12168 : _bt_checkkeys_look_ahead(scan, pstate, tupnatts, tupdesc);
1242 :
1243 : /*
1244 : * Might have set pstate.skip to a later page offset. When that
1245 : * happens then _bt_readpage caller will inexpensively skip ahead
1246 : * to a later tuple from the same page (the one just after the
1247 : * tuple we successfully "looked ahead" to).
1248 : */
1249 : }
1250 :
1251 : /* This indextuple doesn't match the current qual, in any case */
1252 41658 : return false;
1253 : }
1254 :
1255 : /*
1256 : * Caller's tuple is >= the current set of array keys and other equality
1257 : * constraint scan keys (or <= if this is a backwards scan). It's now
1258 : * clear that we _must_ advance any required array keys in lockstep with
1259 : * the scan.
1260 : */
1261 191418 : return _bt_advance_array_keys(scan, pstate, tuple, tupnatts, tupdesc,
1262 : ikey, true);
1263 : }
1264 :
1265 : /*
1266 : * Test whether an indextuple satisfies current scan condition.
1267 : *
1268 : * Return true if so, false if not. If not, also sets *continuescan to false
1269 : * when it's also not possible for any later tuples to pass the current qual
1270 : * (with the scan's current set of array keys, in the current scan direction),
1271 : * in addition to setting *ikey to the so->keyData[] subscript/offset for the
1272 : * unsatisfied scan key (needed when caller must consider advancing the scan's
1273 : * array keys).
1274 : *
1275 : * This is a subroutine for _bt_checkkeys. We provisionally assume that
1276 : * reaching the end of the current set of required keys (in particular the
1277 : * current required array keys) ends the ongoing (primitive) index scan.
1278 : * Callers without array keys should just end the scan right away when they
1279 : * find that continuescan has been set to false here by us. Things are more
1280 : * complicated for callers with array keys.
1281 : *
1282 : * Callers with array keys must first consider advancing the arrays when
1283 : * continuescan has been set to false here by us. They must then consider if
1284 : * it really does make sense to end the current (primitive) index scan, in
1285 : * light of everything that is known at that point. (In general when we set
1286 : * continuescan=false for these callers it must be treated as provisional.)
1287 : *
1288 : * We deal with advancing unsatisfied non-required arrays directly, though.
1289 : * This is safe, since by definition non-required keys can't end the scan.
1290 : * This is just how we determine if non-required arrays are just unsatisfied
1291 : * by the current array key, or if they're truly unsatisfied (that is, if
1292 : * they're unsatisfied by every possible array key).
1293 : *
1294 : * Pass advancenonrequired=false to avoid all array related side effects.
1295 : * This allows _bt_advance_array_keys caller to avoid infinite recursion.
1296 : *
1297 : * Pass forcenonrequired=true to instruct us to treat all keys as nonrequired.
1298 : * This is used to make it safe to temporarily stop properly maintaining the
1299 : * scan's required arrays. _bt_checkkeys caller (_bt_readpage, actually)
1300 : * determines a prefix of keys that must satisfy every possible corresponding
1301 : * index attribute value from its page, which is passed to us via *ikey arg
1302 : * (this is the first key that might be unsatisfied by tuples on the page).
1303 : * Obviously, we won't maintain any array keys from before *ikey, so it's
1304 : * quite possible for such arrays to "fall behind" the index's keyspace.
1305 : * Caller will need to "catch up" by passing forcenonrequired=true (alongside
1306 : * an *ikey=0) once the page's finaltup is reached.
1307 : *
1308 : * Note: it's safe to pass an *ikey > 0 with forcenonrequired=false, but only
1309 : * when caller determines that it won't affect array maintenance.
1310 : */
1311 : static bool
1312 62428052 : _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
1313 : IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
1314 : bool advancenonrequired, bool forcenonrequired,
1315 : bool *continuescan, int *ikey)
1316 : {
1317 62428052 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
1318 :
1319 62428052 : *continuescan = true; /* default assumption */
1320 :
1321 120255126 : for (; *ikey < so->numberOfKeys; (*ikey)++)
1322 : {
1323 71284642 : ScanKey key = so->keyData + *ikey;
1324 : Datum datum;
1325 : bool isNull;
1326 71284642 : bool requiredSameDir = false,
1327 71284642 : requiredOppositeDirOnly = false;
1328 :
1329 : /*
1330 : * Check if the key is required in the current scan direction, in the
1331 : * opposite scan direction _only_, or in neither direction (except
1332 : * when we're forced to treat all scan keys as nonrequired)
1333 : */
1334 71284642 : if (forcenonrequired)
1335 : {
1336 : /* treating scan's keys as non-required */
1337 : }
1338 70861040 : else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
1339 15003254 : ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
1340 55882530 : requiredSameDir = true;
1341 14978510 : else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) ||
1342 6245388 : ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir)))
1343 14978510 : requiredOppositeDirOnly = true;
1344 :
1345 71284642 : if (key->sk_attno > tupnatts)
1346 : {
1347 : /*
1348 : * This attribute is truncated (must be high key). The value for
1349 : * this attribute in the first non-pivot tuple on the page to the
1350 : * right could be any possible value. Assume that truncated
1351 : * attribute passes the qual.
1352 : */
1353 : Assert(BTreeTupleIsPivot(tuple));
1354 19048320 : continue;
1355 : }
1356 :
1357 : /*
1358 : * A skip array scan key uses one of several sentinel values. We just
1359 : * fall back on _bt_tuple_before_array_skeys when we see such a value.
1360 : */
1361 71282350 : if (key->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL |
1362 : SK_BT_NEXT | SK_BT_PRIOR))
1363 : {
1364 : Assert(key->sk_flags & SK_SEARCHARRAY);
1365 : Assert(key->sk_flags & SK_BT_SKIP);
1366 : Assert(requiredSameDir || forcenonrequired);
1367 :
1368 : /*
1369 : * Cannot fall back on _bt_tuple_before_array_skeys when we're
1370 : * treating the scan's keys as nonrequired, though. Just handle
1371 : * this like any other non-required equality-type array key.
1372 : */
1373 35850 : if (forcenonrequired)
1374 13457568 : return _bt_advance_array_keys(scan, NULL, tuple, tupnatts,
1375 : tupdesc, *ikey, false);
1376 :
1377 33822 : *continuescan = false;
1378 33822 : return false;
1379 : }
1380 :
1381 : /* row-comparison keys need special processing */
1382 71246500 : if (key->sk_flags & SK_ROW_HEADER)
1383 : {
1384 2454 : if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
1385 : forcenonrequired, continuescan))
1386 2388 : continue;
1387 66 : return false;
1388 : }
1389 :
1390 71244046 : datum = index_getattr(tuple,
1391 71244046 : key->sk_attno,
1392 : tupdesc,
1393 : &isNull);
1394 :
1395 71244046 : if (key->sk_flags & SK_ISNULL)
1396 : {
1397 : /* Handle IS NULL/NOT NULL tests */
1398 19061412 : if (key->sk_flags & SK_SEARCHNULL)
1399 : {
1400 18128 : if (isNull)
1401 428 : continue; /* tuple satisfies this qual */
1402 : }
1403 : else
1404 : {
1405 : Assert(key->sk_flags & SK_SEARCHNOTNULL);
1406 : Assert(!(key->sk_flags & SK_BT_SKIP));
1407 19043284 : if (!isNull)
1408 19043212 : continue; /* tuple satisfies this qual */
1409 : }
1410 :
1411 : /*
1412 : * Tuple fails this qual. If it's a required qual for the current
1413 : * scan direction, then we can conclude no further tuples will
1414 : * pass, either.
1415 : */
1416 17772 : if (requiredSameDir)
1417 204 : *continuescan = false;
1418 17568 : else if (unlikely(key->sk_flags & SK_BT_SKIP))
1419 : {
1420 : /*
1421 : * If we're treating scan keys as nonrequired, and encounter a
1422 : * skip array scan key whose current element is NULL, then it
1423 : * must be a non-range skip array. It must be satisfied, so
1424 : * there's no need to call _bt_advance_array_keys to check.
1425 : */
1426 : Assert(forcenonrequired && *ikey > 0);
1427 0 : continue;
1428 : }
1429 :
1430 : /*
1431 : * This indextuple doesn't match the qual.
1432 : */
1433 17772 : return false;
1434 : }
1435 :
1436 52182634 : if (isNull)
1437 : {
1438 : /*
1439 : * Scalar scan key isn't satisfied by NULL tuple value.
1440 : *
1441 : * If we're treating scan keys as nonrequired, and key is for a
1442 : * skip array, then we must attempt to advance the array to NULL
1443 : * (if we're successful then the tuple might match the qual).
1444 : */
1445 228 : if (unlikely(forcenonrequired && key->sk_flags & SK_BT_SKIP))
1446 0 : return _bt_advance_array_keys(scan, NULL, tuple, tupnatts,
1447 : tupdesc, *ikey, false);
1448 :
1449 228 : if (key->sk_flags & SK_BT_NULLS_FIRST)
1450 : {
1451 : /*
1452 : * Since NULLs are sorted before non-NULLs, we know we have
1453 : * reached the lower limit of the range of values for this
1454 : * index attr. On a backward scan, we can stop if this qual
1455 : * is one of the "must match" subset. We can stop regardless
1456 : * of whether the qual is > or <, so long as it's required,
1457 : * because it's not possible for any future tuples to pass. On
1458 : * a forward scan, however, we must keep going, because we may
1459 : * have initially positioned to the start of the index.
1460 : * (_bt_advance_array_keys also relies on this behavior during
1461 : * forward scans.)
1462 : */
1463 0 : if ((requiredSameDir || requiredOppositeDirOnly) &&
1464 : ScanDirectionIsBackward(dir))
1465 0 : *continuescan = false;
1466 : }
1467 : else
1468 : {
1469 : /*
1470 : * Since NULLs are sorted after non-NULLs, we know we have
1471 : * reached the upper limit of the range of values for this
1472 : * index attr. On a forward scan, we can stop if this qual is
1473 : * one of the "must match" subset. We can stop regardless of
1474 : * whether the qual is > or <, so long as it's required,
1475 : * because it's not possible for any future tuples to pass. On
1476 : * a backward scan, however, we must keep going, because we
1477 : * may have initially positioned to the end of the index.
1478 : * (_bt_advance_array_keys also relies on this behavior during
1479 : * backward scans.)
1480 : */
1481 228 : if ((requiredSameDir || requiredOppositeDirOnly) &&
1482 : ScanDirectionIsForward(dir))
1483 222 : *continuescan = false;
1484 : }
1485 :
1486 : /*
1487 : * This indextuple doesn't match the qual.
1488 : */
1489 228 : return false;
1490 : }
1491 :
1492 52182406 : if (!DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation,
1493 : datum, key->sk_argument)))
1494 : {
1495 : /*
1496 : * Tuple fails this qual. If it's a required qual for the current
1497 : * scan direction, then we can conclude no further tuples will
1498 : * pass, either.
1499 : */
1500 13403652 : if (requiredSameDir)
1501 13029806 : *continuescan = false;
1502 :
1503 : /*
1504 : * If this is a non-required equality-type array key, the tuple
1505 : * needs to be checked against every possible array key. Handle
1506 : * this by "advancing" the scan key's array to a matching value
1507 : * (if we're successful then the tuple might match the qual).
1508 : */
1509 373846 : else if (advancenonrequired &&
1510 366412 : key->sk_strategy == BTEqualStrategyNumber &&
1511 290600 : (key->sk_flags & SK_SEARCHARRAY))
1512 6498 : return _bt_advance_array_keys(scan, NULL, tuple, tupnatts,
1513 : tupdesc, *ikey, false);
1514 :
1515 : /*
1516 : * This indextuple doesn't match the qual.
1517 : */
1518 13397154 : return false;
1519 : }
1520 : }
1521 :
1522 : /* If we get here, the tuple passes all index quals. */
1523 48970484 : return true;
1524 : }
1525 :
1526 : /*
1527 : * Test whether an indextuple satisfies a row-comparison scan condition.
1528 : *
1529 : * Return true if so, false if not. If not, also clear *continuescan if
1530 : * it's not possible for any future tuples in the current scan direction
1531 : * to pass the qual.
1532 : *
1533 : * This is a subroutine for _bt_checkkeys/_bt_check_compare. Caller passes us
1534 : * a row compare header key taken from so->keyData[].
1535 : *
1536 : * Row value comparisons can be described in terms of logical expansions that
1537 : * use only scalar operators. Consider the following example row comparison:
1538 : *
1539 : * "(a, b, c) > (7, 'bar', 62)"
1540 : *
1541 : * This can be evaluated as:
1542 : *
1543 : * "(a = 7 AND b = 'bar' AND c > 62) OR (a = 7 AND b > 'bar') OR (a > 7)".
1544 : *
1545 : * Notice that this condition is satisfied by _all_ rows that satisfy "a > 7",
1546 : * and by a subset of all rows that satisfy "a >= 7" (possibly all such rows).
1547 : * It _can't_ be satisfied by other rows (where "a < 7" or where "a IS NULL").
1548 : * A row comparison header key can therefore often be treated as if it was a
1549 : * simple scalar inequality on the row compare's most significant column.
1550 : * (For example, _bt_advance_array_keys and most preprocessing routines treat
1551 : * row compares like any other same-strategy inequality on the same column.)
1552 : *
1553 : * Things get more complicated for our row compare given a row where "a = 7".
1554 : * Note that a row compare isn't necessarily satisfied by _every_ tuple that
1555 : * appears between the first and last satisfied tuple returned by the scan,
1556 : * due to the way that its lower-order subkeys are only conditionally applied.
1557 : * A forwards scan that uses our example qual might initially return a tuple
1558 : * "(a, b, c) = (7, 'zebra', 54)". But it won't subsequently return a tuple
1559 : * "(a, b, c) = (7, NULL, 1)" located to the right of the first matching tuple
1560 : * (assume that "b" was declared NULLS LAST here). The scan will only return
1561 : * additional matches upon reaching tuples where "a > 7". If you rereview our
1562 : * example row comparison's logical expansion, you'll understand why this is.
1563 : * (Here we assume that all subkeys could be marked required, guaranteeing
1564 : * that row comparison order matches index order. This is the common case.)
1565 : *
1566 : * Note that a row comparison header key behaves _exactly_ the same as a
1567 : * similar scalar inequality key on the row's most significant column once the
1568 : * scan reaches the point where it no longer needs to evaluate lower-order
1569 : * subkeys (or before the point where it starts needing to evaluate them).
1570 : * For example, once a forwards scan that uses our example qual reaches the
1571 : * first tuple "a > 7", we'll behave in just the same way as our caller would
1572 : * behave with a similar scalar inequality "a > 7" for the remainder of the
1573 : * scan (assuming that the scan never changes direction/never goes backwards).
1574 : * We'll even set continuescan=false according to exactly the same rules as
1575 : * the ones our caller applies with simple scalar inequalities, including the
1576 : * rules it applies when NULL tuple values don't satisfy an inequality qual.
1577 : */
1578 : static bool
1579 2454 : _bt_check_rowcompare(ScanKey header, IndexTuple tuple, int tupnatts,
1580 : TupleDesc tupdesc, ScanDirection dir,
1581 : bool forcenonrequired, bool *continuescan)
1582 : {
1583 2454 : ScanKey subkey = (ScanKey) DatumGetPointer(header->sk_argument);
1584 2454 : int32 cmpresult = 0;
1585 : bool result;
1586 :
1587 : /* First subkey should be same as the header says */
1588 : Assert(header->sk_flags & SK_ROW_HEADER);
1589 : Assert(subkey->sk_attno == header->sk_attno);
1590 : Assert(subkey->sk_strategy == header->sk_strategy);
1591 :
1592 : /* Loop over columns of the row condition */
1593 : for (;;)
1594 240 : {
1595 : Datum datum;
1596 : bool isNull;
1597 :
1598 : Assert(subkey->sk_flags & SK_ROW_MEMBER);
1599 :
1600 : /* When a NULL row member is compared, the row never matches */
1601 2694 : if (subkey->sk_flags & SK_ISNULL)
1602 : {
1603 : /*
1604 : * Unlike the simple-scankey case, this isn't a disallowed case
1605 : * (except when it's the first row element that has the NULL arg).
1606 : * But it can never match. If all the earlier row comparison
1607 : * columns are required for the scan direction, we can stop the
1608 : * scan, because there can't be another tuple that will succeed.
1609 : */
1610 : Assert(subkey != (ScanKey) DatumGetPointer(header->sk_argument));
1611 12 : subkey--;
1612 12 : if (forcenonrequired)
1613 : {
1614 : /* treating scan's keys as non-required */
1615 : }
1616 12 : else if ((subkey->sk_flags & SK_BT_REQFWD) &&
1617 : ScanDirectionIsForward(dir))
1618 6 : *continuescan = false;
1619 6 : else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
1620 : ScanDirectionIsBackward(dir))
1621 6 : *continuescan = false;
1622 66 : return false;
1623 : }
1624 :
1625 2682 : if (subkey->sk_attno > tupnatts)
1626 : {
1627 : /*
1628 : * This attribute is truncated (must be high key). The value for
1629 : * this attribute in the first non-pivot tuple on the page to the
1630 : * right could be any possible value. Assume that truncated
1631 : * attribute passes the qual.
1632 : */
1633 : Assert(BTreeTupleIsPivot(tuple));
1634 6 : return true;
1635 : }
1636 :
1637 2676 : datum = index_getattr(tuple,
1638 2676 : subkey->sk_attno,
1639 : tupdesc,
1640 : &isNull);
1641 :
1642 2676 : if (isNull)
1643 : {
1644 : int reqflags;
1645 :
1646 48 : if (forcenonrequired)
1647 : {
1648 : /* treating scan's keys as non-required */
1649 : }
1650 48 : else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
1651 : {
1652 : /*
1653 : * Since NULLs are sorted before non-NULLs, we know we have
1654 : * reached the lower limit of the range of values for this
1655 : * index attr. On a backward scan, we can stop if this qual
1656 : * is one of the "must match" subset. However, on a forwards
1657 : * scan, we must keep going, because we may have initially
1658 : * positioned to the start of the index.
1659 : *
1660 : * All required NULLS FIRST > row members can use NULL tuple
1661 : * values to end backwards scans, just like with other values.
1662 : * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a
1663 : * backwards scan upon reaching the index's rightmost "a = 9"
1664 : * tuple whose "b" column contains a NULL (if not sooner).
1665 : * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42.
1666 : */
1667 0 : reqflags = SK_BT_REQBKWD;
1668 :
1669 : /*
1670 : * When a most significant required NULLS FIRST < row compare
1671 : * member sees NULL tuple values during a backwards scan, it
1672 : * signals the end of matches for the whole row compare/scan.
1673 : * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a
1674 : * backwards scan upon reaching the rightmost tuple whose "a"
1675 : * column has a NULL. The "a" NULL value is "<" 9, and yet
1676 : * our < row compare will still end the scan. (This isn't
1677 : * safe with later/lower-order row members. Notice that it
1678 : * can only happen with an "a" NULL some time after the scan
1679 : * completely stops needing to use its "b" and "c" members.)
1680 : */
1681 0 : if (subkey == (ScanKey) DatumGetPointer(header->sk_argument))
1682 0 : reqflags |= SK_BT_REQFWD; /* safe, first row member */
1683 :
1684 0 : if ((subkey->sk_flags & reqflags) &&
1685 : ScanDirectionIsBackward(dir))
1686 0 : *continuescan = false;
1687 : }
1688 : else
1689 : {
1690 : /*
1691 : * Since NULLs are sorted after non-NULLs, we know we have
1692 : * reached the upper limit of the range of values for this
1693 : * index attr. On a forward scan, we can stop if this qual is
1694 : * one of the "must match" subset. However, on a backward
1695 : * scan, we must keep going, because we may have initially
1696 : * positioned to the end of the index.
1697 : *
1698 : * All required NULLS LAST < row members can use NULL tuple
1699 : * values to end forwards scans, just like with other values.
1700 : * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a
1701 : * forwards scan upon reaching the index's leftmost "a = 9"
1702 : * tuple whose "b" column contains a NULL (if not sooner).
1703 : * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42.
1704 : */
1705 48 : reqflags = SK_BT_REQFWD;
1706 :
1707 : /*
1708 : * When a most significant required NULLS LAST > row compare
1709 : * member sees NULL tuple values during a forwards scan, it
1710 : * signals the end of matches for the whole row compare/scan.
1711 : * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a
1712 : * forwards scan upon reaching the leftmost tuple whose "a"
1713 : * column has a NULL. The "a" NULL value is ">" 9, and yet
1714 : * our > row compare will end the scan. (This isn't safe with
1715 : * later/lower-order row members. Notice that it can only
1716 : * happen with an "a" NULL some time after the scan completely
1717 : * stops needing to use its "b" and "c" members.)
1718 : */
1719 48 : if (subkey == (ScanKey) DatumGetPointer(header->sk_argument))
1720 0 : reqflags |= SK_BT_REQBKWD; /* safe, first row member */
1721 :
1722 48 : if ((subkey->sk_flags & reqflags) &&
1723 : ScanDirectionIsForward(dir))
1724 0 : *continuescan = false;
1725 : }
1726 :
1727 : /*
1728 : * In any case, this indextuple doesn't match the qual.
1729 : */
1730 48 : return false;
1731 : }
1732 :
1733 : /* Perform the test --- three-way comparison not bool operator */
1734 2628 : cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
1735 : subkey->sk_collation,
1736 : datum,
1737 : subkey->sk_argument));
1738 :
1739 2628 : if (subkey->sk_flags & SK_BT_DESC)
1740 0 : INVERT_COMPARE_RESULT(cmpresult);
1741 :
1742 : /* Done comparing if unequal, else advance to next column */
1743 2628 : if (cmpresult != 0)
1744 2388 : break;
1745 :
1746 240 : if (subkey->sk_flags & SK_ROW_END)
1747 0 : break;
1748 240 : subkey++;
1749 : }
1750 :
1751 : /* Final subkey/column determines if row compare is satisfied */
1752 2388 : result = _bt_rowcompare_cmpresult(subkey, cmpresult);
1753 :
1754 2388 : if (!result && !forcenonrequired)
1755 : {
1756 : /*
1757 : * Tuple fails this qual. If it's a required qual for the current
1758 : * scan direction, then we can conclude no further tuples will pass,
1759 : * either. Note we have to look at the deciding column, not
1760 : * necessarily the first or last column of the row condition.
1761 : */
1762 6 : if ((subkey->sk_flags & SK_BT_REQFWD) &&
1763 : ScanDirectionIsForward(dir))
1764 6 : *continuescan = false;
1765 0 : else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
1766 : ScanDirectionIsBackward(dir))
1767 0 : *continuescan = false;
1768 : }
1769 :
1770 2388 : return result;
1771 : }
1772 :
1773 : /*
1774 : * Call here when a row compare member returns a non-zero result, or with the
1775 : * result for the final ROW_END row compare member (no matter the cmpresult).
1776 : *
1777 : * cmpresult indicates the overall result of the row comparison (must already
1778 : * be commuted for DESC subkeys), and subkey is the deciding row member.
1779 : */
1780 : static bool
1781 2388 : _bt_rowcompare_cmpresult(ScanKey subkey, int cmpresult)
1782 : {
1783 : bool satisfied;
1784 :
1785 : Assert(subkey->sk_flags & SK_ROW_MEMBER);
1786 :
1787 2388 : switch (subkey->sk_strategy)
1788 : {
1789 186 : case BTLessStrategyNumber:
1790 186 : satisfied = (cmpresult < 0);
1791 186 : break;
1792 1584 : case BTLessEqualStrategyNumber:
1793 1584 : satisfied = (cmpresult <= 0);
1794 1584 : break;
1795 246 : case BTGreaterEqualStrategyNumber:
1796 246 : satisfied = (cmpresult >= 0);
1797 246 : break;
1798 372 : case BTGreaterStrategyNumber:
1799 372 : satisfied = (cmpresult > 0);
1800 372 : break;
1801 0 : default:
1802 : /* EQ and NE cases aren't allowed here */
1803 0 : elog(ERROR, "unexpected strategy number %d", subkey->sk_strategy);
1804 : satisfied = false; /* keep compiler quiet */
1805 : break;
1806 : }
1807 :
1808 2388 : return satisfied;
1809 : }
1810 :
1811 : /*
1812 : * _bt_tuple_before_array_skeys() -- too early to advance required arrays?
1813 : *
1814 : * We always compare the tuple using the current array keys (which we assume
1815 : * are already set in so->keyData[]). readpagetup indicates if tuple is the
1816 : * scan's current _bt_readpage-wise tuple.
1817 : *
1818 : * readpagetup callers must only call here when _bt_check_compare already set
1819 : * continuescan=false. We help these callers deal with _bt_check_compare's
1820 : * inability to distinguish between the < and > cases (it uses equality
1821 : * operator scan keys, whereas we use 3-way ORDER procs). These callers pass
1822 : * a _bt_check_compare-set sktrig value that indicates which scan key
1823 : * triggered the call (!readpagetup callers just pass us sktrig=0 instead).
1824 : * This information allows us to avoid wastefully checking earlier scan keys
1825 : * that were already deemed to have been satisfied inside _bt_check_compare.
1826 : *
1827 : * Returns false when caller's tuple is >= the current required equality scan
1828 : * keys (or <=, in the case of backwards scans). This happens to readpagetup
1829 : * callers when the scan has reached the point of needing its array keys
1830 : * advanced; caller will need to advance required and non-required arrays at
1831 : * scan key offsets >= sktrig, plus scan keys < sktrig iff sktrig rolls over.
1832 : * (When we return false to readpagetup callers, tuple can only be == current
1833 : * required equality scan keys when caller's sktrig indicates that the arrays
1834 : * need to be advanced due to an unsatisfied required inequality key trigger.)
1835 : *
1836 : * Returns true when caller passes a tuple that is < the current set of
1837 : * equality keys for the most significant non-equal required scan key/column
1838 : * (or > the keys, during backwards scans). This happens to readpagetup
1839 : * callers when tuple is still before the start of matches for the scan's
1840 : * required equality strategy scan keys. (sktrig can't have indicated that an
1841 : * inequality strategy scan key wasn't satisfied in _bt_check_compare when we
1842 : * return true. In fact, we automatically return false when passed such an
1843 : * inequality sktrig by readpagetup callers -- _bt_check_compare's initial
1844 : * continuescan=false doesn't really need to be confirmed here by us.)
1845 : *
1846 : * !readpagetup callers optionally pass us *scanBehind, which tracks whether
1847 : * any missing truncated attributes might have affected array advancement
1848 : * (compared to what would happen if it was shown the first non-pivot tuple on
1849 : * the page to the right of caller's finaltup/high key tuple instead). It's
1850 : * only possible that we'll set *scanBehind to true when caller passes us a
1851 : * pivot tuple (with truncated -inf attributes) that we return false for.
1852 : */
1853 : static bool
1854 329350 : _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
1855 : IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
1856 : bool readpagetup, int sktrig, bool *scanBehind)
1857 : {
1858 329350 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
1859 :
1860 : Assert(so->numArrayKeys);
1861 : Assert(so->numberOfKeys);
1862 : Assert(sktrig == 0 || readpagetup);
1863 : Assert(!readpagetup || scanBehind == NULL);
1864 :
1865 329350 : if (scanBehind)
1866 84670 : *scanBehind = false;
1867 :
1868 332622 : for (int ikey = sktrig; ikey < so->numberOfKeys; ikey++)
1869 : {
1870 331870 : ScanKey cur = so->keyData + ikey;
1871 : Datum tupdatum;
1872 : bool tupnull;
1873 : int32 result;
1874 :
1875 : /* readpagetup calls require one ORDER proc comparison (at most) */
1876 : Assert(!readpagetup || ikey == sktrig);
1877 :
1878 : /*
1879 : * Once we reach a non-required scan key, we're completely done.
1880 : *
1881 : * Note: we deliberately don't consider the scan direction here.
1882 : * _bt_advance_array_keys caller requires that we track *scanBehind
1883 : * without concern for scan direction.
1884 : */
1885 331870 : if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) == 0)
1886 : {
1887 : Assert(!readpagetup);
1888 : Assert(ikey > sktrig || ikey == 0);
1889 328598 : return false;
1890 : }
1891 :
1892 331870 : if (cur->sk_attno > tupnatts)
1893 : {
1894 : Assert(!readpagetup);
1895 :
1896 : /*
1897 : * When we reach a high key's truncated attribute, assume that the
1898 : * tuple attribute's value is >= the scan's equality constraint
1899 : * scan keys (but set *scanBehind to let interested callers know
1900 : * that a truncated attribute might have affected our answer).
1901 : */
1902 28 : if (scanBehind)
1903 28 : *scanBehind = true;
1904 :
1905 28 : return false;
1906 : }
1907 :
1908 : /*
1909 : * Deal with inequality strategy scan keys that _bt_check_compare set
1910 : * continuescan=false for
1911 : */
1912 331842 : if (cur->sk_strategy != BTEqualStrategyNumber)
1913 : {
1914 : /*
1915 : * When _bt_check_compare indicated that a required inequality
1916 : * scan key wasn't satisfied, there's no need to verify anything;
1917 : * caller always calls _bt_advance_array_keys with this sktrig.
1918 : */
1919 618 : if (readpagetup)
1920 348 : return false;
1921 :
1922 : /*
1923 : * Otherwise we can't give up, since we must check all required
1924 : * scan keys (required in either direction) in order to correctly
1925 : * track *scanBehind for caller
1926 : */
1927 270 : continue;
1928 : }
1929 :
1930 331224 : tupdatum = index_getattr(tuple, cur->sk_attno, tupdesc, &tupnull);
1931 :
1932 331224 : if (likely(!(cur->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))))
1933 : {
1934 : /* Scankey has a valid/comparable sk_argument value */
1935 325830 : result = _bt_compare_array_skey(&so->orderProcs[ikey],
1936 : tupdatum, tupnull,
1937 : cur->sk_argument, cur);
1938 :
1939 325830 : if (result == 0)
1940 : {
1941 : /*
1942 : * Interpret result in a way that takes NEXT/PRIOR into
1943 : * account
1944 : */
1945 16982 : if (cur->sk_flags & SK_BT_NEXT)
1946 13950 : result = -1;
1947 3032 : else if (cur->sk_flags & SK_BT_PRIOR)
1948 30 : result = 1;
1949 :
1950 : Assert(result == 0 || (cur->sk_flags & SK_BT_SKIP));
1951 : }
1952 : }
1953 : else
1954 : {
1955 5394 : BTArrayKeyInfo *array = NULL;
1956 :
1957 : /*
1958 : * Current array element/array = scan key value is a sentinel
1959 : * value that represents the lowest (or highest) possible value
1960 : * that's still within the range of the array.
1961 : *
1962 : * Like _bt_first, we only see MINVAL keys during forwards scans
1963 : * (and similarly only see MAXVAL keys during backwards scans).
1964 : * Even if the scan's direction changes, we'll stop at some higher
1965 : * order key before we can ever reach any MAXVAL (or MINVAL) keys.
1966 : * (However, unlike _bt_first we _can_ get to keys marked either
1967 : * NEXT or PRIOR, regardless of the scan's current direction.)
1968 : */
1969 : Assert(ScanDirectionIsForward(dir) ?
1970 : !(cur->sk_flags & SK_BT_MAXVAL) :
1971 : !(cur->sk_flags & SK_BT_MINVAL));
1972 :
1973 : /*
1974 : * There are no valid sk_argument values in MINVAL/MAXVAL keys.
1975 : * Check if tupdatum is within the range of skip array instead.
1976 : */
1977 5900 : for (int arrayidx = 0; arrayidx < so->numArrayKeys; arrayidx++)
1978 : {
1979 5900 : array = &so->arrayKeys[arrayidx];
1980 5900 : if (array->scan_key == ikey)
1981 5394 : break;
1982 : }
1983 :
1984 5394 : _bt_binsrch_skiparray_skey(false, dir, tupdatum, tupnull,
1985 : array, cur, &result);
1986 :
1987 5394 : if (result == 0)
1988 : {
1989 : /*
1990 : * tupdatum satisfies both low_compare and high_compare, so
1991 : * it's time to advance the array keys.
1992 : *
1993 : * Note: It's possible that the skip array will "advance" from
1994 : * its MINVAL (or MAXVAL) representation to an alternative,
1995 : * logically equivalent representation of the same value: a
1996 : * representation where the = key gets a valid datum in its
1997 : * sk_argument. This is only possible when low_compare uses
1998 : * the >= strategy (or high_compare uses the <= strategy).
1999 : */
2000 5388 : return false;
2001 : }
2002 : }
2003 :
2004 : /*
2005 : * Does this comparison indicate that caller must _not_ advance the
2006 : * scan's arrays just yet?
2007 : */
2008 325836 : if ((ScanDirectionIsForward(dir) && result < 0) ||
2009 3252 : (ScanDirectionIsBackward(dir) && result > 0))
2010 63716 : return true;
2011 :
2012 : /*
2013 : * Does this comparison indicate that caller should now advance the
2014 : * scan's arrays? (Must be if we get here during a readpagetup call.)
2015 : */
2016 262120 : if (readpagetup || result != 0)
2017 : {
2018 : Assert(result != 0);
2019 259118 : return false;
2020 : }
2021 :
2022 : /*
2023 : * Inconclusive -- need to check later scan keys, too.
2024 : *
2025 : * This must be a finaltup precheck, or a call made from an assertion.
2026 : */
2027 : Assert(result == 0);
2028 : }
2029 :
2030 : Assert(!readpagetup);
2031 :
2032 752 : return false;
2033 : }
2034 :
2035 : /*
2036 : * Determine if a scan with array keys should skip over uninteresting tuples.
2037 : *
2038 : * This is a subroutine for _bt_checkkeys. Called when _bt_readpage's linear
2039 : * search process (started after it finishes reading an initial group of
2040 : * matching tuples, used to locate the start of the next group of tuples
2041 : * matching the next set of required array keys) has already scanned an
2042 : * excessive number of tuples whose key space is "between arrays".
2043 : *
2044 : * When we perform look ahead successfully, we'll sets pstate.skip, which
2045 : * instructs _bt_readpage to skip ahead to that tuple next (could be past the
2046 : * end of the scan's leaf page). Pages where the optimization is effective
2047 : * will generally still need to skip several times. Each call here performs
2048 : * only a single "look ahead" comparison of a later tuple, whose distance from
2049 : * the current tuple's offset number is determined by applying heuristics.
2050 : */
2051 : static void
2052 12168 : _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
2053 : int tupnatts, TupleDesc tupdesc)
2054 : {
2055 12168 : ScanDirection dir = pstate->dir;
2056 : OffsetNumber aheadoffnum;
2057 : IndexTuple ahead;
2058 :
2059 : Assert(!pstate->forcenonrequired);
2060 :
2061 : /* Avoid looking ahead when comparing the page high key */
2062 12168 : if (pstate->offnum < pstate->minoff)
2063 0 : return;
2064 :
2065 : /*
2066 : * Don't look ahead when there aren't enough tuples remaining on the page
2067 : * (in the current scan direction) for it to be worth our while
2068 : */
2069 12168 : if (ScanDirectionIsForward(dir) &&
2070 12090 : pstate->offnum >= pstate->maxoff - LOOK_AHEAD_DEFAULT_DISTANCE)
2071 540 : return;
2072 11628 : else if (ScanDirectionIsBackward(dir) &&
2073 78 : pstate->offnum <= pstate->minoff + LOOK_AHEAD_DEFAULT_DISTANCE)
2074 24 : return;
2075 :
2076 : /*
2077 : * The look ahead distance starts small, and ramps up as each call here
2078 : * allows _bt_readpage to skip over more tuples
2079 : */
2080 11604 : if (!pstate->targetdistance)
2081 6596 : pstate->targetdistance = LOOK_AHEAD_DEFAULT_DISTANCE;
2082 5008 : else if (pstate->targetdistance < MaxIndexTuplesPerPage / 2)
2083 5008 : pstate->targetdistance *= 2;
2084 :
2085 : /* Don't read past the end (or before the start) of the page, though */
2086 11604 : if (ScanDirectionIsForward(dir))
2087 11550 : aheadoffnum = Min((int) pstate->maxoff,
2088 : (int) pstate->offnum + pstate->targetdistance);
2089 : else
2090 54 : aheadoffnum = Max((int) pstate->minoff,
2091 : (int) pstate->offnum - pstate->targetdistance);
2092 :
2093 11604 : ahead = (IndexTuple) PageGetItem(pstate->page,
2094 11604 : PageGetItemId(pstate->page, aheadoffnum));
2095 11604 : if (_bt_tuple_before_array_skeys(scan, dir, ahead, tupdesc, tupnatts,
2096 : false, 0, NULL))
2097 : {
2098 : /*
2099 : * Success -- instruct _bt_readpage to skip ahead to very next tuple
2100 : * after the one we determined was still before the current array keys
2101 : */
2102 4170 : if (ScanDirectionIsForward(dir))
2103 4134 : pstate->skip = aheadoffnum + 1;
2104 : else
2105 36 : pstate->skip = aheadoffnum - 1;
2106 : }
2107 : else
2108 : {
2109 : /*
2110 : * Failure -- "ahead" tuple is too far ahead (we were too aggressive).
2111 : *
2112 : * Reset the number of rechecks, and aggressively reduce the target
2113 : * distance (we're much more aggressive here than we were when the
2114 : * distance was initially ramped up).
2115 : */
2116 7434 : pstate->rechecks = 0;
2117 7434 : pstate->targetdistance = Max(pstate->targetdistance / 8, 1);
2118 : }
2119 : }
2120 :
2121 : /*
2122 : * _bt_advance_array_keys() -- Advance array elements using a tuple
2123 : *
2124 : * The scan always gets a new qual as a consequence of calling here (except
2125 : * when we determine that the top-level scan has run out of matching tuples).
2126 : * All later _bt_check_compare calls also use the same new qual that was first
2127 : * used here (at least until the next call here advances the keys once again).
2128 : * It's convenient to structure _bt_check_compare rechecks of caller's tuple
2129 : * (using the new qual) as one the steps of advancing the scan's array keys,
2130 : * so this function works as a wrapper around _bt_check_compare.
2131 : *
2132 : * Like _bt_check_compare, we'll set pstate.continuescan on behalf of the
2133 : * caller, and return a boolean indicating if caller's tuple satisfies the
2134 : * scan's new qual. But unlike _bt_check_compare, we set so->needPrimScan
2135 : * when we set continuescan=false, indicating if a new primitive index scan
2136 : * has been scheduled (otherwise, the top-level scan has run out of tuples in
2137 : * the current scan direction).
2138 : *
2139 : * Caller must use _bt_tuple_before_array_skeys to determine if the current
2140 : * place in the scan is >= the current array keys _before_ calling here.
2141 : * We're responsible for ensuring that caller's tuple is <= the newly advanced
2142 : * required array keys once we return. We try to find an exact match, but
2143 : * failing that we'll advance the array keys to whatever set of array elements
2144 : * comes next in the key space for the current scan direction. Required array
2145 : * keys "ratchet forwards" (or backwards). They can only advance as the scan
2146 : * itself advances through the index/key space.
2147 : *
2148 : * (The rules are the same for backwards scans, except that the operators are
2149 : * flipped: just replace the precondition's >= operator with a <=, and the
2150 : * postcondition's <= operator with a >=. In other words, just swap the
2151 : * precondition with the postcondition.)
2152 : *
2153 : * We also deal with "advancing" non-required arrays here (or arrays that are
2154 : * treated as non-required for the duration of a _bt_readpage call). Callers
2155 : * whose sktrig scan key is non-required specify sktrig_required=false. These
2156 : * calls are the only exception to the general rule about always advancing the
2157 : * required array keys (the scan may not even have a required array). These
2158 : * callers should just pass a NULL pstate (since there is never any question
2159 : * of stopping the scan). No call to _bt_tuple_before_array_skeys is required
2160 : * ahead of these calls (it's already clear that any required scan keys must
2161 : * be satisfied by caller's tuple).
2162 : *
2163 : * Note that we deal with non-array required equality strategy scan keys as
2164 : * degenerate single element arrays here. Obviously, they can never really
2165 : * advance in the way that real arrays can, but they must still affect how we
2166 : * advance real array scan keys (exactly like true array equality scan keys).
2167 : * We have to keep around a 3-way ORDER proc for these (using the "=" operator
2168 : * won't do), since in general whether the tuple is < or > _any_ unsatisfied
2169 : * required equality key influences how the scan's real arrays must advance.
2170 : *
2171 : * Note also that we may sometimes need to advance the array keys when the
2172 : * existing required array keys (and other required equality keys) are already
2173 : * an exact match for every corresponding value from caller's tuple. We must
2174 : * do this for inequalities that _bt_check_compare set continuescan=false for.
2175 : * They'll advance the array keys here, just like any other scan key that
2176 : * _bt_check_compare stops on. (This can even happen _after_ we advance the
2177 : * array keys, in which case we'll advance the array keys a second time. That
2178 : * way _bt_checkkeys caller always has its required arrays advance to the
2179 : * maximum possible extent that its tuple will allow.)
2180 : */
2181 : static bool
2182 200166 : _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
2183 : IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
2184 : int sktrig, bool sktrig_required)
2185 : {
2186 200166 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
2187 200166 : Relation rel = scan->indexRelation;
2188 200166 : ScanDirection dir = pstate ? pstate->dir : ForwardScanDirection;
2189 200166 : int arrayidx = 0;
2190 200166 : bool beyond_end_advance = false,
2191 200166 : skip_array_advanced = false,
2192 200166 : has_required_opposite_direction_only = false,
2193 200166 : all_required_satisfied = true,
2194 200166 : all_satisfied = true;
2195 :
2196 : Assert(!so->needPrimScan && !so->scanBehind && !so->oppositeDirCheck);
2197 : Assert(_bt_verify_keys_with_arraykeys(scan));
2198 :
2199 200166 : if (sktrig_required)
2200 : {
2201 : /*
2202 : * Precondition array state assertion
2203 : */
2204 : Assert(!_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc,
2205 : tupnatts, false, 0, NULL));
2206 :
2207 : /*
2208 : * Once we return we'll have a new set of required array keys, so
2209 : * reset state used by "look ahead" optimization
2210 : */
2211 191640 : pstate->rechecks = 0;
2212 191640 : pstate->targetdistance = 0;
2213 : }
2214 8526 : else if (sktrig < so->numberOfKeys - 1 &&
2215 8526 : !(so->keyData[so->numberOfKeys - 1].sk_flags & SK_SEARCHARRAY))
2216 : {
2217 8526 : int least_sign_ikey = so->numberOfKeys - 1;
2218 : bool continuescan;
2219 :
2220 : /*
2221 : * Optimization: perform a precheck of the least significant key
2222 : * during !sktrig_required calls when it isn't already our sktrig
2223 : * (provided the precheck key is not itself an array).
2224 : *
2225 : * When the precheck works out we'll avoid an expensive binary search
2226 : * of sktrig's array (plus any other arrays before least_sign_ikey).
2227 : */
2228 : Assert(so->keyData[sktrig].sk_flags & SK_SEARCHARRAY);
2229 8526 : if (!_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, false,
2230 : false, &continuescan,
2231 : &least_sign_ikey))
2232 2238 : return false;
2233 : }
2234 :
2235 580866 : for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
2236 : {
2237 388704 : ScanKey cur = so->keyData + ikey;
2238 388704 : BTArrayKeyInfo *array = NULL;
2239 : Datum tupdatum;
2240 388704 : bool required = false,
2241 : tupnull;
2242 : int32 result;
2243 388704 : int set_elem = 0;
2244 :
2245 388704 : if (cur->sk_strategy == BTEqualStrategyNumber)
2246 : {
2247 : /* Manage array state */
2248 340594 : if (cur->sk_flags & SK_SEARCHARRAY)
2249 : {
2250 208194 : array = &so->arrayKeys[arrayidx++];
2251 : Assert(array->scan_key == ikey);
2252 : }
2253 : }
2254 : else
2255 : {
2256 : /*
2257 : * Are any inequalities required in the opposite direction only
2258 : * present here?
2259 : */
2260 48110 : if (((ScanDirectionIsForward(dir) &&
2261 48110 : (cur->sk_flags & (SK_BT_REQBKWD))) ||
2262 0 : (ScanDirectionIsBackward(dir) &&
2263 0 : (cur->sk_flags & (SK_BT_REQFWD)))))
2264 15566 : has_required_opposite_direction_only = true;
2265 : }
2266 :
2267 : /* Optimization: skip over known-satisfied scan keys */
2268 388704 : if (ikey < sktrig)
2269 76032 : continue;
2270 :
2271 372390 : if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
2272 : {
2273 372390 : required = true;
2274 :
2275 372390 : if (cur->sk_attno > tupnatts)
2276 : {
2277 : /* Set this just like _bt_tuple_before_array_skeys */
2278 : Assert(sktrig < ikey);
2279 2296 : so->scanBehind = true;
2280 : }
2281 : }
2282 :
2283 : /*
2284 : * Handle a required non-array scan key that the initial call to
2285 : * _bt_check_compare indicated triggered array advancement, if any.
2286 : *
2287 : * The non-array scan key's strategy will be <, <=, or = during a
2288 : * forwards scan (or any one of =, >=, or > during a backwards scan).
2289 : * It follows that the corresponding tuple attribute's value must now
2290 : * be either > or >= the scan key value (for backwards scans it must
2291 : * be either < or <= that value).
2292 : *
2293 : * If this is a required equality strategy scan key, this is just an
2294 : * optimization; _bt_tuple_before_array_skeys already confirmed that
2295 : * this scan key places us ahead of caller's tuple. There's no need
2296 : * to repeat that work now. (The same underlying principle also gets
2297 : * applied by the cur_elem_trig optimization used to speed up searches
2298 : * for the next array element.)
2299 : *
2300 : * If this is a required inequality strategy scan key, we _must_ rely
2301 : * on _bt_check_compare like this; we aren't capable of directly
2302 : * evaluating required inequality strategy scan keys here, on our own.
2303 : */
2304 372390 : if (ikey == sktrig && !array)
2305 : {
2306 : Assert(sktrig_required && required && all_required_satisfied);
2307 :
2308 : /* Use "beyond end" advancement. See below for an explanation. */
2309 7408 : beyond_end_advance = true;
2310 7408 : all_satisfied = all_required_satisfied = false;
2311 :
2312 7408 : continue;
2313 : }
2314 :
2315 : /*
2316 : * Nothing more for us to do with an inequality strategy scan key that
2317 : * wasn't the one that _bt_check_compare stopped on, though.
2318 : *
2319 : * Note: if our later call to _bt_check_compare (to recheck caller's
2320 : * tuple) sets continuescan=false due to finding this same inequality
2321 : * unsatisfied (possible when it's required in the scan direction),
2322 : * we'll deal with it via a recursive "second pass" call.
2323 : */
2324 364982 : else if (cur->sk_strategy != BTEqualStrategyNumber)
2325 47540 : continue;
2326 :
2327 : /*
2328 : * Nothing for us to do with an equality strategy scan key that isn't
2329 : * marked required, either -- unless it's a non-required array
2330 : */
2331 317442 : else if (!required && !array)
2332 0 : continue;
2333 :
2334 : /*
2335 : * Here we perform steps for all array scan keys after a required
2336 : * array scan key whose binary search triggered "beyond end of array
2337 : * element" array advancement due to encountering a tuple attribute
2338 : * value > the closest matching array key (or < for backwards scans).
2339 : */
2340 317442 : if (beyond_end_advance)
2341 : {
2342 1428 : if (array)
2343 594 : _bt_array_set_low_or_high(rel, cur, array,
2344 : ScanDirectionIsBackward(dir));
2345 :
2346 1428 : continue;
2347 : }
2348 :
2349 : /*
2350 : * Here we perform steps for all array scan keys after a required
2351 : * array scan key whose tuple attribute was < the closest matching
2352 : * array key when we dealt with it (or > for backwards scans).
2353 : *
2354 : * This earlier required array key already puts us ahead of caller's
2355 : * tuple in the key space (for the current scan direction). We must
2356 : * make sure that subsequent lower-order array keys do not put us too
2357 : * far ahead (ahead of tuples that have yet to be seen by our caller).
2358 : * For example, when a tuple "(a, b) = (42, 5)" advances the array
2359 : * keys on "a" from 40 to 45, we must also set "b" to whatever the
2360 : * first array element for "b" is. It would be wrong to allow "b" to
2361 : * be set based on the tuple value.
2362 : *
2363 : * Perform the same steps with truncated high key attributes. You can
2364 : * think of this as a "binary search" for the element closest to the
2365 : * value -inf. Again, the arrays must never get ahead of the scan.
2366 : */
2367 316014 : if (!all_required_satisfied || cur->sk_attno > tupnatts)
2368 : {
2369 3342 : if (array)
2370 772 : _bt_array_set_low_or_high(rel, cur, array,
2371 : ScanDirectionIsForward(dir));
2372 :
2373 3342 : continue;
2374 : }
2375 :
2376 : /*
2377 : * Search in scankey's array for the corresponding tuple attribute
2378 : * value from caller's tuple
2379 : */
2380 312672 : tupdatum = index_getattr(tuple, cur->sk_attno, tupdesc, &tupnull);
2381 :
2382 312672 : if (array)
2383 : {
2384 192292 : bool cur_elem_trig = (sktrig_required && ikey == sktrig);
2385 :
2386 : /*
2387 : * "Binary search" by checking if tupdatum/tupnull are within the
2388 : * range of the skip array
2389 : */
2390 192292 : if (array->num_elems == -1)
2391 160334 : _bt_binsrch_skiparray_skey(cur_elem_trig, dir,
2392 : tupdatum, tupnull, array, cur,
2393 : &result);
2394 :
2395 : /*
2396 : * Binary search for the closest match from the SAOP array
2397 : */
2398 : else
2399 31958 : set_elem = _bt_binsrch_array_skey(&so->orderProcs[ikey],
2400 : cur_elem_trig, dir,
2401 : tupdatum, tupnull, array, cur,
2402 : &result);
2403 : }
2404 : else
2405 : {
2406 : Assert(required);
2407 :
2408 : /*
2409 : * This is a required non-array equality strategy scan key, which
2410 : * we'll treat as a degenerate single element array.
2411 : *
2412 : * This scan key's imaginary "array" can't really advance, but it
2413 : * can still roll over like any other array. (Actually, this is
2414 : * no different to real single value arrays, which never advance
2415 : * without rolling over -- they can never truly advance, either.)
2416 : */
2417 120380 : result = _bt_compare_array_skey(&so->orderProcs[ikey],
2418 : tupdatum, tupnull,
2419 : cur->sk_argument, cur);
2420 : }
2421 :
2422 : /*
2423 : * Consider "beyond end of array element" array advancement.
2424 : *
2425 : * When the tuple attribute value is > the closest matching array key
2426 : * (or < in the backwards scan case), we need to ratchet this array
2427 : * forward (backward) by one increment, so that caller's tuple ends up
2428 : * being < final array value instead (or > final array value instead).
2429 : * This process has to work for all of the arrays, not just this one:
2430 : * it must "carry" to higher-order arrays when the set_elem that we
2431 : * just found happens to be the final one for the scan's direction.
2432 : * Incrementing (decrementing) set_elem itself isn't good enough.
2433 : *
2434 : * Our approach is to provisionally use set_elem as if it was an exact
2435 : * match now, then set each later/less significant array to whatever
2436 : * its final element is. Once outside the loop we'll then "increment
2437 : * this array's set_elem" by calling _bt_advance_array_keys_increment.
2438 : * That way the process rolls over to higher order arrays as needed.
2439 : *
2440 : * Under this scheme any required arrays only ever ratchet forwards
2441 : * (or backwards), and always do so to the maximum possible extent
2442 : * that we can know will be safe without seeing the scan's next tuple.
2443 : * We don't need any special handling for required scan keys that lack
2444 : * a real array to advance, nor for redundant scan keys that couldn't
2445 : * be eliminated by _bt_preprocess_keys. It won't matter if some of
2446 : * our "true" array scan keys (or even all of them) are non-required.
2447 : */
2448 312672 : if (sktrig_required && required &&
2449 306384 : ((ScanDirectionIsForward(dir) && result > 0) ||
2450 1716 : (ScanDirectionIsBackward(dir) && result < 0)))
2451 23864 : beyond_end_advance = true;
2452 :
2453 : Assert(all_required_satisfied && all_satisfied);
2454 312672 : if (result != 0)
2455 : {
2456 : /*
2457 : * Track whether caller's tuple satisfies our new post-advancement
2458 : * qual, for required scan keys, as well as for the entire set of
2459 : * interesting scan keys (all required scan keys plus non-required
2460 : * array scan keys are considered interesting.)
2461 : */
2462 143536 : all_satisfied = false;
2463 143536 : if (sktrig_required && required)
2464 137770 : all_required_satisfied = false;
2465 : else
2466 : {
2467 : /*
2468 : * There's no need to advance the arrays using the best
2469 : * available match for a non-required array. Give up now.
2470 : * (Though note that sktrig_required calls still have to do
2471 : * all the usual post-advancement steps, including the recheck
2472 : * call to _bt_check_compare.)
2473 : */
2474 : break;
2475 : }
2476 : }
2477 :
2478 : /* Advance array keys, even when we don't have an exact match */
2479 306906 : if (array)
2480 : {
2481 186526 : if (array->num_elems == -1)
2482 : {
2483 : /* Skip array's new element is tupdatum (or MINVAL/MAXVAL) */
2484 154568 : _bt_skiparray_set_element(rel, cur, array, result,
2485 : tupdatum, tupnull);
2486 154568 : skip_array_advanced = true;
2487 : }
2488 31958 : else if (array->cur_elem != set_elem)
2489 : {
2490 : /* SAOP array's new element is set_elem datum */
2491 23782 : array->cur_elem = set_elem;
2492 23782 : cur->sk_argument = array->elem_values[set_elem];
2493 : }
2494 : }
2495 : }
2496 :
2497 : /*
2498 : * Advance the array keys incrementally whenever "beyond end of array
2499 : * element" array advancement happens, so that advancement will carry to
2500 : * higher-order arrays (might exhaust all the scan's arrays instead, which
2501 : * ends the top-level scan).
2502 : */
2503 197928 : if (beyond_end_advance &&
2504 31272 : !_bt_advance_array_keys_increment(scan, dir, &skip_array_advanced))
2505 8326 : goto end_toplevel_scan;
2506 :
2507 : Assert(_bt_verify_keys_with_arraykeys(scan));
2508 :
2509 : /*
2510 : * Maintain a page-level count of the number of times the scan's array
2511 : * keys advanced in a way that affected at least one skip array
2512 : */
2513 189602 : if (sktrig_required && skip_array_advanced)
2514 160564 : pstate->nskipadvances++;
2515 :
2516 : /*
2517 : * Does tuple now satisfy our new qual? Recheck with _bt_check_compare.
2518 : *
2519 : * Calls triggered by an unsatisfied required scan key, whose tuple now
2520 : * satisfies all required scan keys, but not all nonrequired array keys,
2521 : * will still require a recheck call to _bt_check_compare. They'll still
2522 : * need its "second pass" handling of required inequality scan keys.
2523 : * (Might have missed a still-unsatisfied required inequality scan key
2524 : * that caller didn't detect as the sktrig scan key during its initial
2525 : * _bt_check_compare call that used the old/original qual.)
2526 : *
2527 : * Calls triggered by an unsatisfied nonrequired array scan key never need
2528 : * "second pass" handling of required inequalities (nor any other handling
2529 : * of any required scan key). All that matters is whether caller's tuple
2530 : * satisfies the new qual, so it's safe to just skip the _bt_check_compare
2531 : * recheck when we've already determined that it can only return 'false'.
2532 : *
2533 : * Note: In practice most scan keys are marked required by preprocessing,
2534 : * if necessary by generating a preceding skip array. We nevertheless
2535 : * often handle array keys marked required as if they were nonrequired.
2536 : * This behavior is requested by our _bt_check_compare caller, though only
2537 : * when it is passed "forcenonrequired=true" by _bt_checkkeys.
2538 : */
2539 189602 : if ((sktrig_required && all_required_satisfied) ||
2540 143140 : (!sktrig_required && all_satisfied))
2541 : {
2542 46984 : int nsktrig = sktrig + 1;
2543 : bool continuescan;
2544 :
2545 : Assert(all_required_satisfied);
2546 :
2547 : /* Recheck _bt_check_compare on behalf of caller */
2548 46984 : if (_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, false,
2549 46984 : !sktrig_required, &continuescan,
2550 46984 : &nsktrig) &&
2551 39328 : !so->scanBehind)
2552 : {
2553 : /* This tuple satisfies the new qual */
2554 : Assert(all_satisfied && continuescan);
2555 :
2556 37132 : if (pstate)
2557 36610 : pstate->continuescan = true;
2558 :
2559 37354 : return true;
2560 : }
2561 :
2562 : /*
2563 : * Consider "second pass" handling of required inequalities.
2564 : *
2565 : * It's possible that our _bt_check_compare call indicated that the
2566 : * scan should end due to some unsatisfied inequality that wasn't
2567 : * initially recognized as such by us. Handle this by calling
2568 : * ourselves recursively, this time indicating that the trigger is the
2569 : * inequality that we missed first time around (and using a set of
2570 : * required array/equality keys that are now exact matches for tuple).
2571 : *
2572 : * We make a strong, general guarantee that every _bt_checkkeys call
2573 : * here will advance the array keys to the maximum possible extent
2574 : * that we can know to be safe based on caller's tuple alone. If we
2575 : * didn't perform this step, then that guarantee wouldn't quite hold.
2576 : */
2577 9852 : if (unlikely(!continuescan))
2578 : {
2579 : bool satisfied PG_USED_FOR_ASSERTS_ONLY;
2580 :
2581 : Assert(sktrig_required);
2582 : Assert(so->keyData[nsktrig].sk_strategy != BTEqualStrategyNumber);
2583 :
2584 : /*
2585 : * The tuple must use "beyond end" advancement during the
2586 : * recursive call, so we cannot possibly end up back here when
2587 : * recursing. We'll consume a small, fixed amount of stack space.
2588 : */
2589 : Assert(!beyond_end_advance);
2590 :
2591 : /* Advance the array keys a second time using same tuple */
2592 222 : satisfied = _bt_advance_array_keys(scan, pstate, tuple, tupnatts,
2593 : tupdesc, nsktrig, true);
2594 :
2595 : /* This tuple doesn't satisfy the inequality */
2596 : Assert(!satisfied);
2597 222 : return false;
2598 : }
2599 :
2600 : /*
2601 : * Some non-required scan key (from new qual) still not satisfied.
2602 : *
2603 : * All scan keys required in the current scan direction must still be
2604 : * satisfied, though, so we can trust all_required_satisfied below.
2605 : */
2606 : }
2607 :
2608 : /*
2609 : * When we were called just to deal with "advancing" non-required arrays,
2610 : * this is as far as we can go (cannot stop the scan for these callers)
2611 : */
2612 152248 : if (!sktrig_required)
2613 : {
2614 : /* Caller's tuple doesn't match any qual */
2615 5766 : return false;
2616 : }
2617 :
2618 : /*
2619 : * Postcondition array state assertion (for still-unsatisfied tuples).
2620 : *
2621 : * By here we have established that the scan's required arrays (scan must
2622 : * have at least one required array) advanced, without becoming exhausted.
2623 : *
2624 : * Caller's tuple is now < the newly advanced array keys (or > when this
2625 : * is a backwards scan), except in the case where we only got this far due
2626 : * to an unsatisfied non-required scan key. Verify that with an assert.
2627 : *
2628 : * Note: we don't just quit at this point when all required scan keys were
2629 : * found to be satisfied because we need to consider edge-cases involving
2630 : * scan keys required in the opposite direction only; those aren't tracked
2631 : * by all_required_satisfied.
2632 : */
2633 : Assert(_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, tupnatts,
2634 : false, 0, NULL) ==
2635 : !all_required_satisfied);
2636 :
2637 : /*
2638 : * We generally permit primitive index scans to continue onto the next
2639 : * sibling page when the page's finaltup satisfies all required scan keys
2640 : * at the point where we're between pages.
2641 : *
2642 : * If caller's tuple is also the page's finaltup, and we see that required
2643 : * scan keys still aren't satisfied, start a new primitive index scan.
2644 : */
2645 146482 : if (!all_required_satisfied && pstate->finaltup == tuple)
2646 512 : goto new_prim_scan;
2647 :
2648 : /*
2649 : * Proactively check finaltup (don't wait until finaltup is reached by the
2650 : * scan) when it might well turn out to not be satisfied later on.
2651 : *
2652 : * Note: if so->scanBehind hasn't already been set for finaltup by us,
2653 : * it'll be set during this call to _bt_tuple_before_array_skeys. Either
2654 : * way, it'll be set correctly (for the whole page) after this point.
2655 : */
2656 228058 : if (!all_required_satisfied && pstate->finaltup &&
2657 164176 : _bt_tuple_before_array_skeys(scan, dir, pstate->finaltup, tupdesc,
2658 164176 : BTreeTupleGetNAtts(pstate->finaltup, rel),
2659 : false, 0, &so->scanBehind))
2660 17476 : goto new_prim_scan;
2661 :
2662 : /*
2663 : * When we encounter a truncated finaltup high key attribute, we're
2664 : * optimistic about the chances of its corresponding required scan key
2665 : * being satisfied when we go on to recheck it against tuples from this
2666 : * page's right sibling leaf page. We consider truncated attributes to be
2667 : * satisfied by required scan keys, which allows the primitive index scan
2668 : * to continue to the next leaf page. We must set so->scanBehind to true
2669 : * to remember that the last page's finaltup had "satisfied" required scan
2670 : * keys for one or more truncated attribute values (scan keys required in
2671 : * _either_ scan direction).
2672 : *
2673 : * There is a chance that _bt_readpage (which checks so->scanBehind) will
2674 : * find that even the sibling leaf page's finaltup is < the new array
2675 : * keys. When that happens, our optimistic policy will have incurred a
2676 : * single extra leaf page access that could have been avoided.
2677 : *
2678 : * A pessimistic policy would give backward scans a gratuitous advantage
2679 : * over forward scans. We'd punish forward scans for applying more
2680 : * accurate information from the high key, rather than just using the
2681 : * final non-pivot tuple as finaltup, in the style of backward scans.
2682 : * Being pessimistic would also give some scans with non-required arrays a
2683 : * perverse advantage over similar scans that use required arrays instead.
2684 : *
2685 : * This is similar to our scan-level heuristics, below. They also set
2686 : * scanBehind to speculatively continue the primscan onto the next page.
2687 : */
2688 128494 : if (so->scanBehind)
2689 : {
2690 : /* Truncated high key -- _bt_scanbehind_checkkeys recheck scheduled */
2691 : }
2692 :
2693 : /*
2694 : * Handle inequalities marked required in the opposite scan direction.
2695 : * They can also signal that we should start a new primitive index scan.
2696 : *
2697 : * It's possible that the scan is now positioned where "matching" tuples
2698 : * begin, and that caller's tuple satisfies all scan keys required in the
2699 : * current scan direction. But if caller's tuple still doesn't satisfy
2700 : * other scan keys that are required in the opposite scan direction only
2701 : * (e.g., a required >= strategy scan key when scan direction is forward),
2702 : * it's still possible that there are many leaf pages before the page that
2703 : * _bt_first could skip straight to. Groveling through all those pages
2704 : * will always give correct answers, but it can be very inefficient. We
2705 : * must avoid needlessly scanning extra pages.
2706 : *
2707 : * Separately, it's possible that _bt_check_compare set continuescan=false
2708 : * for a scan key that's required in the opposite direction only. This is
2709 : * a special case, that happens only when _bt_check_compare sees that the
2710 : * inequality encountered a NULL value. This signals the end of non-NULL
2711 : * values in the current scan direction, which is reason enough to end the
2712 : * (primitive) scan. If this happens at the start of a large group of
2713 : * NULL values, then we shouldn't expect to be called again until after
2714 : * the scan has already read indefinitely-many leaf pages full of tuples
2715 : * with NULL suffix values. (_bt_first is expected to skip over the group
2716 : * of NULLs by applying a similar "deduce NOT NULL" rule of its own, which
2717 : * involves consing up an explicit SK_SEARCHNOTNULL key.)
2718 : *
2719 : * Apply a test against finaltup to detect and recover from the problem:
2720 : * if even finaltup doesn't satisfy such an inequality, we just skip by
2721 : * starting a new primitive index scan. When we skip, we know for sure
2722 : * that all of the tuples on the current page following caller's tuple are
2723 : * also before the _bt_first-wise start of tuples for our new qual. That
2724 : * at least suggests many more skippable pages beyond the current page.
2725 : * (when so->scanBehind and so->oppositeDirCheck are set, this'll happen
2726 : * when we test the next page's finaltup/high key instead.)
2727 : */
2728 126270 : else if (has_required_opposite_direction_only && pstate->finaltup &&
2729 4292 : unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup)))
2730 2 : goto new_prim_scan;
2731 :
2732 126268 : continue_scan:
2733 :
2734 : /*
2735 : * Stick with the ongoing primitive index scan for now.
2736 : *
2737 : * It's possible that later tuples will also turn out to have values that
2738 : * are still < the now-current array keys (or > the current array keys).
2739 : * Our caller will handle this by performing what amounts to a linear
2740 : * search of the page, implemented by calling _bt_check_compare and then
2741 : * _bt_tuple_before_array_skeys for each tuple.
2742 : *
2743 : * This approach has various advantages over a binary search of the page.
2744 : * Repeated binary searches of the page (one binary search for every array
2745 : * advancement) won't outperform a continuous linear search. While there
2746 : * are workloads that a naive linear search won't handle well, our caller
2747 : * has a "look ahead" fallback mechanism to deal with that problem.
2748 : */
2749 129360 : pstate->continuescan = true; /* Override _bt_check_compare */
2750 129360 : so->needPrimScan = false; /* _bt_readpage has more tuples to check */
2751 :
2752 129360 : if (so->scanBehind)
2753 : {
2754 : /*
2755 : * Remember if recheck needs to call _bt_oppodir_checkkeys for next
2756 : * page's finaltup (see above comments about "Handle inequalities
2757 : * marked required in the opposite scan direction" for why).
2758 : */
2759 3092 : so->oppositeDirCheck = has_required_opposite_direction_only;
2760 :
2761 : /*
2762 : * skip by setting "look ahead" mechanism's offnum for forwards scans
2763 : * (backwards scans check scanBehind flag directly instead)
2764 : */
2765 3092 : if (ScanDirectionIsForward(dir))
2766 3074 : pstate->skip = pstate->maxoff + 1;
2767 : }
2768 :
2769 : /* Caller's tuple doesn't match the new qual */
2770 129360 : return false;
2771 :
2772 17990 : new_prim_scan:
2773 :
2774 : Assert(pstate->finaltup); /* not on rightmost/leftmost page */
2775 :
2776 : /*
2777 : * Looks like another primitive index scan is required. But consider
2778 : * continuing the current primscan based on scan-level heuristics.
2779 : *
2780 : * Continue the ongoing primitive scan (and schedule a recheck for when
2781 : * the scan arrives on the next sibling leaf page) when it has already
2782 : * read at least one leaf page before the one we're reading now. This
2783 : * makes primscan scheduling more efficient when scanning subsets of an
2784 : * index with many distinct attribute values matching many array elements.
2785 : * It encourages fewer, larger primitive scans where that makes sense.
2786 : * This will in turn encourage _bt_readpage to apply the pstate.startikey
2787 : * optimization more often.
2788 : *
2789 : * Also continue the ongoing primitive index scan when it is still on the
2790 : * first page if there have been more than NSKIPADVANCES_THRESHOLD calls
2791 : * here that each advanced at least one of the scan's skip arrays
2792 : * (deliberately ignore advancements that only affected SAOP arrays here).
2793 : * A page that cycles through this many skip array elements is quite
2794 : * likely to neighbor similar pages, that we'll also need to read.
2795 : *
2796 : * Note: These heuristics aren't as aggressive as you might think. We're
2797 : * conservative about allowing a primitive scan to step from the first
2798 : * leaf page it reads to the page's sibling page (we only allow it on
2799 : * first pages whose finaltup strongly suggests that it'll work out, as
2800 : * well as first pages that have a large number of skip array advances).
2801 : * Clearing this first page finaltup hurdle is a strong signal in itself.
2802 : *
2803 : * Note: The NSKIPADVANCES_THRESHOLD heuristic exists only to avoid
2804 : * pathological cases. Specifically, cases where a skip scan should just
2805 : * behave like a traditional full index scan, but ends up "skipping" again
2806 : * and again, descending to the prior leaf page's direct sibling leaf page
2807 : * each time. This misbehavior would otherwise be possible during scans
2808 : * that never quite manage to "clear the first page finaltup hurdle".
2809 : */
2810 17990 : if (!pstate->firstpage || pstate->nskipadvances > NSKIPADVANCES_THRESHOLD)
2811 : {
2812 : /* Schedule a recheck once on the next (or previous) page */
2813 868 : so->scanBehind = true;
2814 :
2815 : /* Continue the current primitive scan after all */
2816 868 : goto continue_scan;
2817 : }
2818 :
2819 : /*
2820 : * End this primitive index scan, but schedule another.
2821 : *
2822 : * Note: We make a soft assumption that the current scan direction will
2823 : * also be used within _bt_next, when it is asked to step off this page.
2824 : * It is up to _bt_next to cancel this scheduled primitive index scan
2825 : * whenever it steps to a page in the direction opposite currPos.dir.
2826 : */
2827 17122 : pstate->continuescan = false; /* Tell _bt_readpage we're done... */
2828 17122 : so->needPrimScan = true; /* ...but call _bt_first again */
2829 :
2830 17122 : if (scan->parallel_scan)
2831 36 : _bt_parallel_primscan_schedule(scan, so->currPos.currPage);
2832 :
2833 : /* Caller's tuple doesn't match the new qual */
2834 17122 : return false;
2835 :
2836 8326 : end_toplevel_scan:
2837 :
2838 : /*
2839 : * End the current primitive index scan, but don't schedule another.
2840 : *
2841 : * This ends the entire top-level scan in the current scan direction.
2842 : *
2843 : * Note: The scan's arrays (including any non-required arrays) are now in
2844 : * their final positions for the current scan direction. If the scan
2845 : * direction happens to change, then the arrays will already be in their
2846 : * first positions for what will then be the current scan direction.
2847 : */
2848 8326 : pstate->continuescan = false; /* Tell _bt_readpage we're done... */
2849 8326 : so->needPrimScan = false; /* ...and don't call _bt_first again */
2850 :
2851 : /* Caller's tuple doesn't match any qual */
2852 8326 : return false;
2853 : }
2854 :
2855 : /*
2856 : * _bt_advance_array_keys_increment() -- Advance to next set of array elements
2857 : *
2858 : * Advances the array keys by a single increment in the current scan
2859 : * direction. When there are multiple array keys this can roll over from the
2860 : * lowest order array to higher order arrays.
2861 : *
2862 : * Returns true if there is another set of values to consider, false if not.
2863 : * On true result, the scankeys are initialized with the next set of values.
2864 : * On false result, the scankeys stay the same, and the array keys are not
2865 : * advanced (every array remains at its final element for scan direction).
2866 : */
2867 : static bool
2868 31272 : _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
2869 : bool *skip_array_set)
2870 : {
2871 31272 : Relation rel = scan->indexRelation;
2872 31272 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
2873 :
2874 : /*
2875 : * We must advance the last array key most quickly, since it will
2876 : * correspond to the lowest-order index column among the available
2877 : * qualifications
2878 : */
2879 40736 : for (int i = so->numArrayKeys - 1; i >= 0; i--)
2880 : {
2881 32410 : BTArrayKeyInfo *array = &so->arrayKeys[i];
2882 32410 : ScanKey skey = &so->keyData[array->scan_key];
2883 :
2884 32410 : if (array->num_elems == -1)
2885 24014 : *skip_array_set = true;
2886 :
2887 32410 : if (ScanDirectionIsForward(dir))
2888 : {
2889 31498 : if (_bt_array_increment(rel, skey, array))
2890 22070 : return true;
2891 : }
2892 : else
2893 : {
2894 912 : if (_bt_array_decrement(rel, skey, array))
2895 876 : return true;
2896 : }
2897 :
2898 : /*
2899 : * Couldn't increment (or decrement) array. Handle array roll over.
2900 : *
2901 : * Start over at the array's lowest sorting value (or its highest
2902 : * value, for backward scans)...
2903 : */
2904 9464 : _bt_array_set_low_or_high(rel, skey, array,
2905 : ScanDirectionIsForward(dir));
2906 :
2907 : /* ...then increment (or decrement) next most significant array */
2908 : }
2909 :
2910 : /*
2911 : * The array keys are now exhausted.
2912 : *
2913 : * Restore the array keys to the state they were in immediately before we
2914 : * were called. This ensures that the arrays only ever ratchet in the
2915 : * current scan direction.
2916 : *
2917 : * Without this, scans could overlook matching tuples when the scan
2918 : * direction gets reversed just before btgettuple runs out of items to
2919 : * return, but just after _bt_readpage prepares all the items from the
2920 : * scan's final page in so->currPos. When we're on the final page it is
2921 : * typical for so->currPos to get invalidated once btgettuple finally
2922 : * returns false, which'll effectively invalidate the scan's array keys.
2923 : * That hasn't happened yet, though -- and in general it may never happen.
2924 : */
2925 8326 : _bt_start_array_keys(scan, -dir);
2926 :
2927 8326 : return false;
2928 : }
2929 :
2930 : /*
2931 : * _bt_array_increment() -- increment array scan key's sk_argument
2932 : *
2933 : * Return value indicates whether caller's array was successfully incremented.
2934 : * Cannot increment an array whose current element is already the final one.
2935 : */
2936 : static bool
2937 31498 : _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array)
2938 : {
2939 31498 : bool oflow = false;
2940 : Datum inc_sk_argument;
2941 :
2942 : Assert(skey->sk_flags & SK_SEARCHARRAY);
2943 : Assert(!(skey->sk_flags & (SK_BT_MINVAL | SK_BT_NEXT | SK_BT_PRIOR)));
2944 :
2945 : /* SAOP array? */
2946 31498 : if (array->num_elems != -1)
2947 : {
2948 : Assert(!(skey->sk_flags & (SK_BT_SKIP | SK_BT_MINVAL | SK_BT_MAXVAL)));
2949 8360 : if (array->cur_elem < array->num_elems - 1)
2950 : {
2951 : /*
2952 : * Just increment current element, and assign its datum to skey
2953 : * (only skip arrays need us to free existing sk_argument memory)
2954 : */
2955 38 : array->cur_elem++;
2956 38 : skey->sk_argument = array->elem_values[array->cur_elem];
2957 :
2958 : /* Successfully incremented array */
2959 38 : return true;
2960 : }
2961 :
2962 : /* Cannot increment past final array element */
2963 8322 : return false;
2964 : }
2965 :
2966 : /* Nope, this is a skip array */
2967 : Assert(skey->sk_flags & SK_BT_SKIP);
2968 :
2969 : /*
2970 : * The sentinel value that represents the maximum value within the range
2971 : * of a skip array (often just +inf) is never incrementable
2972 : */
2973 23138 : if (skey->sk_flags & SK_BT_MAXVAL)
2974 640 : return false;
2975 :
2976 : /*
2977 : * When the current array element is NULL, and the highest sorting value
2978 : * in the index is also NULL, we cannot increment past the final element
2979 : */
2980 22498 : if ((skey->sk_flags & SK_ISNULL) && !(skey->sk_flags & SK_BT_NULLS_FIRST))
2981 436 : return false;
2982 :
2983 : /*
2984 : * Opclasses without skip support "increment" the scan key's current
2985 : * element by setting the NEXT flag. The true next value is determined by
2986 : * repositioning to the first index tuple > existing sk_argument/current
2987 : * array element. Note that this works in the usual way when the scan key
2988 : * is already marked ISNULL (i.e. when the current element is NULL).
2989 : */
2990 22062 : if (!array->sksup)
2991 : {
2992 : /* Successfully "incremented" array */
2993 14786 : skey->sk_flags |= SK_BT_NEXT;
2994 14786 : return true;
2995 : }
2996 :
2997 : /*
2998 : * Opclasses with skip support directly increment sk_argument
2999 : */
3000 7276 : if (skey->sk_flags & SK_ISNULL)
3001 : {
3002 : Assert(skey->sk_flags & SK_BT_NULLS_FIRST);
3003 :
3004 : /*
3005 : * Existing sk_argument/array element is NULL (for an IS NULL qual).
3006 : *
3007 : * "Increment" from NULL to the low_elem value provided by opclass
3008 : * skip support routine.
3009 : */
3010 36 : skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL);
3011 72 : skey->sk_argument = datumCopy(array->sksup->low_elem,
3012 36 : array->attbyval, array->attlen);
3013 36 : return true;
3014 : }
3015 :
3016 : /*
3017 : * Ask opclass support routine to provide incremented copy of existing
3018 : * non-NULL sk_argument
3019 : */
3020 7240 : inc_sk_argument = array->sksup->increment(rel, skey->sk_argument, &oflow);
3021 7240 : if (unlikely(oflow))
3022 : {
3023 : /* inc_sk_argument has undefined value (so no pfree) */
3024 30 : if (array->null_elem && !(skey->sk_flags & SK_BT_NULLS_FIRST))
3025 : {
3026 12 : _bt_skiparray_set_isnull(rel, skey, array);
3027 :
3028 : /* Successfully "incremented" array to NULL */
3029 12 : return true;
3030 : }
3031 :
3032 : /* Cannot increment past final array element */
3033 18 : return false;
3034 : }
3035 :
3036 : /*
3037 : * Successfully incremented sk_argument to a non-NULL value. Make sure
3038 : * that the incremented value is still within the range of the array.
3039 : */
3040 7210 : if (array->high_compare &&
3041 42 : !DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func,
3042 42 : array->high_compare->sk_collation,
3043 : inc_sk_argument,
3044 42 : array->high_compare->sk_argument)))
3045 : {
3046 : /* Keep existing sk_argument after all */
3047 12 : if (!array->attbyval)
3048 0 : pfree(DatumGetPointer(inc_sk_argument));
3049 :
3050 : /* Cannot increment past final array element */
3051 12 : return false;
3052 : }
3053 :
3054 : /* Accept value returned by opclass increment callback */
3055 7198 : if (!array->attbyval && skey->sk_argument)
3056 0 : pfree(DatumGetPointer(skey->sk_argument));
3057 7198 : skey->sk_argument = inc_sk_argument;
3058 :
3059 : /* Successfully incremented array */
3060 7198 : return true;
3061 : }
3062 :
3063 : /*
3064 : * _bt_array_decrement() -- decrement array scan key's sk_argument
3065 : *
3066 : * Return value indicates whether caller's array was successfully decremented.
3067 : * Cannot decrement an array whose current element is already the first one.
3068 : */
3069 : static bool
3070 912 : _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *array)
3071 : {
3072 912 : bool uflow = false;
3073 : Datum dec_sk_argument;
3074 :
3075 : Assert(skey->sk_flags & SK_SEARCHARRAY);
3076 : Assert(!(skey->sk_flags & (SK_BT_MAXVAL | SK_BT_NEXT | SK_BT_PRIOR)));
3077 :
3078 : /* SAOP array? */
3079 912 : if (array->num_elems != -1)
3080 : {
3081 : Assert(!(skey->sk_flags & (SK_BT_SKIP | SK_BT_MINVAL | SK_BT_MAXVAL)));
3082 36 : if (array->cur_elem > 0)
3083 : {
3084 : /*
3085 : * Just decrement current element, and assign its datum to skey
3086 : * (only skip arrays need us to free existing sk_argument memory)
3087 : */
3088 6 : array->cur_elem--;
3089 6 : skey->sk_argument = array->elem_values[array->cur_elem];
3090 :
3091 : /* Successfully decremented array */
3092 6 : return true;
3093 : }
3094 :
3095 : /* Cannot decrement to before first array element */
3096 30 : return false;
3097 : }
3098 :
3099 : /* Nope, this is a skip array */
3100 : Assert(skey->sk_flags & SK_BT_SKIP);
3101 :
3102 : /*
3103 : * The sentinel value that represents the minimum value within the range
3104 : * of a skip array (often just -inf) is never decrementable
3105 : */
3106 876 : if (skey->sk_flags & SK_BT_MINVAL)
3107 0 : return false;
3108 :
3109 : /*
3110 : * When the current array element is NULL, and the lowest sorting value in
3111 : * the index is also NULL, we cannot decrement before first array element
3112 : */
3113 876 : if ((skey->sk_flags & SK_ISNULL) && (skey->sk_flags & SK_BT_NULLS_FIRST))
3114 0 : return false;
3115 :
3116 : /*
3117 : * Opclasses without skip support "decrement" the scan key's current
3118 : * element by setting the PRIOR flag. The true prior value is determined
3119 : * by repositioning to the last index tuple < existing sk_argument/current
3120 : * array element. Note that this works in the usual way when the scan key
3121 : * is already marked ISNULL (i.e. when the current element is NULL).
3122 : */
3123 876 : if (!array->sksup)
3124 : {
3125 : /* Successfully "decremented" array */
3126 12 : skey->sk_flags |= SK_BT_PRIOR;
3127 12 : return true;
3128 : }
3129 :
3130 : /*
3131 : * Opclasses with skip support directly decrement sk_argument
3132 : */
3133 864 : if (skey->sk_flags & SK_ISNULL)
3134 : {
3135 : Assert(!(skey->sk_flags & SK_BT_NULLS_FIRST));
3136 :
3137 : /*
3138 : * Existing sk_argument/array element is NULL (for an IS NULL qual).
3139 : *
3140 : * "Decrement" from NULL to the high_elem value provided by opclass
3141 : * skip support routine.
3142 : */
3143 6 : skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL);
3144 12 : skey->sk_argument = datumCopy(array->sksup->high_elem,
3145 6 : array->attbyval, array->attlen);
3146 6 : return true;
3147 : }
3148 :
3149 : /*
3150 : * Ask opclass support routine to provide decremented copy of existing
3151 : * non-NULL sk_argument
3152 : */
3153 858 : dec_sk_argument = array->sksup->decrement(rel, skey->sk_argument, &uflow);
3154 858 : if (unlikely(uflow))
3155 : {
3156 : /* dec_sk_argument has undefined value (so no pfree) */
3157 0 : if (array->null_elem && (skey->sk_flags & SK_BT_NULLS_FIRST))
3158 : {
3159 0 : _bt_skiparray_set_isnull(rel, skey, array);
3160 :
3161 : /* Successfully "decremented" array to NULL */
3162 0 : return true;
3163 : }
3164 :
3165 : /* Cannot decrement to before first array element */
3166 0 : return false;
3167 : }
3168 :
3169 : /*
3170 : * Successfully decremented sk_argument to a non-NULL value. Make sure
3171 : * that the decremented value is still within the range of the array.
3172 : */
3173 858 : if (array->low_compare &&
3174 12 : !DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func,
3175 12 : array->low_compare->sk_collation,
3176 : dec_sk_argument,
3177 12 : array->low_compare->sk_argument)))
3178 : {
3179 : /* Keep existing sk_argument after all */
3180 6 : if (!array->attbyval)
3181 0 : pfree(DatumGetPointer(dec_sk_argument));
3182 :
3183 : /* Cannot decrement to before first array element */
3184 6 : return false;
3185 : }
3186 :
3187 : /* Accept value returned by opclass decrement callback */
3188 852 : if (!array->attbyval && skey->sk_argument)
3189 0 : pfree(DatumGetPointer(skey->sk_argument));
3190 852 : skey->sk_argument = dec_sk_argument;
3191 :
3192 : /* Successfully decremented array */
3193 852 : return true;
3194 : }
3195 :
3196 : /*
3197 : * _bt_array_set_low_or_high() -- Set array scan key to lowest/highest element
3198 : *
3199 : * Caller also passes associated scan key, which will have its argument set to
3200 : * the lowest/highest array value in passing.
3201 : */
3202 : static void
3203 93850 : _bt_array_set_low_or_high(Relation rel, ScanKey skey, BTArrayKeyInfo *array,
3204 : bool low_not_high)
3205 : {
3206 : Assert(skey->sk_flags & SK_SEARCHARRAY);
3207 :
3208 93850 : if (array->num_elems != -1)
3209 : {
3210 : /* set low or high element for SAOP array */
3211 84770 : int set_elem = 0;
3212 :
3213 : Assert(!(skey->sk_flags & SK_BT_SKIP));
3214 :
3215 84770 : if (!low_not_high)
3216 8336 : set_elem = array->num_elems - 1;
3217 :
3218 : /*
3219 : * Just copy over array datum (only skip arrays require freeing and
3220 : * allocating memory for sk_argument)
3221 : */
3222 84770 : array->cur_elem = set_elem;
3223 84770 : skey->sk_argument = array->elem_values[set_elem];
3224 :
3225 84770 : return;
3226 : }
3227 :
3228 : /* set low or high element for skip array */
3229 : Assert(skey->sk_flags & SK_BT_SKIP);
3230 : Assert(array->num_elems == -1);
3231 :
3232 : /* Free memory previously allocated for sk_argument if needed */
3233 9080 : if (!array->attbyval && skey->sk_argument)
3234 1872 : pfree(DatumGetPointer(skey->sk_argument));
3235 :
3236 : /* Reset flags */
3237 9080 : skey->sk_argument = (Datum) 0;
3238 9080 : skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL |
3239 : SK_BT_MINVAL | SK_BT_MAXVAL |
3240 : SK_BT_NEXT | SK_BT_PRIOR);
3241 :
3242 9080 : if (array->null_elem &&
3243 7276 : (low_not_high == ((skey->sk_flags & SK_BT_NULLS_FIRST) != 0)))
3244 : {
3245 : /* Requested element (either lowest or highest) has the value NULL */
3246 962 : skey->sk_flags |= (SK_SEARCHNULL | SK_ISNULL);
3247 : }
3248 8118 : else if (low_not_high)
3249 : {
3250 : /* Setting array to lowest element (according to low_compare) */
3251 7402 : skey->sk_flags |= SK_BT_MINVAL;
3252 : }
3253 : else
3254 : {
3255 : /* Setting array to highest element (according to high_compare) */
3256 716 : skey->sk_flags |= SK_BT_MAXVAL;
3257 : }
3258 : }
3259 :
3260 : /*
3261 : * _bt_skiparray_set_element() -- Set skip array scan key's sk_argument
3262 : *
3263 : * Caller passes set_elem_result returned by _bt_binsrch_skiparray_skey for
3264 : * caller's tupdatum/tupnull.
3265 : *
3266 : * We copy tupdatum/tupnull into skey's sk_argument iff set_elem_result == 0.
3267 : * Otherwise, we set skey to either the lowest or highest value that's within
3268 : * the range of caller's skip array (whichever is the best available match to
3269 : * tupdatum/tupnull that is still within the range of the skip array according
3270 : * to _bt_binsrch_skiparray_skey/set_elem_result).
3271 : */
3272 : static void
3273 154568 : _bt_skiparray_set_element(Relation rel, ScanKey skey, BTArrayKeyInfo *array,
3274 : int32 set_elem_result, Datum tupdatum, bool tupnull)
3275 : {
3276 : Assert(skey->sk_flags & SK_BT_SKIP);
3277 : Assert(skey->sk_flags & SK_SEARCHARRAY);
3278 :
3279 154568 : if (set_elem_result)
3280 : {
3281 : /* tupdatum/tupnull is out of the range of the skip array */
3282 : Assert(!array->null_elem);
3283 :
3284 640 : _bt_array_set_low_or_high(rel, skey, array, set_elem_result < 0);
3285 640 : return;
3286 : }
3287 :
3288 : /* Advance skip array to tupdatum (or tupnull) value */
3289 153928 : if (unlikely(tupnull))
3290 : {
3291 36 : _bt_skiparray_set_isnull(rel, skey, array);
3292 36 : return;
3293 : }
3294 :
3295 : /* Free memory previously allocated for sk_argument if needed */
3296 153892 : if (!array->attbyval && skey->sk_argument)
3297 77882 : pfree(DatumGetPointer(skey->sk_argument));
3298 :
3299 : /* tupdatum becomes new sk_argument/new current element */
3300 153892 : skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL |
3301 : SK_BT_MINVAL | SK_BT_MAXVAL |
3302 : SK_BT_NEXT | SK_BT_PRIOR);
3303 153892 : skey->sk_argument = datumCopy(tupdatum, array->attbyval, array->attlen);
3304 : }
3305 :
3306 : /*
3307 : * _bt_skiparray_set_isnull() -- set skip array scan key to NULL
3308 : */
3309 : static void
3310 48 : _bt_skiparray_set_isnull(Relation rel, ScanKey skey, BTArrayKeyInfo *array)
3311 : {
3312 : Assert(skey->sk_flags & SK_BT_SKIP);
3313 : Assert(skey->sk_flags & SK_SEARCHARRAY);
3314 : Assert(array->null_elem && !array->low_compare && !array->high_compare);
3315 :
3316 : /* Free memory previously allocated for sk_argument if needed */
3317 48 : if (!array->attbyval && skey->sk_argument)
3318 6 : pfree(DatumGetPointer(skey->sk_argument));
3319 :
3320 : /* NULL becomes new sk_argument/new current element */
3321 48 : skey->sk_argument = (Datum) 0;
3322 48 : skey->sk_flags &= ~(SK_BT_MINVAL | SK_BT_MAXVAL |
3323 : SK_BT_NEXT | SK_BT_PRIOR);
3324 48 : skey->sk_flags |= (SK_SEARCHNULL | SK_ISNULL);
3325 48 : }
3326 :
3327 : /*
3328 : * _bt_compare_array_skey() -- apply array comparison function
3329 : *
3330 : * Compares caller's tuple attribute value to a scan key/array element.
3331 : * Helper function used during binary searches of SK_SEARCHARRAY arrays.
3332 : *
3333 : * This routine returns:
3334 : * <0 if tupdatum < arrdatum;
3335 : * 0 if tupdatum == arrdatum;
3336 : * >0 if tupdatum > arrdatum.
3337 : *
3338 : * This is essentially the same interface as _bt_compare: both functions
3339 : * compare the value that they're searching for to a binary search pivot.
3340 : * However, unlike _bt_compare, this function's "tuple argument" comes first,
3341 : * while its "array/scankey argument" comes second.
3342 : */
3343 : static inline int32
3344 470444 : _bt_compare_array_skey(FmgrInfo *orderproc,
3345 : Datum tupdatum, bool tupnull,
3346 : Datum arrdatum, ScanKey cur)
3347 : {
3348 470444 : int32 result = 0;
3349 :
3350 : Assert(cur->sk_strategy == BTEqualStrategyNumber);
3351 : Assert(!(cur->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)));
3352 :
3353 470444 : if (tupnull) /* NULL tupdatum */
3354 : {
3355 228 : if (cur->sk_flags & SK_ISNULL)
3356 132 : result = 0; /* NULL "=" NULL */
3357 96 : else if (cur->sk_flags & SK_BT_NULLS_FIRST)
3358 0 : result = -1; /* NULL "<" NOT_NULL */
3359 : else
3360 96 : result = 1; /* NULL ">" NOT_NULL */
3361 : }
3362 470216 : else if (cur->sk_flags & SK_ISNULL) /* NOT_NULL tupdatum, NULL arrdatum */
3363 : {
3364 30540 : if (cur->sk_flags & SK_BT_NULLS_FIRST)
3365 54 : result = 1; /* NOT_NULL ">" NULL */
3366 : else
3367 30486 : result = -1; /* NOT_NULL "<" NULL */
3368 : }
3369 : else
3370 : {
3371 : /*
3372 : * Like _bt_compare, we need to be careful of cross-type comparisons,
3373 : * so the left value has to be the value that came from an index tuple
3374 : */
3375 439676 : result = DatumGetInt32(FunctionCall2Coll(orderproc, cur->sk_collation,
3376 : tupdatum, arrdatum));
3377 :
3378 : /*
3379 : * We flip the sign by following the obvious rule: flip whenever the
3380 : * column is a DESC column.
3381 : *
3382 : * _bt_compare does it the wrong way around (flip when *ASC*) in order
3383 : * to compensate for passing its orderproc arguments backwards. We
3384 : * don't need to play these games because we find it natural to pass
3385 : * tupdatum as the left value (and arrdatum as the right value).
3386 : */
3387 439676 : if (cur->sk_flags & SK_BT_DESC)
3388 45498 : INVERT_COMPARE_RESULT(result);
3389 : }
3390 :
3391 470444 : return result;
3392 : }
3393 :
3394 : /*
3395 : * _bt_binsrch_array_skey() -- Binary search for next matching array key
3396 : *
3397 : * Returns an index to the first array element >= caller's tupdatum argument.
3398 : * This convention is more natural for forwards scan callers, but that can't
3399 : * really matter to backwards scan callers. Both callers require handling for
3400 : * the case where the match we return is < tupdatum, and symmetric handling
3401 : * for the case where our best match is > tupdatum.
3402 : *
3403 : * Also sets *set_elem_result to the result _bt_compare_array_skey returned
3404 : * when we used it to compare the matching array element to tupdatum/tupnull.
3405 : *
3406 : * cur_elem_trig indicates if array advancement was triggered by this array's
3407 : * scan key, and that the array is for a required scan key. We can apply this
3408 : * information to find the next matching array element in the current scan
3409 : * direction using far fewer comparisons (fewer on average, compared to naive
3410 : * binary search). This scheme takes advantage of an important property of
3411 : * required arrays: required arrays always advance in lockstep with the index
3412 : * scan's progress through the index's key space.
3413 : */
3414 : int
3415 31988 : _bt_binsrch_array_skey(FmgrInfo *orderproc,
3416 : bool cur_elem_trig, ScanDirection dir,
3417 : Datum tupdatum, bool tupnull,
3418 : BTArrayKeyInfo *array, ScanKey cur,
3419 : int32 *set_elem_result)
3420 : {
3421 31988 : int low_elem = 0,
3422 31988 : mid_elem = -1,
3423 31988 : high_elem = array->num_elems - 1,
3424 31988 : result = 0;
3425 : Datum arrdatum;
3426 :
3427 : Assert(cur->sk_flags & SK_SEARCHARRAY);
3428 : Assert(!(cur->sk_flags & SK_BT_SKIP));
3429 : Assert(!(cur->sk_flags & SK_ISNULL)); /* SAOP arrays never have NULLs */
3430 : Assert(cur->sk_strategy == BTEqualStrategyNumber);
3431 :
3432 31988 : if (cur_elem_trig)
3433 : {
3434 : Assert(!ScanDirectionIsNoMovement(dir));
3435 : Assert(cur->sk_flags & SK_BT_REQFWD);
3436 :
3437 : /*
3438 : * When the scan key that triggered array advancement is a required
3439 : * array scan key, it is now certain that the current array element
3440 : * (plus all prior elements relative to the current scan direction)
3441 : * cannot possibly be at or ahead of the corresponding tuple value.
3442 : * (_bt_checkkeys must have called _bt_tuple_before_array_skeys, which
3443 : * makes sure this is true as a condition of advancing the arrays.)
3444 : *
3445 : * This makes it safe to exclude array elements up to and including
3446 : * the former-current array element from our search.
3447 : *
3448 : * Separately, when array advancement was triggered by a required scan
3449 : * key, the array element immediately after the former-current element
3450 : * is often either an exact tupdatum match, or a "close by" near-match
3451 : * (a near-match tupdatum is one whose key space falls _between_ the
3452 : * former-current and new-current array elements). We'll detect both
3453 : * cases via an optimistic comparison of the new search lower bound
3454 : * (or new search upper bound in the case of backwards scans).
3455 : */
3456 31670 : if (ScanDirectionIsForward(dir))
3457 : {
3458 31610 : low_elem = array->cur_elem + 1; /* old cur_elem exhausted */
3459 :
3460 : /* Compare prospective new cur_elem (also the new lower bound) */
3461 31610 : if (high_elem >= low_elem)
3462 : {
3463 23512 : arrdatum = array->elem_values[low_elem];
3464 23512 : result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
3465 : arrdatum, cur);
3466 :
3467 23512 : if (result <= 0)
3468 : {
3469 : /* Optimistic comparison optimization worked out */
3470 23426 : *set_elem_result = result;
3471 23426 : return low_elem;
3472 : }
3473 86 : mid_elem = low_elem;
3474 86 : low_elem++; /* this cur_elem exhausted, too */
3475 : }
3476 :
3477 8184 : if (high_elem < low_elem)
3478 : {
3479 : /* Caller needs to perform "beyond end" array advancement */
3480 8104 : *set_elem_result = 1;
3481 8104 : return high_elem;
3482 : }
3483 : }
3484 : else
3485 : {
3486 60 : high_elem = array->cur_elem - 1; /* old cur_elem exhausted */
3487 :
3488 : /* Compare prospective new cur_elem (also the new upper bound) */
3489 60 : if (high_elem >= low_elem)
3490 : {
3491 42 : arrdatum = array->elem_values[high_elem];
3492 42 : result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
3493 : arrdatum, cur);
3494 :
3495 42 : if (result >= 0)
3496 : {
3497 : /* Optimistic comparison optimization worked out */
3498 30 : *set_elem_result = result;
3499 30 : return high_elem;
3500 : }
3501 12 : mid_elem = high_elem;
3502 12 : high_elem--; /* this cur_elem exhausted, too */
3503 : }
3504 :
3505 30 : if (high_elem < low_elem)
3506 : {
3507 : /* Caller needs to perform "beyond end" array advancement */
3508 30 : *set_elem_result = -1;
3509 30 : return low_elem;
3510 : }
3511 : }
3512 : }
3513 :
3514 698 : while (high_elem > low_elem)
3515 : {
3516 438 : mid_elem = low_elem + ((high_elem - low_elem) / 2);
3517 438 : arrdatum = array->elem_values[mid_elem];
3518 :
3519 438 : result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
3520 : arrdatum, cur);
3521 :
3522 438 : if (result == 0)
3523 : {
3524 : /*
3525 : * It's safe to quit as soon as we see an equal array element.
3526 : * This often saves an extra comparison or two...
3527 : */
3528 138 : low_elem = mid_elem;
3529 138 : break;
3530 : }
3531 :
3532 300 : if (result > 0)
3533 270 : low_elem = mid_elem + 1;
3534 : else
3535 30 : high_elem = mid_elem;
3536 : }
3537 :
3538 : /*
3539 : * ...but our caller also cares about how its searched-for tuple datum
3540 : * compares to the low_elem datum. Must always set *set_elem_result with
3541 : * the result of that comparison specifically.
3542 : */
3543 398 : if (low_elem != mid_elem)
3544 242 : result = _bt_compare_array_skey(orderproc, tupdatum, tupnull,
3545 242 : array->elem_values[low_elem], cur);
3546 :
3547 398 : *set_elem_result = result;
3548 :
3549 398 : return low_elem;
3550 : }
3551 :
3552 : /*
3553 : * _bt_binsrch_skiparray_skey() -- "Binary search" within a skip array
3554 : *
3555 : * Does not return an index into the array, since skip arrays don't really
3556 : * contain elements (they generate their array elements procedurally instead).
3557 : * Our interface matches that of _bt_binsrch_array_skey in every other way.
3558 : *
3559 : * Sets *set_elem_result just like _bt_binsrch_array_skey would with a true
3560 : * array. The value 0 indicates that tupdatum/tupnull is within the range of
3561 : * the skip array. We return -1 when tupdatum/tupnull is lower that any value
3562 : * within the range of the array, and 1 when it is higher than every value.
3563 : * Caller should pass *set_elem_result to _bt_skiparray_set_element to advance
3564 : * the array.
3565 : *
3566 : * cur_elem_trig indicates if array advancement was triggered by this array's
3567 : * scan key. We use this to optimize-away comparisons that are known by our
3568 : * caller to be unnecessary from context, just like _bt_binsrch_array_skey.
3569 : */
3570 : static void
3571 166244 : _bt_binsrch_skiparray_skey(bool cur_elem_trig, ScanDirection dir,
3572 : Datum tupdatum, bool tupnull,
3573 : BTArrayKeyInfo *array, ScanKey cur,
3574 : int32 *set_elem_result)
3575 : {
3576 : Assert(cur->sk_flags & SK_BT_SKIP);
3577 : Assert(cur->sk_flags & SK_SEARCHARRAY);
3578 : Assert(cur->sk_flags & SK_BT_REQFWD);
3579 : Assert(array->num_elems == -1);
3580 : Assert(!ScanDirectionIsNoMovement(dir));
3581 :
3582 166244 : if (array->null_elem)
3583 : {
3584 : Assert(!array->low_compare && !array->high_compare);
3585 :
3586 140112 : *set_elem_result = 0;
3587 140112 : return;
3588 : }
3589 :
3590 26132 : if (tupnull) /* NULL tupdatum */
3591 : {
3592 24 : if (cur->sk_flags & SK_BT_NULLS_FIRST)
3593 0 : *set_elem_result = -1; /* NULL "<" NOT_NULL */
3594 : else
3595 24 : *set_elem_result = 1; /* NULL ">" NOT_NULL */
3596 24 : return;
3597 : }
3598 :
3599 : /*
3600 : * Array inequalities determine whether tupdatum is within the range of
3601 : * caller's skip array
3602 : */
3603 26108 : *set_elem_result = 0;
3604 26108 : if (ScanDirectionIsForward(dir))
3605 : {
3606 : /*
3607 : * Evaluate low_compare first (unless cur_elem_trig tells us that it
3608 : * cannot possibly fail to be satisfied), then evaluate high_compare
3609 : */
3610 26060 : if (!cur_elem_trig && array->low_compare &&
3611 706 : !DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func,
3612 706 : array->low_compare->sk_collation,
3613 : tupdatum,
3614 706 : array->low_compare->sk_argument)))
3615 0 : *set_elem_result = -1;
3616 26060 : else if (array->high_compare &&
3617 9968 : !DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func,
3618 9968 : array->high_compare->sk_collation,
3619 : tupdatum,
3620 9968 : array->high_compare->sk_argument)))
3621 6400 : *set_elem_result = 1;
3622 : }
3623 : else
3624 : {
3625 : /*
3626 : * Evaluate high_compare first (unless cur_elem_trig tells us that it
3627 : * cannot possibly fail to be satisfied), then evaluate low_compare
3628 : */
3629 48 : if (!cur_elem_trig && array->high_compare &&
3630 6 : !DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func,
3631 6 : array->high_compare->sk_collation,
3632 : tupdatum,
3633 6 : array->high_compare->sk_argument)))
3634 0 : *set_elem_result = 1;
3635 48 : else if (array->low_compare &&
3636 24 : !DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func,
3637 24 : array->low_compare->sk_collation,
3638 : tupdatum,
3639 24 : array->low_compare->sk_argument)))
3640 0 : *set_elem_result = -1;
3641 : }
3642 :
3643 : /*
3644 : * Assert that any keys that were assumed to be satisfied already (due to
3645 : * caller passing cur_elem_trig=true) really are satisfied as expected
3646 : */
3647 : #ifdef USE_ASSERT_CHECKING
3648 : if (cur_elem_trig)
3649 : {
3650 : if (ScanDirectionIsForward(dir) && array->low_compare)
3651 : Assert(DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func,
3652 : array->low_compare->sk_collation,
3653 : tupdatum,
3654 : array->low_compare->sk_argument)));
3655 :
3656 : if (ScanDirectionIsBackward(dir) && array->high_compare)
3657 : Assert(DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func,
3658 : array->high_compare->sk_collation,
3659 : tupdatum,
3660 : array->high_compare->sk_argument)));
3661 : }
3662 : #endif
3663 : }
3664 :
3665 : #ifdef USE_ASSERT_CHECKING
3666 : /*
3667 : * Verify that the scan's "so->keyData[]" scan keys are in agreement with
3668 : * its array key state
3669 : */
3670 : static bool
3671 : _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
3672 : {
3673 : BTScanOpaque so = (BTScanOpaque) scan->opaque;
3674 : int last_sk_attno = InvalidAttrNumber,
3675 : arrayidx = 0;
3676 : bool nonrequiredseen = false;
3677 :
3678 : if (!so->qual_ok)
3679 : return false;
3680 :
3681 : for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
3682 : {
3683 : ScanKey cur = so->keyData + ikey;
3684 : BTArrayKeyInfo *array;
3685 :
3686 : if (cur->sk_strategy != BTEqualStrategyNumber ||
3687 : !(cur->sk_flags & SK_SEARCHARRAY))
3688 : continue;
3689 :
3690 : array = &so->arrayKeys[arrayidx++];
3691 : if (array->scan_key != ikey)
3692 : return false;
3693 :
3694 : if (array->num_elems == 0 || array->num_elems < -1)
3695 : return false;
3696 :
3697 : if (array->num_elems != -1 &&
3698 : cur->sk_argument != array->elem_values[array->cur_elem])
3699 : return false;
3700 : if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
3701 : {
3702 : if (last_sk_attno > cur->sk_attno)
3703 : return false;
3704 : if (nonrequiredseen)
3705 : return false;
3706 : }
3707 : else
3708 : nonrequiredseen = true;
3709 :
3710 : last_sk_attno = cur->sk_attno;
3711 : }
3712 :
3713 : if (arrayidx != so->numArrayKeys)
3714 : return false;
3715 :
3716 : return true;
3717 : }
3718 : #endif
|