Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashsearch.c
4 : * search code for postgres hash tables
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashsearch.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/hash.h"
18 : #include "access/relscan.h"
19 : #include "miscadmin.h"
21 : #include "pgstat.h"
22 : #include "storage/predicate.h"
23 : #include "utils/rel.h"
24 :
25 : static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
26 : ScanDirection dir);
27 : static int _hash_load_qualified_items(IndexScanDesc scan, Page page,
28 : OffsetNumber offnum, ScanDirection dir);
29 : static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
30 : OffsetNumber offnum, IndexTuple itup);
31 : static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
32 : Page *pagep, HashPageOpaque *opaquep);
33 :
34 : /*
35 : * _hash_next() -- Get the next item in a scan.
36 : *
37 : * On entry, so->currPos describes the current page, which may
38 : * be pinned but not locked, and so->currPos.itemIndex identifies
39 : * which item was previously returned.
40 : *
41 : * On successful exit, scan->xs_heaptid is set to the TID of the next
42 : * heap tuple. so->currPos is updated as needed.
43 : *
44 : * On failure exit (no more tuples), we return false with pin
45 : * held on bucket page but no pins or locks held on overflow
46 : * page.
47 : */
48 : bool
49 101132 : _hash_next(IndexScanDesc scan, ScanDirection dir)
50 : {
51 101132 : Relation rel = scan->indexRelation;
52 101132 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
53 : HashScanPosItem *currItem;
54 : BlockNumber blkno;
55 : Buffer buf;
56 101132 : bool end_of_scan = false;
57 :
58 : /*
59 : * Advance to the next tuple on the current page; or if done, try to read
60 : * data from the next or previous page, depending on the scan direction.
61 : * Before moving to the next or previous page, make sure that we deal with
62 : * all the killed items.
63 : */
64 101132 : if (ScanDirectionIsForward(dir))
65 : {
66 68132 : if (++so->currPos.itemIndex > so->currPos.lastItem)
67 : {
68 572 : if (so->numKilled > 0)
69 2 : _hash_kill_items(scan);
70 :
71 572 : blkno = so->currPos.nextPage;
72 572 : if (BlockNumberIsValid(blkno))
73 : {
74 156 : buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
75 156 : if (!_hash_readpage(scan, &buf, dir))
76 0 : end_of_scan = true;
77 : }
78 : else
79 416 : end_of_scan = true;
80 : }
81 : }
82 : else
83 : {
84 33000 : if (--so->currPos.itemIndex < so->currPos.firstItem)
85 : {
86 84 : if (so->numKilled > 0)
87 0 : _hash_kill_items(scan);
88 :
89 84 : blkno = so->currPos.prevPage;
90 84 : if (BlockNumberIsValid(blkno))
91 : {
92 78 : buf = _hash_getbuf(rel, blkno, HASH_READ,
93 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
94 :
95 : /*
96 : * We always maintain the pin on the bucket page for the whole
97 : * scan operation, so release the additional pin we have
98 : * acquired here.
99 : */
100 78 : if (buf == so->hashso_bucket_buf ||
101 72 : buf == so->hashso_split_bucket_buf)
102 6 : _hash_dropbuf(rel, buf);
103 :
104 78 : if (!_hash_readpage(scan, &buf, dir))
105 0 : end_of_scan = true;
106 : }
107 : else
108 6 : end_of_scan = true;
109 : }
110 : }
111 :
112 101132 : if (end_of_scan)
113 : {
114 422 : _hash_dropscanbuf(rel, so);
115 422 : HashScanPosInvalidate(so->currPos);
116 422 : return false;
117 : }
118 :
119 : /* OK, itemIndex says what to return */
120 100710 : currItem = &so->currPos.items[so->currPos.itemIndex];
121 100710 : scan->xs_heaptid = currItem->heapTid;
122 :
123 100710 : return true;
124 : }
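: /*
:  * Illustrative caller pattern (a simplified sketch, not part of this
:  * file): the hash AM's amgettuple callback typically drives the two
:  * functions above, calling _hash_first() when so->currPos is not yet
:  * valid and _hash_next() afterwards, e.g.:
:  *
:  *     if (!HashScanPosIsValid(so->currPos))
:  *         res = _hash_first(scan, dir);
:  *     else
:  *         res = _hash_next(scan, dir);
:  *
:  * Killed-tuple bookkeeping (scan->kill_prior_tuple / _hash_kill_items)
:  * is omitted from this sketch.
:  */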
125 :
126 : /*
127 : * Advance to the next page in a bucket, if any. If we are scanning the
128 : * bucket being populated during a split, this function advances to the
129 : * bucket being split after the last page of the bucket being populated.
130 : */
131 : static void
132 194 : _hash_readnext(IndexScanDesc scan,
133 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
134 : {
135 : BlockNumber blkno;
136 194 : Relation rel = scan->indexRelation;
137 194 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
138 194 : bool block_found = false;
139 :
140 194 : blkno = (*opaquep)->hasho_nextblkno;
141 :
142 : /*
143 : * Retain the pin on the primary bucket page till the end of the scan.
144 : * Refer to the comments in _hash_first for why we retain the pin.
145 : */
146 194 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
147 122 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
148 : else
149 72 : _hash_relbuf(rel, *bufp);
150 :
151 194 : *bufp = InvalidBuffer;
152 : /* check for interrupts while we're not holding any buffer lock */
153 194 : CHECK_FOR_INTERRUPTS();
154 194 : if (BlockNumberIsValid(blkno))
155 : {
156 78 : *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
157 78 : block_found = true;
158 : }
159 116 : else if (so->hashso_buc_populated && !so->hashso_buc_split)
160 : {
161 : /*
162 : * end of bucket, scan bucket being split if there was a split in
163 : * progress at the start of scan.
164 : */
165 0 : *bufp = so->hashso_split_bucket_buf;
166 :
167 : /*
168 : * buffer for bucket being split must be valid as we acquire the pin
169 : * on it before the start of scan and retain it till end of scan.
170 : */
171 : Assert(BufferIsValid(*bufp));
172 :
173 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
174 0 : PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);
175 :
176 : /*
177 : * setting hashso_buc_split to true indicates that we are scanning
178 : * bucket being split.
179 : */
180 0 : so->hashso_buc_split = true;
181 :
182 0 : block_found = true;
183 : }
184 :
185 194 : if (block_found)
186 : {
187 78 : *pagep = BufferGetPage(*bufp);
188 78 : *opaquep = HashPageGetOpaque(*pagep);
189 : }
190 194 : }
191 :
192 : /*
193 : * Advance to the previous page in a bucket, if any. If the current scan
194 : * started during a split, this function advances to the bucket being
195 : * populated after the first page of the bucket being split.
196 : */
197 : static void
198 0 : _hash_readprev(IndexScanDesc scan,
199 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
200 : {
201 : BlockNumber blkno;
202 0 : Relation rel = scan->indexRelation;
203 0 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
204 : bool haveprevblk;
205 :
206 0 : blkno = (*opaquep)->hasho_prevblkno;
207 :
208 : /*
209 : * Retain the pin on the primary bucket page till the end of the scan.
210 : * Refer to the comments in _hash_first for why we retain the pin.
211 : */
212 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
213 : {
214 0 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
215 0 : haveprevblk = false;
216 : }
217 : else
218 : {
219 0 : _hash_relbuf(rel, *bufp);
220 0 : haveprevblk = true;
221 : }
222 :
223 0 : *bufp = InvalidBuffer;
224 : /* check for interrupts while we're not holding any buffer lock */
225 0 : CHECK_FOR_INTERRUPTS();
226 :
227 0 : if (haveprevblk)
228 : {
229 : Assert(BlockNumberIsValid(blkno));
230 0 : *bufp = _hash_getbuf(rel, blkno, HASH_READ,
231 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
232 0 : *pagep = BufferGetPage(*bufp);
233 0 : *opaquep = HashPageGetOpaque(*pagep);
234 :
235 : /*
236 : * We always maintain the pin on the bucket page for the whole scan
237 : * operation, so release the additional pin we have acquired here.
238 : */
239 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
240 0 : _hash_dropbuf(rel, *bufp);
241 : }
242 0 : else if (so->hashso_buc_populated && so->hashso_buc_split)
243 : {
244 : /*
245 : * end of bucket, scan bucket being populated if there was a split in
246 : * progress at the start of scan.
247 : */
248 0 : *bufp = so->hashso_bucket_buf;
249 :
250 : /*
251 : * buffer for bucket being populated must be valid as we acquire the
252 : * pin on it before the start of scan and retain it till end of scan.
253 : */
254 : Assert(BufferIsValid(*bufp));
255 :
256 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
257 0 : *pagep = BufferGetPage(*bufp);
258 0 : *opaquep = HashPageGetOpaque(*pagep);
259 :
260 : /* move to the end of bucket chain */
261 0 : while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
262 0 : _hash_readnext(scan, bufp, pagep, opaquep);
263 :
264 : /*
265 : * setting hashso_buc_split to false indicates that we are scanning
266 : * bucket being populated.
267 : */
268 0 : so->hashso_buc_split = false;
269 : }
270 0 : }
271 :
272 : /*
273 : * _hash_first() -- Find the first item in a scan.
274 : *
275 : * We find the first item (or, if backward scan, the last item) in the
276 : * index that satisfies the qualification associated with the scan
277 : * descriptor.
278 : *
279 : * On successful exit, if the page containing the current index tuple is
280 : * an overflow page, both pin and lock are released, whereas if it is a
281 : * bucket page it is pinned but not locked. Data about the matching
282 : * tuple(s) on the page has been loaded into so->currPos, and
283 : * scan->xs_heaptid is set to the heap TID of the current tuple.
284 : *
285 : * On failure exit (no more tuples), we return false, with pin held on
286 : * bucket page but no pins or locks held on overflow page.
287 : */
288 : bool
289 546 : _hash_first(IndexScanDesc scan, ScanDirection dir)
290 : {
291 546 : Relation rel = scan->indexRelation;
292 546 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
293 : ScanKey cur;
294 : uint32 hashkey;
295 : Bucket bucket;
296 : Buffer buf;
297 : Page page;
298 : HashPageOpaque opaque;
299 : HashScanPosItem *currItem;
300 :
301 546 : pgstat_count_index_scan(rel);
302 546 : if (scan->instrument)
303 538 : scan->instrument->nsearches++;
304 :
305 : /*
306 : * We do not support hash scans with no index qualification, because we
307 : * would have to read the whole index rather than just one bucket. That
308 : * creates a whole raft of problems, since we haven't got a practical way
309 : * to lock all the buckets against splits or compactions.
310 : */
311 546 : if (scan->numberOfKeys < 1)
312 0 : ereport(ERROR,
313 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
314 : errmsg("hash indexes do not support whole-index scans")));
315 :
316 : /* There may be more than one index qual, but we hash only the first */
317 546 : cur = &scan->keyData[0];
318 :
319 : /* We support only single-column hash indexes */
320 : Assert(cur->sk_attno == 1);
321 : /* And there's only one operator strategy, too */
322 : Assert(cur->sk_strategy == HTEqualStrategyNumber);
323 :
324 : /*
325 : * If the constant in the index qual is NULL, assume it cannot match any
326 : * items in the index.
327 : */
328 546 : if (cur->sk_flags & SK_ISNULL)
329 0 : return false;
330 :
331 : /*
332 : * Okay to compute the hash key. We want to do this before acquiring any
333 : * locks, in case a user-defined hash function happens to be slow.
334 : *
335 : * If scankey operator is not a cross-type comparison, we can use the
336 : * cached hash function; otherwise gotta look it up in the catalogs.
337 : *
338 : * We support the convention that sk_subtype == InvalidOid means the
339 : * opclass input type; this is a hack to simplify life for ScanKeyInit().
340 : */
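: /*
:  * Illustrative note (describing the common case, not taken from this
:  * file): for a built-in opclass such as the int4 hash opclass, the
:  * cached-path call below amounts to applying the opclass's hash support
:  * function (hashint4) to sk_argument. Hash index tuples store that same
:  * 32-bit hash code, which is what _hash_binsearch and
:  * _hash_get_indextuple_hashkey later compare against.
:  */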
341 546 : if (cur->sk_subtype == rel->rd_opcintype[0] ||
342 8 : cur->sk_subtype == InvalidOid)
343 546 : hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
344 : else
345 0 : hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
346 : cur->sk_subtype);
347 :
348 546 : so->hashso_sk_hash = hashkey;
349 :
350 546 : buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
351 546 : PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
352 546 : page = BufferGetPage(buf);
353 546 : opaque = HashPageGetOpaque(page);
354 546 : bucket = opaque->hasho_bucket;
355 :
356 546 : so->hashso_bucket_buf = buf;
357 :
358 : /*
359 : * If a bucket split is in progress, then while scanning the bucket being
360 : * populated, we need to skip tuples that were copied from bucket being
361 : * split. We also need to maintain a pin on the bucket being split to
362 : * ensure that split-cleanup work done by vacuum doesn't remove tuples
363 : * from it till this scan is done. We need to maintain a pin on the
364 : * bucket being populated to ensure that vacuum doesn't squeeze that
365 : * bucket till this scan is complete; otherwise, the ordering of tuples
366 : * can't be maintained during forward and backward scans. Here, we have
367 : * to be cautious about locking order: first, acquire the lock on bucket
368 : * being split; then, release the lock on it but not the pin; then,
369 : * acquire a lock on bucket being populated and again re-verify whether
370 : * the bucket split is still in progress. Acquiring the lock on bucket
371 : * being split first ensures that the vacuum waits for this scan to
372 : * finish.
373 : */
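: /*
:  * In outline, the code below (1) releases the lock, but not the pin, on
:  * the new bucket, (2) locks the old (being-split) bucket, remembers its
:  * buffer, and drops that lock while keeping the pin, and (3) re-locks
:  * the new bucket and re-checks H_BUCKET_BEING_POPULATED, since the
:  * split may have completed in the meantime.
:  */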
374 546 : if (H_BUCKET_BEING_POPULATED(opaque))
375 : {
376 : BlockNumber old_blkno;
377 : Buffer old_buf;
378 :
379 0 : old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
380 :
381 : /*
382 : * release the lock on new bucket and re-acquire it after acquiring
383 : * the lock on old bucket.
384 : */
385 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
386 :
387 0 : old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
388 :
389 : /*
390 : * remember the split bucket buffer so as to use it later for
391 : * scanning.
392 : */
393 0 : so->hashso_split_bucket_buf = old_buf;
394 0 : LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
395 :
396 0 : LockBuffer(buf, BUFFER_LOCK_SHARE);
397 0 : page = BufferGetPage(buf);
398 0 : opaque = HashPageGetOpaque(page);
399 : Assert(opaque->hasho_bucket == bucket);
400 :
401 0 : if (H_BUCKET_BEING_POPULATED(opaque))
402 0 : so->hashso_buc_populated = true;
403 : else
404 : {
405 0 : _hash_dropbuf(rel, so->hashso_split_bucket_buf);
406 0 : so->hashso_split_bucket_buf = InvalidBuffer;
407 : }
408 : }
409 :
410 : /* If a backwards scan is requested, move to the end of the chain */
411 546 : if (ScanDirectionIsBackward(dir))
412 : {
413 : /*
414 : * Backward scans that start during a split need to start from the end
415 : * of the bucket being split.
416 : */
417 84 : while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
418 6 : (so->hashso_buc_populated && !so->hashso_buc_split))
419 78 : _hash_readnext(scan, &buf, &page, &opaque);
420 : }
421 :
422 : /* remember which buffer we have pinned, if any */
423 : Assert(BufferIsInvalid(so->currPos.buf));
424 546 : so->currPos.buf = buf;
425 :
426 : /* Now find all the tuples satisfying the qualification from a page */
427 546 : if (!_hash_readpage(scan, &buf, dir))
428 116 : return false;
429 :
430 : /* OK, itemIndex says what to return */
431 430 : currItem = &so->currPos.items[so->currPos.itemIndex];
432 430 : scan->xs_heaptid = currItem->heapTid;
433 :
434 : /* if we're here, _hash_readpage found at least one valid tuple */
435 430 : return true;
436 : }
437 :
438 : /*
439 : * _hash_readpage() -- Load data from current index page into so->currPos
440 : *
441 : * We scan all the items on the current index page and save those that
442 : * satisfy the qualification into so->currPos. If no matching items are
443 : * found on the current page, we move to the next or previous page in
444 : * the bucket chain, as indicated by the scan direction.
445 : *
446 : * Return true if any matching items are found, else return false.
447 : */
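: /*
:  * Note on so->currPos.items[] (derived from the code below): for a
:  * forward scan the matching tuples are stored in items[0..lastItem] and
:  * itemIndex starts at 0; for a backward scan they are stored from
:  * items[MaxIndexTuplesPerPage - 1] downwards and itemIndex starts at
:  * MaxIndexTuplesPerPage - 1.
:  */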
448 : static bool
449 780 : _hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
450 : {
451 780 : Relation rel = scan->indexRelation;
452 780 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
453 : Buffer buf;
454 : Page page;
455 : HashPageOpaque opaque;
456 : OffsetNumber offnum;
457 : uint16 itemIndex;
458 :
459 780 : buf = *bufP;
460 : Assert(BufferIsValid(buf));
461 780 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
462 780 : page = BufferGetPage(buf);
463 780 : opaque = HashPageGetOpaque(page);
464 :
465 780 : so->currPos.buf = buf;
466 780 : so->currPos.currPage = BufferGetBlockNumber(buf);
467 :
468 780 : if (ScanDirectionIsForward(dir))
469 : {
470 696 : BlockNumber prev_blkno = InvalidBlockNumber;
471 :
472 : for (;;)
473 : {
474 : /* new page, locate starting position by binary search */
475 696 : offnum = _hash_binsearch(page, so->hashso_sk_hash);
476 :
477 696 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
478 :
479 696 : if (itemIndex != 0)
480 580 : break;
481 :
482 : /*
483 : * Could not find any matching tuples on the current page; move to
484 : * the next page. Before leaving the current page, deal with any
485 : * killed items.
486 : */
487 116 : if (so->numKilled > 0)
488 0 : _hash_kill_items(scan);
489 :
490 : /*
491 : * If this is a primary bucket page, hasho_prevblkno is not a real
492 : * block number.
493 : */
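: /*
:  * (In a primary bucket page, hasho_prevblkno caches the metapage's
:  * hashm_maxbucket value from the time the bucket was created or last
:  * split, not a block number; see _hash_getbucketbuf_from_hashkey.)
:  */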
494 116 : if (so->currPos.buf == so->hashso_bucket_buf ||
495 0 : so->currPos.buf == so->hashso_split_bucket_buf)
496 116 : prev_blkno = InvalidBlockNumber;
497 : else
498 0 : prev_blkno = opaque->hasho_prevblkno;
499 :
500 116 : _hash_readnext(scan, &buf, &page, &opaque);
501 116 : if (BufferIsValid(buf))
502 : {
503 0 : so->currPos.buf = buf;
504 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
505 : }
506 : else
507 : {
508 : /*
509 : * Remember the next and previous block numbers so scrollable
510 : * cursors know the start position, and return false to
511 : * indicate that no more matching tuples were found. Also,
512 : * don't reset currPage or lsn, because we expect
513 : * _hash_kill_items to be called for the old page after this
514 : * function returns.
515 : */
516 116 : so->currPos.prevPage = prev_blkno;
517 116 : so->currPos.nextPage = InvalidBlockNumber;
518 116 : so->currPos.buf = buf;
519 116 : return false;
520 : }
521 : }
522 :
523 580 : so->currPos.firstItem = 0;
524 580 : so->currPos.lastItem = itemIndex - 1;
525 580 : so->currPos.itemIndex = 0;
526 : }
527 : else
528 : {
529 84 : BlockNumber next_blkno = InvalidBlockNumber;
530 :
531 : for (;;)
532 : {
533 : /* new page, locate starting position by binary search */
534 84 : offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
535 :
536 84 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
537 :
538 84 : if (itemIndex != MaxIndexTuplesPerPage)
539 84 : break;
540 :
541 : /*
542 : * Could not find any matching tuples on the current page; move to
543 : * the previous page. Before leaving the current page, deal with
544 : * any killed items.
545 : */
546 0 : if (so->numKilled > 0)
547 0 : _hash_kill_items(scan);
548 :
549 0 : if (so->currPos.buf == so->hashso_bucket_buf ||
550 0 : so->currPos.buf == so->hashso_split_bucket_buf)
551 0 : next_blkno = opaque->hasho_nextblkno;
552 :
553 0 : _hash_readprev(scan, &buf, &page, &opaque);
554 0 : if (BufferIsValid(buf))
555 : {
556 0 : so->currPos.buf = buf;
557 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
558 : }
559 : else
560 : {
561 : /*
562 : * Remember next and previous block numbers for scrollable
563 : * cursors to know the start position and return false
564 : * indicating that no more matching tuples were found. Also,
565 : * don't reset currPage or lsn, because we expect
566 : * _hash_kill_items to be called for the old page after this
567 : * function returns.
568 : */
569 0 : so->currPos.prevPage = InvalidBlockNumber;
570 0 : so->currPos.nextPage = next_blkno;
571 0 : so->currPos.buf = buf;
572 0 : return false;
573 : }
574 : }
575 :
576 84 : so->currPos.firstItem = itemIndex;
577 84 : so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
578 84 : so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
579 : }
580 :
581 664 : if (so->currPos.buf == so->hashso_bucket_buf ||
582 234 : so->currPos.buf == so->hashso_split_bucket_buf)
583 : {
584 430 : so->currPos.prevPage = InvalidBlockNumber;
585 430 : so->currPos.nextPage = opaque->hasho_nextblkno;
586 430 : LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
587 : }
588 : else
589 : {
590 234 : so->currPos.prevPage = opaque->hasho_prevblkno;
591 234 : so->currPos.nextPage = opaque->hasho_nextblkno;
592 234 : _hash_relbuf(rel, so->currPos.buf);
593 234 : so->currPos.buf = InvalidBuffer;
594 : }
595 :
596 : Assert(so->currPos.firstItem <= so->currPos.lastItem);
597 664 : return true;
598 : }
599 :
600 : /*
601 : * Load all the qualified items from the current index page
602 : * into so->currPos. Helper function for _hash_readpage.
603 : */
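: /*
:  * Note (based on how hash pages are maintained elsewhere in this AM):
:  * tuples on a hash index page are kept ordered by hash code, so once
:  * the binary-searched start position is known, all matches are
:  * contiguous and the loops below can stop at the first non-matching
:  * hash code.
:  */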
604 : static int
605 780 : _hash_load_qualified_items(IndexScanDesc scan, Page page,
606 : OffsetNumber offnum, ScanDirection dir)
607 : {
608 780 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
609 : IndexTuple itup;
610 : int itemIndex;
611 : OffsetNumber maxoff;
612 :
613 780 : maxoff = PageGetMaxOffsetNumber(page);
614 :
615 780 : if (ScanDirectionIsForward(dir))
616 : {
617 : /* load items[] in ascending order */
618 696 : itemIndex = 0;
619 :
620 68838 : while (offnum <= maxoff)
621 : {
622 : Assert(offnum >= FirstOffsetNumber);
623 68448 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
624 :
625 : /*
626 : * Skip tuples that were moved by a split operation if this scan
627 : * started while the split was in progress. Also skip tuples
628 : * that are marked as dead.
629 : */
630 68448 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
631 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
632 68448 : (scan->ignore_killed_tuples &&
633 68448 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
634 : {
635 2 : offnum = OffsetNumberNext(offnum); /* move forward */
636 2 : continue;
637 : }
638 :
639 136586 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
640 68140 : _hash_checkqual(scan, itup))
641 : {
642 : /* tuple is qualified, so remember it */
643 68140 : _hash_saveitem(so, itemIndex, offnum, itup);
644 68140 : itemIndex++;
645 : }
646 : else
647 : {
648 : /*
649 : * No more matching tuples exist on this page, so exit the
650 : * while loop.
651 : */
652 : break;
653 : }
654 :
655 68140 : offnum = OffsetNumberNext(offnum);
656 : }
657 :
658 : Assert(itemIndex <= MaxIndexTuplesPerPage);
659 696 : return itemIndex;
660 : }
661 : else
662 : {
663 : /* load items[] in descending order */
664 84 : itemIndex = MaxIndexTuplesPerPage;
665 :
666 33084 : while (offnum >= FirstOffsetNumber)
667 : {
668 : Assert(offnum <= maxoff);
669 33000 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
670 :
671 : /*
672 : * Skip tuples that were moved by a split operation if this scan
673 : * started while the split was in progress. Also skip tuples
674 : * that are marked as dead.
675 : */
676 33000 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
677 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
678 33000 : (scan->ignore_killed_tuples &&
679 33000 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
680 : {
681 0 : offnum = OffsetNumberPrev(offnum); /* move back */
682 0 : continue;
683 : }
684 :
685 66000 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
686 33000 : _hash_checkqual(scan, itup))
687 : {
688 33000 : itemIndex--;
689 : /* tuple is qualified, so remember it */
690 33000 : _hash_saveitem(so, itemIndex, offnum, itup);
691 : }
692 : else
693 : {
694 : /*
695 : * No more matching tuples exist on this page, so exit the
696 : * while loop.
697 : */
698 : break;
699 : }
700 :
701 33000 : offnum = OffsetNumberPrev(offnum);
702 : }
703 :
704 : Assert(itemIndex >= 0);
705 84 : return itemIndex;
706 : }
707 : }
708 :
709 : /* Save an index item into so->currPos.items[itemIndex] */
710 : static inline void
711 101140 : _hash_saveitem(HashScanOpaque so, int itemIndex,
712 : OffsetNumber offnum, IndexTuple itup)
713 : {
714 101140 : HashScanPosItem *currItem = &so->currPos.items[itemIndex];
715 :
716 101140 : currItem->heapTid = itup->t_tid;
717 101140 : currItem->indexOffset = offnum;
718 101140 : }
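: /*
:  * (so->currPos.items[] has room for MaxIndexTuplesPerPage entries, so
:  * the itemIndex values passed in by _hash_load_qualified_items cannot
:  * run past the end of the array.)
:  */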