Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashsearch.c
4 : * search code for postgres hash tables
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashsearch.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/hash.h"
18 : #include "access/relscan.h"
19 : #include "miscadmin.h"
20 : #include "pgstat.h"
21 : #include "storage/predicate.h"
22 : #include "utils/rel.h"
23 :
24 : static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
25 : ScanDirection dir);
26 : static int _hash_load_qualified_items(IndexScanDesc scan, Page page,
27 : OffsetNumber offnum, ScanDirection dir);
28 : static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
29 : OffsetNumber offnum, IndexTuple itup);
30 : static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
31 : Page *pagep, HashPageOpaque *opaquep);
32 :
33 : /*
34 : * _hash_next() -- Get the next item in a scan.
35 : *
36 : * On entry, so->currPos describes the current page, which may
37 : * be pinned but not locked, and so->currPos.itemIndex identifies
38 : * which item was previously returned.
39 : *
40 : * On successful exit, scan->xs_heaptid is set to the TID of the next
41 : * heap tuple. so->currPos is updated as needed.
42 : *
43 : * On failure exit (no more tuples), we return false with a pin
44 : * held on the bucket page but no pins or locks held on any
45 : * overflow page.
46 : */
47 : bool
48 101120 : _hash_next(IndexScanDesc scan, ScanDirection dir)
49 : {
50 101120 : Relation rel = scan->indexRelation;
51 101120 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
52 : HashScanPosItem *currItem;
53 : BlockNumber blkno;
54 : Buffer buf;
55 101120 : bool end_of_scan = false;
56 :
57 : /*
58 : * Advance to the next tuple on the current page, or, if done, try to read
59 : * data from the next or previous page depending on the scan direction.
60 : * Before moving to the next or previous page, make sure we deal with any
61 : * killed items.
62 : */
63 101120 : if (ScanDirectionIsForward(dir))
64 : {
65 68120 : if (++so->currPos.itemIndex > so->currPos.lastItem)
66 : {
67 560 : if (so->numKilled > 0)
68 0 : _hash_kill_items(scan);
69 :
70 560 : blkno = so->currPos.nextPage;
71 560 : if (BlockNumberIsValid(blkno))
72 : {
73 156 : buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
74 156 : if (!_hash_readpage(scan, &buf, dir))
75 0 : end_of_scan = true;
76 : }
77 : else
78 404 : end_of_scan = true;
79 : }
80 : }
81 : else
82 : {
83 33000 : if (--so->currPos.itemIndex < so->currPos.firstItem)
84 : {
85 84 : if (so->numKilled > 0)
86 0 : _hash_kill_items(scan);
87 :
88 84 : blkno = so->currPos.prevPage;
89 84 : if (BlockNumberIsValid(blkno))
90 : {
91 78 : buf = _hash_getbuf(rel, blkno, HASH_READ,
92 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
93 :
94 : /*
95 : * We always maintain the pin on the bucket page for the whole
96 : * scan operation, so release the additional pin we have
97 : * acquired here.
98 : */
99 78 : if (buf == so->hashso_bucket_buf ||
100 72 : buf == so->hashso_split_bucket_buf)
101 6 : _hash_dropbuf(rel, buf);
102 :
103 78 : if (!_hash_readpage(scan, &buf, dir))
104 0 : end_of_scan = true;
105 : }
106 : else
107 6 : end_of_scan = true;
108 : }
109 : }
110 :
111 101120 : if (end_of_scan)
112 : {
113 410 : _hash_dropscanbuf(rel, so);
114 410 : HashScanPosInvalidate(so->currPos);
115 410 : return false;
116 : }
117 :
118 : /* OK, itemIndex says what to return */
119 100710 : currItem = &so->currPos.items[so->currPos.itemIndex];
120 100710 : scan->xs_heaptid = currItem->heapTid;
121 :
122 100710 : return true;
123 : }
124 :
125 : /*
126 : * Advance to the next page in a bucket, if any. If we are scanning the
127 : * bucket being populated during a split operation, then after the last page
128 : * of that bucket this function advances to the bucket being split.
129 : */
130 : static void
131 192 : _hash_readnext(IndexScanDesc scan,
132 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
133 : {
134 : BlockNumber blkno;
135 192 : Relation rel = scan->indexRelation;
136 192 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
137 192 : bool block_found = false;
138 :
139 192 : blkno = (*opaquep)->hasho_nextblkno;
140 :
141 : /*
142 : * Retain the pin on the primary bucket page till the end of the scan.
143 : * See the comments in _hash_first for why the pin is retained.
144 : */
145 192 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
146 120 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
147 : else
148 72 : _hash_relbuf(rel, *bufp);
149 :
150 192 : *bufp = InvalidBuffer;
151 : /* check for interrupts while we're not holding any buffer lock */
152 192 : CHECK_FOR_INTERRUPTS();
153 192 : if (BlockNumberIsValid(blkno))
154 : {
155 78 : *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
156 78 : block_found = true;
157 : }
158 114 : else if (so->hashso_buc_populated && !so->hashso_buc_split)
159 : {
160 : /*
161 : * end of bucket, scan bucket being split if there was a split in
162 : * progress at the start of scan.
163 : */
164 0 : *bufp = so->hashso_split_bucket_buf;
165 :
166 : /*
167 : * The buffer for the bucket being split must be valid: we acquire the
168 : * pin on it before the start of the scan and retain it till the end.
169 : */
170 : Assert(BufferIsValid(*bufp));
171 :
172 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
173 0 : PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);
174 :
175 : /*
176 : * Setting hashso_buc_split to true indicates that we are scanning the
177 : * bucket being split.
178 : */
179 0 : so->hashso_buc_split = true;
180 :
181 0 : block_found = true;
182 : }
183 :
184 192 : if (block_found)
185 : {
186 78 : *pagep = BufferGetPage(*bufp);
187 78 : *opaquep = HashPageGetOpaque(*pagep);
188 : }
189 192 : }
190 :
191 : /*
192 : * Advance to the previous page in a bucket, if any. If the current scan
193 : * started during a split operation, then after the first page of the bucket
194 : * being split this function advances to the bucket being populated.
195 : */
196 : static void
197 0 : _hash_readprev(IndexScanDesc scan,
198 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
199 : {
200 : BlockNumber blkno;
201 0 : Relation rel = scan->indexRelation;
202 0 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
203 : bool haveprevblk;
204 :
205 0 : blkno = (*opaquep)->hasho_prevblkno;
206 :
207 : /*
208 : * Retain the pin on the primary bucket page till the end of the scan.
209 : * See the comments in _hash_first for why the pin is retained.
210 : */
211 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
212 : {
213 0 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
214 0 : haveprevblk = false;
215 : }
216 : else
217 : {
218 0 : _hash_relbuf(rel, *bufp);
219 0 : haveprevblk = true;
220 : }
221 :
222 0 : *bufp = InvalidBuffer;
223 : /* check for interrupts while we're not holding any buffer lock */
224 0 : CHECK_FOR_INTERRUPTS();
225 :
226 0 : if (haveprevblk)
227 : {
228 : Assert(BlockNumberIsValid(blkno));
229 0 : *bufp = _hash_getbuf(rel, blkno, HASH_READ,
230 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
231 0 : *pagep = BufferGetPage(*bufp);
232 0 : *opaquep = HashPageGetOpaque(*pagep);
233 :
234 : /*
235 : * We always maintain the pin on the bucket page for the whole scan
236 : * operation, so release the additional pin we have acquired here.
237 : */
238 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
239 0 : _hash_dropbuf(rel, *bufp);
240 : }
241 0 : else if (so->hashso_buc_populated && so->hashso_buc_split)
242 : {
243 : /*
244 : * End of bucket; scan the bucket being populated if a split was in
245 : * progress at the start of the scan.
246 : */
247 0 : *bufp = so->hashso_bucket_buf;
248 :
249 : /*
250 : * The buffer for the bucket being populated must be valid: we acquire
251 : * the pin on it before the start of the scan and retain it till the end.
252 : */
253 : Assert(BufferIsValid(*bufp));
254 :
255 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
256 0 : *pagep = BufferGetPage(*bufp);
257 0 : *opaquep = HashPageGetOpaque(*pagep);
258 :
259 : /* move to the end of bucket chain */
260 0 : while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
261 0 : _hash_readnext(scan, bufp, pagep, opaquep);
262 :
263 : /*
264 : * Setting hashso_buc_split to false indicates that we are scanning the
265 : * bucket being populated.
266 : */
267 0 : so->hashso_buc_split = false;
268 : }
269 0 : }
270 :
271 : /*
272 : * _hash_first() -- Find the first item in a scan.
273 : *
274 : * We find the first item (or, if backward scan, the last item) in the
275 : * index that satisfies the qualification associated with the scan
276 : * descriptor.
277 : *
278 : * On successful exit, if the page containing the current index tuple is
279 : * an overflow page, both its pin and lock are released; if it is a bucket
280 : * page, it remains pinned but not locked. Data about the matching
281 : * tuple(s) on the page has been loaded into so->currPos, and
282 : * scan->xs_heaptid is set to the heap TID of the current tuple.
283 : *
284 : * On failure exit (no more tuples), we return false, with a pin held on
285 : * the bucket page but no pins or locks held on any overflow page.
286 : */
287 : bool
288 532 : _hash_first(IndexScanDesc scan, ScanDirection dir)
289 : {
290 532 : Relation rel = scan->indexRelation;
291 532 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
292 : ScanKey cur;
293 : uint32 hashkey;
294 : Bucket bucket;
295 : Buffer buf;
296 : Page page;
297 : HashPageOpaque opaque;
298 : HashScanPosItem *currItem;
299 :
300 532 : pgstat_count_index_scan(rel);
301 532 : if (scan->instrument)
302 524 : scan->instrument->nsearches++;
303 :
304 : /*
305 : * We do not support hash scans with no index qualification, because we
306 : * would have to read the whole index rather than just one bucket. That
307 : * creates a whole raft of problems, since we haven't got a practical way
308 : * to lock all the buckets against splits or compactions.
309 : */
310 532 : if (scan->numberOfKeys < 1)
311 0 : ereport(ERROR,
312 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
313 : errmsg("hash indexes do not support whole-index scans")));
314 :
315 : /* There may be more than one index qual, but we hash only the first */
316 532 : cur = &scan->keyData[0];
317 :
318 : /* We support only single-column hash indexes */
319 : Assert(cur->sk_attno == 1);
320 : /* And there's only one operator strategy, too */
321 : Assert(cur->sk_strategy == HTEqualStrategyNumber);
322 :
323 : /*
324 : * If the constant in the index qual is NULL, assume it cannot match any
325 : * items in the index.
326 : */
327 532 : if (cur->sk_flags & SK_ISNULL)
328 0 : return false;
329 :
330 : /*
331 : * Okay to compute the hash key. We want to do this before acquiring any
332 : * locks, in case a user-defined hash function happens to be slow.
333 : *
334 : * If scankey operator is not a cross-type comparison, we can use the
335 : * cached hash function; otherwise gotta look it up in the catalogs.
336 : *
337 : * We support the convention that sk_subtype == InvalidOid means the
338 : * opclass input type; this is a hack to simplify life for ScanKeyInit().
339 : */
340 532 : if (cur->sk_subtype == rel->rd_opcintype[0] ||
341 8 : cur->sk_subtype == InvalidOid)
342 532 : hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
343 : else
344 0 : hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
345 : cur->sk_subtype);
346 :
347 532 : so->hashso_sk_hash = hashkey;
348 :
349 532 : buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
350 532 : PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
351 532 : page = BufferGetPage(buf);
352 532 : opaque = HashPageGetOpaque(page);
353 532 : bucket = opaque->hasho_bucket;
354 :
355 532 : so->hashso_bucket_buf = buf;
356 :
357 : /*
358 : * If a bucket split is in progress, then while scanning the bucket being
359 : * populated, we need to skip tuples that were copied from the bucket
360 : * being split. We also need to maintain a pin on the bucket being split
361 : * to ensure that split-cleanup work done by vacuum doesn't remove tuples
362 : * from it till this scan is done. We need to maintain a pin on the
363 : * bucket being populated to ensure that vacuum doesn't squeeze that
364 : * bucket till this scan is complete; otherwise, the ordering of tuples
365 : * can't be maintained during forward and backward scans. Here, we have
366 : * to be cautious about locking order: first, acquire the lock on the
367 : * bucket being split; then release the lock on it but not the pin; then
368 : * acquire the lock on the bucket being populated and re-verify whether
369 : * the bucket split is still in progress. Acquiring the lock on the
370 : * bucket being split first ensures that vacuum waits for this scan to
371 : * finish.
372 : */
373 532 : if (H_BUCKET_BEING_POPULATED(opaque))
374 : {
375 : BlockNumber old_blkno;
376 : Buffer old_buf;
377 :
378 0 : old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
379 :
380 : /*
381 : * Release the lock on the new bucket and re-acquire it after acquiring
382 : * the lock on the old bucket.
383 : */
384 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
385 :
386 0 : old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
387 :
388 : /*
389 : * Remember the split bucket buffer so we can use it later for
390 : * scanning.
391 : */
392 0 : so->hashso_split_bucket_buf = old_buf;
393 0 : LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
394 :
395 0 : LockBuffer(buf, BUFFER_LOCK_SHARE);
396 0 : page = BufferGetPage(buf);
397 0 : opaque = HashPageGetOpaque(page);
398 : Assert(opaque->hasho_bucket == bucket);
399 :
400 0 : if (H_BUCKET_BEING_POPULATED(opaque))
401 0 : so->hashso_buc_populated = true;
402 : else
403 : {
404 0 : _hash_dropbuf(rel, so->hashso_split_bucket_buf);
405 0 : so->hashso_split_bucket_buf = InvalidBuffer;
406 : }
407 : }
408 :
409 : /* If a backwards scan is requested, move to the end of the chain */
410 532 : if (ScanDirectionIsBackward(dir))
411 : {
412 : /*
413 : * Backward scans that start during a split need to start from the end
414 : * of the bucket being split.
415 : */
416 84 : while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
417 6 : (so->hashso_buc_populated && !so->hashso_buc_split))
418 78 : _hash_readnext(scan, &buf, &page, &opaque);
419 : }
420 :
421 : /* remember which buffer we have pinned, if any */
422 : Assert(BufferIsInvalid(so->currPos.buf));
423 532 : so->currPos.buf = buf;
424 :
425 : /* Now find all the tuples on this page that satisfy the qualification */
426 532 : if (!_hash_readpage(scan, &buf, dir))
427 114 : return false;
428 :
429 : /* OK, itemIndex says what to return */
430 418 : currItem = &so->currPos.items[so->currPos.itemIndex];
431 418 : scan->xs_heaptid = currItem->heapTid;
432 :
433 : /* if we're here, _hash_readpage found at least one valid tuple */
434 418 : return true;
435 : }
436 :
437 : /*
438 : * _hash_readpage() -- Load data from current index page into so->currPos
439 : *
440 : * We scan all the items in the current index page and save into
441 : * so->currPos those that satisfy the qualification. If no matching items
442 : * are found in the current page, we move to the next or previous page
443 : * in the bucket chain, as indicated by the direction.
444 : *
445 : * Return true if any matching items are found, else return false.
446 : */
447 : static bool
448 766 : _hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
449 : {
450 766 : Relation rel = scan->indexRelation;
451 766 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
452 : Buffer buf;
453 : Page page;
454 : HashPageOpaque opaque;
455 : OffsetNumber offnum;
456 : uint16 itemIndex;
457 :
458 766 : buf = *bufP;
459 : Assert(BufferIsValid(buf));
460 766 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
461 766 : page = BufferGetPage(buf);
462 766 : opaque = HashPageGetOpaque(page);
463 :
464 766 : so->currPos.buf = buf;
465 766 : so->currPos.currPage = BufferGetBlockNumber(buf);
466 :
467 766 : if (ScanDirectionIsForward(dir))
468 : {
469 682 : BlockNumber prev_blkno = InvalidBlockNumber;
470 :
471 : for (;;)
472 : {
473 : /* new page, locate starting position by binary search */
474 682 : offnum = _hash_binsearch(page, so->hashso_sk_hash);
475 :
476 682 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
477 :
478 682 : if (itemIndex != 0)
479 568 : break;
480 :
481 : /*
482 : * Could not find any matching tuples in the current page; move to
483 : * the next page. Before leaving the current page, deal with any
484 : * killed items.
485 : */
486 114 : if (so->numKilled > 0)
487 0 : _hash_kill_items(scan);
488 :
489 : /*
490 : * If this is a primary bucket page, hasho_prevblkno is not a real
491 : * block number.
492 : */
493 114 : if (so->currPos.buf == so->hashso_bucket_buf ||
494 0 : so->currPos.buf == so->hashso_split_bucket_buf)
495 114 : prev_blkno = InvalidBlockNumber;
496 : else
497 0 : prev_blkno = opaque->hasho_prevblkno;
498 :
499 114 : _hash_readnext(scan, &buf, &page, &opaque);
500 114 : if (BufferIsValid(buf))
501 : {
502 0 : so->currPos.buf = buf;
503 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
504 : }
505 : else
506 : {
507 : /*
508 : * Remember the next and previous block numbers so scrollable
509 : * cursors know the start position, and return false to
510 : * indicate that no more matching tuples were found. Also,
511 : * don't reset currPage or lsn, because we expect
512 : * _hash_kill_items to be called for the old page after this
513 : * function returns.
514 : */
515 114 : so->currPos.prevPage = prev_blkno;
516 114 : so->currPos.nextPage = InvalidBlockNumber;
517 114 : so->currPos.buf = buf;
518 114 : return false;
519 : }
520 : }
521 :
522 568 : so->currPos.firstItem = 0;
523 568 : so->currPos.lastItem = itemIndex - 1;
524 568 : so->currPos.itemIndex = 0;
525 : }
526 : else
527 : {
528 84 : BlockNumber next_blkno = InvalidBlockNumber;
529 :
530 : for (;;)
531 : {
532 : /* new page, locate starting position by binary search */
533 84 : offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
534 :
535 84 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
536 :
537 84 : if (itemIndex != MaxIndexTuplesPerPage)
538 84 : break;
539 :
540 : /*
541 : * Could not find any matching tuples in the current page; move to
542 : * the previous page. Before leaving the current page, deal with
543 : * any killed items.
544 : */
545 0 : if (so->numKilled > 0)
546 0 : _hash_kill_items(scan);
547 :
548 0 : if (so->currPos.buf == so->hashso_bucket_buf ||
549 0 : so->currPos.buf == so->hashso_split_bucket_buf)
550 0 : next_blkno = opaque->hasho_nextblkno;
551 :
552 0 : _hash_readprev(scan, &buf, &page, &opaque);
553 0 : if (BufferIsValid(buf))
554 : {
555 0 : so->currPos.buf = buf;
556 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
557 : }
558 : else
559 : {
560 : /*
561 : * Remember the next and previous block numbers so scrollable
562 : * cursors know the start position, and return false to
563 : * indicate that no more matching tuples were found. Also,
564 : * don't reset currPage or lsn, because we expect
565 : * _hash_kill_items to be called for the old page after this
566 : * function returns.
567 : */
568 0 : so->currPos.prevPage = InvalidBlockNumber;
569 0 : so->currPos.nextPage = next_blkno;
570 0 : so->currPos.buf = buf;
571 0 : return false;
572 : }
573 : }
574 :
575 84 : so->currPos.firstItem = itemIndex;
576 84 : so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
577 84 : so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
578 : }
579 :
580 652 : if (so->currPos.buf == so->hashso_bucket_buf ||
581 234 : so->currPos.buf == so->hashso_split_bucket_buf)
582 : {
583 418 : so->currPos.prevPage = InvalidBlockNumber;
584 418 : so->currPos.nextPage = opaque->hasho_nextblkno;
585 418 : LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
586 : }
587 : else
588 : {
589 234 : so->currPos.prevPage = opaque->hasho_prevblkno;
590 234 : so->currPos.nextPage = opaque->hasho_nextblkno;
591 234 : _hash_relbuf(rel, so->currPos.buf);
592 234 : so->currPos.buf = InvalidBuffer;
593 : }
594 :
595 : Assert(so->currPos.firstItem <= so->currPos.lastItem);
596 652 : return true;
597 : }
598 :
599 : /*
600 : * Load all the qualified items from the current index page
601 : * into so->currPos. Helper function for _hash_readpage.
602 : */
603 : static int
604 766 : _hash_load_qualified_items(IndexScanDesc scan, Page page,
605 : OffsetNumber offnum, ScanDirection dir)
606 : {
607 766 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
608 : IndexTuple itup;
609 : int itemIndex;
610 : OffsetNumber maxoff;
611 :
612 766 : maxoff = PageGetMaxOffsetNumber(page);
613 :
614 766 : if (ScanDirectionIsForward(dir))
615 : {
616 : /* load items[] in ascending order */
617 682 : itemIndex = 0;
618 :
619 68810 : while (offnum <= maxoff)
620 : {
621 : Assert(offnum >= FirstOffsetNumber);
622 68426 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
623 :
624 : /*
625 : * Skip tuples that were moved by a split operation if the scan
626 : * started while the split was in progress. Also, skip tuples
627 : * that are marked as dead.
628 : */
629 68426 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
630 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
631 68426 : (scan->ignore_killed_tuples &&
632 68426 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
633 : {
634 0 : offnum = OffsetNumberNext(offnum); /* move forward */
635 0 : continue;
636 : }
637 :
638 136554 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
639 68128 : _hash_checkqual(scan, itup))
640 : {
641 : /* tuple is qualified, so remember it */
642 68128 : _hash_saveitem(so, itemIndex, offnum, itup);
643 68128 : itemIndex++;
644 : }
645 : else
646 : {
647 : /*
648 : * No more matching tuples exist in this page, so exit the while
649 : * loop.
650 : */
651 : break;
652 : }
653 :
654 68128 : offnum = OffsetNumberNext(offnum);
655 : }
656 :
657 : Assert(itemIndex <= MaxIndexTuplesPerPage);
658 682 : return itemIndex;
659 : }
660 : else
661 : {
662 : /* load items[] in descending order */
663 84 : itemIndex = MaxIndexTuplesPerPage;
664 :
665 33084 : while (offnum >= FirstOffsetNumber)
666 : {
667 : Assert(offnum <= maxoff);
668 33000 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
669 :
670 : /*
671 : * Skip tuples that were moved by a split operation if the scan
672 : * started while the split was in progress. Also, skip tuples
673 : * that are marked as dead.
674 : */
675 33000 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
676 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
677 33000 : (scan->ignore_killed_tuples &&
678 33000 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
679 : {
680 0 : offnum = OffsetNumberPrev(offnum); /* move back */
681 0 : continue;
682 : }
683 :
684 66000 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
685 33000 : _hash_checkqual(scan, itup))
686 : {
687 33000 : itemIndex--;
688 : /* tuple is qualified, so remember it */
689 33000 : _hash_saveitem(so, itemIndex, offnum, itup);
690 : }
691 : else
692 : {
693 : /*
694 : * No more matching tuples exist in this page, so exit the while
695 : * loop.
696 : */
697 : break;
698 : }
699 :
700 33000 : offnum = OffsetNumberPrev(offnum);
701 : }
702 :
703 : Assert(itemIndex >= 0);
704 84 : return itemIndex;
705 : }
706 : }
707 :
708 : /* Save an index item into so->currPos.items[itemIndex] */
709 : static inline void
710 101128 : _hash_saveitem(HashScanOpaque so, int itemIndex,
711 : OffsetNumber offnum, IndexTuple itup)
712 : {
713 101128 : HashScanPosItem *currItem = &so->currPos.items[itemIndex];
714 :
715 101128 : currItem->heapTid = itup->t_tid;
716 101128 : currItem->indexOffset = offnum;
717 101128 : }