/*-------------------------------------------------------------------------
 *
 * hashsearch.c
 *	  search code for postgres hash tables
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hashsearch.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/predicate.h"
#include "utils/rel.h"

static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
						   ScanDirection dir);
static int	_hash_load_qualified_items(IndexScanDesc scan, Page page,
									   OffsetNumber offnum, ScanDirection dir);
static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
								  OffsetNumber offnum, IndexTuple itup);
static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
						   Page *pagep, HashPageOpaque *opaquep);

/*
 *	_hash_next() -- Get the next item in a scan.
 *
 *		On entry, so->currPos describes the current page, which may
 *		be pinned but not locked, and so->currPos.itemIndex identifies
 *		which item was previously returned.
 *
 *		On successful exit, scan->xs_heaptid is set to the TID of the
 *		next heap tuple, and so->currPos is updated as needed.
 *
 *		On failure exit (no more tuples), we return false with pin
 *		held on bucket page but no pins or locks held on overflow
 *		page.
 */
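
/*
 * A caller typically drives these entry points in a loop along these lines
 * (a sketch only; process_tid() stands in for whatever the caller does with
 * each TID):
 *
 *		if (_hash_first(scan, dir))
 *		{
 *			do
 *			{
 *				process_tid(scan->xs_heaptid);
 *			} while (_hash_next(scan, dir));
 *		}
 */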
bool
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	HashScanPosItem *currItem;
	BlockNumber blkno;
	Buffer		buf;
	bool		end_of_scan = false;

	/*
	 * Advance to the next tuple on the current page; or if done, try to read
	 * data from the next or previous page based on the scan direction. Before
	 * moving to the next or previous page make sure that we deal with all the
	 * killed items.
	 */
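
	/*
	 * so->killedItems[] records items whose heap tuples the executor found
	 * to be dead; _hash_kill_items() marks them on the index page so later
	 * scans can skip them, and it must run before currPos moves off the
	 * page it describes.
	 */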
	if (ScanDirectionIsForward(dir))
	{
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			blkno = so->currPos.nextPage;
			if (BlockNumberIsValid(blkno))
			{
				buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
				TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
				if (!_hash_readpage(scan, &buf, dir))
					end_of_scan = true;
			}
			else
				end_of_scan = true;
		}
	}
	else
	{
		if (--so->currPos.itemIndex < so->currPos.firstItem)
		{
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			blkno = so->currPos.prevPage;
			if (BlockNumberIsValid(blkno))
			{
				buf = _hash_getbuf(rel, blkno, HASH_READ,
								   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
				TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));

				/*
				 * We always maintain the pin on the bucket page for the
				 * whole scan operation, so release the additional pin we
				 * have acquired here.
				 */
				if (buf == so->hashso_bucket_buf ||
					buf == so->hashso_split_bucket_buf)
					_hash_dropbuf(rel, buf);

				if (!_hash_readpage(scan, &buf, dir))
					end_of_scan = true;
			}
			else
				end_of_scan = true;
		}
	}

	if (end_of_scan)
	{
		_hash_dropscanbuf(rel, so);
		HashScanPosInvalidate(so->currPos);
		return false;
	}

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_heaptid = currItem->heapTid;

	return true;
}

/*
 * Advance to next page in a bucket, if any. If we are scanning the bucket
 * being populated during a split operation, then this function advances to
 * the bucket being split after the last bucket page of the bucket being
 * populated.
 */
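
/*
 * On return, *bufp, *pagep and *opaquep reference the new page if one was
 * found; otherwise *bufp is left as InvalidBuffer, which callers use to
 * detect the end of the bucket chain.
 */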
static void
_hash_readnext(IndexScanDesc scan,
			   Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
	BlockNumber blkno;
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	bool		block_found = false;

	blkno = (*opaquep)->hasho_nextblkno;

	/*
	 * Retain the pin on the primary bucket page till the end of the scan.
	 * See the comments in _hash_first for why the pin is retained.
	 */
	if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
		LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
	else
		_hash_relbuf(rel, *bufp);

	*bufp = InvalidBuffer;
	/* check for interrupts while we're not holding any buffer lock */
	CHECK_FOR_INTERRUPTS();
	if (BlockNumberIsValid(blkno))
	{
		*bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
		block_found = true;
	}
	else if (so->hashso_buc_populated && !so->hashso_buc_split)
	{
		/*
		 * end of bucket, scan bucket being split if there was a split in
		 * progress at the start of scan.
		 */
		*bufp = so->hashso_split_bucket_buf;

		/*
		 * buffer for bucket being split must be valid as we acquire the pin
		 * on it before the start of scan and retain it till end of scan.
		 */
		Assert(BufferIsValid(*bufp));

		LockBuffer(*bufp, BUFFER_LOCK_SHARE);
		PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);

		/*
		 * setting hashso_buc_split to true indicates that we are scanning
		 * the bucket being split.
		 */
		so->hashso_buc_split = true;

		block_found = true;
	}

	if (block_found)
	{
		*pagep = BufferGetPage(*bufp);
		TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
	}
}

/*
 * Advance to previous page in a bucket, if any. If the current scan has
 * started during a split operation, then this function advances to the
 * bucket being populated after the first bucket page of the bucket being
 * split.
 */
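
/*
 * Note that on a primary bucket page hasho_prevblkno is not a real block
 * number (see _hash_readpage), so we never follow it when the page we are
 * leaving is a pinned primary bucket page; in that case the only remaining
 * place to go is the bucket being populated, handled below.
 */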
static void
_hash_readprev(IndexScanDesc scan,
			   Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
	BlockNumber blkno;
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	bool		haveprevblk;

	blkno = (*opaquep)->hasho_prevblkno;

	/*
	 * Retain the pin on the primary bucket page till the end of the scan.
	 * See the comments in _hash_first for why the pin is retained.
	 */
	if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
	{
		LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
		haveprevblk = false;
	}
	else
	{
		_hash_relbuf(rel, *bufp);
		haveprevblk = true;
	}

	*bufp = InvalidBuffer;
	/* check for interrupts while we're not holding any buffer lock */
	CHECK_FOR_INTERRUPTS();

	if (haveprevblk)
	{
		Assert(BlockNumberIsValid(blkno));
		*bufp = _hash_getbuf(rel, blkno, HASH_READ,
							 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
		*pagep = BufferGetPage(*bufp);
		TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

		/*
		 * We always maintain the pin on the bucket page for the whole scan
		 * operation, so release the additional pin we have acquired here.
		 */
		if (*bufp == so->hashso_bucket_buf ||
			*bufp == so->hashso_split_bucket_buf)
			_hash_dropbuf(rel, *bufp);
	}
	else if (so->hashso_buc_populated && so->hashso_buc_split)
	{
		/*
		 * end of bucket, scan bucket being populated if there was a split in
		 * progress at the start of scan.
		 */
		*bufp = so->hashso_bucket_buf;

		/*
		 * buffer for bucket being populated must be valid as we acquire the
		 * pin on it before the start of scan and retain it till end of scan.
		 */
		Assert(BufferIsValid(*bufp));

		LockBuffer(*bufp, BUFFER_LOCK_SHARE);
		*pagep = BufferGetPage(*bufp);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

		/* move to the end of bucket chain */
		while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
			_hash_readnext(scan, bufp, pagep, opaquep);

		/*
		 * setting hashso_buc_split to false indicates that we are scanning
		 * the bucket being populated.
		 */
		so->hashso_buc_split = false;
	}
}

/*
 *	_hash_first() -- Find the first item in a scan.
 *
 *		We find the first item (or, if backward scan, the last item) in the
 *		index that satisfies the qualification associated with the scan
 *		descriptor.
 *
 *		On successful exit, if the page containing the current index tuple
 *		is an overflow page, both pin and lock are released, whereas if it
 *		is a bucket page then it is pinned but not locked.  Data about the
 *		matching tuple(s) on the page has been loaded into so->currPos, and
 *		scan->xs_heaptid is set to the heap TID of the current tuple.
 *
 *		On failure exit (no more tuples), we return false, with pin held on
 *		bucket page but no pins or locks held on overflow page.
 */
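
/*
 * The single equality qual examined below is an ordinary ScanKey. As a
 * sketch only (with a made-up constant), a caller probing an int4 hash
 * index might build it as:
 *
 *		ScanKeyInit(&scan->keyData[0], 1, HTEqualStrategyNumber,
 *					F_INT4EQ, Int32GetDatum(42));
 */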
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	ScanKey		cur;
	uint32		hashkey;
	Bucket		bucket;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	HashScanPosItem *currItem;

	pgstat_count_index_scan(rel);

	/*
	 * We do not support hash scans with no index qualification, because we
	 * would have to read the whole index rather than just one bucket. That
	 * creates a whole raft of problems, since we haven't got a practical way
	 * to lock all the buckets against splits or compactions.
	 */
	if (scan->numberOfKeys < 1)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("hash indexes do not support whole-index scans")));

	/* There may be more than one index qual, but we hash only the first */
	cur = &scan->keyData[0];

	/* We support only single-column hash indexes */
	Assert(cur->sk_attno == 1);
	/* And there's only one operator strategy, too */
	Assert(cur->sk_strategy == HTEqualStrategyNumber);

	/*
	 * If the constant in the index qual is NULL, assume it cannot match any
	 * items in the index.
	 */
	if (cur->sk_flags & SK_ISNULL)
		return false;

	/*
	 * Okay to compute the hash key.  We want to do this before acquiring any
	 * locks, in case a user-defined hash function happens to be slow.
	 *
	 * If scankey operator is not a cross-type comparison, we can use the
	 * cached hash function; otherwise gotta look it up in the catalogs.
	 *
	 * We support the convention that sk_subtype == InvalidOid means the
	 * opclass input type; this is a hack to simplify life for ScanKeyInit().
	 */
	if (cur->sk_subtype == rel->rd_opcintype[0] ||
		cur->sk_subtype == InvalidOid)
		hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
	else
		hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
										   cur->sk_subtype);

	so->hashso_sk_hash = hashkey;

	buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
	PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
	page = BufferGetPage(buf);
	TestForOldSnapshot(scan->xs_snapshot, rel, page);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
	bucket = opaque->hasho_bucket;

	so->hashso_bucket_buf = buf;

	/*
	 * If a bucket split is in progress, then while scanning the bucket being
	 * populated, we need to skip tuples that were copied from bucket being
	 * split.  We also need to maintain a pin on the bucket being split to
	 * ensure that split-cleanup work done by vacuum doesn't remove tuples
	 * from it till this scan is done.  We need to maintain a pin on the
	 * bucket being populated to ensure that vacuum doesn't squeeze that
	 * bucket till this scan is complete; otherwise, the ordering of tuples
	 * can't be maintained during forward and backward scans.  Here, we have
	 * to be cautious about locking order: first, acquire the lock on bucket
	 * being split; then, release the lock on it but not the pin; then,
	 * acquire a lock on bucket being populated and again re-verify whether
	 * the bucket split is still in progress.  Acquiring the lock on bucket
	 * being split first ensures that the vacuum waits for this scan to
	 * finish.
	 */
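
	/*
	 * To summarize the invariant this establishes: for the duration of the
	 * scan we hold a pin (though not necessarily a lock) on the primary
	 * page of the bucket being scanned and, if a split was in progress when
	 * the scan started, on the primary page of the old bucket as well.
	 */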
	if (H_BUCKET_BEING_POPULATED(opaque))
	{
		BlockNumber old_blkno;
		Buffer		old_buf;

		old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);

		/*
		 * release the lock on new bucket and re-acquire it after acquiring
		 * the lock on old bucket.
		 */
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
		TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));

		/*
		 * remember the split bucket buffer so as to use it later for
		 * scanning.
		 */
		so->hashso_split_bucket_buf = old_buf;
		LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);

		LockBuffer(buf, BUFFER_LOCK_SHARE);
		page = BufferGetPage(buf);
		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
		Assert(opaque->hasho_bucket == bucket);

		if (H_BUCKET_BEING_POPULATED(opaque))
			so->hashso_buc_populated = true;
		else
		{
			_hash_dropbuf(rel, so->hashso_split_bucket_buf);
			so->hashso_split_bucket_buf = InvalidBuffer;
		}
	}

	/* If a backwards scan is requested, move to the end of the chain */
	if (ScanDirectionIsBackward(dir))
	{
		/*
		 * Backward scans that start during split need to start from the end
		 * of the bucket being split.
		 */
		while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
			   (so->hashso_buc_populated && !so->hashso_buc_split))
			_hash_readnext(scan, &buf, &page, &opaque);
	}

	/* remember which buffer we have pinned, if any */
	Assert(BufferIsInvalid(so->currPos.buf));
	so->currPos.buf = buf;

	/* Now find all the tuples satisfying the qualification from a page */
	if (!_hash_readpage(scan, &buf, dir))
		return false;

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_heaptid = currItem->heapTid;

	/* if we're here, _hash_readpage found a valid tuple */
	return true;
}

/*
 *	_hash_readpage() -- Load data from current index page into so->currPos
 *
 *	We scan all the items in the current index page and save those that
 *	satisfy the qualification into so->currPos. If no matching items are
 *	found in the current page, we move to the next or previous page in the
 *	bucket chain, as indicated by the scan direction.
 *
 *	Return true if any matching items are found, else return false.
 */
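
/*
 * so->currPos.items[] is filled from the front of the array for a forward
 * scan and from the back for a backward scan, so firstItem/lastItem always
 * bracket the saved tuples and itemIndex starts at whichever end the scan
 * direction consumes first.
 */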
static bool
_hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	OffsetNumber offnum;
	uint16		itemIndex;

	buf = *bufP;
	Assert(BufferIsValid(buf));
	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
	page = BufferGetPage(buf);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

	so->currPos.buf = buf;
	so->currPos.currPage = BufferGetBlockNumber(buf);

	if (ScanDirectionIsForward(dir))
	{
		BlockNumber prev_blkno = InvalidBlockNumber;

		for (;;)
		{
			/* new page, locate starting position by binary search */
			offnum = _hash_binsearch(page, so->hashso_sk_hash);

			itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

			if (itemIndex != 0)
				break;

			/*
			 * Could not find any matching tuples in the current page, move
			 * to the next page. Before leaving the current page, deal with
			 * any killed items.
			 */
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			/*
			 * If this is a primary bucket page, hasho_prevblkno is not a
			 * real block number.
			 */
			if (so->currPos.buf == so->hashso_bucket_buf ||
				so->currPos.buf == so->hashso_split_bucket_buf)
				prev_blkno = InvalidBlockNumber;
			else
				prev_blkno = opaque->hasho_prevblkno;

			_hash_readnext(scan, &buf, &page, &opaque);
			if (BufferIsValid(buf))
			{
				so->currPos.buf = buf;
				so->currPos.currPage = BufferGetBlockNumber(buf);
			}
			else
			{
				/*
				 * Remember next and previous block numbers for scrollable
				 * cursors to know the start position and return false
				 * indicating that no more matching tuples were found. Also,
				 * don't reset currPage or lsn, because we expect
				 * _hash_kill_items to be called for the old page after this
				 * function returns.
				 */
				so->currPos.prevPage = prev_blkno;
				so->currPos.nextPage = InvalidBlockNumber;
				so->currPos.buf = buf;
				return false;
			}
		}

		so->currPos.firstItem = 0;
		so->currPos.lastItem = itemIndex - 1;
		so->currPos.itemIndex = 0;
	}
	else
	{
		BlockNumber next_blkno = InvalidBlockNumber;

		for (;;)
		{
			/* new page, locate starting position by binary search */
			offnum = _hash_binsearch_last(page, so->hashso_sk_hash);

			itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

			if (itemIndex != MaxIndexTuplesPerPage)
				break;

			/*
			 * Could not find any matching tuples in the current page, move
			 * to the previous page. Before leaving the current page, deal
			 * with any killed items.
			 */
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			if (so->currPos.buf == so->hashso_bucket_buf ||
				so->currPos.buf == so->hashso_split_bucket_buf)
				next_blkno = opaque->hasho_nextblkno;

			_hash_readprev(scan, &buf, &page, &opaque);
			if (BufferIsValid(buf))
			{
				so->currPos.buf = buf;
				so->currPos.currPage = BufferGetBlockNumber(buf);
			}
			else
			{
				/*
				 * Remember next and previous block numbers for scrollable
				 * cursors to know the start position and return false
				 * indicating that no more matching tuples were found. Also,
				 * don't reset currPage or lsn, because we expect
				 * _hash_kill_items to be called for the old page after this
				 * function returns.
				 */
				so->currPos.prevPage = InvalidBlockNumber;
				so->currPos.nextPage = next_blkno;
				so->currPos.buf = buf;
				return false;
			}
		}

		so->currPos.firstItem = itemIndex;
		so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
		so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
	}

	if (so->currPos.buf == so->hashso_bucket_buf ||
		so->currPos.buf == so->hashso_split_bucket_buf)
	{
		so->currPos.prevPage = InvalidBlockNumber;
		so->currPos.nextPage = opaque->hasho_nextblkno;
		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
	}
	else
	{
		so->currPos.prevPage = opaque->hasho_prevblkno;
		so->currPos.nextPage = opaque->hasho_nextblkno;
		_hash_relbuf(rel, so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	Assert(so->currPos.firstItem <= so->currPos.lastItem);
	return true;
}

/*
 *	Load all the qualified items from the current index page
 *	into so->currPos. Helper function for _hash_readpage.
 */
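
/*
 * The return value encodes where the matching tuples were stored: for a
 * forward scan it is the number of items loaded (items[0 .. result - 1]);
 * for a backward scan it is the index of the first saved item
 * (items[result .. MaxIndexTuplesPerPage - 1]). Hence "no matches" reads
 * as 0 and MaxIndexTuplesPerPage respectively, which is what
 * _hash_readpage tests for.
 */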
static int
_hash_load_qualified_items(IndexScanDesc scan, Page page,
						   OffsetNumber offnum, ScanDirection dir)
{
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	IndexTuple	itup;
	int			itemIndex;
	OffsetNumber maxoff;

	maxoff = PageGetMaxOffsetNumber(page);

	if (ScanDirectionIsForward(dir))
	{
		/* load items[] in ascending order */
		itemIndex = 0;

		while (offnum <= maxoff)
		{
			Assert(offnum >= FirstOffsetNumber);
			itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

			/*
			 * Skip tuples that were moved by a split operation if the scan
			 * started while that split was in progress. Also, skip tuples
			 * that are marked as dead.
			 */
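			/*
			 * INDEX_MOVED_BY_SPLIT_MASK is set on tuples that a split copied
			 * into this bucket; a scan that saw the split begin reads those
			 * tuples from the old bucket instead, so it must not return the
			 * copies here.
			 */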
			if ((so->hashso_buc_populated && !so->hashso_buc_split &&
				 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
				(scan->ignore_killed_tuples &&
				 (ItemIdIsDead(PageGetItemId(page, offnum)))))
			{
				offnum = OffsetNumberNext(offnum);	/* move forward */
				continue;
			}

			if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
				_hash_checkqual(scan, itup))
			{
				/* tuple is qualified, so remember it */
				_hash_saveitem(so, itemIndex, offnum, itup);
				itemIndex++;
			}
			else
			{
				/*
				 * No more matching tuples exist in this page, so exit the
				 * while loop.
				 */
				break;
			}

			offnum = OffsetNumberNext(offnum);
		}

		Assert(itemIndex <= MaxIndexTuplesPerPage);
		return itemIndex;
	}
	else
	{
		/* load items[] in descending order */
		itemIndex = MaxIndexTuplesPerPage;

		while (offnum >= FirstOffsetNumber)
		{
			Assert(offnum <= maxoff);
			itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

			/*
			 * Skip tuples that were moved by a split operation if the scan
			 * started while that split was in progress. Also, skip tuples
			 * that are marked as dead.
			 */
			if ((so->hashso_buc_populated && !so->hashso_buc_split &&
				 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
				(scan->ignore_killed_tuples &&
				 (ItemIdIsDead(PageGetItemId(page, offnum)))))
			{
				offnum = OffsetNumberPrev(offnum);	/* move back */
				continue;
			}

			if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
				_hash_checkqual(scan, itup))
			{
				itemIndex--;
				/* tuple is qualified, so remember it */
				_hash_saveitem(so, itemIndex, offnum, itup);
			}
			else
			{
				/*
				 * No more matching tuples exist in this page, so exit the
				 * while loop.
				 */
				break;
			}

			offnum = OffsetNumberPrev(offnum);
		}

		Assert(itemIndex >= 0);
		return itemIndex;
	}
}

/* Save an index item into so->currPos.items[itemIndex] */
static inline void
_hash_saveitem(HashScanOpaque so, int itemIndex,
			   OffsetNumber offnum, IndexTuple itup)
{
	HashScanPosItem *currItem = &so->currPos.items[itemIndex];

	currItem->heapTid = itup->t_tid;
	currItem->indexOffset = offnum;
}