/*-------------------------------------------------------------------------
 *
 * hashsearch.c
 *	  search code for postgres hash tables
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
25 * _hash_next() -- Get the next item in a scan.
27 * On entry, we have a valid hashso_curpos in the scan, and a
28 * pin and read lock on the page that contains that item.
29 * We find the next item in the scan, if any.
30 * On success exit, we have the page containing the next item
34 _hash_next(IndexScanDesc scan
, ScanDirection dir
)
36 Relation rel
= scan
->indexRelation
;
37 HashScanOpaque so
= (HashScanOpaque
) scan
->opaque
;
44 /* we still have the buffer pinned and read-locked */
45 buf
= so
->hashso_curbuf
;
46 Assert(BufferIsValid(buf
));
49 * step to next valid tuple.
51 if (!_hash_step(scan
, &buf
, dir
))
54 /* if we're here, _hash_step found a valid tuple */
55 current
= &(so
->hashso_curpos
);
56 offnum
= ItemPointerGetOffsetNumber(current
);
57 _hash_checkpage(rel
, buf
, LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
58 page
= BufferGetPage(buf
);
59 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
60 scan
->xs_ctup
.t_self
= itup
->t_tid
;
66 * Advance to next page in a bucket, if any.
69 _hash_readnext(Relation rel
,
70 Buffer
*bufp
, Page
*pagep
, HashPageOpaque
*opaquep
)
74 blkno
= (*opaquep
)->hasho_nextblkno
;
75 _hash_relbuf(rel
, *bufp
);
76 *bufp
= InvalidBuffer
;
77 if (BlockNumberIsValid(blkno
))
79 *bufp
= _hash_getbuf(rel
, blkno
, HASH_READ
, LH_OVERFLOW_PAGE
);
80 *pagep
= BufferGetPage(*bufp
);
81 *opaquep
= (HashPageOpaque
) PageGetSpecialPointer(*pagep
);
86 * Advance to previous page in a bucket, if any.
89 _hash_readprev(Relation rel
,
90 Buffer
*bufp
, Page
*pagep
, HashPageOpaque
*opaquep
)
94 blkno
= (*opaquep
)->hasho_prevblkno
;
95 _hash_relbuf(rel
, *bufp
);
96 *bufp
= InvalidBuffer
;
97 if (BlockNumberIsValid(blkno
))
99 *bufp
= _hash_getbuf(rel
, blkno
, HASH_READ
,
100 LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
101 *pagep
= BufferGetPage(*bufp
);
102 *opaquep
= (HashPageOpaque
) PageGetSpecialPointer(*pagep
);
107 * _hash_first() -- Find the first item in a scan.
109 * Find the first item in the index that
110 * satisfies the qualification associated with the scan descriptor. On
111 * success, the page containing the current index tuple is read locked
112 * and pinned, and the scan's opaque data entry is updated to
113 * include the buffer.
116 _hash_first(IndexScanDesc scan
, ScanDirection dir
)
118 Relation rel
= scan
->indexRelation
;
119 HashScanOpaque so
= (HashScanOpaque
) scan
->opaque
;
127 HashPageOpaque opaque
;
133 pgstat_count_index_scan(rel
);
135 current
= &(so
->hashso_curpos
);
136 ItemPointerSetInvalid(current
);
139 * We do not support hash scans with no index qualification, because we
140 * would have to read the whole index rather than just one bucket. That
141 * creates a whole raft of problems, since we haven't got a practical way
142 * to lock all the buckets against splits or compactions.
144 if (scan
->numberOfKeys
< 1)
146 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
147 errmsg("hash indexes do not support whole-index scans")));
149 /* There may be more than one index qual, but we hash only the first */
150 cur
= &scan
->keyData
[0];
152 /* We support only single-column hash indexes */
153 Assert(cur
->sk_attno
== 1);
154 /* And there's only one operator strategy, too */
155 Assert(cur
->sk_strategy
== HTEqualStrategyNumber
);
158 * If the constant in the index qual is NULL, assume it cannot match any
159 * items in the index.
161 if (cur
->sk_flags
& SK_ISNULL
)
165 * Okay to compute the hash key. We want to do this before acquiring any
166 * locks, in case a user-defined hash function happens to be slow.
168 * If scankey operator is not a cross-type comparison, we can use the
169 * cached hash function; otherwise gotta look it up in the catalogs.
171 * We support the convention that sk_subtype == InvalidOid means the
172 * opclass input type; this is a hack to simplify life for ScanKeyInit().
174 if (cur
->sk_subtype
== rel
->rd_opcintype
[0] ||
175 cur
->sk_subtype
== InvalidOid
)
176 hashkey
= _hash_datum2hashkey(rel
, cur
->sk_argument
);
178 hashkey
= _hash_datum2hashkey_type(rel
, cur
->sk_argument
,
181 so
->hashso_sk_hash
= hashkey
;
184 * Acquire shared split lock so we can compute the target bucket safely
187 _hash_getlock(rel
, 0, HASH_SHARE
);
189 /* Read the metapage */
190 metabuf
= _hash_getbuf(rel
, HASH_METAPAGE
, HASH_READ
, LH_META_PAGE
);
191 metap
= HashPageGetMeta(BufferGetPage(metabuf
));
194 * Compute the target bucket number, and convert to block number.
196 bucket
= _hash_hashkey2bucket(hashkey
,
197 metap
->hashm_maxbucket
,
198 metap
->hashm_highmask
,
199 metap
->hashm_lowmask
);
201 blkno
= BUCKET_TO_BLKNO(metap
, bucket
);
203 /* done with the metapage */
204 _hash_relbuf(rel
, metabuf
);
207 * Acquire share lock on target bucket; then we can release split lock.
209 _hash_getlock(rel
, blkno
, HASH_SHARE
);
211 _hash_droplock(rel
, 0, HASH_SHARE
);
213 /* Update scan opaque state to show we have lock on the bucket */
214 so
->hashso_bucket
= bucket
;
215 so
->hashso_bucket_valid
= true;
216 so
->hashso_bucket_blkno
= blkno
;
218 /* Fetch the primary bucket page for the bucket */
219 buf
= _hash_getbuf(rel
, blkno
, HASH_READ
, LH_BUCKET_PAGE
);
220 page
= BufferGetPage(buf
);
221 opaque
= (HashPageOpaque
) PageGetSpecialPointer(page
);
222 Assert(opaque
->hasho_bucket
== bucket
);
224 /* If a backwards scan is requested, move to the end of the chain */
225 if (ScanDirectionIsBackward(dir
))
227 while (BlockNumberIsValid(opaque
->hasho_nextblkno
))
228 _hash_readnext(rel
, &buf
, &page
, &opaque
);
231 /* Now find the first tuple satisfying the qualification */
232 if (!_hash_step(scan
, &buf
, dir
))
235 /* if we're here, _hash_step found a valid tuple */
236 offnum
= ItemPointerGetOffsetNumber(current
);
237 _hash_checkpage(rel
, buf
, LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
238 page
= BufferGetPage(buf
);
239 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
240 scan
->xs_ctup
.t_self
= itup
->t_tid
;
246 * _hash_step() -- step to the next valid item in a scan in the bucket.
248 * If no valid record exists in the requested direction, return
249 * false. Else, return true and set the hashso_curpos for the
250 * scan to the right thing.
252 * 'bufP' points to the current buffer, which is pinned and read-locked.
253 * On success exit, we have pin and read-lock on whichever page
254 * contains the right item; on failure, we have released all buffers.
257 _hash_step(IndexScanDesc scan
, Buffer
*bufP
, ScanDirection dir
)
259 Relation rel
= scan
->indexRelation
;
260 HashScanOpaque so
= (HashScanOpaque
) scan
->opaque
;
264 HashPageOpaque opaque
;
270 current
= &(so
->hashso_curpos
);
273 _hash_checkpage(rel
, buf
, LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
274 page
= BufferGetPage(buf
);
275 opaque
= (HashPageOpaque
) PageGetSpecialPointer(page
);
278 * If _hash_step is called from _hash_first, current will not be valid, so
279 * we can't dereference it. However, in that case, we presumably want to
280 * start at the beginning/end of the page...
282 maxoff
= PageGetMaxOffsetNumber(page
);
283 if (ItemPointerIsValid(current
))
284 offnum
= ItemPointerGetOffsetNumber(current
);
286 offnum
= InvalidOffsetNumber
;
289 * 'offnum' now points to the last tuple we examined (if any).
291 * continue to step through tuples until: 1) we get to the end of the
292 * bucket chain or 2) we find a valid tuple.
298 case ForwardScanDirection
:
299 if (offnum
!= InvalidOffsetNumber
)
300 offnum
= OffsetNumberNext(offnum
); /* move forward */
303 /* new page, locate starting position by binary search */
304 offnum
= _hash_binsearch(page
, so
->hashso_sk_hash
);
310 * check if we're still in the range of items with
311 * the target hash key
313 if (offnum
<= maxoff
)
315 Assert(offnum
>= FirstOffsetNumber
);
316 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
317 if (so
->hashso_sk_hash
== _hash_get_indextuple_hashkey(itup
))
318 break; /* yes, so exit for-loop */
322 * ran off the end of this page, try the next
324 _hash_readnext(rel
, &buf
, &page
, &opaque
);
325 if (BufferIsValid(buf
))
327 maxoff
= PageGetMaxOffsetNumber(page
);
328 offnum
= _hash_binsearch(page
, so
->hashso_sk_hash
);
334 break; /* exit for-loop */
339 case BackwardScanDirection
:
340 if (offnum
!= InvalidOffsetNumber
)
341 offnum
= OffsetNumberPrev(offnum
); /* move back */
344 /* new page, locate starting position by binary search */
345 offnum
= _hash_binsearch_last(page
, so
->hashso_sk_hash
);
351 * check if we're still in the range of items with
352 * the target hash key
354 if (offnum
>= FirstOffsetNumber
)
356 Assert(offnum
<= maxoff
);
357 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
358 if (so
->hashso_sk_hash
== _hash_get_indextuple_hashkey(itup
))
359 break; /* yes, so exit for-loop */
363 * ran off the end of this page, try the next
365 _hash_readprev(rel
, &buf
, &page
, &opaque
);
366 if (BufferIsValid(buf
))
368 maxoff
= PageGetMaxOffsetNumber(page
);
369 offnum
= _hash_binsearch_last(page
, so
->hashso_sk_hash
);
375 break; /* exit for-loop */
381 /* NoMovementScanDirection */
382 /* this should not be reached */
389 /* we ran off the end of the bucket without finding a match */
390 *bufP
= so
->hashso_curbuf
= InvalidBuffer
;
391 ItemPointerSetInvalid(current
);
395 /* check the tuple quals, loop around if not met */
396 } while (!_hash_checkqual(scan
, itup
));
398 /* if we made it to here, we've found a valid tuple */
399 blkno
= BufferGetBlockNumber(buf
);
400 *bufP
= so
->hashso_curbuf
= buf
;
401 ItemPointerSet(current
, blkno
, offnum
);