Change hash indexes to store only the hash code rather than the whole indexed value.
[PostgreSQL.git] / src / backend / access / hash / hashsearch.c
blob8b77d2cc5335958a59a864d8530de58ed6bcb7dc
1 /*-------------------------------------------------------------------------
3 * hashsearch.c
4 * search code for postgres hash tables
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "pgstat.h"
20 #include "storage/bufmgr.h"
21 #include "utils/rel.h"
25 * _hash_next() -- Get the next item in a scan.
27 * On entry, we have a valid hashso_curpos in the scan, and a
28 * pin and read lock on the page that contains that item.
29 * We find the next item in the scan, if any.
30 * On success exit, we have the page containing the next item
31 * pinned and locked.
33 bool
34 _hash_next(IndexScanDesc scan, ScanDirection dir)
36 Relation rel = scan->indexRelation;
37 HashScanOpaque so = (HashScanOpaque) scan->opaque;
38 Buffer buf;
39 Page page;
40 OffsetNumber offnum;
41 ItemPointer current;
42 IndexTuple itup;
44 /* we still have the buffer pinned and read-locked */
45 buf = so->hashso_curbuf;
46 Assert(BufferIsValid(buf));
49 * step to next valid tuple.
51 if (!_hash_step(scan, &buf, dir))
52 return false;
54 /* if we're here, _hash_step found a valid tuple */
55 current = &(so->hashso_curpos);
56 offnum = ItemPointerGetOffsetNumber(current);
57 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
58 page = BufferGetPage(buf);
59 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
60 scan->xs_ctup.t_self = itup->t_tid;
62 return true;
66 * Advance to next page in a bucket, if any.
68 static void
69 _hash_readnext(Relation rel,
70 Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
72 BlockNumber blkno;
74 blkno = (*opaquep)->hasho_nextblkno;
75 _hash_relbuf(rel, *bufp);
76 *bufp = InvalidBuffer;
77 if (BlockNumberIsValid(blkno))
79 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
80 *pagep = BufferGetPage(*bufp);
81 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
86 * Advance to previous page in a bucket, if any.
88 static void
89 _hash_readprev(Relation rel,
90 Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
92 BlockNumber blkno;
94 blkno = (*opaquep)->hasho_prevblkno;
95 _hash_relbuf(rel, *bufp);
96 *bufp = InvalidBuffer;
97 if (BlockNumberIsValid(blkno))
99 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
100 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
101 *pagep = BufferGetPage(*bufp);
102 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
/*
 *	_hash_first() -- Find the first item in a scan.
 *
 *		Find the first item in the index that
 *		satisfies the qualification associated with the scan descriptor. On
 *		success, the page containing the current index tuple is read locked
 *		and pinned, and the scan's opaque data entry is updated to
 *		include the buffer.
 */
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	ScanKey		cur;
	uint32		hashkey;
	Bucket		bucket;
	BlockNumber blkno;
	Buffer		buf;
	Buffer		metabuf;
	Page		page;
	HashPageOpaque opaque;
	HashMetaPage metap;
	IndexTuple	itup;
	ItemPointer current;
	OffsetNumber offnum;

	pgstat_count_index_scan(rel);

	/* start with an invalid scan position; _hash_step relies on this */
	current = &(so->hashso_curpos);
	ItemPointerSetInvalid(current);

	/*
	 * We do not support hash scans with no index qualification, because we
	 * would have to read the whole index rather than just one bucket. That
	 * creates a whole raft of problems, since we haven't got a practical way
	 * to lock all the buckets against splits or compactions.
	 */
	if (scan->numberOfKeys < 1)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("hash indexes do not support whole-index scans")));

	/* There may be more than one index qual, but we hash only the first */
	cur = &scan->keyData[0];

	/* We support only single-column hash indexes */
	Assert(cur->sk_attno == 1);
	/* And there's only one operator strategy, too */
	Assert(cur->sk_strategy == HTEqualStrategyNumber);

	/*
	 * If the constant in the index qual is NULL, assume it cannot match any
	 * items in the index.
	 */
	if (cur->sk_flags & SK_ISNULL)
		return false;

	/*
	 * Okay to compute the hash key.  We want to do this before acquiring any
	 * locks, in case a user-defined hash function happens to be slow.
	 *
	 * If scankey operator is not a cross-type comparison, we can use the
	 * cached hash function; otherwise gotta look it up in the catalogs.
	 *
	 * We support the convention that sk_subtype == InvalidOid means the
	 * opclass input type; this is a hack to simplify life for ScanKeyInit().
	 */
	if (cur->sk_subtype == rel->rd_opcintype[0] ||
		cur->sk_subtype == InvalidOid)
		hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
	else
		hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
										   cur->sk_subtype);

	/* remember the hash key so _hash_step can compare stored hash codes */
	so->hashso_sk_hash = hashkey;

	/*
	 * Acquire shared split lock so we can compute the target bucket safely
	 * (see README).
	 */
	_hash_getlock(rel, 0, HASH_SHARE);

	/* Read the metapage */
	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
	metap = HashPageGetMeta(BufferGetPage(metabuf));

	/*
	 * Compute the target bucket number, and convert to block number.
	 */
	bucket = _hash_hashkey2bucket(hashkey,
								  metap->hashm_maxbucket,
								  metap->hashm_highmask,
								  metap->hashm_lowmask);

	blkno = BUCKET_TO_BLKNO(metap, bucket);

	/* done with the metapage */
	_hash_relbuf(rel, metabuf);

	/*
	 * Acquire share lock on target bucket; then we can release split lock.
	 * NOTE(review): this ordering (bucket lock before split-lock release)
	 * appears deliberate, to keep the bucket from being split out from
	 * under us -- confirm against the hash access method README.
	 */
	_hash_getlock(rel, blkno, HASH_SHARE);
	_hash_droplock(rel, 0, HASH_SHARE);

	/* Update scan opaque state to show we have lock on the bucket */
	so->hashso_bucket = bucket;
	so->hashso_bucket_valid = true;
	so->hashso_bucket_blkno = blkno;

	/* Fetch the primary bucket page for the bucket */
	buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
	page = BufferGetPage(buf);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
	Assert(opaque->hasho_bucket == bucket);

	/* If a backwards scan is requested, move to the end of the chain */
	if (ScanDirectionIsBackward(dir))
	{
		while (BlockNumberIsValid(opaque->hasho_nextblkno))
			_hash_readnext(rel, &buf, &page, &opaque);
	}

	/* Now find the first tuple satisfying the qualification */
	if (!_hash_step(scan, &buf, dir))
		return false;

	/* if we're here, _hash_step found a valid tuple */
	offnum = ItemPointerGetOffsetNumber(current);
	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
	page = BufferGetPage(buf);
	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

	/* report the heap tuple this index entry points at */
	scan->xs_ctup.t_self = itup->t_tid;

	return true;
}
/*
 *	_hash_step() -- step to the next valid item in a scan in the bucket.
 *
 *		If no valid record exists in the requested direction, return
 *		false.  Else, return true and set the hashso_curpos for the
 *		scan to the right thing.
 *
 *		'bufP' points to the current buffer, which is pinned and read-locked.
 *		On success exit, we have pin and read-lock on whichever page
 *		contains the right item; on failure, we have released all buffers.
 */
bool
_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	ItemPointer current;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	OffsetNumber maxoff;
	OffsetNumber offnum;
	BlockNumber blkno;
	IndexTuple	itup;

	current = &(so->hashso_curpos);

	buf = *bufP;
	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
	page = BufferGetPage(buf);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

	/*
	 * If _hash_step is called from _hash_first, current will not be valid, so
	 * we can't dereference it.  However, in that case, we presumably want to
	 * start at the beginning/end of the page...
	 */
	maxoff = PageGetMaxOffsetNumber(page);
	if (ItemPointerIsValid(current))
		offnum = ItemPointerGetOffsetNumber(current);
	else
		offnum = InvalidOffsetNumber;

	/*
	 * 'offnum' now points to the last tuple we examined (if any).
	 *
	 * continue to step through tuples until: 1) we get to the end of the
	 * bucket chain or 2) we find a valid tuple.
	 */
	do
	{
		switch (dir)
		{
			case ForwardScanDirection:
				if (offnum != InvalidOffsetNumber)
					offnum = OffsetNumberNext(offnum);	/* move forward */
				else
				{
					/* new page, locate starting position by binary search */
					offnum = _hash_binsearch(page, so->hashso_sk_hash);
				}

				for (;;)
				{
					/*
					 * check if we're still in the range of items with the
					 * target hash key
					 */
					if (offnum <= maxoff)
					{
						Assert(offnum >= FirstOffsetNumber);
						itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
						if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
							break;		/* yes, so exit for-loop */
					}

					/*
					 * ran off the end of this page, try the next
					 */
					_hash_readnext(rel, &buf, &page, &opaque);
					if (BufferIsValid(buf))
					{
						/* restart the hash-code search on the new page */
						maxoff = PageGetMaxOffsetNumber(page);
						offnum = _hash_binsearch(page, so->hashso_sk_hash);
					}
					else
					{
						/* end of bucket */
						itup = NULL;
						break;	/* exit for-loop */
					}
				}
				break;

			case BackwardScanDirection:
				if (offnum != InvalidOffsetNumber)
					offnum = OffsetNumberPrev(offnum);	/* move back */
				else
				{
					/* new page, locate starting position by binary search */
					offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
				}

				for (;;)
				{
					/*
					 * check if we're still in the range of items with the
					 * target hash key
					 */
					if (offnum >= FirstOffsetNumber)
					{
						Assert(offnum <= maxoff);
						itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
						if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
							break;		/* yes, so exit for-loop */
					}

					/*
					 * ran off the end of this page, try the next
					 */
					_hash_readprev(rel, &buf, &page, &opaque);
					if (BufferIsValid(buf))
					{
						/* restart the hash-code search on the new page */
						maxoff = PageGetMaxOffsetNumber(page);
						offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
					}
					else
					{
						/* end of bucket */
						itup = NULL;
						break;	/* exit for-loop */
					}
				}
				break;

			default:
				/* NoMovementScanDirection */
				/* this should not be reached */
				itup = NULL;
				break;
		}

		if (itup == NULL)
		{
			/* we ran off the end of the bucket without finding a match */
			*bufP = so->hashso_curbuf = InvalidBuffer;
			ItemPointerSetInvalid(current);
			return false;
		}

		/* check the tuple quals, loop around if not met */
	} while (!_hash_checkqual(scan, itup));

	/* if we made it to here, we've found a valid tuple */
	blkno = BufferGetBlockNumber(buf);
	*bufP = so->hashso_curbuf = buf;
	ItemPointerSet(current, blkno, offnum);
	return true;
}