Update.
[glibc.git] / db2 / hash / hash.c
blob5e0660b727b5398e56fbb6428d0a1ec5ea2f35d9
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
16 * Margo Seltzer.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
47 #include "config.h"
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c 10.45 (Sleepycat) 5/11/98";
51 #endif /* not lint */
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
56 #include <errno.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #endif
61 #include "db_int.h"
62 #include "shqueue.h"
63 #include "db_page.h"
64 #include "db_am.h"
65 #include "db_ext.h"
66 #include "hash.h"
67 #include "log.h"
69 static int __ham_c_close __P((DBC *));
70 static int __ham_c_del __P((DBC *, u_int32_t));
71 static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
72 static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
73 static int __ham_c_init __P((DB *, DB_TXN *, DBC **));
74 static int __ham_cursor __P((DB *, DB_TXN *, DBC **));
75 static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
76 static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
77 static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
78 static void __ham_init_htab __P((HTAB *, u_int32_t, u_int32_t));
79 static int __ham_lookup __P((HTAB *,
80 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
81 static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
82 static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
83 static int __ham_sync __P((DB *, u_int32_t));
85 /************************** INTERFACE ROUTINES ***************************/
86 /* OPEN/CLOSE */
89 * __ham_open --
91 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
93 int
94 __ham_open(dbp, dbinfo)
95 DB *dbp;
96 DB_INFO *dbinfo;
98 DB_ENV *dbenv;
99 DBC *curs;
100 HTAB *hashp;
101 int file_existed, ret;
103 dbenv = dbp->dbenv;
105 if ((hashp = (HTAB *)__db_calloc(1, sizeof(HTAB))) == NULL)
106 return (ENOMEM);
107 hashp->dbp = dbp;
109 /* Set the hash function if specified by the user. */
110 if (dbinfo != NULL && dbinfo->h_hash != NULL)
111 hashp->hash = dbinfo->h_hash;
114 * Initialize the remaining fields of the dbp. The type, close and
115 * fd functions are all set in db_open.
117 dbp->internal = hashp;
118 dbp->cursor = __ham_cursor;
119 dbp->del = __ham_delete;
120 dbp->get = __ham_get;
121 dbp->put = __ham_put;
122 dbp->sync = __ham_sync;
124 /* If locking is turned on, lock the meta data page. */
125 if (F_ISSET(dbp, DB_AM_LOCKING)) {
126 dbp->lock.pgno = BUCKET_INVALID;
127 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
128 0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
129 if (ret < 0)
130 ret = EAGAIN;
131 goto out;
136 * Now, we can try to read the meta-data page and figure out
137 * if we set up locking and get the meta-data page properly.
138 * If this is a new file, initialize it, and put it back dirty.
140 if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
141 goto out;
143 /* Initialize the hashp structure */
144 if (hashp->hdr->magic == DB_HASHMAGIC) {
145 file_existed = 1;
146 /* File exists, verify the data in the header. */
147 if (hashp->hash == NULL)
148 hashp->hash =
149 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
150 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
151 hashp->hdr->h_charkey) {
152 __db_err(hashp->dbp->dbenv,
153 "hash: incompatible hash function");
154 ret = EINVAL;
155 goto out;
157 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
158 F_SET(dbp, DB_AM_DUP);
159 } else {
161 * File does not exist, we must initialize the header. If
162 * locking is enabled that means getting a write lock first.
164 file_existed = 0;
165 if (F_ISSET(dbp, DB_AM_LOCKING) &&
166 ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
167 (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
168 &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
169 if (ret < 0)
170 ret = EAGAIN;
171 goto out;
174 __ham_init_htab(hashp,
175 dbinfo != NULL ? dbinfo->h_nelem : 0,
176 dbinfo != NULL ? dbinfo->h_ffactor : 0);
177 if (F_ISSET(dbp, DB_AM_DUP))
178 F_SET(hashp->hdr, DB_HASH_DUP);
179 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
180 goto out;
183 /* Initialize the default cursor. */
184 __ham_c_init(dbp, NULL, &curs);
185 TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
187 /* Allocate memory for our split buffer. */
188 if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
189 ret = ENOMEM;
190 goto out;
193 #ifdef NO_STATISTICS_FOR_DB_ERR
194 __db_err(dbp->dbenv,
195 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
196 "TABLE POINTER ", (long)hashp,
197 "BUCKET SIZE ", (long)hashp->hdr->pagesize,
198 "FILL FACTOR ", (long)hashp->hdr->ffactor,
199 "MAX BUCKET ", (long)hashp->hdr->max_bucket,
200 "OVFL POINT ", (long)hashp->hdr->ovfl_point,
201 "LAST FREED ", (long)hashp->hdr->last_freed,
202 "HIGH MASK ", (long)hashp->hdr->high_mask,
203 "LOW MASK ", (long)hashp->hdr->low_mask,
204 "NELEM ", (long)hashp->hdr->nelem,
205 "FLAGS ", (long)hashp->hdr->flags);
206 #endif
208 /* Release the meta data page */
209 (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
210 if (F_ISSET(dbp, DB_AM_LOCKING) &&
211 (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
212 if (ret < 0)
213 ret = EAGAIN;
214 goto out;
217 hashp->hlock = 0;
218 hashp->hdr = NULL;
219 /* Sync the file so that we know that the meta data goes to disk. */
220 if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
221 goto out;
222 return (0);
224 out: (void)__ham_close(dbp);
225 return (ret);
229 * PUBLIC: int __ham_close __P((DB *));
232 __ham_close(dbp)
233 DB *dbp;
235 HTAB *hashp;
236 int ret, t_ret;
238 DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
239 hashp = (HTAB *)dbp->internal;
240 ret = 0;
242 /* Free the split page. */
243 if (hashp->split_buf)
244 FREE(hashp->split_buf, dbp->pgsize);
246 if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
247 (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
248 ret = t_ret;
249 if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
250 hashp->hlock)) != 0 && ret == 0)
251 ret = t_ret;
253 FREE(hashp, sizeof(HTAB));
254 dbp->internal = NULL;
255 return (ret);
258 /************************** LOCAL CREATION ROUTINES **********************/
260 * Returns 0 on No Error
262 static void
263 __ham_init_htab(hashp, nelem, ffactor)
264 HTAB *hashp;
265 u_int32_t nelem, ffactor;
267 int32_t l2, nbuckets;
269 memset(hashp->hdr, 0, sizeof(HASHHDR));
270 hashp->hdr->ffactor = ffactor;
271 hashp->hdr->pagesize = hashp->dbp->pgsize;
272 ZERO_LSN(hashp->hdr->lsn);
273 hashp->hdr->magic = DB_HASHMAGIC;
274 hashp->hdr->version = DB_HASHVERSION;
275 if (hashp->hash == NULL)
276 hashp->hash =
277 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
278 hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
279 if (nelem != 0 && hashp->hdr->ffactor != 0) {
280 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
281 l2 = __db_log2(nelem > 2 ? nelem : 2);
282 } else
283 l2 = 2;
285 nbuckets = 1 << l2;
287 hashp->hdr->ovfl_point = l2;
288 hashp->hdr->last_freed = PGNO_INVALID;
290 hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
291 hashp->hdr->low_mask = (nbuckets >> 1) - 1;
292 memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
295 /********************** DESTROY/CLOSE ROUTINES ************************/
299 * Write modified pages to disk
301 * Returns:
302 * 0 == OK
303 * -1 ERROR
305 static int
306 __ham_sync(dbp, flags)
307 DB *dbp;
308 u_int32_t flags;
310 int ret;
312 DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
313 if ((ret = __db_syncchk(dbp, flags)) != 0)
314 return (ret);
315 if (F_ISSET(dbp, DB_AM_RDONLY))
316 return (0);
318 if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
319 ret = 0;
321 return (ret);
324 /*******************************SEARCH ROUTINES *****************************/
326 * All the access routines return
328 * Returns:
329 * 0 on SUCCESS
330 * 1 to indicate an external ERROR (i.e. key not found, etc)
331 * -1 to indicate an internal ERROR (i.e. out of memory, etc)
334 static int
335 __ham_get(dbp, txn, key, data, flags)
336 DB *dbp;
337 DB_TXN *txn;
338 DBT *key;
339 DBT *data;
340 u_int32_t flags;
342 DB *ldbp;
343 HTAB *hashp;
344 HASH_CURSOR *hcp;
345 int ret, t_ret;
347 DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
348 if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
349 return (ret);
351 ldbp = dbp;
352 if (F_ISSET(dbp, DB_AM_THREAD) &&
353 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
354 return (ret);
356 hashp = (HTAB *)ldbp->internal;
357 SET_LOCKER(ldbp, txn);
358 GET_META(ldbp, hashp);
360 hashp->hash_accesses++;
361 hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
362 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
363 if (F_ISSET(hcp, H_OK))
364 ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
365 else /* Key was not found */
366 ret = DB_NOTFOUND;
368 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
369 ret = t_ret;
370 RELEASE_META(ldbp, hashp);
371 if (F_ISSET(dbp, DB_AM_THREAD))
372 __db_puthandle(ldbp);
373 return (ret);
376 static int
377 __ham_put(dbp, txn, key, data, flags)
378 DB *dbp;
379 DB_TXN *txn;
380 DBT *key;
381 DBT *data;
382 u_int32_t flags;
384 DB *ldbp;
385 DBT tmp_val, *myval;
386 HASH_CURSOR *hcp;
387 HTAB *hashp;
388 u_int32_t nbytes;
389 int ret, t_ret;
391 DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
392 if ((ret = __db_putchk(dbp, key, data,
393 flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
394 return (ret);
396 ldbp = dbp;
397 if (F_ISSET(dbp, DB_AM_THREAD) &&
398 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
399 return (ret);
401 hashp = (HTAB *)ldbp->internal;
402 SET_LOCKER(ldbp, txn);
403 GET_META(ldbp, hashp);
404 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
406 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
407 HKEYDATA_PSIZE(key->size)) +
408 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
409 HKEYDATA_PSIZE(data->size));
411 hashp->hash_accesses++;
412 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
414 if (ret == DB_NOTFOUND) {
415 ret = 0;
416 if (hcp->seek_found_page != PGNO_INVALID &&
417 hcp->seek_found_page != hcp->pgno) {
418 if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
419 goto out;
420 hcp->pgno = hcp->seek_found_page;
421 hcp->bndx = NDX_INVALID;
424 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
426 * Doing a partial put, but the key does not exist
427 * and we are not beginning the write at 0. We
428 * must create a data item padded up to doff and
429 * then write the new bytes represented by val.
431 ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
432 &hcp->big_data, &hcp->big_datalen);
433 if (ret == 0) {
434 memset(tmp_val.data, 0, data->doff);
435 memcpy((u_int8_t *)tmp_val.data + data->doff,
436 data->data, data->size);
437 myval = &tmp_val;
439 } else
440 myval = (DBT *)data;
442 if (ret == 0)
443 ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
444 } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
445 if (flags == DB_NOOVERWRITE)
446 ret = DB_KEYEXIST;
447 else if (F_ISSET(ldbp, DB_AM_DUP))
448 ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
449 else
450 ret = __ham_overwrite(hashp, hcp, data);
453 /* Free up all the cursor pages. */
454 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
455 ret = t_ret;
456 /* Now check if we have to grow. */
457 out: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
458 ret = __ham_expand_table(hashp);
459 F_CLR(hcp, H_EXPAND);
462 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
463 ret = t_ret;
464 RELEASE_META(ldbp, hashp);
465 if (F_ISSET(dbp, DB_AM_THREAD))
466 __db_puthandle(ldbp);
467 return (ret);
470 static int
471 __ham_cursor(dbp, txnid, dbcp)
472 DB *dbp;
473 DB_TXN *txnid;
474 DBC **dbcp;
476 int ret;
478 DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
479 if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
480 return (ret);
482 DB_THREAD_LOCK(dbp);
483 TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
484 DB_THREAD_UNLOCK(dbp);
485 return (ret);
488 static int
489 __ham_c_init(dbp, txnid, dbcp)
490 DB *dbp;
491 DB_TXN *txnid;
492 DBC **dbcp;
494 DBC *db_curs;
495 HASH_CURSOR *new_curs;
497 if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL)
498 return (ENOMEM);
500 if ((new_curs =
501 (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) {
502 FREE(db_curs, sizeof(DBC));
503 return (ENOMEM);
506 db_curs->internal = new_curs;
507 db_curs->c_close = __ham_c_close;
508 db_curs->c_del = __ham_c_del;
509 db_curs->c_get = __ham_c_get;
510 db_curs->c_put = __ham_c_put;
511 db_curs->txn = txnid;
512 db_curs->dbp = dbp;
514 new_curs->db_cursor = db_curs;
515 __ham_item_init(new_curs);
517 if (dbcp != NULL)
518 *dbcp = db_curs;
519 return (0);
522 static int
523 __ham_delete(dbp, txn, key, flags)
524 DB *dbp;
525 DB_TXN *txn;
526 DBT *key;
527 u_int32_t flags;
529 DB *ldbp;
530 HTAB *hashp;
531 HASH_CURSOR *hcp;
532 int ret, t_ret;
534 DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
535 if ((ret =
536 __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
537 return (ret);
539 ldbp = dbp;
540 if (F_ISSET(dbp, DB_AM_THREAD) &&
541 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
542 return (ret);
543 hashp = (HTAB *)ldbp->internal;
544 SET_LOCKER(ldbp, txn);
545 GET_META(ldbp, hashp);
546 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
548 hashp->hash_accesses++;
549 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
550 if (F_ISSET(hcp, H_OK))
551 ret = __ham_del_pair(hashp, hcp, 1);
552 else
553 ret = DB_NOTFOUND;
555 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
556 ret = t_ret;
557 RELEASE_META(ldbp, hashp);
558 if (F_ISSET(dbp, DB_AM_THREAD))
559 __db_puthandle(ldbp);
560 return (ret);
563 /* ****************** CURSORS ********************************** */
564 static int
565 __ham_c_close(cursor)
566 DBC *cursor;
568 DB *ldbp;
569 int ret;
571 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
573 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
574 * then there really isn't a need to get a handle here. However,
575 * the normal case is that at least one of those fields is non-NULL,
576 * and putting those checks in here would couple the ham_item_done
577 * functionality with cursor close which would be pretty disgusting.
578 * Instead, we pay the overhead here of always getting the handle.
580 ldbp = cursor->dbp;
581 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
582 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
583 return (ret);
585 ret = __ham_c_iclose(ldbp, cursor);
587 if (F_ISSET(ldbp, DB_AM_THREAD))
588 __db_puthandle(ldbp);
589 return (ret);
592 * __ham_c_iclose --
594 * Internal cursor close routine; assumes it is being passed the correct
595 * handle, rather than getting and putting a handle.
597 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
600 __ham_c_iclose(dbp, dbc)
601 DB *dbp;
602 DBC *dbc;
604 HASH_CURSOR *hcp;
605 HTAB *hashp;
606 int ret;
608 hashp = (HTAB *)dbp->internal;
609 hcp = (HASH_CURSOR *)dbc->internal;
610 ret = __ham_item_done(hashp, hcp, 0);
612 if (hcp->big_key)
613 FREE(hcp->big_key, hcp->big_keylen);
614 if (hcp->big_data)
615 FREE(hcp->big_data, hcp->big_datalen);
618 * All cursors (except the default ones) are linked off the master.
619 * Therefore, when we close the cursor, we have to remove it from
620 * the master, not the local one.
621 * XXX I am always removing from the master; what about local cursors?
623 DB_THREAD_LOCK(dbc->dbp);
624 TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
625 DB_THREAD_UNLOCK(dbc->dbp);
627 FREE(hcp, sizeof(HASH_CURSOR));
628 FREE(dbc, sizeof(DBC));
630 return (ret);
633 static int
634 __ham_c_del(cursor, flags)
635 DBC *cursor;
636 u_int32_t flags;
638 DB *ldbp;
639 HASH_CURSOR *hcp;
640 HASH_CURSOR save_curs;
641 HTAB *hashp;
642 db_pgno_t ppgno, chg_pgno;
643 int ret, t_ret;
645 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
646 ldbp = cursor->dbp;
647 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
648 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
649 return (ret);
650 hashp = (HTAB *)ldbp->internal;
651 hcp = (HASH_CURSOR *)cursor->internal;
652 save_curs = *hcp;
653 if ((ret = __db_cdelchk(ldbp, flags,
654 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
655 return (ret);
656 if (F_ISSET(hcp, H_DELETED))
657 return (DB_NOTFOUND);
659 SET_LOCKER(hashp->dbp, cursor->txn);
660 GET_META(hashp->dbp, hashp);
661 hashp->hash_accesses++;
662 if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
663 goto out;
664 if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
666 * We are about to remove a duplicate from offpage.
668 * There are 4 cases.
669 * 1. We will remove an item on a page, but there are more
670 * items on that page.
671 * 2. We will remove the last item on a page, but there is a
672 * following page of duplicates.
673 * 3. We will remove the last item on a page, this page was the
674 * last page in a duplicate set, but there were dups before
675 * it.
676 * 4. We will remove the last item on a page, removing the last
677 * duplicate.
678 * In case 1 hcp->dpagep is unchanged.
679 * In case 2 hcp->dpagep comes back pointing to the next dup
680 * page.
681 * In case 3 hcp->dpagep comes back NULL.
682 * In case 4 hcp->dpagep comes back NULL.
684 * Case 4 results in deleting the pair off the master page.
685 * The normal code for doing this knows how to delete the
686 * duplicates, so we will handle this case in the normal code.
688 ppgno = PREV_PGNO(hcp->dpagep);
689 if (ppgno == PGNO_INVALID &&
690 NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
691 NUM_ENT(hcp->dpagep) == 1)
692 goto normal;
694 /* Remove item from duplicate page. */
695 chg_pgno = hcp->dpgno;
696 if ((ret = __db_drem(hashp->dbp,
697 &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
698 goto out;
700 if (hcp->dpagep == NULL) {
701 if (ppgno != PGNO_INVALID) { /* Case 3 */
702 hcp->dpgno = ppgno;
703 if ((ret = __ham_get_cpage(hashp, hcp,
704 DB_LOCK_READ)) != 0)
705 goto out;
706 hcp->dndx = NUM_ENT(hcp->dpagep);
707 F_SET(hcp, H_DELETED);
708 } else { /* Case 4 */
709 ret = __ham_del_pair(hashp, hcp, 1);
710 hcp->dpgno = PGNO_INVALID;
712 * Delpair updated the cursor queue, so we
713 * don't have to do that here.
715 chg_pgno = PGNO_INVALID;
717 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
718 hcp->dndx = 0; /* Case 2 */
719 hcp->dpgno = PGNO(hcp->dpagep);
720 if (ppgno == PGNO_INVALID)
721 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
722 H_DATAINDEX(hcp->bndx))),
723 &hcp->dpgno, sizeof(db_pgno_t));
724 F_SET(hcp, H_DELETED);
725 } else /* Case 1 */
726 F_SET(hcp, H_DELETED);
727 if (chg_pgno != PGNO_INVALID)
728 __ham_c_update(hcp, chg_pgno, 0, 0, 1);
729 } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */
730 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
731 LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
732 ret = __ham_del_pair(hashp, hcp, 1);
733 else {
734 DBT repldbt;
736 repldbt.flags = 0;
737 F_SET(&repldbt, DB_DBT_PARTIAL);
738 repldbt.doff = hcp->dup_off;
739 repldbt.dlen = DUP_SIZE(hcp->dup_len);
740 repldbt.size = 0;
741 ret = __ham_replpair(hashp, hcp, &repldbt, 0);
742 hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
743 F_SET(hcp, H_DELETED);
744 __ham_c_update(hcp, hcp->pgno,
745 DUP_SIZE(hcp->dup_len), 0, 1);
748 } else
749 /* Not a duplicate */
750 normal: ret = __ham_del_pair(hashp, hcp, 1);
752 out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
753 ret = t_ret;
754 if (ret != 0)
755 *hcp = save_curs;
756 RELEASE_META(hashp->dbp, hashp);
757 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
758 __db_puthandle(ldbp);
759 return (ret);
762 static int
763 __ham_c_get(cursor, key, data, flags)
764 DBC *cursor;
765 DBT *key;
766 DBT *data;
767 u_int32_t flags;
769 DB *ldbp;
770 HTAB *hashp;
771 HASH_CURSOR *hcp, save_curs;
772 int get_key, ret, t_ret;
774 DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
775 flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
776 NULL, flags);
777 ldbp = cursor->dbp;
778 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
779 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
780 return (ret);
781 hashp = (HTAB *)(ldbp->internal);
782 hcp = (HASH_CURSOR *)cursor->internal;
783 save_curs = *hcp;
784 if ((ret =
785 __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
786 return (ret);
788 SET_LOCKER(hashp->dbp, cursor->txn);
789 GET_META(hashp->dbp, hashp);
790 hashp->hash_accesses++;
792 hcp->seek_size = 0;
794 ret = 0;
795 get_key = 1;
796 switch (flags) {
797 case DB_PREV:
798 if (hcp->bucket != BUCKET_INVALID) {
799 ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
800 break;
802 /* FALLTHROUGH */
803 case DB_LAST:
804 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
805 break;
806 case DB_FIRST:
807 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
808 break;
809 case DB_NEXT:
810 if (hcp->bucket == BUCKET_INVALID)
811 hcp->bucket = 0;
812 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
813 break;
814 case DB_SET:
815 case DB_SET_RANGE:
816 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
817 get_key = 0;
818 break;
819 case DB_CURRENT:
820 if (F_ISSET(hcp, H_DELETED)) {
821 ret = DB_KEYEMPTY;
822 goto out;
825 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
826 break;
830 * Must always enter this loop to do error handling and
831 * check for big key/data pair.
833 while (1) {
834 if (ret != 0 && ret != DB_NOTFOUND)
835 goto out1;
836 else if (F_ISSET(hcp, H_OK)) {
837 /* Get the key. */
838 if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
839 H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
840 &hcp->big_keylen)) != 0)
841 goto out1;
843 ret = __ham_dup_return(hashp, hcp, data, flags);
844 break;
845 } else if (!F_ISSET(hcp, H_NOMORE)) {
846 abort();
847 break;
851 * Ran out of entries in a bucket; change buckets.
853 switch (flags) {
854 case DB_LAST:
855 case DB_PREV:
856 ret = __ham_item_done(hashp, hcp, 0);
857 if (hcp->bucket == 0) {
858 ret = DB_NOTFOUND;
859 goto out1;
861 hcp->bucket--;
862 hcp->bndx = NDX_INVALID;
863 if (ret == 0)
864 ret = __ham_item_prev(hashp,
865 hcp, DB_LOCK_READ);
866 break;
867 case DB_FIRST:
868 case DB_NEXT:
869 ret = __ham_item_done(hashp, hcp, 0);
870 hcp->bndx = NDX_INVALID;
871 hcp->bucket++;
872 hcp->pgno = PGNO_INVALID;
873 hcp->pagep = NULL;
874 if (hcp->bucket > hashp->hdr->max_bucket) {
875 ret = DB_NOTFOUND;
876 goto out1;
878 if (ret == 0)
879 ret = __ham_item_next(hashp,
880 hcp, DB_LOCK_READ);
881 break;
882 case DB_SET:
883 case DB_SET_RANGE:
884 /* Key not found. */
885 ret = DB_NOTFOUND;
886 goto out1;
889 out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
890 ret = t_ret;
891 out: if (ret)
892 *hcp = save_curs;
893 RELEASE_META(hashp->dbp, hashp);
894 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
895 __db_puthandle(ldbp);
896 return (ret);
899 static int
900 __ham_c_put(cursor, key, data, flags)
901 DBC *cursor;
902 DBT *key;
903 DBT *data;
904 u_int32_t flags;
906 DB *ldbp;
907 HASH_CURSOR *hcp, save_curs;
908 HTAB *hashp;
909 u_int32_t nbytes;
910 int ret, t_ret;
912 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
913 flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
914 data, flags);
915 ldbp = cursor->dbp;
916 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
917 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
918 return (ret);
919 hashp = (HTAB *)(ldbp->internal);
920 hcp = (HASH_CURSOR *)cursor->internal;
921 save_curs = *hcp;
923 if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
924 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
925 return (ret);
926 if (F_ISSET(hcp, H_DELETED))
927 return (DB_NOTFOUND);
929 SET_LOCKER(hashp->dbp, cursor->txn);
930 GET_META(hashp->dbp, hashp);
931 ret = 0;
933 switch (flags) {
934 case DB_KEYLAST:
935 case DB_KEYFIRST:
936 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
937 HKEYDATA_PSIZE(key->size)) +
938 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
939 HKEYDATA_PSIZE(data->size));
940 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
941 break;
942 case DB_BEFORE:
943 case DB_AFTER:
944 case DB_CURRENT:
945 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
946 break;
949 if (ret == 0) {
950 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
951 ret = __ham_overwrite(hashp, hcp, data);
952 else
953 ret = __ham_add_dup(hashp, hcp, data, flags);
956 if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
957 ret = __ham_expand_table(hashp);
958 F_CLR(hcp, H_EXPAND);
961 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
962 ret = t_ret;
963 if (ret != 0)
964 *hcp = save_curs;
965 RELEASE_META(hashp->dbp, hashp);
966 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
967 __db_puthandle(ldbp);
968 return (ret);
971 /********************************* UTILITIES ************************/
974 * __ham_expand_table --
976 * PUBLIC: int __ham_expand_table __P((HTAB *));
979 __ham_expand_table(hashp)
980 HTAB *hashp;
982 DB_LSN new_lsn;
983 u_int32_t old_bucket, new_bucket, spare_ndx;
984 int ret;
986 ret = 0;
987 DIRTY_META(hashp, ret);
988 if (ret)
989 return (ret);
992 * If the split point is about to increase, make sure that we
993 * have enough extra pages. The calculation here is weird.
994 * We'd like to do this after we've upped max_bucket, but it's
995 * too late then because we've logged the meta-data split. What
996 * we'll do between then and now is increment max bucket and then
997 * see what the log of one greater than that is; here we have to
998 * look at the log of max + 2. VERY NASTY STUFF.
1000 if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) {
1002 * We are about to shift the split point. Make sure that
1003 * if the next doubling is going to be big (more than 8
1004 * pages), we have some extra pages around.
1006 if (hashp->hdr->max_bucket + 1 >= 8 &&
1007 hashp->hdr->spares[hashp->hdr->ovfl_point] <
1008 hashp->hdr->spares[hashp->hdr->ovfl_point - 1] +
1009 hashp->hdr->ovfl_point + 1)
1010 __ham_init_ovflpages(hashp);
1013 /* Now we can log the meta-data split. */
1014 if (DB_LOGGING(hashp->dbp)) {
1015 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
1016 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1017 hashp->dbp->log_fileid,
1018 hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
1019 hashp->hdr->spares[hashp->hdr->ovfl_point],
1020 &hashp->hdr->lsn)) != 0)
1021 return (ret);
1023 hashp->hdr->lsn = new_lsn;
1026 hashp->hash_expansions++;
1027 new_bucket = ++hashp->hdr->max_bucket;
1028 old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
1031 * If the split point is increasing, copy the current contents
1032 * of the spare split bucket to the next bucket.
1034 spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
1035 if (spare_ndx > hashp->hdr->ovfl_point) {
1036 hashp->hdr->spares[spare_ndx] =
1037 hashp->hdr->spares[hashp->hdr->ovfl_point];
1038 hashp->hdr->ovfl_point = spare_ndx;
1041 if (new_bucket > hashp->hdr->high_mask) {
1042 /* Starting a new doubling */
1043 hashp->hdr->low_mask = hashp->hdr->high_mask;
1044 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
1047 if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1048 __db_err(hashp->dbp->dbenv,
1049 "hash: Cannot allocate new bucket. Pages exhausted.");
1050 return (ENOSPC);
1053 /* Relocate records to the new bucket */
1054 return (__ham_split_page(hashp, old_bucket, new_bucket));
1058 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1060 u_int32_t
1061 __ham_call_hash(hashp, k, len)
1062 HTAB *hashp;
1063 u_int8_t *k;
1064 int32_t len;
1066 u_int32_t n, bucket;
1068 n = (u_int32_t)hashp->hash(k, len);
1069 bucket = n & hashp->hdr->high_mask;
1070 if (bucket > hashp->hdr->max_bucket)
1071 bucket = bucket & hashp->hdr->low_mask;
1072 return (bucket);
1076 * Check for duplicates, and call __db_ret appropriately. Release
1077 * everything held by the cursor.
1079 static int
1080 __ham_dup_return(hashp, hcp, val, flags)
1081 HTAB *hashp;
1082 HASH_CURSOR *hcp;
1083 DBT *val;
1084 u_int32_t flags;
1086 PAGE *pp;
1087 DBT *myval, tmp_val;
1088 db_indx_t ndx;
1089 db_pgno_t pgno;
1090 u_int8_t *hk, type;
1091 int ret;
1092 db_indx_t len;
1094 /* Check for duplicate and return the first one. */
1095 ndx = H_DATAINDEX(hcp->bndx);
1096 type = HPAGE_TYPE(hcp->pagep, ndx);
1097 pp = hcp->pagep;
1098 myval = val;
1101 * There are 3 cases:
1102 * 1. We are not in duplicate, simply call db_ret.
1103 * 2. We are looking at keys and stumbled onto a duplicate.
1104 * 3. We are in the middle of a duplicate set. (ISDUP set)
1108 * Here we check for the case where we just stumbled onto a
1109 * duplicate. In this case, we do initialization and then
1110 * let the normal duplicate code handle it.
1112 if (!F_ISSET(hcp, H_ISDUP))
1113 if (type == H_DUPLICATE) {
1114 F_SET(hcp, H_ISDUP);
1115 hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1116 hashp->hdr->pagesize, hcp->bndx);
1117 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1118 if (flags == DB_LAST || flags == DB_PREV) {
1119 hcp->dndx = 0;
1120 hcp->dup_off = 0;
1121 do {
1122 memcpy(&len,
1123 HKEYDATA_DATA(hk) + hcp->dup_off,
1124 sizeof(db_indx_t));
1125 hcp->dup_off += DUP_SIZE(len);
1126 hcp->dndx++;
1127 } while (hcp->dup_off < hcp->dup_tlen);
1128 hcp->dup_off -= DUP_SIZE(len);
1129 hcp->dndx--;
1130 } else {
1131 memcpy(&len,
1132 HKEYDATA_DATA(hk), sizeof(db_indx_t));
1133 hcp->dup_off = 0;
1134 hcp->dndx = 0;
1136 hcp->dup_len = len;
1137 } else if (type == H_OFFDUP) {
1138 F_SET(hcp, H_ISDUP);
1139 memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
1140 sizeof(db_pgno_t));
1141 if (flags == DB_LAST || flags == DB_PREV) {
1142 if ((ret = __db_dend(hashp->dbp,
1143 pgno, &hcp->dpagep)) != 0)
1144 return (ret);
1145 hcp->dpgno = PGNO(hcp->dpagep);
1146 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1147 } else if ((ret = __ham_next_cpage(hashp,
1148 hcp, pgno, 0, H_ISDUP)) != 0)
1149 return (ret);
1154 * Now, everything is initialized, grab a duplicate if
1155 * necessary.
1157 if (F_ISSET(hcp, H_ISDUP))
1158 if (hcp->dpgno != PGNO_INVALID) {
1159 pp = hcp->dpagep;
1160 ndx = hcp->dndx;
1161 } else {
1163 * Copy the DBT in case we are retrieving into
1164 * user memory and we need the parameters for
1165 * it.
1167 memcpy(&tmp_val, val, sizeof(*val));
1168 F_SET(&tmp_val, DB_DBT_PARTIAL);
1169 tmp_val.dlen = hcp->dup_len;
1170 tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1171 myval = &tmp_val;
1176 * Finally, if we had a duplicate, pp, ndx, and myval should be
1177 * set appropriately.
1179 if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1180 &hcp->big_datalen)) != 0)
1181 return (ret);
1184 * In case we sent a temporary off to db_ret, set the real
1185 * return values.
1187 val->data = myval->data;
1188 val->size = myval->size;
1190 return (0);
1193 static int
1194 __ham_overwrite(hashp, hcp, nval)
1195 HTAB *hashp;
1196 HASH_CURSOR *hcp;
1197 DBT *nval;
1199 DBT *myval, tmp_val;
1200 u_int8_t *hk;
1202 if (F_ISSET(hashp->dbp, DB_AM_DUP))
1203 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1204 else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1205 /* Put/overwrite */
1206 memcpy(&tmp_val, nval, sizeof(*nval));
1207 F_SET(&tmp_val, DB_DBT_PARTIAL);
1208 tmp_val.doff = 0;
1209 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1210 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1211 memcpy(&tmp_val.dlen,
1212 HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1213 else
1214 tmp_val.dlen = LEN_HDATA(hcp->pagep,
1215 hashp->hdr->pagesize,hcp->bndx);
1216 myval = &tmp_val;
1217 } else /* Regular partial put */
1218 myval = nval;
1220 return (__ham_replpair(hashp, hcp, myval, 0));
1224 * Given a key and a cursor, sets the cursor to the page/ndx on which
1225 * the key resides. If the key is found, the cursor H_OK flag is set
1226 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1227 * If the key is not found, the H_OK flag is not set. If the sought
1228 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1229 * are set indicating where an add might take place. If it is 0,
1230 * non of the cursor pointer field are valid.
1232 static int
1233 __ham_lookup(hashp, hcp, key, sought, mode)
1234 HTAB *hashp;
1235 HASH_CURSOR *hcp;
1236 const DBT *key;
1237 u_int32_t sought;
1238 db_lockmode_t mode;
1240 db_pgno_t pgno;
1241 u_int32_t tlen;
1242 int match, ret, t_ret;
1243 u_int8_t *hk;
1246 * Set up cursor so that we're looking for space to add an item
1247 * as we cycle through the pages looking for the key.
1249 if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1250 return (ret);
1251 hcp->seek_size = sought;
1253 hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1254 while (1) {
1255 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1256 return (ret);
1258 if (F_ISSET(hcp, H_NOMORE))
1259 break;
1261 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1262 switch (HPAGE_PTYPE(hk)) {
1263 case H_OFFPAGE:
1264 memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1265 if (tlen == key->size) {
1266 memcpy(&pgno,
1267 HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1268 match = __db_moff(hashp->dbp, key, pgno);
1269 if (match == 0) {
1270 F_SET(hcp, H_OK);
1271 return (0);
1274 break;
1275 case H_KEYDATA:
1276 if (key->size == LEN_HKEY(hcp->pagep,
1277 hashp->hdr->pagesize, hcp->bndx) &&
1278 memcmp(key->data,
1279 HKEYDATA_DATA(hk), key->size) == 0) {
1280 F_SET(hcp, H_OK);
1281 return (0);
1283 break;
1284 case H_DUPLICATE:
1285 case H_OFFDUP:
1287 * These are errors because keys are never
1288 * duplicated, only data items are.
1290 return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1292 hashp->hash_collisions++;
1296 * Item was not found, adjust cursor properly.
1299 if (sought != 0)
1300 return (ret);
1302 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1303 ret = t_ret;
1304 return (ret);
1308 * Initialize a dbt using some possibly already allocated storage
1309 * for items.
1310 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1313 __ham_init_dbt(dbt, size, bufp, sizep)
1314 DBT *dbt;
1315 u_int32_t size;
1316 void **bufp;
1317 u_int32_t *sizep;
1319 memset(dbt, 0, sizeof(*dbt));
1320 if (*sizep < size) {
1321 if ((*bufp = (void *)(*bufp == NULL ?
1322 __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) {
1323 *sizep = 0;
1324 return (ENOMEM);
1326 *sizep = size;
1328 dbt->data = *bufp;
1329 dbt->size = size;
1330 return (0);
1334 * Adjust the cursor after an insert or delete. The cursor passed is
1335 * the one that was operated upon; we just need to check any of the
1336 * others.
1338 * len indicates the length of the item added/deleted
1339 * add indicates if the item indicated by the cursor has just been
1340 * added (add == 1) or deleted (add == 0).
1341 * dup indicates if the addition occurred into a duplicate set.
1343 * PUBLIC: void __ham_c_update
1344 * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1346 void
1347 __ham_c_update(hcp, chg_pgno, len, add, is_dup)
1348 HASH_CURSOR *hcp;
1349 db_pgno_t chg_pgno;
1350 u_int32_t len;
1351 int add, is_dup;
1353 DBC *cp;
1354 HTAB *hp;
1355 HASH_CURSOR *lcp;
1356 int page_deleted;
1359 * Regular adds are always at the end of a given page, so we never
1360 * have to adjust anyone's cursor after a regular add.
1362 if (!is_dup && add)
1363 return;
1366 * Determine if a page was deleted. If this is a regular update
1367 * (i.e., not is_dup) then the deleted page's number will be that in
1368 * chg_pgno, and the pgno in the cursor will be different. If this
1369 * was an onpage-duplicate, then the same conditions apply. If this
1370 * was an off-page duplicate, then we need to verify if hcp->dpgno
1371 * is the same (no delete) or different (delete) than chg_pgno.
1373 if (!is_dup || hcp->dpgno == PGNO_INVALID)
1374 page_deleted =
1375 chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1376 else
1377 page_deleted =
1378 chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1380 hp = hcp->db_cursor->dbp->master->internal;
1381 DB_THREAD_LOCK(hp->dbp);
1383 for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1384 cp = TAILQ_NEXT(cp, links)) {
1385 if (cp->internal == hcp)
1386 continue;
1388 lcp = (HASH_CURSOR *)cp->internal;
1390 if (!is_dup && lcp->pgno != chg_pgno)
1391 continue;
1393 if (is_dup) {
1394 if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1395 continue;
1396 if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1397 continue;
1400 if (page_deleted) {
1401 if (is_dup) {
1402 lcp->dpgno = hcp->dpgno;
1403 lcp->dndx = hcp->dndx;
1404 } else {
1405 lcp->pgno = hcp->pgno;
1406 lcp->bndx = hcp->bndx;
1407 lcp->bucket = hcp->bucket;
1409 F_CLR(lcp, H_ISDUP);
1410 continue;
1413 if (!is_dup && lcp->bndx > hcp->bndx)
1414 lcp->bndx--;
1415 else if (!is_dup && lcp->bndx == hcp->bndx)
1416 F_SET(lcp, H_DELETED);
1417 else if (is_dup && lcp->bndx == hcp->bndx) {
1418 /* Assign dpgno in case there was page conversion. */
1419 lcp->dpgno = hcp->dpgno;
1420 if (add && lcp->dndx >= hcp->dndx )
1421 lcp->dndx++;
1422 else if (!add && lcp->dndx > hcp->dndx)
1423 lcp->dndx--;
1424 else if (!add && lcp->dndx == hcp->dndx)
1425 F_SET(lcp, H_DELETED);
1427 /* Now adjust on-page information. */
1428 if (lcp->dpgno == PGNO_INVALID)
1429 if (add) {
1430 lcp->dup_tlen += len;
1431 if (lcp->dndx > hcp->dndx)
1432 lcp->dup_off += len;
1433 } else {
1434 lcp->dup_tlen -= len;
1435 if (lcp->dndx > hcp->dndx)
1436 lcp->dup_off -= len;
1440 DB_THREAD_UNLOCK(hp->dbp);
1444 * __ham_hdup --
1445 * This function gets called when we create a duplicate handle for a
1446 * threaded DB. It should create the private part of the DB structure.
1448 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1451 __ham_hdup(orig, new)
1452 DB *orig, *new;
1454 DBC *curs;
1455 HTAB *hashp;
1456 int ret;
1458 if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
1459 return (ENOMEM);
1461 new->internal = hashp;
1463 hashp->dbp = new;
1464 hashp->hlock = 0;
1465 hashp->hdr = NULL;
1466 hashp->hash = ((HTAB *)orig->internal)->hash;
1467 if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL)
1468 return (ENOMEM);
1469 hashp->local_errno = 0;
1470 hashp->hash_accesses = 0;
1471 hashp->hash_collisions = 0;
1472 hashp->hash_expansions = 0;
1473 hashp->hash_overflows = 0;
1474 hashp->hash_bigpages = 0;
1475 /* Initialize the cursor queue. */
1476 ret = __ham_c_init(new, NULL, &curs);
1477 TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
1478 return (ret);