/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */
static const char sccsid[] = "@(#)lock.c	10.52 (Sleepycat) 5/10/98";
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <errno.h>
#include <stdio.h>
#include <string.h>
#endif

#include "db_int.h"
#include "shqueue.h"
#include "db_shash.h"
#include "lock.h"
#include "common_ext.h"
static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, u_int32_t,
    const DBT *, db_lockmode_t, struct __db_lock **));
static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
static void __lock_remove_waiter
    __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
	if (lt->region->id >= DB_LOCK_MAXID)
		lt->region->id = 0;
	id = ++lt->region->id;
	UNLOCK_LOCKREGION(lt);
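/*
 * Illustrative sketch (not part of the original file): a caller typically
 * allocates one locker id up front via lock_id(), the routine excerpted
 * above, and reuses it for all later lock calls.  Environment setup and
 * error handling are elided.
 */
#if 0
	u_int32_t my_locker;

	(void)lock_id(lt, &my_locker);
	/* my_locker is unique until the id counter wraps at DB_LOCK_MAXID. */
#endif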
int
lock_vec(lt, locker, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	struct __db_lock *lp;
	DB_LOCKOBJ *sh_obj, *sh_locker;
	int i, ret, run_dd;
	/* Validate arguments. */
	if ((ret =
	    __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) != 0) {
		UNLOCK_LOCKREGION(lt);
		return (ret);
	}
	ret = 0;
	for (i = 0; i < nlist && ret == 0; i++) {
		switch (list[i].op) {
		case DB_LOCK_GET:
			ret = __lock_get_internal(lt, locker, flags,
			    list[i].obj, list[i].mode, &lp);
			if (ret != 0)
				break;
			list[i].lock = LOCK_TO_OFFSET(lt, lp);
			lt->region->nrequests++;
			break;
		case DB_LOCK_PUT:
			lp = OFFSET_TO_LOCK(lt, list[i].lock);
			if (lp->holder != locker) {
				ret = DB_LOCK_NOTHELD;
				break;
			}
			list[i].mode = lp->mode;

			/* XXX Need to copy the object. ??? */
			ret = __lock_put_internal(lt, lp, 0);
			break;
		case DB_LOCK_PUT_ALL:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;

			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
				if ((ret = __lock_put_internal(lt, lp, 1)) != 0)
					break;
			}
			__lock_freeobj(lt, sh_locker);
			lt->region->nlockers--;
			break;
		case DB_LOCK_PUT_OBJ:
			/* Look up the object in the hash table. */
			HASHLOOKUP(lt->hashtab, __db_lockobj, links,
			    list[i].obj, sh_obj, lt->region->table_size,
			    __lock_ohash, __lock_cmp);
			if (sh_obj == NULL) {
				ret = EINVAL;
				break;
			}

			/*
			 * Release waiters first, because they won't cause
			 * anyone else to be awakened.  If we release the
			 * lockers first, all the waiters get awakened
			 * needlessly.
			 */
			for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
				lt->region->nreleases += lp->refcount;
				__lock_remove_waiter(lt, sh_obj, lp,
				    DB_LSTAT_NOGRANT);
				__lock_checklocker(lt, lp, 1);
			}
			for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
				lt->region->nreleases += lp->refcount;
				SH_LIST_REMOVE(lp, locker_links, __db_lock);
				SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
				    __db_lock);
				lp->status = DB_LSTAT_FREE;
				SH_TAILQ_INSERT_HEAD(&lt->region->free_locks,
				    lp, links, __db_lock);
				__lock_checklocker(lt, lp, 1);
			}

			/* Now free the object. */
			__lock_freeobj(lt, sh_obj);
			break;
		case DB_LOCK_DUMP:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;

			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_NEXT(lp, locker_links, __db_lock))
				__lock_printlock(lt, lp, 1);

			if (SH_LIST_FIRST(&sh_locker->heldby, __db_lock)
			    == NULL) {
				__lock_freeobj(lt, sh_locker);
				lt->region->nlockers--;
			}
			break;
		default:
			ret = EINVAL;
			break;
		}
	}

	if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
		run_dd = 1;
		lt->region->need_dd = 0;
	} else
		run_dd = 0;

	UNLOCK_LOCKREGION(lt);

	if (ret == 0 && run_dd)
		lock_detect(lt, 0, lt->region->detect);

	if (elistp && ret != 0)
		*elistp = &list[i - 1];
	return (ret);
}
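/*
 * Illustrative sketch (not part of the original file): batching requests
 * through lock_vec().  The object name is made up; "locker" would come
 * from lock_id() as above.
 */
#if 0
	DB_LOCKREQ reqs[2], *failed;
	DBT obj1;
	int ret;

	memset(&obj1, 0, sizeof(obj1));
	obj1.data = "example-object";
	obj1.size = sizeof("example-object") - 1;

	reqs[0].op = DB_LOCK_GET;	/* Acquire a read lock... */
	reqs[0].obj = &obj1;
	reqs[0].mode = DB_LOCK_READ;
	reqs[1].op = DB_LOCK_PUT_ALL;	/* ...then release everything held. */

	if ((ret = lock_vec(lt, locker, 0, reqs, 2, &failed)) != 0)
		/* On error, "failed" points at the request that failed. */
		abort();
#endif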
int
lock_get(lt, locker, flags, obj, lock_mode, lock)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	const DBT *obj;
	db_lockmode_t lock_mode;
	DB_LOCK *lock;
{
	struct __db_lock *lockp;
	int ret;

	/* Validate arguments. */
	if ((ret =
	    __db_fchk(lt->dbenv, "lock_get", flags, DB_LOCK_NOWAIT)) != 0)
		return (ret);
	LOCK_LOCKREGION(lt);

	ret = __lock_validate_region(lt);
	if (ret == 0 && (ret = __lock_get_internal(lt,
	    locker, flags, obj, lock_mode, &lockp)) == 0) {
		*lock = LOCK_TO_OFFSET(lt, lockp);
		lt->region->nrequests++;
	}

	UNLOCK_LOCKREGION(lt);
	return (ret);
}
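/*
 * Illustrative sketch (not part of the original file): the single-lock
 * path.  A granted lock comes back as a region offset in a DB_LOCK that
 * is later handed to lock_put().
 */
#if 0
	DB_LOCK lock;
	int ret;

	ret = lock_get(lt,
	    locker, DB_LOCK_NOWAIT, &obj1, DB_LOCK_WRITE, &lock);
	if (ret == 0)
		ret = lock_put(lt, lock);
	else if (ret == DB_LOCK_NOTGRANTED)
		/* A conflicting holder or waiter exists; we chose not
		 * to block. */
		;
#endif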
int
lock_put(lt, lock)
	DB_LOCKTAB *lt;
	DB_LOCK lock;
{
	struct __db_lock *lockp;
	int ret, run_dd;

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) != 0)
		run_dd = 0;
	else {
		lockp = OFFSET_TO_LOCK(lt, lock);
		ret = __lock_put_internal(lt, lockp, 0);

		__lock_checklocker(lt, lockp, 0);

		if (lt->region->need_dd &&
		    lt->region->detect != DB_LOCK_NORUN) {
			run_dd = 1;
			lt->region->need_dd = 0;
		} else
			run_dd = 0;
	}

	UNLOCK_LOCKREGION(lt);

	if (ret == 0 && run_dd)
		lock_detect(lt, 0, lt->region->detect);

	return (ret);
}
static int
__lock_put_internal(lt, lockp, do_all)
	DB_LOCKTAB *lt;
	struct __db_lock *lockp;
	int do_all;
{
	struct __db_lock *lp_w, *lp_h, *next_waiter;
	DB_LOCKOBJ *sh_obj;
	int state_changed;
	if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
	    lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
		__db_err(lt->dbenv, "lock_put: invalid lock %lu",
		    (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
		return (EINVAL);
	}

	if (do_all)
		lt->region->nreleases += lockp->refcount;
	else
		lt->region->nreleases++;
	if (do_all == 0 && lockp->refcount > 1) {
		lockp->refcount--;
		return (0);
	}
	/* Get the object associated with this lock. */
	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);

	/* Remove lock from locker list. */
	SH_LIST_REMOVE(lockp, locker_links, __db_lock);

	/* Remove this lock from its holders/waitlist. */
	if (lockp->status != DB_LSTAT_HELD)
		__lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
	else
		SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);
	/*
	 * We need to do lock promotion.  We also need to determine if
	 * we're going to need to run the deadlock detector again.  If
	 * we release locks, and there are waiters, but no one gets promoted,
	 * then we haven't fundamentally changed the lockmgr state, so
	 * we may still have a deadlock and we have to run again.  However,
	 * if there were no waiters, or we actually promoted someone, then
	 * we are OK and we don't have to run it immediately.
	 */
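	/*
	 * Worked example (not from the original source): locker A holds
	 * WRITE; B and C wait for READ.  Releasing A's lock promotes B,
	 * and then C as well, since the two READ locks do not conflict,
	 * so state_changed is set and the detector need not run.  If
	 * instead the head waiter still conflicted with a remaining
	 * holder, no one would be promoted, state_changed would stay 0,
	 * and need_dd would be set below.
	 */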
	for (lp_w = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock),
	    state_changed = lp_w == NULL;
	    lp_w != NULL;
	    lp_w = next_waiter) {
		next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
		for (lp_h = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
		    lp_h != NULL;
		    lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
			if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
			    lp_h->holder != lp_w->holder)
				break;
		}
		if (lp_h != NULL)	/* Found a conflict. */
			break;

		/* No conflict, promote the waiting lock. */
		SH_TAILQ_REMOVE(&sh_obj->waiters, lp_w, links, __db_lock);
		lp_w->status = DB_LSTAT_PENDING;
		SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links);

		/* Wake up waiter. */
		(void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd);
		state_changed = 1;
	}
	/* Check if object should be reclaimed. */
	if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
		HASHREMOVE_EL(lt->hashtab, __db_lockobj,
		    links, sh_obj, lt->region->table_size, __lock_lhash);
		if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
			__db_shalloc_free(lt->mem,
			    SH_DBT_PTR(&sh_obj->lockobj));
		SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
		    __db_lockobj);
		state_changed = 1;
	}
	/* Free the lock. */
	lockp->status = DB_LSTAT_FREE;
	SH_TAILQ_INSERT_HEAD(&lt->region->free_locks, lockp, links, __db_lock);

	/*
	 * If we did not promote anyone, we need to run the deadlock
	 * detector again.
	 */
	if (state_changed == 0)
		lt->region->need_dd = 1;

	return (0);
}
static int
__lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	const DBT *obj;
	db_lockmode_t lock_mode;
	struct __db_lock **lockp;
{
	struct __db_lock *newl, *lp;
	DB_LOCKOBJ *sh_obj, *sh_locker;
	DB_LOCKREGION *lrp;
	size_t newl_off;
	int ihold, ret;
	/*
	 * Check that lock mode is valid.
	 */
	lrp = lt->region;
	if ((u_int32_t)lock_mode >= lrp->nmodes) {
		__db_err(lt->dbenv,
		    "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
		return (EINVAL);
	}
	/* Allocate a new lock.  Optimize for the common case of a grant. */
	if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
		if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
			return (ret);
		lrp = lt->region;
		newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
	}
	newl_off = LOCK_TO_OFFSET(lt, newl);

	/* Optimize for common case of granting a lock. */
	SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);

	newl->mode = lock_mode;
	newl->status = DB_LSTAT_HELD;
	newl->holder = locker;
	newl->refcount = 1;
	if ((ret = __lock_getobj(lt, 0, obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0)
		goto err;

	lrp = lt->region;	/* getobj might have grown the region. */
	newl = OFFSET_TO_LOCK(lt, newl_off);

	/* Now make the new lock point to its object. */
	newl->obj = SH_PTR_TO_OFF(newl, sh_obj);
	/*
	 * Now we have a lock and an object and we need to see if we should
	 * grant the lock.  We use a FIFO ordering so we can only grant a
	 * new lock if it does not conflict with anyone on the holders list
	 * OR anyone on the waiters list.  The reason that we don't grant if
	 * there's a conflict is that this can lead to starvation (a writer
	 * waiting on a popularly read item will never be granted).  The
	 * downside of this is that a waiting reader can prevent an upgrade
	 * from reader to writer, which is not uncommon.
	 *
	 * There is one exception to the no-conflict rule.  If a lock is held
	 * by the requesting locker AND the new lock does not conflict with
	 * any other holders, then we grant the lock.  The most common place
	 * this happens is when the holder has a WRITE lock and a READ lock
	 * request comes in for the same locker.  If we do not grant the read
	 * lock, then we guarantee deadlock.
	 *
	 * In case of conflict, we put the new lock on the end of the waiters
	 * list.
	 */
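	/*
	 * Example (not from the original source): with the usual read/write
	 * conflict matrix consulted by CONFLICTS(), indexed as
	 * [held mode][requested mode] with 1 meaning "conflicts":
	 *
	 *		NG	READ	WRITE
	 *	NG	 0	 0	 0
	 *	READ	 0	 0	 1
	 *	WRITE	 0	 1	 1
	 *
	 * a READ request joins READ holders at once, but queues FIFO behind
	 * a waiting WRITE from another locker; that is exactly the
	 * starvation-avoidance rule described above.
	 */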
	ihold = 0;
	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
	    lp != NULL;
	    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
		if (locker == lp->holder) {
			if (lp->mode == lock_mode &&
			    lp->status == DB_LSTAT_HELD) {
				/* Lock is held, just inc the ref count. */
				lp->refcount++;
				SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
				    newl, links, __db_lock);
				*lockp = lp;
				return (0);
			} else
				ihold = 1;
		} else if (CONFLICTS(lt, lp->mode, lock_mode))
			break;
	}
	if (lp == NULL && !ihold)
		for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
		    lp != NULL;
		    lp = SH_TAILQ_NEXT(lp, links, __db_lock))
			if (CONFLICTS(lt, lp->mode, lock_mode) &&
			    locker != lp->holder)
				break;
->holders
, newl
, links
);
457 else if (!(flags
& DB_LOCK_NOWAIT
))
458 SH_TAILQ_INSERT_TAIL(&sh_obj
->waiters
, newl
, links
);
460 /* Free the lock and return an error. */
461 newl
->status
= DB_LSTAT_FREE
;
462 SH_TAILQ_INSERT_HEAD(&lrp
->free_locks
, newl
, links
, __db_lock
);
463 return (DB_LOCK_NOTGRANTED
);
	/*
	 * This is really a blocker for the process, so initialize it
	 * set.  That way the current process will block when it tries
	 * to get it and the waking process will release it.
	 */
	(void)__db_mutex_init(&newl->mutex,
	    MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
	(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);
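	/*
	 * Handoff sketch (not from the original source), in pthread terms
	 * for illustration only:
	 *
	 *	pthread_mutex_lock(&blocker);    initialize it "set"
	 *	...
	 *	pthread_mutex_lock(&blocker);    waiter blocks here until
	 *	                                 another process runs
	 *	pthread_mutex_unlock(&blocker);  to grant the lock
	 */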
	/*
	 * Now, insert the lock onto its locker's list.
	 */
	if ((ret =
	    __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
		goto err;

	lrp = lt->region;
	SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);

	ret = 0;
	if (lp != NULL) {
		newl->status = DB_LSTAT_WAITING;
		/*
		 * We are about to wait; must release the region mutex.
		 * Then, when we wakeup, we need to reacquire the region
		 * mutex before continuing.
		 */
		if (lrp->detect == DB_LOCK_NORUN)
			lt->region->need_dd = 1;
		UNLOCK_LOCKREGION(lt);

		/*
		 * We are about to wait; before waiting, see if the deadlock
		 * detector should be run.
		 */
		if (lrp->detect != DB_LOCK_NORUN)
			ret = lock_detect(lt, 0, lrp->detect);

		(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);
		LOCK_LOCKREGION(lt);
		if (newl->status != DB_LSTAT_PENDING) {
			/* Return to free list. */
			__lock_checklocker(lt, newl, 0);
			SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
			    __db_lock);
			switch (newl->status) {
			case DB_LSTAT_ABORTED:
				ret = DB_LOCK_DEADLOCK;
				break;
			case DB_LSTAT_NOGRANT:
				ret = DB_LOCK_NOTGRANTED;
				break;
			default:
				ret = EINVAL;
				break;
			}
			newl->status = DB_LSTAT_FREE;
			newl = NULL;
		} else
			newl->status = DB_LSTAT_HELD;
	}

	*lockp = newl;
	return (ret);

err:	newl->status = DB_LSTAT_FREE;
	SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
	return (ret);
}
/*
 * __lock_is_locked --
 *	Return whether the locker holds a lock of the given mode on the
 *	object described by the DBT.
 *
 * PUBLIC: int __lock_is_locked
 * PUBLIC:    __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t));
 */
int
__lock_is_locked(lt, locker, dbt, mode)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DBT *dbt;
	db_lockmode_t mode;
{
	struct __db_lock *lp;
	DB_LOCKOBJ *sh_obj;
	DB_LOCKREGION *lrp;
	lrp = lt->region;

	/* Look up the object in the hash table. */
	HASHLOOKUP(lt->hashtab, __db_lockobj, links,
	    dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
	if (sh_obj == NULL)
		return (0);

	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
	    lp != NULL;
	    lp = SH_TAILQ_NEXT(lp, links, __db_lock))
		if (lp->holder == locker && lp->mode == mode)
			return (1);

	return (0);
}
/*
 * __lock_printlock --
 *
 * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
 */
void
__lock_printlock(lt, lp, ispgno)
	DB_LOCKTAB *lt;
	struct __db_lock *lp;
	int ispgno;
{
	DB_LOCKOBJ *lockobj;
	db_pgno_t pgno;
	size_t obj;
	u_int8_t *ptr;
	const char *mode, *status;
	switch (lp->mode) {
	case DB_LOCK_IREAD:
		mode = "IREAD";
		break;
	case DB_LOCK_IWR:
		mode = "IWR";
		break;
	case DB_LOCK_IWRITE:
		mode = "IWRITE";
		break;
	case DB_LOCK_NG:
		mode = "NGRANT";
		break;
	case DB_LOCK_READ:
		mode = "READ";
		break;
	case DB_LOCK_WRITE:
		mode = "WRITE";
		break;
	default:
		mode = "UNKNOWN";
		break;
	}
	switch (lp->status) {
	case DB_LSTAT_ABORTED:
		status = "ABORT";
		break;
	case DB_LSTAT_NOGRANT:
		status = "NONE";
		break;
	case DB_LSTAT_WAITING:
		status = "WAIT";
		break;
	case DB_LSTAT_PENDING:
		status = "PENDING";
		break;
	default:
		status = "UNKNOWN";
		break;
	}
	printf("\t%lx\t%s\t%lu\t%s\t",
	    (u_long)lp->holder, mode, (u_long)lp->refcount, status);
	lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
	ptr = SH_DBT_PTR(&lockobj->lockobj);
	if (ispgno) {
		/* Assume this is a DBT lock. */
		memcpy(&pgno, ptr, sizeof(db_pgno_t));
		printf("page %lu\n", (u_long)pgno);
	} else {
		obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region;
		printf("0x%lx ", (u_long)obj);
		__db_pr(ptr, lockobj->lockobj.size);
		printf("\n");
	}
}
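/*
 * Example output (illustrative, not from the original source): for a page
 * lock requested once by locker 0x51 and still waiting, the format strings
 * above would produce a line such as
 *
 *	51	READ	1	WAIT	page 12
 *
 * assuming the mode and status switches map lp->mode and lp->status to
 * "READ" and "WAIT".
 */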
/*
 * __lock_getobj --
 *
 * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *,
 * PUBLIC:     u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **));
 */
int
__lock_getobj(lt, locker, dbt, type, objp)
	DB_LOCKTAB *lt;
	u_int32_t locker, type;
	const DBT *dbt;
	DB_LOCKOBJ **objp;
{
	DB_LOCKREGION *lrp;
	DB_LOCKOBJ *sh_obj;
	u_int32_t obj_size;
	void *p, *src;
	int ret;
	lrp = lt->region;

	/* Look up the object in the hash table. */
	if (type == DB_LOCK_OBJTYPE) {
		HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
		    lrp->table_size, __lock_ohash, __lock_cmp);
		obj_size = dbt->size;
	} else {
		HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker,
		    sh_obj, lrp->table_size, __lock_locker_hash,
		    __lock_locker_cmp);
		obj_size = sizeof(locker);
	}
	/*
	 * If we found the object, then we can just return it.  If
	 * we didn't find the object, then we need to create it.
	 */
	if (sh_obj == NULL) {
		/* Create new object and then insert it into hash table. */
		if ((sh_obj =
		    SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
			if ((ret =
			    __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
				return (ret);
			lrp = lt->region;
			sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
		}
		/*
		 * If we can fit this object in the structure, do so instead
		 * of shalloc-ing space for it.
		 */
		if (obj_size <= sizeof(sh_obj->objdata))
			p = sh_obj->objdata;
		else if ((ret =
		    __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
			if ((ret = __lock_grow_region(lt,
			    DB_LOCK_MEM, obj_size)) != 0)
				return (ret);
			lrp = lt->region;
			/* Reacquire the head of the list. */
			sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
			    __db_lockobj);
			(void)__db_shalloc(lt->mem, obj_size, 0, &p);
		}
		src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
		memcpy(p, src, obj_size);

		sh_obj->type = type;
		SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
		SH_TAILQ_INIT(&sh_obj->waiters);
		if (type == DB_LOCK_LOCKER)
			SH_LIST_INIT(&sh_obj->heldby);
		else
			SH_TAILQ_INIT(&sh_obj->holders);
		sh_obj->lockobj.size = obj_size;
		sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);

		HASHINSERT(lt->hashtab,
		    __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash);

		if (type == DB_LOCK_LOCKER)
			lrp->nlockers++;
	}

	*objp = sh_obj;
	return (0);
}
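/*
 * Illustrative note (not part of the original file): the same routine
 * serves both hash namespaces.  Called with DB_LOCK_OBJTYPE it
 * finds-or-creates the lock object named by the DBT; called with
 * DB_LOCK_LOCKER it finds-or-creates the per-locker header whose heldby
 * list links every lock that locker holds.
 */
#if 0
	DB_LOCKOBJ *sh_obj, *sh_locker;

	(void)__lock_getobj(lt, 0, &obj1, DB_LOCK_OBJTYPE, &sh_obj);
	(void)__lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker);
#endif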
/*
 * Any lock on the waitlist has a process waiting for it.  Therefore, we
 * can't return the lock to the freelist immediately.  Instead, we can
 * remove the lock from the list of waiters, set the status field of the
 * lock, and then let the process waking up return the lock to the
 * free list.
 */
static void
__lock_remove_waiter(lt, sh_obj, lockp, status)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *sh_obj;
	struct __db_lock *lockp;
	db_status_t status;
{
	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
	lockp->status = status;

	/* Wake whoever is waiting on this lock. */
	(void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
}
static void
__lock_checklocker(lt, lockp, do_remove)
	DB_LOCKTAB *lt;
	struct __db_lock *lockp;
	int do_remove;
{
	DB_LOCKOBJ *sh_locker;

	if (do_remove)
		SH_LIST_REMOVE(lockp, locker_links, __db_lock);

	/* If the locker list is NULL, free up the object. */
	if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
	    == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
		__lock_freeobj(lt, sh_locker);
		lt->region->nlockers--;
	}
}
static void
__lock_freeobj(lt, obj)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *obj;
{
	HASHREMOVE_EL(lt->hashtab,
	    __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
	if (obj->lockobj.size > sizeof(obj->objdata))
		__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
	SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
}