HAMMER VFS - Major retooling of the refcount mechanics, and fix a deadlock
[dragonfly.git] / sys/vfs/hammer/hammer_subs.c
/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->lowner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
				if (hammer_debug_locks)
					kprintf("hammer_lock_ex: try again\n");
			}
		}
	}
}

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	int error;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	const char *ident = "hmrlck";

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into kernel debugger for
			 * now.  A cont continues w/ an exclusive lock.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
		}
	}
}

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into kernel debugger for
			 * now.  A cont continues w/ an exclusive lock.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	for (;;) {
		lv = lock->lockval;

		if ((lv & ~HAMMER_LOCKF_WANTED) == 1) {
			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
			if (lock->lowner != curthread)
				panic("hammer_lock_upgrade: illegal state");
			error = 0;
			break;
		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
			panic("hammer_lock_upgrade: lock is not held");
			/* NOT REACHED */
			error = EDEADLK;
			break;
		} else {
			error = EDEADLK;
			break;
		}
	}
	return (error);
}

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
		 (HAMMER_LOCKF_EXCLUSIVE | 1));
	KKASSERT(lock->lowner == td);

	/*
	 * NOTE: Must clear owner before releasing exclusivity
	 */
	lock->lowner = NULL;

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
			if (lv & HAMMER_LOCKF_WANTED)
				wakeup(&lock->lockval);
			break;
		}
	}
}

void
hammer_unlock(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	lv = lock->lockval;
	KKASSERT(lv != 0);
	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		KKASSERT(lock->lowner == td);

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (nlv > 1) {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (nlv == 1) {
			nlv = 0;
			if (lv & HAMMER_LOCKF_EXCLUSIVE)
				lock->lowner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
		} else {
			panic("hammer_unlock: lock %p is not held", lock);
		}
	}
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if lock is held shared, and > 0 if held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	u_int lv = lock->lockval;

	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		return(1);
	else if (lv)
		return(-1);
	panic("hammer_lock_status: lock must be held: %p", lock);
}

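/*
 * Illustrative sketch only (not a HAMMER code path): how the structural
 * lock API above is typically combined.  The struct hammer_lock must be
 * referenced (refs != 0) before the lockval operations are used.
 */
#if 0
static void
example_lock_usage(struct hammer_lock *lkp)
{
	hammer_ref(lkp);			/* lockval ops assert refs != 0 */
	hammer_lock_ex(lkp);			/* recursive if we already own it */
	/* ... modify the structure ... */
	hammer_lock_downgrade(lkp);		/* ex -> sh, wakes up waiters */
	/* ... read-only access ... */
	hammer_unlock(lkp);

	if (hammer_lock_sh_try(lkp) == 0) {		/* EAGAIN on conflict */
		if (hammer_lock_upgrade(lkp) == 0)	/* EDEADLK if >1 sharer */
			hammer_lock_downgrade(lkp);
		hammer_unlock(lkp);
	}
	hammer_rel(lkp);
}
#endif
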
/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
			nlv = (lv + 1) | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			nlv = (lv + 1);
			KKASSERT((int)nlv > 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			KKASSERT((int)lv > 0);
			nlv = (lv - 1);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK is found to be set.
 *
 * This case will return TRUE, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * FALSE is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * TRUE is only returned to one thread and the others will block.
 * Effectively a TRUE indicator means 'someone transitioned 0->1
 * and you are the first guy to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	/*
	 * Integrated reference count bump, lock, and check, with hot-path.
	 *
	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
	 * (b) Return 0	(-LOCKED, -CHECK)	N->N+1 transition
	 * (c) Break out (+CHECK)		Check condition and Cannot lock
	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
	 */
	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			nlv = (lv + 1);
			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
				nlv |= HAMMER_REFS_CHECK;
			if ((nlv & HAMMER_REFS_CHECK) == 0) {
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					return(0);
			} else if (lv & HAMMER_REFS_LOCKED) {
				/* CHECK also set here */
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					break;
			} else {
				/* CHECK also set here */
				nlv |= HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			}
		}
	}

	/*
	 * Deferred check condition because we were unable to acquire the
	 * lock.  We must block until the check condition is cleared due
	 * to a race with another thread, or we are able to acquire the
	 * lock.
	 *
	 * (a) Return 0	(-CHECK)		Another thread handled it
	 * (b) Return 1	(+LOCKED, +CHECK)	We handled it.
	 */
	for (;;) {
		lv = lock->refs;
		if ((lv & HAMMER_REFS_CHECK) == 0)
			return(0);
		if (lv & HAMMER_REFS_LOCKED) {
			tsleep_interlock(&lock->refs, 0);
			nlv = (lv | HAMMER_REFS_WANTED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
		} else {
			/* CHECK also set here */
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		}
	}
	/* not reached */
}

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * TRUE is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;

		if (lv) {
			panic("hammer_ref_interlock_true: bad lock %p %08x\n",
			      lock, lock->refs);
		}
		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			lock->rowner = curthread;
			return (1);
		}
	}
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~HAMMER_REFS_FLAGS;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

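/*
 * Example (sketch, not a HAMMER code path): typical shape of a load path
 * built on the interlock pair above.  load_the_structure() is a placeholder
 * for whatever work must be done exactly once while other referencers block
 * on CHECK.
 */
#if 0
static void
example_ref_interlock(struct hammer_lock *lkp)
{
	if (hammer_ref_interlock(lkp)) {
		/*
		 * 0->1 transition (or CHECK was set): we hold the interlock
		 * and must validate/load before letting other refs through.
		 */
		load_the_structure();
		hammer_ref_interlock_done(lkp);	/* clears CHECK, wakes waiters */
	} else {
		/* Additional ref on an already-validated structure. */
	}
}
#endif
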
/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * TRUE is returned on 1->0 transitions with the lock held on return
 * and FALSE is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock, the TRUE indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return TRUE after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
	u_int lv;
	u_int nlv;

	/*
	 * In locked mode (failure/unload path) we release the
	 * ref-count but leave it locked.
	 */
	if (locked) {
		hammer_rel(lock);
		return(1);
	}

	/*
	 * Integrated reference count drop with LOCKED, plus the hot-path
	 * returns.
	 */
	for (;;) {
		lv = lock->refs;

		if (lv == 1) {
			nlv = 0 | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			if ((lv & HAMMER_REFS_LOCKED) == 0) {
				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			} else {
				nlv = lv | HAMMER_REFS_WANTED;
				tsleep_interlock(&lock->refs, 0);
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					tsleep(&lock->refs, PINTERLOCKED,
					       "h0lk", 0);
				}
			}
		} else {
			nlv = (lv - 1);
			KKASSERT((int)nlv >= 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return(0);
		}
	}
	/* not reached */
}

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

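/*
 * Example (sketch, not a HAMMER code path): the matching release side.
 * unload_the_structure() is a placeholder for teardown work done while
 * the interlock is held.
 */
#if 0
static void
example_rel_interlock(struct hammer_lock *lkp)
{
	if (hammer_rel_interlock(lkp, 0)) {
		/*
		 * We made the 1->0 transition and hold the interlock.  A
		 * racing hammer_ref_interlock() will have set CHECK and
		 * will re-validate after we release the interlock.
		 */
		unload_the_structure();
		hammer_rel_interlock_done(lkp, 0);
	}
}
#endif
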
/*
 * Acquire the interlock on lock->refs.
 *
 * Return TRUE if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv & HAMMER_REFS_LOCKED) {
			nlv = lv | HAMMER_REFS_WANTED;
			tsleep_interlock(&lock->refs, 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
		} else {
			nlv = (lv | HAMMER_REFS_LOCKED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
			}
		}
	}
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			return(0);
		}
	}
	/* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the refs is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		else
			nlv |= HAMMER_REFS_CHECK;

		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			return;
		}
	}
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data buffers
 * (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement the deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}

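/*
 * Example (sketch, not a HAMMER code path): a modifying operation wraps its
 * meta-data updates in the shared sync lock so they become part of a single
 * flush group.  modify_meta_data() is a placeholder.
 */
#if 0
static void
example_sync_lock(hammer_transaction_t trans)
{
	hammer_sync_lock_sh(trans);
	modify_meta_data(trans);	/* atomic w/ regard to the flush group */
	hammer_sync_unlock(trans);
}
#endif
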
/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
	return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}

/*
 * ALGORITHM VERSION 1:
 *	Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 *	to generate a unique key and will scan all entries with the same upper
 *	32 bits when issuing a lookup.
 *
 *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 2:
 *
 *	The 64 bit hash key is generated from the following components.  The
 *	first three characters are encoded as 5-bit quantities, the middle
 *	N characters are hashed into a 6 bit quantity, and the last two
 *	characters are encoded as 5-bit quantities.  A 32 bit hash of the
 *	entire filename is encoded in the low 32 bits.  Bit 0 is set to
 *	0 to guarantee us a 2^24 bit iteration space.
 *
 *	0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
 *
 *	This gives us a domain sort for the first three characters, the last
 *	two characters, and breaks the middle space into 64 random domains.
 *	The domain sort folds upper case, lower case, digits, and punctuation
 *	spaces together, the idea being the filenames tend to not be a mix
 *	of those domains.
 *
 *	The 64 random domains act as a sub-sort for the middle characters
 *	but may cause a random seek.  If the filesystem is being accessed
 *	in sorted order we should tend to get very good linearity for most
 *	filenames and devolve into more random seeks otherwise.
 *
 *	We strip bit 63 in order to provide a positive key, this way a seek
 *	offset of 0 will represent the base of the directory.
 *
 *	This function can never return 0.  We use the MSB-0 space to synthesize
 *	artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
			 u_int32_t *max_iterationsp)
{
	int64_t key;
	int32_t crcx;
	const char *aname = name;

	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;

		switch(len) {
		default:
			crcx = crc32(aname + 3, len - 5);
			crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
			key |= (int64_t)(crcx & 0x3F) << 42;
			/* fall through */
		case 5:
		case 4:
			/* fall through */
		case 3:
			key |= ((int64_t)(aname[2] & 0x1F) << 48);
			/* fall through */
		case 2:
			key |= ((int64_t)(aname[1] & 0x1F) << 53) |
			       ((int64_t)(aname[len-2] & 0x1F) << 37);
			/* fall through */
		case 1:
			key |= ((int64_t)(aname[0] & 0x1F) << 58) |
			       ((int64_t)(aname[len-1] & 0x1F) << 32);
			/* fall through */
		case 0:
			break;
		}
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			kprintf("namekey2: 0x%016llx %*.*s\n",
				(long long)key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		panic("hammer_directory_namekey: bad algorithm %p\n", dip);
		break;
	}
	return(key);
}

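/*
 * Worked example (sketch) of ALGORITHM VERSION 2 (the DIRHASH_ALG1 case
 * above) for the name "example.txt" (len = 11):
 *
 *	aname[0]='e' aname[1]='x' aname[2]='a'	-> 5-bit fields at bits 58/53/48
 *	crc32("mple.t", 6) folded to 6 bits	-> bits 42-47 (middle domain)
 *	aname[9]='x' aname[10]='t'		-> 5-bit fields at bits 37/32
 *	crc32("example.txt", 11) & 0xFFFFFFFE	-> low 32 bits, bit 0 forced to 0
 *
 * Only the field placement is shown; the resulting key value depends on the
 * two crc32() results.
 */
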
/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, u_int32_t *localizationp)
{
	hammer_tid_t tid;
	u_int32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		localization = strtoul(str + 1, &ptr, 10) << 16;
		if (ptr - str != 6)
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}

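/*
 * Examples of strings accepted by hammer_str_to_tid() (sketch):
 *
 *	"-1"				tid = (hammer_tid_t)-1, no PFS given
 *	"0x0000000123456789"		explicit 16-digit hex TID, no PFS given
 *	"0x0000000123456789:00001"	same TID, PFS #1 (localization 1 << 16)
 *
 * Anything else, including trailing characters, fails with EINVAL.
 */
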
void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
	hammer_crc_t crc;

	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
	return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
	return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *	 to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_t crc;

	if (leaf->data_len == 0) {
		crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			if (leaf->data_len != sizeof(struct hammer_inode_data))
				return(0);
			crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			crc = crc32(data, leaf->data_len);
			break;
		}
	}
	return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	if (leaf->data_len == 0) {
		leaf->data_crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			KKASSERT(leaf->data_len ==
				 sizeof(struct hammer_inode_data));
			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			leaf->data_crc = crc32(data, leaf->data_len);
			break;
		}
	}
}

void
hkprintf(const char *ctl, ...)
{
	__va_list va;

	if (hammer_debug_debug) {
		__va_start(va, ctl);
		kvprintf(ctl, va);
		__va_end(va);
	}
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

int
hammer_blockoff(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return((int)file_offset & HAMMER_BUFMASK);
	else
		return((int)file_offset & HAMMER_XBUFMASK);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
	if (file_offset1 < HAMMER_XDEMARC) {
		if (file_offset2 <= HAMMER_XDEMARC)
			return(file_offset2);
		return(HAMMER_XDEMARC);
	}
	panic("hammer_blockdemarc: illegal range %lld %lld\n",
	      (long long)file_offset1, (long long)file_offset2);
}

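/*
 * Example (sketch), assuming the stock constants (HAMMER_BUFSIZE = 16KB,
 * HAMMER_XBUFSIZE = 64KB, HAMMER_XDEMARC = 1MB): an I/O starting at file
 * offset 512KB uses 16KB buffers, hammer_blockdemarc(512KB, 4MB) clips the
 * range at the 1MB demarc, and the remainder beyond 1MB uses 64KB buffers.
 */
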
udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
	u_int32_t crc;

	crc = crc32(uuid, sizeof(*uuid));
	return((udev_t)crc);
}