2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
37 * HAMMER structural locking
41 #include <sys/dirent.h>
44 hammer_lock_ex_ident(struct hammer_lock
*lock
, const char *ident
)
46 thread_t td
= curthread
;
50 KKASSERT(lock
->refs
> 0);
55 nlv
= 1 | HAMMER_LOCKF_EXCLUSIVE
;
56 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
60 } else if ((lv
& HAMMER_LOCKF_EXCLUSIVE
) && lock
->owner
== td
) {
62 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
))
65 if (hammer_debug_locks
) {
66 kprintf("hammer_lock_ex: held by %p\n",
69 nlv
= lv
| HAMMER_LOCKF_WANTED
;
70 ++hammer_contention_count
;
71 tsleep_interlock(lock
, 0);
72 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
73 tsleep(lock
, PINTERLOCKED
, ident
, 0);
74 if (hammer_debug_locks
)
75 kprintf("hammer_lock_ex: try again\n");
82 * Try to obtain an exclusive lock
85 hammer_lock_ex_try(struct hammer_lock
*lock
)
87 thread_t td
= curthread
;
92 KKASSERT(lock
->refs
> 0);
97 nlv
= 1 | HAMMER_LOCKF_EXCLUSIVE
;
98 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
103 } else if ((lv
& HAMMER_LOCKF_EXCLUSIVE
) && lock
->owner
== td
) {
105 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
118 * Obtain a shared lock
120 * We do not give pending exclusive locks priority over shared locks as
121 * doing so could lead to a deadlock.
124 hammer_lock_sh(struct hammer_lock
*lock
)
126 thread_t td
= curthread
;
130 KKASSERT(lock
->refs
> 0);
134 if ((lv
& HAMMER_LOCKF_EXCLUSIVE
) == 0) {
136 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
))
138 } else if (lock
->owner
== td
) {
140 * Disallowed case, drop into kernel debugger for
141 * now. A cont continues w/ an exclusive lock.
144 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
145 if (hammer_debug_critical
)
146 Debugger("hammer_lock_sh: holding ex");
150 nlv
= lv
| HAMMER_LOCKF_WANTED
;
151 ++hammer_contention_count
;
152 tsleep_interlock(lock
, 0);
153 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
154 tsleep(lock
, PINTERLOCKED
, "hmrlck", 0);
161 hammer_lock_sh_try(struct hammer_lock
*lock
)
163 thread_t td
= curthread
;
168 KKASSERT(lock
->refs
> 0);
172 if ((lv
& HAMMER_LOCKF_EXCLUSIVE
) == 0) {
174 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
178 } else if (lock
->owner
== td
) {
180 * Disallowed case, drop into kernel debugger for
181 * now. A cont continues w/ an exclusive lock.
184 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
185 if (hammer_debug_critical
)
186 Debugger("hammer_lock_sh: holding ex");
199 * Upgrade a shared lock to an exclusively held lock. This function will
200 * return EDEADLK if there is more than one shared holder.
202 * No error occurs and no action is taken if the lock is already exclusively
203 * held by the caller. If the lock is not held at all or held exclusively
204 * by someone else, this function will panic.
207 hammer_lock_upgrade(struct hammer_lock
*lock
)
209 thread_t td
= curthread
;
217 if ((lv
& ~HAMMER_LOCKF_WANTED
) == 1) {
218 nlv
= lv
| HAMMER_LOCKF_EXCLUSIVE
;
219 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
224 } else if (lv
& HAMMER_LOCKF_EXCLUSIVE
) {
225 if (lock
->owner
!= curthread
)
226 panic("hammer_lock_upgrade: illegal state");
229 } else if ((lv
& ~HAMMER_LOCKF_WANTED
) == 0) {
230 panic("hammer_lock_upgrade: lock is not held");
243 * Downgrade an exclusively held lock to a shared lock.
246 hammer_lock_downgrade(struct hammer_lock
*lock
)
248 thread_t td __debugvar
= curthread
;
252 KKASSERT((lock
->lockval
& ~HAMMER_LOCKF_WANTED
) ==
253 (HAMMER_LOCKF_EXCLUSIVE
| 1));
254 KKASSERT(lock
->owner
== td
);
257 * NOTE: Must clear owner before releasing exclusivity
263 nlv
= lv
& ~(HAMMER_LOCKF_EXCLUSIVE
| HAMMER_LOCKF_WANTED
);
264 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
265 if (lv
& HAMMER_LOCKF_WANTED
)
273 hammer_unlock(struct hammer_lock
*lock
)
275 thread_t td __debugvar
= curthread
;
281 if (lv
& HAMMER_LOCKF_EXCLUSIVE
)
282 KKASSERT(lock
->owner
== td
);
286 nlv
= lv
& ~(HAMMER_LOCKF_EXCLUSIVE
| HAMMER_LOCKF_WANTED
);
289 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
))
291 } else if (nlv
== 1) {
293 if (lv
& HAMMER_LOCKF_EXCLUSIVE
)
295 if (atomic_cmpset_int(&lock
->lockval
, lv
, nlv
)) {
296 if (lv
& HAMMER_LOCKF_WANTED
)
301 panic("hammer_unlock: lock %p is not held", lock
);
307 * The calling thread must be holding a shared or exclusive lock.
308 * Returns < 0 if lock is held shared, and > 0 if held exclusively.
311 hammer_lock_status(struct hammer_lock
*lock
)
313 u_int lv
= lock
->lockval
;
315 if (lv
& HAMMER_LOCKF_EXCLUSIVE
)
319 panic("hammer_lock_status: lock must be held: %p", lock
);
323 hammer_ref(struct hammer_lock
*lock
)
325 KKASSERT(lock
->refs
>= 0);
326 atomic_add_int(&lock
->refs
, 1);
330 hammer_unref(struct hammer_lock
*lock
)
332 KKASSERT(lock
->refs
> 0);
333 atomic_subtract_int(&lock
->refs
, 1);
337 * The sync_lock must be held when doing any modifying operations on
338 * meta-data. It does not have to be held when modifying non-meta-data buffers
339 * (backend or frontend).
341 * The flusher holds the lock exclusively while all other consumers hold it
342 * shared. All modifying operations made while holding the lock are atomic
343 * in that they will be made part of the same flush group.
345 * Due to the atomicity requirement deadlock recovery code CANNOT release the
346 * sync lock, nor can we give pending exclusive sync locks priority over
347 * a shared sync lock as this could lead to a 3-way deadlock.
350 hammer_sync_lock_ex(hammer_transaction_t trans
)
352 ++trans
->sync_lock_refs
;
353 hammer_lock_ex(&trans
->hmp
->sync_lock
);
357 hammer_sync_lock_sh(hammer_transaction_t trans
)
359 ++trans
->sync_lock_refs
;
360 hammer_lock_sh(&trans
->hmp
->sync_lock
);
364 hammer_sync_lock_sh_try(hammer_transaction_t trans
)
368 ++trans
->sync_lock_refs
;
369 if ((error
= hammer_lock_sh_try(&trans
->hmp
->sync_lock
)) != 0)
370 --trans
->sync_lock_refs
;
375 hammer_sync_unlock(hammer_transaction_t trans
)
377 --trans
->sync_lock_refs
;
378 hammer_unlock(&trans
->hmp
->sync_lock
);
385 hammer_to_unix_xid(uuid_t
*uuid
)
387 return(*(u_int32_t
*)&uuid
->node
[2]);
391 hammer_guid_to_uuid(uuid_t
*uuid
, u_int32_t guid
)
393 bzero(uuid
, sizeof(*uuid
));
394 *(u_int32_t
*)&uuid
->node
[2] = guid
;
/*
 * Convert a 64 bit time value expressed in microseconds into a timespec.
 *
 * xtime / 1000000 becomes tv_sec and the remaining microseconds are
 * scaled by 1000 into tv_nsec.
 */
void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	u_int64_t secs = xtime / 1000000;
	unsigned int usecs = (unsigned int)(xtime % 1000000);

	ts->tv_sec = (unsigned long)secs;
	ts->tv_nsec = usecs * 1000L;
}
/*
 * Convert a timespec into a 64 bit time value expressed in microseconds.
 *
 * tv_nsec is divided by 1000 (sub-microsecond precision is discarded)
 * and added to tv_sec scaled by 1000000.
 */
u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}
416 * Convert a HAMMER filesystem object type to a vnode type
419 hammer_get_vnode_type(u_int8_t obj_type
)
422 case HAMMER_OBJTYPE_DIRECTORY
:
424 case HAMMER_OBJTYPE_REGFILE
:
426 case HAMMER_OBJTYPE_DBFILE
:
428 case HAMMER_OBJTYPE_FIFO
:
430 case HAMMER_OBJTYPE_SOCKET
:
432 case HAMMER_OBJTYPE_CDEV
:
434 case HAMMER_OBJTYPE_BDEV
:
436 case HAMMER_OBJTYPE_SOFTLINK
:
445 hammer_get_dtype(u_int8_t obj_type
)
448 case HAMMER_OBJTYPE_DIRECTORY
:
450 case HAMMER_OBJTYPE_REGFILE
:
452 case HAMMER_OBJTYPE_DBFILE
:
454 case HAMMER_OBJTYPE_FIFO
:
456 case HAMMER_OBJTYPE_SOCKET
:
458 case HAMMER_OBJTYPE_CDEV
:
460 case HAMMER_OBJTYPE_BDEV
:
462 case HAMMER_OBJTYPE_SOFTLINK
:
471 hammer_get_obj_type(enum vtype vtype
)
475 return(HAMMER_OBJTYPE_DIRECTORY
);
477 return(HAMMER_OBJTYPE_REGFILE
);
479 return(HAMMER_OBJTYPE_DBFILE
);
481 return(HAMMER_OBJTYPE_FIFO
);
483 return(HAMMER_OBJTYPE_SOCKET
);
485 return(HAMMER_OBJTYPE_CDEV
);
487 return(HAMMER_OBJTYPE_BDEV
);
489 return(HAMMER_OBJTYPE_SOFTLINK
);
491 return(HAMMER_OBJTYPE_UNKNOWN
);
497 * Return flags for hammer_delete_at_cursor()
500 hammer_nohistory(hammer_inode_t ip
)
502 if (ip
->hmp
->hflags
& HMNT_NOHISTORY
)
503 return(HAMMER_DELETE_DESTROY
);
504 if (ip
->ino_data
.uflags
& (SF_NOHISTORY
|UF_NOHISTORY
))
505 return(HAMMER_DELETE_DESTROY
);
510 * ALGORITHM VERSION 1:
511 * Return a namekey hash. The 64 bit namekey hash consists of a 32 bit
512 * crc in the MSB and 0 in the LSB. The caller will use the low 32 bits
513 * to generate a unique key and will scan all entries with the same upper
514 * 32 bits when issuing a lookup.
516 * 0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
518 * ALGORITHM VERSION 2:
520 * The 64 bit hash key is generated from the following components. The
521 * first three characters are encoded as 5-bit quantities, the middle
522 * N characters are hashed into a 6 bit quantity, and the last two
523 * characters are encoded as 5-bit quantities. A 32 bit hash of the
524 * entire filename is encoded in the low 32 bits. Bit 0 is set to
525 * 0 to guarantee us a 2^24 bit iteration space.
527 * 0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
529 * This gives us a domain sort for the first three characters, the last
530 * two characters, and breaks the middle space into 64 random domains.
531 * The domain sort folds upper case, lower case, digits, and punctuation
532 * spaces together, the idea being the filenames tend to not be a mix
535 * The 64 random domains act as a sub-sort for the middle characters
536 * but may cause a random seek. If the filesystem is being accessed
537 * in sorted order we should tend to get very good linearity for most
538 * filenames and devolve into more random seeks otherwise.
540 * We strip bit 63 in order to provide a positive key, this way a seek
541 * offset of 0 will represent the base of the directory.
543 * This function can never return 0. We use the MSB-0 space to synthesize
544 * artificial directory entries such as "." and "..".
547 hammer_directory_namekey(hammer_inode_t dip
, const void *name
, int len
,
548 u_int32_t
*max_iterationsp
)
552 const char *aname
= name
;
554 switch (dip
->ino_data
.cap_flags
& HAMMER_INODE_CAP_DIRHASH_MASK
) {
555 case HAMMER_INODE_CAP_DIRHASH_ALG0
:
556 key
= (int64_t)(crc32(aname
, len
) & 0x7FFFFFFF) << 32;
558 key
|= 0x100000000LL
;
559 *max_iterationsp
= 0xFFFFFFFFU
;
561 case HAMMER_INODE_CAP_DIRHASH_ALG1
:
562 key
= (u_int32_t
)crc32(aname
, len
) & 0xFFFFFFFEU
;
566 crcx
= crc32(aname
+ 3, len
- 5);
567 crcx
= crcx
^ (crcx
>> 6) ^ (crcx
>> 12);
568 key
|= (int64_t)(crcx
& 0x3F) << 42;
574 key
|= ((int64_t)(aname
[2] & 0x1F) << 48);
577 key
|= ((int64_t)(aname
[1] & 0x1F) << 53) |
578 ((int64_t)(aname
[len
-2] & 0x1F) << 37);
581 key
|= ((int64_t)(aname
[0] & 0x1F) << 58) |
582 ((int64_t)(aname
[len
-1] & 0x1F) << 32);
587 if ((key
& 0xFFFFFFFF00000000LL
) == 0)
588 key
|= 0x100000000LL
;
589 if (hammer_debug_general
& 0x0400) {
590 kprintf("namekey2: 0x%016llx %*.*s\n",
591 (long long)key
, len
, len
, aname
);
593 *max_iterationsp
= 0x00FFFFFF;
595 case HAMMER_INODE_CAP_DIRHASH_ALG2
:
596 case HAMMER_INODE_CAP_DIRHASH_ALG3
:
598 key
= 0; /* compiler warning */
599 *max_iterationsp
= 1; /* sanity */
600 panic("hammer_directory_namekey: bad algorithm %p\n", dip
);
607 * Convert string after @@ (@@ not included) to TID. Returns 0 on success,
610 * If this function fails *ispfsp, *tidp, and *localizationp will not
614 hammer_str_to_tid(const char *str
, int *ispfsp
,
615 hammer_tid_t
*tidp
, u_int32_t
*localizationp
)
618 u_int32_t localization
;
624 * Forms allowed for TID: "0x%016llx"
627 tid
= strtouq(str
, &ptr
, 0);
629 if (n
== 2 && str
[0] == '-' && str
[1] == '1') {
631 } else if (n
== 18 && str
[0] == '0' && (str
[1] | 0x20) == 'x') {
638 * Forms allowed for PFS: ":%05d" (i.e. "...:0" would be illegal).
642 localization
= strtoul(str
+ 1, &ptr
, 10) << 16;
648 localization
= *localizationp
;
653 * Any trailing junk invalidates special extension handling.
658 *localizationp
= localization
;
664 hammer_crc_set_blockmap(hammer_blockmap_t blockmap
)
666 blockmap
->entry_crc
= crc32(blockmap
, HAMMER_BLOCKMAP_CRCSIZE
);
670 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk
)
672 ondisk
->vol_crc
= crc32(ondisk
, HAMMER_VOL_CRCSIZE1
) ^
673 crc32(&ondisk
->vol_crc
+ 1, HAMMER_VOL_CRCSIZE2
);
677 hammer_crc_test_blockmap(hammer_blockmap_t blockmap
)
681 crc
= crc32(blockmap
, HAMMER_BLOCKMAP_CRCSIZE
);
682 return (blockmap
->entry_crc
== crc
);
686 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk
)
690 crc
= crc32(ondisk
, HAMMER_VOL_CRCSIZE1
) ^
691 crc32(&ondisk
->vol_crc
+ 1, HAMMER_VOL_CRCSIZE2
);
692 return (ondisk
->vol_crc
== crc
);
696 hammer_crc_test_btree(hammer_node_ondisk_t ondisk
)
700 crc
= crc32(&ondisk
->crc
+ 1, HAMMER_BTREE_CRCSIZE
);
701 return (ondisk
->crc
== crc
);
705 * Test or set the leaf->data_crc field. Deal with any special cases given
706 * a generic B-Tree leaf element and its data.
708 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
709 * to be updated in-place.
712 hammer_crc_test_leaf(void *data
, hammer_btree_leaf_elm_t leaf
)
716 if (leaf
->data_len
== 0) {
719 switch(leaf
->base
.rec_type
) {
720 case HAMMER_RECTYPE_INODE
:
721 if (leaf
->data_len
!= sizeof(struct hammer_inode_data
))
723 crc
= crc32(data
, HAMMER_INODE_CRCSIZE
);
726 crc
= crc32(data
, leaf
->data_len
);
730 return (leaf
->data_crc
== crc
);
734 hammer_crc_set_leaf(void *data
, hammer_btree_leaf_elm_t leaf
)
736 if (leaf
->data_len
== 0) {
739 switch(leaf
->base
.rec_type
) {
740 case HAMMER_RECTYPE_INODE
:
741 KKASSERT(leaf
->data_len
==
742 sizeof(struct hammer_inode_data
));
743 leaf
->data_crc
= crc32(data
, HAMMER_INODE_CRCSIZE
);
746 leaf
->data_crc
= crc32(data
, leaf
->data_len
);
753 hkprintf(const char *ctl
, ...)
757 if (hammer_debug_debug
) {
765 * Return the block size at the specified file offset.
768 hammer_blocksize(int64_t file_offset
)
770 if (file_offset
< HAMMER_XDEMARC
)
771 return(HAMMER_BUFSIZE
);
773 return(HAMMER_XBUFSIZE
);
777 hammer_blockoff(int64_t file_offset
)
779 if (file_offset
< HAMMER_XDEMARC
)
780 return((int)file_offset
& HAMMER_BUFMASK
);
782 return((int)file_offset
& HAMMER_XBUFMASK
);
786 * Return the demarcation point between the two offsets where
787 * the block size changes.
790 hammer_blockdemarc(int64_t file_offset1
, int64_t file_offset2
)
792 if (file_offset1
< HAMMER_XDEMARC
) {
793 if (file_offset2
<= HAMMER_XDEMARC
)
794 return(file_offset2
);
795 return(HAMMER_XDEMARC
);
797 panic("hammer_blockdemarc: illegal range %lld %lld\n",
798 (long long)file_offset1
, (long long)file_offset2
);
802 hammer_fsid_to_udev(uuid_t
*uuid
)
806 crc
= crc32(uuid
, sizeof(*uuid
));