/*
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/vfs_rangelock.c,v 1.2 2006/12/23 00:35:04 swildner Exp $
 */
/*
 * This module implements hard range locks for files and directories.  It is
 * not to be confused with the UNIX advisory lock mechanism.  This module
 * will allow the kernel and VFS to break large I/O requests into smaller
 * pieces without losing atomicity guarantees and, eventually, this module
 * will be responsible for providing hooks for remote cache coherency
 * protocols as well.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
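/*
 * Illustrative usage sketch: a caller describes the byte range and the
 * shared/exclusive mode in a struct vrangelock, then brackets the atomic
 * portion of its I/O with vrange_lock() and vrange_unlock().  The field
 * and flag names below are the ones used in this file; how the structure
 * is normally initialized (any helper macro, and where offset and length
 * come from) is assumed here for illustration only.
 *
 *      struct vrangelock vr;
 *
 *      vr.vr_flags = RNGL_SHARED;      (or 0 for an exclusive range)
 *      vr.vr_offset = offset;
 *      vr.vr_length = length;
 *      vrange_lock(vp, &vr);
 *      ... perform the I/O on [offset, offset + length) ...
 *      vrange_unlock(vp, &vr);
 */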
static void vrange_lock_overlapped(struct vnode *vp,
                        struct vrangelock *vr, struct vrangelock *scan);
static int vrange_lock_conflicted(struct vnode *vp, struct vrangelock *vr);
/*
 * Lock a range within a vnode.
 *
 * The lock list is sorted by vr_offset.
 */
void
vrange_lock(struct vnode *vp, struct vrangelock *vr)
{
    struct vrangelock *scan;
    off_t eoff;

    eoff = vr->vr_offset + vr->vr_length;

    KKASSERT((vr->vr_flags & RNGL_ONLIST) == 0);
    vr->vr_flags |= RNGL_ONLIST;

    TAILQ_FOREACH(scan, &vp->v_range.vh_list, vr_node) {
        /*
         * If the new element is entirely in front of the scan element
         * we are done.  If it is entirely beyond the scan element we
         * loop.  Otherwise an overlap has occurred.
         */
        if (eoff <= scan->vr_offset) {
            TAILQ_INSERT_BEFORE(scan, vr, vr_node);
            return;
        }
        if (vr->vr_offset >= scan->vr_offset + scan->vr_length)
            continue;
        vrange_lock_overlapped(vp, vr, scan);
        return;
    }
    TAILQ_INSERT_TAIL(&vp->v_range.vh_list, vr, vr_node);
}
/*
 * An overlap occurred.  The request is still inserted sorted based on
 * vr_offset but we must also scan the conflict space and block while
 * any conflicts remain.
 */
static void
vrange_lock_overlapped(struct vnode *vp,
                        struct vrangelock *vr, struct vrangelock *scan)
{
    int conflicted = 0;
    int inserted = 0;
    int warned = 0;
    off_t eoff;

    eoff = vr->vr_offset + vr->vr_length;

    while (scan->vr_offset < eoff) {
        if ((vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0) {
            /* exclusive conflict: flag the holder and mark us waiting */
            scan->vr_flags |= RNGL_CHECK;
            vr->vr_flags |= RNGL_WAITING;
            conflicted = 1;
        }
        if (inserted == 0 && vr->vr_offset < scan->vr_offset) {
            TAILQ_INSERT_BEFORE(scan, vr, vr_node);
            inserted = 1;
        }
        if ((scan = TAILQ_NEXT(scan, vr_node)) == NULL) {
            if (inserted == 0) {
                TAILQ_INSERT_TAIL(&vp->v_range.vh_list, vr, vr_node);
                inserted = 1;
            }
            break;
        }
    }

    /*
     * If we scanned past the end of the conflict space without having
     * inserted the request, it belongs in front of the element we
     * stopped on.
     */
    if (inserted == 0)
        TAILQ_INSERT_BEFORE(scan, vr, vr_node);

    /*
     * Sleep until the conflict has been resolved.
     */
    while (conflicted) {
        if (tsleep(&vp->v_range.vh_list, 0, "vrnglk", hz * 3) == EWOULDBLOCK) {
            if (warned == 0)
                kprintf("warning: conflicted lock vp %p %lld,%lld blocked\n",
                        vp, vr->vr_offset, vr->vr_length);
            warned = 1;
        }
        conflicted = vrange_lock_conflicted(vp, vr);
    }
    if (warned) {
        kprintf("warning: conflicted lock vp %p %lld,%lld unblocked\n",
                vp, vr->vr_offset, vr->vr_length);
    }
}
/*
 * Check for conflicts by scanning both forwards and backwards from the
 * node in question.  The list is sorted by vr_offset but ending offsets
 * may vary.  Because of this, the reverse scan cannot stop early.
 *
 * Return 0 on success, 1 if the lock is still conflicted.  We do not
 * check elements that are waiting as that might result in a deadlock.
 * We can stop the moment we hit a conflict.
 */
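/*
 * A concrete illustration of the point above (the ranges here are examples
 * only, not taken from the original code): with resident locks covering
 * [0,5000), [100,150) and [300,340), a waiter covering [350,450) does not
 * overlap its immediate predecessor [300,340), yet it does overlap
 * [0,5000) at the front of the list, so the backward walk has to continue
 * all the way to the head.  The overlap tests below are the usual interval
 * checks: scan->vr_offset + scan->vr_length > vr->vr_offset for the
 * backward direction, and vr->vr_offset + vr->vr_length > scan->vr_offset
 * (via eoff) for the forward direction.
 */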
static int
vrange_lock_conflicted(struct vnode *vp, struct vrangelock *vr)
{
    struct vrangelock *scan;
    off_t eoff;

    eoff = vr->vr_offset + vr->vr_length;

    KKASSERT(vr->vr_flags & RNGL_WAITING);

    /*
     * Reverse scan.  Earlier elements may still extend past vr_offset,
     * so every element back to the head has to be checked.
     */
    scan = vr;
    while ((scan = TAILQ_PREV(scan, vrangelock_list, vr_node)) != NULL) {
        if (scan->vr_flags & RNGL_WAITING)
            continue;
        if (scan->vr_offset + scan->vr_length > vr->vr_offset) {
            if ((vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0) {
                scan->vr_flags |= RNGL_CHECK;
                return(1);
            }
        }
    }

    /*
     * Forward scan.  The list is sorted by vr_offset, so we can stop as
     * soon as an element begins at or beyond our ending offset.
     */
    scan = vr;
    while ((scan = TAILQ_NEXT(scan, vr_node)) != NULL) {
        if (eoff <= scan->vr_offset)
            break;
        if (scan->vr_flags & RNGL_WAITING)
            continue;
        if ((vr->vr_flags & scan->vr_flags & RNGL_SHARED) == 0) {
            scan->vr_flags |= RNGL_CHECK;
            return(1);
        }
    }
    vr->vr_flags &= ~RNGL_WAITING;
    return(0);
}
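/*
 * Release a range lock.  If a waiter flagged this lock with RNGL_CHECK
 * while blocking on a conflict with it, wake up the sleepers on the
 * vnode's range lock list so they can re-evaluate their conflicts.
 */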
void
vrange_unlock(struct vnode *vp, struct vrangelock *vr)
{
    KKASSERT((vr->vr_flags & RNGL_ONLIST) != 0);
    vr->vr_flags &= ~RNGL_ONLIST;
    TAILQ_REMOVE(&vp->v_range.vh_list, vr, vr_node);
    if (vr->vr_flags & RNGL_CHECK) {
        vr->vr_flags &= ~RNGL_CHECK;
        wakeup(&vp->v_range.vh_list);
    }
}